[gold] Introduce GitCommits table

Bug: skia:11367
Change-Id: I012945a3820c2ccc51dd08929c4ca6bb47e27d7f
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/375662
Reviewed-by: Leandro Lovisolo <lovisolo@google.com>
diff --git a/golden/go/diff/worker/worker.go b/golden/go/diff/worker/worker.go
index 79efdd9..3543e69 100644
--- a/golden/go/diff/worker/worker.go
+++ b/golden/go/diff/worker/worker.go
@@ -283,9 +283,8 @@
 // getStartingTile returns the commit ID which is the beginning of the tile of interest (so we
 // get enough data to do our comparisons).
 func (w *WorkerImpl) getStartingTile(ctx context.Context) (schema.TileID, error) {
-	row := w.db.QueryRow(ctx, `SELECT tile_id FROM Commits
+	row := w.db.QueryRow(ctx, `SELECT tile_id FROM CommitsWithData
 AS OF SYSTEM TIME '-0.1s'
-WHERE has_data = TRUE
 ORDER BY commit_id DESC
 LIMIT 1 OFFSET $1`, w.commitsWithDataToSearch-1)
 	var lc pgtype.Int4
diff --git a/golden/go/sql/databuilder/databuilder.go b/golden/go/sql/databuilder/databuilder.go
index d8feb87..39e885b 100644
--- a/golden/go/sql/databuilder/databuilder.go
+++ b/golden/go/sql/databuilder/databuilder.go
@@ -36,6 +36,7 @@
 
 	changelistBuilders  []*ChangelistBuilder
 	commitsWithData     *CommitBuilder
+	commitsWithNoData   *GitCommitBuilder
 	diffMetrics         []schema.DiffMetricRow
 	expectationBuilders []*ExpectationsBuilder
 	groupingKeys        []string
@@ -48,12 +49,21 @@
 // It panics if called more than once.
 func (b *TablesBuilder) CommitsWithData() *CommitBuilder {
 	if b.commitsWithData != nil {
-		logAndPanic("Cannot call Commits() more than once.")
+		logAndPanic("Cannot call CommitsWithData() more than once.")
 	}
 	b.commitsWithData = &CommitBuilder{}
 	return b.commitsWithData
 }
 
+// CommitsWithNoData returns a new GitCommitBuilder for the GitCommits table. Panics if called twice.
+func (b *TablesBuilder) CommitsWithNoData() *GitCommitBuilder {
+	if b.commitsWithNoData != nil {
+		logAndPanic("Cannot call CommitsWithNoData() more than once.")
+	}
+	b.commitsWithNoData = &GitCommitBuilder{}
+	return b.commitsWithNoData
+}
+
 // SetDigests loads a mapping of runes to the digest that they represent. This allows
 // specifying the trace history be done with a string of characters. If a rune is invalid or
 // the digests are invalid, this will panic. It panics if called more than once.
@@ -91,7 +101,7 @@
 	if b.commitsWithData == nil {
 		logAndPanic("Must add commits before traces")
 	}
-	if len(b.commitsWithData.commits) == 0 {
+	if len(b.commitsWithData.commitRows) == 0 {
 		logAndPanic("Must specify at least one commit")
 	}
 	if len(b.groupingKeys) == 0 {
@@ -101,7 +111,7 @@
 		logAndPanic("Must add digests before traces")
 	}
 	tb := &TraceBuilder{
-		commits:         b.commitsWithData.commits,
+		commits:         b.commitsWithData.commitRows,
 		commonKeys:      params,
 		symbolsToDigest: b.runeToDigest,
 		groupingKeys:    b.groupingKeys,
@@ -378,10 +388,10 @@
 	}
 	var tables schema.Tables
 
-	tables.Commits = b.commitsWithData.commits
-	applyTilingToCommits(tables.Commits, b.TileWidth)
+	tables.CommitsWithData = b.commitsWithData.commitRows
+	applyTilingToCommits(tables.CommitsWithData, b.TileWidth)
+	tables.GitCommits = assembleGitCommits(b.commitsWithData, b.commitsWithNoData)
 
-	commitsWithData := map[schema.CommitID]bool{}
 	valuesAtHead := map[schema.MD5Hash]*schema.ValueAtHeadRow{}
 	for _, traceBuilder := range b.traceBuilders {
 		// Add unique rows from the tables gathered by tracebuilders.
@@ -421,7 +431,6 @@
 						panic("Incomplete data - you must call IngestedFrom()")
 					}
 					tables.TraceValues = append(tables.TraceValues, *tv)
-					commitsWithData[tv.CommitID] = true
 					vHead := valuesAtHead[sql.AsMD5Hash(tv.TraceID)]
 					vHead.Digest = tv.Digest
 					vHead.MostRecentCommitID = tv.CommitID
@@ -430,15 +439,9 @@
 			}
 		}
 	}
-	for i := range tables.Commits {
-		cid := tables.Commits[i].CommitID
-		if commitsWithData[cid] {
-			tables.Commits[i].HasData = true
-		}
-	}
 
-	tables.TiledTraceDigests = b.computeTiledTraceDigests(tables.Commits)
-	tables.PrimaryBranchParams = b.computePrimaryBranchParams(tables.Commits)
+	tables.TiledTraceDigests = b.computeTiledTraceDigests(tables.CommitsWithData)
+	tables.PrimaryBranchParams = b.computePrimaryBranchParams(tables.CommitsWithData)
 	exp := b.finalizeExpectations()
 	for _, e := range exp {
 		tables.Expectations = append(tables.Expectations, *e)
@@ -529,7 +532,18 @@
 	return tables
 }
 
-func applyTilingToCommits(commits []schema.CommitRow, tileWidth int) {
+// assembleGitCommits merges both builders' rows (withNoData may be nil) into a new, time-sorted slice.
+func assembleGitCommits(withData *CommitBuilder, withNoData *GitCommitBuilder) []schema.GitCommitRow {
+	gitCommits := append([]schema.GitCommitRow(nil), withData.gitRows...) // copy: don't alias builder
+	if withNoData != nil {
+		gitCommits = append(gitCommits, withNoData.gitRows...)
+	}
+	byTime := func(i, j int) bool { return gitCommits[i].CommitTime.Before(gitCommits[j].CommitTime) }
+	sort.SliceStable(gitCommits, byTime) // stable: equal timestamps keep insertion order
+	return gitCommits
+}
+
+func applyTilingToCommits(commits []schema.CommitWithDataRow, tileWidth int) {
 	// We sort the commits by CommitID in lexicographical order. By definition of CommitID, this is
 	// the order they happen in.
 	sort.Slice(commits, func(i, j int) bool {
@@ -613,7 +627,7 @@
 	digest  schema.MD5Hash
 }
 
-func (b *TablesBuilder) computeTiledTraceDigests(commits []schema.CommitRow) []schema.TiledTraceDigestRow {
+func (b *TablesBuilder) computeTiledTraceDigests(commits []schema.CommitWithDataRow) []schema.TiledTraceDigestRow {
 	seenRows := map[tiledTraceDigest]bool{}
 	for _, builder := range b.traceBuilders {
 		for _, xtv := range builder.traceValues {
@@ -645,7 +659,7 @@
 	return rv
 }
 
-func getTileID(id schema.CommitID, commits []schema.CommitRow) schema.TileID {
+func getTileID(id schema.CommitID, commits []schema.CommitWithDataRow) schema.TileID {
 	for _, c := range commits {
 		if c.CommitID == id {
 			return c.TileID
@@ -656,7 +670,7 @@
 
 // computePrimaryBranchParams goes through all trace data and returns the PrimaryBranchParamRow
 // with the appropriately tiled key/value pairs that showed up in the trace keys and params.
-func (b *TablesBuilder) computePrimaryBranchParams(commits []schema.CommitRow) []schema.PrimaryBranchParamRow {
+func (b *TablesBuilder) computePrimaryBranchParams(commits []schema.CommitWithDataRow) []schema.PrimaryBranchParamRow {
 	seenRows := map[schema.PrimaryBranchParamRow]bool{}
 	for _, builder := range b.traceBuilders {
 		findTraceKeys := func(traceID schema.TraceID) paramtools.Params {
@@ -748,11 +762,12 @@
 
 // CommitBuilder has methods for easily building commit history. All methods are chainable.
 type CommitBuilder struct {
-	commits []schema.CommitRow
+	commitRows []schema.CommitWithDataRow // CommitsWithData rows; tiling is computed in Build.
+	gitRows    []schema.GitCommitRow      // GitCommits rows, one appended alongside each commitRows entry.
 }
 
 // Insert adds a commit with the given data. It panics if the commitTime is not formatted to
-// RFC3339 or if the commitID is not monotonically increasing from the last one.
+// RFC3339.
 func (b *CommitBuilder) Insert(commitID schema.CommitID, author, subject, commitTime string) *CommitBuilder {
 	h := sha1.Sum([]byte(commitID))
 	gitHash := hex.EncodeToString(h[:])
@@ -760,13 +775,40 @@
 	if err != nil {
 		logAndPanic("Invalid time %q: %s", commitTime, err)
 	}
-	b.commits = append(b.commits, schema.CommitRow{
-		CommitID:    commitID,
+	b.commitRows = append(b.commitRows, schema.CommitWithDataRow{
+		CommitID: commitID,
+		// tiling will be computed in Build.
+	})
+	b.gitRows = append(b.gitRows, schema.GitCommitRow{
+		GitHash:     gitHash,
+		CommitID:    &commitID,
+		CommitTime:  ct,
+		AuthorEmail: author,
+		Subject:     subject,
+	})
+	return b
+}
+
+// GitCommitBuilder builds GitCommits rows that have no CommitID. All methods are chainable.
+type GitCommitBuilder struct {
+	gitRows []schema.GitCommitRow
+}
+
+// Insert adds a commit with the given data. It panics if the commitTime is not formatted to
+// RFC3339 or if the gitHash is not exactly 40 characters long.
+func (b *GitCommitBuilder) Insert(gitHash string, author, subject, commitTime string) *GitCommitBuilder {
+	if len(gitHash) != 40 {
+		logAndPanic("Invalid git hash %q: must be 40 chars", gitHash)
+	}
+	ct, err := time.Parse(time.RFC3339, commitTime)
+	if err != nil {
+		logAndPanic("Invalid time %q: %s", commitTime, err)
+	}
+	b.gitRows = append(b.gitRows, schema.GitCommitRow{ // CommitID is left nil (no data).
 		GitHash:     gitHash,
 		CommitTime:  ct,
 		AuthorEmail: author,
 		Subject:     subject,
-		HasData:     false,
 	})
 	return b
 }
@@ -774,7 +816,7 @@
 // TraceBuilder has methods for easily building trace data. All methods are chainable.
 type TraceBuilder struct {
 	// inputs needed upon creation
-	commits         []schema.CommitRow
+	commits         []schema.CommitWithDataRow
 	commonKeys      paramtools.Params
 	groupingKeys    []string
 	symbolsToDigest map[rune]schema.DigestBytes
diff --git a/golden/go/sql/databuilder/databuilder_test.go b/golden/go/sql/databuilder/databuilder_test.go
index eca361d..0a2a4cf 100644
--- a/golden/go/sql/databuilder/databuilder_test.go
+++ b/golden/go/sql/databuilder/databuilder_test.go
@@ -32,8 +32,9 @@
 		Insert("001", "author_one", "subject_one", "2020-12-05T16:00:00Z").
 		Insert("002", "author_two", "subject_two", "2020-12-06T17:00:00Z").
 		Insert("003", "author_three", "subject_three", "2020-12-07T18:00:00Z").
-		Insert("004", "author_four", "subject_four", "2020-12-08T19:00:00Z").
-		Insert("005", "author_five", "no data yet", "2020-12-08T20:00:00Z")
+		Insert("004", "author_four", "subject_four", "2020-12-08T19:00:00Z")
+	b.CommitsWithNoData().
+		Insert("5555555555555555555555555555555555555555", "author_five", "no data yet", "2020-12-08T20:00:00Z")
 	b.SetDigests(map[rune]types.Digest{
 		// by convention, upper case are positively triaged, lowercase
 		// are untriaged, numbers are negative, symbols are special.
@@ -49,15 +50,15 @@
 		"color_mode":      "rgb",
 		types.CorpusField: "corpus_one",
 	}).History(
-		"AAbb-",
-		"D--D-",
+		"AAbb",
+		"D--D",
 	).Keys([]paramtools.Params{{
 		types.PrimaryKeyField: "test_one",
 	}, {
 		types.PrimaryKeyField: "test_two",
 	}}).OptionsAll(paramtools.Params{"ext": "png"}).
-		IngestedFrom([]string{"crosshatch_file1", "crosshatch_file2", "crosshatch_file3", "crosshatch_file4", ""},
-			[]string{"2020-12-11T10:09:00Z", "2020-12-11T10:10:00Z", "2020-12-11T10:11:00Z", "2020-12-11T10:12:13Z", ""})
+		IngestedFrom([]string{"crosshatch_file1", "crosshatch_file2", "crosshatch_file3", "crosshatch_file4"},
+			[]string{"2020-12-11T10:09:00Z", "2020-12-11T10:10:00Z", "2020-12-11T10:11:00Z", "2020-12-11T10:12:13Z"})
 
 	b.AddTracesWithCommonKeys(paramtools.Params{
 		"os":                  "Windows10.7",
@@ -65,11 +66,11 @@
 		"color_mode":          "rgb",
 		types.CorpusField:     "corpus_one",
 		types.PrimaryKeyField: "test_two",
-	}).History("11D--").
+	}).History("11D-").
 		Keys([]paramtools.Params{{types.PrimaryKeyField: "test_one"}}).
 		OptionsPerTrace([]paramtools.Params{{"ext": "png"}}).
-		IngestedFrom([]string{"windows_file1", "windows_file2", "windows_file3", "", ""},
-			[]string{"2020-12-11T14:15:00Z", "2020-12-11T15:16:00Z", "2020-12-11T16:17:00Z", "", ""})
+		IngestedFrom([]string{"windows_file1", "windows_file2", "windows_file3", ""},
+			[]string{"2020-12-11T14:15:00Z", "2020-12-11T15:16:00Z", "2020-12-11T16:17:00Z", ""})
 
 	b.AddTriageEvent("user_one", "2020-12-12T12:12:12Z").
 		ExpectationsForGrouping(map[string]string{
@@ -156,47 +157,50 @@
 		Keys:                 paramtools.Params{"color_mode": "rgb", "device": "NUC1234", "name": "test_two", "os": "Windows10.7", "source_type": "corpus_one"},
 		MatchesAnyIgnoreRule: schema.NBTrue,
 	}}, tables.Traces)
-	assert.Equal(t, []schema.CommitRow{{
-		CommitID:    "001",
-		TileID:      0,
+	assert.Equal(t, []schema.CommitWithDataRow{{
+		CommitID: "001",
+		TileID:   0,
+	}, {
+		CommitID: "002",
+		TileID:   0,
+	}, {
+		CommitID: "003",
+		TileID:   0,
+	}, {
+		CommitID: "004",
+		TileID:   1,
+	}}, tables.CommitsWithData)
+	assert.Equal(t, []schema.GitCommitRow{{
 		GitHash:     gitHash("001"),
+		CommitID:    cID("001"),
 		CommitTime:  time.Date(2020, time.December, 5, 16, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_one",
 		Subject:     "subject_one",
-		HasData:     true,
 	}, {
-		CommitID:    "002",
-		TileID:      0,
 		GitHash:     gitHash("002"),
+		CommitID:    cID("002"),
 		CommitTime:  time.Date(2020, time.December, 6, 17, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_two",
 		Subject:     "subject_two",
-		HasData:     true,
 	}, {
-		CommitID:    "003",
-		TileID:      0,
 		GitHash:     gitHash("003"),
+		CommitID:    cID("003"),
 		CommitTime:  time.Date(2020, time.December, 7, 18, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_three",
 		Subject:     "subject_three",
-		HasData:     true,
 	}, {
-		CommitID:    "004",
-		TileID:      1,
 		GitHash:     gitHash("004"),
+		CommitID:    cID("004"),
 		CommitTime:  time.Date(2020, time.December, 8, 19, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_four",
 		Subject:     "subject_four",
-		HasData:     true,
 	}, {
-		CommitID:    "005",
-		TileID:      1,
-		GitHash:     gitHash("005"),
+		GitHash:     "5555555555555555555555555555555555555555",
+		CommitID:    nil,
 		CommitTime:  time.Date(2020, time.December, 8, 20, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_five",
 		Subject:     "no data yet",
-		HasData:     false,
-	}}, tables.Commits)
+	}}, tables.GitCommits)
 
 	pngOptionsID := h(`{"ext":"png"}`)
 	testOneGroupingID := h(`{"name":"test_one","source_type":"corpus_one"}`)
@@ -472,6 +476,10 @@
 	}}, tables.IgnoreRules)
 }
 
+func cID(s schema.CommitID) *schema.CommitID {
+	return &s
+}
+
 func TestBuild_CalledWithChangelistData_ProducesCorrectData(t *testing.T) {
 	unittest.SmallTest(t)
 
@@ -897,40 +905,61 @@
 		Insert("0098", "author_two", "subject_98", "2020-12-05T14:00:00Z").
 		Insert("2000", "author_2k", "subject_2k", "2022-02-02T02:02:00Z")
 
+	b.CommitsWithNoData().
+		Insert("4444444444444444444444444444444444444444", "somebody", "no data 1900", "2021-02-03T04:05:06Z").
+		Insert("3333333333333333333333333333333333333333", "somebody", "no data 1850", "2021-02-03T04:05:00Z")
+
 	tables := b.Build()
-	assert.Equal(t, []schema.CommitRow{{
-		CommitID:    "0098",
-		TileID:      0,
+	assert.Equal(t, []schema.CommitWithDataRow{{
+		CommitID: "0098",
+		TileID:   0,
+	}, {
+		CommitID: "0099",
+		TileID:   0,
+	}, {
+		CommitID: "0100",
+		TileID:   1,
+	}, {
+		CommitID: "2000",
+		TileID:   1,
+	}}, tables.CommitsWithData)
+	assert.Equal(t, []schema.GitCommitRow{{
 		GitHash:     gitHash("0098"),
+		CommitID:    cID("0098"),
 		CommitTime:  time.Date(2020, time.December, 5, 14, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_two",
 		Subject:     "subject_98",
-		HasData:     false,
 	}, {
-		CommitID:    "0099",
-		TileID:      0,
 		GitHash:     gitHash("0099"),
+		CommitID:    cID("0099"),
 		CommitTime:  time.Date(2020, time.December, 5, 15, 0, 0, 0, time.UTC),
 		AuthorEmail: "author_one",
 		Subject:     "subject_99",
-		HasData:     false,
 	}, {
-		CommitID:    "0100",
-		TileID:      1,
 		GitHash:     gitHash("0100"),
+		CommitID:    cID("0100"),
 		CommitTime:  time.Date(2021, time.January, 1, 1, 1, 0, 0, time.UTC),
 		AuthorEmail: "author_100",
 		Subject:     "subject_100",
-		HasData:     false,
 	}, {
-		CommitID:    "2000",
-		TileID:      1,
+		GitHash:     "3333333333333333333333333333333333333333",
+		CommitID:    nil,
+		CommitTime:  time.Date(2021, time.February, 3, 4, 5, 0, 0, time.UTC),
+		AuthorEmail: "somebody",
+		Subject:     "no data 1850",
+	}, {
+		GitHash:     "4444444444444444444444444444444444444444",
+		CommitID:    nil,
+		CommitTime:  time.Date(2021, time.February, 3, 4, 5, 6, 0, time.UTC),
+		AuthorEmail: "somebody",
+		Subject:     "no data 1900",
+	}, {
 		GitHash:     gitHash("2000"),
+		CommitID:    cID("2000"),
 		CommitTime:  time.Date(2022, time.February, 2, 2, 2, 0, 0, time.UTC),
 		AuthorEmail: "author_2k",
 		Subject:     "subject_2k",
-		HasData:     false,
-	}}, tables.Commits)
+	}}, tables.GitCommits)
 }
 
 func TestSetDigests_CalledMultipleTimes_Panics(t *testing.T) {
diff --git a/golden/go/sql/datakitchensink/kitchensink.go b/golden/go/sql/datakitchensink/kitchensink.go
index 6e42877..9a19af3 100644
--- a/golden/go/sql/datakitchensink/kitchensink.go
+++ b/golden/go/sql/datakitchensink/kitchensink.go
@@ -23,16 +23,17 @@
 		Insert("0000000100", UserThree, "commit 100", "2020-12-03T00:00:00Z").
 		Insert("0000000101", UserTwo, "Update Windows 10.2 to 10.3", "2020-12-04T00:00:00Z").
 		Insert("0000000102", UserOne, "commit 102", "2020-12-05T00:00:00Z").
-		// TODO(kjlubick) bring this back with GitCommits table.
-		//Insert("0000000103", UserFour, "no data 103", "2020-12-06T01:00:00Z").
-		//Insert("0000000104", UserFour, "no data 104", "2020-12-06T02:00:00Z").
-		//Insert("0000000105", UserFour, "no data 105", "2020-12-06T03:00:00Z").
 		Insert("0000000106", UserTwo, "Add walleye device", "2020-12-07T00:00:00Z").
 		Insert("0000000107", UserThree, "Add taimen device [flaky]", "2020-12-08T00:00:00Z").
 		Insert("0000000108", UserTwo, "Fix iOS Triangle tests [accidental break of circle tests]", "2020-12-09T00:00:00Z").
 		Insert("0000000109", UserOne, "Enable autotriage of walleye", "2020-12-10T00:00:00Z").
 		Insert("0000000110", UserTwo, "commit 110", "2020-12-11T00:00:00Z")
 
+	b.CommitsWithNoData().
+		Insert("0103010301030103010301030103010301030103", UserFour, "no data 103", "2020-12-06T01:00:00Z").
+		Insert("0104010401040104010401040104010401040104", UserFour, "no data 104", "2020-12-06T02:00:00Z").
+		Insert("0105010501050105010501050105010501050105", UserFour, "no data 105", "2020-12-06T03:00:00Z")
+
 	b.SetDigests(map[rune]types.Digest{
 		// by convention, upper case are positively triaged, lowercase
 		// are untriaged, numbers are negative, symbols are special.
diff --git a/golden/go/sql/datakitchensink/kitchensink_test.go b/golden/go/sql/datakitchensink/kitchensink_test.go
index 89ce6f9..7683404 100644
--- a/golden/go/sql/datakitchensink/kitchensink_test.go
+++ b/golden/go/sql/datakitchensink/kitchensink_test.go
@@ -71,4 +71,19 @@
 	count = 0
 	assert.NoError(t, row.Scan(&count))
 	assert.Equal(t, 6, count)
+
+	row = db.QueryRow(ctx, "SELECT count(*) from GitCommits")
+	count = 0
+	assert.NoError(t, row.Scan(&count))
+	assert.Equal(t, 13, count)
+
+	row = db.QueryRow(ctx, "SELECT count(*) from GitCommits WHERE commit_id IS NULL")
+	count = 0
+	assert.NoError(t, row.Scan(&count))
+	assert.Equal(t, 3, count)
+
+	row = db.QueryRow(ctx, "SELECT count(*) from CommitsWithData")
+	count = 0
+	assert.NoError(t, row.Scan(&count))
+	assert.Equal(t, 10, count)
 }
diff --git a/golden/go/sql/schema/sql.go b/golden/go/sql/schema/sql.go
index 1498d6a..d170558 100644
--- a/golden/go/sql/schema/sql.go
+++ b/golden/go/sql/schema/sql.go
@@ -12,14 +12,9 @@
   last_ingested_data TIMESTAMP WITH TIME ZONE NOT NULL,
   INDEX system_status_ingested_idx (system, status, last_ingested_data)
 );
-CREATE TABLE IF NOT EXISTS Commits (
+CREATE TABLE IF NOT EXISTS CommitsWithData (
   commit_id STRING PRIMARY KEY,
-  tile_id INT4 NOT NULL,
-  git_hash STRING NOT NULL,
-  commit_time TIMESTAMP WITH TIME ZONE NOT NULL,
-  author_email STRING NOT NULL,
-  subject STRING NOT NULL,
-  has_data BOOL NOT NULL
+  tile_id INT4 NOT NULL
 );
 CREATE TABLE IF NOT EXISTS DiffMetrics (
   left_digest BYTES,
@@ -55,6 +50,14 @@
   expectation_record_id UUID,
   PRIMARY KEY (grouping_id, digest)
 );
+CREATE TABLE IF NOT EXISTS GitCommits (
+  git_hash STRING PRIMARY KEY,
+  commit_id STRING,
+  commit_time TIMESTAMP WITH TIME ZONE NOT NULL,
+  author_email STRING NOT NULL,
+  subject STRING NOT NULL,
+  INDEX commit_idx (commit_id)
+);
 CREATE TABLE IF NOT EXISTS Groupings (
   grouping_id BYTES PRIMARY KEY,
   keys JSONB NOT NULL
diff --git a/golden/go/sql/schema/tables.go b/golden/go/sql/schema/tables.go
index 4a9734d..071a41d 100644
--- a/golden/go/sql/schema/tables.go
+++ b/golden/go/sql/schema/tables.go
@@ -80,11 +80,12 @@
 //go:generate go run ../exporter/tosql --output_file sql.go --logtostderr --output_pkg schema
 type Tables struct {
 	Changelists                 []ChangelistRow                 `sql_backup:"weekly"`
-	Commits                     []CommitRow                     `sql_backup:"daily"`
+	CommitsWithData             []CommitWithDataRow             `sql_backup:"daily"`
 	DiffMetrics                 []DiffMetricRow                 `sql_backup:"monthly"`
 	ExpectationDeltas           []ExpectationDeltaRow           `sql_backup:"daily"`
 	ExpectationRecords          []ExpectationRecordRow          `sql_backup:"daily"`
 	Expectations                []ExpectationRow                `sql_backup:"daily"`
+	GitCommits                  []GitCommitRow                  `sql_backup:"daily"`
 	Groupings                   []GroupingRow                   `sql_backup:"monthly"`
 	IgnoreRules                 []IgnoreRuleRow                 `sql_backup:"daily"`
 	Options                     []OptionsRow                    `sql_backup:"monthly"`
@@ -142,7 +143,9 @@
 		[]interface{}{r.Shard, r.TraceID, r.CommitID, r.Digest, r.GroupingID, r.OptionsID, r.SourceFileID}
 }
 
-type CommitRow struct {
+// CommitWithDataRow represents a commit that has produced some data on the primary branch.
+// It is expected to be created during ingestion.
+type CommitWithDataRow struct {
 	// CommitID is a potentially arbitrary string. commit_ids will be treated as occurring in
 	// lexicographical order.
 	CommitID CommitID `sql:"commit_id STRING PRIMARY KEY"`
@@ -152,28 +155,35 @@
 	// It is expected that tile_id be set the first time we see data from a given commit on the
 	// primary branch and not changed after, even if the tile size used for an instance changes.
 	TileID TileID `sql:"tile_id INT4 NOT NULL"`
+}
 
-	// TODO(kjlubick) split the git stuff into a GitCommits table.
+// ToSQLRow implements the sqltest.SQLExporter interface.
+func (r CommitWithDataRow) ToSQLRow() (colNames []string, colData []interface{}) {
+	return []string{"commit_id", "tile_id"},
+		[]interface{}{r.CommitID, r.TileID}
+}
 
+// GitCommitRow represents a git commit that we may or may not have seen data for.
+type GitCommitRow struct {
 	// GitHash is the git hash of the commit.
-	GitHash string `sql:"git_hash STRING NOT NULL"`
+	GitHash string `sql:"git_hash STRING PRIMARY KEY"`
+	// CommitID is a potentially arbitrary string. If non-null, it is a foreign key in the
+	// CommitsWithData table.
+	CommitID *CommitID `sql:"commit_id STRING"`
 	// CommitTime is the timestamp associated with the commit.
 	CommitTime time.Time `sql:"commit_time TIMESTAMP WITH TIME ZONE NOT NULL"`
 	// AuthorEmail is the email address associated with the author.
 	AuthorEmail string `sql:"author_email STRING NOT NULL"`
 	// Subject is the subject line of the commit.
 	Subject string `sql:"subject STRING NOT NULL"`
-	// HasData is set the first time data lands on the primary branch for this commit number. We
-	// use this to determine the dense tile of data. Previously, we had tried to determine this
-	// with a DISTINCT search over TraceValues, but that takes several minutes when there are
-	// 1M+ traces per commit.
-	HasData bool `sql:"has_data BOOL NOT NULL"`
+
+	commitIDIndex struct{} `sql:"INDEX commit_idx (commit_id)"`
 }
 
 // ToSQLRow implements the sqltest.SQLExporter interface.
-func (r CommitRow) ToSQLRow() (colNames []string, colData []interface{}) {
-	return []string{"commit_id", "tile_id", "git_hash", "commit_time", "author_email", "subject", "has_data"},
-		[]interface{}{r.CommitID, r.TileID, r.GitHash, r.CommitTime, r.AuthorEmail, r.Subject, r.HasData}
+func (r GitCommitRow) ToSQLRow() (colNames []string, colData []interface{}) {
+	return []string{"git_hash", "commit_id", "commit_time", "author_email", "subject"},
+		[]interface{}{r.GitHash, r.CommitID, r.CommitTime, r.AuthorEmail, r.Subject}
 }
 
 type TraceRow struct {