[gold] Introduce GitCommits table
Bug: skia:11367
Change-Id: I012945a3820c2ccc51dd08929c4ca6bb47e27d7f
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/375662
Reviewed-by: Leandro Lovisolo <lovisolo@google.com>
diff --git a/golden/go/diff/worker/worker.go b/golden/go/diff/worker/worker.go
index 79efdd9..3543e69 100644
--- a/golden/go/diff/worker/worker.go
+++ b/golden/go/diff/worker/worker.go
@@ -283,9 +283,8 @@
// getStartingTile returns the commit ID which is the beginning of the tile of interest (so we
// get enough data to do our comparisons).
func (w *WorkerImpl) getStartingTile(ctx context.Context) (schema.TileID, error) {
- row := w.db.QueryRow(ctx, `SELECT tile_id FROM Commits
+ row := w.db.QueryRow(ctx, `SELECT tile_id FROM CommitsWithData
AS OF SYSTEM TIME '-0.1s'
-WHERE has_data = TRUE
ORDER BY commit_id DESC
LIMIT 1 OFFSET $1`, w.commitsWithDataToSearch-1)
var lc pgtype.Int4
diff --git a/golden/go/sql/databuilder/databuilder.go b/golden/go/sql/databuilder/databuilder.go
index d8feb87..39e885b 100644
--- a/golden/go/sql/databuilder/databuilder.go
+++ b/golden/go/sql/databuilder/databuilder.go
@@ -36,6 +36,7 @@
changelistBuilders []*ChangelistBuilder
commitsWithData *CommitBuilder
+ commitsWithNoData *GitCommitBuilder
diffMetrics []schema.DiffMetricRow
expectationBuilders []*ExpectationsBuilder
groupingKeys []string
@@ -48,12 +49,21 @@
// It panics if called more than once.
func (b *TablesBuilder) CommitsWithData() *CommitBuilder {
if b.commitsWithData != nil {
- logAndPanic("Cannot call Commits() more than once.")
+ logAndPanic("Cannot call CommitsWithData() more than once.")
}
b.commitsWithData = &CommitBuilder{}
return b.commitsWithData
}
+// CommitsWithNoData returns a new GitCommitBuilder that will fill in the GitCommits table.
+func (b *TablesBuilder) CommitsWithNoData() *GitCommitBuilder {
+ if b.commitsWithNoData != nil {
+ logAndPanic("Cannot call CommitsWithNoData() more than once.")
+ }
+ b.commitsWithNoData = &GitCommitBuilder{}
+ return b.commitsWithNoData
+}
+
// SetDigests loads a mapping of runes to the digest that they represent. This allows
// specifying the trace history be done with a string of characters. If a rune is invalid or
// the digests are invalid, this will panic. It panics if called more than once.
@@ -91,7 +101,7 @@
if b.commitsWithData == nil {
logAndPanic("Must add commits before traces")
}
- if len(b.commitsWithData.commits) == 0 {
+ if len(b.commitsWithData.commitRows) == 0 {
logAndPanic("Must specify at least one commit")
}
if len(b.groupingKeys) == 0 {
@@ -101,7 +111,7 @@
logAndPanic("Must add digests before traces")
}
tb := &TraceBuilder{
- commits: b.commitsWithData.commits,
+ commits: b.commitsWithData.commitRows,
commonKeys: params,
symbolsToDigest: b.runeToDigest,
groupingKeys: b.groupingKeys,
@@ -378,10 +388,10 @@
}
var tables schema.Tables
- tables.Commits = b.commitsWithData.commits
- applyTilingToCommits(tables.Commits, b.TileWidth)
+ tables.CommitsWithData = b.commitsWithData.commitRows
+ applyTilingToCommits(tables.CommitsWithData, b.TileWidth)
+ tables.GitCommits = assembleGitCommits(b.commitsWithData, b.commitsWithNoData)
- commitsWithData := map[schema.CommitID]bool{}
valuesAtHead := map[schema.MD5Hash]*schema.ValueAtHeadRow{}
for _, traceBuilder := range b.traceBuilders {
// Add unique rows from the tables gathered by tracebuilders.
@@ -421,7 +431,6 @@
panic("Incomplete data - you must call IngestedFrom()")
}
tables.TraceValues = append(tables.TraceValues, *tv)
- commitsWithData[tv.CommitID] = true
vHead := valuesAtHead[sql.AsMD5Hash(tv.TraceID)]
vHead.Digest = tv.Digest
vHead.MostRecentCommitID = tv.CommitID
@@ -430,15 +439,9 @@
}
}
}
- for i := range tables.Commits {
- cid := tables.Commits[i].CommitID
- if commitsWithData[cid] {
- tables.Commits[i].HasData = true
- }
- }
- tables.TiledTraceDigests = b.computeTiledTraceDigests(tables.Commits)
- tables.PrimaryBranchParams = b.computePrimaryBranchParams(tables.Commits)
+ tables.TiledTraceDigests = b.computeTiledTraceDigests(tables.CommitsWithData)
+ tables.PrimaryBranchParams = b.computePrimaryBranchParams(tables.CommitsWithData)
exp := b.finalizeExpectations()
for _, e := range exp {
tables.Expectations = append(tables.Expectations, *e)
@@ -529,7 +532,18 @@
return tables
}
-func applyTilingToCommits(commits []schema.CommitRow, tileWidth int) {
+func assembleGitCommits(withData *CommitBuilder, withNoData *GitCommitBuilder) []schema.GitCommitRow {
+ gitCommits := withData.gitRows
+ if withNoData != nil {
+ gitCommits = append(gitCommits, withNoData.gitRows...)
+ }
+ sort.Slice(gitCommits, func(i, j int) bool {
+ return gitCommits[i].CommitTime.Before(gitCommits[j].CommitTime)
+ })
+ return gitCommits
+}
+
+func applyTilingToCommits(commits []schema.CommitWithDataRow, tileWidth int) {
// We sort the commits by CommitID in lexicographical order. By definition of CommitID, this is
// the order they happen in.
sort.Slice(commits, func(i, j int) bool {
@@ -613,7 +627,7 @@
digest schema.MD5Hash
}
-func (b *TablesBuilder) computeTiledTraceDigests(commits []schema.CommitRow) []schema.TiledTraceDigestRow {
+func (b *TablesBuilder) computeTiledTraceDigests(commits []schema.CommitWithDataRow) []schema.TiledTraceDigestRow {
seenRows := map[tiledTraceDigest]bool{}
for _, builder := range b.traceBuilders {
for _, xtv := range builder.traceValues {
@@ -645,7 +659,7 @@
return rv
}
-func getTileID(id schema.CommitID, commits []schema.CommitRow) schema.TileID {
+func getTileID(id schema.CommitID, commits []schema.CommitWithDataRow) schema.TileID {
for _, c := range commits {
if c.CommitID == id {
return c.TileID
@@ -656,7 +670,7 @@
// computePrimaryBranchParams goes through all trace data and returns the PrimaryBranchParamRow
// with the appropriately tiled key/value pairs that showed up in the trace keys and params.
-func (b *TablesBuilder) computePrimaryBranchParams(commits []schema.CommitRow) []schema.PrimaryBranchParamRow {
+func (b *TablesBuilder) computePrimaryBranchParams(commits []schema.CommitWithDataRow) []schema.PrimaryBranchParamRow {
seenRows := map[schema.PrimaryBranchParamRow]bool{}
for _, builder := range b.traceBuilders {
findTraceKeys := func(traceID schema.TraceID) paramtools.Params {
@@ -748,11 +762,12 @@
// CommitBuilder has methods for easily building commit history. All methods are chainable.
type CommitBuilder struct {
- commits []schema.CommitRow
+ commitRows []schema.CommitWithDataRow
+ gitRows []schema.GitCommitRow
}
// Insert adds a commit with the given data. It panics if the commitTime is not formatted to
-// RFC3339 or if the commitID is not monotonically increasing from the last one.
+// RFC3339.
func (b *CommitBuilder) Insert(commitID schema.CommitID, author, subject, commitTime string) *CommitBuilder {
h := sha1.Sum([]byte(commitID))
gitHash := hex.EncodeToString(h[:])
@@ -760,13 +775,40 @@
if err != nil {
logAndPanic("Invalid time %q: %s", commitTime, err)
}
- b.commits = append(b.commits, schema.CommitRow{
- CommitID: commitID,
+ b.commitRows = append(b.commitRows, schema.CommitWithDataRow{
+ CommitID: commitID,
+ // tiling will be computed in Build.
+ })
+ b.gitRows = append(b.gitRows, schema.GitCommitRow{
+ GitHash: gitHash,
+ CommitID: &commitID,
+ CommitTime: ct,
+ AuthorEmail: author,
+ Subject: subject,
+ })
+ return b
+}
+
+// GitCommitBuilder has methods for building rows in the GitCommits table.
+type GitCommitBuilder struct {
+ gitRows []schema.GitCommitRow
+}
+
+// Insert adds a commit with the given data. It panics if the commitTime is not formatted to
+// RFC3339 or if the gitHash is invalid.
+func (b *GitCommitBuilder) Insert(gitHash string, author, subject, commitTime string) *GitCommitBuilder {
+ if len(gitHash) != 40 {
+ panic("invalid git hash length; must be 40 chars")
+ }
+ ct, err := time.Parse(time.RFC3339, commitTime)
+ if err != nil {
+ logAndPanic("Invalid time %q: %s", commitTime, err)
+ }
+ b.gitRows = append(b.gitRows, schema.GitCommitRow{
GitHash: gitHash,
CommitTime: ct,
AuthorEmail: author,
Subject: subject,
- HasData: false,
})
return b
}
@@ -774,7 +816,7 @@
// TraceBuilder has methods for easily building trace data. All methods are chainable.
type TraceBuilder struct {
// inputs needed upon creation
- commits []schema.CommitRow
+ commits []schema.CommitWithDataRow
commonKeys paramtools.Params
groupingKeys []string
symbolsToDigest map[rune]schema.DigestBytes
diff --git a/golden/go/sql/databuilder/databuilder_test.go b/golden/go/sql/databuilder/databuilder_test.go
index eca361d..0a2a4cf 100644
--- a/golden/go/sql/databuilder/databuilder_test.go
+++ b/golden/go/sql/databuilder/databuilder_test.go
@@ -32,8 +32,9 @@
Insert("001", "author_one", "subject_one", "2020-12-05T16:00:00Z").
Insert("002", "author_two", "subject_two", "2020-12-06T17:00:00Z").
Insert("003", "author_three", "subject_three", "2020-12-07T18:00:00Z").
- Insert("004", "author_four", "subject_four", "2020-12-08T19:00:00Z").
- Insert("005", "author_five", "no data yet", "2020-12-08T20:00:00Z")
+ Insert("004", "author_four", "subject_four", "2020-12-08T19:00:00Z")
+ b.CommitsWithNoData().
+ Insert("5555555555555555555555555555555555555555", "author_five", "no data yet", "2020-12-08T20:00:00Z")
b.SetDigests(map[rune]types.Digest{
// by convention, upper case are positively triaged, lowercase
// are untriaged, numbers are negative, symbols are special.
@@ -49,15 +50,15 @@
"color_mode": "rgb",
types.CorpusField: "corpus_one",
}).History(
- "AAbb-",
- "D--D-",
+ "AAbb",
+ "D--D",
).Keys([]paramtools.Params{{
types.PrimaryKeyField: "test_one",
}, {
types.PrimaryKeyField: "test_two",
}}).OptionsAll(paramtools.Params{"ext": "png"}).
- IngestedFrom([]string{"crosshatch_file1", "crosshatch_file2", "crosshatch_file3", "crosshatch_file4", ""},
- []string{"2020-12-11T10:09:00Z", "2020-12-11T10:10:00Z", "2020-12-11T10:11:00Z", "2020-12-11T10:12:13Z", ""})
+ IngestedFrom([]string{"crosshatch_file1", "crosshatch_file2", "crosshatch_file3", "crosshatch_file4"},
+ []string{"2020-12-11T10:09:00Z", "2020-12-11T10:10:00Z", "2020-12-11T10:11:00Z", "2020-12-11T10:12:13Z"})
b.AddTracesWithCommonKeys(paramtools.Params{
"os": "Windows10.7",
@@ -65,11 +66,11 @@
"color_mode": "rgb",
types.CorpusField: "corpus_one",
types.PrimaryKeyField: "test_two",
- }).History("11D--").
+ }).History("11D-").
Keys([]paramtools.Params{{types.PrimaryKeyField: "test_one"}}).
OptionsPerTrace([]paramtools.Params{{"ext": "png"}}).
- IngestedFrom([]string{"windows_file1", "windows_file2", "windows_file3", "", ""},
- []string{"2020-12-11T14:15:00Z", "2020-12-11T15:16:00Z", "2020-12-11T16:17:00Z", "", ""})
+ IngestedFrom([]string{"windows_file1", "windows_file2", "windows_file3", ""},
+ []string{"2020-12-11T14:15:00Z", "2020-12-11T15:16:00Z", "2020-12-11T16:17:00Z", ""})
b.AddTriageEvent("user_one", "2020-12-12T12:12:12Z").
ExpectationsForGrouping(map[string]string{
@@ -156,47 +157,50 @@
Keys: paramtools.Params{"color_mode": "rgb", "device": "NUC1234", "name": "test_two", "os": "Windows10.7", "source_type": "corpus_one"},
MatchesAnyIgnoreRule: schema.NBTrue,
}}, tables.Traces)
- assert.Equal(t, []schema.CommitRow{{
- CommitID: "001",
- TileID: 0,
+ assert.Equal(t, []schema.CommitWithDataRow{{
+ CommitID: "001",
+ TileID: 0,
+ }, {
+ CommitID: "002",
+ TileID: 0,
+ }, {
+ CommitID: "003",
+ TileID: 0,
+ }, {
+ CommitID: "004",
+ TileID: 1,
+ }}, tables.CommitsWithData)
+ assert.Equal(t, []schema.GitCommitRow{{
GitHash: gitHash("001"),
+ CommitID: cID("001"),
CommitTime: time.Date(2020, time.December, 5, 16, 0, 0, 0, time.UTC),
AuthorEmail: "author_one",
Subject: "subject_one",
- HasData: true,
}, {
- CommitID: "002",
- TileID: 0,
GitHash: gitHash("002"),
+ CommitID: cID("002"),
CommitTime: time.Date(2020, time.December, 6, 17, 0, 0, 0, time.UTC),
AuthorEmail: "author_two",
Subject: "subject_two",
- HasData: true,
}, {
- CommitID: "003",
- TileID: 0,
GitHash: gitHash("003"),
+ CommitID: cID("003"),
CommitTime: time.Date(2020, time.December, 7, 18, 0, 0, 0, time.UTC),
AuthorEmail: "author_three",
Subject: "subject_three",
- HasData: true,
}, {
- CommitID: "004",
- TileID: 1,
GitHash: gitHash("004"),
+ CommitID: cID("004"),
CommitTime: time.Date(2020, time.December, 8, 19, 0, 0, 0, time.UTC),
AuthorEmail: "author_four",
Subject: "subject_four",
- HasData: true,
}, {
- CommitID: "005",
- TileID: 1,
- GitHash: gitHash("005"),
+ GitHash: "5555555555555555555555555555555555555555",
+ CommitID: nil,
CommitTime: time.Date(2020, time.December, 8, 20, 0, 0, 0, time.UTC),
AuthorEmail: "author_five",
Subject: "no data yet",
- HasData: false,
- }}, tables.Commits)
+ }}, tables.GitCommits)
pngOptionsID := h(`{"ext":"png"}`)
testOneGroupingID := h(`{"name":"test_one","source_type":"corpus_one"}`)
@@ -472,6 +476,10 @@
}}, tables.IgnoreRules)
}
+func cID(s schema.CommitID) *schema.CommitID {
+ return &s
+}
+
func TestBuild_CalledWithChangelistData_ProducesCorrectData(t *testing.T) {
unittest.SmallTest(t)
@@ -897,40 +905,61 @@
Insert("0098", "author_two", "subject_98", "2020-12-05T14:00:00Z").
Insert("2000", "author_2k", "subject_2k", "2022-02-02T02:02:00Z")
+ b.CommitsWithNoData().
+ Insert("4444444444444444444444444444444444444444", "somebody", "no data 1900", "2021-02-03T04:05:06Z").
+ Insert("3333333333333333333333333333333333333333", "somebody", "no data 1850", "2021-02-03T04:05:00Z")
+
tables := b.Build()
- assert.Equal(t, []schema.CommitRow{{
- CommitID: "0098",
- TileID: 0,
+ assert.Equal(t, []schema.CommitWithDataRow{{
+ CommitID: "0098",
+ TileID: 0,
+ }, {
+ CommitID: "0099",
+ TileID: 0,
+ }, {
+ CommitID: "0100",
+ TileID: 1,
+ }, {
+ CommitID: "2000",
+ TileID: 1,
+ }}, tables.CommitsWithData)
+ assert.Equal(t, []schema.GitCommitRow{{
GitHash: gitHash("0098"),
+ CommitID: cID("0098"),
CommitTime: time.Date(2020, time.December, 5, 14, 0, 0, 0, time.UTC),
AuthorEmail: "author_two",
Subject: "subject_98",
- HasData: false,
}, {
- CommitID: "0099",
- TileID: 0,
GitHash: gitHash("0099"),
+ CommitID: cID("0099"),
CommitTime: time.Date(2020, time.December, 5, 15, 0, 0, 0, time.UTC),
AuthorEmail: "author_one",
Subject: "subject_99",
- HasData: false,
}, {
- CommitID: "0100",
- TileID: 1,
GitHash: gitHash("0100"),
+ CommitID: cID("0100"),
CommitTime: time.Date(2021, time.January, 1, 1, 1, 0, 0, time.UTC),
AuthorEmail: "author_100",
Subject: "subject_100",
- HasData: false,
}, {
- CommitID: "2000",
- TileID: 1,
+ GitHash: "3333333333333333333333333333333333333333",
+ CommitID: nil,
+ CommitTime: time.Date(2021, time.February, 3, 4, 5, 0, 0, time.UTC),
+ AuthorEmail: "somebody",
+ Subject: "no data 1850",
+ }, {
+ GitHash: "4444444444444444444444444444444444444444",
+ CommitID: nil,
+ CommitTime: time.Date(2021, time.February, 3, 4, 5, 6, 0, time.UTC),
+ AuthorEmail: "somebody",
+ Subject: "no data 1900",
+ }, {
GitHash: gitHash("2000"),
+ CommitID: cID("2000"),
CommitTime: time.Date(2022, time.February, 2, 2, 2, 0, 0, time.UTC),
AuthorEmail: "author_2k",
Subject: "subject_2k",
- HasData: false,
- }}, tables.Commits)
+ }}, tables.GitCommits)
}
func TestSetDigests_CalledMultipleTimes_Panics(t *testing.T) {
diff --git a/golden/go/sql/datakitchensink/kitchensink.go b/golden/go/sql/datakitchensink/kitchensink.go
index 6e42877..9a19af3 100644
--- a/golden/go/sql/datakitchensink/kitchensink.go
+++ b/golden/go/sql/datakitchensink/kitchensink.go
@@ -23,16 +23,17 @@
Insert("0000000100", UserThree, "commit 100", "2020-12-03T00:00:00Z").
Insert("0000000101", UserTwo, "Update Windows 10.2 to 10.3", "2020-12-04T00:00:00Z").
Insert("0000000102", UserOne, "commit 102", "2020-12-05T00:00:00Z").
- // TODO(kjlubick) bring this back with GitCommits table.
- //Insert("0000000103", UserFour, "no data 103", "2020-12-06T01:00:00Z").
- //Insert("0000000104", UserFour, "no data 104", "2020-12-06T02:00:00Z").
- //Insert("0000000105", UserFour, "no data 105", "2020-12-06T03:00:00Z").
Insert("0000000106", UserTwo, "Add walleye device", "2020-12-07T00:00:00Z").
Insert("0000000107", UserThree, "Add taimen device [flaky]", "2020-12-08T00:00:00Z").
Insert("0000000108", UserTwo, "Fix iOS Triangle tests [accidental break of circle tests]", "2020-12-09T00:00:00Z").
Insert("0000000109", UserOne, "Enable autotriage of walleye", "2020-12-10T00:00:00Z").
Insert("0000000110", UserTwo, "commit 110", "2020-12-11T00:00:00Z")
+ b.CommitsWithNoData().
+ Insert("0103010301030103010301030103010301030103", UserFour, "no data 103", "2020-12-06T01:00:00Z").
+ Insert("0104010401040104010401040104010401040104", UserFour, "no data 104", "2020-12-06T02:00:00Z").
+ Insert("0105010501050105010501050105010501050105", UserFour, "no data 105", "2020-12-06T03:00:00Z")
+
b.SetDigests(map[rune]types.Digest{
// by convention, upper case are positively triaged, lowercase
// are untriaged, numbers are negative, symbols are special.
diff --git a/golden/go/sql/datakitchensink/kitchensink_test.go b/golden/go/sql/datakitchensink/kitchensink_test.go
index 89ce6f9..7683404 100644
--- a/golden/go/sql/datakitchensink/kitchensink_test.go
+++ b/golden/go/sql/datakitchensink/kitchensink_test.go
@@ -71,4 +71,19 @@
count = 0
assert.NoError(t, row.Scan(&count))
assert.Equal(t, 6, count)
+
+ row = db.QueryRow(ctx, "SELECT count(*) from GitCommits")
+ count = 0
+ assert.NoError(t, row.Scan(&count))
+ assert.Equal(t, 13, count)
+
+ row = db.QueryRow(ctx, "SELECT count(*) from GitCommits WHERE commit_id IS NULL")
+ count = 0
+ assert.NoError(t, row.Scan(&count))
+ assert.Equal(t, 3, count)
+
+ row = db.QueryRow(ctx, "SELECT count(*) from CommitsWithData")
+ count = 0
+ assert.NoError(t, row.Scan(&count))
+ assert.Equal(t, 10, count)
}
diff --git a/golden/go/sql/schema/sql.go b/golden/go/sql/schema/sql.go
index 1498d6a..d170558 100644
--- a/golden/go/sql/schema/sql.go
+++ b/golden/go/sql/schema/sql.go
@@ -12,14 +12,9 @@
last_ingested_data TIMESTAMP WITH TIME ZONE NOT NULL,
INDEX system_status_ingested_idx (system, status, last_ingested_data)
);
-CREATE TABLE IF NOT EXISTS Commits (
+CREATE TABLE IF NOT EXISTS CommitsWithData (
commit_id STRING PRIMARY KEY,
- tile_id INT4 NOT NULL,
- git_hash STRING NOT NULL,
- commit_time TIMESTAMP WITH TIME ZONE NOT NULL,
- author_email STRING NOT NULL,
- subject STRING NOT NULL,
- has_data BOOL NOT NULL
+ tile_id INT4 NOT NULL
);
CREATE TABLE IF NOT EXISTS DiffMetrics (
left_digest BYTES,
@@ -55,6 +50,14 @@
expectation_record_id UUID,
PRIMARY KEY (grouping_id, digest)
);
+CREATE TABLE IF NOT EXISTS GitCommits (
+ git_hash STRING PRIMARY KEY,
+ commit_id STRING,
+ commit_time TIMESTAMP WITH TIME ZONE NOT NULL,
+ author_email STRING NOT NULL,
+ subject STRING NOT NULL,
+ INDEX commit_idx (commit_id)
+);
CREATE TABLE IF NOT EXISTS Groupings (
grouping_id BYTES PRIMARY KEY,
keys JSONB NOT NULL
diff --git a/golden/go/sql/schema/tables.go b/golden/go/sql/schema/tables.go
index 4a9734d..071a41d 100644
--- a/golden/go/sql/schema/tables.go
+++ b/golden/go/sql/schema/tables.go
@@ -80,11 +80,12 @@
//go:generate go run ../exporter/tosql --output_file sql.go --logtostderr --output_pkg schema
type Tables struct {
Changelists []ChangelistRow `sql_backup:"weekly"`
- Commits []CommitRow `sql_backup:"daily"`
+ CommitsWithData []CommitWithDataRow `sql_backup:"daily"`
DiffMetrics []DiffMetricRow `sql_backup:"monthly"`
ExpectationDeltas []ExpectationDeltaRow `sql_backup:"daily"`
ExpectationRecords []ExpectationRecordRow `sql_backup:"daily"`
Expectations []ExpectationRow `sql_backup:"daily"`
+ GitCommits []GitCommitRow `sql_backup:"daily"`
Groupings []GroupingRow `sql_backup:"monthly"`
IgnoreRules []IgnoreRuleRow `sql_backup:"daily"`
Options []OptionsRow `sql_backup:"monthly"`
@@ -142,7 +143,9 @@
[]interface{}{r.Shard, r.TraceID, r.CommitID, r.Digest, r.GroupingID, r.OptionsID, r.SourceFileID}
}
-type CommitRow struct {
+// CommitWithDataRow represents a commit that has produced some data on the primary branch.
+// It is expected to be created during ingestion.
+type CommitWithDataRow struct {
// CommitID is a potentially arbitrary string. commit_ids will be treated as occurring in
// lexicographical order.
CommitID CommitID `sql:"commit_id STRING PRIMARY KEY"`
@@ -152,28 +155,35 @@
// It is expected that tile_id be set the first time we see data from a given commit on the
// primary branch and not changed after, even if the tile size used for an instance changes.
TileID TileID `sql:"tile_id INT4 NOT NULL"`
+}
- // TODO(kjlubick) split the git stuff into a GitCommits table.
+// ToSQLRow implements the sqltest.SQLExporter interface.
+func (r CommitWithDataRow) ToSQLRow() (colNames []string, colData []interface{}) {
+ return []string{"commit_id", "tile_id"},
+ []interface{}{r.CommitID, r.TileID}
+}
+// GitCommitRow represents a git commit that we may or may not have seen data for.
+type GitCommitRow struct {
// GitHash is the git hash of the commit.
- GitHash string `sql:"git_hash STRING NOT NULL"`
+ GitHash string `sql:"git_hash STRING PRIMARY KEY"`
+ // CommitID is a potentially arbitrary string. If non-null, it is a foreign key in the
+ // CommitsWithData table.
+ CommitID *CommitID `sql:"commit_id STRING"`
// CommitTime is the timestamp associated with the commit.
CommitTime time.Time `sql:"commit_time TIMESTAMP WITH TIME ZONE NOT NULL"`
// AuthorEmail is the email address associated with the author.
AuthorEmail string `sql:"author_email STRING NOT NULL"`
// Subject is the subject line of the commit.
Subject string `sql:"subject STRING NOT NULL"`
- // HasData is set the first time data lands on the primary branch for this commit number. We
- // use this to determine the dense tile of data. Previously, we had tried to determine this
- // with a DISTINCT search over TraceValues, but that takes several minutes when there are
- // 1M+ traces per commit.
- HasData bool `sql:"has_data BOOL NOT NULL"`
+
+ commitIDIndex struct{} `sql:"INDEX commit_idx (commit_id)"`
}
// ToSQLRow implements the sqltest.SQLExporter interface.
-func (r CommitRow) ToSQLRow() (colNames []string, colData []interface{}) {
- return []string{"commit_id", "tile_id", "git_hash", "commit_time", "author_email", "subject", "has_data"},
- []interface{}{r.CommitID, r.TileID, r.GitHash, r.CommitTime, r.AuthorEmail, r.Subject, r.HasData}
+func (r GitCommitRow) ToSQLRow() (colNames []string, colData []interface{}) {
+ return []string{"git_hash", "commit_id", "commit_time", "author_email", "subject"},
+ []interface{}{r.GitHash, r.CommitID, r.CommitTime, r.AuthorEmail, r.Subject}
}
type TraceRow struct {