| package schema |
| |
| import ( |
| "crypto/md5" |
| "time" |
| |
| "go.skia.org/infra/golden/go/types" |
| |
| "github.com/google/uuid" |
| "github.com/jackc/pgtype" |
| |
| "go.skia.org/infra/go/paramtools" |
| "go.skia.org/infra/go/skerr" |
| "go.skia.org/infra/golden/go/expectations" |
| ) |
| |
// MD5Hash is a specialized type for an array of md5.Size bytes representing an MD5 hash. We use
// MD5 hashes a lot because they are a deterministic way to generate primary keys, which allows us
// to more easily cache and deduplicate data when ingesting. Additionally, these hashes are
// somewhat space-efficient (compared to UUIDs), which keeps rows from growing too large when
// cross-referencing other tables.
type MD5Hash [md5.Size]byte
| |
// TraceID and the other related types are declared as byte slices instead of MD5Hash because
//  1. we want to avoid copying around the array data every time (reminder: in Golang, arrays
//     are passed by value); and
//  2. passing arrays into the pgx driver is a little awkward (it only accepts slices so we would
//     have to turn the arrays into slices before passing in and if we forget, it's a runtime
//     error).
//
// TraceID holds the MD5 hash of the keys that identify a trace.
type TraceID []byte

// GroupingID holds the MD5 hash of the key/values that make up a grouping.
type GroupingID []byte

// OptionsID holds the MD5 hash of the key/values that make up a set of options.
type OptionsID []byte

// DigestBytes holds the MD5 hash of an image's pixel data.
type DigestBytes []byte

// SourceFileID holds the MD5 hash of an ingested source file.
type SourceFileID []byte
| |
// CommitID indicates when a commit happens in time. CommitIDs should be treated as ordered
// lexicographically (i.e. by string comparison), but need not be densely populated.
type CommitID string
| |
// TileID identifies a tile, which is a mechanism for batching together data to speed up queries.
// A tile will consist of a configurable number of commits with data.
type TileID int
| |
// NullableBool is a three-state boolean whose valid values are NBNull, NBFalse and NBTrue. It is
// used for BOOL columns that are allowed to be NULL.
type NullableBool int
| |
| // ToSQL returns the nullable value as a type compatible with SQL backends. |
| func (b NullableBool) ToSQL() *bool { |
| var rv bool |
| switch b { |
| case NBNull: |
| return nil |
| case NBFalse: |
| rv = false |
| case NBTrue: |
| rv = true |
| default: |
| return nil |
| } |
| return &rv |
| } |
| |
| func toNullableBool(matches pgtype.Bool) NullableBool { |
| if matches.Status == pgtype.Present { |
| if matches.Bool { |
| return NBTrue |
| } else { |
| return NBFalse |
| } |
| } |
| return NBNull |
| } |
| |
// These are the valid values of NullableBool. NBNull is deliberately the zero value, so an unset
// NullableBool is written to SQL as NULL (see NullableBool.ToSQL).
const (
	NBNull  NullableBool = 0
	NBFalse NullableBool = 1
	NBTrue  NullableBool = 2
)
| |
// ExpectationLabel is the single-character form of a triage label, as stored in the CHAR label
// columns of the expectation tables.
type ExpectationLabel string

// These are the valid values of ExpectationLabel.
const (
	LabelUntriaged ExpectationLabel = "u"
	LabelPositive  ExpectationLabel = "p"
	LabelNegative  ExpectationLabel = "n"
)
| |
| func (e ExpectationLabel) ToExpectation() expectations.Label { |
| switch e { |
| case LabelPositive: |
| return expectations.Positive |
| case LabelNegative: |
| return expectations.Negative |
| case LabelUntriaged: |
| return expectations.Untriaged |
| } |
| return expectations.Untriaged |
| } |
| |
| func FromExpectationLabel(label expectations.Label) ExpectationLabel { |
| switch label { |
| case expectations.Positive: |
| return LabelPositive |
| case expectations.Negative: |
| return LabelNegative |
| case expectations.Untriaged: |
| return LabelUntriaged |
| } |
| return LabelUntriaged |
| } |
| |
// ChangelistStatus is the string form of the state a changelist is in.
type ChangelistStatus string

// These are the known values of ChangelistStatus.
const (
	StatusOpen      ChangelistStatus = "open"
	StatusAbandoned ChangelistStatus = "abandoned"
	StatusLanded    ChangelistStatus = "landed"
)
| |
// Tables represents all SQL tables used by Gold. We define them as Go structs so that we can
// more easily generate test data (see sql/databuilder). With the following command, the struct
// is turned into an actual SQL statement.
// Note: If a table is added/removed/renamed, it is important to re-run //golden/cmd/sqlinit
// for all instances to make sure the backup schedules continue to work (they will fail if a table
// is missing or silently not back up new tables).
//
// Each field carries a sql_backup tag of "daily", "weekly", "monthly" or "none"; this presumably
// sets how often the table is backed up (confirm against //golden/cmd/sqlinit).
//
//go:generate bazelisk run --config=mayberemote //:go -- run ../exporter/tosql --output_file sql.go --output_pkg schema
type Tables struct {
	Changelists                        []ChangelistRow                     `sql_backup:"weekly"`
	CommitsWithData                    []CommitWithDataRow                 `sql_backup:"daily"`
	DiffMetrics                        []DiffMetricRow                     `sql_backup:"monthly"`
	ExpectationDeltas                  []ExpectationDeltaRow               `sql_backup:"daily"`
	ExpectationRecords                 []ExpectationRecordRow              `sql_backup:"daily"`
	Expectations                       []ExpectationRow                    `sql_backup:"daily"`
	GitCommits                         []GitCommitRow                      `sql_backup:"daily"`
	Groupings                          []GroupingRow                       `sql_backup:"monthly"`
	IgnoreRules                        []IgnoreRuleRow                     `sql_backup:"daily"`
	MetadataCommits                    []MetadataCommitRow                 `sql_backup:"daily"`
	Options                            []OptionsRow                        `sql_backup:"monthly"`
	Patchsets                          []PatchsetRow                       `sql_backup:"weekly"`
	PrimaryBranchDiffCalculationWork   []PrimaryBranchDiffCalculationRow   `sql_backup:"none"`
	PrimaryBranchParams                []PrimaryBranchParamRow             `sql_backup:"monthly"`
	ProblemImages                      []ProblemImageRow                   `sql_backup:"none"`
	SecondaryBranchDiffCalculationWork []SecondaryBranchDiffCalculationRow `sql_backup:"none"`
	SecondaryBranchExpectations        []SecondaryBranchExpectationRow     `sql_backup:"daily"`
	SecondaryBranchParams              []SecondaryBranchParamRow           `sql_backup:"monthly"`
	SecondaryBranchValues              []SecondaryBranchValueRow           `sql_backup:"monthly"`
	SourceFiles                        []SourceFileRow                     `sql_backup:"monthly"`
	TiledTraceDigests                  []TiledTraceDigestRow               `sql_backup:"monthly"`
	TraceValues                        []TraceValueRow                     `sql_backup:"monthly"`
	Traces                             []TraceRow                          `sql_backup:"monthly"`
	TrackingCommits                    []TrackingCommitRow                 `sql_backup:"daily"`
	Tryjobs                            []TryjobRow                         `sql_backup:"weekly"`
	ValuesAtHead                       []ValueAtHeadRow                    `sql_backup:"monthly"`

	// DeprecatedIngestedFiles allows us to keep track of files ingested with the old FS/BT ways
	// until all the SQL ingestion is ready.
	DeprecatedIngestedFiles    []DeprecatedIngestedFileRow    `sql_backup:"daily"`
	DeprecatedExpectationUndos []DeprecatedExpectationUndoRow `sql_backup:"daily"`
}
| |
// TraceValueRow is a row of the TraceValues table. Each row records the digest that one trace
// produced at one commit, along with the grouping, options and source file for that data point.
type TraceValueRow struct {
	// Shard is a small piece of the trace id to slightly break up trace data. TODO(kjlubick) could
	// this be a computed column?
	Shard byte `sql:"shard INT2"`
	// TraceID is the MD5 hash of the keys and values describing how this data point (i.e. the
	// Digest) was drawn. This is a foreign key into the Traces table.
	TraceID TraceID `sql:"trace_id BYTES"`
	// CommitID represents when in time this data was drawn. This is a foreign key into the
	// CommitsWithData table.
	CommitID CommitID `sql:"commit_id STRING"`
	// Digest is the MD5 hash of the pixel data; this is "what was drawn" at this point in time
	// (specified by CommitID) and by the machine (specified by TraceID).
	Digest DigestBytes `sql:"digest BYTES NOT NULL"`
	// GroupingID is the MD5 hash of the key/values belonging to the grouping (e.g. corpus +
	// test name). If the grouping changes, this would require changing the entire column here and
	// in several other tables. In theory, changing the grouping should be done very rarely. This
	// is a foreign key into the Groupings table.
	GroupingID GroupingID `sql:"grouping_id BYTES NOT NULL"`
	// OptionsID is the MD5 hash of the key/values that belong to the options. Options do not impact
	// the TraceID and thus act as metadata. This is a foreign key into the Options table.
	OptionsID OptionsID `sql:"options_id BYTES NOT NULL"`
	// SourceFileID is the MD5 hash of the source file that produced this data point. This is a
	// foreign key into the SourceFiles table.
	SourceFileID SourceFileID `sql:"source_file_id BYTES NOT NULL"`
	// By creating the primary key using the shard and the commit_id, we give some data locality
	// to data from the same trace, but in different commits w/o overloading a single range (if
	// commit_id were first) and w/o spreading our data too thin (if trace_id were first).
	primaryKey struct{} `sql:"PRIMARY KEY (shard, commit_id, trace_id)"`

	// This index is keyed by trace and then commit. It stores digest, options_id and grouping_id
	// alongside the key so those columns can be read from the index itself.
	traceCommitIndex struct{} `sql:"INDEX trace_commit_idx (trace_id, commit_id) STORING (digest, options_id, grouping_id)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r TraceValueRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"shard", "trace_id", "commit_id", "digest", "grouping_id", "options_id", "source_file_id"}, |
| []interface{}{r.Shard, r.TraceID, r.CommitID, r.Digest, r.GroupingID, r.OptionsID, r.SourceFileID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *TraceValueRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.Shard, &r.TraceID, &r.CommitID, &r.Digest, &r.GroupingID, |
| &r.OptionsID, &r.SourceFileID) |
| } |
| |
// CommitWithDataRow is a row of the CommitsWithData table. It represents a commit that has
// produced some data on the primary branch. It is expected to be created during ingestion.
type CommitWithDataRow struct {
	// CommitID is a potentially arbitrary string. commit_ids will be treated as occurring in
	// lexicographical order.
	CommitID CommitID `sql:"commit_id STRING PRIMARY KEY"`
	// TileID is an integer that corresponds to the tile to which this commit belongs. Tiles are
	// intended to be about 100 commits wide, but this could vary slightly if commits are ingested
	// in not quite sequential order. Additionally, tile widths could change over time if necessary.
	// It is expected that tile_id be set the first time we see data from a given commit on the
	// primary branch and not changed after, even if the tile size used for an instance changes.
	TileID TileID `sql:"tile_id INT4 NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r CommitWithDataRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"commit_id", "tile_id"}, |
| []interface{}{r.CommitID, r.TileID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *CommitWithDataRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.CommitID, &r.TileID) |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface to sort commits by CommitID. |
| func (r CommitWithDataRow) RowsOrderBy() string { |
| return `ORDER BY commit_id ASC` |
| } |
| |
// GitCommitRow is a row of the GitCommits table. It represents a git commit that we may or may
// not have seen data for.
type GitCommitRow struct {
	// GitHash is the git hash of the commit.
	GitHash string `sql:"git_hash STRING PRIMARY KEY"`
	// CommitID is a potentially arbitrary string. It is a foreign key in the CommitsWithData table.
	CommitID CommitID `sql:"commit_id STRING NOT NULL"`
	// CommitTime is the timestamp associated with the commit.
	CommitTime time.Time `sql:"commit_time TIMESTAMP WITH TIME ZONE NOT NULL"`
	// AuthorEmail is the email address associated with the author.
	AuthorEmail string `sql:"author_email STRING NOT NULL"`
	// Subject is the subject line of the commit.
	Subject string `sql:"subject STRING NOT NULL"`

	// This index allows commits to be looked up by commit_id (the primary key is git_hash).
	commitIDIndex struct{} `sql:"INDEX commit_idx (commit_id)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r GitCommitRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"git_hash", "commit_id", "commit_time", "author_email", "subject"}, |
| []interface{}{r.GitHash, r.CommitID, r.CommitTime, r.AuthorEmail, r.Subject} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *GitCommitRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.GitHash, &r.CommitID, &r.CommitTime, &r.AuthorEmail, &r.Subject); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.CommitTime = r.CommitTime.UTC() |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface to sort commits by CommitID. |
| func (r GitCommitRow) RowsOrderBy() string { |
| return `ORDER BY commit_id DESC` |
| } |
| |
// TraceRow is a row of the Traces table. It holds the keys that identify a trace, the grouping
// those keys belong to, and whether the trace is currently matched by an ignore rule.
type TraceRow struct {
	// TraceID is the MD5 hash of the keys field.
	TraceID TraceID `sql:"trace_id BYTES PRIMARY KEY"`
	// Corpus is the value associated with the "source_type" key. It is its own field for easier
	// searches and joins. It is a stored computed column derived from keys, so it is read back
	// when scanning but never written explicitly.
	Corpus string `sql:"corpus STRING AS (keys->>'source_type') STORED NOT NULL"`
	// GroupingID is the MD5 hash of the subset of keys that make up the grouping. It is its own
	// field for easier searches and joins. This is a foreign key into the Groupings table.
	GroupingID GroupingID `sql:"grouping_id BYTES NOT NULL"`
	// Keys is a JSON representation of a map[string]string. The keys and values of that
	// map describe how a series of data points were created. We store this as a JSON map because
	// CockroachDB supports searching for traces by key/values in this field.
	Keys paramtools.Params `sql:"keys JSONB NOT NULL"`
	// MatchesAnyIgnoreRule is true if this trace is matched by any of the ignore rules. If true,
	// this trace will be ignored from most queries by default. This is stored here because
	// recalculating it on the fly is too expensive and only needs updating if an ignore rule is
	// changed. There is a background process that computes this field for any traces with this
	// unset (i.e. NULL).
	MatchesAnyIgnoreRule NullableBool `sql:"matches_any_ignore_rule BOOL"`

	// This index speeds up fetching traces by grouping, e.g. when enumerating the work needed for
	// creating diffs.
	groupingIgnoredIndex struct{} `sql:"INDEX grouping_ignored_idx (grouping_id, matches_any_ignore_rule)"`
	// This index makes application of all ignore rules easier.
	ignoredGroupingIndex struct{} `sql:"INDEX ignored_grouping_idx (matches_any_ignore_rule, grouping_id)"`
	// This index makes querying by keys faster.
	keysIndex struct{} `sql:"INVERTED INDEX keys_idx (keys)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r TraceRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"trace_id", "grouping_id", "keys", "matches_any_ignore_rule"}, |
| []interface{}{r.TraceID, r.GroupingID, r.Keys, r.MatchesAnyIgnoreRule.ToSQL()} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *TraceRow) ScanFrom(scan func(...interface{}) error) error { |
| var matches pgtype.Bool |
| if err := scan(&r.TraceID, &r.Corpus, &r.GroupingID, &r.Keys, &matches); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.MatchesAnyIgnoreRule = toNullableBool(matches) |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface to sort traces by test name. |
| func (r TraceRow) RowsOrderBy() string { |
| return `ORDER BY keys->>'name'` |
| } |
| |
// GroupingRow is a row of the Groupings table. It maps a grouping's MD5 hash to the key/values
// that make up that grouping.
type GroupingRow struct {
	// GroupingID is the MD5 hash of the key/values belonging to the grouping, that is, the
	// mechanism by which we partition our test data into "things that should all look the same".
	// This is commonly corpus + test name, but could include things like the color_type (e.g.
	// RGB vs greyscale).
	GroupingID GroupingID `sql:"grouping_id BYTES PRIMARY KEY"`
	// Keys is a JSON representation of a map[string]string. The keys and values of that
	// map are the grouping.
	Keys paramtools.Params `sql:"keys JSONB NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r GroupingRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"grouping_id", "keys"}, |
| []interface{}{r.GroupingID, r.Keys} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *GroupingRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.GroupingID, &r.Keys) |
| } |
| |
// OptionsRow is a row of the Options table. It maps the MD5 hash of a set of options to the
// key/values that make up those options.
type OptionsRow struct {
	// OptionsID is the MD5 hash of the key/values that act as metadata and do not impact the
	// uniqueness of traces.
	OptionsID OptionsID `sql:"options_id BYTES PRIMARY KEY"`
	// Keys is a JSON representation of a map[string]string. The keys and values of that
	// map are the options.
	Keys paramtools.Params `sql:"keys JSONB NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r OptionsRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"options_id", "keys"}, |
| []interface{}{r.OptionsID, r.Keys} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *OptionsRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.OptionsID, &r.Keys) |
| } |
| |
// SourceFileRow is a row of the SourceFiles table. It records a file that was ingested and when
// it was last ingested.
type SourceFileRow struct {
	// SourceFileID is the MD5 hash of the source file that has been ingested.
	SourceFileID SourceFileID `sql:"source_file_id BYTES PRIMARY KEY"`
	// SourceFile is the fully qualified name of the source file that was ingested, e.g.
	// "gs://bucket/2020/01/02/03/15/foo.json"
	SourceFile string `sql:"source_file STRING NOT NULL"`
	// LastIngested is the time at which this file was most recently read in and successfully
	// processed.
	LastIngested time.Time `sql:"last_ingested TIMESTAMP WITH TIME ZONE NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r SourceFileRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"source_file_id", "source_file", "last_ingested"}, |
| []interface{}{r.SourceFileID, r.SourceFile, r.LastIngested} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *SourceFileRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.SourceFileID, &r.SourceFile, &r.LastIngested); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.LastIngested = r.LastIngested.UTC() |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface. |
| func (r SourceFileRow) RowsOrderBy() string { |
| return "ORDER BY last_ingested ASC" |
| } |
| |
// DeprecatedIngestedFileRow is a row of the DeprecatedIngestedFiles table, which keeps track of
// files ingested with the old FS/BT ways until all the SQL ingestion is ready. It has the same
// columns as SourceFileRow.
type DeprecatedIngestedFileRow struct {
	// SourceFileID is the MD5 hash of the source file that has been ingested.
	SourceFileID SourceFileID `sql:"source_file_id BYTES PRIMARY KEY"`
	// SourceFile is the fully qualified name of the source file that was ingested, e.g.
	// "gs://bucket/2020/01/02/03/15/foo.json"
	SourceFile string `sql:"source_file STRING NOT NULL"`
	// LastIngested is the time at which this file was most recently read in and successfully
	// processed.
	LastIngested time.Time `sql:"last_ingested TIMESTAMP WITH TIME ZONE NOT NULL"`
}
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *DeprecatedIngestedFileRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.SourceFileID, &r.SourceFile, &r.LastIngested); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.LastIngested = r.LastIngested.UTC() |
| return nil |
| } |
| |
// ExpectationRecordRow is a row of the ExpectationRecords table. Each row describes one triage
// event: who triaged, when, on which branch, and how many digests were affected.
type ExpectationRecordRow struct {
	// ExpectationRecordID is a unique ID for a triage event, which could impact one or more
	// digests across one or more groupings.
	ExpectationRecordID uuid.UUID `sql:"expectation_record_id UUID PRIMARY KEY DEFAULT gen_random_uuid()"`
	// BranchName identifies the branch on which the triage event happened. Can be nil for the
	// primary branch.
	BranchName *string `sql:"branch_name STRING"`
	// UserName is the email address of the logged-on user who initiated the triage event.
	UserName string `sql:"user_name STRING NOT NULL"`
	// TriageTime is the time at which this event happened.
	TriageTime time.Time `sql:"triage_time TIMESTAMP WITH TIME ZONE NOT NULL"`
	// NumChanges is how many digests were affected. It corresponds to the number of
	// ExpectationDelta rows that have this record as their parent. It is a denormalized field.
	NumChanges         int      `sql:"num_changes INT4 NOT NULL"`
	branchTriagedIndex struct{} `sql:"INDEX branch_ts_idx (branch_name, triage_time)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ExpectationRecordRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"expectation_record_id", "branch_name", "user_name", "triage_time", "num_changes"}, |
| []interface{}{r.ExpectationRecordID, r.BranchName, r.UserName, r.TriageTime, r.NumChanges} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ExpectationRecordRow) ScanFrom(scan func(...interface{}) error) error { |
| err := scan(&r.ExpectationRecordID, &r.BranchName, &r.UserName, &r.TriageTime, &r.NumChanges) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.TriageTime = r.TriageTime.UTC() |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface, sorting rows to have the most recent |
| // records first, with ties broken by num_changes and user_name |
| func (r ExpectationRecordRow) RowsOrderBy() string { |
| return "ORDER BY triage_time DESC, num_changes DESC, user_name ASC" |
| } |
| |
// ExpectationDeltaRow is a row of the ExpectationDeltas table. Each row records how a single
// triage event (the parent ExpectationRecordRow) changed the label of one digest in one grouping.
type ExpectationDeltaRow struct {
	// ExpectationRecordID corresponds to the parent ExpectationRecordRow.
	ExpectationRecordID uuid.UUID `sql:"expectation_record_id UUID"`
	// GroupingID identifies the grouping that was triaged by this change. This is a foreign key
	// into the Groupings table.
	GroupingID GroupingID `sql:"grouping_id BYTES"`
	// Digest is the MD5 hash of the pixel data. It identifies the image that was triaged in a
	// given grouping.
	Digest DigestBytes `sql:"digest BYTES"`
	// LabelBefore is the label that was applied to this digest in this grouping before the
	// parent expectation event happened. By storing this, we can undo that event in the future.
	LabelBefore ExpectationLabel `sql:"label_before CHAR NOT NULL"`
	// LabelAfter is the label that was applied as a result of the parent expectation event.
	LabelAfter ExpectationLabel `sql:"label_after CHAR NOT NULL"`
	// In any given expectation event, a single digest in a single grouping can only be affected
	// once, so it makes sense to use a composite primary key here. Additionally, this gives the
	// deltas good locality for a given record.
	primaryKey struct{} `sql:"PRIMARY KEY (expectation_record_id, grouping_id, digest)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ExpectationDeltaRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"expectation_record_id", "grouping_id", "digest", "label_before", "label_after"}, |
| []interface{}{r.ExpectationRecordID, r.GroupingID, r.Digest, r.LabelBefore, r.LabelAfter} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ExpectationDeltaRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.ExpectationRecordID, &r.GroupingID, &r.Digest, &r.LabelBefore, &r.LabelAfter) |
| } |
| |
// ExpectationRow is a row of the Expectations table, which contains an entry for every recent
// digest+grouping pair. This includes untriaged digests, because that allows us to have an index
// against label and just extract the untriaged ones instead of having to do a LEFT JOIN and look
// for nulls (which is slow at scale).
type ExpectationRow struct {
	// GroupingID identifies the grouping to which the triaged digest belongs. This is a foreign key
	// into the Groupings table.
	GroupingID GroupingID `sql:"grouping_id BYTES"`
	// Digest is the MD5 hash of the pixel data. It identifies the image that is currently triaged.
	Digest DigestBytes `sql:"digest BYTES"`
	// Label is the current label associated with the given digest in the given grouping.
	Label ExpectationLabel `sql:"label CHAR NOT NULL"`
	// ExpectationRecordID corresponds to the most recent ExpectationRecordRow that set the given
	// label. The column is nullable, so this may be nil.
	ExpectationRecordID *uuid.UUID `sql:"expectation_record_id UUID"`
	primaryKey          struct{}   `sql:"PRIMARY KEY (grouping_id, digest)"`
	labelIndex          struct{}   `sql:"INDEX label_idx (label)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ExpectationRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"grouping_id", "digest", "label", "expectation_record_id"}, |
| []interface{}{r.GroupingID, r.Digest, r.Label, r.ExpectationRecordID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ExpectationRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.GroupingID, &r.Digest, &r.Label, &r.ExpectationRecordID) |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface, sorting the rows first by digest, then |
| // by grouping id (which is a hash). |
| func (r ExpectationRow) RowsOrderBy() string { |
| return "ORDER BY digest, grouping_id ASC" |
| } |
| |
// DiffMetricRow is a row of the DiffMetrics table. It represents the pixel-by-pixel comparison
// between two images (identified by their digests). To avoid having n^2 comparisons (where n is
// the number of unique digests ever seen), we only calculate diffs against recent images that are
// in the same grouping. These rows don't contain the grouping information for the following
// reasons: 1) regardless of which grouping or groupings an image may have been generated in, the
// difference between any two images is the same; 2) images can be produced by multiple groupings.
// To make certain queries easier, data for a given image pairing is inserted twice - once with A
// being Left, B being Right and once with A being Right and B being Left. See diff.go for more
// about how these fields are computed.
type DiffMetricRow struct {
	// LeftDigest represents one of the images compared.
	LeftDigest DigestBytes `sql:"left_digest BYTES"`
	// RightDigest represents the other image compared.
	RightDigest DigestBytes `sql:"right_digest BYTES"`
	// NumPixelsDiff represents the number of pixels that differ between the two images.
	NumPixelsDiff int `sql:"num_pixels_diff INT4 NOT NULL"`
	// PercentPixelsDiff is the percentage of pixels that are different.
	PercentPixelsDiff float32 `sql:"percent_pixels_diff FLOAT4 NOT NULL"`
	// MaxRGBADiffs is the maximum delta between the two images in the red, green, blue, and
	// alpha channels.
	MaxRGBADiffs [4]int `sql:"max_rgba_diffs INT2[] NOT NULL"`
	// MaxChannelDiff is max(MaxRGBADiffs). This is its own field because using max() on an array
	// field is not supported. TODO(kjlubick) could this be computed with array_upper()?
	MaxChannelDiff int `sql:"max_channel_diff INT2 NOT NULL"`
	// CombinedMetric is a value in [0, 10] that represents how large the diff is between two
	// images. It is based off the MaxRGBADiffs and PercentPixelsDiff.
	CombinedMetric float32 `sql:"combined_metric FLOAT4 NOT NULL"`
	// DimensionsDiffer is true if the dimensions between the two images are different.
	DimensionsDiffer bool `sql:"dimensions_differ BOOL NOT NULL"`
	// Timestamp represents when this metric was computed or verified (i.e. still in use). This
	// allows for us to periodically clean up this large table.
	Timestamp  time.Time `sql:"ts TIMESTAMP WITH TIME ZONE NOT NULL"`
	primaryKey struct{}  `sql:"PRIMARY KEY (left_digest, right_digest)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r DiffMetricRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"left_digest", "right_digest", "num_pixels_diff", "percent_pixels_diff", "max_rgba_diffs", |
| "max_channel_diff", "combined_metric", "dimensions_differ", "ts"}, |
| []interface{}{r.LeftDigest, r.RightDigest, r.NumPixelsDiff, r.PercentPixelsDiff, r.MaxRGBADiffs, |
| r.MaxChannelDiff, r.CombinedMetric, r.DimensionsDiffer, r.Timestamp} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *DiffMetricRow) ScanFrom(scan func(...interface{}) error) error { |
| err := scan(&r.LeftDigest, &r.RightDigest, &r.NumPixelsDiff, &r.PercentPixelsDiff, |
| &r.MaxRGBADiffs, &r.MaxChannelDiff, &r.CombinedMetric, &r.DimensionsDiffer, &r.Timestamp) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.Timestamp = r.Timestamp.UTC() |
| return nil |
| } |
| |
// ValueAtHeadRow is a row of the ValuesAtHead table. It represents the most recent data point
// for each trace. It contains some denormalized data to reduce the number of joins needed to do
// some frequent queries.
type ValueAtHeadRow struct {
	// TraceID is the MD5 hash of the keys and values describing how this data point (i.e. the
	// Digest) was drawn. This is a foreign key into the Traces table.
	TraceID TraceID `sql:"trace_id BYTES PRIMARY KEY"`
	// MostRecentCommitID represents when in time this data was drawn. This is a foreign key into
	// the CommitsWithData table.
	MostRecentCommitID CommitID `sql:"most_recent_commit_id STRING NOT NULL"`
	// Digest is the MD5 hash of the pixel data; this is "what was drawn" at this point in time
	// (specified by MostRecentCommitID) and by the machine (specified by TraceID).
	Digest DigestBytes `sql:"digest BYTES NOT NULL"`
	// OptionsID is the MD5 hash of the key/values that belong to the options. Options do not impact
	// the TraceID and thus act as metadata. This is a foreign key into the Options table.
	OptionsID OptionsID `sql:"options_id BYTES NOT NULL"`
	// GroupingID is the MD5 hash of the key/values belonging to the grouping (e.g. corpus +
	// test name).
	GroupingID GroupingID `sql:"grouping_id BYTES NOT NULL"`
	// Corpus is the value associated with the "source_type" key. It is its own field for easier
	// searches and joins. It is a stored computed column derived from keys, so it is read back
	// when scanning but never written explicitly.
	Corpus string `sql:"corpus STRING AS (keys->>'source_type') STORED NOT NULL"`
	// Keys is a JSON representation of a map[string]string that are the trace keys.
	Keys paramtools.Params `sql:"keys JSONB NOT NULL"`

	// MatchesAnyIgnoreRule is true if this trace is matched by any of the ignore rules.
	MatchesAnyIgnoreRule NullableBool `sql:"matches_any_ignore_rule BOOL"`

	// This index makes application of all ignore rules easier.
	ignoredGroupingIndex struct{} `sql:"INDEX ignored_grouping_idx (matches_any_ignore_rule, grouping_id)"`
	// This index makes searching for recent untriaged digests faster. The STORING clause is
	// important to not have to do a lookup after finding the item in the index.
	corpusCommitIgnoreIndex struct{} `sql:"INDEX corpus_commit_ignore_idx (corpus, most_recent_commit_id, matches_any_ignore_rule) STORING (grouping_id, digest)"`
	// This index makes querying by keys faster.
	keysIndex struct{} `sql:"INVERTED INDEX keys_idx (keys)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ValueAtHeadRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"trace_id", "most_recent_commit_id", "digest", "options_id", "grouping_id", |
| "keys", "matches_any_ignore_rule"}, |
| []interface{}{r.TraceID, r.MostRecentCommitID, r.Digest, r.OptionsID, r.GroupingID, |
| r.Keys, r.MatchesAnyIgnoreRule.ToSQL()} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ValueAtHeadRow) ScanFrom(scan func(...interface{}) error) error { |
| var matches pgtype.Bool |
| err := scan(&r.TraceID, &r.MostRecentCommitID, &r.Digest, &r.OptionsID, |
| &r.GroupingID, &r.Corpus, &r.Keys, &matches) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.MatchesAnyIgnoreRule = toNullableBool(matches) |
| return nil |
| } |
| |
// PrimaryBranchParamRow is a row of the PrimaryBranchParams table. It corresponds to a given
// key/value pair that was seen within a range (tile) of commits. Originally, we had done a join
// between Traces and TraceValues to find the params where commit_id was in a given range.
// However, this took several minutes when there were 1M+ traces per commit. This table is
// effectively an index for getting just that data. Note, this contains Keys.
// TODO(kjlubick) Add another table to hold Options, so we can distinguish between the two
// and properly search by them.
type PrimaryBranchParamRow struct {
	// TileID indicates which tile the given Key and Value were seen on in the primary branch.
	// This is a foreign key into the Commits table.
	// NOTE(review): Tables has no table named Commits; this presumably refers to the tile_id
	// column of CommitsWithData - confirm.
	TileID TileID `sql:"tile_id INT4"`
	// Key is the key of a trace key or option.
	Key string `sql:"key STRING"`
	// Value is the value associated with the key.
	Value string `sql:"value STRING"`
	// We generally want locality by tile, so that goes first in the primary key.
	primaryKey struct{} `sql:"PRIMARY KEY (tile_id, key, value)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r PrimaryBranchParamRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"tile_id", "key", "value"}, |
| []interface{}{r.TileID, r.Key, r.Value} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *PrimaryBranchParamRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.TileID, &r.Key, &r.Value) |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface. |
| func (r PrimaryBranchParamRow) RowsOrderBy() string { |
| return `ORDER BY tile_id, key ASC` |
| } |
| |
| // TiledTraceDigestRow corresponds to a given trace producing a given digest within a range (tile) |
| // of commits. Originally, we did a SELECT DISTINCT over TraceValues, but that was too slow for |
| // many queries when the number of TraceValues was high. |
| type TiledTraceDigestRow struct { |
| // TraceID is the MD5 hash of the keys and values describing how this data point (i.e. the |
| // Digest) was drawn. This is a foreign key into the Traces table. |
| TraceID TraceID `sql:"trace_id BYTES"` |
| // TileID represents the tile for this row. |
| TileID TileID `sql:"tile_id INT4"` |
| // Digest is the MD5 hash of the pixel data; this is "what was drawn" at least once in the tile |
| // specified by StartCommitID and by the machine (specified by TraceID). |
| Digest DigestBytes `sql:"digest BYTES NOT NULL"` |
| // GroupingID is the grouping of the trace. |
| GroupingID GroupingID `sql:"grouping_id BYTES NOT NULL"` |
| // We generally want locality by TraceID, so that goes first in the primary key. |
| primaryKey struct{} `sql:"PRIMARY KEY (trace_id, tile_id, digest)"` |
| |
| // This index makes it easier to answer the question "What digests are being produced by |
| // a given grouping on the primary branch). |
| groupingDigestIndex struct{} `sql:"INDEX grouping_digest_idx (grouping_id, digest)"` |
| tileTraceIndex struct{} `sql:"INDEX tile_trace_idx (tile_id, trace_id)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r TiledTraceDigestRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"trace_id", "tile_id", "digest", "grouping_id"}, |
| []interface{}{r.TraceID, r.TileID, r.Digest, r.GroupingID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *TiledTraceDigestRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.TraceID, &r.TileID, &r.Digest, &r.GroupingID) |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface. |
| func (r TiledTraceDigestRow) RowsOrderBy() string { |
| return `ORDER BY tile_id, digest ASC` |
| } |
| |
| type IgnoreRuleRow struct { |
| // IgnoreRuleID is the id for this rule. |
| IgnoreRuleID uuid.UUID `sql:"ignore_rule_id UUID PRIMARY KEY DEFAULT gen_random_uuid()"` |
| // CreatorEmail is the email address of the user who originally created this rule. |
| CreatorEmail string `sql:"creator_email STRING NOT NULL"` |
| // UpdatedEmail is the email address of the user who most recently updated this rule. |
| UpdatedEmail string `sql:"updated_email STRING NOT NULL"` |
| // Expires represents when this rule should be re-evaluated for validity. |
| Expires time.Time `sql:"expires TIMESTAMP WITH TIME ZONE NOT NULL"` |
| // Note is a comment explaining this rule. It typically links to a bug. |
| Note string `sql:"note STRING"` |
| // Query is a map[string][]string that describe which traces should be ignored. |
| // Note that this can only apply to trace keys, not options. |
| Query paramtools.ReadOnlyParamSet `sql:"query JSONB NOT NULL"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r IgnoreRuleRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"ignore_rule_id", "creator_email", "updated_email", "expires", "note", "query"}, |
| []interface{}{r.IgnoreRuleID, r.CreatorEmail, r.UpdatedEmail, r.Expires, r.Note, r.Query} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *IgnoreRuleRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.IgnoreRuleID, &r.CreatorEmail, &r.UpdatedEmail, &r.Expires, &r.Note, &r.Query); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.Expires = r.Expires.UTC() |
| paramtools.ParamSet(r.Query).Normalize() |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface. |
| func (r IgnoreRuleRow) RowsOrderBy() string { |
| return `ORDER BY expires ASC` |
| } |
| |
| type ChangelistRow struct { |
| // ChangelistID is the fully qualified id of this changelist. "Fully qualified" means it has |
| // the system as a prefix (e.g "gerrit_1234") which simplifies joining logic and ensures |
| // uniqueness |
| ChangelistID string `sql:"changelist_id STRING PRIMARY KEY"` |
| // System is the Code Review System to which this changelist belongs. |
| System string `sql:"system STRING NOT NULL"` |
| // Status indicates if this CL is open or not. |
| Status ChangelistStatus `sql:"status STRING NOT NULL"` |
| // OwnerEmail is the email address of the CL's owner. |
| OwnerEmail string `sql:"owner_email STRING NOT NULL"` |
| // Subject is the first line of the CL's commit message (usually). |
| Subject string `sql:"subject STRING NOT NULL"` |
| // LastIngestedData indicates when Gold last saw data for this CL. |
| LastIngestedData time.Time `sql:"last_ingested_data TIMESTAMP WITH TIME ZONE NOT NULL"` |
| |
| // This index helps query for recently updated, open CLs. Keep an eye on this index, as it could |
| // lead to hotspotting: https://www.cockroachlabs.com/docs/v20.2/indexes.html#indexing-columns |
| systemStatusIngestedIndex struct{} `sql:"INDEX system_status_ingested_idx (system, status, last_ingested_data)"` |
| // This index helps with listing CLs. |
| statusIngestedIndex struct{} `sql:"INDEX status_ingested_idx (status, last_ingested_data DESC)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ChangelistRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"changelist_id", "system", "status", "owner_email", "subject", "last_ingested_data"}, |
| []interface{}{r.ChangelistID, r.System, r.Status, r.OwnerEmail, r.Subject, r.LastIngestedData} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ChangelistRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.ChangelistID, &r.System, &r.Status, &r.OwnerEmail, &r.Subject, &r.LastIngestedData); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.LastIngestedData = r.LastIngestedData.UTC() |
| return nil |
| } |
| |
// PatchsetRow represents a patchset (PS) that belongs to a changelist.
type PatchsetRow struct {
	// PatchsetID is the fully qualified id of this patchset, i.e. the id with the system as a
	// prefix (e.g "gerrit_abcde"). Qualifying the id simplifies joining logic and ensures
	// uniqueness.
	PatchsetID string `sql:"patchset_id STRING PRIMARY KEY"`
	// System names the Code Review System this patchset belongs to.
	System string `sql:"system STRING NOT NULL"`
	// ChangelistID points at the parent CL.
	ChangelistID string `sql:"changelist_id STRING NOT NULL REFERENCES Changelists (changelist_id)"`
	// Order is a 1 indexed number that says where this PS fits in time.
	Order int `sql:"ps_order INT2 NOT NULL"`
	// GitHash is the hash associated with the patchset. For many CRS it equals the unqualified
	// PatchsetID.
	GitHash string `sql:"git_hash STRING NOT NULL"`
	// CommentedOnCL tracks whether Gold has commented on the CL to say there are digests that
	// need human attention (e.g. there are non-flaky, untriaged, and unignored digests). We
	// should comment on a CL at most once per Patchset.
	CommentedOnCL bool `sql:"commented_on_cl BOOL NOT NULL"`
	// Created is the time at which this Patchset was created. It is provided by the
	// CodeReviewSystem and is used to determine the "most recent" Patchset. It can be null for
	// data that was ingested before this column was added (skbug.com/12093).
	Created time.Time `sql:"created_ts TIMESTAMP WITH TIME ZONE"`

	// This index is keyed by changelist and then by patchset order.
	clOrderIndex struct{} `sql:"INDEX cl_order_idx (changelist_id, ps_order)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r PatchsetRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| var created *time.Time |
| if !r.Created.IsZero() { |
| created = &r.Created |
| } |
| return []string{"patchset_id", "system", "changelist_id", "ps_order", "git_hash", |
| "commented_on_cl", "created_ts"}, |
| []interface{}{r.PatchsetID, r.System, r.ChangelistID, r.Order, r.GitHash, |
| r.CommentedOnCL, created} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *PatchsetRow) ScanFrom(scan func(...interface{}) error) error { |
| var created pgtype.Timestamptz |
| err := scan(&r.PatchsetID, &r.System, &r.ChangelistID, &r.Order, &r.GitHash, |
| &r.CommentedOnCL, &created) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| if created.Status == pgtype.Present { |
| r.Created = created.Time.UTC() |
| } |
| return nil |
| } |
| |
// TryjobRow represents a tryjob that produced data for a patchset of a changelist.
type TryjobRow struct {
	// TryjobID is the fully qualified id of this tryjob, i.e. the id with the system as a prefix
	// (e.g "buildbucket_1234"). Qualifying the id simplifies joining logic and ensures
	// uniqueness.
	TryjobID string `sql:"tryjob_id STRING PRIMARY KEY"`
	// System names the Continuous Integration System this tryjob belongs to.
	System string `sql:"system STRING NOT NULL"`
	// ChangelistID points at the CL for which this Tryjob produced data.
	ChangelistID string `sql:"changelist_id STRING NOT NULL REFERENCES Changelists (changelist_id)"`
	// PatchsetID points at the PS for which this Tryjob produced data.
	PatchsetID string `sql:"patchset_id STRING NOT NULL REFERENCES Patchsets (patchset_id)"`
	// DisplayName is a human readable name for this Tryjob.
	DisplayName string `sql:"display_name STRING NOT NULL"`
	// LastIngestedData is when Gold last saw data from this Tryjob.
	LastIngestedData time.Time `sql:"last_ingested_data TIMESTAMP WITH TIME ZONE NOT NULL"`

	// This index is keyed by changelist.
	clOrderIndex struct{} `sql:"INDEX cl_idx (changelist_id)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r TryjobRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"tryjob_id", "system", "changelist_id", "patchset_id", "display_name", "last_ingested_data"}, |
| []interface{}{r.TryjobID, r.System, r.ChangelistID, r.PatchsetID, r.DisplayName, r.LastIngestedData} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *TryjobRow) ScanFrom(scan func(...interface{}) error) error { |
| err := scan(&r.TryjobID, &r.System, &r.ChangelistID, &r.PatchsetID, &r.DisplayName, &r.LastIngestedData) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.LastIngestedData = r.LastIngestedData.UTC() |
| return nil |
| } |
| |
| // SecondaryBranchValueRow corresponds to a data point produced by a changelist or on a branch. |
| type SecondaryBranchValueRow struct { |
| // BranchName is a something like "gerrit_12345" or "chrome_m86" to identify the branch. |
| BranchName string `sql:"branch_name STRING"` |
| // VersionName is something like the patchset id or a branch commit hash to identify when |
| // along the branch the data happened. |
| VersionName string `sql:"version_name STRING"` |
| // TraceID is the MD5 hash of the keys and values describing how this data point (i.e. the |
| // Digest) was drawn. This is a foreign key into the Traces table. |
| TraceID TraceID `sql:"secondary_branch_trace_id BYTES"` |
| // Digest is the MD5 hash of the pixel data; this is "what was drawn" at this point in time |
| // (specified by CommitID) and by the machine (specified by TraceID). |
| Digest DigestBytes `sql:"digest BYTES NOT NULL"` |
| // GroupingID is the MD5 hash of the key/values belonging to the grouping (e.g. corpus + |
| // test name). If the grouping changes, this would require changing the entire column here and |
| // in several other tables. In theory, changing the grouping should be done very rarely. This |
| // is a foreign key into the Groupings table. |
| GroupingID GroupingID `sql:"grouping_id BYTES NOT NULL"` |
| // OptionsID is the MD5 hash of the key/values that belong to the options. Options do not impact |
| // the TraceID and thus act as metadata. This is a foreign key into the Options table. |
| OptionsID OptionsID `sql:"options_id BYTES NOT NULL"` |
| // SourceFileID is the MD5 hash of the source file that produced this data point. This is a |
| // foreign key into the SourceFiles table. |
| SourceFileID SourceFileID `sql:"source_file_id BYTES NOT NULL"` |
| // TryjobID corresponds to the latest tryjob (if any) that produced this data. When/if we |
| // support branches, this may be null, e.g. data coming from chrome_m86. |
| TryjobID string `sql:"tryjob_id string"` |
| // We include source_file_id as part of the primary key in order to support multiple datapoints |
| // per patchset for the same trace. See https://skbug.com/12149. |
| primaryKey struct{} `sql:"PRIMARY KEY (branch_name, version_name, secondary_branch_trace_id, source_file_id)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r SecondaryBranchValueRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"branch_name", "version_name", "secondary_branch_trace_id", "digest", "grouping_id", |
| "options_id", "source_file_id", "tryjob_id"}, |
| []interface{}{r.BranchName, r.VersionName, r.TraceID, r.Digest, r.GroupingID, |
| r.OptionsID, r.SourceFileID, r.TryjobID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *SecondaryBranchValueRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.BranchName, &r.VersionName, &r.TraceID, &r.Digest, &r.GroupingID, |
| &r.OptionsID, &r.SourceFileID, &r.TryjobID) |
| } |
| |
// SecondaryBranchParamRow records one key/value pair that was seen in data from a specific
// patchset or commit on a branch.
type SecondaryBranchParamRow struct {
	// BranchName identifies the branch; it is something like "gerrit_12345" or "chrome_m86".
	BranchName string `sql:"branch_name STRING"`
	// VersionName identifies when along the branch the data happened; it is something like the
	// patchset id or a branch commit hash.
	VersionName string `sql:"version_name STRING"`
	// Key is the key of a trace key or option.
	Key string `sql:"key STRING"`
	// Value is what Key was set to.
	Value string `sql:"value STRING"`
	// branch_name leads the primary key because we generally want locality by branch_name.
	primaryKey struct{} `sql:"PRIMARY KEY (branch_name, version_name, key, value)"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r SecondaryBranchParamRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"branch_name", "version_name", "key", "value"}, |
| []interface{}{r.BranchName, r.VersionName, r.Key, r.Value} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *SecondaryBranchParamRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.BranchName, &r.VersionName, &r.Key, &r.Value) |
| } |
| |
| // SecondaryBranchExpectationRow responds to a new expectation rule applying to a single Changelist. |
| // We save expectations per Changelist to avoid the extra effort having having to re-triage |
| // everything if a new Patchset was updated that fixes something slightly. |
| type SecondaryBranchExpectationRow struct { |
| // BranchName is a something like "gerrit_12345" or "chrome_m86" to identify the branch. |
| BranchName string `sql:"branch_name STRING"` |
| // GroupingID identifies the grouping to which the triaged digest belongs. This is a foreign key |
| // into the Groupings table. |
| GroupingID GroupingID `sql:"grouping_id BYTES"` |
| // Digest is the MD5 hash of the pixel data. It identifies the image that is currently triaged. |
| Digest DigestBytes `sql:"digest BYTES"` |
| // Label is the current label associated with the given digest in the given grouping. |
| Label ExpectationLabel `sql:"label CHAR NOT NULL"` |
| // ExpectationRecordID corresponds to most recent ExpectationRecordRow that set the given label. |
| // Unlike the primary branch, this can never be nil/null because we only keep track of |
| // secondary branch expectations for triaged events. |
| ExpectationRecordID uuid.UUID `sql:"expectation_record_id UUID NOT NULL"` |
| primaryKey struct{} `sql:"PRIMARY KEY (branch_name, grouping_id, digest)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r SecondaryBranchExpectationRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"branch_name", "grouping_id", "digest", "label", "expectation_record_id"}, |
| []interface{}{r.BranchName, r.GroupingID, r.Digest, string(r.Label), r.ExpectationRecordID} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *SecondaryBranchExpectationRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.BranchName, &r.GroupingID, &r.Digest, &r.Label, &r.ExpectationRecordID) |
| } |
| |
// ProblemImageRow tracks an image that we had a hard time downloading or decoding while
// computing the diffs.
type ProblemImageRow struct {
	// Digest identifies the problematic image. It is a string because it may be a malformed
	// digest.
	Digest string `sql:"digest STRING PRIMARY KEY"`
	// NumErrors is how many times this digest has been the cause of an error.
	NumErrors int `sql:"num_errors INT2 NOT NULL"`
	// LatestError is the string content of the most recent error associated with this digest.
	LatestError string `sql:"latest_error STRING NOT NULL"`
	// ErrorTS is when we last had an error on this digest.
	ErrorTS time.Time `sql:"error_ts TIMESTAMP WITH TIME ZONE NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r ProblemImageRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"digest", "num_errors", "latest_error", "error_ts"}, |
| []interface{}{r.Digest, r.NumErrors, r.Digest, r.ErrorTS} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *ProblemImageRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.Digest, &r.NumErrors, &r.LatestError, &r.ErrorTS); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.ErrorTS = r.ErrorTS.UTC() |
| return nil |
| } |
| |
| // RowsOrderBy implements the sqltest.RowsOrder interface. |
| func (r ProblemImageRow) RowsOrderBy() string { |
| return `ORDER BY digest ASC` |
| } |
| |
// DeprecatedExpectationUndoRow records an undo operation that could not be applied automatically
// during the transitional period of expectations. A human will apply these by hand when the
// firestore implementation is taken out of the loop.
type DeprecatedExpectationUndoRow struct {
	// ID is the SERIAL primary key of the row.
	ID int `sql:"id SERIAL PRIMARY KEY"`
	// ExpectationID presumably identifies the expectation change to undo - TODO confirm.
	ExpectationID string `sql:"expectation_id STRING NOT NULL"`
	// UserID presumably identifies the user who requested the undo - TODO confirm.
	UserID string `sql:"user_id STRING NOT NULL"`
	// TS is the timestamp stored with the row; ScanFrom reports it in UTC.
	TS time.Time `sql:"ts TIMESTAMP WITH TIME ZONE NOT NULL"`
}
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *DeprecatedExpectationUndoRow) ScanFrom(scan func(...interface{}) error) error { |
| if err := scan(&r.ID, &r.ExpectationID, &r.UserID, &r.TS); err != nil { |
| return skerr.Wrap(err) |
| } |
| r.TS = r.TS.UTC() |
| return nil |
| } |
| |
// TrackingCommitRow represents a repo for which we have checked whether the commits that landed
// correspond to any Changelists.
type TrackingCommitRow struct {
	// Repo is the url of the repo being tracked.
	Repo string `sql:"repo STRING PRIMARY KEY"`
	// LastGitHash is the git hash of the most recent commit that we know has landed.
	LastGitHash string `sql:"last_git_hash STRING NOT NULL"`
}
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r TrackingCommitRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"repo", "last_git_hash"}, |
| []interface{}{r.Repo, r.LastGitHash} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *TrackingCommitRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.Repo, &r.LastGitHash) |
| } |
| |
| type MetadataCommitRow struct { |
| // CommitID is a potentially arbitrary string. It is a foreign key in the CommitsWithData table. |
| CommitID CommitID `sql:"commit_id STRING PRIMARY KEY"` |
| // CommitMetadata is an arbitrary string; For current implementations, it is a link to a GCS |
| // file that has more information about the state of the repo when the data was generated. |
| CommitMetadata string `sql:"commit_metadata STRING NOT NULL"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r MetadataCommitRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"commit_id", "commit_metadata"}, |
| []interface{}{r.CommitID, r.CommitMetadata} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *MetadataCommitRow) ScanFrom(scan func(...interface{}) error) error { |
| return scan(&r.CommitID, &r.CommitMetadata) |
| } |
| |
| // PrimaryBranchDiffCalculationRow represents a grouping for which we need to compute diffs using |
| // digests seen on the primary branch. |
| type PrimaryBranchDiffCalculationRow struct { |
| // GroupingID is the grouping for which we should compute diffs. |
| GroupingID GroupingID `sql:"grouping_id BYTES PRIMARY KEY"` |
| // LastCalculated is the timestamp at which we last calculated diffs for the grouping. |
| LastCalculated time.Time `sql:"last_calculated_ts TIMESTAMP WITH TIME ZONE NOT NULL"` |
| // CalculationLeaseEnds is set to a future time when a worker starts computing diffs on this |
| // grouping. It allows workers to not do the same computation at the same time. |
| CalculationLeaseEnds time.Time `sql:"calculation_lease_ends TIMESTAMP WITH TIME ZONE NOT NULL"` |
| |
| calculatedIndex struct{} `sql:"INDEX calculated_idx (last_calculated_ts)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r PrimaryBranchDiffCalculationRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"grouping_id", "last_calculated_ts", "calculation_lease_ends"}, |
| []interface{}{r.GroupingID, r.LastCalculated, r.CalculationLeaseEnds} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *PrimaryBranchDiffCalculationRow) ScanFrom(scan func(...interface{}) error) error { |
| err := scan(&r.GroupingID, &r.LastCalculated, &r.CalculationLeaseEnds) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.LastCalculated = r.LastCalculated.UTC() |
| r.CalculationLeaseEnds = r.CalculationLeaseEnds.UTC() |
| return nil |
| } |
| |
| // SecondaryBranchDiffCalculationRow represents a grouping for a CL for which we need to compute |
| // diffs for using digests from that CL as well as the digests on the primary branch. |
| type SecondaryBranchDiffCalculationRow struct { |
| // BranchName is a something like "gerrit_12345" or "chrome_m86" to identify the branch. |
| BranchName string `sql:"branch_name STRING"` |
| // GroupingID identifies the grouping to which the triaged digest belongs. This is a foreign key |
| // into the Groupings table. |
| GroupingID GroupingID `sql:"grouping_id BYTES"` |
| // LastUpdated is the timestamp for which the most recent data was uploaded to the branch. |
| LastUpdated time.Time `sql:"last_updated_ts TIMESTAMP WITH TIME ZONE NOT NULL"` |
| // DigestsNotOnPrimary is a list of extra digests that do not appear on the primary branch but |
| // do appear on the secondary branch. It should not be empty - if it were to be empty, the |
| // normal diff computation on the primary branch would be sufficient to handle all diffs. |
| DigestsNotOnPrimary []types.Digest `sql:"digests STRING[] NOT NULL"` |
| // LastCalculated is the timestamp at which we last calculated diffs for the grouping. |
| LastCalculated time.Time `sql:"last_calculated_ts TIMESTAMP WITH TIME ZONE NOT NULL"` |
| // CalculationLeaseEnds is set to a future time when a worker starts computing diffs on this |
| // grouping. It allows workers to not do the same computation at the same time. |
| CalculationLeaseEnds time.Time `sql:"calculation_lease_ends TIMESTAMP WITH TIME ZONE NOT NULL"` |
| |
| primaryKey struct{} `sql:"PRIMARY KEY (branch_name, grouping_id)"` |
| |
| calculatedIndex struct{} `sql:"INDEX calculated_idx (last_calculated_ts)"` |
| } |
| |
| // ToSQLRow implements the sqltest.SQLExporter interface. |
| func (r SecondaryBranchDiffCalculationRow) ToSQLRow() (colNames []string, colData []interface{}) { |
| return []string{"branch_name", "grouping_id", "last_updated_ts", "digests", |
| "last_calculated_ts", "calculation_lease_ends"}, |
| []interface{}{r.BranchName, r.GroupingID, r.LastUpdated, r.DigestsNotOnPrimary, |
| r.LastCalculated, r.CalculationLeaseEnds} |
| } |
| |
| // ScanFrom implements the sqltest.SQLScanner interface. |
| func (r *SecondaryBranchDiffCalculationRow) ScanFrom(scan func(...interface{}) error) error { |
| var digests []string |
| err := scan(&r.BranchName, &r.GroupingID, &r.LastUpdated, &digests, |
| &r.LastCalculated, &r.CalculationLeaseEnds) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| r.DigestsNotOnPrimary = make([]types.Digest, 0, len(digests)) |
| for _, d := range digests { |
| r.DigestsNotOnPrimary = append(r.DigestsNotOnPrimary, types.Digest(d)) |
| } |
| r.LastUpdated = r.LastUpdated.UTC() |
| r.LastCalculated = r.LastCalculated.UTC() |
| r.CalculationLeaseEnds = r.CalculationLeaseEnds.UTC() |
| return nil |
| } |