blob: 0b45f6427801471276b236925449a515ac7c29e6 [file] [log] [blame]
// Package summary summarizes the current state of triaging.
package summary
import (
"context"
"net/url"
"sort"
"sync"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/tiling"
"go.skia.org/infra/golden/go/blame"
"go.skia.org/infra/golden/go/diff"
"go.skia.org/infra/golden/go/digest_counter"
"go.skia.org/infra/golden/go/shared"
"go.skia.org/infra/golden/go/types"
"go.skia.org/infra/golden/go/types/expectations"
)
// TriageStatus contains rolled up digest counts/blames for one test in one corpus.
// It is immutable and should be thread safe.
type TriageStatus struct {
// TODO(kjlubick) Change Name/Corpus to be a more generic "Grouping"
Name types.TestName `json:"name"`
Corpus string `json:"corpus"`
Pos int `json:"pos"`
Neg int `json:"neg"`
Untriaged int `json:"untriaged"`
Num int `json:"num"`
UntHashes types.DigestSlice `json:"untHashes"`
Blame []blame.WeightedBlame `json:"blame"`
// currently unused
Diameter int `json:"diameter"`
}
// MergeSorted creates a new []*TriageStatus from this and the passed
// in slices. The passed in data will "win" in the event there are tests
// in both. We assume that the two passed in slices are sorted by TestName,Corpus already.
func MergeSorted(existing, newOnes []*TriageStatus) []*TriageStatus {
ret := make([]*TriageStatus, 0, len(existing)+len(newOnes))
// Basic algorithm for merging two sorted arrays, with a small tweak to have
// the second one win for an exact match on Name and Corpus.
i, j := 0, 0
for i < len(existing) && j < len(newOnes) {
e, n := existing[i], newOnes[j]
if e.Name == n.Name && e.Corpus == n.Corpus {
ret = append(ret, n)
i++
j++
} else if e.Name > n.Name || (e.Name == n.Name && e.Corpus > n.Corpus) {
ret = append(ret, n)
j++
} else {
ret = append(ret, e)
i++
}
}
// Only one of these will actually append something, since j or i are at the end.
ret = append(ret, newOnes[j:]...)
ret = append(ret, existing[i:]...)
return ret
}
// TODO(jcgregorio) Make diameter faster, and also make the actual diameter
// metric better. Until then disable it.
const computeDiameter = false
// Data is a helper struct containing the data that goes into computing a summary.
type Data struct {
Traces map[tiling.TraceID]tiling.Trace
Expectations expectations.ReadOnly
// ByTrace maps all traces in Trace to the counts of digests that appeared
// in those traces.
ByTrace map[tiling.TraceID]digest_counter.DigestCount
Blamer blame.Blamer
DiffStore diff.DiffStore // only needed if computeDiameter = true
}
// Calculate calculates a slice of TriageStatus for the given data and query options. It will
// summarize only those traces that match the given testNames (if any), the query (if any), and
// optionally be only for those digests at head. At head means just the non-empty digests that
// appear in the most recent commit. The return value will have its entries sorted by TestName
// first, then sorted by Corpus in the event of a tie.
// TODO(kjlubick): make CalculateWithDiameter its own function (needs context.Context too)
func (s *Data) Calculate(testNames types.TestNameSet, query url.Values, head bool) []*TriageStatus {
defer shared.NewMetricsTimer("calc_summaries_total").Stop()
sklog.Infof("CalcSummaries: head %v", head)
var ret []*TriageStatus
// Filter down to just the traces we are interested in, based on query.
filtered := map[grouping][]*tracePair{}
t := shared.NewMetricsTimer("calc_summaries_filter_traces")
for id, tr := range s.Traces {
gt := tr.(*types.GoldenTrace)
if len(testNames) > 0 && !testNames[gt.TestName()] {
continue
}
if tiling.Matches(tr, query) {
k := grouping{test: gt.TestName(), corpus: gt.Corpus()}
if slice, ok := filtered[k]; ok {
filtered[k] = append(slice, &tracePair{tr: gt, id: id})
} else {
filtered[k] = []*tracePair{{tr: gt, id: id}}
}
}
}
t.Stop()
// Now create summaries for each test using the filtered set of traces.
t = shared.NewMetricsTimer("calc_summaries_tally")
for k, traces := range filtered {
digestMap := types.DigestSet{}
for _, pair := range traces {
if head {
// Find the last non-missing value in the trace.
for i := len(pair.tr.Digests) - 1; i >= 0; i-- {
if pair.tr.IsMissing(i) {
continue
} else {
digestMap[pair.tr.Digests[i]] = true
break
}
}
} else {
// Use the digests by trace if available, otherwise just inspect the trace.
if t, ok := s.ByTrace[pair.id]; ok {
for d := range t {
digestMap[d] = true
}
} else {
for i := len(pair.tr.Digests) - 1; i >= 0; i-- {
if !pair.tr.IsMissing(i) {
digestMap[pair.tr.Digests[i]] = true
}
}
}
}
}
ret = append(ret, s.makeSummary(k.test, k.corpus, digestMap.Keys()))
}
t.Stop()
// Sort for determinism and to allow clients to use binary search.
t = shared.NewMetricsTimer("calc_summaries_sort")
sort.Slice(ret, func(i, j int) bool {
return ret[i].Name < ret[j].Name || (ret[i].Name == ret[j].Name && ret[i].Corpus < ret[j].Corpus)
})
t.Stop()
return ret
}
// grouping is the set of information used to combine like traces.
type grouping struct {
test types.TestName
corpus string
}
// tracePair is used to hold traces, along with their ids.
type tracePair struct {
id tiling.TraceID
tr *types.GoldenTrace
}
// makeSummary returns a TriageStatus for the given digests.
func (s *Data) makeSummary(name types.TestName, corpus string, digests types.DigestSlice) *TriageStatus {
pos := 0
neg := 0
unt := 0
diamDigests := types.DigestSlice{}
untHashes := types.DigestSlice{}
for _, digest := range digests {
switch s.Expectations.Classification(name, digest) {
case expectations.Untriaged:
unt += 1
diamDigests = append(diamDigests, digest)
untHashes = append(untHashes, digest)
case expectations.Negative:
neg += 1
case expectations.Positive:
pos += 1
diamDigests = append(diamDigests, digest)
}
}
sort.Sort(diamDigests)
sort.Sort(untHashes)
d := 0
if computeDiameter {
d = diameter(diamDigests, s.DiffStore)
}
return &TriageStatus{
Name: name,
Diameter: d,
Pos: pos,
Neg: neg,
Untriaged: unt,
UntHashes: untHashes,
Num: pos + neg + unt,
Corpus: corpus,
Blame: s.Blamer.GetBlamesForTest(name),
}
}
func diameter(digests types.DigestSlice, diffStore diff.DiffStore) int {
// TODO Parallelize.
lock := sync.Mutex{}
max := 0
wg := sync.WaitGroup{}
for {
if len(digests) <= 2 {
break
}
wg.Add(1)
go func(d1 types.Digest, d2 types.DigestSlice) {
defer wg.Done()
dms, err := diffStore.Get(context.TODO(), d1, d2)
if err != nil {
sklog.Errorf("Unable to get diff: %s", err)
return
}
localMax := 0
for _, dm := range dms {
if dm.NumDiffPixels > localMax {
localMax = dm.NumDiffPixels
}
}
lock.Lock()
defer lock.Unlock()
if localMax > max {
max = localMax
}
}(digests[0], digests[1:2])
digests = digests[1:]
}
wg.Wait()
return max
}