blob: e77dfca61bf51e9792a21a9a48a5e252a370a6ad [file] [log] [blame]
package blame
import (
"sort"
"go.skia.org/infra/go/tiling"
"go.skia.org/infra/go/util"
"go.skia.org/infra/golden/go/shared"
"go.skia.org/infra/golden/go/types"
)
// Blamer provides the results of blame calculations from a given
// tile and set of expectations. Specifically, blame is trying to identify
// who is responsible for UNTRIAGED digests showing up (it essentially
// ignores positive/negative digests).
// A Blamer should be immutable after creation.
type Blamer interface {
// GetAllBlameLists returns all BlameLists that have been computed.
GetAllBlameLists() (map[types.TestName]map[types.Digest]*BlameDistribution, []*tiling.Commit)
// GetBlamesForTest returns the list of WeightedBlame for the given test.
GetBlamesForTest(testName types.TestName) []*WeightedBlame
// GetBlame returns the indices of the provided list of commits that likely
// caused the given test name/digest pair. If the result is empty we are not
// able to determine blame, because the test name/digest appeared prior
// to the current tile.
GetBlame(testName types.TestName, digest types.Digest, commits []*tiling.Commit) *BlameDistribution
}
// BlamerImpl implements the Blamer interface.
type BlamerImpl struct {
// commits are the commits corresponding to the current blamelists.
commits []*tiling.Commit
// testBlameLists are the blamelists keyed by testName and digest.
testBlameLists map[types.TestName]map[types.Digest]*BlameDistribution
}
// BlameDistribution contains a rough estimation of the probabilities that
// a commit was responsible for the contained digest.
// We also use it as the output structure for the front end.
type BlameDistribution struct {
// Freq contains likelihood counts that a commit was responsible
// for the observed digest. The counts apply to the last len(Freq)
// commits. When used as output structure in the GetBlame function
// Freq contains the indices of commits.
// TODO(kjlubick): What does this actually represent? The comment
// above says two conflicting things. Can freq be anything other than
// length 0 or 1?
// dogben says: "When Freq contains counts, the array lines up with the
// tail of the associated slice of commits. It counts for how many traces
// the given digest was first seen at a particular commit.
// When Freq contains commit indices, it's essentially just a slice of
// commits. Any of those commits might be to blame, and there's no
// probability associated with them."
// TODO(kjlubick): refactor this into two structs, one for each condition
// mentioned by dogben.
Freq []int `json:"freq"`
// Old indicates whether the digest has been seen prior to the current
// tile. In that case the blame might be unreliable.
Old bool `json:"old"`
}
// WeightedBlame combines an authors name with a probability that they
// are on a blamelist. This is aggregated over the digests of a test.
type WeightedBlame struct {
Author string `json:"author"`
Prob float64 `json:"prob"`
}
// Sorting wrapper around WeightedBlame.
type WeightedBlameSlice []*WeightedBlame
func (w WeightedBlameSlice) Len() int { return len(w) }
func (w WeightedBlameSlice) Less(i, j int) bool { return w[i].Prob < w[j].Prob }
func (w WeightedBlameSlice) Swap(i, j int) { w[i], w[j] = w[j], w[i] }
// New returns a new Blamer instance and error. The error is not
// nil if the first run of calculating the blame lists failed.
func New(tile *tiling.Tile, exp types.Expectations) (*BlamerImpl, error) {
b := &BlamerImpl{}
return b, b.calculate(tile, exp)
}
// GetAllBlameLists fulfills the Blamer interface.
func (b *BlamerImpl) GetAllBlameLists() (map[types.TestName]map[types.Digest]*BlameDistribution, []*tiling.Commit) {
return b.testBlameLists, b.commits
}
// GetBlamesForTest fulfills the Blamer interface.
func (b *BlamerImpl) GetBlamesForTest(testName types.TestName) []*WeightedBlame {
blameLists, commits := b.GetAllBlameLists()
digestBlameList := blameLists[testName]
total := 0.0
blameMap := map[string]int{}
for _, blameDistribution := range digestBlameList {
commitIndices, maxCount := b.getBlame(blameDistribution, commits, commits)
for _, commitIdx := range commitIndices {
blameMap[commits[commitIdx].Author] += maxCount
}
total += float64(maxCount * len(commitIndices))
}
ret := make([]*WeightedBlame, 0, len(blameMap))
for author, count := range blameMap {
ret = append(ret, &WeightedBlame{author, float64(count) / total})
}
sort.Sort(sort.Reverse(WeightedBlameSlice(ret)))
return ret
}
// TODO(stephana): Remove all public functions other than GetBlame
// once blame is working on the front-end and refactor BlameDistribution
// to be more obvious about the ways it is used (as intermediated and output
// format).
// GetBlame fulfills the Blamer interface.
func (b *BlamerImpl) GetBlame(testName types.TestName, digest types.Digest, commits []*tiling.Commit) *BlameDistribution {
blameLists, blameCommits := b.GetAllBlameLists()
commitIndices, maxCount := b.getBlame(blameLists[testName][digest], blameCommits, commits)
return &BlameDistribution{
Freq: commitIndices,
Old: (maxCount != 0) && blameLists[testName][digest].Old,
}
}
func (b *BlamerImpl) getBlame(blameDistribution *BlameDistribution, blameCommits, commits []*tiling.Commit) ([]int, int) {
if (blameDistribution == nil) || (len(blameDistribution.Freq) == 0) {
return []int{}, 0
}
// We have a blamelist. Let's find the indices relative to the given
// list of commits.
freq := blameDistribution.Freq
ret := make([]int, 0, len(freq))
maxCount := util.MaxInt(freq...)
// Find the first element in the list and align the commits.
idx := 0
for freq[idx] < maxCount {
idx++
}
tgtCommit := blameCommits[len(blameCommits)-len(freq)+idx]
commitIdx := sort.Search(len(commits), func(i int) bool { return commits[i].CommitTime >= tgtCommit.CommitTime })
for (idx < len(freq)) && (freq[idx] > 0) && (commitIdx < len(commits)) {
ret = append(ret, commitIdx)
idx++
commitIdx++
}
return ret, maxCount
}
func (b *BlamerImpl) calculate(tile *tiling.Tile, exp types.Expectations) error {
defer shared.NewMetricsTimer("blame_calculate").Stop()
// Note: blameStart and blameEnd are continuously updated to contain the
// smallest start and end index of the ranges for a testName/digest pair.
blameStart := map[types.TestName]map[types.Digest]int{}
blameEnd := map[types.TestName]map[types.Digest]int{}
// blameRange stores the candidate ranges for a testName/digest pair.
blameRange := map[types.TestName]map[types.Digest][][]int{}
tileLen := tile.LastCommitIndex() + 1
ret := map[types.TestName]map[types.Digest]*BlameDistribution{}
for _, trace := range tile.Traces {
gtr := trace.(*types.GoldenTrace)
testName := gtr.TestName()
// lastIdx tracks the index of the last digest that is definitely
// not in the blamelist.
lastIdx := -1
found := types.DigestSet{}
for idx, digest := range gtr.Digests[:tileLen] {
if digest == types.MISSING_DIGEST {
continue
}
status := exp.Classification(testName, digest)
if (status == types.UNTRIAGED) && !found[digest] {
found[digest] = true
var startIdx int
endIdx := idx
// If we have only seen empty digests, then we do not
// consider any digest before the current one.
if lastIdx == -1 {
startIdx = idx
} else {
startIdx = lastIdx + 1
}
// Check if the digest was first seen outside the current tile.
isOld := false
commitRange := []int{startIdx, endIdx}
if blameStartFound, ok := blameStart[testName]; !ok {
blameStart[testName] = map[types.Digest]int{digest: startIdx}
blameEnd[testName] = map[types.Digest]int{digest: endIdx}
blameRange[testName] = map[types.Digest][][]int{digest: {commitRange}}
ret[testName] = map[types.Digest]*BlameDistribution{digest: {Old: isOld}}
} else if currentStart, ok := blameStartFound[digest]; !ok {
blameStart[testName][digest] = startIdx
blameEnd[testName][digest] = endIdx
blameRange[testName][digest] = [][]int{commitRange}
ret[testName][digest] = &BlameDistribution{Old: isOld}
} else {
blameStart[testName][digest] = util.MinInt(currentStart, startIdx)
blameEnd[testName][digest] = util.MinInt(blameEnd[testName][digest], endIdx)
blameRange[testName][digest] = append(blameRange[testName][digest], commitRange)
ret[testName][digest].Old = isOld || ret[testName][digest].Old
}
}
lastIdx = idx
}
}
commits := tile.Commits[:tileLen]
for testName, digests := range blameRange {
for digest, commitRanges := range digests {
start := blameStart[testName][digest]
end := blameEnd[testName][digest]
freq := make([]int, len(commits)-start)
for _, commitRange := range commitRanges {
// If the commit range is nil, we cannot calculate the a
// blamelist.
if commitRange == nil {
freq = []int{}
break
}
// Calculate the blame.
idxEnd := util.MinInt(commitRange[1], end)
for i := commitRange[0]; i <= idxEnd; i++ {
freq[i-start]++
}
}
ret[testName][digest].Freq = freq
}
}
// store the computations in the struct to be used by
// the query methods (see Blamer interface).
b.testBlameLists, b.commits = ret, commits
return nil
}
// Make sure BlamerImpl fulfills the Blamer Interface
var _ Blamer = (*BlamerImpl)(nil)