golden/go/blame/blame.go - buildbot

blob: 650721bb639b4f1514d9d2f9668b19b3993d99a4 [file] [log] [blame]

	package blame

	import (
	"sort"
	"sync"

	"go.skia.org/infra/go/tiling"
	"go.skia.org/infra/go/timer"
	"go.skia.org/infra/go/util"
	"go.skia.org/infra/golden/go/storage"
	"go.skia.org/infra/golden/go/types"
	)

	// Blamer calculates blame lists by continously loading tiles
	// and changed expectations.
	// It is not thread safe. The client of this package needs to make sure there
	// are no conflicts.
	type Blamer struct {
	// commits are the commits corresponding to the current blamelists.
	commits []*tiling.Commit

	// testBlameLists are the blamelists keyed by testName and digest.
	testBlameLists map[string]map[string]*BlameDistribution
	storages *storage.Storage
	mutex sync.Mutex
	}

	// BlameDistribution contains a rough estimation of the probabilities that
	// a commit was responsible for the contained digest.
	// We also use it as the output structure for the front end.
	type BlameDistribution struct {
	// Freq contains likelihood counts that a commit was responsible
	// for the observed digest. The counts apply to the last len(Freq)
	// commits. When used as output structure in the GetBlame function
	// Freq contains the indices of commits.
	Freq []int `json:"freq"`

	// Old indicates whether the digest has been seen prior to the current
	// tile. In that case the blame might be unreliable.
	Old bool `json:"old"`
	}

	// WeightedBlame combines an authors name with a probabily that she
	// is on a blamelist this is aggregated over the digests of a test.
	type WeightedBlame struct {
	Author string `json:"author"`
	Prob float64 `json:"prob"`
	}

	// Sorting wrapper around WeightedBlame.
	type WeightedBlameSlice []*WeightedBlame

	func (w WeightedBlameSlice) Len() int { return len(w) }
	func (w WeightedBlameSlice) Less(i, j int) bool { return w[i].Prob < w[j].Prob }
	func (w WeightedBlameSlice) Swap(i, j int) { w[i], w[j] = w[j], w[i] }

	// New returns a new Blamer instance and error. The error is not
	// nil if the first run of calculating the blame lists failed.
	func New(storages storage.Storage) Blamer {
	return &Blamer{
	storages: storages,
	}
	}

	func (b Blamer) GetAllBlameLists() (map[string]map[string]BlameDistribution, []*tiling.Commit) {
	b.mutex.Lock()
	defer b.mutex.Unlock()
	return b.testBlameLists, b.commits
	}

	// GetBlamesForTest returns the list of authors that have blame assigned to
	// them for the given test.
	func (b Blamer) GetBlamesForTest(testName string) []WeightedBlame {
	blameLists, commits := b.GetAllBlameLists()

	digestBlameList := blameLists[testName]
	total := 0.0
	blameMap := map[string]int{}
	for _, blameDistribution := range digestBlameList {
	commitIndices, maxCount := b.getBlame(blameDistribution, commits, commits)
	for _, commitIdx := range commitIndices {
	blameMap[commits[commitIdx].Author] += maxCount
	}
	total += float64(maxCount * len(commitIndices))
	}

	ret := make([]*WeightedBlame, 0, len(blameMap))
	for author, count := range blameMap {
	ret = append(ret, &WeightedBlame{author, float64(count) / total})
	}

	sort.Sort(sort.Reverse(WeightedBlameSlice(ret)))
	return ret
	}

	// TODO(stephana): Remove all public functions other than GetBlame
	// once blame is working on the front-end and refactor BlameDistribution
	// to be more obvious about the ways it is used (as intermediated and output
	// format).

	// GetBlame returns the indices of the provided list of commits that likely
	// caused the given test name/digest pair. If the result is empty we are not
	// able to determine blame, because the test name/digest appeared prior
	// to the current tile.
	func (b Blamer) GetBlame(testName string, digest string, commits []tiling.Commit) *BlameDistribution {
	blameLists, blameCommits := b.GetAllBlameLists()
	commitIndices, maxCount := b.getBlame(blameLists[testName][digest], blameCommits, commits)
	return &BlameDistribution{
	Freq: commitIndices,
	Old: (maxCount != 0) && blameLists[testName][digest].Old,
	}
	}

	func (b Blamer) getBlame(blameDistribution BlameDistribution, blameCommits, commits []*tiling.Commit) ([]int, int) {
	if (blameDistribution == nil) \|\| (len(blameDistribution.Freq) == 0) {
	return []int{}, 0
	}

	// We have a blamelist. Let's find the indices relative to the given
	// list of commits.
	freq := blameDistribution.Freq
	ret := make([]int, 0, len(freq))
	maxCount := util.MaxInt(freq...)

	// Find the first element in the list and align the commits.
	idx := 0
	for freq[idx] < maxCount {
	idx++
	}
	tgtCommit := blameCommits[len(blameCommits)-len(freq)+idx]
	commitIdx := sort.Search(len(commits), func(i int) bool { return commits[i].CommitTime >= tgtCommit.CommitTime })
	for (idx < len(freq)) && (freq[idx] > 0) && (commitIdx < len(commits)) {
	ret = append(ret, commitIdx)
	idx++
	commitIdx++
	}

	return ret, maxCount
	}

	// Calculate calculates the blame list from the given tile.
	func (b Blamer) Calculate(tile tiling.Tile) error {
	exp, err := b.storages.ExpectationsStore.Get()
	if err != nil {
	return err
	}

	defer timer.New("blame").Stop()

	// Note: blameStart and blameEnd are continously updated to contain the
	// smalles start and end index of the ranges for a testName/digest pair.
	blameStart := map[string]map[string]int{}
	blameEnd := map[string]map[string]int{}

	// blameRange stores the candidate ranges for a testName/digest pair.
	blameRange := map[string]map[string][][]int{}
	firstCommit := tile.Commits[0]
	tileLen := tile.LastCommitIndex() + 1
	ret := map[string]map[string]*BlameDistribution{}

	for _, trace := range tile.Traces {
	gtr := trace.(*types.GoldenTrace)
	testName := gtr.Params()[types.PRIMARY_KEY_FIELD]

	// lastIdx tracks the index of the last digest that is definitely
	// not in the blamelist.
	lastIdx := -1
	found := map[string]bool{}
	for idx, digest := range gtr.Values[:tileLen] {
	if digest == types.MISSING_DIGEST {
	continue
	}

	status := exp.Classification(testName, digest)
	if (status == types.UNTRIAGED) && !found[digest] {
	found[digest] = true

	var startIdx int
	endIdx := idx

	// If we have only seen empty digests, then we do not
	// consider any digest before the current one.
	if lastIdx == -1 {
	startIdx = idx
	} else {
	startIdx = lastIdx + 1
	}

	// Get the info about this digest.
	digestInfo, err := b.storages.GetOrUpdateDigestInfo(testName, digest, tile.Commits[idx])
	if err != nil {
	return err
	}

	// Check if the digest was first seen outside the current tile.
	isOld := digestInfo.First < firstCommit.CommitTime
	commitRange := []int{startIdx, endIdx}
	if blameStartFound, ok := blameStart[testName]; !ok {
	blameStart[testName] = map[string]int{digest: startIdx}
	blameEnd[testName] = map[string]int{digest: endIdx}
	blameRange[testName] = map[string][][]int{digest: {commitRange}}
	ret[testName] = map[string]*BlameDistribution{digest: {Old: isOld}}
	} else if currentStart, ok := blameStartFound[digest]; !ok {
	blameStart[testName][digest] = startIdx
	blameEnd[testName][digest] = endIdx
	blameRange[testName][digest] = [][]int{commitRange}
	ret[testName][digest] = &BlameDistribution{Old: isOld}
	} else {
	blameStart[testName][digest] = util.MinInt(currentStart, startIdx)
	blameEnd[testName][digest] = util.MinInt(blameEnd[testName][digest], endIdx)
	blameRange[testName][digest] = append(blameRange[testName][digest], commitRange)
	ret[testName][digest].Old = isOld \|\| ret[testName][digest].Old
	}
	}
	lastIdx = idx
	}
	}

	commits := tile.Commits[:tileLen]
	for testName, digests := range blameRange {
	for digest, commitRanges := range digests {
	start := blameStart[testName][digest]
	end := blameEnd[testName][digest]

	freq := make([]int, len(commits)-start)
	for _, commitRange := range commitRanges {
	// If the commit range is nil, we cannot calculate the a
	// blamelist.
	if commitRange == nil {
	freq = []int{}
	break
	}

	// Calculate the blame.
	idxEnd := util.MinInt(commitRange[1], end)
	for i := commitRange[0]; i <= idxEnd; i++ {
	freq[i-start]++
	}
	}

	ret[testName][digest].Freq = freq
	}
	}

	// Swap out the old blame lists for the new ones.
	b.mutex.Lock()
	defer b.mutex.Unlock()
	b.testBlameLists, b.commits = ret, commits
	return nil
	}