package cache
import (
"context"
"sort"
"strconv"
"time"
lru "github.com/hashicorp/golang-lru"
"go.skia.org/infra/go/metrics2"
"go.skia.org/infra/perf/go/chromeperf"
"go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
)
// cacheSize is the size of each lru cache.
const cacheSize = 25000
// How often to clean up the lru caches.
const cacheCleanupPeriod = time.Minute
// TTL of the lru cache items.
const cacheItemTTL = 10 * time.Minute
// How often to wipe out the invalidation map.
const invalidationCleanupPeriod = 24 * time.Hour
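// Taken together: entries older than cacheItemTTL are evicted from both lru caches every
// cacheCleanupPeriod, and the invalidation map is cleared every invalidationCleanupPeriod (see New).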
// store implements anomalies.Store.
type store struct {
// testsCache contains data for the tests-minRevision-maxRevision requests.
testsCache *lru.Cache
// revisionCache contains data for anomalies around a revision requests.
revisionCache *lru.Cache
// invalidationMap lets us know which traces have been modified and therefore
// invalidated in the cache. The map is cleared every 24 hours to prevent it from growing
// too big. We should go back and revisit whether this period is too short or too long.
// Pros:
// - Solution is simple.
// - Minimal additional memory.
// - Minimal additional runtime, all O(1) operations.
// Cons:
// - Not accurate. Modifying an anomaly will invalidate all anomalies that share the same
// trace.
// - One can get unlucky by modifying an anomaly right before the cleanup happens.
// We landed on this approach as the alternatives would require significantly more memory or
// be less accurate. This can be re-evaluated later on.
invalidationMap map[string]bool
// numEntriesInCache records how many of the requested traces were served from the tests cache
// in the most recent GetAnomalies call.
numEntriesInCache metrics2.Int64Metric
ChromePerf chromeperf.AnomalyApiClient
}
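// commitAnomalyMapCacheEntry is the value type stored in testsCache: the anomaly map for one
// traceName:startCommitPosition:endCommitPosition key, plus the time the entry was added so that
// cleanupCache can enforce cacheItemTTL.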
type commitAnomalyMapCacheEntry struct {
addTime time.Time // When this entry was added.
commitAnomalyMap chromeperf.CommitNumberAnomalyMap
}
// New returns a new anomalies.Store instance with LRU cache.
func New(chromePerf chromeperf.AnomalyApiClient) (*store, error) {
testsCache, err := lru.New(cacheSize)
if err != nil {
return nil, skerr.Wrapf(err, "Failed to create anomaly store tests cache.")
}
revisionCache, err := lru.New(cacheSize)
if err != nil {
return nil, skerr.Wrapf(err, "Failed to create anomaly store revisions cache.")
}
ret := &store{
testsCache: testsCache,
revisionCache: revisionCache,
invalidationMap: make(map[string]bool),
numEntriesInCache: metrics2.GetInt64Metric("anomaly_store_num_entries_in_cache"),
ChromePerf: chromePerf,
}
// Clean up the lru caches periodically.
go func() {
for range time.Tick(cacheCleanupPeriod) {
cleanupCache(testsCache)
cleanupCache(revisionCache)
}
}()
// Clean up the invalidation map periodically. Re-assigning the store's invalidationMap field to a
// new map leaves the old map unreferenced, so the garbage collector will eventually reclaim it.
// Note that the field itself must be re-assigned here; re-assigning a local variable captured by
// this goroutine would leave the store holding the old, ever-growing map.
go func() {
for range time.Tick(invalidationCleanupPeriod) {
sklog.Debugf("[SkiaTriage] Clearing the invalidation map with size: %d", len(ret.invalidationMap))
ret.invalidationMap = make(map[string]bool)
}
}()
return ret, nil
}
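// A minimal usage sketch. The anomalyClient and ctx values below are stand-ins for whatever
// chromeperf.AnomalyApiClient implementation and context the caller already has; New only
// needs that interface:
//
//	anomalyStore, err := New(anomalyClient)
//	if err != nil {
//		sklog.Fatal(err)
//	}
//	anomalies, err := anomalyStore.GetAnomalies(ctx, []string{"trace1"}, 100, 200)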
// cleanupCache cleans up the oldest cache items based on the cache item TTL.
func cleanupCache(cache *lru.Cache) {
for {
if cache.Len() == 0 {
return
}
_, iOldestCommitAnomalyMapCacheEntry, ok := cache.GetOldest()
if !ok {
sklog.Warningf("Failed to get the oldest cache item.")
return
}
oldestCommitAnomalyMapCacheEntry, _ := iOldestCommitAnomalyMapCacheEntry.(commitAnomalyMapCacheEntry)
if oldestCommitAnomalyMapCacheEntry.addTime.Before(time.Now().Add(-cacheItemTTL)) {
cache.RemoveOldest()
sklog.Info("Cache item %s was removed from anomaly store cache.", oldestCommitAnomalyMapCacheEntry)
} else {
return
}
}
}
// GetAnomalies implements anomalies.Store
// It fetches anomalies from the cache first, then calls the Chrome Perf API to fetch the anomalies missing from the cache.
func (as *store) GetAnomalies(ctx context.Context, traceNames []string, startCommitPosition int, endCommitPosition int) (chromeperf.AnomalyMap, error) {
// Get anomalies from cache
traceNamesMissingFromCache := make([]string, 0)
result := chromeperf.AnomalyMap{}
numEntriesInCache := 0
for _, traceName := range traceNames {
iCommitAnomalyMapCacheEntry, hitCache := as.testsCache.Get(getAnomalyCacheKey(traceName, startCommitPosition, endCommitPosition))
_, invalidated := as.invalidationMap[traceName]
// If the trace exists in the invalidationMap, it has been modified and therefore needs to be re-fetched from Chromeperf.
if !hitCache || invalidated {
traceNamesMissingFromCache = append(traceNamesMissingFromCache, traceName)
continue
}
commitAnomalyMapCacheEntry, _ := iCommitAnomalyMapCacheEntry.(commitAnomalyMapCacheEntry)
result[traceName] = commitAnomalyMapCacheEntry.commitAnomalyMap
numEntriesInCache++
}
as.numEntriesInCache.Update(int64(numEntriesInCache))
if len(traceNamesMissingFromCache) == 0 {
return result, nil
}
// Get anomalies from Chrome Perf
sort.Strings(traceNamesMissingFromCache)
chromePerfAnomalies, err := as.ChromePerf.GetAnomalies(ctx, traceNamesMissingFromCache, startCommitPosition, endCommitPosition)
if err != nil {
sklog.Errorf("Failed to get chrome perf anomalies: %s", err)
} else {
for traceName, commitNumberAnomalyMap := range chromePerfAnomalies {
result[traceName] = commitNumberAnomalyMap
cacheKey := getAnomalyCacheKey(traceName, startCommitPosition, endCommitPosition)
sklog.Debugf("[SkiaTriage] Adding entry to testsCache with key: %s", cacheKey)
// Add anomalies to cache
as.testsCache.Add(cacheKey, commitAnomalyMapCacheEntry{
addTime: time.Now(),
commitAnomalyMap: commitNumberAnomalyMap,
})
}
}
return result, nil
}
// GetAnomaliesInTimeRange implements anomalies.Store
// It retrieves anomalies for each trace within the given start and end times.
func (as *store) GetAnomaliesInTimeRange(ctx context.Context, traceNames []string, startTime time.Time, endTime time.Time) (chromeperf.AnomalyMap, error) {
result := chromeperf.AnomalyMap{}
if len(traceNames) == 0 {
return result, nil
}
sort.Strings(traceNames)
chromePerfAnomalies, err := as.ChromePerf.GetAnomaliesTimeBased(ctx, traceNames, startTime, endTime)
if err != nil {
sklog.Errorf("Failed to get chrome perf anomalies: %s", err)
} else {
for traceName, commitNumberAnomalyMap := range chromePerfAnomalies {
result[traceName] = commitNumberAnomalyMap
}
}
return result, nil
}
// GetAnomaliesAroundRevision implements anomalies.Store
// It fetches anomalies that occurred around the specified revision number.
func (as *store) GetAnomaliesAroundRevision(ctx context.Context, revision int) ([]chromeperf.AnomalyForRevision, error) {
iAnomalies, ok := as.revisionCache.Get(revision)
if ok {
return iAnomalies.([]chromeperf.AnomalyForRevision), nil
}
result, err := as.ChromePerf.GetAnomaliesAroundRevision(ctx, revision)
if err != nil {
return nil, err
}
as.revisionCache.Add(revision, result)
return result, nil
}
// InvalidateTestsCacheForTraceName implements anomalies.Store
// Invalidates a specific traceName from the tests cache.
func (as *store) InvalidateTestsCacheForTraceName(ctx context.Context, traceName string) {
sklog.Debugf("[SkiaTriage] Adding entry to invalidationMap with trace name: %s", traceName)
as.invalidationMap[traceName] = true
sklog.Debug("[SkiaTriage] Invalidation map size: %d", len(as.invalidationMap))
}
// getAnomalyCacheKey builds the tests cache key "traceName:startCommitPosition:endCommitPosition".
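// For example, getAnomalyCacheKey("trace1", 100, 200) returns "trace1:100:200".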
func getAnomalyCacheKey(traceName string, startCommitPosition int, endCommitPosition int) string {
return traceName + ":" + strconv.Itoa(startCommitPosition) + ":" + strconv.Itoa(endCommitPosition)
}