blob: bff2760838337b80fc97bec2bd911bf47d3f496c [file] [log] [blame]
package store
import (
"context"
"encoding/json"
"sort"
"strings"
"time"
lru "github.com/hashicorp/golang-lru"
"go.skia.org/infra/codesize/go/common"
"go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
)
// gcsLRUCacheSize is the maximum number of entries held by the in-memory LRU cache of files
// downloaded from GCS. The cache stores the raw contents of Bloaty output files and of Bloaty
// size diff output files, keyed by their GCS path.
const gcsLRUCacheSize = 100 // Arbitrarily chosen.
// CommitOrPatchset identifies a group of Bloaty outputs: either the outputs generated for a commit
// (by a post-submit task), or the outputs generated for a patchset (by a tryjob).
//
// It is a comparable struct of strings, which allows it to be used as a map key.
type CommitOrPatchset struct {
	// Commit is the revision the outputs were generated for. Left empty for patchsets.
	Commit string `json:"commit"`
	// PatchIssue and PatchSet identify the patchset the outputs were generated for.
	PatchIssue string `json:"patch_issue"`
	PatchSet   string `json:"patch_set"`
}

// IsPatchset reports whether c refers to a patchset, i.e. whether at least one of its PatchIssue
// and PatchSet fields is set.
func (c CommitOrPatchset) IsPatchset() bool {
	noPatchInfo := c.PatchIssue == "" && c.PatchSet == ""
	return !noPatchInfo
}
// Binary represents a single binary that has been analyzed with Bloaty.
//
// Only Metadata is serialized to JSON; the remaining fields are excluded via the `json:"-"` tag.
type Binary struct {
// Metadata holds the parsed contents of the JSON metadata file that accompanies the Bloaty
// output file.
Metadata common.BloatyOutputMetadata `json:"metadata"`
// BloatyOutputFileGCSPath is the GCS path of the Bloaty output file (*.tsv) for this binary.
BloatyOutputFileGCSPath string `json:"-"`
// BloatySizeDiffOutputFileGCSPath is the GCS path where the Bloaty size diff output file
// (*.diff.txt) is expected. It is empty when the metadata lists no Bloaty diff arguments.
BloatySizeDiffOutputFileGCSPath string `json:"-"`
// Timestamp should reflect the "timestamp" field in the JSON metadata.
Timestamp time.Time `json:"-"`
}
// BinariesFromCommitOrPatchset groups all the binaries produced by a single commit or patchset.
//
// The embedded CommitOrPatchset identifies the group.
type BinariesFromCommitOrPatchset struct {
CommitOrPatchset
// Binaries lists the binaries indexed for this commit or patchset, in the order in which they
// were indexed.
// NOTE(review): the go2ts "ignorenil" tag presumably affects the generated TypeScript type only;
// confirm against the go2ts documentation.
Binaries []Binary `json:"binaries" go2ts:"ignorenil"`
}
// DownloadFn is the type of a function that downloads a file from GCS.
//
// It receives the path of the file to download and returns the raw contents of the file, or a
// non-nil error if the download failed.
//
// Its purpose is to abstract away any interactions with GCS to facilitate testing.
type DownloadFn func(ctx context.Context, path string) ([]byte, error)
// Store keeps track of the Bloaty output files found in GCS, and provides methods to access them.
//
// Use New to build a Store; the zero value has nil maps and a nil cache.
// NOTE(review): no lock guards the maps below in this file; confirm that callers serialize calls
// to Index with respect to the other methods.
type Store struct {
// downloadFn downloads files from GCS.
downloadFn DownloadFn
// gcsCache is an LRU cache of downloaded file contents ([]byte), keyed by GCS path.
gcsCache *lru.Cache
// binariesByCommitOrPatchset holds the indexed binaries, grouped by commit or patchset.
binariesByCommitOrPatchset map[CommitOrPatchset][]Binary
// indexedFiles is the set of Bloaty output file paths seen by Index, used to avoid indexing the
// same file twice.
indexedFiles map[string]bool
}
// New returns an empty Store that uses downloadFn to fetch files from GCS.
//
// The returned Store has no indexed files and an empty LRU cache with room for gcsLRUCacheSize
// entries.
func New(downloadFn DownloadFn) Store {
	store := Store{
		downloadFn:                 downloadFn,
		binariesByCommitOrPatchset: map[CommitOrPatchset][]Binary{},
		indexedFiles:               map[string]bool{},
	}

	var err error
	if store.gcsCache, err = lru.New(gcsLRUCacheSize); err != nil {
		// The cache constructor only rejects a non-positive size, which would be a programming
		// error in the constant above, so there is nothing to recover from.
		panic(err)
	}
	return store
}
// Index indexes the given Bloaty output file (*.tsv). It downloads the corresponding JSON metadata
// file, which is kept in memory, but it does not download the Bloaty output file itself.
//
// The JSON metadata file is expected at the same GCS path as the Bloaty output file, with the
// ".tsv" extension replaced by ".json". If the metadata carries a patch issue or a patch set, the
// binary is grouped under that patchset; otherwise it is grouped under the metadata's revision.
//
// Index returns an error if the path does not end with ".tsv", if the metadata file cannot be
// downloaded or parsed, or if the metadata timestamp is not in RFC 3339 format. A file is recorded
// as indexed only after it has been indexed successfully, so a failed call can be retried.
// Indexing a file that has already been indexed logs a warning and returns nil.
func (s *Store) Index(ctx context.Context, bloatyOutputFileGCSPath string) error {
	if !strings.HasSuffix(bloatyOutputFileGCSPath, ".tsv") {
		return skerr.Fmt(`file must end with ".tsv", got: %s`, bloatyOutputFileGCSPath)
	}

	// Prevent indexing the same file twice, which can cause duplicate entries on the web UI. This
	// can happen because PubSub guarantees delivering each event at least once, but not exactly once.
	// lovisolo@ has observed some instances of a GCS file upload event delivered multiple times.
	if s.indexedFiles[bloatyOutputFileGCSPath] {
		sklog.Warningf("Attempted to index an already indexed Bloaty output file: %s", bloatyOutputFileGCSPath)
		return nil
	}

	sklog.Infof("Indexing file: %s", bloatyOutputFileGCSPath)

	// Download and parse JSON metadata file. We skip the GCS cache as JSON files are only
	// downloaded once.
	basePath := strings.TrimSuffix(bloatyOutputFileGCSPath, ".tsv")
	jsonMetadataFilePath := basePath + ".json"
	metadataBytes, err := s.downloadFn(ctx, jsonMetadataFilePath)
	if err != nil {
		return skerr.Wrapf(err, "downloading %s", jsonMetadataFilePath)
	}
	metadata := common.BloatyOutputMetadata{}
	if err := json.Unmarshal(metadataBytes, &metadata); err != nil {
		return skerr.Wrapf(err, "parsing %s", jsonMetadataFilePath)
	}
	timestamp, err := time.Parse(time.RFC3339, metadata.Timestamp)
	if err != nil {
		return skerr.Wrapf(err, "parsing timestamp in %s", jsonMetadataFilePath)
	}

	// If it exists, the Bloaty size diff output file should be found at this location.
	bloatySizeDiffOutputFileGCSPath := ""
	if len(metadata.BloatyDiffArgs) != 0 {
		bloatySizeDiffOutputFileGCSPath = basePath + ".diff.txt"
	}

	// Patchset information takes precedence over the revision when choosing the group.
	commitOrPatchset := CommitOrPatchset{}
	if metadata.PatchIssue != "" || metadata.PatchSet != "" {
		commitOrPatchset.PatchIssue = metadata.PatchIssue
		commitOrPatchset.PatchSet = metadata.PatchSet
	} else {
		commitOrPatchset.Commit = metadata.Revision
	}

	// Add a new entry to the index.
	s.binariesByCommitOrPatchset[commitOrPatchset] = append(s.binariesByCommitOrPatchset[commitOrPatchset], Binary{
		Metadata:                        metadata,
		BloatyOutputFileGCSPath:         bloatyOutputFileGCSPath,
		BloatySizeDiffOutputFileGCSPath: bloatySizeDiffOutputFileGCSPath,
		Timestamp:                       timestamp,
	})

	// Mark the file as indexed only now that the entry exists. Marking it before the download
	// would turn any transient failure into a permanent one, because every retry would be
	// mistaken for a duplicate delivery and skipped.
	s.indexedFiles[bloatyOutputFileGCSPath] = true
	return nil
}
// GetMostRecentBinaries gets the most recent binaries grouped by commit or patchset.
//
// Groups are ordered from newest to oldest, where the age of a group is the timestamp of the first
// binary indexed for it. Groups with equal timestamps are ordered by commit, patch issue and patch
// set so that the result is deterministic. At most limit groups are returned; a negative limit is
// treated as zero. The result is never nil.
//
// The Binaries slices in the result are the Store's own slices, not copies.
func (s *Store) GetMostRecentBinaries(limit int) []BinariesFromCommitOrPatchset {
	// Sort commits/patchsets by their timestamp in descending order.
	commitsOrPatchsets := make([]CommitOrPatchset, 0, len(s.binariesByCommitOrPatchset))
	for commitOrPatchset := range s.binariesByCommitOrPatchset {
		commitsOrPatchsets = append(commitsOrPatchsets, commitOrPatchset)
	}
	sort.Slice(commitsOrPatchsets, func(i, j int) bool {
		a, b := commitsOrPatchsets[i], commitsOrPatchsets[j]
		// Index only ever stores non-empty slices, so element 0 always exists.
		timeStampA := s.binariesByCommitOrPatchset[a][0].Timestamp
		timeStampB := s.binariesByCommitOrPatchset[b][0].Timestamp
		if !timeStampA.Equal(timeStampB) {
			return timeStampA.After(timeStampB)
		}
		// Map iteration order is random and sort.Slice is not stable, so break ties explicitly.
		if a.Commit != b.Commit {
			return a.Commit < b.Commit
		}
		if a.PatchIssue != b.PatchIssue {
			return a.PatchIssue < b.PatchIssue
		}
		return a.PatchSet < b.PatchSet
	})

	// Keep only the most recent ones. A negative limit would make the slice expression panic.
	if limit < 0 {
		limit = 0
	}
	if limit < len(commitsOrPatchsets) {
		commitsOrPatchsets = commitsOrPatchsets[:limit]
	}

	// Build return value.
	retval := make([]BinariesFromCommitOrPatchset, len(commitsOrPatchsets))
	for i, commitOrPatchset := range commitsOrPatchsets {
		retval[i] = BinariesFromCommitOrPatchset{
			CommitOrPatchset: commitOrPatchset,
			Binaries:         s.binariesByCommitOrPatchset[commitOrPatchset],
		}
	}
	return retval
}
// GetBinary looks up the binary with the given binary name and compile task name among the
// binaries indexed for the given commit or patchset.
//
// It returns the first match and true, or the zero Binary and false if there is no match.
func (s *Store) GetBinary(commitOrPatchset CommitOrPatchset, binaryName, compileTaskName string) (Binary, bool) {
	candidates := s.binariesByCommitOrPatchset[commitOrPatchset]
	for i := range candidates {
		metadata := &candidates[i].Metadata
		if metadata.BinaryName != binaryName || metadata.CompileTaskName != compileTaskName {
			continue
		}
		return candidates[i], true
	}
	return Binary{}, false
}
// GetBloatyOutputFileContents returns the raw contents of the Bloaty output file of the given
// binary. The contents are served from the in-memory cache when available, and downloaded and
// cached otherwise.
func (s *Store) GetBloatyOutputFileContents(ctx context.Context, binary Binary) ([]byte, error) {
	gcsPath := binary.BloatyOutputFileGCSPath
	return s.downloadAndCache(ctx, gcsPath)
}
// GetBloatySizeDiffOutputFileContents downloads and returns the raw contents of a Bloaty size diff
// output file. The contents are served from the in-memory cache when available.
//
// It returns an error if the binary has no Bloaty size diff output file, i.e. if its
// BloatySizeDiffOutputFileGCSPath is empty, or if the download fails.
func (s *Store) GetBloatySizeDiffOutputFileContents(ctx context.Context, binary Binary) ([]byte, error) {
	// An empty path means no diff was generated for this binary. Fail early rather than asking the
	// downloader for an empty path.
	if binary.BloatySizeDiffOutputFileGCSPath == "" {
		return nil, skerr.Fmt("binary %q has no Bloaty size diff output file", binary.Metadata.BinaryName)
	}
	return s.downloadAndCache(ctx, binary.BloatySizeDiffOutputFileGCSPath)
}
// downloadAndCache returns the contents of the file at the given GCS path. Cached contents are
// returned directly; otherwise the file is downloaded, added to the cache and returned. Failed
// downloads are not cached.
func (s *Store) downloadAndCache(ctx context.Context, path string) ([]byte, error) {
	// Serve the file from the cache when possible. The cache only ever holds []byte values.
	cached, hit := s.gcsCache.Get(path)
	if hit {
		return cached.([]byte), nil
	}

	// Cache miss: download the file and remember its contents for next time.
	contents, err := s.downloadFn(ctx, path)
	if err != nil {
		return nil, skerr.Wrap(err)
	}
	s.gcsCache.Add(path, contents)
	return contents, nil
}