| package store |
| |
| import ( |
| "context" |
| "encoding/json" |
| "sort" |
| "strings" |
| "time" |
| |
| lru "github.com/hashicorp/golang-lru" |
| "go.skia.org/infra/codesize/go/common" |
| "go.skia.org/infra/go/skerr" |
| "go.skia.org/infra/go/sklog" |
| ) |
| |
// gcsLRUCacheSize is the maximum number of entries in the in-memory LRU cache that holds the raw
// contents of Bloaty output files downloaded from GCS (see downloadAndCache).
const gcsLRUCacheSize = 100 // Arbitrarily chosen.
| |
// CommitOrPatchset identifies the origin of a group of Bloaty outputs: either a commit (outputs
// generated by a post-submit task) or a patchset (outputs generated by a tryjob).
type CommitOrPatchset struct {
	Commit     string `json:"commit"`
	PatchIssue string `json:"patch_issue"`
	PatchSet   string `json:"patch_set"`
}

// IsPatchset returns true if this value identifies a patchset (tryjob) rather than a commit.
func (c CommitOrPatchset) IsPatchset() bool {
	if c.PatchIssue != "" {
		return true
	}
	return c.PatchSet != ""
}
| |
// Binary represents a single binary that has been analyzed with Bloaty.
type Binary struct {
	// Metadata holds the parsed contents of the JSON metadata file that accompanies the Bloaty
	// output file in GCS (downloaded and parsed by Store.Index).
	Metadata common.BloatyOutputMetadata `json:"metadata"`
	// BloatyOutputFileGCSPath is the GCS path of the Bloaty output file (*.tsv).
	BloatyOutputFileGCSPath string `json:"-"`
	// BloatySizeDiffOutputFileGCSPath is the GCS path of the Bloaty size diff output file
	// (*.diff.txt), or empty if the binary has no associated size diff.
	BloatySizeDiffOutputFileGCSPath string `json:"-"`

	// Timestamp should reflect the "timestamp" field in the JSON metadata.
	Timestamp time.Time `json:"-"`
}
| |
// BinariesFromCommitOrPatchset groups all the binaries produced by a single commit or patchset.
type BinariesFromCommitOrPatchset struct {
	CommitOrPatchset
	// Binaries is the list of analyzed binaries for this commit or patchset. The go2ts tag
	// presumably makes a nil slice serialize as [] for the TypeScript frontend — confirm against
	// the go2ts generator's documentation.
	Binaries []Binary `json:"binaries" go2ts:"ignorenil"`
}
| |
// DownloadFn is the type of a function that downloads a file from GCS. The path argument is the
// GCS path of the file to download; the return value is the file's raw contents.
//
// Its purpose is to abstract away any interactions with GCS to facilitate testing.
type DownloadFn func(ctx context.Context, path string) ([]byte, error)
| |
// Store keeps track of the Bloaty output files found in GCS, and provides methods to access them.
//
// NOTE(review): the maps below are mutated by Index without any locking; this assumes Index calls
// are serialized by the caller — confirm if Index can be invoked concurrently (e.g. from PubSub
// handlers).
type Store struct {
	// downloadFn downloads a file from GCS given its path.
	downloadFn DownloadFn
	// gcsCache is an LRU cache of raw Bloaty output file contents, keyed by GCS path.
	gcsCache *lru.Cache

	// binariesByCommitOrPatchset indexes all known binaries by the commit or patchset that
	// produced them.
	binariesByCommitOrPatchset map[CommitOrPatchset][]Binary
	// indexedFiles records the GCS paths of files already indexed, used to dedupe repeated
	// PubSub deliveries of the same file-upload event.
	indexedFiles map[string]bool
}
| |
| // New builds a new Store. |
| func New(downloadFn DownloadFn) Store { |
| cache, err := lru.New(gcsLRUCacheSize) |
| if err != nil { |
| // This only happens if the provided cache size value is negative, so not a recoverable error. |
| panic(err) |
| } |
| |
| return Store{ |
| downloadFn: downloadFn, |
| gcsCache: cache, |
| binariesByCommitOrPatchset: map[CommitOrPatchset][]Binary{}, |
| indexedFiles: map[string]bool{}, |
| } |
| } |
| |
| // Index indexes the given Bloaty output file (*.tsv). It downloads the corresponding JSON metadata |
| // file, which is kept in memory, but it does not download the Bloaty output file itself. |
| func (s *Store) Index(ctx context.Context, bloatyOutputFileGCSPath string) error { |
| if !strings.HasSuffix(bloatyOutputFileGCSPath, ".tsv") { |
| return skerr.Fmt(`file must end with ".tsv", got: %s`, bloatyOutputFileGCSPath) |
| } |
| |
| // Prevent indexing the same file twice, which can cause duplicate entries on the web UI. This |
| // can happen because PubSub guarantees delivering each event at least once, but not exactly once. |
| // lovisolo@ has observed some instances of a GCS file upload event delivered multiple times. |
| if s.indexedFiles[bloatyOutputFileGCSPath] { |
| sklog.Warningf("Attempted to index an already indexed Bloaty output file: %s", bloatyOutputFileGCSPath) |
| return nil |
| } |
| s.indexedFiles[bloatyOutputFileGCSPath] = true |
| |
| sklog.Infof("Indexing file: %s", bloatyOutputFileGCSPath) |
| |
| // Download and parse JSON metadata file. |
| basePath := bloatyOutputFileGCSPath[:len(bloatyOutputFileGCSPath)-4] // Drop the ".tsv" extension. |
| jsonMetadataFilePath := basePath + ".json" |
| bytes, err := s.downloadFn(ctx, jsonMetadataFilePath) // We skip the GCS cache as JSON files are only downloaded once. |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| metadata := common.BloatyOutputMetadata{} |
| if err := json.Unmarshal(bytes, &metadata); err != nil { |
| return skerr.Wrap(err) |
| } |
| timestamp, err := time.Parse(time.RFC3339, metadata.Timestamp) |
| if err != nil { |
| return skerr.Wrap(err) |
| } |
| |
| // If it exists, the Bloaty size diff output file should be found at this location. |
| bloatySizeDiffOutputFileGCSPath := "" |
| if len(metadata.BloatyDiffArgs) != 0 { |
| bloatySizeDiffOutputFileGCSPath = basePath + ".diff.txt" |
| } |
| |
| // Add a new entry to the index. |
| binary := Binary{ |
| Metadata: metadata, |
| BloatyOutputFileGCSPath: bloatyOutputFileGCSPath, |
| BloatySizeDiffOutputFileGCSPath: bloatySizeDiffOutputFileGCSPath, |
| Timestamp: timestamp, |
| } |
| commitOrPatchset := CommitOrPatchset{} |
| if metadata.PatchIssue != "" || metadata.PatchSet != "" { |
| commitOrPatchset.PatchIssue = metadata.PatchIssue |
| commitOrPatchset.PatchSet = metadata.PatchSet |
| } else { |
| commitOrPatchset.Commit = metadata.Revision |
| } |
| s.binariesByCommitOrPatchset[commitOrPatchset] = append(s.binariesByCommitOrPatchset[commitOrPatchset], binary) |
| |
| return nil |
| } |
| |
| // GetMostRecentBinaries gets the most recent binaries grouped by commit or patchset. |
| func (s *Store) GetMostRecentBinaries(limit int) []BinariesFromCommitOrPatchset { |
| // Sort commits/patchsets by their timestamp in descending order. |
| var commitsOrPatchsets []CommitOrPatchset |
| for commitOrPatchset := range s.binariesByCommitOrPatchset { |
| commitsOrPatchsets = append(commitsOrPatchsets, commitOrPatchset) |
| } |
| sort.Slice(commitsOrPatchsets, func(i, j int) bool { |
| timeStampI := s.binariesByCommitOrPatchset[commitsOrPatchsets[i]][0].Timestamp |
| timeStampJ := s.binariesByCommitOrPatchset[commitsOrPatchsets[j]][0].Timestamp |
| return timeStampI.After(timeStampJ) |
| }) |
| |
| // Keep only the most recent ones. |
| if limit < len(commitsOrPatchsets) { |
| commitsOrPatchsets = commitsOrPatchsets[:limit] |
| } |
| |
| // Build return value. |
| retval := make([]BinariesFromCommitOrPatchset, len(commitsOrPatchsets)) |
| for i, commitOrPatchset := range commitsOrPatchsets { |
| retval[i] = BinariesFromCommitOrPatchset{ |
| CommitOrPatchset: commitOrPatchset, |
| Binaries: s.binariesByCommitOrPatchset[commitOrPatchset], |
| } |
| } |
| return retval |
| } |
| |
| // GetBinary returns a specific binary. |
| func (s *Store) GetBinary(commitOrPatchset CommitOrPatchset, binaryName, compileTaskName string) (Binary, bool) { |
| for _, binary := range s.binariesByCommitOrPatchset[commitOrPatchset] { |
| if binary.Metadata.BinaryName == binaryName && binary.Metadata.CompileTaskName == compileTaskName { |
| return binary, true |
| } |
| } |
| return Binary{}, false |
| } |
| |
| // GetBloatyOutputFileContents downloads and returns the raw contents of a Bloaty output file. |
| func (s *Store) GetBloatyOutputFileContents(ctx context.Context, binary Binary) ([]byte, error) { |
| return s.downloadAndCache(ctx, binary.BloatyOutputFileGCSPath) |
| } |
| |
| // GetBloatySizeDiffOutputFileContents downloads and returns the raw contents of a Bloaty size diff |
| // output file. |
| func (s *Store) GetBloatySizeDiffOutputFileContents(ctx context.Context, binary Binary) ([]byte, error) { |
| return s.downloadAndCache(ctx, binary.BloatySizeDiffOutputFileGCSPath) |
| } |
| |
| func (s *Store) downloadAndCache(ctx context.Context, path string) ([]byte, error) { |
| // Read the file from the cache. |
| if bytes, ok := s.gcsCache.Get(path); ok { |
| return bytes.([]byte), nil |
| } |
| |
| // Download the file if it wasn't cached. |
| bytes, err := s.downloadFn(ctx, path) |
| if err != nil { |
| return nil, skerr.Wrap(err) |
| } |
| |
| // Cache the downloaded file and return its contents. |
| s.gcsCache.Add(path, bytes) |
| return bytes, nil |
| } |