blob: 663e9fe3a26c6841b47a1f163a8f032c57788a42 [file]
package coverageingest
// The coverageingest package contains the code needed to download and interpret
// the results from our LLVM-based coverage tasks
import (
"bytes"
"context"
"crypto/md5"
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"cloud.google.com/go/storage"
"go.skia.org/infra/coverage/go/common"
"go.skia.org/infra/coverage/go/db"
"go.skia.org/infra/go/exec"
"go.skia.org/infra/go/fileutil"
"go.skia.org/infra/go/gcs"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/util"
"go.skia.org/infra/go/vcsinfo"
)
// INGEST_BLACKLIST holds the name patterns of files that are never downloaded.
// Currently that is the raw coverage data, which is put in a .tar.gz file for storage.
// We can't make anything out of it without the original binaries.
// NOTE(review): the pattern is not anchored, so any name that contains "tar.gz"
// after at least one other character is skipped.
var INGEST_BLACKLIST = []*regexp.Regexp{regexp.MustCompile(`.+tar\.gz`)}
// The Ingester interface abstracts the logic for ingesting results from a source
// (e.g. GCS). An Ingester should not be assumed to be thread safe.
type Ingester interface {
// IngestCommits will ingest files belonging to the specified commits. It
// returns nothing; the outcome is retrieved with GetResults.
IngestCommits(context.Context, []*vcsinfo.LongCommit)
// GetResults returns everything that was ingested on the last IngestCommits() call.
GetResults() []IngestedResults
}
// The IngestedResults links information about a commit with the coverage information
// produced by a list of jobs.
type IngestedResults struct {
// Commit identifies the commit the coverage belongs to (JSON key "info").
Commit *vcsinfo.ShortCommit `json:"info"`
// Jobs holds one summary per job that produced coverage for the commit (JSON key "jobs").
Jobs []common.CoverageSummary `json:"jobs"`
// TotalCoverage is the summary of all the jobs combined (JSON key "combined").
TotalCoverage common.CoverageSummary `json:"combined"`
}
// The gcsingester implements the Ingester interface with Google Cloud Storage (GCS)
type gcsingester struct {
// dir is the local directory under which files are stored, one sub-folder per commit hash.
dir string
// gcsClient is used to list and download the files to ingest.
gcsClient gcs.GCSClient
// results is what the most recent IngestCommits call produced; guarded by resultsMutex.
results []IngestedResults
// cache stores already computed coverage summaries, see getCoverage.
cache db.CoverageCache
// resultsMutex protects results.
resultsMutex sync.Mutex
}
// New builds an Ingester backed by GCS. Downloaded files are kept under
// ingestionDir, gcsClient is used to reach the bucket and cache holds the
// coverage summaries that have already been computed.
func New(ingestionDir string, gcsClient gcs.GCSClient, cache db.CoverageCache) *gcsingester {
	ingester := new(gcsingester)
	ingester.dir = ingestionDir
	ingester.gcsClient = gcsClient
	ingester.cache = cache
	return ingester
}
// unTar extracts the archive at tarpath into outpath, creating outpath first
// if needed. The archive is assumed to have the layout our Coverage bots
// produce. It returns an error if the directory cannot be set up or if the
// tar command fails.
func unTar(ctx context.Context, tarpath, outpath string) error {
	_, err := fileutil.EnsureDirExists(outpath)
	if err != nil {
		return fmt.Errorf("Could not set up directory to tar to: %s", err)
	}
	// Stripping 6 components removes /mnt/pd0/s/w/ir/coverage_html/ from the
	// folders stored inside the tar file.
	tarArgs := []string{"xf", tarpath, "--strip-components=6", "-C", outpath}
	cmd := exec.Command{
		Name: "tar",
		Args: tarArgs,
	}
	return exec.Run(ctx, &cmd)
}
// The renderInfo struct contains information needed to create the combined reports.
type renderInfo struct {
// outputPath is the directory the HTML report is written to. When it is "",
// no report is written.
outputPath string
// commit is the commit hash passed on to the rendered pages.
commit string
// jobName is the job name passed on to the rendered pages.
jobName string
}
// getCoverage looks up cacheKey in the cache and returns the stored
// CoverageSummary on a hit. On a miss it calculates the coverage of folders,
// stores the result under cacheKey and returns it together with any error
// reported by the store. A calculation error yields an empty summary.
func (n *gcsingester) getCoverage(cacheKey string, ri renderInfo, folders ...string) (common.CoverageSummary, error) {
	if cached, hit := n.cache.CheckCache(cacheKey); hit {
		return cached, nil
	}
	summary, err := calculateCoverage(ri, folders...)
	if err != nil {
		return common.CoverageSummary{}, err
	}
	return summary, n.cache.StoreToCache(cacheKey, summary)
}
// calculateCoverage analyzes one or more folders of coverage data and combines them together
// to get a complete picture of the coverage. It is a variable for easier mocking.
// If the renderInfo's outputPath is not "", a coverage report will be generated there
// in addition to returning the CoverageSummary.
var calculateCoverage = defaultCalculateTotalCoverage
// defaultCalculateTotalCoverage is the default value of calculateCoverage.
// It unions, file by file, the coverage data found under each of folders and
// returns the summed executable and missed line counts. With no folders it
// returns an empty CoverageSummary.
//
// If ri.outputPath is not "", it additionally writes one HTML page per
// summarized file to <outputPath>/coverage/ and a summary page to
// <outputPath>/index.html.
//
// An error is returned if a folder cannot be walked or if any part of the
// report cannot be rendered or written.
func defaultCalculateTotalCoverage(ri renderInfo, folders ...string) (common.CoverageSummary, error) {
	if len(folders) == 0 {
		return common.CoverageSummary{}, nil
	}
	writeReport := ri.outputPath != ""
	if writeReport {
		if _, err := fileutil.EnsureDirExists(path.Join(ri.outputPath, "coverage")); err != nil {
			return common.CoverageSummary{}, fmt.Errorf("Could not create output directories: %s", err)
		}
	}

	// relPaths is a set of paths relative to the passed in folders of where
	// the coverage data is.
	relPaths := util.StringSet{}
	// Make a list of all files in all folders. This is needed to make sure we analyze
	// all the files that may be run. For example, the vulkan bots use vulkan specific
	// files that do not show up in the CPU only run. So we must do this first pass
	// to make sure we collect all the files that we have data for.
	for _, f := range folders {
		err := filepath.Walk(f, func(p string, _ os.FileInfo, walkErr error) error {
			// filepath.Walk reports a failure to read p through walkErr. Surface
			// it rather than silently leaving part of the tree out of the totals.
			if walkErr != nil {
				return fmt.Errorf("Could not access %s: %s", p, walkErr)
			}
			// os.Stat (not the FileInfo handed in by Walk) is used on purpose so
			// that symbolic links keep being resolved before the directory check.
			fi, err := os.Stat(p)
			if err != nil {
				return fmt.Errorf("Could not get file info for %s: %s", p, err)
			}
			if !fi.IsDir() {
				relPaths[strings.TrimPrefix(p, f)] = true
			}
			return nil
		})
		if err != nil {
			return common.CoverageSummary{}, fmt.Errorf("Error while walking directory %s: %s", f, err)
		}
	}

	// This will hold the information needed to create the summary page, that is, the coverage
	// data for each file.
	summaryData := coverageSummaryTemplateData{
		Commit:  ri.commit,
		JobName: ri.jobName,
	}
	totalLines := 0
	missedLines := 0
	// Go through all the relative files and figure out the coverage data for them.
	// We union together all the data for the same relative file (e.g. the CPU config's
	// coverage of DM.cpp and the GPU config's coverage of DM.cpp), then add that data
	// to our total summary.
	for rp := range relPaths {
		// Decide first whether the file counts at all, so that excluded files
		// are never read or parsed.
		normPath, shouldSummarize := normalizePath(rp)
		if !shouldSummarize {
			continue
		}
		linesCovered := &coverageData{}
		for _, f := range folders {
			contents, err := ioutil.ReadFile(path.Join(f, rp))
			if err != nil {
				// The file might not exist for all configurations (see the
				// above vulkan example), so we simply skip a file that we don't see.
				continue
			}
			linesCovered = linesCovered.Union(parseLinesCovered(string(contents)))
		}
		fileTotal := linesCovered.TotalExecutable()
		fileMissed := linesCovered.MissedExecutable()
		totalLines += fileTotal
		missedLines += fileMissed
		if !writeReport {
			continue
		}

		// Write out an html file representing the combined coverage of the file
		// represented by the given relative path to ri.outputPath.
		percent := "--"
		if fileTotal != 0 {
			percent = fmt.Sprintf("%1.2f", 100.0*float32(fileTotal-fileMissed)/float32(fileTotal))
		}
		summaryData.Files = append(summaryData.Files, fileSummaryTemplateData{
			FileName:     normPath,
			CoveredLines: fileTotal - fileMissed,
			TotalLines:   fileTotal,
			PercentLines: percent,
		})
		dest := path.Join(ri.outputPath, "coverage", normPath+".html")
		if err := fileutil.EnsureDirPathExists(dest); err != nil {
			return common.CoverageSummary{}, err
		}
		content, err := linesCovered.ToHTMLPage(CoverageFileData{
			FileName: rp,
			Commit:   ri.commit,
			JobName:  ri.jobName,
		})
		if err != nil {
			return common.CoverageSummary{}, err
		}
		if err := ioutil.WriteFile(dest, []byte(content), 0644); err != nil {
			return common.CoverageSummary{}, err
		}
	}

	// Write out an html file summarizing the coverage of all the files.
	if writeReport {
		// Sort for determinism and ease of reading; relPaths is a map, so the
		// loop above visits files in no particular order.
		sort.Sort(summaryData.Files)
		var b bytes.Buffer
		if err := HTML_TEMPLATE_SUMMARY.Execute(&b, summaryData); err != nil {
			return common.CoverageSummary{}, err
		}
		if err := ioutil.WriteFile(path.Join(ri.outputPath, "index.html"), b.Bytes(), 0644); err != nil {
			return common.CoverageSummary{}, err
		}
	}
	return common.CoverageSummary{TotalLines: totalLines, MissedLines: missedLines}, nil
}
// normalizePath strips the bot-specific decoration from a coverage file path
// and reports whether the file belongs in our analysis.
//
// LLVM outputs absolute paths, which include the location of the source
// folder on the bots; that prefix is removed. The ".txt" suffix LLVM adds in
// the .txt.tar archive of the analysis is removed as well.
//
// The returned bool is false for anything that is still absolute (things like
// /usr/lib/fontconfig), for generated files under out and for third_party.
// TODO(kjlubick): Keep third_party in and make it configurable from the UI what to show.
func normalizePath(p string) (string, bool) {
	cleaned := strings.TrimSuffix(strings.TrimPrefix(p, "/mnt/pd0/work/skia/"), ".txt")
	for _, excluded := range []string{"/", "out", "third_party"} {
		if strings.HasPrefix(cleaned, excluded) {
			return cleaned, false
		}
	}
	return cleaned, true
}
// IngestCommits fulfills the Ingester interface.
//
// For every commit it lists the files stored under commit/<hash>/ in GCS and
// downloads the ones that are not on disk yet, skipping names that match
// INGEST_BLACKLIST. Each job that supplied a .text archive gets its own
// coverage summary, and all of those jobs are also analyzed together to
// produce the combined coverage and HTML report. Problems are logged and
// the affected file, job or commit is skipped; nothing is returned. When all
// commits have been handled, the stored results are replaced with the new
// ones.
func (n *gcsingester) IngestCommits(ctx context.Context, commits []*vcsinfo.LongCommit) {
	// isBlacklisted reports whether name matches any pattern in INGEST_BLACKLIST.
	isBlacklisted := func(name string) bool {
		for _, b := range INGEST_BLACKLIST {
			if b.MatchString(name) {
				return true
			}
		}
		return false
	}

	newResults := []IngestedResults{}
	for _, c := range commits {
		if _, err := fileutil.EnsureDirExists(path.Join(n.dir, c.Hash)); err != nil {
			sklog.Warningf("Could not create commit directories: %s", err)
		}
		basePath := "commit/" + c.Hash + "/"
		toDownload, err := n.getIngestableFilesFromGCS(basePath)
		if err != nil {
			// Identify the commit by its hash, as the other log lines here do,
			// instead of formatting the commit pointer itself with %s.
			sklog.Warningf("Problem ingesting for commit %s: %s", c.Hash, err)
			continue
		}

		// toSummarize maps a job name to the folder its text coverage was
		// extracted to.
		toSummarize := map[string]string{}
		for _, name := range toDownload {
			if isBlacklisted(name) {
				continue
			}
			// There are at least 2 parts in the name. We expect something like:
			// Job.file
			// Job.type.tar
			parts := strings.Split(name, ".")
			if len(parts) == 1 {
				sklog.Warningf("Unknown file to ingest: %s", name)
				continue
			}
			// Don't re-download files that already exist
			outpath := path.Join(n.dir, c.Hash, name)
			if !fileExists(outpath) {
				if err := n.ingestFile(ctx, basePath, name, c.Hash); err != nil {
					sklog.Warningf("Problem ingesting file: %s", err)
					continue
				}
			}
			job, ext := parts[0], parts[1]
			if ext == "text" {
				// This is where the .text.tar gets extracted to.
				toSummarize[job] = path.Join(n.dir, c.Hash, job, ext, "coverage")
			}
		}

		// We go through the list of all the jobs we know of and analyze their coverage
		// individually and then add them to the list to be joined together in a combined
		// fashion.
		jobs := common.CoverageSummarySlice{}
		toCombine := []string{}
		for job, folder := range toSummarize {
			cov, err := n.getCoverage(makeCacheKey(c.Hash, job), renderInfo{}, folder)
			if err != nil {
				sklog.Warningf("Was unable to create a coverage data: %s", err)
				continue
			}
			cov.Name = job
			jobs = append(jobs, cov)
			toCombine = append(toCombine, folder)
		}
		// Sort jobs alphabetically for determinism; the map above is iterated
		// in no particular order.
		sort.Sort(jobs)
		sort.Strings(toCombine)

		// Mimic the structure that LLVM outputs, e.g.
		// .../[hash]/[name]/html/
		//     index.html
		//     coverage/
		//         foo.cpp.html
		//         bar.cpp.html
		ri := renderInfo{
			outputPath: path.Join(n.dir, c.Hash, "Combined", "html"),
			commit:     c.Hash,
			jobName:    "Combined",
		}
		totalCoverage, err := n.getCoverage(makeCacheKey(c.Hash, toCombine...), ri, toCombine...)
		if err != nil {
			// The commit is still reported, with whatever getCoverage returned
			// as its combined coverage.
			sklog.Errorf("Was unable to create a combined summary: %s", err)
		}
		newResults = append(newResults, IngestedResults{Commit: c.ShortCommit, Jobs: jobs, TotalCoverage: totalCoverage})
		sklog.Infof("Ingestion completed for commit %s - %s", c.ShortCommit.Hash, c.ShortCommit.Author)
	}

	n.resultsMutex.Lock()
	defer n.resultsMutex.Unlock()
	n.results = newResults
}
// makeCacheKey builds the cache key for the given commit and job or folder names.
// The key is meant to stay somewhat human readable:
//   - exactly one name: "<name>:<commit>"
//   - otherwise: "Combined(<md5 of the names joined by '|'>):<commit>"
//
// Hashing the names "invalidates" the cache if 2 jobs finish and report
// coverage, then a 3rd finishes and is ready to be analyzed.
func makeCacheKey(commit string, names ...string) string {
	switch len(names) {
	case 1:
		return names[0] + ":" + commit
	default:
		digest := md5.Sum([]byte(strings.Join(names, "|")))
		return fmt.Sprintf("Combined(%x):%s", digest, commit)
	}
}
// getIngestableFilesFromGCS lists every file GCS holds under basePath and
// returns the names with basePath stripped from the front. These are the
// candidates for ingestion; the caller decides which ones to download.
func (n *gcsingester) getIngestableFilesFromGCS(basePath string) ([]string, error) {
	names := []string{}
	collect := func(item *storage.ObjectAttrs) {
		names = append(names, strings.TrimPrefix(item.Name, basePath))
	}
	err := n.gcsClient.AllFilesInDirectory(context.Background(), basePath, collect)
	if err != nil {
		return nil, fmt.Errorf("Could not get ingestible files from path %s: %s", basePath, err)
	}
	return names, nil
}
// ingestFile downloads basePath+name from GCS and stores it at
// <dir>/<commit>/<name>. If it is a tar file, it is then extracted to a
// sub-folder based on the original file name.
// E.g. My-Config.text.tar -> My-Config/text/
//
// The download is tied to ctx. An error is returned if the download, the
// write to disk or the extraction fails, or if a tar file name does not have
// exactly 3 dot-separated parts.
func (n *gcsingester) ingestFile(ctx context.Context, basePath, name, commit string) error {
	dl := basePath + name
	contents, err := n.gcsClient.GetFileContents(ctx, dl)
	if err != nil {
		return fmt.Errorf("Could not download file %s from GCS : %s", dl, err)
	}

	outpath := path.Join(n.dir, commit, name)
	file, err := os.Create(outpath)
	if err != nil {
		return fmt.Errorf("Could not open file %s for writing: %s", outpath, err)
	}
	written, err := file.Write(contents)
	// Close right away so the file is no longer held open while tar reads it.
	util.Close(file)
	if err != nil {
		// Files that already exist on disk are not downloaded again, so a
		// truncated file must not be left behind or it would never be retried.
		if rmErr := os.Remove(outpath); rmErr != nil {
			sklog.Warningf("Could not remove partially written file %s: %s", outpath, rmErr)
		}
		return fmt.Errorf("Could not write completely to %s. Only wrote %d bytes: %s", outpath, written, err)
	}

	if !strings.HasSuffix(name, "tar") {
		return nil
	}
	// Split My-Config-Name.type.tar into 3 parts. type is "text" or "html"
	parts := strings.Split(name, ".")
	if len(parts) != 3 {
		return fmt.Errorf("Invalid tar name to ingest %s - must have 3 parts", name)
	}
	if err := unTar(ctx, outpath, path.Join(n.dir, commit, parts[0], parts[1])); err != nil {
		return fmt.Errorf("Could not untar %s: %s", outpath, err)
	}
	return nil
}
// GetResults fulfills the Ingester interface. It returns the results stored
// by the most recent IngestCommits call, reading them under resultsMutex.
func (n *gcsingester) GetResults() []IngestedResults {
	n.resultsMutex.Lock()
	latest := n.results
	n.resultsMutex.Unlock()
	return latest
}
// fileExists reports whether something already exists at the given path.
// A path that cannot be examined for a reason other than not existing is
// logged and treated as missing.
func fileExists(path string) bool {
	_, err := os.Stat(path)
	switch {
	case err == nil:
		return true
	case os.IsNotExist(err):
		return false
	default:
		sklog.Warningf("Error getting file info about %s: %s", path, err)
		return false
	}
}