coverage/go/coverageingest/ingester.go - buildbot - Git at Google

 package coverageingest

 // The coverageingest package contains the code needed to download and interpret
 // the results from our LLVM-based coverage tasks

 import (
 	"bytes"
 	"context"
 	"crypto/md5"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path"
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
 	"sync"

 	"cloud.google.com/go/storage"
 	"go.skia.org/infra/coverage/go/common"
 	"go.skia.org/infra/coverage/go/db"
 	"go.skia.org/infra/go/exec"
 	"go.skia.org/infra/go/fileutil"
 	"go.skia.org/infra/go/gcs"
 	"go.skia.org/infra/go/sklog"
 	"go.skia.org/infra/go/util"
 	"go.skia.org/infra/go/vcsinfo"
 )

 // INGEST_BLACKLIST lists the patterns of file names that IngestCommits skips.
 // Don't download the raw coverage data, which is put in a .tar.gz file for storage.
 // We can't make anything out of it without the original binaries.
 // NOTE(review): the pattern is not anchored, so a name that merely contains "tar.gz"
 // after at least one other character is skipped as well.
 var INGEST_BLACKLIST = []*regexp.Regexp{regexp.MustCompile(`.+tar\.gz`)}

 // The Ingester interface abstracts the logic for ingesting results from a source
 // (e.g. GCS).  An Ingester should not be assumed to be thread safe.
 type Ingester interface {
 	// IngestCommits will ingest files belonging to the specified commits.
 	// It reports nothing directly; the outcome is retrieved with GetResults().
 	IngestCommits(context.Context, []*vcsinfo.LongCommit)

 	// GetResults returns everything that was ingested on the last IngestCommits() call.
 	GetResults() []IngestedResults
 }

 // The IngestedResults links information about a commit with the coverage information
 // produced by a list of jobs.
 type IngestedResults struct {
 	// Commit identifies the commit the coverage data belongs to.
 	Commit        *vcsinfo.ShortCommit     `json:"info"`
 	// Jobs holds one coverage summary per job that reported data for the commit.
 	Jobs          []common.CoverageSummary `json:"jobs"`
 	// TotalCoverage is the summary obtained by combining the data of all jobs.
 	TotalCoverage common.CoverageSummary   `json:"combined"`
 }

 // The gcsingester implements the Ingester interface with Google Cloud Storage (GCS)
 type gcsingester struct {
 	// dir is the local root directory; every commit gets a sub-directory named after its hash.
 	dir          string
 	// gcsClient is used to list and download the coverage files.
 	gcsClient    gcs.GCSClient
 	// results is the outcome of the most recent IngestCommits call, guarded by resultsMutex.
 	results      []IngestedResults
 	// cache stores computed coverage summaries so they are not calculated again.
 	cache        db.CoverageCache
 	// resultsMutex protects results.
 	resultsMutex sync.Mutex
 }

 // New builds a GCS-backed Ingester. Downloaded files and generated reports are kept
 // under ingestionDir, GCS is reached through gcsClient and computed coverage summaries
 // are memoized in cache.
 func New(ingestionDir string, gcsClient gcs.GCSClient, cache db.CoverageCache) *gcsingester {
 	ingester := new(gcsingester)
 	ingester.dir = ingestionDir
 	ingester.gcsClient = gcsClient
 	ingester.cache = cache
 	return ingester
 }

 // unTar extracts the archive at tarpath into the directory outpath by running the
 // external tar command. outpath is created first if it does not exist yet.
 // The archive is expected to have the layout produced by our Coverage bots.
 func unTar(ctx context.Context, tarpath, outpath string) error {
 	_, err := fileutil.EnsureDirExists(outpath)
 	if err != nil {
 		return fmt.Errorf("Could not set up directory to tar to: %s", err)
 	}
 	// Dropping the first 6 path components removes /mnt/pd0/s/w/ir/coverage_html/
 	// from the folders stored inside the tar file.
 	tarArgs := []string{"xf", tarpath, "--strip-components=6", "-C", outpath}
 	cmd := exec.Command{
 		Name: "tar",
 		Args: tarArgs,
 	}
 	return exec.Run(ctx, &cmd)
 }

 // The renderInfo struct contains information needed to create the combined reports.
 type renderInfo struct {
 	// outputPath is the directory the HTML report is written to. "" disables the report.
 	outputPath string
 	// commit is the commit hash handed to the report templates.
 	commit     string
 	// jobName is the job label handed to the report templates (e.g. "Combined").
 	jobName    string
 }

 // getCoverage looks up cacheKey in the cache and returns the stored summary on a hit.
 // On a miss it runs calculateCoverage over folders (rendering a report if ri asks for
 // one), stores the result under cacheKey and returns it. A calculation error yields an
 // empty summary; an error from storing to the cache is returned next to the summary.
 func (n *gcsingester) getCoverage(cacheKey string, ri renderInfo, folders ...string) (common.CoverageSummary, error) {
 	cached, hit := n.cache.CheckCache(cacheKey)
 	if hit {
 		return cached, nil
 	}
 	summary, err := calculateCoverage(ri, folders...)
 	if err != nil {
 		return common.CoverageSummary{}, err
 	}
 	return summary, n.cache.StoreToCache(cacheKey, summary)
 }

 // calculateCoverage analyzes one or more folders of coverage data and combines them together
 // to get a complete picture of the coverage. It is a variable for easier mocking.
 // If the renderInfo's outputPath is not "", a coverage report will be generated there
 // in addition to returning the CoverageSummary.
 var calculateCoverage = defaultCalculateTotalCoverage

 // defaultCalculateTotalCoverage is the default implementation behind calculateCoverage.
 // It unions the per-file coverage data found in every folder of folders and returns the
 // summed executable and missed line counts. With no folders it returns an empty summary.
 //
 // Files are matched across folders by their path relative to the folder, so a source
 // file reported by several jobs is counted once with its coverage merged. Files that
 // normalizePath rejects are left out of the totals and of the report.
 //
 // If ri.outputPath is not "", one HTML page per summarized file is written below
 // ri.outputPath/coverage/ and an index.html summary is written to ri.outputPath.
 // Any error while walking the folders, creating directories, rendering or writing
 // aborts the calculation and is returned together with an empty summary.
 func defaultCalculateTotalCoverage(ri renderInfo, folders ...string) (common.CoverageSummary, error) {
 	if len(folders) == 0 {
 		return common.CoverageSummary{}, nil
 	}
 	shouldRender := ri.outputPath != ""
 	if shouldRender {
 		if _, err := fileutil.EnsureDirExists(path.Join(ri.outputPath, "coverage")); err != nil {
 			return common.CoverageSummary{}, fmt.Errorf("Could not create output directories: %s", err)
 		}
 	}
 	totalLines := 0
 	missedLines := 0

 	// relPaths is a set of paths relative to the passed in folders of where
 	// the coverage data is.
 	relPaths := util.StringSet{}

 	// Make a list of all files in all folders.  This is needed to make sure we analyze
 	// all the files that may be run.  For example, the vulkan bots use vulkan specific
 	// files that do not show up in the CPU only run.  So we must do this first pass
 	// to make sure we collect all the files that we have data for.
 	for _, f := range folders {
 		err := filepath.Walk(f, func(p string, info os.FileInfo, walkErr error) error {
 			// Walk reports unreadable directories through walkErr. Surface them instead
 			// of silently producing a summary that misses part of the data.
 			if walkErr != nil {
 				return fmt.Errorf("Could not visit %s: %s", p, walkErr)
 			}
 			if fi, err := os.Stat(p); err != nil {
 				return fmt.Errorf("Could not get file info for %s: %s", p, err)
 			} else if fi.IsDir() {
 				return nil
 			}
 			relPaths[strings.TrimPrefix(p, f)] = true
 			return nil
 		})
 		if err != nil {
 			return common.CoverageSummary{}, fmt.Errorf("Error while walking directory %s: %s", f, err)
 		}
 	}

 	// This will hold the information needed to create the summary page, that is, the coverage
 	// data for each file.
 	summaryData := coverageSummaryTemplateData{
 		Commit:  ri.commit,
 		JobName: ri.jobName,
 	}

 	// Go through all the relative files and figure out the coverage data for them.
 	// We union together all the data for the same relative file (e.g. the CPU config's
 	// coverage of DM.cpp and the GPU config's coverage of DM.cpp), then add that data
 	// to our total summary.
 	for rp := range relPaths {
 		// Decide first whether the file counts at all, so that excluded files
 		// (system headers, generated code, third_party) are never read or parsed.
 		normPath, shouldSummarize := normalizePath(rp)
 		if !shouldSummarize {
 			continue
 		}

 		linesCovered := &coverageData{}
 		for _, f := range folders {
 			contents, err := ioutil.ReadFile(path.Join(f, rp))
 			if err != nil {
 				// The file might not exist for all configurations (see the
 				// above vulkan example), so we simply skip a file that we don't see.
 				continue
 			}
 			linesCovered = linesCovered.Union(parseLinesCovered(string(contents)))
 		}

 		executable := linesCovered.TotalExecutable()
 		missed := linesCovered.MissedExecutable()
 		totalLines += executable
 		missedLines += missed

 		if !shouldRender {
 			continue
 		}

 		// Write out an html file representing the combined coverage of the file
 		// represented by the given relative path below ri.outputPath.
 		percent := "--"
 		if executable != 0 {
 			percent = fmt.Sprintf("%1.2f", 100.0*float32(executable-missed)/float32(executable))
 		}
 		summaryData.Files = append(summaryData.Files, fileSummaryTemplateData{
 			FileName:     normPath,
 			CoveredLines: executable - missed,
 			TotalLines:   executable,
 			PercentLines: percent,
 		})

 		dest := path.Join(ri.outputPath, "coverage", normPath+".html")
 		if err := fileutil.EnsureDirPathExists(dest); err != nil {
 			return common.CoverageSummary{}, err
 		}
 		content, err := linesCovered.ToHTMLPage(CoverageFileData{
 			FileName: rp,
 			Commit:   ri.commit,
 			JobName:  ri.jobName,
 		})
 		if err != nil {
 			return common.CoverageSummary{}, err
 		}
 		if err := ioutil.WriteFile(dest, []byte(content), 0644); err != nil {
 			return common.CoverageSummary{}, err
 		}
 	}

 	// Write out an html file summarizing the coverage of all the files if a report
 	// was requested.
 	if shouldRender {
 		// Sort for determinism and ease of reading (map iteration order is random).
 		sort.Sort(summaryData.Files)
 		var b bytes.Buffer
 		if err := HTML_TEMPLATE_SUMMARY.Execute(&b, summaryData); err != nil {
 			return common.CoverageSummary{}, err
 		}
 		if err := ioutil.WriteFile(path.Join(ri.outputPath, "index.html"), b.Bytes(), 0644); err != nil {
 			return common.CoverageSummary{}, err
 		}
 	}

 	return common.CoverageSummary{TotalLines: totalLines, MissedLines: missedLines}, nil
 }

 // normalizePath strips the bot-specific source root ("/mnt/pd0/work/skia/") from the
 // front of p and the ".txt" suffix LLVM appends in the .txt.tar archive from its end.
 // The second return value tells whether the file belongs in our analysis: it is false
 // for paths that are still absolute after stripping (e.g. /usr/lib/fontconfig), for
 // created files below "out" and for "third_party".
 // TODO(kjlubick): Keep third_party in and make it configurable from the UI what to show.
 func normalizePath(p string) (string, bool) {
 	trimmed := strings.TrimSuffix(strings.TrimPrefix(p, "/mnt/pd0/work/skia/"), ".txt")
 	for _, excluded := range []string{"/", "out", "third_party"} {
 		if strings.HasPrefix(trimmed, excluded) {
 			return trimmed, false
 		}
 	}
 	return trimmed, true
 }

 // IngestCommits fulfills the Ingester interface.
 //
 // For every commit it lists the files stored under commit/<hash>/ in GCS, skips those
 // matching INGEST_BLACKLIST, downloads the ones not yet present below n.dir/<hash>/ and
 // then computes one coverage summary per job that provided a ".text" archive plus a
 // combined summary (with an HTML report) over all of those jobs. A problem with a single
 // file, job or commit is logged and skipped so that the remaining work still happens.
 // The collected results replace whatever GetResults returned before.
 func (n *gcsingester) IngestCommits(ctx context.Context, commits []*vcsinfo.LongCommit) {
 	newResults := []IngestedResults{}
 	for _, c := range commits {
 		if _, err := fileutil.EnsureDirExists(path.Join(n.dir, c.Hash)); err != nil {
 			sklog.Warningf("Could not create commit directories: %s", err)
 		}

 		basePath := "commit/" + c.Hash + "/"
 		toDownload, err := n.getIngestableFilesFromGCS(basePath)
 		if err != nil {
 			// Log the hash: formatting the *LongCommit itself with %s prints a struct dump.
 			sklog.Warningf("Problem ingesting for commit %s: %s", c.Hash, err)
 			continue
 		}
 		// toSummarize maps a job name to the folder its text coverage was extracted to.
 		toSummarize := map[string]string{}
 	outer:
 		for _, name := range toDownload {
 			for _, b := range INGEST_BLACKLIST {
 				if b.MatchString(name) {
 					continue outer
 				}
 			}
 			// There are at least 2 parts in the name. We expect something like:
 			// Job.file
 			// Job.type.tar
 			parts := strings.Split(name, ".")
 			if len(parts) == 1 {
 				sklog.Warningf("Unknown file to ingest: %s", name)
 				continue
 			}
 			// Don't re-download files that already exist
 			outpath := path.Join(n.dir, c.Hash, name)
 			if !fileExists(outpath) {
 				if err := n.ingestFile(ctx, basePath, name, c.Hash); err != nil {
 					sklog.Warningf("Problem ingesting file: %s", err)
 					continue
 				}
 			}
 			job := parts[0]
 			ext := parts[1]
 			if ext == "text" {
 				// This is where the .text.tar gets extracted to.
 				toSummarize[job] = path.Join(n.dir, c.Hash, job, ext, "coverage")
 			}
 		}
 		// We go through the list of all the jobs we know of and analyze their coverage
 		// individually and then add them to the list to be joined together in a combined
 		// fashion.
 		jobs := common.CoverageSummarySlice{}
 		toCombine := []string{}
 		for job, folder := range toSummarize {
 			cov, err := n.getCoverage(makeCacheKey(c.Hash, job), renderInfo{}, folder)
 			if err != nil {
 				sklog.Warningf("Was unable to create a coverage data: %s", err)
 				continue
 			}
 			cov.Name = job
 			jobs = append(jobs, cov)
 			toCombine = append(toCombine, folder)
 		}
 		// Sort jobs alphabetically for determinism; the sorted folder list also keeps
 		// the combined cache key stable between runs.
 		sort.Sort(jobs)
 		sort.Strings(toCombine)

 		// Mimic the structure that LLVM outputs, e.g.
 		// .../[hash]/[name]/html/
 		//                        index.html
 		//                        coverage/
 		//                                 foo.cpp.html
 		//                                 bar.cpp.html
 		ri := renderInfo{
 			outputPath: path.Join(n.dir, c.Hash, "Combined", "html"),
 			commit:     c.Hash,
 			jobName:    "Combined",
 		}

 		totalCoverage, err := n.getCoverage(makeCacheKey(c.Hash, toCombine...), ri, toCombine...)
 		if err != nil {
 			// The commit is still reported below, with whatever summary was returned.
 			sklog.Errorf("Was unable to create a combined summary: %s", err)
 		}
 		newResults = append(newResults, IngestedResults{Commit: c.ShortCommit, Jobs: jobs, TotalCoverage: totalCoverage})
 		sklog.Infof("Ingestion completed for commit %s - %s", c.ShortCommit.Hash, c.ShortCommit.Author)
 	}
 	n.resultsMutex.Lock()
 	defer n.resultsMutex.Unlock()
 	n.results = newResults
 }

 // makeCacheKey builds a somewhat human readable cache key for a commit and the job
 // names (or folders) whose coverage is being analyzed.
 //
 // A single name is used verbatim ("name:commit"). Any other number of names is joined
 // with "|" and hashed ("Combined(<md5>):commit"), which "invalidates" the cache if 2
 // jobs finish and report coverage, then a 3rd finishes and is ready to be analyzed.
 func makeCacheKey(commit string, names ...string) string {
 	if len(names) != 1 {
 		digest := md5.Sum([]byte(strings.Join(names, "|")))
 		return fmt.Sprintf("Combined(%x):%s", digest, commit)
 	}
 	return names[0] + ":" + commit
 }

 // getIngestableFilesFromGCS lists every file below basePath in GCS and returns the
 // names with basePath stripped from the front. These are the candidates for ingestion;
 // the caller decides which of them are actually downloaded.
 func (n *gcsingester) getIngestableFilesFromGCS(basePath string) ([]string, error) {
 	names := []string{}
 	collect := func(item *storage.ObjectAttrs) {
 		names = append(names, strings.TrimPrefix(item.Name, basePath))
 	}
 	err := n.gcsClient.AllFilesInDirectory(context.Background(), basePath, collect)
 	if err != nil {
 		return nil, fmt.Errorf("Could not get ingestible files from path %s: %s", basePath, err)
 	}
 	return names, nil
 }

 // ingestFile downloads basePath+name from GCS into n.dir/<commit>/<name>. If it is a tar
 // file, it is extracted to a sub-folder based on the original file name.
 // E.g. My-Config.text.tar -> My-Config/text/
 //
 // The download honors ctx. When writing or extracting fails the local file is removed
 // again: IngestCommits treats an existing file as "already ingested", so a leftover
 // would prevent any later retry. An error is returned if the download, the write or the
 // extraction fails, or if a tar file name does not consist of exactly 3 parts.
 func (n *gcsingester) ingestFile(ctx context.Context, basePath, name, commit string) error {
 	dl := basePath + name
 	contents, err := n.gcsClient.GetFileContents(ctx, dl)
 	if err != nil {
 		return fmt.Errorf("Could not download file %s from GCS : %s", dl, err)
 	}

 	outpath := path.Join(n.dir, commit, name)
 	// discard removes the local copy so that the next ingestion downloads it again.
 	discard := func() {
 		if rmErr := os.Remove(outpath); rmErr != nil && !os.IsNotExist(rmErr) {
 			sklog.Warningf("Could not remove %s: %s", outpath, rmErr)
 		}
 	}
 	// WriteFile closes the file (and reports close errors) before tar reads it.
 	if err := ioutil.WriteFile(outpath, contents, 0666); err != nil {
 		discard()
 		return fmt.Errorf("Could not write %d bytes to %s: %s", len(contents), outpath, err)
 	}

 	if !strings.HasSuffix(name, "tar") {
 		return nil
 	}
 	// Split My-Config-Name.type.tar into 3 parts.  type is "text" or "html"
 	parts := strings.Split(name, ".")
 	if len(parts) != 3 {
 		return fmt.Errorf("Invalid tar name to ingest %s - must have 3 parts", name)
 	}
 	if err := unTar(ctx, outpath, path.Join(n.dir, commit, parts[0], parts[1])); err != nil {
 		discard()
 		return fmt.Errorf("Could not untar %s: %s", outpath, err)
 	}
 	return nil
 }

 // GetResults fulfills the Ingester interface. It hands out the results stored by the
 // most recent IngestCommits call, reading them while holding resultsMutex.
 func (n *gcsingester) GetResults() []IngestedResults {
 	n.resultsMutex.Lock()
 	latest := n.results
 	n.resultsMutex.Unlock()
 	return latest
 }

 // fileExists reports whether something already exists at the given path. A path that
 // cannot be inspected for any reason other than not existing is logged and treated
 // as missing.
 func fileExists(path string) bool {
 	_, err := os.Stat(path)
 	switch {
 	case err == nil:
 		return true
 	case os.IsNotExist(err):
 		return false
 	default:
 		sklog.Warningf("Error getting file info about %s: %s", path, err)
 		return false
 	}
 }
	package coverageingest

	// The coverageingest package contains the code needed to download and interpret
	// the results from our LLVM-based coverage tasks

	import (
	"bytes"
	"context"
	"crypto/md5"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"

	"cloud.google.com/go/storage"
	"go.skia.org/infra/coverage/go/common"
	"go.skia.org/infra/coverage/go/db"
	"go.skia.org/infra/go/exec"
	"go.skia.org/infra/go/fileutil"
	"go.skia.org/infra/go/gcs"
	"go.skia.org/infra/go/sklog"
	"go.skia.org/infra/go/util"
	"go.skia.org/infra/go/vcsinfo"
	)

	// INGEST_BLACKLIST lists the patterns of file names that IngestCommits skips.
	// Don't download the raw coverage data, which is put in a .tar.gz file for storage.
	// We can't make anything out of it without the original binaries.
	// NOTE(review): the pattern is not anchored, so a name that merely contains "tar.gz"
	// after at least one other character is skipped as well.
	var INGEST_BLACKLIST = []*regexp.Regexp{regexp.MustCompile(`.+tar\.gz`)}

	// The Ingester interface abstracts the logic for ingesting results from a source
	// (e.g. GCS). An Ingester should not be assumed to be thread safe.
	type Ingester interface {
	// IngestCommits will ingest files belonging to the specified commits.
	// It reports nothing directly; the outcome is retrieved with GetResults().
	IngestCommits(context.Context, []*vcsinfo.LongCommit)

	// GetResults returns everything that was ingested on the last IngestCommits() call.
	GetResults() []IngestedResults
	}

	// The IngestedResults links information about a commit with the coverage information
	// produced by a list of jobs.
	type IngestedResults struct {
	// Commit identifies the commit the coverage data belongs to.
	Commit *vcsinfo.ShortCommit `json:"info"`
	// Jobs holds one coverage summary per job that reported data for the commit.
	Jobs []common.CoverageSummary `json:"jobs"`
	// TotalCoverage is the summary obtained by combining the data of all jobs.
	TotalCoverage common.CoverageSummary `json:"combined"`
	}

	// The gcsingester implements the Ingester interface with Google Cloud Storage (GCS)
	type gcsingester struct {
	// dir is the local root directory; every commit gets a sub-directory named after its hash.
	dir string
	// gcsClient is used to list and download the coverage files.
	gcsClient gcs.GCSClient
	// results is the outcome of the most recent IngestCommits call, guarded by resultsMutex.
	results []IngestedResults
	// cache stores computed coverage summaries so they are not calculated again.
	cache db.CoverageCache
	// resultsMutex protects results.
	resultsMutex sync.Mutex
	}

	// New builds a GCS-backed Ingester. Downloaded files and generated reports are kept
	// under ingestionDir, GCS is reached through gcsClient and computed coverage summaries
	// are memoized in cache.
	func New(ingestionDir string, gcsClient gcs.GCSClient, cache db.CoverageCache) *gcsingester {
		ingester := new(gcsingester)
		ingester.dir = ingestionDir
		ingester.gcsClient = gcsClient
		ingester.cache = cache
		return ingester
	}

	// unTar extracts the archive at tarpath into the directory outpath by running the
	// external tar command. outpath is created first if it does not exist yet.
	// The archive is expected to have the layout produced by our Coverage bots.
	func unTar(ctx context.Context, tarpath, outpath string) error {
		_, err := fileutil.EnsureDirExists(outpath)
		if err != nil {
			return fmt.Errorf("Could not set up directory to tar to: %s", err)
		}
		// Dropping the first 6 path components removes /mnt/pd0/s/w/ir/coverage_html/
		// from the folders stored inside the tar file.
		tarArgs := []string{"xf", tarpath, "--strip-components=6", "-C", outpath}
		cmd := exec.Command{
			Name: "tar",
			Args: tarArgs,
		}
		return exec.Run(ctx, &cmd)
	}

	// The renderInfo struct contains information needed to create the combined reports.
	type renderInfo struct {
	// outputPath is the directory the HTML report is written to. "" disables the report.
	outputPath string
	// commit is the commit hash handed to the report templates.
	commit string
	// jobName is the job label handed to the report templates (e.g. "Combined").
	jobName string
	}

	// getCoverage looks up cacheKey in the cache and returns the stored summary on a hit.
	// On a miss it runs calculateCoverage over folders (rendering a report if ri asks for
	// one), stores the result under cacheKey and returns it. A calculation error yields an
	// empty summary; an error from storing to the cache is returned next to the summary.
	func (n *gcsingester) getCoverage(cacheKey string, ri renderInfo, folders ...string) (common.CoverageSummary, error) {
		cached, hit := n.cache.CheckCache(cacheKey)
		if hit {
			return cached, nil
		}
		summary, err := calculateCoverage(ri, folders...)
		if err != nil {
			return common.CoverageSummary{}, err
		}
		return summary, n.cache.StoreToCache(cacheKey, summary)
	}

	// calculateCoverage analyzes one or more folders of coverage data and combines them together
	// to get a complete picture of the coverage. It is a variable for easier mocking.
	// If the renderInfo's outputPath is not "", a coverage report will be generated there
	// in addition to returning the CoverageSummary.
	var calculateCoverage = defaultCalculateTotalCoverage

	// defaultCalculateTotalCoverage is the default implementation behind calculateCoverage.
	// It unions the per-file coverage data found in every folder of folders and returns the
	// summed executable and missed line counts. With no folders it returns an empty summary.
	//
	// Files are matched across folders by their path relative to the folder, so a source
	// file reported by several jobs is counted once with its coverage merged. Files that
	// normalizePath rejects are left out of the totals and of the report.
	//
	// If ri.outputPath is not "", one HTML page per summarized file is written below
	// ri.outputPath/coverage/ and an index.html summary is written to ri.outputPath.
	// Any error while walking the folders, creating directories, rendering or writing
	// aborts the calculation and is returned together with an empty summary.
	func defaultCalculateTotalCoverage(ri renderInfo, folders ...string) (common.CoverageSummary, error) {
		if len(folders) == 0 {
			return common.CoverageSummary{}, nil
		}
		shouldRender := ri.outputPath != ""
		if shouldRender {
			if _, err := fileutil.EnsureDirExists(path.Join(ri.outputPath, "coverage")); err != nil {
				return common.CoverageSummary{}, fmt.Errorf("Could not create output directories: %s", err)
			}
		}
		totalLines := 0
		missedLines := 0

		// relPaths is a set of paths relative to the passed in folders of where
		// the coverage data is.
		relPaths := util.StringSet{}

		// Make a list of all files in all folders. This is needed to make sure we analyze
		// all the files that may be run. For example, the vulkan bots use vulkan specific
		// files that do not show up in the CPU only run. So we must do this first pass
		// to make sure we collect all the files that we have data for.
		for _, f := range folders {
			err := filepath.Walk(f, func(p string, info os.FileInfo, walkErr error) error {
				// Walk reports unreadable directories through walkErr. Surface them instead
				// of silently producing a summary that misses part of the data.
				if walkErr != nil {
					return fmt.Errorf("Could not visit %s: %s", p, walkErr)
				}
				if fi, err := os.Stat(p); err != nil {
					return fmt.Errorf("Could not get file info for %s: %s", p, err)
				} else if fi.IsDir() {
					return nil
				}
				relPaths[strings.TrimPrefix(p, f)] = true
				return nil
			})
			if err != nil {
				return common.CoverageSummary{}, fmt.Errorf("Error while walking directory %s: %s", f, err)
			}
		}

		// This will hold the information needed to create the summary page, that is, the coverage
		// data for each file.
		summaryData := coverageSummaryTemplateData{
			Commit:  ri.commit,
			JobName: ri.jobName,
		}

		// Go through all the relative files and figure out the coverage data for them.
		// We union together all the data for the same relative file (e.g. the CPU config's
		// coverage of DM.cpp and the GPU config's coverage of DM.cpp), then add that data
		// to our total summary.
		for rp := range relPaths {
			// Decide first whether the file counts at all, so that excluded files
			// (system headers, generated code, third_party) are never read or parsed.
			normPath, shouldSummarize := normalizePath(rp)
			if !shouldSummarize {
				continue
			}

			linesCovered := &coverageData{}
			for _, f := range folders {
				contents, err := ioutil.ReadFile(path.Join(f, rp))
				if err != nil {
					// The file might not exist for all configurations (see the
					// above vulkan example), so we simply skip a file that we don't see.
					continue
				}
				linesCovered = linesCovered.Union(parseLinesCovered(string(contents)))
			}

			executable := linesCovered.TotalExecutable()
			missed := linesCovered.MissedExecutable()
			totalLines += executable
			missedLines += missed

			if !shouldRender {
				continue
			}

			// Write out an html file representing the combined coverage of the file
			// represented by the given relative path below ri.outputPath.
			percent := "--"
			if executable != 0 {
				percent = fmt.Sprintf("%1.2f", 100.0*float32(executable-missed)/float32(executable))
			}
			summaryData.Files = append(summaryData.Files, fileSummaryTemplateData{
				FileName:     normPath,
				CoveredLines: executable - missed,
				TotalLines:   executable,
				PercentLines: percent,
			})

			dest := path.Join(ri.outputPath, "coverage", normPath+".html")
			if err := fileutil.EnsureDirPathExists(dest); err != nil {
				return common.CoverageSummary{}, err
			}
			content, err := linesCovered.ToHTMLPage(CoverageFileData{
				FileName: rp,
				Commit:   ri.commit,
				JobName:  ri.jobName,
			})
			if err != nil {
				return common.CoverageSummary{}, err
			}
			if err := ioutil.WriteFile(dest, []byte(content), 0644); err != nil {
				return common.CoverageSummary{}, err
			}
		}

		// Write out an html file summarizing the coverage of all the files if a report
		// was requested.
		if shouldRender {
			// Sort for determinism and ease of reading (map iteration order is random).
			sort.Sort(summaryData.Files)
			var b bytes.Buffer
			if err := HTML_TEMPLATE_SUMMARY.Execute(&b, summaryData); err != nil {
				return common.CoverageSummary{}, err
			}
			if err := ioutil.WriteFile(path.Join(ri.outputPath, "index.html"), b.Bytes(), 0644); err != nil {
				return common.CoverageSummary{}, err
			}
		}

		return common.CoverageSummary{TotalLines: totalLines, MissedLines: missedLines}, nil
	}

	// normalizePath strips the bot-specific source root ("/mnt/pd0/work/skia/") from the
	// front of p and the ".txt" suffix LLVM appends in the .txt.tar archive from its end.
	// The second return value tells whether the file belongs in our analysis: it is false
	// for paths that are still absolute after stripping (e.g. /usr/lib/fontconfig), for
	// created files below "out" and for "third_party".
	// TODO(kjlubick): Keep third_party in and make it configurable from the UI what to show.
	func normalizePath(p string) (string, bool) {
		trimmed := strings.TrimSuffix(strings.TrimPrefix(p, "/mnt/pd0/work/skia/"), ".txt")
		for _, excluded := range []string{"/", "out", "third_party"} {
			if strings.HasPrefix(trimmed, excluded) {
				return trimmed, false
			}
		}
		return trimmed, true
	}

	// IngestCommits fulfills the Ingester interface.
	//
	// For every commit it lists the files stored under commit/<hash>/ in GCS, skips those
	// matching INGEST_BLACKLIST, downloads the ones not yet present below n.dir/<hash>/ and
	// then computes one coverage summary per job that provided a ".text" archive plus a
	// combined summary (with an HTML report) over all of those jobs. A problem with a single
	// file, job or commit is logged and skipped so that the remaining work still happens.
	// The collected results replace whatever GetResults returned before.
	//
	// The receiver must be a pointer: a value receiver would copy resultsMutex and store
	// the results in a throw-away copy. The commits are passed as pointers, as the
	// Ingester interface requires.
	func (n *gcsingester) IngestCommits(ctx context.Context, commits []*vcsinfo.LongCommit) {
		newResults := []IngestedResults{}
		for _, c := range commits {
			if _, err := fileutil.EnsureDirExists(path.Join(n.dir, c.Hash)); err != nil {
				sklog.Warningf("Could not create commit directories: %s", err)
			}

			basePath := "commit/" + c.Hash + "/"
			toDownload, err := n.getIngestableFilesFromGCS(basePath)
			if err != nil {
				// Log the hash: formatting the *LongCommit itself with %s prints a struct dump.
				sklog.Warningf("Problem ingesting for commit %s: %s", c.Hash, err)
				continue
			}
			// toSummarize maps a job name to the folder its text coverage was extracted to.
			toSummarize := map[string]string{}
		outer:
			for _, name := range toDownload {
				for _, b := range INGEST_BLACKLIST {
					if b.MatchString(name) {
						continue outer
					}
				}
				// There are at least 2 parts in the name. We expect something like:
				// Job.file
				// Job.type.tar
				parts := strings.Split(name, ".")
				if len(parts) == 1 {
					sklog.Warningf("Unknown file to ingest: %s", name)
					continue
				}
				// Don't re-download files that already exist
				outpath := path.Join(n.dir, c.Hash, name)
				if !fileExists(outpath) {
					if err := n.ingestFile(ctx, basePath, name, c.Hash); err != nil {
						sklog.Warningf("Problem ingesting file: %s", err)
						continue
					}
				}
				job := parts[0]
				ext := parts[1]
				if ext == "text" {
					// This is where the .text.tar gets extracted to.
					toSummarize[job] = path.Join(n.dir, c.Hash, job, ext, "coverage")
				}
			}
			// We go through the list of all the jobs we know of and analyze their coverage
			// individually and then add them to the list to be joined together in a combined
			// fashion.
			jobs := common.CoverageSummarySlice{}
			toCombine := []string{}
			for job, folder := range toSummarize {
				cov, err := n.getCoverage(makeCacheKey(c.Hash, job), renderInfo{}, folder)
				if err != nil {
					sklog.Warningf("Was unable to create a coverage data: %s", err)
					continue
				}
				cov.Name = job
				jobs = append(jobs, cov)
				toCombine = append(toCombine, folder)
			}
			// Sort jobs alphabetically for determinism; the sorted folder list also keeps
			// the combined cache key stable between runs.
			sort.Sort(jobs)
			sort.Strings(toCombine)

			// Mimic the structure that LLVM outputs, e.g.
			// .../[hash]/[name]/html/
			//                        index.html
			//                        coverage/
			//                                 foo.cpp.html
			//                                 bar.cpp.html
			ri := renderInfo{
				outputPath: path.Join(n.dir, c.Hash, "Combined", "html"),
				commit:     c.Hash,
				jobName:    "Combined",
			}

			totalCoverage, err := n.getCoverage(makeCacheKey(c.Hash, toCombine...), ri, toCombine...)
			if err != nil {
				// The commit is still reported below, with whatever summary was returned.
				sklog.Errorf("Was unable to create a combined summary: %s", err)
			}
			newResults = append(newResults, IngestedResults{Commit: c.ShortCommit, Jobs: jobs, TotalCoverage: totalCoverage})
			sklog.Infof("Ingestion completed for commit %s - %s", c.ShortCommit.Hash, c.ShortCommit.Author)
		}
		n.resultsMutex.Lock()
		defer n.resultsMutex.Unlock()
		n.results = newResults
	}

	// makeCacheKey returns a unique key for one or more job names and a given commit.
	// It is somewhat human readable.
	//
	// A single name is used verbatim ("name:commit"). Any other number of names is joined
	// with "|" and hashed ("Combined(<md5>):commit"), which "invalidates" the cache if 2
	// jobs finish and report coverage, then a 3rd finishes and is ready to be analyzed.
	func makeCacheKey(commit string, names ...string) string {
		if len(names) == 1 {
			return names[0] + ":" + commit
		}
		// The separator is a plain pipe; "\|" is not a valid Go escape sequence.
		toHash := strings.Join(names, "|")
		return fmt.Sprintf("Combined(%x):%s", md5.Sum([]byte(toHash)), commit)
	}

	// getIngestableFilesFromGCS lists every file below basePath in GCS and returns the
	// names with basePath stripped from the front. These are the candidates for ingestion;
	// the caller decides which of them are actually downloaded.
	func (n *gcsingester) getIngestableFilesFromGCS(basePath string) ([]string, error) {
		names := []string{}
		collect := func(item *storage.ObjectAttrs) {
			names = append(names, strings.TrimPrefix(item.Name, basePath))
		}
		err := n.gcsClient.AllFilesInDirectory(context.Background(), basePath, collect)
		if err != nil {
			return nil, fmt.Errorf("Could not get ingestible files from path %s: %s", basePath, err)
		}
		return names, nil
	}

	// ingestFile downloads basePath+name from GCS into n.dir/<commit>/<name>. If it is a tar
	// file, it is extracted to a sub-folder based on the original file name.
	// E.g. My-Config.text.tar -> My-Config/text/
	//
	// The download honors ctx. When writing or extracting fails the local file is removed
	// again: IngestCommits treats an existing file as "already ingested", so a leftover
	// would prevent any later retry. An error is returned if the download, the write or the
	// extraction fails, or if a tar file name does not consist of exactly 3 parts.
	func (n *gcsingester) ingestFile(ctx context.Context, basePath, name, commit string) error {
		dl := basePath + name
		contents, err := n.gcsClient.GetFileContents(ctx, dl)
		if err != nil {
			return fmt.Errorf("Could not download file %s from GCS : %s", dl, err)
		}

		outpath := path.Join(n.dir, commit, name)
		// discard removes the local copy so that the next ingestion downloads it again.
		discard := func() {
			if rmErr := os.Remove(outpath); rmErr != nil && !os.IsNotExist(rmErr) {
				sklog.Warningf("Could not remove %s: %s", outpath, rmErr)
			}
		}
		// WriteFile closes the file (and reports close errors) before tar reads it.
		if err := ioutil.WriteFile(outpath, contents, 0666); err != nil {
			discard()
			return fmt.Errorf("Could not write %d bytes to %s: %s", len(contents), outpath, err)
		}

		if !strings.HasSuffix(name, "tar") {
			return nil
		}
		// Split My-Config-Name.type.tar into 3 parts. type is "text" or "html"
		parts := strings.Split(name, ".")
		if len(parts) != 3 {
			return fmt.Errorf("Invalid tar name to ingest %s - must have 3 parts", name)
		}
		if err := unTar(ctx, outpath, path.Join(n.dir, commit, parts[0], parts[1])); err != nil {
			discard()
			return fmt.Errorf("Could not untar %s: %s", outpath, err)
		}
		return nil
	}

	// GetResults fulfills the Ingester interface. It hands out the results stored by the
	// most recent IngestCommits call, reading them while holding resultsMutex.
	func (n *gcsingester) GetResults() []IngestedResults {
		n.resultsMutex.Lock()
		latest := n.results
		n.resultsMutex.Unlock()
		return latest
	}

	// fileExists reports whether something already exists at the given path. A path that
	// cannot be inspected for any reason other than not existing is logged and treated
	// as missing.
	func fileExists(path string) bool {
		_, err := os.Stat(path)
		switch {
		case err == nil:
			return true
		case os.IsNotExist(err):
			return false
		default:
			sklog.Warningf("Error getting file info about %s: %s", path, err)
			return false
		}
	}