blob: 833b2de808a046a6fba842fbd9a3910d6a899e99 [file] [log] [blame]
package buildbot
import (
"encoding/json"
"fmt"
"strings"
"sync"
"time"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/git/repograph"
"go.skia.org/infra/go/httputils"
"go.skia.org/infra/go/metrics2"
"go.skia.org/infra/go/util"
)
// MAX_BLAMELIST_COMMITS bounds the size of a blamelist computed by
// findCommitsRecursive. When more than this many commits have been collected
// for a build which is not stealing commits from another build, the search is
// abandoned and the blamelist collapses to just the build's GotRevision.
const MAX_BLAMELIST_COMMITS = 500
var (
// BUILD_BLACKLIST is a set of builds which, for one reason or another,
// we want to skip during ingestion. Typically this means that there is
// something wrong with the build which prevents it from being ingested
// properly.
//
// The outer key is the builder name and the inner key is the build
// number; a build is skipped when BUILD_BLACKLIST[builder][number] is
// true.
BUILD_BLACKLIST = map[string]map[int]bool{
"Build-Mac10.9-Clang-x86_64-Debug": {
5222: true, // This build doesn't exist on the server.
},
"Build-Mac10.9-Clang-x86_64-Release": {
5207: true, // This build doesn't exist on the server.
},
"Build-Mac10.9-Clang-x86_64-Release-CMake": {
891: true, // This build doesn't exist on the server.
},
// Something went haywire with this, don't know what. -dogben
"Build-Ubuntu-GCC-x86-Release": {
2586: true,
},
"Perf-Android-GCC-Nexus7-GPU-Tegra3-Arm7-Release-BuildBucket": {
1: true, // Cannot be ingested because its repo is "???"
},
"Perf-Ubuntu-GCC-ShuttleA-GPU-GTX660-x86_64-Release-ANGLE": {
350: true, // This bot was removed before this build finished ingesting.
},
"Perf-Ubuntu-GCC-ShuttleA-GPU-GTX660-x86_64-Release-VisualBench": {
0: true, // Wrong repo.
2: true, // Wrong repo.
3: true, // Wrong repo.
},
// This bot was removed before these builds finished ingesting.
"Perf-Win8-MSVC-ShuttleB-CPU-AVX2-x86_64-Release-Swarming": {
12510: true,
12511: true,
},
"Linux Tests": {
// For some reason, these builds don't exist on the server.
2872: true,
2920: true,
2995: true,
3144: true,
3193: true,
3197: true,
},
"Mac10.9 Tests": {
1727: true, // This build doesn't exist on the server.
},
// This bot was removed before these builds finished ingesting.
"Test-Win8-MSVC-ShuttleB-CPU-AVX2-x86_64-Release-Swarming": {
12588: true,
12589: true,
12590: true,
},
"Win7 Tests (1)": {
1797: true, // This build doesn't exist on the server?
},
// This bot was removed before the build finished ingesting.
"Test-Ubuntu-GCC-ShuttleA-GPU-GTX550Ti-x86_64-Release-SwarmingValgrind": {
107: true,
},
}
// MASTER_NAMES lists the build masters which are queried for builders,
// build slaves and builds.
//
// TODO(borenet): Avoid hard-coding this list. Instead, obtain it from
// checked-in code or the set of masters which are actually running.
MASTER_NAMES = []string{"client.skia", "client.skia.android", "client.skia.compile", "client.skia.fyi"}
// httpClient is the shared HTTP client used by get for all requests to
// the build masters. It is created by httputils.NewTimeoutClient.
httpClient = httputils.NewTimeoutClient()
)
// get loads data from a buildbot JSON endpoint.
//
// It issues a GET request for url using the package-level httpClient and
// decodes the JSON response body into rv, which must be a pointer as required
// by encoding/json. An error is returned if the request fails, if the server
// responds with a non-2xx status, or if the body cannot be decoded as JSON.
func get(url string, rv interface{}) error {
	resp, err := httpClient.Get(url)
	if err != nil {
		return fmt.Errorf("Failed to GET %s: %s", url, err)
	}
	defer util.Close(resp.Body)

	// The HTTP client only reports transport-level failures through err; an
	// HTTP error status still arrives with a nil error. Reject those here
	// rather than attempting to decode an error page as JSON.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("Failed to GET %s: server responded with status %q", url, resp.Status)
	}

	if err := json.NewDecoder(resp.Body).Decode(rv); err != nil {
		return fmt.Errorf("Failed to decode JSON: %s", err)
	}
	return nil
}
// findCommitsRecursive is a recursive function called by FindCommitsForBuild.
// It traces the commit history backward from the given commit to find the
// commits which were first included in the given build.
//
// Parameters:
//   - db: used to look up which build, if any, already claims a commit, and
//     to load the build from which commits are being stolen.
//   - commits: the set of commit hashes collected so far. Entries are added
//     to it in place, and in the shortcut cases a new map is returned instead.
//   - b: the build whose blamelist is being computed.
//   - commit: the commit currently being visited.
//   - stealFrom: the number of the build from which commits are being stolen,
//     or -1 if no commits are being stolen.
//   - stolen: the commit hashes stolen from build stealFrom so far.
//
// Returns the updated commits, stealFrom and stolen values, plus any error
// encountered while querying the DB.
func findCommitsRecursive(db DB, commits map[string]bool, b *Build, commit *repograph.Commit, stealFrom int, stolen []string) (map[string]bool, int, []string, error) {
// Shortcut in case we missed this case before; if this is the first
// build on this bot which has a valid GotRevision, the blamelist will
// be the entire Git history. If we find too many commits, assume we've
// hit this case and just return the GotRevision as the blamelist.
if len(commits) > MAX_BLAMELIST_COMMITS && stealFrom == -1 {
return map[string]bool{b.GotRevision: true}, -1, []string{}, nil
}
// Determine whether any build already includes this commit. A negative
// result is treated below as "no build includes it".
n, err := db.GetBuildNumberForCommit(b.Master, b.Builder, commit.Hash)
if err != nil {
return commits, stealFrom, stolen, fmt.Errorf("Could not find build for commit %s: %s", commit.Hash, err)
}
// If we're stealing commits from a previous build but the current
// commit is not in any build's blamelist, we must have scrolled past
// the beginning of the builds. Just return.
if n < 0 && stealFrom >= 0 {
return commits, stealFrom, stolen, nil
}
// If a previous build already included this commit, we have to make a decision.
if n >= 0 {
// If the build we found is the current build, keep going,
// since we may have already ingested data for this build but still
// need to find accurate revision data.
if n != b.Number {
// If this Build's GotRevision is already included in a different
// Build, then we're "inserting" this one in between two already-ingested
// Builds. In that case, this build is providing "better" information
// on the already-claimed commits, so we steal them from the other Build.
if commit.Hash == b.GotRevision {
stealFrom = n
// Another shortcut: If our GotRevision is the same as the
// GotRevision of the Build we're stealing commits from,
// ie. both builds ran at the same commit, just take all of
// its commits without doing any more work.
stealFromBuild, err := db.GetBuildFromDB(b.Master, b.Builder, stealFrom)
if err != nil {
return commits, stealFrom, stolen, fmt.Errorf("Could not retrieve build: %s", err)
}
if stealFromBuild.GotRevision == b.GotRevision && stealFromBuild.Number < b.Number {
commits = map[string]bool{}
for _, c := range stealFromBuild.Commits {
commits[c] = true
}
return commits, stealFrom, stealFromBuild.Commits, nil
}
}
if stealFrom == n {
// Continue stealing commits from the older build.
stolen = append(stolen, commit.Hash)
} else {
// If we've hit a commit belonging to a different build,
// just return.
return commits, stealFrom, stolen, nil
}
}
}
// Add the commit.
commits[commit.Hash] = true
// Recurse on the commit's parents.
for _, p := range commit.GetParents() {
// If we've already seen this parent commit, don't revisit it.
if _, ok := commits[p.Hash]; ok {
continue
}
// The results of each recursive call are threaded into the next one
// so that all parents share the same accumulated state.
commits, stealFrom, stolen, err = findCommitsRecursive(db, commits, b, p, stealFrom, stolen)
if err != nil {
return commits, stealFrom, stolen, err
}
}
return commits, stealFrom, stolen, nil
}
// FindCommitsForBuild computes the blamelist for the given build, ie. the
// commits which were first included in it. It assumes that all previous
// builds for the same builder/master are already in the database.
//
// Returns, in order:
//   - the commit hashes in the blamelist (in no particular order),
//   - the number of the build from which commits were stolen, or -1 if none,
//   - the commit hashes which were stolen from that build,
//   - an error if the repo or commit cannot be found or the DB lookup fails.
func FindCommitsForBuild(db DB, b *Build, repos repograph.Map) ([]string, int, []string, error) {
	defer metrics2.FuncTimer().Stop()

	// Trybots never get a blamelist, and neither do builds which lack a
	// repository or a GotRevision.
	if IsTrybot(b.Builder) || b.Repository == "" || b.GotRevision == "" {
		return []string{}, -1, []string{}, nil
	}

	// The very first build for a builder is blamed on its GotRevision alone.
	if b.Number == 0 {
		return []string{b.GotRevision}, -1, []string{}, nil
	}

	repo, found := repos[b.Repository]
	if !found {
		return nil, -1, nil, fmt.Errorf("Could not find commits for build. No such repo: %s", b.Repository)
	}

	// Look up the commit; if it is unknown, update the repo on demand and
	// try exactly once more.
	head := repo.Get(b.GotRevision)
	if head == nil {
		if err := repo.Update(); err != nil {
			return nil, -1, nil, fmt.Errorf("Could not find commits for build: failed to update repo: %s", err)
		}
		if head = repo.Get(b.GotRevision); head == nil {
			return nil, -1, nil, fmt.Errorf("Commit %s does not exist in repo %s", b.GotRevision, b.Repository)
		}
	}

	// Walk history backward from GotRevision until a previous build is hit.
	seen, stealFrom, stolen, err := findCommitsRecursive(db, map[string]bool{}, b, head, -1, []string{})
	if err != nil {
		return nil, -1, nil, err
	}

	// Flatten the set of hashes into a slice.
	blamelist := make([]string, 0, len(seen))
	for hash := range seen {
		blamelist = append(blamelist, hash)
	}
	return blamelist, stealFrom, stolen, nil
}
// getBuildFromMaster fetches a single build from the build master's JSON
// interface, identified by master, builder and build number. After decoding,
// the build is passed through fixup. If the build has no repository set, an
// attempt is made to determine it by searching repos for the build's
// GotRevision; failure to do so is logged but is not an error.
func getBuildFromMaster(master, builder string, buildNumber int, repos repograph.Map) (*Build, error) {
	endpoint := fmt.Sprintf("%s%s/json/builders/%s/builds/%d", BUILDBOT_URL, master, builder, buildNumber)
	result := new(Build)
	if err := get(endpoint, result); err != nil {
		return nil, fmt.Errorf("Failed to retrieve build #%d for %s: %s", buildNumber, builder, err)
	}
	result.fixup()

	// Nothing more to do when the master told us the repository.
	if result.Repository != "" {
		return result, nil
	}

	// Attempt to determine the repository from the revision.
	sklog.Infof("No repository set for %s #%d; attempting to find it.", result.Builder, result.Number)
	_, repoName, _, err := repos.FindCommit(result.GotRevision)
	if err != nil {
		sklog.Warningf("Unable to find repo for commit %s; %s", result.GotRevision, err)
		return result, nil
	}
	sklog.Infof("Found %s for %s", repoName, result.GotRevision)
	result.Repository = repoName
	return result, nil
}
// retryGetBuildFromMaster retrieves the given build from the build master's JSON
// interface as specified by the master, builder, and build number. Makes
// up to three attempts in case the master fails to respond, pausing for half a
// second between consecutive attempts. Returns the build from the first
// successful attempt, or a nil build and the error from the final attempt.
func retryGetBuildFromMaster(master, builder string, buildNumber int, repos repograph.Map) (*Build, error) {
	defer metrics2.FuncTimer().Stop()
	const (
		maxAttempts = 3
		retryDelay  = 500 * time.Millisecond
	)
	var err error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		var b *Build
		b, err = getBuildFromMaster(master, builder, buildNumber, repos)
		if err == nil {
			return b, nil
		}
		// Only pause when another attempt follows; sleeping after the final
		// failure would just delay reporting the error to the caller.
		if attempt < maxAttempts {
			time.Sleep(retryDelay)
		}
	}
	return nil, err
}
// validateBuildForIngestion checks that the build carries the minimum
// information required for ingestion: a master name, a builder name and a
// non-zero start time. It returns an error describing the first missing
// field, or nil if the build is ready to be ingested.
func validateBuildForIngestion(b *Build) error {
	switch {
	case b.Master == "":
		return fmt.Errorf("Build has no master name!")
	case b.Builder == "":
		return fmt.Errorf("Build has no builder name!")
	case util.TimeIsZero(b.Started):
		return fmt.Errorf("Build has no start time!")
	}
	return nil
}
// IngestBuild validates the given build and pushes it into the database.
//
// If a version of the build already exists in the DB, that version is updated
// with the results, properties, steps and finish time of b rather than
// inserting a brand new build; changing the GotRevision of an
// already-ingested build is an error. The blamelist is computed when the build
// is new, or when the previously-ingested version had no GotRevision. If
// computing the blamelist stole commits from another build, that build is
// updated in the same DB write so that no commit is claimed by both.
func IngestBuild(db DB, b *Build, repos repograph.Map) error {
	defer metrics2.FuncTimer().Stop()
	if err := validateBuildForIngestion(b); err != nil {
		return err
	}

	// Find the previously-inserted version of this build, if it exists,
	// and update it rather than inserting a brand new build.
	// NOTE(review): any error from GetBuild is treated as "not yet
	// ingested"; confirm that GetBuild cannot fail for other reasons.
	needToComputeBlamelist := true
	// A nil build with a nil error is also treated as "not yet ingested"
	// instead of being dereferenced.
	if oldBuild, err := db.GetBuild(b.Id()); err == nil && oldBuild != nil {
		if oldBuild.GotRevision == "" {
			oldBuild.GotRevision = b.GotRevision
		} else {
			needToComputeBlamelist = false
		}
		if b.GotRevision != oldBuild.GotRevision {
			return fmt.Errorf("Cannot change an already-ingested build's GotRevision.")
		}
		oldBuild.Results = b.Results
		oldBuild.Properties = b.Properties
		oldBuild.PropertiesStr = b.PropertiesStr
		oldBuild.Steps = b.Steps
		oldBuild.Finished = b.Finished
		b = oldBuild
	}
	if !needToComputeBlamelist {
		return db.PutBuild(b)
	}

	// Find the commits for this build.
	commits, stoleFrom, stolen, err := FindCommitsForBuild(db, b, repos)
	if err != nil {
		return err
	}
	b.Commits = commits

	// Log the case where we found no revisions for the build.
	if !(IsTrybot(b.Builder) || strings.Contains(b.Builder, "Housekeeper")) && len(b.Commits) == 0 {
		sklog.Infof("Got build with 0 revs: %s #%d GotRev=%s", b.Builder, b.Number, b.GotRevision)
	}

	// Nothing was stolen; just insert the build. len of a nil slice is 0,
	// so no separate nil check is needed.
	if stoleFrom < 0 || len(stolen) == 0 {
		return db.PutBuild(b)
	}

	// Remove the commits we stole from the previous owner.
	victim, err := db.GetBuildFromDB(b.Master, b.Builder, stoleFrom)
	if err != nil {
		return err
	}
	if victim == nil {
		return fmt.Errorf("Attempted to retrieve %s #%d, but got a nil build from the DB.", b.Builder, stoleFrom)
	}
	// Index the stolen hashes so that filtering is a single pass over the
	// victim's commits.
	stolenSet := make(map[string]bool, len(stolen))
	for _, s := range stolen {
		stolenSet[s] = true
	}
	remaining := make([]string, 0, len(victim.Commits))
	for _, c := range victim.Commits {
		if !stolenSet[c] {
			remaining = append(remaining, c)
		}
	}
	victim.Commits = remaining

	// Write both builds together.
	return db.PutBuilds([]*Build{b, victim})
}
// getLatestBuilds queries the builders endpoint of the given master and
// returns a map whose keys are builder names and whose values are the build
// number of the newest build for that builder, taken to be the last entry in
// the builder's list of cached builds. Builders with no cached builds are
// omitted from the result.
func getLatestBuilds(m string) (map[string]int, error) {
	var builders map[string]*struct {
		CachedBuilds []int
	}
	if err := get(BUILDBOT_URL+m+"/json/builders", &builders); err != nil {
		return nil, fmt.Errorf("Failed to retrieve builders for %s: %s", m, err)
	}
	latest := make(map[string]int, len(builders))
	for name, info := range builders {
		cached := info.CachedBuilds
		if count := len(cached); count > 0 {
			latest[name] = cached[count-1]
		}
	}
	return latest, nil
}
// GetBuilders returns the set of builders from all masters in MASTER_NAMES,
// keyed by builder name. The masters are queried concurrently, and each
// returned Builder has its Name and Master fields filled in.
//
// If any master cannot be queried, no builders are returned and the error
// lists every failure, one per line, in MASTER_NAMES order. If the same
// builder name appears on more than one master, the entry from the master
// which appears later in MASTER_NAMES wins.
func GetBuilders() (map[string]*Builder, error) {
	var (
		mtx      sync.Mutex // Guards builders and errs.
		wg       sync.WaitGroup
		builders = map[string][]*Builder{}
		errs     = map[string]error{}
	)
	for _, m := range MASTER_NAMES {
		wg.Add(1)
		go func(master string) {
			defer wg.Done()
			b := map[string]*Builder{}
			// Perform the request before taking the lock so that the
			// masters are queried in parallel.
			err := get(BUILDBOT_URL+master+"/json/builders", &b)
			mtx.Lock()
			defer mtx.Unlock()
			if err != nil {
				errs[master] = err
				return
			}
			builderList := make([]*Builder, 0, len(b))
			for builderName, builder := range b {
				builder.Name = builderName
				builder.Master = master
				builderList = append(builderList, builder)
			}
			builders[master] = builderList
		}(m)
	}
	wg.Wait()

	if len(errs) > 0 {
		// Walk MASTER_NAMES rather than the errs map so that the message
		// is stable from run to run.
		lines := make([]string, 0, len(errs)+1)
		lines = append(lines, "Failed to get retrieve builders:")
		for _, m := range MASTER_NAMES {
			if err, ok := errs[m]; ok {
				lines = append(lines, fmt.Sprintf("%v", err))
			}
		}
		// The message is passed as an argument, not as the format string,
		// so that any '%' in the underlying errors is preserved verbatim.
		return nil, fmt.Errorf("%s", strings.Join(lines, "\n"))
	}

	rv := map[string]*Builder{}
	for _, m := range MASTER_NAMES {
		for _, b := range builders[m] {
			rv[b.Name] = b
		}
	}
	return rv, nil
}
// GetBuildSlaves loads the build slaves from every master in MASTER_NAMES,
// querying the masters concurrently. The result maps master name to a sub-map
// of slave name to BuildSlave, with the Name and Master fields of each
// BuildSlave filled in. If any master cannot be queried, nil is returned
// along with an error describing all of the failures.
func GetBuildSlaves() (map[string]map[string]*BuildSlave, error) {
	var (
		guard sync.Mutex // Guards slavesByMaster and failures.
		group sync.WaitGroup
	)
	slavesByMaster := map[string]map[string]*BuildSlave{}
	failures := map[string]error{}

	// fetch loads the slaves for a single master and records the outcome.
	fetch := func(master string) {
		defer group.Done()
		fetched := map[string]*BuildSlave{}
		err := get(BUILDBOT_URL+master+"/json/slaves", &fetched)

		guard.Lock()
		defer guard.Unlock()
		if err != nil {
			failures[master] = fmt.Errorf("Failed to retrieve buildslaves for %s: %s", master, err)
			return
		}
		for slaveName, slave := range fetched {
			slave.Name, slave.Master = slaveName, master
		}
		slavesByMaster[master] = fetched
	}

	for _, name := range MASTER_NAMES {
		group.Add(1)
		go fetch(name)
	}
	group.Wait()

	if len(failures) != 0 {
		return nil, fmt.Errorf("Encountered errors while loading buildslave data from masters: %v", failures)
	}
	return slavesByMaster, nil
}
// getUningestedBuilds returns, for the given master, a map whose keys are
// builder names and whose values are slices of ints holding, in ascending
// order, the numbers of the builds which have not yet been ingested. These
// are the builds after the builder's last-processed build, up to and
// including its latest build on the master. Builders with nothing to ingest
// are omitted.
func getUningestedBuilds(db DB, m string) (map[string][]int, error) {
	defer metrics2.FuncTimer().Stop()

	// Load the newest build numbers from the master and the last-processed
	// builds from the DB.
	newest, err := getLatestBuilds(m)
	if err != nil {
		return nil, fmt.Errorf("Failed to get latest builds: %s", err)
	}
	processedIds, err := db.GetLastProcessedBuilds(m)
	if err != nil {
		return nil, fmt.Errorf("Failed to get last-processed builds: %s", err)
	}

	// span covers the builds (lastIngested, latest] for one builder.
	type span struct {
		lastIngested int // The last-ingested build number, or -1 if none.
		latest       int // The latest build number.
	}
	spans := map[string]span{}
	for _, id := range processedIds {
		build, err := db.GetBuild(id)
		if err != nil {
			return nil, err
		}
		spans[build.Builder] = span{lastIngested: build.Number, latest: build.Number}
	}
	for builder, num := range newest {
		s, known := spans[builder]
		if !known {
			// Nothing has been ingested for this builder yet, so start
			// from build #0.
			s.lastIngested = -1
		}
		s.latest = num
		spans[builder] = s
	}

	// Expand each span into an explicit list of build numbers.
	pending := map[string][]int{}
	for builder, s := range spans {
		if s.latest < s.lastIngested {
			sklog.Warningf("Cannot create slice of builds to ingest for %q; invalid range (%d, %d)", builder, s.lastIngested, s.latest)
			continue
		}
		if s.latest == s.lastIngested {
			continue
		}
		nums := make([]int, 0, s.latest-s.lastIngested)
		for n := s.lastIngested + 1; n <= s.latest; n++ {
			nums = append(nums, n)
		}
		pending[builder] = nums
	}
	return pending, nil
}
// ingestNewBuilds finds the set of uningested builds for the given master,
// adds the builds which the DB reports as unfinished, and ingests them all.
//
// Blacklisted builds and trybot builds are skipped, as are builds which
// cannot be retrieved from the master after multiple attempts. If ingestion of
// a build fails, the remaining builds for that builder are abandoned and the
// failure is recorded; the other builders are still processed. Returns an
// error summarizing all recorded failures, or nil if there were none.
func ingestNewBuilds(db DB, m string, repos repograph.Map) error {
	defer metrics2.FuncTimer().Stop()
	sklog.Infof("Ingesting builds for %s", m)
	// TODO(borenet): Investigate the use of channels here. We should be
	// able to start ingesting builds as the data becomes available rather
	// than waiting until the end.
	buildsToProcess, err := getUningestedBuilds(db, m)
	if err != nil {
		return fmt.Errorf("Failed to obtain the set of uningested builds: %s", err)
	}
	unfinished, err := db.GetUnfinishedBuilds(m)
	if err != nil {
		return fmt.Errorf("Failed to obtain the set of unfinished builds: %s", err)
	}
	for _, b := range unfinished {
		// append handles a missing (nil) entry, so there is no need to
		// seed the map first.
		buildsToProcess[b.Builder] = append(buildsToProcess[b.Builder], b.Number)
	}

	// TODO(borenet): Can we ingest builders in parallel?
	errs := map[string]error{}
	for b, w := range buildsToProcess {
		for _, n := range w {
			if BUILD_BLACKLIST[b][n] {
				sklog.Warningf("Skipping blacklisted build: %s # %d", b, n)
				continue
			}
			if IsTrybot(b) {
				continue
			}
			sklog.Infof("Ingesting build: %s, %s, %d", m, b, n)
			build, err := retryGetBuildFromMaster(m, b, n, repos)
			if err != nil {
				// If we couldn't get the build from the master after multiple
				// tries, assume that the build has somehow disappeared and
				// skip it.
				sklog.Errorf("Failed to retrieve build from master; skipping: %s", err)
				continue
			}
			if err := IngestBuild(db, build, repos); err != nil {
				// Stop processing this builder; later builds may depend
				// on this one having been ingested.
				errs[b] = fmt.Errorf("Failed to ingest build: %s", err)
				break
			}
		}
	}
	if len(errs) > 0 {
		lines := make([]string, 0, len(errs)+1)
		lines = append(lines, fmt.Sprintf("Encountered errors ingesting builds for %s:", m))
		for b, err := range errs {
			lines = append(lines, fmt.Sprintf("%s: %s", b, err))
		}
		// The message is passed as an argument, not as the format string,
		// so that any '%' in builder names or errors is preserved verbatim.
		return fmt.Errorf("%s", strings.Join(lines, "\n"))
	}
	sklog.Infof("Done ingesting builds for %s", m)
	return nil
}
// NumTotalBuilds computes the total number of builds which have ever run by
// summing, over every builder on every master in MASTER_NAMES, the builder's
// latest build number plus one (to account for build #0). Returns 0 and an
// error if the latest builds cannot be retrieved from any master.
func NumTotalBuilds() (int, error) {
	var count int
	for i := range MASTER_NAMES {
		newest, err := getLatestBuilds(MASTER_NAMES[i])
		if err != nil {
			return 0, fmt.Errorf("Failed to get latest builds: %s", err)
		}
		// One extra per builder to include build #0.
		count += len(newest)
		for _, latestNumber := range newest {
			count += latestNumber
		}
	}
	return count, nil
}
// IngestNewBuildsLoop starts a background goroutine which continually ingests
// new builds and then returns immediately. Every ten seconds the goroutine
// updates repos and, if that succeeds, ingests new builds from all masters in
// MASTER_NAMES concurrently, waiting for all of them before the next round.
// A per-master liveness metric is reset after each successful ingestion.
//
// The given DB must be a *localDB; otherwise an error is returned and nothing
// is started.
func IngestNewBuildsLoop(db DB, repos repograph.Map) error {
	local, isLocal := db.(*localDB)
	if !isLocal {
		return fmt.Errorf("Can only ingest builds with a local DB instance.")
	}
	cache := newIngestCache(local)

	liveness := make(map[string]metrics2.Liveness, len(MASTER_NAMES))
	for _, m := range MASTER_NAMES {
		liveness[m] = metrics2.NewLiveness("buildbot-ingest", map[string]string{"master": m})
	}

	// ingestAll runs one round of ingestion across all masters in parallel
	// and blocks until every master has been handled.
	ingestAll := func() {
		var wg sync.WaitGroup
		for _, m := range MASTER_NAMES {
			wg.Add(1)
			go func(master string) {
				defer wg.Done()
				if err := ingestNewBuilds(cache, master, repos); err != nil {
					sklog.Errorf("Failed to ingest new builds: %s", err)
					return
				}
				liveness[master].Reset()
			}(m)
		}
		wg.Wait()
	}

	go func() {
		for range time.Tick(10 * time.Second) {
			// Skip this round entirely if the repos could not be updated.
			if err := repos.Update(); err != nil {
				sklog.Errorf("Failed to update repo: %s", err)
				continue
			}
			ingestAll()
		}
	}()
	return nil
}