| package analyzer |
| |
| import ( |
| "fmt" |
| "sort" |
| "strings" |
| |
| "go.chromium.org/luci/common/api/swarming/swarming/v1" |
| |
| cpb "go.skia.org/infra/cabe/go/proto" |
| "go.skia.org/infra/go/sklog" |
| "go.skia.org/infra/go/util" |
| "go.skia.org/infra/perf/go/perfresults" |
| ) |
| |
| // Returns an ArmSpec proto containing field values that are common between a and b. |
| func intersectArmSpecs(a, b *cpb.ArmSpec) *cpb.ArmSpec { |
| ret := &cpb.ArmSpec{} |
| |
| ret.BuildSpec = intersectBuildSpecs(a.GetBuildSpec(), b.GetBuildSpec()) |
| ret.RunSpec = intersectRunSpecs(a.GetRunSpec(), b.GetRunSpec()) |
| return ret |
| } |
| |
| // Returns an ArmSpec proto containing field values that are present in a but not in b. |
| func diffArmSpecs(a, b *cpb.ArmSpec) *cpb.ArmSpec { |
| ret := &cpb.ArmSpec{} |
| |
| ret.BuildSpec = diffBuildSpecs(a.GetBuildSpec(), b.GetBuildSpec()) |
| ret.RunSpec = diffRunSpecs(a.GetRunSpec(), b.GetRunSpec()) |
| return ret |
| } |
| |
| // Returns a BuildSpec proto containing field values that are common between a and b. |
| func intersectBuildSpecs(a, b []*cpb.BuildSpec) []*cpb.BuildSpec { |
| ret := []*cpb.BuildSpec{} |
| for i, aBuildSpec := range a { |
| if i >= len(b) { |
| break |
| } |
| bBuildSpec := b[i] |
| cBuildSpec := &cpb.BuildSpec{} |
| |
| // Get intersection of gitiles commit fields. |
| aGitilesCommit := aBuildSpec.GetGitilesCommit() |
| bGitilesCommit := bBuildSpec.GetGitilesCommit() |
| if aGitilesCommit != nil && bGitilesCommit != nil { |
| cgc := &cpb.GitilesCommit{} |
| if aGitilesCommit.GetProject() == bGitilesCommit.GetProject() && aGitilesCommit.GetId() == bGitilesCommit.GetId() { |
| cgc.Project = aGitilesCommit.GetProject() |
| cgc.Id = aGitilesCommit.GetId() |
| cBuildSpec.GitilesCommit = cgc |
| } |
| } |
| |
| aGerritChanges := aBuildSpec.GetGerritChanges() |
| bGerritChanges := bBuildSpec.GetGerritChanges() |
| cGerritChanges := []*cpb.GerritChange{} |
| if aGerritChanges != nil && bGerritChanges != nil { |
| for j, aGerritChange := range aGerritChanges { |
| if j >= len(bGerritChanges) { |
| break |
| } |
| bGerritChange := bGerritChanges[j] |
| if aGerritChange.GetProject() == bGerritChange.GetProject() && aGerritChange.GetPatchsetHash() == bGerritChange.GetPatchsetHash() { |
| cGerritChanges = append(cGerritChanges, &cpb.GerritChange{ |
| Project: aGerritChange.GetProject(), |
| PatchsetHash: aGerritChange.GetPatchsetHash(), |
| }) |
| } |
| } |
| } |
| |
| if len(cGerritChanges) > 0 { |
| cBuildSpec.GerritChanges = cGerritChanges |
| } |
| |
| if cBuildSpec.GitilesCommit != nil || len(cBuildSpec.GerritChanges) > 0 { |
| ret = append(ret, cBuildSpec) |
| } |
| } |
| return ret |
| } |
| |
| // Returns a BuildSpec proto containing field values that are set in a but not b. |
| func diffBuildSpecs(a, b []*cpb.BuildSpec) []*cpb.BuildSpec { |
| ret := []*cpb.BuildSpec{} |
| for i, aBuildSpec := range a { |
| if i >= len(b) { |
| ret = append(ret, aBuildSpec) |
| continue |
| } |
| bBuildSpec := b[i] |
| dBuildSpec := &cpb.BuildSpec{} |
| |
| // Get intersection of gitiles commit fields. |
| aGitilesCommit := aBuildSpec.GetGitilesCommit() |
| bGitilesCommit := bBuildSpec.GetGitilesCommit() |
| if aGitilesCommit != nil || bGitilesCommit != nil { |
| dgc := &cpb.GitilesCommit{} |
| if aGitilesCommit.GetProject() != bGitilesCommit.GetProject() { |
| dgc.Project = aGitilesCommit.GetProject() |
| dBuildSpec.GitilesCommit = dgc |
| } |
| if aGitilesCommit.GetId() != bGitilesCommit.GetId() { |
| dgc.Id = aGitilesCommit.GetId() |
| dBuildSpec.GitilesCommit = dgc |
| } |
| } |
| |
| aGerritChanges := aBuildSpec.GetGerritChanges() |
| bGerritChanges := bBuildSpec.GetGerritChanges() |
| dGerritChanges := []*cpb.GerritChange{} |
| if aGerritChanges != nil || bGerritChanges != nil { |
| for j, aGerritChange := range aGerritChanges { |
| if j >= len(bGerritChanges) { |
| dGerritChanges = append(dGerritChanges, aGerritChange) |
| continue |
| } |
| bGerritChange := bGerritChanges[j] |
| dGerritChange := &cpb.GerritChange{} |
| if aGerritChange.GetProject() != bGerritChange.GetProject() { |
| dGerritChange.Project = aGerritChange.GetProject() |
| } |
| if aGerritChange.GetPatchsetHash() != bGerritChange.GetPatchsetHash() { |
| dGerritChange.PatchsetHash = aGerritChange.GetPatchsetHash() |
| // Even if the projects are the same, if the hash is different, still include the Project. |
| // This makes the diff'd BuildSpec more useful, since otherwise it would just give you |
| // a patch without identifying which project (therefore which git repo) it came from. |
| dGerritChange.Project = aGerritChange.GetProject() |
| } |
| dGerritChanges = append(dGerritChanges, dGerritChange) |
| } |
| } |
| |
| if len(dGerritChanges) > 0 { |
| dBuildSpec.GerritChanges = dGerritChanges |
| } |
| |
| if dBuildSpec.GitilesCommit != nil || len(dBuildSpec.GerritChanges) > 0 { |
| ret = append(ret, dBuildSpec) |
| } |
| } |
| return ret |
| } |
| |
| // Returns a RunSpec proto containing field values that are common between a and b. |
| func intersectRunSpecs(a, b []*cpb.RunSpec) []*cpb.RunSpec { |
| ret := []*cpb.RunSpec{} |
| for i, aRunSpec := range a { |
| if i >= len(b) { |
| break |
| } |
| bRunSpec := b[i] |
| cRunSpec := &cpb.RunSpec{} |
| if aRunSpec.GetOs() == bRunSpec.GetOs() { |
| cRunSpec.Os = aRunSpec.GetOs() |
| } |
| if aRunSpec.GetSyntheticProductName() == bRunSpec.GetSyntheticProductName() { |
| cRunSpec.SyntheticProductName = aRunSpec.GetSyntheticProductName() |
| } |
| if aRunSpec.FinchConfig != nil && bRunSpec.FinchConfig != nil { |
| aFinchConfig := aRunSpec.GetFinchConfig() |
| bFinchConfig := bRunSpec.GetFinchConfig() |
| cFinchConfig := &cpb.FinchConfig{} |
| if aFinchConfig.GetSeedHash() != "" && aFinchConfig.GetSeedHash() == bFinchConfig.GetSeedHash() { |
| cFinchConfig.SeedHash = aFinchConfig.GetSeedHash() |
| cRunSpec.FinchConfig = cFinchConfig |
| } |
| if aFinchConfig.GetSeedChangelist() != 0 && aFinchConfig.GetSeedChangelist() == bFinchConfig.GetSeedChangelist() { |
| cFinchConfig.SeedChangelist = aFinchConfig.GetSeedChangelist() |
| cRunSpec.FinchConfig = cFinchConfig |
| } |
| } |
| |
| if cRunSpec.FinchConfig != nil || cRunSpec.SyntheticProductName != "" || cRunSpec.Os != "" { |
| ret = append(ret, cRunSpec) |
| } |
| } |
| return ret |
| } |
| |
| // Returns a RunSpec proto containing field values that are set in a but not in b. |
| func diffRunSpecs(a, b []*cpb.RunSpec) []*cpb.RunSpec { |
| ret := []*cpb.RunSpec{} |
| for i, aRunSpec := range a { |
| if i >= len(b) { |
| ret = append(ret, aRunSpec) |
| continue |
| } |
| bRunSpec := b[i] |
| dRunSpec := &cpb.RunSpec{} |
| if aRunSpec.GetOs() != bRunSpec.GetOs() { |
| dRunSpec.Os = aRunSpec.GetOs() |
| } |
| if aRunSpec.GetSyntheticProductName() != bRunSpec.GetSyntheticProductName() { |
| dRunSpec.SyntheticProductName = aRunSpec.GetSyntheticProductName() |
| } |
| if aRunSpec.FinchConfig != nil || bRunSpec.FinchConfig != nil { |
| aFinchConfig := aRunSpec.GetFinchConfig() |
| bFinchConfig := bRunSpec.GetFinchConfig() |
| cFinchConfig := &cpb.FinchConfig{} |
| if aFinchConfig.GetSeedHash() != "" && aFinchConfig.GetSeedHash() != bFinchConfig.GetSeedHash() { |
| cFinchConfig.SeedHash = aFinchConfig.GetSeedHash() |
| dRunSpec.FinchConfig = cFinchConfig |
| } |
| if aFinchConfig.GetSeedChangelist() != 0 && aFinchConfig.GetSeedChangelist() != bFinchConfig.GetSeedChangelist() { |
| cFinchConfig.SeedChangelist = aFinchConfig.GetSeedChangelist() |
| dRunSpec.FinchConfig = cFinchConfig |
| } |
| } |
| |
| if dRunSpec.FinchConfig != nil || dRunSpec.SyntheticProductName != "" || dRunSpec.Os != "" { |
| ret = append(ret, dRunSpec) |
| } |
| } |
| return ret |
| } |
| |
| func fromKeys(in map[string]perfresults.PerfResults) util.StringSet { |
| ret := util.StringSet{} |
| for key := range in { |
| ret[key] = true |
| } |
| return ret |
| } |
| |
| // returns a map of benchmark names to sets of histogram names. A histogram name is only included |
| // if *every* task in controlTaskResults and treatmentTaskResults reported a non-empty set of sample values under that histogram name. |
| func commonBenchmarkWorkloads(controlTaskResults, treatmentTaskResults []map[string]perfresults.PerfResults) (map[string]util.StringSet, error) { |
| // Only try to analyze benchmarks and histograms that appear in data from all tasks. |
| commonBenchmarks := util.StringSet{} |
| commonHistograms := map[string]util.StringSet{} |
| for i, controlResults := range controlTaskResults { |
| if i >= len(treatmentTaskResults) { |
| return nil, fmt.Errorf("missing treatment task result: %d", i) |
| } |
| treatmentResults := treatmentTaskResults[i] |
| pairCommonBenchmarks := fromKeys(controlResults).Intersect(fromKeys(treatmentResults)) |
| if i == 0 { |
| commonBenchmarks = pairCommonBenchmarks |
| } |
| commonBenchmarks = commonBenchmarks.Intersect(pairCommonBenchmarks) |
| |
| for benchmarkName, results := range controlResults { |
| if commonHistograms[benchmarkName] == nil { |
| commonHistograms[benchmarkName] = util.NewStringSet(results.NonEmptyHistogramNames()) |
| } |
| commonHistograms[benchmarkName] = commonHistograms[benchmarkName].Intersect(util.NewStringSet(results.NonEmptyHistogramNames())) |
| } |
| for benchmarkName, results := range treatmentResults { |
| if commonHistograms[benchmarkName] == nil { |
| commonHistograms[benchmarkName] = util.NewStringSet(results.NonEmptyHistogramNames()) |
| } |
| commonHistograms[benchmarkName] = commonHistograms[benchmarkName].Intersect(util.NewStringSet(results.NonEmptyHistogramNames())) |
| } |
| } |
| |
| for benchmarkName, histogramNames := range commonHistograms { |
| if len(histogramNames) == 0 { |
| delete(commonHistograms, benchmarkName) |
| } |
| } |
| return commonHistograms, nil |
| } |
| |
| // This parses the "change:..." tag strings generated and added to the swarming task requests in |
| // this part of the pinpoint source (which really should be conveyed in a more structured way so |
| // we don't have to resort to hand-written parsing code like this on the receiving end): |
| // https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/change/change.py;l=52 |
| func buildSpecForChangeString(s string) (*cpb.BuildSpec, error) { |
| changeParts := strings.Split(s, ":") |
| if len(changeParts) < 2 || (changeParts[0] != "exp" && changeParts[0] != "base") { |
| return nil, fmt.Errorf("failed to parse buildspec from change tag: %q", s) |
| } |
| |
| // changeParts = "exp", "project@commit_hash + patch_id (args) (Variant: 0)" |
| buildParts := strings.Split(strings.Join(changeParts[1:], ":"), "+") |
| |
| // buildParts = "project@commit_hash", "patch_id (args) (Variant: 0)" |
| commitParts := strings.Split(buildParts[0], "@") |
| |
| // commitParts = "project", "commit_hash" |
| if len(commitParts) != 2 { |
| return nil, fmt.Errorf("failed to parse commit parts from change tag: %q", s) |
| } |
| repoProject := strings.TrimSpace(commitParts[0]) |
| |
| gitHashPlusExtraParts := strings.Split(commitParts[1], " ") |
| gitHash := strings.TrimSpace(gitHashPlusExtraParts[0]) |
| |
| ret := &cpb.BuildSpec{ |
| GitilesCommit: &cpb.GitilesCommit{ |
| Project: repoProject, |
| Id: gitHash, |
| }, |
| } |
| |
| if len(buildParts) == 2 { |
| gerritPactchsetHash := strings.TrimSpace(strings.Split(strings.TrimSpace(buildParts[1]), " ")[0]) |
| // This value is the git hash of the patchset, without reference to the actual |
| // gerrit change ID or which patchset on that change we're talking about. |
| // Need to rethink this, either update pinpoint's code to put all of the data we need |
| // into the swarming tags, or resign to using an opaque "applied git patch" string and |
| // forget about gerrit's details. |
| ret.GerritChanges = []*cpb.GerritChange{ |
| { |
| PatchsetHash: gerritPactchsetHash, |
| }, |
| } |
| } |
| |
| return ret, nil |
| } |
| |
| // Returns an ArmSpec proto populated with fields matching the details of s. |
| func inferArmSpec(s *swarming.SwarmingRpcsTaskRequestMetadata) (*cpb.ArmSpec, error) { |
| ret := &cpb.ArmSpec{} |
| |
| ppc := pinpointChangeTagForTask(s) |
| if ppc != "" { |
| } else { |
| sklog.Errorf("couldn't get pinpoint change info for a pinpoint task. Swarming ID %s", s.TaskId) |
| } |
| bs, err := buildSpecForChangeString(ppc) |
| if err != nil { |
| return nil, err |
| } |
| |
| ret.BuildSpec = []*cpb.BuildSpec{bs} |
| |
| runInfo, err := runInfoForTask(s) |
| if err != nil { |
| return nil, err |
| } |
| |
| ret.RunSpec = []*cpb.RunSpec{ |
| { |
| Os: runInfo.os, |
| SyntheticProductName: runInfo.syntheticProductName, |
| }, |
| } |
| |
| return ret, nil |
| } |
| |
| // Because we don't *currently* have users specify up-front what the ExperimentSpec should be |
| // (they just give us a pinpoint job ID, rather than telling us the actual build/run details), |
| // we do a bit of inference here to reconstruct that information from what we have in the |
| // available swarming task metadata. |
| func inferExperimentSpec(controlSpecs, treatmentSpecs []*cpb.ArmSpec, controlResults, treatmentResults []map[string]perfresults.PerfResults) (*cpb.ExperimentSpec, error) { |
| if len(controlSpecs) != len(treatmentSpecs) || len(controlSpecs) == 0 || len(treatmentSpecs) == 0 { |
| return nil, fmt.Errorf("control and treatment spec length must be equal and non-zero: %d vs %d", len(controlSpecs), len(treatmentSpecs)) |
| } |
| |
| ret := &cpb.ExperimentSpec{} |
| |
| // accumulate the common Spec proto field values that are identical across all tasks within three |
| // subsets of tasks in the experiment data: |
| // - commonArmSpecIntersection for Spec proto fields that are the same across all tasks |
| // - controlArmSpecIntersection for Spec proto files that are the same across all control tasks |
| // - treatmentArmSpecIntersection for Spec proto fields that are the same across all treatment tasks |
| controlArmSpecIntersection := controlSpecs[0] |
| treatmentArmSpecIntersection := treatmentSpecs[0] |
| commonArmSpecIntersection := intersectArmSpecs(controlArmSpecIntersection, treatmentArmSpecIntersection) |
| |
| for _, cArmSpec := range controlSpecs[1:] { |
| controlArmSpecIntersection = intersectArmSpecs(controlArmSpecIntersection, cArmSpec) |
| commonArmSpecIntersection = intersectArmSpecs(commonArmSpecIntersection, cArmSpec) |
| } |
| |
| for _, tArmSpec := range treatmentSpecs[1:] { |
| treatmentArmSpecIntersection = intersectArmSpecs(treatmentArmSpecIntersection, tArmSpec) |
| commonArmSpecIntersection = intersectArmSpecs(commonArmSpecIntersection, tArmSpec) |
| } |
| |
| // Now remove the Spec proto fields that are common to both arms from each arms' CommonArmSpec |
| // so that they only reflect the differences between control and treatment relative to the attributes |
| // that are common between them. |
| controlArmSpecIntersection = diffArmSpecs(controlArmSpecIntersection, commonArmSpecIntersection) |
| treatmentArmSpecIntersection = diffArmSpecs(treatmentArmSpecIntersection, commonArmSpecIntersection) |
| |
| // We only need to infer *common* benchmark/workload measurement values (no diffs) reported by both |
| // arms' tasks, because there's no way to compare response variables that don't appear in both arms. |
| // So we just ignore values that do not appear in every tasks' output files. |
| // |
| // Note that in practice, many jobs produce disjoint sets of "metrics", because they report |
| // things that are not actual response variables (e.g. optional diagnostic info used for debugging) |
| // that just happen to use the same data format used by response variables in their json files. Ignoring |
| // any of these "metrics" that do not appear in every task output is an admittedly coarse heuristic, |
| // but a scalable solution requires either cleaner benchmark output files, or more explicit |
| // analysis requests that enumerate the exact benchmark/workloads to look for (neither of which |
| // is something expect to have by 2023Q2). |
| commonHistograms, err := commonBenchmarkWorkloads(controlResults, treatmentResults) |
| if err != nil { |
| return nil, err |
| } |
| benchmarks := []*cpb.Benchmark{} |
| |
| for benchmarkName, histograms := range commonHistograms { |
| workloads := histograms.Keys() |
| sort.Strings(workloads) |
| benchmarks = append(benchmarks, &cpb.Benchmark{ |
| Name: benchmarkName, |
| Workload: workloads, |
| }) |
| } |
| ret.Analysis = &cpb.AnalysisSpec{ |
| Benchmark: benchmarks, |
| } |
| ret.Common = commonArmSpecIntersection |
| ret.Control = controlArmSpecIntersection |
| ret.Treatment = treatmentArmSpecIntersection |
| |
| return ret, nil |
| } |