package perfresults

import (
	"encoding/json"
	"io"
	"math"
	"slices"

	"go.skia.org/infra/go/skerr"
	"go.skia.org/infra/go/sklog"
)

// PerfResults represents the contents of a perf_results.json file generated by a
// telemetry-based benchmark. The full format is not formally defined, but some
// documentation for it exists in various places. The most comprehensive doc is
// https://chromium.googlesource.com/external/github.com/catapult-project/catapult/+/HEAD/docs/Histogram-set-json-format.md
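//
// The file is a JSON array that mixes histogram entries with shared diagnostics
// referenced by GUID. A minimal illustrative sketch (all values are made up):
//
//	[
//	  {"name": "timeToFirstPaint", "unit": "ms_smallerIsBetter",
//	   "sampleValues": [12.5, 13.1],
//	   "diagnostics": {"stories": "a1b2c3"}},
//	  {"type": "GenericSet", "guid": "a1b2c3", "values": ["load:news:example"]}
//	]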
type PerfResults struct {
	Histograms map[TraceKey]Histogram
}

// NonEmptyHistogramNames returns a list of names of histograms whose SampleValues arrays are non-empty.
func (pr *PerfResults) NonEmptyHistogramNames() []string {
	ret := []string{}
	for k, h := range pr.Histograms {
		if len(h.SampleValues) > 0 {
			ret = append(ret, k.ChartName)
		}
	}
	return ret
}

// TraceKey is a unique identifier for one trace.
//
// The Telemetry trace from one perf run is identified as
// ChromePerf/[BotConfig]/[benchmark]/[ChartName]/[Story], where
// BotConfig is usually the build name that defines the bot dimension where it runs,
// benchmark is a collection of runs, and
// ChartName is a specific measurement.
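//
// An illustrative example of one such identifier (the specific names are made up):
// ChromePerf/linux-perf/speedometer2/RunsPerMinute/Speedometer2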
type TraceKey struct {
	// ChartName is a specific measurement; it is also equivalent to a metric.
	ChartName string `json:"chart"`

	// Unit is usually tied to the chart; we still save it for reference.
	Unit string `json:"unit"`

	// Story is a specific user journey that collects the metrics (ChartName).
	Story string `json:"story"`

	// Architecture and OSName are defined by the BotConfig. Within each perf result file,
	// this should be a unique combination throughout, as the benchmark runs on a single
	// machine. Across files, the pair can identify a unique trace because runs may happen
	// on different machines.
	Architecture string `json:"arch"`
	OSName       string `json:"os"`

	// Fields ignored for now; they are not used for analysis:
	// benchmarkDescriptions
	// benchmarkStart
	// benchmarks
	// storysetRepeats
	// traceStart
	// traceUrls
	// botId
	// owners
	// osVersions
	// osDetailedVersions
}

// Histogram is an individual benchmark measurement.
type Histogram struct {
	SampleValues []float64
}

// AggregationMapping maps string literals to the aggregation methods used in
// trace generation and in the user-facing JSON/OpenAPIs.
var AggregationMapping = map[string]func(Histogram) float64{
	"max":  Histogram.Max,
	"min":  Histogram.Min,
	"mean": Histogram.Mean,
	"std":  Histogram.Stddev,
	"sum":  Histogram.Sum,
	"count": func(h Histogram) float64 {
		return float64(h.Count())
	},
}
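
// A minimal usage sketch for AggregationMapping (the sample values below are
// illustrative):
//
//	h := Histogram{SampleValues: []float64{1, 2, 3}}
//	if agg, ok := AggregationMapping["mean"]; ok {
//		mean := agg(h) // mean == 2
//	}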

type histogramRaw struct {
	Name string `json:"name"`
	Unit string `json:"unit"`

	// optional fields
	Description  string    `json:"description"`
	SampleValues []float64 `json:"sampleValues"`
	// Diagnostics maps a diagnostic key to a guid, which points to e.g. a genericSet.
	Diagnostics map[string]string `json:"diagnostics"`
}

// genericSet is a normalized value that other parts of the json file can reference by guid.
type genericSet struct {
	Values []any `json:"values"` // Can be string or number. sigh.
}

// dateRange is a range of dates.
type dateRange struct {
	Min float64 `json:"min"`
	Max float64 `json:"max"`
}

// relatedNameMap is a map from short names to full histogram names.
type relatedNameMap struct {
	Names map[string]string `json:"names"`
}

type singleEntry struct {
	Type string `json:"type"`
	GUID string `json:"guid"`

	histogramRaw
	genericSet
	dateRange
	relatedNameMap
}

// asTraceKeyAndHistogram converts raw data into a unique trace key and histogram samples.
func (hr *histogramRaw) asTraceKeyAndHistogram(metadata map[string]any) (TraceKey, Histogram) {
	tk := TraceKey{
		ChartName: hr.Name,
		Unit:      hr.Unit,
	}

	// The diagnostic keys are plural, but the values are effectively singular.
	if arch, ok := hr.Diagnostics["architectures"]; ok {
		if v, ok := metadata[arch]; ok {
			tk.Architecture = v.(genericSet).Values[0].(string)
		} else {
			sklog.Warningf("Unable to find the value for architectures (%v).", arch)
		}
	}

	if osNames, ok := hr.Diagnostics["osNames"]; ok {
		if v, ok := metadata[osNames]; ok {
			tk.OSName = v.(genericSet).Values[0].(string)
		} else {
			sklog.Warningf("Unable to find the value for osNames (%v).", osNames)
		}
	}

	if stories, ok := hr.Diagnostics["stories"]; ok {
		if v, ok := metadata[stories]; ok {
			tk.Story = v.(genericSet).Values[0].(string)
		} else {
			sklog.Warningf("Unable to find the value for stories (%v).", stories)
		}
	}
	return tk, Histogram{SampleValues: hr.SampleValues}
}

// NewResults creates a new PerfResults from the given data stream.
//
// It decodes the data in a streaming manner to reduce the memory footprint, as the JSON
// files are sometimes larger than 10MB.
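//
// A minimal usage sketch (the file name is illustrative):
//
//	f, err := os.Open("perf_results.json")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	pr, err := NewResults(f)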
func NewResults(r io.Reader) (*PerfResults, error) {
	pr := &PerfResults{
		Histograms: make(map[TraceKey]Histogram),
	}
	decoder := json.NewDecoder(r)

	// perf_results.json is an array of objects:
	// read the opening '['.
	t, err := decoder.Token()

	// don't panic on an empty file
	if err == io.EOF {
		return pr, nil
	}
	if err != nil {
		return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok || delim.String() != "[" {
		return nil, skerr.Fmt("expecting the open '['")
	}

	// metadata is only useful within the file scope.
	md := make(map[string]any)

	// loop over all the elements
	for decoder.More() {
		var entry singleEntry
		err := decoder.Decode(&entry)
		if err != nil {
			return nil, skerr.Wrap(err)
		}
		// If Name is not empty, it is a histogram.
		if entry.Name != "" {
			pr.merge(entry.asTraceKeyAndHistogram(md))
			continue
		}
		switch entry.Type {
		case "GenericSet":
			md[entry.GUID] = entry.genericSet
		case "DateRange":
			md[entry.GUID] = entry.dateRange
		case "RelatedNameMap":
			md[entry.GUID] = entry.relatedNameMap
		}
	}

	t, err = decoder.Token()
	if err != nil {
		return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok || delim.String() != "]" {
		return nil, skerr.Fmt("expecting the closing ']'")
	}

	return pr, nil
}

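// Aggregate applies the named aggregation method (see AggregationMapping) to the sample
// values. It returns NaN when the method is unknown or when there are no samples; for
// example, Histogram{SampleValues: []float64{1, 3}}.Aggregate("mean") is 2, while
// Histogram{}.Aggregate("mean") is NaN.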
func (h Histogram) Aggregate(method string) float64 {
	if m, ok := AggregationMapping[method]; len(h.SampleValues) > 0 && ok {
		return m(h)
	}
	return math.NaN()
}

// UnmarshalJSON parses a byte slice into a PerfResults instance.
//
// This should be deprecated in favor of the streaming decoding in NewResults.
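//
// Because PerfResults implements json.Unmarshaler, it can be used directly with
// encoding/json (data here is an illustrative byte slice):
//
//	var pr PerfResults
//	if err := json.Unmarshal(data, &pr); err != nil {
//		return err
//	}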
func (pr *PerfResults) UnmarshalJSON(data []byte) error {
	pr.Histograms = make(map[TraceKey]Histogram)
	var raw []json.RawMessage
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	md := make(map[string]any)
	for _, m := range raw {
		var entry singleEntry
		if err := json.Unmarshal(m, &entry); err != nil {
			return err
		}
		// If Name is not empty, it is a histogram.
		if entry.Name != "" {
			pr.merge(entry.asTraceKeyAndHistogram(md))
			continue
		}
		switch entry.Type {
		case "GenericSet":
			md[entry.GUID] = entry.genericSet
		case "DateRange":
			md[entry.GUID] = entry.dateRange
		case "RelatedNameMap":
			md[entry.GUID] = entry.relatedNameMap
		}
	}
	return nil
}

// GetSampleValues returns all the sample values for the given chart.
//
// Deprecated: this merges the values from all stories, which is only correct for
// results files that contain a single story. This is the only use case for cabe.
func (pr *PerfResults) GetSampleValues(chart string) []float64 {
	var values []float64
	for k, sv := range pr.Histograms {
		if k.ChartName == chart {
			values = append(values, sv.SampleValues...)
		}
	}
	return values
}

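// MergeResults merges the histograms from another PerfResults into this one, appending
// sample values for trace keys that already exist.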
func (pr *PerfResults) MergeResults(other *PerfResults) {
	for key, hist := range other.Histograms {
		pr.merge(key, hist)
	}
}

// merge takes the given histogram and merges its sample values into the existing
// histogram for the same trace key.
func (pr *PerfResults) merge(key TraceKey, other Histogram) {
	if h, ok := pr.Histograms[key]; ok {
		other.SampleValues = append(h.SampleValues, other.SampleValues...)
	}
	pr.Histograms[key] = other
}

// Min returns the smallest sample value. It panics if there are no samples.
func (h Histogram) Min() float64 {
	return slices.Min(h.SampleValues)
}

// Max returns the largest sample value. It panics if there are no samples.
func (h Histogram) Max() float64 {
	return slices.Max(h.SampleValues)
}

// Count returns the number of sample values.
func (h Histogram) Count() int {
	return len(h.SampleValues)
}

// Mean returns the arithmetic mean of the sample values.
func (h Histogram) Mean() float64 {
	return h.Sum() / float64(h.Count())
}

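// Stddev returns the sample standard deviation of the sample values, i.e.
// sqrt(sum((x - mean)^2) / (n - 1)). Note that it is only meaningful when there are
// at least two samples.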
func (h Histogram) Stddev() float64 {
	sum := h.Sum()
	mean := sum / float64(h.Count())
	vr := 0.0
	for _, x := range h.SampleValues {
		vr += (x - mean) * (x - mean)
	}
	// Bessel's correction: divide by n-1 for the sample variance.
	return math.Sqrt(vr / float64(h.Count()-1))
}

// Sum returns the sum of the sample values.
func (h Histogram) Sum() float64 {
	s := 0.0
	for i := range h.SampleValues {
		s += h.SampleValues[i]
	}
	return s
}