blob: 843f99ce794c0fc1650348d8abf093b5a84ee63c [file] [log] [blame] [edit]
package perfresults
import (
"encoding/json"
"io"
"math"
"slices"
"go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
)
// PerfResults represents the contents of a perf_results.json file generated by a
// telemetry-based benchmark. The full format is not formally defined, but some
// documentation for it exists in various places. The most comprehensive doc is
// https://chromium.googlesource.com/external/github.com/catapult-project/catapult/+/HEAD/docs/Histogram-set-json-format.md
type PerfResults struct {
	// Histograms maps each unique trace key to its collected sample values.
	Histograms map[TraceKey]Histogram
}
// NonEmptyHistogramNames returns a list of names of histograms whose SampleValues arrays are non-empty.
func (pr *PerfResults) NonEmptyHistogramNames() []string {
	names := []string{}
	for key, hist := range pr.Histograms {
		if len(hist.SampleValues) == 0 {
			continue
		}
		names = append(names, key.ChartName)
	}
	return names
}
// TraceKey is a unique identifier for one trace.
//
// The Telemetry trace from one perf run is identified as:
// ChromePerf/[BotConfig]/[benchmark]/[ChartName]/[Story] where
// BotConfig is usually the build name that defines the bot dimension where it runs
// benchmark is a collection of runs
// ChartName is a specific measurement
type TraceKey struct {
	// ChartName is a specific measurement, this is also equivalent to metric.
	ChartName string `json:"chart"`
	// Unit is usually tied to the chart, we still save it for the reference.
	Unit string `json:"unit"`
	// Story is a specific user journey that collects the metrics (ChartName).
	Story string `json:"story"`
	// Architecture and OSName are defined by the BotConfig. Within each perf result file,
	// this should be a unique combo throughout as it runs on a single machine. In other cases,
	// the key pair can identify a unique trace because they are running on a different machine.
	Architecture string `json:"arch"`
	OSName       string `json:"os"`
	// ignored fields for now, those are not used for analysis.
	// benchmarkDescriptions
	// benchmarkStart
	// benchmarks
	// storysetRepeats
	// traceStart
	// traceUrls
	// botId
	// owners
	// osVersions
	// osDetailedVersions
}
// Histogram is an individual benchmark measurement.
type Histogram struct {
	// SampleValues holds the raw measured values for one trace; it may be empty.
	SampleValues []float64
}
// AggregationMapping maps the string literals to the aggregation methods to be used in the
// trace generations and user-facing Json/OpenAPIs.
var AggregationMapping = map[string]func(Histogram) float64{
	"count": func(h Histogram) float64 { return float64(h.Count()) },
	"max":   Histogram.Max,
	"mean":  Histogram.Mean,
	"min":   Histogram.Min,
	"std":   Histogram.Stddev,
	"sum":   Histogram.Sum,
}
// histogramRaw mirrors one histogram entry as it appears on the wire, before
// its diagnostics are resolved into a TraceKey.
type histogramRaw struct {
	Name string `json:"name"`
	Unit string `json:"unit"`
	// optional fields
	Description  string    `json:"description"`
	SampleValues []float64 `json:"sampleValues"`
	// Diagnostics maps a diagnostic key to a guid, which points to e.g. a genericSet.
	Diagnostics map[string]string `json:"diagnostics"`
}
// genericSet is a normalized value that other parts of the json file can reference by guid.
type genericSet struct {
	Values []any `json:"values"` // Can be string or number. sigh.
}
// dateRange is a range of dates, expressed as min/max timestamps.
type dateRange struct {
	Min float64 `json:"min"`
	Max float64 `json:"max"`
}
// relatedNameMap is a map from short names to full histogram names.
type relatedNameMap struct {
	Names map[string]string `json:"names"`
}
// singleEntry is a union of every element shape that can appear in the
// top-level JSON array. The embedded structs let one json.Unmarshal call
// populate whichever variant the element actually is; Type/GUID identify
// metadata entries, while a non-empty histogramRaw.Name marks a histogram.
type singleEntry struct {
	Type string `json:"type"`
	GUID string `json:"guid"`
	histogramRaw
	genericSet
	dateRange
	relatedNameMap
}
// asTraceKeyAndHistogram converts raw data into a unique trace key and histogram samples.
//
// metadata maps a diagnostic guid to its decoded value (typically a genericSet).
// Missing or malformed metadata entries are logged and the corresponding key
// field is left empty, rather than panicking on a bad file.
func (hr *histogramRaw) asTraceKeyAndHistogram(metadata map[string]any) (TraceKey, Histogram) {
	// firstString resolves a diagnostic key to the first string value of the
	// genericSet it references, or "" when the diagnostic is absent or the
	// referenced metadata is missing/malformed.
	firstString := func(diagnostic string) string {
		guid, ok := hr.Diagnostics[diagnostic]
		if !ok {
			return ""
		}
		v, ok := metadata[guid]
		if !ok {
			sklog.Warningf("Unable to find the value for %s (%v).", diagnostic, guid)
			return ""
		}
		gs, ok := v.(genericSet)
		if !ok || len(gs.Values) == 0 {
			// Previously an unchecked assertion here would panic on an empty set.
			sklog.Warningf("Empty or unexpected metadata for %s (%v).", diagnostic, guid)
			return ""
		}
		s, ok := gs.Values[0].(string)
		if !ok {
			sklog.Warningf("Non-string metadata value for %s (%v).", diagnostic, guid)
			return ""
		}
		return s
	}
	tk := TraceKey{
		ChartName: hr.Name,
		Unit:      hr.Unit,
	}
	// The original keys are plural but they each carry a single value.
	tk.Architecture = firstString("architectures")
	tk.OSName = firstString("osNames")
	tk.Story = firstString("stories")
	return tk, Histogram{SampleValues: hr.SampleValues}
}
// NewResults creates a new PerfResults from the given data stream.
//
// It decodes the data in a streaming manner to reduce the memory footprint as the JSON files
// are sometimes bigger than 10MB.
func NewResults(r io.Reader) (*PerfResults, error) {
	pr := &PerfResults{
		Histograms: make(map[TraceKey]Histogram),
	}
	decoder := json.NewDecoder(r)
	// perf_results.json is an array of objects
	// read the open '['
	t, err := decoder.Token()
	// don't panic on an empty file
	if err == io.EOF {
		return pr, nil
	}
	if err != nil {
		return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok || delim.String() != "[" {
		return nil, skerr.Fmt("expecting the open '['")
	}
	// metadata only useful within the file scope.
	// It maps a guid to its decoded value (genericSet, dateRange, ...), and is
	// consulted by asTraceKeyAndHistogram to resolve histogram diagnostics.
	// NOTE(review): this assumes metadata entries appear before the histograms
	// that reference them — TODO confirm the file format guarantees ordering.
	md := make(map[string]any)
	// looping all the elements
	for decoder.More() {
		var entry singleEntry
		err := decoder.Decode(&entry)
		if err != nil {
			return nil, skerr.Wrap(err)
		}
		// If Name is not empty, it is a histogram
		if entry.Name != "" {
			pr.merge(entry.asTraceKeyAndHistogram(md))
			continue
		}
		// Otherwise the element is metadata, keyed by its guid.
		switch entry.Type {
		case "GenericSet":
			md[entry.GUID] = entry.genericSet
		case "DateRange":
			md[entry.GUID] = entry.dateRange
		case "RelatedNameMap":
			md[entry.GUID] = entry.relatedNameMap
		}
	}
	// Consume and validate the closing ']' so a truncated file is an error.
	t, err = decoder.Token()
	if err != nil {
		return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok || delim.String() != "]" {
		return nil, skerr.Fmt("expecting the closing ']'")
	}
	return pr, nil
}
// Aggregate applies the aggregation named by method to the histogram's samples.
// It returns NaN when the histogram is empty or the method is not recognized.
func (h Histogram) Aggregate(method string) float64 {
	agg, ok := AggregationMapping[method]
	if !ok || len(h.SampleValues) == 0 {
		return math.NaN()
	}
	return agg(h)
}
// UnmarshalJSON parses a byte slice into a PerfResults instance.
//
// Deprecated: this should be replaced by the streaming decoding in NewResults.
func (pr *PerfResults) UnmarshalJSON(data []byte) error {
	pr.Histograms = make(map[TraceKey]Histogram)
	var entries []json.RawMessage
	if err := json.Unmarshal(data, &entries); err != nil {
		return err
	}
	// metadata maps a guid to its decoded value, file-scoped only.
	metadata := make(map[string]any)
	for _, raw := range entries {
		var entry singleEntry
		if err := json.Unmarshal(raw, &entry); err != nil {
			return err
		}
		// A non-empty Name marks a histogram element.
		if entry.Name != "" {
			pr.merge(entry.asTraceKeyAndHistogram(metadata))
			continue
		}
		// Everything else is metadata referenced by guid.
		switch entry.Type {
		case "GenericSet":
			metadata[entry.GUID] = entry.genericSet
		case "DateRange":
			metadata[entry.GUID] = entry.dateRange
		case "RelatedNameMap":
			metadata[entry.GUID] = entry.relatedNameMap
		}
	}
	return nil
}
// GetSampleValues returns the all the sampled values for the same chart.
//
// Deprecated: this will merge from all the stories, and this is only the results files that
// contain only one story. This is the only use case for cabe.
func (pr *PerfResults) GetSampleValues(chart string) []float64 {
	var samples []float64
	for key, hist := range pr.Histograms {
		if key.ChartName != chart {
			continue
		}
		samples = append(samples, hist.SampleValues...)
	}
	return samples
}
// MergeResults folds every histogram from other into pr, appending sample
// values for trace keys that already exist.
func (pr *PerfResults) MergeResults(other *PerfResults) {
	for k, h := range other.Histograms {
		pr.merge(k, h)
	}
}
// merge takes the given histogram and merges sample values: any samples already
// stored under key are prepended, and the combined histogram is stored back.
func (pr *PerfResults) merge(key TraceKey, other Histogram) {
	merged := other.SampleValues
	if existing, ok := pr.Histograms[key]; ok {
		merged = append(existing.SampleValues, merged...)
	}
	pr.Histograms[key] = Histogram{SampleValues: merged}
}
// Min returns the smallest sample value, or NaN when there are no samples.
// The guard mirrors the NaN convention used by Aggregate, instead of letting
// slices.Min panic on an empty slice.
func (h Histogram) Min() float64 {
	if len(h.SampleValues) == 0 {
		return math.NaN()
	}
	return slices.Min(h.SampleValues)
}
// Max returns the largest sample value, or NaN when there are no samples.
// The guard mirrors the NaN convention used by Aggregate, instead of letting
// slices.Max panic on an empty slice.
func (h Histogram) Max() float64 {
	if len(h.SampleValues) == 0 {
		return math.NaN()
	}
	return slices.Max(h.SampleValues)
}
// Count returns the number of recorded sample values.
func (h Histogram) Count() int {
	return len(h.SampleValues)
}
// Mean returns the arithmetic mean of the sample values (NaN when empty,
// since 0/0 is NaN).
func (h Histogram) Mean() float64 {
	n := float64(h.Count())
	return h.Sum() / n
}
// Stddev returns the sample standard deviation (Bessel-corrected, dividing by
// n-1) of the sample values. It returns NaN when fewer than two samples are
// present, since the sample variance is undefined there; previously an empty
// histogram produced -0 via sqrt(0 / -1).
func (h Histogram) Stddev() float64 {
	n := h.Count()
	if n < 2 {
		return math.NaN()
	}
	mean := h.Sum() / float64(n)
	variance := 0.0
	for _, x := range h.SampleValues {
		d := x - mean
		variance += d * d
	}
	return math.Sqrt(variance / float64(n-1))
}
// Sum returns the total of all sample values (0 when empty).
func (h Histogram) Sum() float64 {
	total := 0.0
	for _, v := range h.SampleValues {
		total += v
	}
	return total
}