perf/go/perfresults/perf_results_parser.go - buildbot - Git at Google

 package perfresults

 import (
 	"encoding/json"
 	"io"
 	"math"
 	"slices"

 	"go.skia.org/infra/go/skerr"
 	"go.skia.org/infra/go/sklog"
 )

 // PerfResults represents the contents of a perf_results.json file generated by a
 // telemetry-based benchmark. The full format is not formally defined, but some
 // documentation for it exists in various places.  The most comprehensive doc is
 // https://chromium.googlesource.com/external/github.com/catapult-project/catapult/+/HEAD/docs/Histogram-set-json-format.md
 type PerfResults struct {
 	// Histograms holds the sample values of every trace, keyed by the TraceKey
 	// that identifies the trace.
 	Histograms map[TraceKey]Histogram
 }

 // NonEmptyHistogramNames lists the chart name of every histogram that holds at
 // least one sample value.
 //
 // The names follow map iteration order, so their ordering is unspecified, and a
 // chart name is reported once per matching trace key. The result is never nil.
 func (pr *PerfResults) NonEmptyHistogramNames() []string {
 	names := []string{}
 	for key, hist := range pr.Histograms {
 		if len(hist.SampleValues) == 0 {
 			continue
 		}
 		names = append(names, key.ChartName)
 	}
 	return names
 }

 // TraceKey is a unique identifier for one trace.
 //
 // The Telemetry trace from one perf run is identified as:
 // ChromePerf/[BotConfig]/[benchmark]/[ChartName]/[Story] where
 //   - BotConfig is usually the build name that defines the bot dimension where it runs,
 //   - benchmark is a collection of runs,
 //   - ChartName is a specific measurement,
 //   - Story is a specific user journey that collects the measurement.
 //
 // TraceKey is used as the key of PerfResults.Histograms.
 type TraceKey struct {
 	// ChartName is a specific measurement, this is also equivalent to metric.
 	ChartName string `json:"chart"`

 	// Unit is usually tied to the chart, we still save it for reference.
 	Unit string `json:"unit"`

 	// Story is a specific user journey that collects the metrics (ChartName).
 	Story string `json:"story"`

 	// Architecture and OSName are defined by the BotConfig. Within each perf result file
 	// this should be a unique combo throughout, as it runs on a single machine. In other cases,
 	// the key pair can identify a unique trace because they are running on a different machine.
 	Architecture string `json:"arch"`
 	OSName       string `json:"os"`

 	// The following diagnostics are ignored for now; they are not used for analysis.
 	// benchmarkDescriptions
 	// benchmarkStart
 	// benchmarks
 	// storysetRepeats
 	// traceStart
 	// traceUrls
 	// botId
 	// owners
 	// osVersions
 	// osDetailedVersions
 }

 // Histogram is an individual benchmark measurement.
 type Histogram struct {
 	// SampleValues holds the raw sample values of the measurement, as read from
 	// the "sampleValues" field of a histogram entry.
 	SampleValues []float64
 }

 // AggregationMapping maps the string literals to the aggregation methods to be used in the
 // trace generations and user-facing JSON/OpenAPIs.
 //
 // Histogram.Aggregate looks the requested method up in this map by name.
 var AggregationMapping = map[string]func(Histogram) float64{
 	"max":  Histogram.Max,
 	"min":  Histogram.Min,
 	"mean": Histogram.Mean,
 	"std":  Histogram.Stddev,
 	"sum":  Histogram.Sum,
 	// Count returns an int, so it is adapted to the common float64 signature.
 	"count": func(h Histogram) float64 {
 		return float64(h.Count())
 	},
 }

 // histogramRaw is the JSON shape of a histogram entry as it appears in the
 // results file, before its diagnostics are resolved into a TraceKey.
 type histogramRaw struct {
 	// Name becomes the ChartName of the trace key; Unit becomes its Unit.
 	Name string `json:"name"`
 	Unit string `json:"unit"`

 	// optional fields
 	Description  string    `json:"description"`
 	SampleValues []float64 `json:"sampleValues"`
 	// Diagnostics maps a diagnostic key to a guid, which points to e.g. a genericSet.
 	Diagnostics map[string]string `json:"diagnostics"`
 }

 // genericSet is a normalized value that other parts of the json file can reference by guid.
 type genericSet struct {
 	// Values is decoded as []any because the elements can be strings or numbers.
 	Values []any `json:"values"` // Can be string or number. sigh.
 }

 // dateRange is a range of dates, decoded from the "min" and "max" fields of an entry.
 //
 // NOTE(review): the unit of Min and Max is not visible here (presumably a
 // timestamp) — confirm against the format documentation.
 type dateRange struct {
 	Min float64 `json:"min"`
 	Max float64 `json:"max"`
 }

 // relatedNameMap is a map from short names to full histogram names.
 type relatedNameMap struct {
 	// Names is decoded from the "names" field of the entry.
 	Names map[string]string `json:"names"`
 }

 // singleEntry is one element of the top-level array of the results file.
 //
 // It embeds every entry shape this package handles so that any element can be
 // decoded into the same struct. The parsers then tell the shapes apart: an entry
 // with a non-empty Name is a histogram, otherwise Type selects between
 // "GenericSet", "DateRange" and "RelatedNameMap".
 type singleEntry struct {
 	Type string `json:"type"`
 	// GUID is the identifier histograms use in their diagnostics to refer to this entry.
 	GUID string `json:"guid"`

 	histogramRaw
 	genericSet
 	dateRange
 	relatedNameMap
 }

 // asTraceKeyAndHistogram converts raw data into a unique trace key and histogram samples.
 //
 // metadata maps a guid to the entry it identifies (genericSet, dateRange or
 // relatedNameMap). The "architectures", "osNames" and "stories" diagnostics of the
 // histogram are resolved through it. The diagnostic keys are plural, but only the
 // first value of the referenced set is used.
 //
 // A diagnostic that is absent leaves the corresponding TraceKey field empty. A
 // diagnostic that cannot be resolved to a string (unknown guid, entry that is not
 // a GenericSet, empty set, non-string first value) also leaves the field empty and
 // logs a warning instead of panicking, since the file content is not under our control.
 func (hr *histogramRaw) asTraceKeyAndHistogram(metadata map[string]any) (TraceKey, Histogram) {
 	// firstString returns the first value of the GenericSet referenced by the
 	// given diagnostic, or "" when it cannot be resolved.
 	firstString := func(diag string) string {
 		guid, ok := hr.Diagnostics[diag]
 		if !ok {
 			return ""
 		}
 		v, ok := metadata[guid]
 		if !ok {
 			sklog.Warningf("Unable to find the value for %s (%v).", diag, guid)
 			return ""
 		}
 		set, ok := v.(genericSet)
 		if !ok {
 			sklog.Warningf("Unexpected value for %s (%v): %T is not a GenericSet.", diag, guid, v)
 			return ""
 		}
 		if len(set.Values) == 0 {
 			sklog.Warningf("Empty GenericSet for %s (%v).", diag, guid)
 			return ""
 		}
 		s, ok := set.Values[0].(string)
 		if !ok {
 			sklog.Warningf("Unexpected value for %s (%v): first value %v is not a string.", diag, guid, set.Values[0])
 			return ""
 		}
 		return s
 	}

 	tk := TraceKey{
 		ChartName: hr.Name,
 		Unit:      hr.Unit,
 	}
 	tk.Architecture = firstString("architectures")
 	tk.OSName = firstString("osNames")
 	tk.Story = firstString("stories")
 	return tk, Histogram{SampleValues: hr.SampleValues}
 }

 // NewResults creates a new PerfResults from the given data stream.
 //
 // The stream is expected to hold a JSON array of entries. Entries are decoded one
 // at a time to reduce the memory footprint, as the JSON files are sometimes bigger
 // than 10MB. An empty stream yields an empty, non-nil PerfResults.
 //
 // Entries with a non-empty name are histograms and are merged into the result.
 // GenericSet, DateRange and RelatedNameMap entries are remembered by GUID so that
 // the histograms decoded after them can resolve their diagnostics.
 func NewResults(r io.Reader) (*PerfResults, error) {
 	results := &PerfResults{
 		Histograms: make(map[TraceKey]Histogram),
 	}
 	dec := json.NewDecoder(r)

 	// The document must start with the opening '[' of the top-level array.
 	tok, err := dec.Token()
 	switch {
 	case err == io.EOF:
 		// Don't fail on an empty file.
 		return results, nil
 	case err != nil:
 		return nil, skerr.Wrap(err)
 	}
 	if d, isDelim := tok.(json.Delim); !isDelim || d != '[' {
 		return nil, skerr.Fmt("expecting the open '['")
 	}

 	// The metadata is only meaningful within the scope of this file.
 	metadata := map[string]any{}

 	for dec.More() {
 		var entry singleEntry
 		if err := dec.Decode(&entry); err != nil {
 			return nil, skerr.Wrap(err)
 		}
 		switch {
 		case entry.Name != "":
 			// A non-empty Name marks the entry as a histogram.
 			results.merge(entry.asTraceKeyAndHistogram(metadata))
 		case entry.Type == "GenericSet":
 			metadata[entry.GUID] = entry.genericSet
 		case entry.Type == "DateRange":
 			metadata[entry.GUID] = entry.dateRange
 		case entry.Type == "RelatedNameMap":
 			metadata[entry.GUID] = entry.relatedNameMap
 		}
 	}

 	// Consume the closing ']' of the top-level array.
 	tok, err = dec.Token()
 	if err != nil {
 		return nil, skerr.Wrap(err)
 	}
 	if d, isDelim := tok.(json.Delim); !isDelim || d != ']' {
 		return nil, skerr.Fmt("expecting the closing ']'")
 	}

 	return results, nil
 }

 // Aggregate reduces the histogram to a single value using the aggregation method
 // registered under the given name in AggregationMapping.
 //
 // It returns NaN when the method is unknown or when the histogram holds no
 // sample values.
 func (h Histogram) Aggregate(method string) float64 {
 	if len(h.SampleValues) == 0 {
 		return math.NaN()
 	}
 	aggregate, known := AggregationMapping[method]
 	if !known {
 		return math.NaN()
 	}
 	return aggregate(h)
 }

 // UnmarshalJSON parses a byte slice holding a JSON array of entries into pr,
 // replacing whatever histograms pr held before.
 //
 // Entries with a non-empty name are histograms and are merged into pr. GenericSet,
 // DateRange and RelatedNameMap entries are remembered by GUID so that the
 // histograms that follow them can resolve their diagnostics.
 //
 // This should be deprecated in favor of streaming decoding.
 func (pr *PerfResults) UnmarshalJSON(data []byte) error {
 	pr.Histograms = make(map[TraceKey]Histogram)

 	var elements []json.RawMessage
 	if err := json.Unmarshal(data, &elements); err != nil {
 		return err
 	}

 	metadata := map[string]any{}
 	for i := range elements {
 		var entry singleEntry
 		if err := json.Unmarshal(elements[i], &entry); err != nil {
 			return err
 		}
 		switch {
 		case entry.Name != "":
 			// A non-empty Name marks the entry as a histogram.
 			pr.merge(entry.asTraceKeyAndHistogram(metadata))
 		case entry.Type == "GenericSet":
 			metadata[entry.GUID] = entry.genericSet
 		case entry.Type == "DateRange":
 			metadata[entry.GUID] = entry.dateRange
 		case entry.Type == "RelatedNameMap":
 			metadata[entry.GUID] = entry.relatedNameMap
 		}
 	}
 	return nil
 }

 // GetSampleValues returns all the sampled values recorded for the given chart,
 // concatenated across every trace key whose ChartName matches. The result is nil
 // when nothing matches.
 //
 // Deprecated: this merges the samples from all the stories, so it is only meant for
 // results files that contain only one story. This is the only use case for cabe.
 func (pr *PerfResults) GetSampleValues(chart string) []float64 {
 	var samples []float64
 	for key, hist := range pr.Histograms {
 		if key.ChartName != chart {
 			continue
 		}
 		samples = append(samples, hist.SampleValues...)
 	}
 	return samples
 }

 // MergeResults merges every histogram of other into pr: sample values stored under
 // a key that pr already has are appended to pr's samples, and new keys are added.
 //
 // other is left unchanged. A nil other is treated as having no histograms.
 func (pr *PerfResults) MergeResults(other *PerfResults) {
 	if other == nil {
 		return
 	}
 	for key, hist := range other.Histograms {
 		pr.merge(key, hist)
 	}
 }

 // merge adds the sample values of other to the histogram stored under key,
 // creating the entry when the key is not present yet.
 //
 // The Histograms map is created on demand, so merging into a zero-value
 // PerfResults works.
 func (pr *PerfResults) merge(key TraceKey, other Histogram) {
 	if pr.Histograms == nil {
 		pr.Histograms = make(map[TraceKey]Histogram)
 	}
 	if h, ok := pr.Histograms[key]; ok {
 		other.SampleValues = append(h.SampleValues, other.SampleValues...)
 	} else {
 		// Copy on first insertion: storing the caller's slice directly would let a
 		// later append write into a backing array that is shared with the caller
 		// (or with another PerfResults the same histogram was merged into).
 		other.SampleValues = slices.Clone(other.SampleValues)
 	}
 	pr.Histograms[key] = other
 }

 // Min returns the smallest sample value, or NaN when the histogram has no samples.
 func (h Histogram) Min() float64 {
 	// slices.Min panics on an empty slice; report "no value" as NaN instead, the
 	// same way Aggregate does for empty histograms.
 	if len(h.SampleValues) == 0 {
 		return math.NaN()
 	}
 	return slices.Min(h.SampleValues)
 }

 // Max returns the largest sample value, or NaN when the histogram has no samples.
 func (h Histogram) Max() float64 {
 	// slices.Max panics on an empty slice; report "no value" as NaN instead, the
 	// same way Aggregate does for empty histograms.
 	if len(h.SampleValues) == 0 {
 		return math.NaN()
 	}
 	return slices.Max(h.SampleValues)
 }

 // Count returns the number of sample values held by the histogram.
 func (h Histogram) Count() int {
 	samples := h.SampleValues
 	return len(samples)
 }

 // Mean returns the arithmetic mean of the sample values, i.e. Sum divided by
 // Count. For a histogram without samples this is 0/0, which is NaN.
 func (h Histogram) Mean() float64 {
 	total := h.Sum()
 	n := float64(h.Count())
 	return total / n
 }

 // Stddev returns the sample standard deviation of the sample values: the square
 // root of the sum of squared deviations from the mean, divided by Count-1.
 //
 // It returns NaN when the histogram holds fewer than two samples, because the
 // divisor Count-1 is then zero or negative and the result is undefined.
 func (h Histogram) Stddev() float64 {
 	n := h.Count()
 	if n < 2 {
 		return math.NaN()
 	}
 	mean := h.Sum() / float64(n)
 	sumSquares := 0.0
 	for _, x := range h.SampleValues {
 		d := x - mean
 		sumSquares += d * d
 	}
 	return math.Sqrt(sumSquares / float64(n-1))
 }

 // Sum returns the sum of all sample values, accumulated in slice order. It is 0
 // for a histogram without samples.
 func (h Histogram) Sum() float64 {
 	var total float64
 	for _, v := range h.SampleValues {
 		total += v
 	}
 	return total
 }
	package perfresults

	import (
	"encoding/json"
	"io"
	"math"
	"slices"

	"go.skia.org/infra/go/skerr"
	"go.skia.org/infra/go/sklog"
	)

	// PerfResults represents the contents of a perf_results.json file generated by a
	// telemetry-based benchmark. The full format is not formally defined, but some
	// documentation for it exists in various places. The most comprehensive doc is
	// https://chromium.googlesource.com/external/github.com/catapult-project/catapult/+/HEAD/docs/Histogram-set-json-format.md
	type PerfResults struct {
	// Histograms holds the sample values of every trace, keyed by the TraceKey
	// that identifies the trace.
	Histograms map[TraceKey]Histogram
	}

	// NonEmptyHistogramNames lists the chart name of every histogram that holds at
	// least one sample value.
	//
	// The names follow map iteration order, so their ordering is unspecified, and a
	// chart name is reported once per matching trace key. The result is never nil.
	func (pr *PerfResults) NonEmptyHistogramNames() []string {
		names := []string{}
		for key, hist := range pr.Histograms {
			if len(hist.SampleValues) == 0 {
				continue
			}
			names = append(names, key.ChartName)
		}
		return names
	}

	// TraceKey is a unique identifier for one trace.
	//
	// The Telemetry trace from one perf run is identified as:
	// ChromePerf/[BotConfig]/[benchmark]/[ChartName]/[Story] where
	//   - BotConfig is usually the build name that defines the bot dimension where it runs,
	//   - benchmark is a collection of runs,
	//   - ChartName is a specific measurement,
	//   - Story is a specific user journey that collects the measurement.
	//
	// TraceKey is used as the key of PerfResults.Histograms.
	type TraceKey struct {
	// ChartName is a specific measurement, this is also equivalent to metric.
	ChartName string `json:"chart"`

	// Unit is usually tied to the chart, we still save it for reference.
	Unit string `json:"unit"`

	// Story is a specific user journey that collects the metrics (ChartName).
	Story string `json:"story"`

	// Architecture and OSName are defined by the BotConfig. Within each perf result file
	// this should be a unique combo throughout, as it runs on a single machine. In other cases,
	// the key pair can identify a unique trace because they are running on a different machine.
	Architecture string `json:"arch"`
	OSName string `json:"os"`

	// The following diagnostics are ignored for now; they are not used for analysis.
	// benchmarkDescriptions
	// benchmarkStart
	// benchmarks
	// storysetRepeats
	// traceStart
	// traceUrls
	// botId
	// owners
	// osVersions
	// osDetailedVersions
	}

	// Histogram is an individual benchmark measurement.
	type Histogram struct {
	// SampleValues holds the raw sample values of the measurement, as read from
	// the "sampleValues" field of a histogram entry.
	SampleValues []float64
	}

	// AggregationMapping maps the string literals to the aggregation methods to be used in the
	// trace generations and user-facing JSON/OpenAPIs.
	//
	// Histogram.Aggregate looks the requested method up in this map by name.
	var AggregationMapping = map[string]func(Histogram) float64{
	"max": Histogram.Max,
	"min": Histogram.Min,
	"mean": Histogram.Mean,
	"std": Histogram.Stddev,
	"sum": Histogram.Sum,
	// Count returns an int, so it is adapted to the common float64 signature.
	"count": func(h Histogram) float64 {
	return float64(h.Count())
	},
	}

	// histogramRaw is the JSON shape of a histogram entry as it appears in the
	// results file, before its diagnostics are resolved into a TraceKey.
	type histogramRaw struct {
	// Name becomes the ChartName of the trace key; Unit becomes its Unit.
	Name string `json:"name"`
	Unit string `json:"unit"`

	// optional fields
	Description string `json:"description"`
	SampleValues []float64 `json:"sampleValues"`
	// Diagnostics maps a diagnostic key to a guid, which points to e.g. a genericSet.
	Diagnostics map[string]string `json:"diagnostics"`
	}

	// genericSet is a normalized value that other parts of the json file can reference by guid.
	type genericSet struct {
	// Values is decoded as []any because the elements can be strings or numbers.
	Values []any `json:"values"` // Can be string or number. sigh.
	}

	// dateRange is a range of dates, decoded from the "min" and "max" fields of an entry.
	//
	// NOTE(review): the unit of Min and Max is not visible here (presumably a
	// timestamp) — confirm against the format documentation.
	type dateRange struct {
	Min float64 `json:"min"`
	Max float64 `json:"max"`
	}

	// relatedNameMap is a map from short names to full histogram names.
	type relatedNameMap struct {
	// Names is decoded from the "names" field of the entry.
	Names map[string]string `json:"names"`
	}

	// singleEntry is one element of the top-level array of the results file.
	//
	// It embeds every entry shape this package handles so that any element can be
	// decoded into the same struct. The parsers then tell the shapes apart: an entry
	// with a non-empty Name is a histogram, otherwise Type selects between
	// "GenericSet", "DateRange" and "RelatedNameMap".
	type singleEntry struct {
	Type string `json:"type"`
	// GUID is the identifier histograms use in their diagnostics to refer to this entry.
	GUID string `json:"guid"`

	histogramRaw
	genericSet
	dateRange
	relatedNameMap
	}

	// asTraceKeyAndHistogram converts raw data into a unique trace key and histogram samples.
	//
	// metadata maps a guid to the entry it identifies (genericSet, dateRange or
	// relatedNameMap). The "architectures", "osNames" and "stories" diagnostics of the
	// histogram are resolved through it. The diagnostic keys are plural, but only the
	// first value of the referenced set is used.
	//
	// A diagnostic that is absent leaves the corresponding TraceKey field empty. A
	// diagnostic that cannot be resolved to a string (unknown guid, entry that is not
	// a GenericSet, empty set, non-string first value) also leaves the field empty and
	// logs a warning instead of panicking, since the file content is not under our control.
	func (hr *histogramRaw) asTraceKeyAndHistogram(metadata map[string]any) (TraceKey, Histogram) {
		// firstString returns the first value of the GenericSet referenced by the
		// given diagnostic, or "" when it cannot be resolved.
		firstString := func(diag string) string {
			guid, ok := hr.Diagnostics[diag]
			if !ok {
				return ""
			}
			v, ok := metadata[guid]
			if !ok {
				sklog.Warningf("Unable to find the value for %s (%v).", diag, guid)
				return ""
			}
			set, ok := v.(genericSet)
			if !ok {
				sklog.Warningf("Unexpected value for %s (%v): %T is not a GenericSet.", diag, guid, v)
				return ""
			}
			if len(set.Values) == 0 {
				sklog.Warningf("Empty GenericSet for %s (%v).", diag, guid)
				return ""
			}
			s, ok := set.Values[0].(string)
			if !ok {
				sklog.Warningf("Unexpected value for %s (%v): first value %v is not a string.", diag, guid, set.Values[0])
				return ""
			}
			return s
		}

		tk := TraceKey{
			ChartName: hr.Name,
			Unit:      hr.Unit,
		}
		tk.Architecture = firstString("architectures")
		tk.OSName = firstString("osNames")
		tk.Story = firstString("stories")
		return tk, Histogram{SampleValues: hr.SampleValues}
	}

	// NewResults creates a new PerfResults from the given data stream.
	//
	// It decodes the data in a streaming manner to reduce the memory footprint as the JSON files
	// are sometimes bigger than 10MB.
	func NewResults(r io.Reader) (*PerfResults, error) {
	pr := &PerfResults{
	Histograms: make(map[TraceKey]Histogram),
	}
	decoder := json.NewDecoder(r)

	// perf_results.json is an array of objects
	// read the open '['
	t, err := decoder.Token()

	// don't panic on an empty file
	if err == io.EOF {
	return pr, nil
	}
	if err != nil {
	return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok \|\| delim.String() != "[" {
	return nil, skerr.Fmt("expecting the open '['")
	}

	// metadata only useful within the file scope.
	md := make(map[string]any)

	// looping all the elements
	for decoder.More() {
	var entry singleEntry
	err := decoder.Decode(&entry)
	if err != nil {
	return nil, skerr.Wrap(err)
	}
	// If Name is not empty, it is a histogram
	if entry.Name != "" {
	pr.merge(entry.asTraceKeyAndHistogram(md))
	continue
	}
	switch entry.Type {
	case "GenericSet":
	md[entry.GUID] = entry.genericSet
	case "DateRange":
	md[entry.GUID] = entry.dateRange
	case "RelatedNameMap":
	md[entry.GUID] = entry.relatedNameMap
	}
	}

	t, err = decoder.Token()
	if err != nil {
	return nil, skerr.Wrap(err)
	}
	if delim, ok := t.(json.Delim); !ok \|\| delim.String() != "]" {
	return nil, skerr.Fmt("expecting the closing ']'")
	}

	return pr, nil
	}

	// Aggregate reduces the histogram to a single value using the aggregation method
	// registered under the given name in AggregationMapping.
	//
	// It returns NaN when the method is unknown or when the histogram holds no
	// sample values.
	func (h Histogram) Aggregate(method string) float64 {
		if len(h.SampleValues) == 0 {
			return math.NaN()
		}
		aggregate, known := AggregationMapping[method]
		if !known {
			return math.NaN()
		}
		return aggregate(h)
	}

	// UnmarshalJSON parses a byte slice holding a JSON array of entries into pr,
	// replacing whatever histograms pr held before.
	//
	// Entries with a non-empty name are histograms and are merged into pr. GenericSet,
	// DateRange and RelatedNameMap entries are remembered by GUID so that the
	// histograms that follow them can resolve their diagnostics.
	//
	// This should be deprecated in favor of streaming decoding.
	func (pr *PerfResults) UnmarshalJSON(data []byte) error {
		pr.Histograms = make(map[TraceKey]Histogram)

		var elements []json.RawMessage
		if err := json.Unmarshal(data, &elements); err != nil {
			return err
		}

		metadata := map[string]any{}
		for i := range elements {
			var entry singleEntry
			if err := json.Unmarshal(elements[i], &entry); err != nil {
				return err
			}
			switch {
			case entry.Name != "":
				// A non-empty Name marks the entry as a histogram.
				pr.merge(entry.asTraceKeyAndHistogram(metadata))
			case entry.Type == "GenericSet":
				metadata[entry.GUID] = entry.genericSet
			case entry.Type == "DateRange":
				metadata[entry.GUID] = entry.dateRange
			case entry.Type == "RelatedNameMap":
				metadata[entry.GUID] = entry.relatedNameMap
			}
		}
		return nil
	}

	// GetSampleValues returns all the sampled values recorded for the given chart,
	// concatenated across every trace key whose ChartName matches. The result is nil
	// when nothing matches.
	//
	// Deprecated: this merges the samples from all the stories, so it is only meant for
	// results files that contain only one story. This is the only use case for cabe.
	func (pr *PerfResults) GetSampleValues(chart string) []float64 {
		var samples []float64
		for key, hist := range pr.Histograms {
			if key.ChartName != chart {
				continue
			}
			samples = append(samples, hist.SampleValues...)
		}
		return samples
	}

	// MergeResults merges every histogram of other into pr: sample values stored under
	// a key that pr already has are appended to pr's samples, and new keys are added.
	//
	// pr is received by value, but the copy refers to the same Histograms map as the
	// caller's value, so the merged entries are visible to the caller.
	func (pr PerfResults) MergeResults(other PerfResults) {
		for key := range other.Histograms {
			pr.merge(key, other.Histograms[key])
		}
	}

	// merge adds the sample values of other to the histogram stored under key,
	// creating the entry when the key is not present yet.
	//
	// The Histograms map is created on demand, so merging into a zero-value
	// PerfResults works.
	func (pr *PerfResults) merge(key TraceKey, other Histogram) {
		if pr.Histograms == nil {
			pr.Histograms = make(map[TraceKey]Histogram)
		}
		if h, ok := pr.Histograms[key]; ok {
			other.SampleValues = append(h.SampleValues, other.SampleValues...)
		} else {
			// Copy on first insertion: storing the caller's slice directly would let a
			// later append write into a backing array that is shared with the caller
			// (or with another PerfResults the same histogram was merged into).
			other.SampleValues = slices.Clone(other.SampleValues)
		}
		pr.Histograms[key] = other
	}

	// Min returns the smallest sample value, or NaN when the histogram has no samples.
	func (h Histogram) Min() float64 {
		// slices.Min panics on an empty slice; report "no value" as NaN instead, the
		// same way Aggregate does for empty histograms.
		if len(h.SampleValues) == 0 {
			return math.NaN()
		}
		return slices.Min(h.SampleValues)
	}

	// Max returns the largest sample value, or NaN when the histogram has no samples.
	func (h Histogram) Max() float64 {
		// slices.Max panics on an empty slice; report "no value" as NaN instead, the
		// same way Aggregate does for empty histograms.
		if len(h.SampleValues) == 0 {
			return math.NaN()
		}
		return slices.Max(h.SampleValues)
	}

	// Count returns the number of sample values held by the histogram.
	func (h Histogram) Count() int {
		samples := h.SampleValues
		return len(samples)
	}

	// Mean returns the arithmetic mean of the sample values, i.e. Sum divided by
	// Count. For a histogram without samples this is 0/0, which is NaN.
	func (h Histogram) Mean() float64 {
		total := h.Sum()
		n := float64(h.Count())
		return total / n
	}

	// Stddev returns the sample standard deviation of the sample values: the square
	// root of the sum of squared deviations from the mean, divided by Count-1.
	//
	// It returns NaN when the histogram holds fewer than two samples, because the
	// divisor Count-1 is then zero or negative and the result is undefined.
	func (h Histogram) Stddev() float64 {
		n := h.Count()
		if n < 2 {
			return math.NaN()
		}
		mean := h.Sum() / float64(n)
		sumSquares := 0.0
		for _, x := range h.SampleValues {
			d := x - mean
			sumSquares += d * d
		}
		return math.Sqrt(sumSquares / float64(n-1))
	}

	// Sum returns the sum of all sample values, accumulated in slice order. It is 0
	// for a histogram without samples.
	func (h Histogram) Sum() float64 {
		var total float64
		for _, v := range h.SampleValues {
			total += v
		}
		return total
	}