[perf] Add vec32.TwoSidedStdDev.
Change-Id: I0e68c714482f067f2ecfccbd9ab9b20cc40d392a
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/324096
Commit-Queue: Joe Gregorio <jcgregorio@google.com>
Reviewed-by: Joe Gregorio <jcgregorio@google.com>
diff --git a/go/vec32/vec.go b/go/vec32/vec.go
index 6a11154..e5d2552 100644
--- a/go/vec32/vec.go
+++ b/go/vec32/vec.go
@@ -4,6 +4,7 @@
import (
"fmt"
"math"
+ "sort"
)
const (
@@ -52,6 +53,64 @@
return mean, stddev, nil
}
+// stddev only works on arrays that have no MISSING_DATA_SENTINEL values.
+func stddev(arr []float32, middle float32) float32 {
+ sum := float32(0)
+ for _, x := range arr {
+ sum += (x - middle) * (x - middle)
+ }
+ return float32(math.Sqrt(float64(sum) / float64(len(arr)-1)))
+}
+
+type float32Slice []float32
+
+func (p float32Slice) Len() int { return len(p) }
+func (p float32Slice) Less(i, j int) bool { return p[i] < p[j] }
+func (p float32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// TwoSidedStdDev returns the median, and the stddev of all the points below and
+// above the median respectively.
+//
+// That is, the vector is sorted, the median found, and then the stddev of all
+// the points below the median are returned, along with the stddev of all the
+// points above the median.
+//
+// This is useful because performance measurements are inherintly asymmetric. A
+// benchmark can always run 2x slower, or 10x slower, there's no upper bound. On
+// the other hand a performance metric can only run 100% faster, i.e. have a
+// value of 0. This implies that the distribution of samples from a benchmark
+// are skewed.
+//
+// MISSING_DATA_SENTINELs are ignored.
+//
+// The median is chosen as the midpoint instead of the mean because that ensures
+// that both sides have the same number of points (+/- 1).
+func TwoSidedStdDev(arr []float32) (float32, float32, float32, error) {
+ // Allocate a new slice since we need to sort the incoming values in the
+ // array.
+ values := make([]float32, 0, len(arr))
+ count := 0
+ for _, x := range arr {
+ if x != MissingDataSentinel {
+ count += 1
+ values = append(values, x)
+ }
+ }
+ if count < 4 {
+ return 0, 0, 0, fmt.Errorf("Insufficient number of points, at least 4 are needed: %d", len(values))
+ }
+ sort.Sort(float32Slice(values))
+
+ mid := len(values) / 2
+ var median float32
+ if len(values)%2 == 1 {
+ median = values[mid]
+ } else {
+ median = (values[mid-1] + values[mid]) / 2
+ }
+ return median, stddev(values[:mid], median), stddev(values[mid:], median), nil
+}
+
// ScaleBy divides each non-sentinel value in the slice by 'b', converting
// resulting NaNs and Infs into sentinel values.
func ScaleBy(a []float32, b float32) {
diff --git a/go/vec32/vec_test.go b/go/vec32/vec_test.go
index d01ae1d..7d03b36 100644
--- a/go/vec32/vec_test.go
+++ b/go/vec32/vec_test.go
@@ -503,3 +503,77 @@
})
}
}
+
+func TestStddev(t *testing.T) {
+ unittest.SmallTest(t)
+
+ // Try sample data with a known integer stddev to confirm the calculation.
+ assert.Equal(t, float32(2), stddev([]float32{-1, -1, 1, -2, 3}, 0))
+}
+
+func TestTwoSidedStdDev(t *testing.T) {
+ unittest.SmallTest(t)
+ tests := []struct {
+ name string
+ arr []float32
+ median float32
+ lower, upper float32
+ hasError bool
+ }{
+ {
+ "EmptyArray_Error",
+ []float32{},
+ 0,
+ 0, 0,
+ true,
+ },
+ {
+ "InsufficientNonMissingData_Error",
+ []float32{e, 1, 1, 2},
+ 0,
+ 0, 0,
+ true,
+ },
+ {
+ "ConstantArray_ZeroStdDevs",
+ []float32{1, 1, 1, 1},
+ 1,
+ 0, 0,
+ false,
+ },
+ {
+ "ConfirmSorting_UpperHasNonZeroStdDev",
+ []float32{1, 2, 1, 1},
+ 1,
+ 0, 1,
+ false,
+ },
+ {
+ "MissingDataValuesAreIgnored",
+ []float32{1, 2, 1, 1, e, e, e},
+ 1,
+ 0, 1,
+ false,
+ },
+ {
+ "AnOddNumberOfValuesInArray",
+ []float32{2, 2, 1, 1, 1},
+ 1,
+ 0, 1,
+ false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ median, lower, upper, err := TwoSidedStdDev(tt.arr)
+ if tt.hasError {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ }
+ assert.Equal(t, tt.median, median, "median")
+ assert.Equal(t, tt.lower, lower, "lower")
+ assert.Equal(t, tt.upper, upper, "upper")
+ })
+ }
+}