[perf] Add vec32.TwoSidedStdDev. Change-Id: I0e68c714482f067f2ecfccbd9ab9b20cc40d392a Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/324096 Commit-Queue: Joe Gregorio <jcgregorio@google.com> Reviewed-by: Joe Gregorio <jcgregorio@google.com>

commit: 234017854268f1bf9db438f898b797775b2b5cc9 [log] [tgz]
author: Joe Gregorio <jcgregorio@google.com> Fri Oct 09 11:30:19 2020 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> Fri Oct 09 15:51:01 2020 +0000
tree: 2405a7767501b6601ca9eeaf246d797d407c7bdf
parent: d9d3604c6b9e13463f12c643aaccd990346a3b0e [diff]
diff --git a/go/vec32/vec.go b/go/vec32/vec.go
index 6a11154..e5d2552 100644
--- a/go/vec32/vec.go
+++ b/go/vec32/vec.go

@@ -4,6 +4,7 @@
 import (
 	"fmt"
 	"math"
+	"sort"
 )
 
 const (
@@ -52,6 +53,64 @@
 	return mean, stddev, nil
 }
 
+// stddev only works on arrays that have no MISSING_DATA_SENTINEL values.
+func stddev(arr []float32, middle float32) float32 {
+	sum := float32(0)
+	for _, x := range arr {
+		sum += (x - middle) * (x - middle)
+	}
+	return float32(math.Sqrt(float64(sum) / float64(len(arr)-1)))
+}
+
+type float32Slice []float32
+
+func (p float32Slice) Len() int           { return len(p) }
+func (p float32Slice) Less(i, j int) bool { return p[i] < p[j] }
+func (p float32Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
+
+// TwoSidedStdDev returns the median, and the stddev of all the points below and
+// above the median respectively.
+//
+// That is, the vector is sorted, the median found, and then the stddev of all
+// the points below the median are returned, along with the stddev of all the
+// points above the median.
+//
+// This is useful because performance measurements are inherintly asymmetric. A
+// benchmark can always run 2x slower, or 10x slower, there's no upper bound. On
+// the other hand a performance metric can only run 100% faster, i.e. have a
+// value of 0. This implies that the distribution of samples from a benchmark
+// are skewed.
+//
+// MISSING_DATA_SENTINELs are ignored.
+//
+// The median is chosen as the midpoint instead of the mean because that ensures
+// that both sides have the same number of points (+/- 1).
+func TwoSidedStdDev(arr []float32) (float32, float32, float32, error) {
+	// Allocate a new slice since we need to sort the incoming values in the
+	// array.
+	values := make([]float32, 0, len(arr))
+	count := 0
+	for _, x := range arr {
+		if x != MissingDataSentinel {
+			count += 1
+			values = append(values, x)
+		}
+	}
+	if count < 4 {
+		return 0, 0, 0, fmt.Errorf("Insufficient number of points, at least 4 are needed: %d", len(values))
+	}
+	sort.Sort(float32Slice(values))
+
+	mid := len(values) / 2
+	var median float32
+	if len(values)%2 == 1 {
+		median = values[mid]
+	} else {
+		median = (values[mid-1] + values[mid]) / 2
+	}
+	return median, stddev(values[:mid], median), stddev(values[mid:], median), nil
+}
+
 // ScaleBy divides each non-sentinel value in the slice by 'b', converting
 // resulting NaNs and Infs into sentinel values.
 func ScaleBy(a []float32, b float32) {

diff --git a/go/vec32/vec_test.go b/go/vec32/vec_test.go
index d01ae1d..7d03b36 100644
--- a/go/vec32/vec_test.go
+++ b/go/vec32/vec_test.go

@@ -503,3 +503,77 @@
 		})
 	}
 }
+
+func TestStddev(t *testing.T) {
+	unittest.SmallTest(t)
+
+	// Try sample data with a known integer stddev to confirm the calculation.
+	assert.Equal(t, float32(2), stddev([]float32{-1, -1, 1, -2, 3}, 0))
+}
+
+func TestTwoSidedStdDev(t *testing.T) {
+	unittest.SmallTest(t)
+	tests := []struct {
+		name         string
+		arr          []float32
+		median       float32
+		lower, upper float32
+		hasError     bool
+	}{
+		{
+			"EmptyArray_Error",
+			[]float32{},
+			0,
+			0, 0,
+			true,
+		},
+		{
+			"InsufficientNonMissingData_Error",
+			[]float32{e, 1, 1, 2},
+			0,
+			0, 0,
+			true,
+		},
+		{
+			"ConstantArray_ZeroStdDevs",
+			[]float32{1, 1, 1, 1},
+			1,
+			0, 0,
+			false,
+		},
+		{
+			"ConfirmSorting_UpperHasNonZeroStdDev",
+			[]float32{1, 2, 1, 1},
+			1,
+			0, 1,
+			false,
+		},
+		{
+			"MissingDataValuesAreIgnored",
+			[]float32{1, 2, 1, 1, e, e, e},
+			1,
+			0, 1,
+			false,
+		},
+		{
+			"AnOddNumberOfValuesInArray",
+			[]float32{2, 2, 1, 1, 1},
+			1,
+			0, 1,
+			false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			median, lower, upper, err := TwoSidedStdDev(tt.arr)
+			if tt.hasError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+			assert.Equal(t, tt.median, median, "median")
+			assert.Equal(t, tt.lower, lower, "lower")
+			assert.Equal(t, tt.upper, upper, "upper")
+		})
+	}
+}
commit	234017854268f1bf9db438f898b797775b2b5cc9	[log] [tgz]
author	Joe Gregorio <jcgregorio@google.com>	Fri Oct 09 11:30:19 2020 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	Fri Oct 09 15:51:01 2020 +0000
tree	2405a7767501b6601ca9eeaf246d797d407c7bdf
parent	d9d3604c6b9e13463f12c643aaccd990346a3b0e [diff]