[pinpoint] create comparePairwise and comparePairwiseActivity
comparePairwise wraps PairwiseWilcoxonSignedRankedTestResult
and concludes whether the result is statistically significant
or not.
Bug: b/321306810
Change-Id: Id63d48c31933d2b18454a9bae20fba1195d999a4
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/839596
Commit-Queue: Leina Sun <sunxiaodi@google.com>
Reviewed-by: Jeff Yoon <jeffyoon@google.com>
diff --git a/pinpoint/go/compare/BUILD.bazel b/pinpoint/go/compare/BUILD.bazel
index dcc8bd4..68d2ea0 100644
--- a/pinpoint/go/compare/BUILD.bazel
+++ b/pinpoint/go/compare/BUILD.bazel
@@ -13,6 +13,7 @@
deps = [
"//go/skerr",
"//go/sklog",
+ "//pinpoint/go/compare/stats",
"//pinpoint/go/compare/thresholds",
],
)
@@ -26,6 +27,7 @@
],
embed = [":compare"],
deps = [
+ "//pinpoint/go/compare/thresholds",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
],
diff --git a/pinpoint/go/compare/compare.go b/pinpoint/go/compare/compare.go
index 463cf9a..32b638f 100644
--- a/pinpoint/go/compare/compare.go
+++ b/pinpoint/go/compare/compare.go
@@ -36,6 +36,7 @@
"go.skia.org/infra/go/skerr"
"go.skia.org/infra/go/sklog"
+ "go.skia.org/infra/pinpoint/go/compare/stats"
"go.skia.org/infra/pinpoint/go/compare/thresholds"
)
@@ -96,6 +97,47 @@
return sum / float64(len(arr))
}
+// ComparePairwiseResult contains the results of a pairwise comparison
+// between two samples
+type ComparePairwiseResult struct {
+ // Verdict is the outcome of the statistical analysis which is Same or Different.
+ // Note that pairwise does not have an Unknown verdict.
+ Verdict Verdict
+ // stats.PairwiseWilcoxonSignedRankedTestResult is the result of the Pairwise
+ // statistical analysis.
+ stats.PairwiseWilcoxonSignedRankedTestResult
+}
+
+// ComparePairwise wraps PairwiseWilcoxonSignedRankedTest.
+// TODO(sunxiaodi@): Add additional data manipulation CABE does to ensure
+// data is valid prior to analysis.
+func ComparePairwise(valuesA, valuesB []float64, dir ImprovementDir) (*ComparePairwiseResult, error) {
+ // The pairwise test is intentionally designed to receive the input backwards.
+ result, err := stats.PairwiseWilcoxonSignedRankedTest(valuesB, valuesA, stats.TwoSided, stats.LogTransform)
+ if err != nil {
+ return nil, skerr.Wrap(err)
+ }
+ // return same if improvement
+ if (dir == Up && result.LowerCi > 0) || (dir == Down && result.UpperCi < 0) {
+ return &ComparePairwiseResult{
+ Verdict: Same,
+ PairwiseWilcoxonSignedRankedTestResult: result,
+ }, nil
+ }
+ // at p-value < 0.05, it is unlikely for the 95% confidence interval to encompass 0,
+ // but we enforce a check just in case.
+ if result.PValue < thresholds.LowThreshold && result.LowerCi*result.UpperCi > 0 {
+ return &ComparePairwiseResult{
+ Verdict: Different,
+ PairwiseWilcoxonSignedRankedTestResult: result,
+ }, nil
+ }
+ return &ComparePairwiseResult{
+ Verdict: Same,
+ PairwiseWilcoxonSignedRankedTestResult: result,
+ }, nil
+}
+
// Based on https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/job_state.py;drc=94f2bff5159bf660910b35c39426102c5982c4a4;l=356
// the default functional analysis error rate expected is 1.0 for all bisections
// pivoting to functional analysis.
diff --git a/pinpoint/go/compare/compare_test.go b/pinpoint/go/compare/compare_test.go
index 1ec3993..537b3cc 100644
--- a/pinpoint/go/compare/compare_test.go
+++ b/pinpoint/go/compare/compare_test.go
@@ -6,6 +6,7 @@
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+ "go.skia.org/infra/pinpoint/go/compare/thresholds"
)
func convertNormalizedToRawMagnitude(a, b []float64, normMagnitude float64) float64 {
@@ -144,3 +145,69 @@
y = []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
test("x > y, ImprovementDir = Up", x, y, Up)
}
+
+func TestComparePairwise_GivenImprovement_ReturnsSame(t *testing.T) {
+ test := func(name string, x, y []float64, dir ImprovementDir) {
+ t.Run(name, func(t *testing.T) {
+ result, err := ComparePairwise(x, y, dir)
+ require.NoError(t, err)
+ require.LessOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+ assert.Equal(t, Same, result.Verdict)
+ switch dir {
+ case Up:
+ assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+ case Down:
+ assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi)
+ }
+ })
+ }
+ var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+ var y = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+ test("x < y, ImprovementDir = Up", x, y, Up)
+
+ x = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+ y = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+ test("x > y, ImprovementDir = Down", x, y, Down)
+}
+
+func TestComparePairwise_GivenRegression_ReturnsDifferent(t *testing.T) {
+ test := func(name string, x, y []float64, dir ImprovementDir) {
+ t.Run(name, func(t *testing.T) {
+ result, err := ComparePairwise(x, y, dir)
+ require.NoError(t, err)
+ require.LessOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+ assert.Equal(t, Different, result.Verdict)
+ assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi*result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+ switch dir {
+ case Up:
+ assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+ case Down:
+ assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi)
+ }
+ })
+ }
+ var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+ var y = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+ test("x < y, ImprovementDir = Down", x, y, Down)
+ test("x < y, ImprovementDir = Unknown", x, y, UnknownDir)
+
+ x = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+ y = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+ test("x > y, ImprovementDir = Up", x, y, Up)
+}
+
+func TestComparePairwise_GivenNoRegression_ReturnsSame(t *testing.T) {
+ test := func(name string, x, y []float64, dir ImprovementDir) {
+ t.Run(name, func(t *testing.T) {
+ result, err := ComparePairwise(x, y, dir)
+ require.NoError(t, err)
+ require.GreaterOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+ assert.Equal(t, Same, result.Verdict)
+ assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi*result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+ })
+ }
+ var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+ var y = []float64{1.1, 1.9, 3.1, 3.9, 4.1, 4.9, 6.1, 7.9, 9.1}
+ test("ImprovementDir = Down", x, y, Down)
+ test("ImprovementDir = Unknown", x, y, UnknownDir)
+}
diff --git a/pinpoint/go/workflows/internal/compare.go b/pinpoint/go/workflows/internal/compare.go
index 4a632cf..b32034c 100644
--- a/pinpoint/go/workflows/internal/compare.go
+++ b/pinpoint/go/workflows/internal/compare.go
@@ -103,3 +103,8 @@
return result.Result, nil
}
+
+// ComparePairwiseActivity wraps compare.ComparePairwise as a temporal activity
+func ComparePairwiseActivity(ctx workflow.Context, valuesA, valuesB []float64, chart string, dir compare.ImprovementDir) (*compare.ComparePairwiseResult, error) {
+ return compare.ComparePairwise(valuesA, valuesB, dir)
+}