[pinpoint] create comparePairwise and comparePairwiseActivity comparePairwise wraps PairwiseWilcoxonSignedRankedTestResult and concludes whether the result is statistically significant or not. Bug: b/321306810 Change-Id: Id63d48c31933d2b18454a9bae20fba1195d999a4 Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/839596 Commit-Queue: Leina Sun <sunxiaodi@google.com> Reviewed-by: Jeff Yoon <jeffyoon@google.com>

commit: dcc727b87ab620f6542db3c763b8cf4ec70d22c0 [log] [tgz]
author: Leina Sun <sunxiaodi@google.com> Tue Apr 16 22:23:29 2024 +0000
committer: SkCQ <skcq-be@skia-corp.google.com.iam.gserviceaccount.com> Thu Apr 18 16:56:27 2024 +0000
tree: 4c630217b8f72a0bd8b692672b78694566d2fe6c
parent: d82ac8c4d178ea5eb881492e3c8de794a3a5f182 [diff]
diff --git a/pinpoint/go/compare/BUILD.bazel b/pinpoint/go/compare/BUILD.bazel
index dcc8bd4..68d2ea0 100644
--- a/pinpoint/go/compare/BUILD.bazel
+++ b/pinpoint/go/compare/BUILD.bazel

@@ -13,6 +13,7 @@
     deps = [
         "//go/skerr",
         "//go/sklog",
+        "//pinpoint/go/compare/stats",
         "//pinpoint/go/compare/thresholds",
     ],
 )
@@ -26,6 +27,7 @@
     ],
     embed = [":compare"],
     deps = [
+        "//pinpoint/go/compare/thresholds",
         "@com_github_stretchr_testify//assert",
         "@com_github_stretchr_testify//require",
     ],

diff --git a/pinpoint/go/compare/compare.go b/pinpoint/go/compare/compare.go
index 463cf9a..32b638f 100644
--- a/pinpoint/go/compare/compare.go
+++ b/pinpoint/go/compare/compare.go

@@ -36,6 +36,7 @@
 
 	"go.skia.org/infra/go/skerr"
 	"go.skia.org/infra/go/sklog"
+	"go.skia.org/infra/pinpoint/go/compare/stats"
 	"go.skia.org/infra/pinpoint/go/compare/thresholds"
 )
 
@@ -96,6 +97,47 @@
 	return sum / float64(len(arr))
 }
 
+// ComparePairwiseResult contains the results of a pairwise comparison
+// between two samples
+type ComparePairwiseResult struct {
+	// Verdict is the outcome of the statistical analysis which is Same or Different.
+	// Note that pairwise does not have an Unknown verdict.
+	Verdict Verdict
+	// stats.PairwiseWilcoxonSignedRankedTestResult is the result of the Pairwise
+	// statistical analysis.
+	stats.PairwiseWilcoxonSignedRankedTestResult
+}
+
+// ComparePairwise wraps PairwiseWilcoxonSignedRankedTest.
+// TODO(sunxiaodi@): Add additional data manipulation CABE does to ensure
+// data is valid prior to analysis.
+func ComparePairwise(valuesA, valuesB []float64, dir ImprovementDir) (*ComparePairwiseResult, error) {
+	// The pairwise test is intentionally designed to receive the input backwards.
+	result, err := stats.PairwiseWilcoxonSignedRankedTest(valuesB, valuesA, stats.TwoSided, stats.LogTransform)
+	if err != nil {
+		return nil, skerr.Wrap(err)
+	}
+	// return same if improvement
+	if (dir == Up && result.LowerCi > 0) || (dir == Down && result.UpperCi < 0) {
+		return &ComparePairwiseResult{
+			Verdict:                                Same,
+			PairwiseWilcoxonSignedRankedTestResult: result,
+		}, nil
+	}
+	// at p-value < 0.05, it is unlikely for the 95% confidence interval to encompass 0,
+	// but we enforce a check just in case.
+	if result.PValue < thresholds.LowThreshold && result.LowerCi*result.UpperCi > 0 {
+		return &ComparePairwiseResult{
+			Verdict:                                Different,
+			PairwiseWilcoxonSignedRankedTestResult: result,
+		}, nil
+	}
+	return &ComparePairwiseResult{
+		Verdict:                                Same,
+		PairwiseWilcoxonSignedRankedTestResult: result,
+	}, nil
+}
+
 // Based on https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/job_state.py;drc=94f2bff5159bf660910b35c39426102c5982c4a4;l=356
 // the default functional analysis error rate expected is 1.0 for all bisections
 // pivoting to functional analysis.

diff --git a/pinpoint/go/compare/compare_test.go b/pinpoint/go/compare/compare_test.go
index 1ec3993..537b3cc 100644
--- a/pinpoint/go/compare/compare_test.go
+++ b/pinpoint/go/compare/compare_test.go

@@ -6,6 +6,7 @@
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"go.skia.org/infra/pinpoint/go/compare/thresholds"
 )
 
 func convertNormalizedToRawMagnitude(a, b []float64, normMagnitude float64) float64 {
@@ -144,3 +145,69 @@
 	y = []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
 	test("x > y, ImprovementDir = Up", x, y, Up)
 }
+
+func TestComparePairwise_GivenImprovement_ReturnsSame(t *testing.T) {
+	test := func(name string, x, y []float64, dir ImprovementDir) {
+		t.Run(name, func(t *testing.T) {
+			result, err := ComparePairwise(x, y, dir)
+			require.NoError(t, err)
+			require.LessOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+			assert.Equal(t, Same, result.Verdict)
+			switch dir {
+			case Up:
+				assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+			case Down:
+				assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi)
+			}
+		})
+	}
+	var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	var y = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+	test("x < y, ImprovementDir = Up", x, y, Up)
+
+	x = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+	y = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	test("x > y, ImprovementDir = Down", x, y, Down)
+}
+
+func TestComparePairwise_GivenRegression_ReturnsDifferent(t *testing.T) {
+	test := func(name string, x, y []float64, dir ImprovementDir) {
+		t.Run(name, func(t *testing.T) {
+			result, err := ComparePairwise(x, y, dir)
+			require.NoError(t, err)
+			require.LessOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+			assert.Equal(t, Different, result.Verdict)
+			assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi*result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+			switch dir {
+			case Up:
+				assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+			case Down:
+				assert.Positive(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi)
+			}
+		})
+	}
+	var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	var y = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+	test("x < y, ImprovementDir = Down", x, y, Down)
+	test("x < y, ImprovementDir = Unknown", x, y, UnknownDir)
+
+	x = []float64{7, 8, 9, 10, 11, 12, 13, 14, 15}
+	y = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	test("x > y, ImprovementDir = Up", x, y, Up)
+}
+
+func TestComparePairwise_GivenNoRegression_ReturnsSame(t *testing.T) {
+	test := func(name string, x, y []float64, dir ImprovementDir) {
+		t.Run(name, func(t *testing.T) {
+			result, err := ComparePairwise(x, y, dir)
+			require.NoError(t, err)
+			require.GreaterOrEqual(t, result.PairwiseWilcoxonSignedRankedTestResult.PValue, thresholds.LowThreshold)
+			assert.Equal(t, Same, result.Verdict)
+			assert.Negative(t, result.PairwiseWilcoxonSignedRankedTestResult.LowerCi*result.PairwiseWilcoxonSignedRankedTestResult.UpperCi)
+		})
+	}
+	var x = []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	var y = []float64{1.1, 1.9, 3.1, 3.9, 4.1, 4.9, 6.1, 7.9, 9.1}
+	test("ImprovementDir = Down", x, y, Down)
+	test("ImprovementDir = Unknown", x, y, UnknownDir)
+}

diff --git a/pinpoint/go/workflows/internal/compare.go b/pinpoint/go/workflows/internal/compare.go
index 4a632cf..b32034c 100644
--- a/pinpoint/go/workflows/internal/compare.go
+++ b/pinpoint/go/workflows/internal/compare.go

@@ -103,3 +103,8 @@
 
 	return result.Result, nil
 }
+
+// ComparePairwiseActivity wraps compare.ComparePairwise as a temporal activity
+func ComparePairwiseActivity(ctx workflow.Context, valuesA, valuesB []float64, chart string, dir compare.ImprovementDir) (*compare.ComparePairwiseResult, error) {
+	return compare.ComparePairwise(valuesA, valuesB, dir)
+}
commit	dcc727b87ab620f6542db3c763b8cf4ec70d22c0	[log] [tgz]
author	Leina Sun <sunxiaodi@google.com>	Tue Apr 16 22:23:29 2024 +0000
committer	SkCQ <skcq-be@skia-corp.google.com.iam.gserviceaccount.com>	Thu Apr 18 16:56:27 2024 +0000
tree	4c630217b8f72a0bd8b692672b78694566d2fe6c
parent	d82ac8c4d178ea5eb881492e3c8de794a3a5f182 [diff]