[bisection] implement benchmark interface This creates an interface for all benchmarks to implement. We start with telemetry, which deprecates telemetry_exp.go. It implements waterfall enabled gtests and story tags. All of pinpoint.go is updated to get the appropriate benchmark and Run() accordingly. Bug: b/318863812, b/318863812, b/321299939 Change-Id: Ie267f07f31a15df54dde1633711fc959e36b3e1b Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/814037 Reviewed-by: Hao Wu <haowoo@google.com> Commit-Queue: Jeff Yoon <jeffyoon@google.com>

diff --git a/pinpoint/go/bot_configs/bot_configs.go b/pinpoint/go/bot_configs/bot_configs.go
index 91e2ebf..95e76f7 100644
--- a/pinpoint/go/bot_configs/bot_configs.go
+++ b/pinpoint/go/bot_configs/bot_configs.go

@@ -38,8 +38,11 @@
 	// Browser is the name of the Chrome browser to run
 	// when testing benchmarks
 	Browser string `json:"browser"`
-	// Builder refers to the LUCI pool to build Chrome,
+	// Bucket refers to the LUCI pool to build Chrome,
 	// typically luci.chrome.try
+	Bucket string `json:"bucket"`
+	// Builder refers to the LUCI builder used to build Chrome,
+	// usually a compile builder.
 	Builder string `json:"builder"`
 	// Dimensions are used by Swarming to find the
 	// device pool to test benchmarks
@@ -50,6 +53,8 @@
 	// SwarmingServer is almost always
 	// https://chrome-swarming.appspot.com
 	SwarmingServer string `json:"swarming_server"`
+	// Bot is the original key used for this config.
+	Bot string
 }
 
 // GetBotConfig gets the config for a bot. Will only check internal
@@ -77,9 +82,14 @@
 		}
 		return BotConfig{}, fmt.Errorf(errMsg)
 	}
-	if cfg.Alias != "" {
-		return botConfigs[cfg.Alias], nil
+	alias := cfg.Alias
+	if alias != "" {
+		cfg = botConfigs[cfg.Alias]
+		cfg.Bot = alias
+		return cfg, nil
 	}
+
+	cfg.Bot = bot
 	return cfg, nil
 }
 

diff --git a/pinpoint/go/bot_configs/bot_configs_test.go b/pinpoint/go/bot_configs/bot_configs_test.go
index 23428a0..0358b6c 100644
--- a/pinpoint/go/bot_configs/bot_configs_test.go
+++ b/pinpoint/go/bot_configs/bot_configs_test.go

@@ -40,12 +40,6 @@
 			externalOnly:  false,
 			expectedError: true,
 		},
-		{
-			name:          "check alias",
-			bot:           "Android Go",
-			externalOnly:  true,
-			expectedError: false,
-		},
 	} {
 		t.Run(fmt.Sprintf("[%d] %s", i, test.name), func(t *testing.T) {
 			bot, err := GetBotConfig(test.bot, test.externalOnly)
@@ -62,3 +56,9 @@
 func TestValidateBotConfigs(t *testing.T) {
 	assert.NoError(t, validate())
 }
+
+func TestGetBotConfig_AliasedBuilder_CorrectReference(t *testing.T) {
+	cfg, err := GetBotConfig("Android Go", true)
+	assert.NoError(t, err)
+	assert.Equal(t, "android-go-perf", cfg.Bot)
+}

diff --git a/pinpoint/go/pinpoint/pinpoint.go b/pinpoint/go/pinpoint/pinpoint.go
index 6a6319d..56aff75 100644
--- a/pinpoint/go/pinpoint/pinpoint.go
+++ b/pinpoint/go/pinpoint/pinpoint.go

@@ -193,7 +193,7 @@
 			c.tests = &testMetadata{
 				req: c.createRunBenchmarkRequest(jobID, cfg, target, req),
 			}
-			tasks, err := c.scheduleRunBenchmark(ctx, pp.sc)
+			tasks, err := c.scheduleRunBenchmark(ctx, pp.sc, req)
 			if err != nil {
 				return resp, err
 			}
@@ -242,7 +242,7 @@
 			}
 			if res != nil {
 				spew.Dump(res)
-				culprit, err := cdl.updateCommitsByResult(ctx, pp.sc, pp.mc, res, left, right)
+				culprit, err := cdl.updateCommitsByResult(ctx, pp.sc, pp.mc, res, left, right, req)
 				if err != nil {
 					return resp, skerr.Wrapf(err, "could not update commitDataList after compare")
 				}
@@ -259,7 +259,7 @@
 			}
 			if res != nil {
 				spew.Dump(res)
-				culprit, err := cdl.updateCommitsByResult(ctx, pp.sc, pp.mc, res, left, right)
+				culprit, err := cdl.updateCommitsByResult(ctx, pp.sc, pp.mc, res, left, right, req)
 				if err != nil {
 					return resp, skerr.Wrapf(err, "could not update commitDataList after compare")
 				}
@@ -370,7 +370,7 @@
 }
 
 // scheduleRunBenchmark schedules run benchmark tests to swarming and returns the task IDs
-func (c *commitData) scheduleRunBenchmark(ctx context.Context, sc backends.SwarmingClient) ([]string, error) {
+func (c *commitData) scheduleRunBenchmark(ctx context.Context, sc backends.SwarmingClient, req *ppb.ScheduleBisectRequest) ([]string, error) {
 	if c.tests == nil || c.tests.req == nil {
 		return nil, skerr.Fmt("Cannot schedule benchmark runs without request")
 	}
@@ -381,11 +381,11 @@
 	}
 	if len(tasks) < maxSampleSize {
 		for i := 0; i < interval; i++ {
-			task, err := run_benchmark.Run(ctx, sc, *c.tests.req)
+			task, err := run_benchmark.Run(ctx, sc, req, c.tests.req.Commit, c.tests.req.JobID, c.tests.req.Build, 1)
 			if err != nil {
 				return nil, skerr.Wrapf(err, "Could not start run benchmark task for request %v", c.tests.req)
 			}
-			tasks = append(tasks, task)
+			tasks = append(tasks, task[0].TaskId)
 		}
 	}
 	return tasks, nil
@@ -513,8 +513,7 @@
 // updateCommitsByResult takes the compare results and determines the next
 // steps in the workflow. Changes are made to CommitDataList depending
 // on what the compare verdict is.
-func (cdl *commitDataList) updateCommitsByResult(ctx context.Context, sc backends.SwarmingClient, mh midpoint.MidpointHandler, res *compare.CompareResults,
-	left, right int) (*midpoint.Commit, error) {
+func (cdl *commitDataList) updateCommitsByResult(ctx context.Context, sc backends.SwarmingClient, mh midpoint.MidpointHandler, res *compare.CompareResults, left, right int, req *ppb.ScheduleBisectRequest) (*midpoint.Commit, error) {
 	if left < 0 || right >= len(cdl.commits) {
 		return nil, skerr.Fmt("cannot update commitDataList with left %d and right %d index out of bounds", left, right)
 	}
@@ -522,7 +521,7 @@
 		return nil, skerr.Fmt("cannot update commitDataList with left %d index >= right %d", left, right)
 	}
 	if res.Verdict == compare.Unknown {
-		return nil, cdl.runMoreTestsIfNeeded(ctx, sc, left, right)
+		return nil, cdl.runMoreTestsIfNeeded(ctx, sc, left, right, req)
 	} else if res.Verdict == compare.Different {
 		return cdl.findMidpointOrCulprit(ctx, mh, left, right)
 	}
@@ -555,9 +554,9 @@
 }
 
 // runMoreTestsIfNeeded adds more run_benchmark tasks to the left and right commit
-func (cdl *commitDataList) runMoreTestsIfNeeded(ctx context.Context, sc backends.SwarmingClient, left, right int) error {
+func (cdl *commitDataList) runMoreTestsIfNeeded(ctx context.Context, sc backends.SwarmingClient, left, right int, req *ppb.ScheduleBisectRequest) error {
 	c := cdl.commits[left]
-	tasks, err := c.scheduleRunBenchmark(ctx, sc)
+	tasks, err := c.scheduleRunBenchmark(ctx, sc, req)
 	if err != nil {
 		return skerr.Wrapf(err, "could not schedule more tasks for left commit [%d] %s", left, c.commit.GitHash[:7])
 	}
@@ -566,7 +565,7 @@
 		c.tests.isRunning = true
 	}
 	c = cdl.commits[right]
-	tasks, err = c.scheduleRunBenchmark(ctx, sc)
+	tasks, err = c.scheduleRunBenchmark(ctx, sc, req)
 	if err != nil {
 		return skerr.Wrapf(err, "could not schedule more tasks for right commit [%d] %s", right, c.commit.GitHash[:7])
 	}

diff --git a/pinpoint/go/pinpoint/pinpoint_test.go b/pinpoint/go/pinpoint/pinpoint_test.go
index 624b4f1..b953c7a 100644
--- a/pinpoint/go/pinpoint/pinpoint_test.go
+++ b/pinpoint/go/pinpoint/pinpoint_test.go

@@ -352,7 +352,7 @@
 		sc := &backends.SwarmingClientImpl{
 			ApiClient: msc,
 		}
-		tasks, err := c.scheduleRunBenchmark(ctx, sc)
+		tasks, err := c.scheduleRunBenchmark(ctx, sc, req)
 		So(err, ShouldBeNil)
 		So(tasks[0], ShouldEqual, "new_task")
 		So(len(tasks), ShouldEqual, interval)
@@ -364,7 +364,7 @@
 			sc := &backends.SwarmingClientImpl{
 				ApiClient: msc,
 			}
-			tasks, err := c.scheduleRunBenchmark(ctx, sc)
+			tasks, err := c.scheduleRunBenchmark(ctx, sc, req)
 			So(err, ShouldErrLike, "Cannot schedule benchmark runs without request")
 			So(tasks, ShouldBeNil)
 		})
@@ -375,7 +375,7 @@
 			sc := &backends.SwarmingClientImpl{
 				ApiClient: msc,
 			}
-			tasks, err := c.scheduleRunBenchmark(ctx, sc)
+			tasks, err := c.scheduleRunBenchmark(ctx, sc, req)
 			So(err, ShouldErrLike, "Cannot schedule benchmark runs without request")
 			So(tasks, ShouldBeNil)
 		})
@@ -406,7 +406,7 @@
 			sc := &backends.SwarmingClientImpl{
 				ApiClient: msc,
 			}
-			tasks, err := c.scheduleRunBenchmark(ctx, sc)
+			tasks, err := c.scheduleRunBenchmark(ctx, sc, req)
 			So(err, ShouldErrLike, errMsg)
 			So(tasks, ShouldBeNil)
 		})
@@ -776,6 +776,7 @@
 	msc := swarmingMocks.NewApiClient(t)
 	c := mockhttpclient.NewURLMock().Client()
 	mmh := midpoint.New(ctx, c)
+	req := defaultRunRequest()
 
 	Convey(`Error`, t, func() {
 		cdl := commitDataList{
@@ -790,7 +791,7 @@
 			sc := &backends.SwarmingClientImpl{
 				ApiClient: msc,
 			}
-			mid, err := cdl.updateCommitsByResult(ctx, sc, mmh, res, left, right)
+			mid, err := cdl.updateCommitsByResult(ctx, sc, mmh, res, left, right, req)
 			So(err, ShouldErrLike, "index out of bounds")
 			So(mid, ShouldBeNil)
 		})
@@ -800,7 +801,7 @@
 			sc := &backends.SwarmingClientImpl{
 				ApiClient: msc,
 			}
-			mid, err := cdl.updateCommitsByResult(ctx, sc, mmh, res, left, right)
+			mid, err := cdl.updateCommitsByResult(ctx, sc, mmh, res, left, right, req)
 			So(err, ShouldErrLike, fmt.Sprintf("left %d index >= right %d", left, right))
 			So(mid, ShouldBeNil)
 		})
@@ -885,7 +886,7 @@
 		sc := &backends.SwarmingClientImpl{
 			ApiClient: msc,
 		}
-		err = cdl.runMoreTestsIfNeeded(ctx, sc, left, right)
+		err = cdl.runMoreTestsIfNeeded(ctx, sc, left, right, req)
 		So(err, ShouldBeNil)
 		So(len(lcommit.tests.tasks), ShouldEqual, interval+2)
 		So(lcommit.tests.tasks[0], ShouldEqual, "old_left_task_1")

diff --git a/pinpoint/go/run_benchmark/BUILD.bazel b/pinpoint/go/run_benchmark/BUILD.bazel
index dd58ff9..2da6950 100644
--- a/pinpoint/go/run_benchmark/BUILD.bazel
+++ b/pinpoint/go/run_benchmark/BUILD.bazel

@@ -4,8 +4,10 @@
 go_library(
     name = "run_benchmark",
     srcs = [
+        "benchmark_test_factory.go",
         "run_benchmark.go",
-        "telemetry_exp.go",
+        "swarming_helpers.go",
+        "telemetry.go",
     ],
     importpath = "go.skia.org/infra/pinpoint/go/run_benchmark",
     visibility = ["//visibility:public"],
@@ -15,6 +17,7 @@
         "//go/util",
         "//pinpoint/go/backends",
         "//pinpoint/go/bot_configs",
+        "//pinpoint/proto/v1:proto",
         "@org_chromium_go_luci//common/api/swarming/swarming/v1:swarming",
     ],
 )
@@ -22,8 +25,9 @@
 go_test(
     name = "run_benchmark_test",
     srcs = [
+        "benchmark_test_factory_test.go",
         "run_benchmark_test.go",
-        "telemetry_exp_test.go",
+        "telemetry_test.go",
     ],
     embed = [":run_benchmark"],
     deps = [
@@ -31,11 +35,9 @@
         "//go/swarming",
         "//go/swarming/mocks",
         "//pinpoint/go/backends",
-        "//pinpoint/go/bot_configs",
-        "@com_github_smartystreets_goconvey//convey",
+        "//pinpoint/proto/v1:proto",
         "@com_github_stretchr_testify//assert",
         "@com_github_stretchr_testify//mock",
         "@org_chromium_go_luci//common/api/swarming/swarming/v1:swarming",
-        "@org_chromium_go_luci//common/testing/assertions",
     ],
 )

diff --git a/pinpoint/go/run_benchmark/benchmark_test_factory.go b/pinpoint/go/run_benchmark/benchmark_test_factory.go
new file mode 100644
index 0000000..97836ca
--- /dev/null
+++ b/pinpoint/go/run_benchmark/benchmark_test_factory.go

@@ -0,0 +1,54 @@
+package run_benchmark
+
+import (
+	"strings"
+
+	"go.skia.org/infra/go/skerr"
+
+	bc "go.skia.org/infra/pinpoint/go/bot_configs"
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
+)
+
+type BenchmarkTest interface {
+	GetCommand() []string
+}
+
+// NewBenchmarkTest returns a BenchmarkTest based on the request parameters.
+// The Configuration (bot) is used alongside the benchmark to determine the
+// isolate target for that combination. Based on the isolate target,
+func NewBenchmarkTest(req *ppb.ScheduleBisectRequest, commit string) (BenchmarkTest, error) {
+	bot, benchmark := req.Configuration, req.Benchmark
+	config, err := bc.GetBotConfig(bot, false)
+	if err != nil {
+		return nil, skerr.Wrapf(err, "Failed to fetch bot configs to create benchmark test")
+	}
+	target, err := bc.GetIsolateTarget(bot, benchmark)
+	if err != nil {
+		return nil, skerr.Wrapf(err, "Failed to get isolate target to create the benchmark test")
+	}
+
+	// These targets could have suffixes, especially for Android.
+	// For example, 'performance_test_suite_android_clank_monochrome_64_32_bundle'
+	if strings.Contains(target, "performance_test_suite") || strings.Contains(target, "telemetry_perf_tests") {
+		return &telemetryTest{
+			benchmark: req.Benchmark,
+			browser:   config.Browser,
+			commit:    commit,
+			story:     req.Story,
+			storyTags: req.StoryTags,
+		}, nil
+	}
+
+	switch target {
+	case "performance_webview_test_suite":
+		return &telemetryTest{
+			benchmark: req.Benchmark,
+			browser:   config.Browser,
+			commit:    commit,
+			story:     req.Story,
+			storyTags: req.StoryTags,
+		}, nil
+	default:
+		return nil, skerr.Fmt("Unsupported test target %s", target)
+	}
+}

diff --git a/pinpoint/go/run_benchmark/benchmark_test_factory_test.go b/pinpoint/go/run_benchmark/benchmark_test_factory_test.go
new file mode 100644
index 0000000..0c94a74
--- /dev/null
+++ b/pinpoint/go/run_benchmark/benchmark_test_factory_test.go

@@ -0,0 +1,32 @@
+package run_benchmark
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
+)
+
+func TestGetBenchmark_TelemetryTestBuilder_TelemetryTests(t *testing.T) {
+	expect := func(req *ppb.ScheduleBisectRequest, commit, want string) {
+		benchmark, _ := NewBenchmarkTest(req, commit)
+		assert.Equal(t, want, reflect.TypeOf(benchmark).String())
+	}
+
+	commit := "01bfa421eee3c76bbbf32510343e074060051c9f"
+	req := &ppb.ScheduleBisectRequest{
+		Configuration: "android-go-perf",
+		Benchmark:     "components_perftests",
+	}
+	expect(req, commit, "*run_benchmark.telemetryTest")
+
+	req = &ppb.ScheduleBisectRequest{
+		Story:         "story",
+		StoryTags:     "all",
+		Configuration: "android-pixel2_webview-perf",
+		Benchmark:     "performance_browser_tests",
+	}
+	expect(req, commit, "*run_benchmark.telemetryTest")
+}

diff --git a/pinpoint/go/run_benchmark/run_benchmark.go b/pinpoint/go/run_benchmark/run_benchmark.go
index 294bdb7..1d0cbc6 100644
--- a/pinpoint/go/run_benchmark/run_benchmark.go
+++ b/pinpoint/go/run_benchmark/run_benchmark.go

@@ -8,23 +8,24 @@
 
 import (
 	"context"
-	"fmt"
 	"slices"
 
-	swarmingV1 "go.chromium.org/luci/common/api/swarming/swarming/v1"
 	"go.skia.org/infra/go/skerr"
 	"go.skia.org/infra/go/swarming"
 	"go.skia.org/infra/pinpoint/go/backends"
 	"go.skia.org/infra/pinpoint/go/bot_configs"
+
+	spb "go.chromium.org/luci/common/api/swarming/swarming/v1"
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
 )
 
-// A RunBenchmarkRequest defines the request arguments of the performance test
-// to swarming.
+// A RunBenchmarkRequest defines the request arguments of the performance test to swarming.
+// Note: This is being used in workflows/internal/run_benchmark.go.
 type RunBenchmarkRequest struct {
 	// the Pinpoint job id
 	JobID string
 	// the swarming instance and cas digest hash and bytes location for the build
-	Build *swarmingV1.SwarmingRpcsCASReference
+	Build *spb.SwarmingRpcsCASReference
 	// commit hash
 	Commit string
 	// device configuration
@@ -37,80 +38,6 @@
 	Target string
 }
 
-// swarming request to run task
-// define static, constant fields here
-var swarmingReq = swarmingV1.SwarmingRpcsNewTaskRequest{
-	BotPingToleranceSecs: 1200,
-	ExpirationSecs:       86400,
-	// EvaluateOnly omitted
-	Name: "Pinpoint bisection run benchmark task",
-	// ParentTaskId omitted
-	// PoolTaskTemplate omitted
-	Priority: 100,
-	// define properties later
-	PubsubTopic:    "projects/chromeperf/topics/pinpoint-swarming-updates",
-	PubsubUserdata: "UNUSED", // can populate later, see example swarming call log
-	Realm:          "chrome:pinpoint",
-	// RequestUuid: omitted
-	// Resultdb: omitted
-	ServiceAccount: "chrome-tester@chops-service-accounts.iam.gserviceaccount.com",
-	// define tags later
-	// TaskSlices optional if properties defined
-	User: "Pinpoint",
-	// ForceSendFields: omitted
-	// NullFields: omitted
-}
-
-func createSwarmingReq(req RunBenchmarkRequest) (
-	*swarmingV1.SwarmingRpcsNewTaskRequest, error) {
-	// TODO(b/318863812): add mapping from device + benchmark to the specific run test
-	// currently catapult maps the device + benchmark to the target and then
-	// the target dictates what test to run. We can map to target if that info
-	// is useful on the UI, but for this, it's not relevant.
-	// see GetIsolateTarget and _GenerateQuests here:
-	// https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/handlers/new.py;drc=8fe602e47f11cfdd79225696f1f6a5556b57c58c;l=466
-	// TODO(b/321299939): create an interface for different runBenchmark types
-	// and refactor telemetryExp to use that interface
-	exp := telemetryExp{}
-	cmd, err := exp.createCmd(req)
-	if err != nil {
-		return nil, skerr.Fmt("Unable to create run benchmark command due to %s\n", err)
-	}
-	dim := make([]*swarmingV1.SwarmingRpcsStringPair,
-		len(req.Config.Dimensions))
-	for i, kv := range req.Config.Dimensions {
-		dim[i] = &swarmingV1.SwarmingRpcsStringPair{
-			Key:   kv["key"],
-			Value: kv["value"],
-		}
-	}
-
-	swarmingReq.Properties = &swarmingV1.SwarmingRpcsTaskProperties{
-		CasInputRoot: &swarmingV1.SwarmingRpcsCASReference{
-			CasInstance: req.Build.CasInstance,
-			Digest: &swarmingV1.SwarmingRpcsDigest{
-				Hash:      req.Build.Digest.Hash,
-				SizeBytes: req.Build.Digest.SizeBytes,
-			},
-		},
-		// TODO(b/318863812): support user submitted extra_args.
-		// This support is needed for pairwise executions, not bisection.
-		Command:              cmd,
-		Dimensions:           dim,
-		ExecutionTimeoutSecs: 2700,
-		IoTimeoutSecs:        2700,
-		RelativeCwd:          "out/Release",
-	}
-
-	// TODO(b/318863812): update swarming task tags to more appropriate tags.
-	swarmingReq.Tags = []string{
-		fmt.Sprintf("pinpoint_job_id:%s", req.JobID),
-		fmt.Sprintf("build_cas:%s/%d", req.Build.Digest.Hash, req.Build.Digest.SizeBytes),
-	}
-
-	return &swarmingReq, nil
-}
-
 var runningStates = []string{
 	swarming.TASK_STATE_PENDING,
 	swarming.TASK_STATE_RUNNING,
@@ -130,16 +57,29 @@
 }
 
 // Run schedules a swarming task to run the RunBenchmarkRequest.
-func Run(ctx context.Context, sc backends.SwarmingClient, req RunBenchmarkRequest) (string, error) {
-	swarmingReq, err := createSwarmingReq(req)
+func Run(ctx context.Context, sc backends.SwarmingClient, req *ppb.ScheduleBisectRequest, commit string, jobID string, buildArtifact *spb.SwarmingRpcsCASReference, iter int) ([]*spb.SwarmingRpcsTaskRequestMetadata, error) {
+	test, err := NewBenchmarkTest(req, commit)
 	if err != nil {
-		return "", skerr.Wrapf(err, "Could not create run test request")
+		return nil, skerr.Wrapf(err, "Failed to prepare benchmark test for execution")
 	}
 
-	metadataResp, err := sc.TriggerTask(ctx, swarmingReq)
+	bot := req.Configuration
+	botConfig, err := bot_configs.GetBotConfig(bot, false)
 	if err != nil {
-		return "", skerr.Fmt("trigger task %v\ncaused error: %s", req, err)
+		return nil, skerr.Wrapf(err, "Failed to create benchmark test object")
 	}
 
-	return metadataResp.TaskId, nil
+	swarmingRequest := createSwarmingRequest(jobID, test.GetCommand(), buildArtifact, botConfig.Dimensions)
+
+	resp := make([]*spb.SwarmingRpcsTaskRequestMetadata, 0)
+	for i := 0; i < iter; i++ {
+		r, err := sc.TriggerTask(ctx, swarmingRequest)
+		if err != nil {
+			return nil, skerr.Wrapf(err, "benchmark task %d with request %v failed", i, r)
+		}
+
+		resp = append(resp, r)
+	}
+
+	return resp, nil
 }

diff --git a/pinpoint/go/run_benchmark/run_benchmark_test.go b/pinpoint/go/run_benchmark/run_benchmark_test.go
index f825764..8f06981 100644
--- a/pinpoint/go/run_benchmark/run_benchmark_test.go
+++ b/pinpoint/go/run_benchmark/run_benchmark_test.go

@@ -4,16 +4,16 @@
 	"context"
 	"testing"
 
-	. "github.com/smartystreets/goconvey/convey"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
-	swarmingV1 "go.chromium.org/luci/common/api/swarming/swarming/v1"
-	. "go.chromium.org/luci/common/testing/assertions"
+
 	"go.skia.org/infra/go/skerr"
 	"go.skia.org/infra/go/swarming"
 	"go.skia.org/infra/go/swarming/mocks"
 	"go.skia.org/infra/pinpoint/go/backends"
-	"go.skia.org/infra/pinpoint/go/bot_configs"
+
+	swarmingV1 "go.chromium.org/luci/common/api/swarming/swarming/v1"
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
 )
 
 var req = RunBenchmarkRequest{
@@ -31,37 +31,38 @@
 }
 var expectedErr = skerr.Fmt("some error")
 
-func TestRun(t *testing.T) {
+func TestRun_TelemetryTest_ValidExecution(t *testing.T) {
 	ctx := context.Background()
 	mockClient := mocks.NewApiClient(t)
 	sc := &backends.SwarmingClientImpl{
 		ApiClient: mockClient,
 	}
 
-	Convey(`OK`, t, func() {
-		cfg, err := bot_configs.GetBotConfig("linux-perf", true)
-		So(err, ShouldBeNil)
-		req.Config = cfg
-		mockClient.On("TriggerTask", ctx, mock.Anything).
-			Return(&swarmingV1.SwarmingRpcsTaskRequestMetadata{
-				TaskId: "123",
-			}, nil).Once()
-		taskId, err := Run(ctx, sc, req)
-		So(err, ShouldBeNil)
-		So(taskId, ShouldEqual, "123")
-	})
-	Convey(`Return error`, t, func() {
-		cfg, err := bot_configs.GetBotConfig("linux-perf", true)
-		So(err, ShouldBeNil)
-		req.Config = cfg
-		mockClient.On("TriggerTask", ctx, mock.Anything).
-			Return(&swarmingV1.SwarmingRpcsTaskRequestMetadata{
-				TaskId: "123",
-			}, expectedErr).Once()
-		taskId, err := Run(ctx, sc, req)
-		So(taskId, ShouldBeEmpty)
-		So(err, ShouldErrLike, expectedErr)
-	})
+	bisectReq := &ppb.ScheduleBisectRequest{
+		Story:         "story",
+		StoryTags:     "all",
+		Configuration: "android-pixel2_webview-perf",
+		Benchmark:     "performance_browser_tests",
+	}
+
+	buildArtifact := &swarmingV1.SwarmingRpcsCASReference{
+		CasInstance: "instance",
+		Digest: &swarmingV1.SwarmingRpcsDigest{
+			Hash:      "hash",
+			SizeBytes: 0,
+		},
+	}
+
+	commit := "64893ca6294946163615dcf23b614afe0419bfa3"
+
+	mockClient.On("TriggerTask", ctx, mock.Anything).
+		Return(&swarmingV1.SwarmingRpcsTaskRequestMetadata{
+			TaskId: "123",
+		}, nil).Once()
+	taskIds, err := Run(ctx, sc, bisectReq, commit, "id", buildArtifact, 1)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, len(taskIds))
+	assert.Equal(t, "123", taskIds[0].TaskId)
 }
 
 func TestIsTaskStateFinished_GivenCompleteStates_ReturnsTrue(t *testing.T) {

diff --git a/pinpoint/go/run_benchmark/swarming_helpers.go b/pinpoint/go/run_benchmark/swarming_helpers.go
new file mode 100644
index 0000000..78b2ef3
--- /dev/null
+++ b/pinpoint/go/run_benchmark/swarming_helpers.go

@@ -0,0 +1,72 @@
+package run_benchmark
+
+import (
+	"fmt"
+
+	spb "go.chromium.org/luci/common/api/swarming/swarming/v1"
+)
+
+func convertDimensions(dimensions []map[string]string) []*spb.SwarmingRpcsStringPair {
+	// TODO(b/318863812): add mapping from device + benchmark to the specific run test
+	// currently catapult maps the device + benchmark to the target and then
+	// the target dictates what test to run. We can map to target if that info
+	// is useful on the UI, but for this, it's not relevant.
+	// see GetIsolateTarget and _GenerateQuests here:
+	// https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/handlers/new.py;drc=8fe602e47f11cfdd79225696f1f6a5556b57c58c;l=466
+	// TODO(b/321299939): create an interface for different runBenchmark types
+	// and refactor telemetryExp to use that interface
+	dim := make([]*spb.SwarmingRpcsStringPair, len(dimensions))
+	for i, kv := range dimensions {
+		dim[i] = &spb.SwarmingRpcsStringPair{
+			Key:   kv["key"],
+			Value: kv["value"],
+		}
+	}
+	return dim
+}
+
+func generateProperties(command []string, casRef *spb.SwarmingRpcsCASReference, dim []*spb.SwarmingRpcsStringPair) *spb.SwarmingRpcsTaskProperties {
+	return &spb.SwarmingRpcsTaskProperties{
+		CasInputRoot:         casRef,
+		Command:              command,
+		Dimensions:           dim,
+		ExecutionTimeoutSecs: 2700,
+		IoTimeoutSecs:        2700,
+		RelativeCwd:          "out/Release",
+	}
+}
+
+func generateTags(jobID string, hash string, sizeBytes int64) []string {
+	// TODO(b/318863812): update swarming task tags to more appropriate tags.
+	return []string{
+		fmt.Sprintf("pinpoint_job_id:%s", jobID),
+		fmt.Sprintf("build_cas:%s/%d", hash, sizeBytes),
+	}
+}
+
+func createSwarmingRequest(jobID string, command []string, casRef *spb.SwarmingRpcsCASReference, dimensions []map[string]string) *spb.SwarmingRpcsNewTaskRequest {
+	return &spb.SwarmingRpcsNewTaskRequest{
+		BotPingToleranceSecs: 1200,
+		ExpirationSecs:       86400,
+		// EvaluateOnly omitted
+		Name: "Pinpoint bisection run benchmark task",
+		// ParentTaskId omitted
+		// PoolTaskTemplate omitted
+		Priority: 100,
+		// define properties later
+		PubsubTopic: "projects/chromeperf/topics/pinpoint-swarming-updates",
+		// can populate later, see example swarming call log
+		PubsubUserdata: "UNUSED",
+		Realm:          "chrome:pinpoint",
+		// RequestUuid: omitted
+		// Resultdb: omitted
+		ServiceAccount: "chrome-tester@chops-service-accounts.iam.gserviceaccount.com",
+		// define tags later
+		// TaskSlices optional if properties defined
+		User: "Pinpoint",
+		// ForceSendFields: omitted
+		// NullFields: omitted
+		Properties: generateProperties(command, casRef, convertDimensions(dimensions)),
+		Tags:       generateTags(jobID, casRef.Digest.Hash, casRef.Digest.SizeBytes),
+	}
+}

diff --git a/pinpoint/go/run_benchmark/telemetry.go b/pinpoint/go/run_benchmark/telemetry.go
new file mode 100644
index 0000000..c59084e
--- /dev/null
+++ b/pinpoint/go/run_benchmark/telemetry.go

@@ -0,0 +1,116 @@
+package run_benchmark
+
+import (
+	"fmt"
+
+	"go.skia.org/infra/go/util"
+)
+
+var defaultExtraArgs = []string{
+	"-v",
+	"--upload-results",
+	"--output-format",
+	"histograms",
+	"--isolated-script-test-output",
+	"${ISOLATED_OUTDIR}/output.json",
+}
+
+// Please keep this executable-argument mapping synced with perf waterfall:
+// https://chromium.googlesource.com/chromium/src/+/main/tools/perf/core/bot_platforms.py
+var waterfallEnabledGtests = util.NewStringSet([]string{
+	"base_perftests",
+	"components_perftests",
+	"dawn_perf_tests",
+	"gpu_perftests",
+	"load_library_perf_tests",
+	"performance_browser_tests",
+	"sync_performance_tests",
+	"tracing_perftests",
+	"views_perftests",
+})
+
+type telemetryTest struct {
+	benchmark string
+	browser   string
+	commit    string
+	story     string
+	storyTags string
+}
+
+// getCommand generates the command needed to execute Telemetry benchmark tests.
+// This function assumes that the request object has been validated, and that
+// required fields have been verified for.
+// TODO(b/318863812): support user submitted extra_args.
+// This support is needed for pairwise executions, not bisection.
+func (t *telemetryTest) GetCommand() []string {
+	cmd := []string{
+		"luci-auth",
+		"context",
+		"--",
+		"vpython3",
+		"../../testing/test_env.py",
+		"../../testing/scripts/run_performance_tests.py",
+	}
+
+	// Note(jeffyoon@): The logic below can likely be made more efficient, but
+	// for parity sake we retain the same order for arguments as Catapult's Pinpoint.
+	// Ref: https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/quest/run_telemetry_test.py;drc=10f23074bdab2b425cb29a8224b63298fdac42b7;l=127
+	if _, ok := waterfallEnabledGtests[t.benchmark]; ok {
+		if t.benchmark == "performance_browser_tests" {
+			cmd = append(cmd, "browser_tests")
+		} else {
+			cmd = append(cmd, t.benchmark)
+		}
+	} else {
+		cmd = append(cmd, "../../tools/perf/run_benchmark")
+	}
+
+	if t.storyTags == "" {
+		cmd = append(cmd, "-d")
+	}
+
+	if _, ok := waterfallEnabledGtests[t.benchmark]; ok {
+		cmd = append(cmd, "--gtest-benchmark-name", t.benchmark, "--non-telemetry", "true")
+		switch t.benchmark {
+		case "base_perftests", "dawn_perf_tests", "sync_performance_tests":
+			cmd = append(cmd, "--test-launcher-jobs=1", "--test-launcher-retry-limit=0")
+		case "components_perftests", "views_perftests":
+			cmd = append(cmd, "--xvfb")
+		case "performance_browser_tests":
+			// Allow the full performance runs to take up to 60 seconds (rather
+			// than the default of 30 for normal CQ browser test runs).
+			cmd = append(cmd, "--full-performance-run", "--test-launcher-jobs=1",
+				"--test-launcher-retry-limit=0", "--ui-test-action-timeout=60000",
+				"--ui-test-action-max-timeout=60000", "--test-launcher-timeout=60000",
+				"--gtest_filter=*/TabCapturePerformanceTest.*:*/CastV2PerformanceTest.*")
+		default:
+			break
+		}
+	} else {
+		cmd = append(cmd, "--benchmarks", t.benchmark)
+	}
+
+	if t.story != "" {
+		// TODO(b/40635221): Note that usage of "--run-full-story-set" and "--story-filter"
+		// can be replaced with --story=<story> (no regex needed). See crrev/c/1869800.
+		cmd = append(cmd, "--story-filter", fmt.Sprintf("^%s$", t.story))
+	}
+
+	if t.storyTags != "" {
+		cmd = append(cmd, "--story-tag-filter", t.storyTags)
+	}
+
+	cmd = append(cmd, "--pageset-repeat", "1", "--browser", t.browser)
+	cmd = append(cmd, defaultExtraArgs...)
+
+	// For results2 to differentiate between runs, we need to add the
+	// Telemetry parameter `--results-label <change>` to the runs.
+	// TODO(jeffyoon@) deprecate this label once the UI is no longer dependant on this.
+	cmd = append(cmd, "--results-label", t.commit[:7])
+
+	// Note: Appending "--run-full-story-set" last per comment above to retain
+	// argument order. This is always appended in catapult regardless of whether
+	// a story is defined or not.
+	cmd = append(cmd, "--run-full-story-set")
+	return cmd
+}

diff --git a/pinpoint/go/run_benchmark/telemetry_exp.go b/pinpoint/go/run_benchmark/telemetry_exp.go
deleted file mode 100644
index f57ee7d..0000000
--- a/pinpoint/go/run_benchmark/telemetry_exp.go
+++ /dev/null

@@ -1,91 +0,0 @@
-package run_benchmark
-
-import (
-	"fmt"
-
-	"go.skia.org/infra/go/skerr"
-	"go.skia.org/infra/go/util"
-)
-
-var defaultExtraArgs = []string{
-	"-v",
-	"--upload-results",
-	"--output-format",
-	"histograms",
-	"--isolated-script-test-output",
-	"${ISOLATED_OUTDIR}/output.json",
-}
-
-// TODO(b/318863812): implement benchmarks in _WATERFALL_ENABLED_GTEST_NAMES
-// crbug/1146949
-// Please keep this executable-argument mapping synced with perf waterfall:
-// https://chromium.googlesource.com/chromium/src/+/main/tools/perf/core/bot_platforms.py
-var waterfallEnabledGtests = util.NewStringSet([]string{
-	"base_perftests",
-	"components_perftests",
-	"dawn_perf_tests",
-	"gpu_perftests",
-	"load_library_perf_tests",
-	"performance_browser_tests",
-	"sync_performance_tests",
-	"tracing_perftests",
-	"views_perftests",
-})
-
-// A telemetryExp contains the command for the test device
-// to run a telemetry benchmark on Chrome browser.
-// telemetryExp will support most devices.
-type telemetryExp struct{}
-
-// createCmd creates the command for the test device.
-func (t *telemetryExp) createCmd(req RunBenchmarkRequest) ([]string, error) {
-	// TODO(b/318863812): implement benchmarks in _WATERFALL_ENABLED_GTEST_NAMES
-	// Benchmarks in _WATERFALL_ENABLED_GTEST_NAMES are rarely used.
-	// see: https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/quest/run_telemetry_test.py;drc=10f23074bdab2b425cb29a8224b63298fdac42b7;l=96
-	_, ok := waterfallEnabledGtests[req.Benchmark]
-	if ok {
-		return nil, skerr.Fmt("Benchmark %s is not yet implemented", req.Benchmark)
-	}
-	cmd := []string{
-		"luci-auth",
-		"context",
-		"--",
-		"vpython3",
-		"../../testing/test_env.py",
-		"../../testing/scripts/run_performance_tests.py",
-		"../../tools/perf/run_benchmark",
-		// TODO(b/318863812): Implement story_tags. Some of our entrenched users
-		// are only familiar with running Pinpoint jobs with story tags. Although
-		// some of this behavior is due to UI confusion, we should still support it
-		// and ensure the UI makes it clear what the differences are.
-		// see: https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/quest/run_telemetry_test.py;drc=10f23074bdab2b425cb29a8224b63298fdac42b7;l=127
-		"-d",
-	}
-	if req.Benchmark == "" {
-		return nil, skerr.Fmt("Missing 'Benchmark' argument.")
-	}
-	cmd = append(cmd, "--benchmarks", req.Benchmark)
-	if req.Story == "" {
-		return nil, skerr.Fmt("Missing 'Story' argument.")
-	}
-	cmd = append(cmd, "--story-filter", fmt.Sprintf("^%s$", req.Story))
-	cmd = append(cmd, "--pageset-repeat", "1")
-	cmd = append(cmd, "--browser", req.Config.Browser)
-	cmd = append(cmd, defaultExtraArgs...)
-
-	// TODO(b/318863812): Add support for non-chromium commits
-	// append sha for results2
-	if req.Commit == "" {
-		return nil, skerr.Fmt("Missing 'Commit' argument.")
-	}
-	cmd = append(cmd,
-		"--results-label",
-		fmt.Sprintf("chromium@%s", req.Commit[:7]))
-
-	// unclear what happens if you write the flags in
-	// a different order
-	// see: https://source.chromium.org/chromium/chromium/src/+/main:third_party/catapult/dashboard/dashboard/pinpoint/models/quest/run_telemetry_test.py;drc=10f23074bdab2b425cb29a8224b63298fdac42b7;l=80
-	cmd = append(cmd, "--run-full-story-set")
-
-	return cmd, nil
-}

diff --git a/pinpoint/go/run_benchmark/telemetry_exp_test.go b/pinpoint/go/run_benchmark/telemetry_exp_test.go
deleted file mode 100644
index d582f55..0000000
--- a/pinpoint/go/run_benchmark/telemetry_exp_test.go
+++ /dev/null

@@ -1,70 +0,0 @@
-package run_benchmark
-
-import (
-	"fmt"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-func TestCreateCmd(t *testing.T) {
-	for i, test := range []struct {
-		name        string
-		req         RunBenchmarkRequest
-		expectedErr string
-	}{
-		{
-			name: "create command works",
-			req: RunBenchmarkRequest{
-				Benchmark: "benchmark",
-				Story:     "story",
-				Commit:    "64893ca6294946163615dcf23b614afe0419bfa3",
-			},
-			expectedErr: "",
-		},
-		{
-			name: "benchmark error",
-			req: RunBenchmarkRequest{
-				Story:  "story",
-				Commit: "64893ca6294946163615dcf23b614afe0419bfa3",
-			},
-			expectedErr: "Benchmark",
-		},
-		{
-			name: "story error",
-			req: RunBenchmarkRequest{
-				Benchmark: "benchmark",
-				Commit:    "64893ca6294946163615dcf23b614afe0419bfa3",
-			},
-			expectedErr: "Story",
-		},
-		{
-			name: "commit error",
-			req: RunBenchmarkRequest{
-				Benchmark: "benchmark",
-				Story:     "story",
-			},
-			expectedErr: "Commit",
-		},
-		{
-			name: "base_perftests not yet implemented",
-			req: RunBenchmarkRequest{
-				Benchmark: "base_perftests",
-				Story:     "story",
-				Commit:    "64893ca6294946163615dcf23b614afe0419bfa3",
-			},
-			expectedErr: "base_perftests is not yet implemented",
-		},
-	} {
-		t.Run(fmt.Sprintf("[%d] %s", i, test.name), func(t *testing.T) {
-			e := telemetryExp{}
-			cmd, err := e.createCmd(test.req)
-			if test.expectedErr == "" {
-				assert.Contains(t, cmd, test.req.Benchmark)
-				assert.Contains(t, cmd, fmt.Sprintf("^%s$", test.req.Story))
-			} else {
-				assert.ErrorContains(t, err, test.expectedErr)
-			}
-		})
-	}
-}

diff --git a/pinpoint/go/run_benchmark/telemetry_test.go b/pinpoint/go/run_benchmark/telemetry_test.go
new file mode 100644
index 0000000..f915fb1
--- /dev/null
+++ b/pinpoint/go/run_benchmark/telemetry_test.go

@@ -0,0 +1,71 @@
+package run_benchmark
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
+)
+
+func TestGetCommand_WaterfallGTest_TestCommand(t *testing.T) {
+	commit := "01bfa421eee3c76bbbf32510343e074060051c9f"
+
+	req := &ppb.ScheduleBisectRequest{
+		Configuration: "android-go-perf",
+		Benchmark:     "components_perftests",
+	}
+
+	b, err := NewBenchmarkTest(req, commit)
+	assert.NoError(t, err)
+
+	cmd := b.GetCommand()
+	// stories w/o story tags should have -d set
+	assert.Contains(t, cmd, "-d")
+	assert.NotContains(t, cmd, "--story-tag-filter")
+
+	// Should not contain run_benchmark because it's a waterfall
+	// enabled GTest.
+	assert.NotContains(t, cmd, "../../tools/perf/run_benchmark")
+}
+
+func TestGetCommand_PerfBrowserTestWithStory_StoryTestCommand(t *testing.T) {
+	commit := "01bfa421eee3c76bbbf32510343e074060051c9f"
+
+	req := &ppb.ScheduleBisectRequest{
+		Story:         "story",
+		StoryTags:     "all",
+		Configuration: "android-pixel2_webview-perf",
+		Benchmark:     "performance_browser_tests",
+	}
+
+	b, err := NewBenchmarkTest(req, commit)
+	assert.NoError(t, err)
+
+	cmd := b.GetCommand()
+	// stories w/o story tags should have -d set
+	assert.Contains(t, cmd, "--story-filter")
+	assert.Contains(t, cmd, "--story-tag-filter")
+
+	// Special gtest filter for performance_browser_tests
+	assert.Contains(t, cmd, "--gtest_filter=*/TabCapturePerformanceTest.*:*/CastV2PerformanceTest.*")
+}
+
+func TestGetCommand_NonWaterfallEnabledGTest_TestCommand(t *testing.T) {
+	commit := "01bfa421eee3c76bbbf32510343e074060051c9f"
+
+	req := &ppb.ScheduleBisectRequest{
+		Configuration: "android-pixel2_webview-perf",
+		Benchmark:     "random_test",
+	}
+
+	b, err := NewBenchmarkTest(req, commit)
+	assert.NoError(t, err)
+
+	cmd := b.GetCommand()
+
+	// Non waterfall enabled gtest should have run_benchmark
+	assert.Contains(t, cmd, "../../tools/perf/run_benchmark")
+	assert.Contains(t, cmd, "--benchmarks")
+	assert.Contains(t, cmd, "random_test")
+}

diff --git a/pinpoint/go/workflows/internal/BUILD.bazel b/pinpoint/go/workflows/internal/BUILD.bazel
index 1fd88ec..fb1270c 100644
--- a/pinpoint/go/workflows/internal/BUILD.bazel
+++ b/pinpoint/go/workflows/internal/BUILD.bazel

@@ -15,6 +15,7 @@
         "//pinpoint/go/build_chrome",
         "//pinpoint/go/run_benchmark",
         "//pinpoint/go/workflows",
+        "//pinpoint/proto/v1:proto",
         "@io_temporal_go_sdk//activity",
         "@io_temporal_go_sdk//temporal",
         "@io_temporal_go_sdk//workflow",

diff --git a/pinpoint/go/workflows/internal/run_benchmark.go b/pinpoint/go/workflows/internal/run_benchmark.go
index aa12df2..e7efe90 100644
--- a/pinpoint/go/workflows/internal/run_benchmark.go
+++ b/pinpoint/go/workflows/internal/run_benchmark.go

@@ -10,6 +10,7 @@
 	"go.skia.org/infra/pinpoint/go/backends"
 	"go.skia.org/infra/pinpoint/go/run_benchmark"
 	"go.skia.org/infra/pinpoint/go/workflows"
+	ppb "go.skia.org/infra/pinpoint/proto/v1"
 	"go.temporal.io/sdk/activity"
 	"go.temporal.io/sdk/temporal"
 	"go.temporal.io/sdk/workflow"
@@ -68,7 +69,6 @@
 	}
 
 	resp.CAS = cas
-
 	return &resp, nil
 }
 
@@ -82,7 +82,23 @@
 		return "", skerr.Wrap(err)
 	}
 
-	return run_benchmark.Run(ctx, sc, params.Request)
+	// TODO(jeffyoon@) - this is a workaround to generate the request object s.t. the refactor
+	// does not obstruct the current run_benchmark workflow. Once the request spec defined
+	// by the proto is in full use, this can be deprecated.
+	//
+	// This only fills in the minimum set required to run the benchmark.
+	req := &ppb.ScheduleBisectRequest{
+		// run_benchmark.RunBenchmarkRequest doesn't support story tags
+		Configuration: params.Request.Config.Bot,
+		Benchmark:     params.Request.Benchmark,
+		Story:         params.Request.Story,
+	}
+
+	taskIds, err := run_benchmark.Run(ctx, sc, req, params.Request.Commit, params.Request.JobID, params.Request.Build, 1)
+	if err != nil {
+		return "", err
+	}
+	return taskIds[0].TaskId, nil
 }
 
 // WaitTaskFinishedActivity polls the task until it finishes or errors. Returns the status