[task scheduler] Fixes for Kubernetes

Bug: skia:8636
Change-Id: I45440c3170ed168b185fdd111c07625fcff0a772
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/201200
Commit-Queue: Eric Boren <borenet@google.com>
Reviewed-by: Ben Wagner <benjaminwagner@google.com>
diff --git a/task_scheduler/Dockerfile b/task_scheduler/Dockerfile
new file mode 100644
index 0000000..e3c37ee
--- /dev/null
+++ b/task_scheduler/Dockerfile
@@ -0,0 +1,7 @@
+FROM gcr.io/skia-public/basedebian:testing-slim
+
+USER skia
+
+COPY . /
+
+ENTRYPOINT ["/usr/local/bin/task-scheduler"]
diff --git a/task_scheduler/Makefile b/task_scheduler/Makefile
index 99ff3ae..6b3e9d4 100644
--- a/task_scheduler/Makefile
+++ b/task_scheduler/Makefile
@@ -20,6 +20,10 @@
 task_scheduler: core_js elements_html skiaversion
 	go install -v ./go/task_scheduler
 
+.PHONY: task_scheduler_kube
+task_scheduler_kube: core_js elements_html skiaversion
+	CGO_ENABLED=0 GOOS=linux go install -a ./go/task_scheduler
+
 .PHONY: testgo
 testgo: skiaversion
 	go test ./go/... -v --small --medium
@@ -36,6 +40,14 @@
 task_scheduler_debug: debug skiaversion
 	go install -v ./go/task_scheduler
 
+.PHONY: release_kube
+release_kube: task_scheduler_kube
+	./build_docker_release
+
+.PHONY: push_kube
+push_kube: release_kube skia-public
+	pushk --message="$(MESSAGE)" task-scheduler
+
 .PHONY: push
 push: all
 	./build_release "`git log -n1 --format=%s`"
diff --git a/task_scheduler/build_docker_release b/task_scheduler/build_docker_release
new file mode 100755
index 0000000..895169aa
--- /dev/null
+++ b/task_scheduler/build_docker_release
@@ -0,0 +1,22 @@
+#!/bin/bash
+APPNAME=task-scheduler
+
+set -x -e
+
+# Copy files into the right locations in ${ROOT}.
+copy_release_files()
+{
+INSTALL="install -D --verbose --backup=none"
+INSTALL_DIR="install -d --verbose --backup=none"
+${INSTALL} --mode=644 -T Dockerfile                   ${ROOT}/Dockerfile
+${INSTALL} --mode=755 -T ${GOPATH}/bin/task_scheduler ${ROOT}/usr/local/bin/${APPNAME}
+${INSTALL} --mode=644 -T ../infra/config/recipes.cfg  ${ROOT}/usr/local/share/task-scheduler/recipes.cfg
+${INSTALL} --mode=644 -T ./res/img/fav.ico            ${ROOT}/usr/local/share/task-scheduler/res/img/fav.ico
+${INSTALL} --mode=644 -T ./res/js/core.js             ${ROOT}/usr/local/share/task-scheduler/res/js/core.js
+${INSTALL} --mode=644 -T ./res/vul/elements.html      ${ROOT}/usr/local/share/task-scheduler/res/vul/elements.html
+cp -r                    ./templates                  ${ROOT}/usr/local/share/task-scheduler/templates
+chmod 777 ${ROOT}/usr/local/share/task-scheduler/templates
+chmod 644 ${ROOT}/usr/local/share/task-scheduler/templates/*
+}
+
+source ../bash/docker_build.sh
diff --git a/task_scheduler/create-task-scheduler-internal-sa.sh b/task_scheduler/create-task-scheduler-internal-sa.sh
index 7d96c1e..3d5890e 100755
--- a/task_scheduler/create-task-scheduler-internal-sa.sh
+++ b/task_scheduler/create-task-scheduler-internal-sa.sh
@@ -9,10 +9,15 @@
 
 # New service account we will create.
 SA_NAME="task-scheduler-internal"
+SA_EMAIL="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
 
 cd /tmp/ramdisk
 
 gcloud --project=${PROJECT_ID} iam service-accounts create "${SA_NAME}" --display-name="Service account for Skia Task Scheduler Internal"
+gcloud projects add-iam-policy-binding google.com:skia-buildbots --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-firestore --member serviceAccount:${SA_EMAIL} --role roles/datastore.user
+gcloud projects add-iam-policy-binding skia-corp --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-corp --member serviceAccount:${SA_EMAIL} --role roles/bigtable.user
 
 gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
 
diff --git a/task_scheduler/create-task-scheduler-sa.sh b/task_scheduler/create-task-scheduler-sa.sh
index 14b7694..9dfa623 100755
--- a/task_scheduler/create-task-scheduler-sa.sh
+++ b/task_scheduler/create-task-scheduler-sa.sh
@@ -1,7 +1,7 @@
 #/bin/bash
 
-# Creates the service account used by Skia Task Scheduler, and export a key for
-# it into the kubernetes cluster as a secret.
+# Creates the service account used by Task Scheduler, and exports a key for it
+# into the Kubernetes cluster as a secret.
 
 set -e -x
 source ../kube/config.sh
@@ -9,12 +9,17 @@
 
 # New service account we will create.
 SA_NAME="task-scheduler"
+SA_EMAIL="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
 
 cd /tmp/ramdisk
 
 gcloud --project=${PROJECT_ID} iam service-accounts create "${SA_NAME}" --display-name="Service account for Skia Task Scheduler"
+gcloud projects add-iam-policy-binding google.com:skia-buildbots --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-firestore --member serviceAccount:${SA_EMAIL} --role roles/datastore.user
+gcloud projects add-iam-policy-binding skia-public --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-public --member serviceAccount:${SA_EMAIL} --role roles/bigtable.user
 
-gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
+gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_EMAIL}"
 
 kubectl create secret generic "${SA_NAME}" --from-file=key.json=${SA_NAME}.json
 
diff --git a/task_scheduler/go/task_scheduler/main.go b/task_scheduler/go/task_scheduler/main.go
index 681564b..21037ea 100644
--- a/task_scheduler/go/task_scheduler/main.go
+++ b/task_scheduler/go/task_scheduler/main.go
@@ -7,6 +7,7 @@
 	"fmt"
 	"html/template"
 	"net/http"
+	"os"
 	"os/user"
 	"path"
 	"path/filepath"
@@ -23,17 +24,16 @@
 	"go.skia.org/infra/go/depot_tools"
 	"go.skia.org/infra/go/gerrit"
 	"go.skia.org/infra/go/git/repograph"
+	"go.skia.org/infra/go/gitauth"
 	"go.skia.org/infra/go/httputils"
 	"go.skia.org/infra/go/human"
 	"go.skia.org/infra/go/isolate"
 	"go.skia.org/infra/go/login"
-	"go.skia.org/infra/go/metadata"
 	"go.skia.org/infra/go/periodic"
 	"go.skia.org/infra/go/skiaversion"
 	"go.skia.org/infra/go/sklog"
 	"go.skia.org/infra/go/swarming"
 	"go.skia.org/infra/go/util"
-	"go.skia.org/infra/go/webhook"
 	"go.skia.org/infra/task_scheduler/go/blacklist"
 	"go.skia.org/infra/task_scheduler/go/db"
 	"go.skia.org/infra/task_scheduler/go/db/firestore"
@@ -43,6 +43,7 @@
 	"go.skia.org/infra/task_scheduler/go/testutils"
 	"go.skia.org/infra/task_scheduler/go/tryjobs"
 	"go.skia.org/infra/task_scheduler/go/types"
+	"golang.org/x/oauth2"
 )
 
 const (
@@ -90,6 +91,7 @@
 	disableTryjobs    = flag.Bool("disable_try_jobs", false, "If set, no try jobs will be picked up.")
 	firestoreInstance = flag.String("firestore_instance", "", "Firestore instance to use, eg. \"production\"")
 	isolateServer     = flag.String("isolate_server", isolate.ISOLATE_SERVER_URL, "Which Isolate server to use.")
+	kube              = flag.Bool("kube", false, "Whether we're running in Kubernetes.")
 	local             = flag.Bool("local", false, "Whether we're running on a dev machine vs in production.")
 	// TODO(borenet): pubsubTopicSet is also used for as the blacklist
 	// instance name. Once all schedulers are using Firestore for their
@@ -516,8 +518,11 @@
 	r.HandleFunc("/oauth2callback/", login.OAuth2CallbackHandler)
 
 	sklog.AddLogsRedirect(r)
-
-	http.Handle("/", httputils.LoggingGzipRequestResponse(r))
+	h := httputils.LoggingGzipRequestResponse(r)
+	if *kube {
+		h = httputils.HealthzAndHTTPS(h)
+	}
+	http.Handle("/", h)
 	sklog.Infof("Ready to serve on %s", serverURL)
 	sklog.Fatal(http.ListenAndServe(*port, nil))
 }
@@ -525,11 +530,22 @@
 func main() {
 
 	// Global init.
-	common.InitWithMust(
-		APP_NAME,
-		common.PrometheusOpt(promPort),
-		common.CloudLoggingOpt(),
-	)
+	// TODO(borenet): Parse flags up front so that *kube can pick the logging
+	// option below. Temporary measure until all schedulers run in Kubernetes.
+	flag.Parse()
+	if *kube {
+		common.InitWithMust(
+			APP_NAME,
+			common.PrometheusOpt(promPort),
+			common.MetricsLoggingOpt(),
+		)
+	} else {
+		common.InitWithMust(
+			APP_NAME,
+			common.PrometheusOpt(promPort),
+			common.CloudLoggingOpt(),
+		)
+	}
 	defer common.Defer()
 
 	reloadTemplates()
@@ -541,46 +557,57 @@
 	ctx, cancelFn := context.WithCancel(context.Background())
 	cleanup.AtExit(cancelFn)
 
-	// Parse the time period.
-	period, err := human.ParseDuration(*timePeriod)
-	if err != nil {
-		sklog.Fatal(err)
-	}
-
 	// Get the absolute workdir.
 	wdAbs, err := filepath.Abs(*workdir)
 	if err != nil {
 		sklog.Fatal(err)
 	}
 
-	// Authenticated HTTP client.
-	oauthCacheFile := path.Join(wdAbs, "google_storage_token.data")
-	tokenSource, err := auth.NewLegacyTokenSource(*local, oauthCacheFile, "", auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope)
-	if err != nil {
-		sklog.Fatal(err)
-	}
-	httpClient := httputils.DefaultClientConfig().WithTokenSource(tokenSource).With2xxOnly().Client()
-
-	// Initialize Isolate client.
+	// Set up token source and authenticated API clients.
 	isolateServerUrl := *isolateServer
 	if *local {
 		isolateServerUrl = isolate.ISOLATE_SERVER_URL_FAKE
 	}
-	isolateClient, err := isolate.NewClient(wdAbs, isolateServerUrl)
-	if err != nil {
-		sklog.Fatal(err)
+	var isolateClient *isolate.Client
+	var tokenSource oauth2.TokenSource
+	gitcookiesPath := "/tmp/.gitcookies"
+	if *kube {
+		tokenSource, err = auth.NewDefaultTokenSource(*local, auth.SCOPE_USERINFO_EMAIL, auth.SCOPE_GERRIT, auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope, swarming.AUTH_SCOPE)
+		if err != nil {
+			sklog.Fatalf("Failed to create token source: %s", err)
+		}
+		isolateClient, err = isolate.NewClientWithServiceAccount(wdAbs, isolateServerUrl, os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"))
+		if err != nil {
+			sklog.Fatal(err)
+		}
+		if _, err := gitauth.New(tokenSource, gitcookiesPath, true, ""); err != nil {
+			sklog.Fatalf("Failed to create git cookie updater: %s", err)
+		}
+	} else {
+		oauthCacheFile := path.Join(wdAbs, "google_storage_token.data")
+		tokenSource, err = auth.NewLegacyTokenSource(*local, oauthCacheFile, "", auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope, swarming.AUTH_SCOPE)
+		if err != nil {
+			sklog.Fatalf("Failed to create token source: %s", err)
+		}
+		isolateClient, err = isolate.NewClient(wdAbs, isolateServerUrl)
+		if err != nil {
+			sklog.Fatal(err)
+		}
+		user, err := user.Current()
+		if err != nil {
+			sklog.Fatal(err)
+		}
+		gitcookiesPath = path.Join(user.HomeDir, ".gitcookies")
+		if !*local {
+			// The schedulers use the gitcookie created by gcompute-tools/git-cookie-authdaemon.
+			gitcookiesPath = filepath.Join(user.HomeDir, ".git-credential-cache", "cookie")
+		}
 	}
 
+	// Authenticated HTTP client.
+	httpClient := httputils.DefaultClientConfig().WithTokenSource(tokenSource).With2xxOnly().Client()
+
 	// Gerrit API client.
-	user, err := user.Current()
-	if err != nil {
-		sklog.Fatal(err)
-	}
-	gitcookiesPath := path.Join(user.HomeDir, ".gitcookies")
-	if !*local {
-		// The schedulers use the gitcookie created by gcompute-tools/git-cookie-authdaemon.
-		gitcookiesPath = filepath.Join(user.HomeDir, ".git-credential-cache", "cookie")
-	}
 	gerrit, err := gerrit.NewGerrit(gerrit.GERRIT_SKIA_URL, gitcookiesPath, nil)
 	if err != nil {
 		sklog.Fatal(err)
@@ -624,11 +651,7 @@
 		go testutils.PeriodicallyUpdateMockTasksForTesting(swarmTestClient)
 		swarm = swarmTestClient
 	} else {
-		ts, err := auth.NewLegacyTokenSource(*local, oauthCacheFile, "", swarming.AUTH_SCOPE)
-		if err != nil {
-			sklog.Fatal(err)
-		}
-		cfg := httputils.DefaultClientConfig().WithTokenSource(ts).WithDialTimeout(time.Minute).With2xxOnly()
+		cfg := httputils.DefaultClientConfig().WithTokenSource(tokenSource).WithDialTimeout(time.Minute).With2xxOnly()
 		cfg.RequestTimeout = time.Minute
 		swarm, err = swarming.NewApiClient(cfg.Client(), *swarmingServer)
 		if err != nil {
@@ -645,6 +668,12 @@
 		sklog.Fatal(err)
 	}
 
+	// Parse the time period.
+	period, err := human.ParseDuration(*timePeriod)
+	if err != nil {
+		sklog.Fatal(err)
+	}
+
 	// Create and start the task scheduler.
 	sklog.Infof("Creating task scheduler.")
 	serverURL := "https://" + *host
@@ -679,12 +708,6 @@
 	// Start up the web server.
 	login.SimpleInitMust(*port, *local)
 
-	if *local {
-		webhook.InitRequestSaltForTesting()
-	} else {
-		webhook.MustInitRequestSaltFromMetadata(metadata.WEBHOOK_REQUEST_SALT)
-	}
-
 	go runServer(serverURL, tsDb)
 
 	// Run indefinitely, responding to HTTP requests.