[task scheduler] Fixes for Kubernetes
Bug: skia:8636
Change-Id: I45440c3170ed168b185fdd111c07625fcff0a772
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/201200
Commit-Queue: Eric Boren <borenet@google.com>
Reviewed-by: Ben Wagner <benjaminwagner@google.com>
diff --git a/task_scheduler/Dockerfile b/task_scheduler/Dockerfile
new file mode 100644
index 0000000..e3c37ee
--- /dev/null
+++ b/task_scheduler/Dockerfile
@@ -0,0 +1,7 @@
+FROM gcr.io/skia-public/basedebian:testing-slim
+
+USER skia
+
+COPY . /
+
+ENTRYPOINT ["/usr/local/bin/task-scheduler"]
diff --git a/task_scheduler/Makefile b/task_scheduler/Makefile
index 99ff3ae..6b3e9d4 100644
--- a/task_scheduler/Makefile
+++ b/task_scheduler/Makefile
@@ -20,6 +20,10 @@
task_scheduler: core_js elements_html skiaversion
go install -v ./go/task_scheduler
+.PHONY: task_scheduler_kube
+task_scheduler_kube: core_js elements_html skiaversion
+ CGO_ENABLED=0 GOOS=linux go install -a ./go/task_scheduler
+
.PHONY: testgo
testgo: skiaversion
go test ./go/... -v --small --medium
@@ -36,6 +40,14 @@
task_scheduler_debug: debug skiaversion
go install -v ./go/task_scheduler
+.PHONY: release_kube
+release_kube: task_scheduler_kube
+ ./build_docker_release
+
+.PHONY: push_kube
+push_kube: release_kube skia-public
+ pushk --message="$(MESSAGE)" task-scheduler
+
.PHONY: push
push: all
./build_release "`git log -n1 --format=%s`"
diff --git a/task_scheduler/build_docker_release b/task_scheduler/build_docker_release
new file mode 100755
index 0000000..895169aa
--- /dev/null
+++ b/task_scheduler/build_docker_release
@@ -0,0 +1,22 @@
+#!/bin/bash
+APPNAME=task-scheduler
+
+set -x -e
+
+# Copy files into the right locations in ${ROOT}.
+copy_release_files()
+{
+INSTALL="install -D --verbose --backup=none"
+INSTALL_DIR="install -d --verbose --backup=none"
+${INSTALL} --mode=644 -T Dockerfile ${ROOT}/Dockerfile
+${INSTALL} --mode=755 -T ${GOPATH}/bin/task_scheduler ${ROOT}/usr/local/bin/${APPNAME}
+${INSTALL} --mode=644 -T ../infra/config/recipes.cfg ${ROOT}/usr/local/share/task-scheduler/recipes.cfg
+${INSTALL} --mode=644 -T ./res/img/fav.ico ${ROOT}/usr/local/share/task-scheduler/res/img/fav.ico
+${INSTALL} --mode=644 -T ./res/js/core.js ${ROOT}/usr/local/share/task-scheduler/res/js/core.js
+${INSTALL} --mode=644 -T ./res/vul/elements.html ${ROOT}/usr/local/share/task-scheduler/res/vul/elements.html
+cp -r ./templates ${ROOT}/usr/local/share/task-scheduler/templates
+chmod 777 ${ROOT}/usr/local/share/task-scheduler/templates
+chmod 644 ${ROOT}/usr/local/share/task-scheduler/templates/*
+}
+
+source ../bash/docker_build.sh
diff --git a/task_scheduler/create-task-scheduler-internal-sa.sh b/task_scheduler/create-task-scheduler-internal-sa.sh
index 7d96c1e..3d5890e 100755
--- a/task_scheduler/create-task-scheduler-internal-sa.sh
+++ b/task_scheduler/create-task-scheduler-internal-sa.sh
@@ -9,10 +9,15 @@
# New service account we will create.
SA_NAME="task-scheduler-internal"
+SA_EMAIL="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
cd /tmp/ramdisk
gcloud --project=${PROJECT_ID} iam service-accounts create "${SA_NAME}" --display-name="Service account for Skia Task Scheduler Internal"
+gcloud projects add-iam-policy-binding google.com:skia-buildbots --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-firestore --member serviceAccount:${SA_EMAIL} --role roles/datastore.user
+gcloud projects add-iam-policy-binding skia-corp --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-corp --member serviceAccount:${SA_EMAIL} --role roles/bigtable.user
gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
diff --git a/task_scheduler/create-task-scheduler-sa.sh b/task_scheduler/create-task-scheduler-sa.sh
index 14b7694..9dfa623 100755
--- a/task_scheduler/create-task-scheduler-sa.sh
+++ b/task_scheduler/create-task-scheduler-sa.sh
@@ -1,7 +1,7 @@
#/bin/bash
-# Creates the service account used by Skia Task Scheduler, and export a key for
-# it into the kubernetes cluster as a secret.
+# Creates the service account used by Task Scheduler, and exports a key for it
+# into the Kubernetes cluster as a secret.
set -e -x
source ../kube/config.sh
@@ -9,12 +9,17 @@
# New service account we will create.
SA_NAME="task-scheduler"
+SA_EMAIL="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
cd /tmp/ramdisk
gcloud --project=${PROJECT_ID} iam service-accounts create "${SA_NAME}" --display-name="Service account for Skia Task Scheduler"
+gcloud projects add-iam-policy-binding google.com:skia-buildbots --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-firestore --member serviceAccount:${SA_EMAIL} --role roles/datastore.user
+gcloud projects add-iam-policy-binding skia-public --member serviceAccount:${SA_EMAIL} --role roles/pubsub.admin
+gcloud projects add-iam-policy-binding skia-public --member serviceAccount:${SA_EMAIL} --role roles/bigtable.user
-gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_NAME}@${PROJECT_SUBDOMAIN}.iam.gserviceaccount.com"
+gcloud beta iam service-accounts keys create ${SA_NAME}.json --iam-account="${SA_EMAIL}"
kubectl create secret generic "${SA_NAME}" --from-file=key.json=${SA_NAME}.json
diff --git a/task_scheduler/go/task_scheduler/main.go b/task_scheduler/go/task_scheduler/main.go
index 681564b..21037ea 100644
--- a/task_scheduler/go/task_scheduler/main.go
+++ b/task_scheduler/go/task_scheduler/main.go
@@ -7,6 +7,7 @@
"fmt"
"html/template"
"net/http"
+ "os"
"os/user"
"path"
"path/filepath"
@@ -23,17 +24,16 @@
"go.skia.org/infra/go/depot_tools"
"go.skia.org/infra/go/gerrit"
"go.skia.org/infra/go/git/repograph"
+ "go.skia.org/infra/go/gitauth"
"go.skia.org/infra/go/httputils"
"go.skia.org/infra/go/human"
"go.skia.org/infra/go/isolate"
"go.skia.org/infra/go/login"
- "go.skia.org/infra/go/metadata"
"go.skia.org/infra/go/periodic"
"go.skia.org/infra/go/skiaversion"
"go.skia.org/infra/go/sklog"
"go.skia.org/infra/go/swarming"
"go.skia.org/infra/go/util"
- "go.skia.org/infra/go/webhook"
"go.skia.org/infra/task_scheduler/go/blacklist"
"go.skia.org/infra/task_scheduler/go/db"
"go.skia.org/infra/task_scheduler/go/db/firestore"
@@ -43,6 +43,7 @@
"go.skia.org/infra/task_scheduler/go/testutils"
"go.skia.org/infra/task_scheduler/go/tryjobs"
"go.skia.org/infra/task_scheduler/go/types"
+ "golang.org/x/oauth2"
)
const (
@@ -90,6 +91,7 @@
disableTryjobs = flag.Bool("disable_try_jobs", false, "If set, no try jobs will be picked up.")
firestoreInstance = flag.String("firestore_instance", "", "Firestore instance to use, eg. \"production\"")
isolateServer = flag.String("isolate_server", isolate.ISOLATE_SERVER_URL, "Which Isolate server to use.")
+ kube = flag.Bool("kube", false, "Whether we're running in Kubernetes.")
local = flag.Bool("local", false, "Whether we're running on a dev machine vs in production.")
// TODO(borenet): pubsubTopicSet is also used for as the blacklist
// instance name. Once all schedulers are using Firestore for their
@@ -516,8 +518,11 @@
r.HandleFunc("/oauth2callback/", login.OAuth2CallbackHandler)
sklog.AddLogsRedirect(r)
-
- http.Handle("/", httputils.LoggingGzipRequestResponse(r))
+ h := httputils.LoggingGzipRequestResponse(r)
+ if *kube {
+ h = httputils.HealthzAndHTTPS(h)
+ }
+ http.Handle("/", h)
sklog.Infof("Ready to serve on %s", serverURL)
sklog.Fatal(http.ListenAndServe(*port, nil))
}
@@ -525,11 +530,22 @@
func main() {
// Global init.
- common.InitWithMust(
- APP_NAME,
- common.PrometheusOpt(promPort),
- common.CloudLoggingOpt(),
- )
+ // TODO(borenet): Temporary measure until all schedulers are running in
+ // kubernetes.
+ flag.Parse()
+ if *kube {
+ common.InitWithMust(
+ APP_NAME,
+ common.PrometheusOpt(promPort),
+ common.MetricsLoggingOpt(),
+ )
+ } else {
+ common.InitWithMust(
+ APP_NAME,
+ common.PrometheusOpt(promPort),
+ common.CloudLoggingOpt(),
+ )
+ }
defer common.Defer()
reloadTemplates()
@@ -541,46 +557,57 @@
ctx, cancelFn := context.WithCancel(context.Background())
cleanup.AtExit(cancelFn)
- // Parse the time period.
- period, err := human.ParseDuration(*timePeriod)
- if err != nil {
- sklog.Fatal(err)
- }
-
// Get the absolute workdir.
wdAbs, err := filepath.Abs(*workdir)
if err != nil {
sklog.Fatal(err)
}
- // Authenticated HTTP client.
- oauthCacheFile := path.Join(wdAbs, "google_storage_token.data")
- tokenSource, err := auth.NewLegacyTokenSource(*local, oauthCacheFile, "", auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope)
- if err != nil {
- sklog.Fatal(err)
- }
- httpClient := httputils.DefaultClientConfig().WithTokenSource(tokenSource).With2xxOnly().Client()
-
- // Initialize Isolate client.
+ // Set up token source and authenticated API clients.
isolateServerUrl := *isolateServer
if *local {
isolateServerUrl = isolate.ISOLATE_SERVER_URL_FAKE
}
- isolateClient, err := isolate.NewClient(wdAbs, isolateServerUrl)
- if err != nil {
- sklog.Fatal(err)
+ var isolateClient *isolate.Client
+ var tokenSource oauth2.TokenSource
+ gitcookiesPath := "/tmp/.gitcookies"
+ if *kube {
+ tokenSource, err = auth.NewDefaultTokenSource(*local, auth.SCOPE_USERINFO_EMAIL, auth.SCOPE_GERRIT, auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope, swarming.AUTH_SCOPE)
+ if err != nil {
+ sklog.Fatalf("Failed to create token source: %s", err)
+ }
+ isolateClient, err = isolate.NewClientWithServiceAccount(wdAbs, isolateServerUrl, os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"))
+ if err != nil {
+ sklog.Fatal(err)
+ }
+ if _, err := gitauth.New(tokenSource, gitcookiesPath, true, ""); err != nil {
+ sklog.Fatalf("Failed to create git cookie updater: %s", err)
+ }
+ } else {
+ oauthCacheFile := path.Join(wdAbs, "google_storage_token.data")
+ tokenSource, err = auth.NewLegacyTokenSource(*local, oauthCacheFile, "", auth.SCOPE_READ_WRITE, pubsub.AUTH_SCOPE, datastore.ScopeDatastore, bigtable.Scope, swarming.AUTH_SCOPE)
+ if err != nil {
+ sklog.Fatalf("Failed to create token source: %s", err)
+ }
+ isolateClient, err = isolate.NewClient(wdAbs, isolateServerUrl)
+ if err != nil {
+ sklog.Fatal(err)
+ }
+ user, err := user.Current()
+ if err != nil {
+ sklog.Fatal(err)
+ }
+ gitcookiesPath = path.Join(user.HomeDir, ".gitcookies")
+ if !*local {
+ // The schedulers use the gitcookie created by gcompute-tools/git-cookie-authdaemon.
+ gitcookiesPath = filepath.Join(user.HomeDir, ".git-credential-cache", "cookie")
+ }
}
+ // Authenticated HTTP client.
+ httpClient := httputils.DefaultClientConfig().WithTokenSource(tokenSource).With2xxOnly().Client()
+
// Gerrit API client.
- user, err := user.Current()
- if err != nil {
- sklog.Fatal(err)
- }
- gitcookiesPath := path.Join(user.HomeDir, ".gitcookies")
- if !*local {
- // The schedulers use the gitcookie created by gcompute-tools/git-cookie-authdaemon.
- gitcookiesPath = filepath.Join(user.HomeDir, ".git-credential-cache", "cookie")
- }
gerrit, err := gerrit.NewGerrit(gerrit.GERRIT_SKIA_URL, gitcookiesPath, nil)
if err != nil {
sklog.Fatal(err)
@@ -624,11 +651,7 @@
go testutils.PeriodicallyUpdateMockTasksForTesting(swarmTestClient)
swarm = swarmTestClient
} else {
- ts, err := auth.NewLegacyTokenSource(*local, oauthCacheFile, "", swarming.AUTH_SCOPE)
- if err != nil {
- sklog.Fatal(err)
- }
- cfg := httputils.DefaultClientConfig().WithTokenSource(ts).WithDialTimeout(time.Minute).With2xxOnly()
+ cfg := httputils.DefaultClientConfig().WithTokenSource(tokenSource).WithDialTimeout(time.Minute).With2xxOnly()
cfg.RequestTimeout = time.Minute
swarm, err = swarming.NewApiClient(cfg.Client(), *swarmingServer)
if err != nil {
@@ -645,6 +668,12 @@
sklog.Fatal(err)
}
+ // Parse the time period.
+ period, err := human.ParseDuration(*timePeriod)
+ if err != nil {
+ sklog.Fatal(err)
+ }
+
// Create and start the task scheduler.
sklog.Infof("Creating task scheduler.")
serverURL := "https://" + *host
@@ -679,12 +708,6 @@
// Start up the web server.
login.SimpleInitMust(*port, *local)
- if *local {
- webhook.InitRequestSaltForTesting()
- } else {
- webhook.MustInitRequestSaltFromMetadata(metadata.WEBHOOK_REQUEST_SALT)
- }
-
go runServer(serverURL, tsDb)
// Run indefinitely, responding to HTTP requests.