Add 'cluster-autoscaler.kubernetes.io/safe-to-evict: true' annotation to all pods. Clusters are now running with Cluster Autoscaler turned on, which automatically resizes the Node Pools. https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-autoscaler That means that every pod should have the following annotation: ```yaml annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: 'true' ``` If you need finer-grained control over how your pods are started and stopped, that can be done by defining a PodDisruptionBudget. https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ CockroachDB defines a PodDisruptionBudget and is a good example of such a budget. Bug: skia:13593 Change-Id: I4f6f2cd8ac048bb34eccd8ef41b31c05e7218c36 Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/566297 Commit-Queue: Joe Gregorio <jcgregorio@google.com> Reviewed-by: Eric Boren <borenet@google.com> Reviewed-by: Kevin Lubick <kjlubick@google.com>

commit: 6c9589e15d4da74a6a3a5153fd44a404f2852b02 [log] [tgz]
author: Joe Gregorio <jcgregorio@google.com> Mon Aug 08 10:58:53 2022 -0400
committer: SkCQ <skcq-be@skia-corp.google.com.iam.gserviceaccount.com> Mon Aug 08 15:41:46 2022 +0000
tree: 545078b1f8090b9e56355cb0dac0cf50a81f7542
parent: a8f8a839a8ac93bc4966e61598795aafd3b58371 [diff]
diff --git a/autoroll/go/autoroll-config-converter/autoroll-be.yaml.template b/autoroll/go/autoroll-config-converter/autoroll-be.yaml.template
index 50d2422..55c7dd5 100644
--- a/autoroll/go/autoroll-config-converter/autoroll-be.yaml.template
+++ b/autoroll/go/autoroll-config-converter/autoroll-be.yaml.template

@@ -34,6 +34,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
     spec:
       automountServiceAccountToken: false
       securityContext:

diff --git a/autoroll/go/autoroll-fe/autoroll-fe.yaml.template b/autoroll/go/autoroll-fe/autoroll-fe.yaml.template
index caf7b49..90cd707 100644
--- a/autoroll/go/autoroll-fe/autoroll-fe.yaml.template
+++ b/autoroll/go/autoroll-fe/autoroll-fe.yaml.template

@@ -36,6 +36,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
     spec:
       automountServiceAccountToken: false
       securityContext:

diff --git a/golden/k8s-config-templates/gold-baselineserver-template.yaml b/golden/k8s-config-templates/gold-baselineserver-template.yaml
index 331540a..3b3f87a 100644
--- a/golden/k8s-config-templates/gold-baselineserver-template.yaml
+++ b/golden/k8s-config-templates/gold-baselineserver-template.yaml

@@ -34,6 +34,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
     spec:
       affinity:
         nodeAffinity: # Schedule these pods to a specific set of nodes.

diff --git a/golden/k8s-config-templates/gold-diffcalculator-template.yaml b/golden/k8s-config-templates/gold-diffcalculator-template.yaml
index 12bfe82..96c857d 100644
--- a/golden/k8s-config-templates/gold-diffcalculator-template.yaml
+++ b/golden/k8s-config-templates/gold-diffcalculator-template.yaml

@@ -32,6 +32,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
     spec:
       affinity:
         nodeAffinity: # Schedule these pods to a specific set of nodes.

diff --git a/golden/k8s-config-templates/gold-gitilesfollower-template.yaml b/golden/k8s-config-templates/gold-gitilesfollower-template.yaml
index c39d263..3872ea4 100644
--- a/golden/k8s-config-templates/gold-gitilesfollower-template.yaml
+++ b/golden/k8s-config-templates/gold-gitilesfollower-template.yaml

@@ -32,6 +32,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
     spec:
       automountServiceAccountToken: false
       securityContext:

diff --git a/golden/k8s-config-templates/gold-ingestion-template.yaml b/golden/k8s-config-templates/gold-ingestion-template.yaml
index 97c5059..c37c865 100644
--- a/golden/k8s-config-templates/gold-ingestion-template.yaml
+++ b/golden/k8s-config-templates/gold-ingestion-template.yaml

@@ -32,6 +32,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
     spec:
       affinity:
         nodeAffinity: # Schedule these pods to a specific set of nodes.

diff --git a/golden/k8s-config-templates/gold-periodictasks-template.yaml b/golden/k8s-config-templates/gold-periodictasks-template.yaml
index cbcd188..dcf2b86 100644
--- a/golden/k8s-config-templates/gold-periodictasks-template.yaml
+++ b/golden/k8s-config-templates/gold-periodictasks-template.yaml

@@ -32,6 +32,7 @@
       annotations:
         prometheus.io.scrape: "true"
         prometheus.io.port: "20000"
+        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
     spec:
       automountServiceAccountToken: false
       securityContext:

diff --git a/launch.md b/launch.md
index 4f52467..607f1b4 100644
--- a/launch.md
+++ b/launch.md

@@ -220,6 +220,22 @@
   prometheus.io.port: '20000'
 ```
 
+- Clusters run with [Cluster
+  Autoscaler](https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-autoscaler),
+  which means that every pod should have the following annotation:
+
+```yaml
+annotations:
+  cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'
+```
+
+If you need finer-grained control over how your pods are started and stopped,
+that can be done by defining a
+[PodDisruptionBudget](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/).
+CockroachDB defines a PodDisruptionBudget and is a good example of such a
+budget.
+
+
 - Metrics will be available on
   [thanos-query.skia.org](https://thanos-query.skia.org/).
 - The metrics will be labeled `app=<foo>` where `foo` is the first argument to
commit	6c9589e15d4da74a6a3a5153fd44a404f2852b02	[log] [tgz]
author	Joe Gregorio <jcgregorio@google.com>	Mon Aug 08 10:58:53 2022 -0400
committer	SkCQ <skcq-be@skia-corp.google.com.iam.gserviceaccount.com>	Mon Aug 08 15:41:46 2022 +0000
tree	545078b1f8090b9e56355cb0dac0cf50a81f7542
parent	a8f8a839a8ac93bc4966e61598795aafd3b58371 [diff]