Add alert for Swarming bot uptime

Change-Id: I0d764fbec4e6fd8dfc057601c3c9c8f55a400631
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/270950
Reviewed-by: Kevin Lubick <kjlubick@google.com>
Commit-Queue: Eric Boren <borenet@google.com>
diff --git a/promk/prometheus/alerts_general.yml b/promk/prometheus/alerts_general.yml
index 13c0350..2325a7e 100644
--- a/promk/prometheus/alerts_general.yml
+++ b/promk/prometheus/alerts_general.yml
@@ -464,6 +464,15 @@
       abbr: '{{ $labels.bot }}'
       description: 'Swarming bot {{ $labels.bot }} is quarantined because the device is {{ $labels.device_state }} and has not resolved itself in 1+ hours. https://{{ $labels.swarming }}/bot?id={{ $labels.bot }} https://goto.google.com/skolo-maintenance'
 
+  - alert: BotUptime  # Fires per Swarming bot when its reported uptime exceeds the threshold in expr.
+    expr: swarming_bots_uptime_s{bot!="skia-rpi-template"} / 60 / 60 > 36  # Excludes bot "skia-rpi-template"; "/ 60 / 60" presumably converts seconds to hours (per the _s suffix), so the threshold is 36h - TODO confirm units.
+    labels:
+      category: infra
+      severity: critical
+    annotations:
+      abbr: '{{ $labels.bot }}'  # Short identifier for the alert: the bot label of the matching series.
+      description: 'Swarming bot {{ $labels.bot }} has gone too long without a reboot. Check the events on the Swarming bot page and reboot manually if necessary. https://{{ $labels.swarming }}/bot?id={{ $labels.bot }} https://goto.google.com/skolo-maintenance'
+
 # Alerts for supported branches.
 
   - alert: MissingCQConfigForSupportedBranch