[CQ Watcher] Make CQ_TRYBOTS_COUNT_THRESHOLD match the alert threshold
Change-Id: Ie91ef0ee28ae6adf73e99bb3fad31c0f17b13790
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/c/179846
Reviewed-by: Ben Wagner <benjaminwagner@google.com>
Commit-Queue: Ravi Mistry <rmistry@google.com>
diff --git a/go/cq/cq.go b/go/cq/cq.go
index 93b5bba..579a0ce 100644
--- a/go/cq/cq.go
+++ b/go/cq/cq.go
@@ -33,7 +33,7 @@
// Thresholds after which errors are logged.
CQ_TRYBOT_DURATION_SECS_THRESHOLD = 2700
- CQ_TRYBOTS_COUNT_THRESHOLD = 35
+ CQ_TRYBOTS_COUNT_THRESHOLD = 50
)
var (
diff --git a/promk/prometheus/alerts_public.yml b/promk/prometheus/alerts_public.yml
index 5604346..0c63a66 100644
--- a/promk/prometheus/alerts_public.yml
+++ b/promk/prometheus/alerts_public.yml
@@ -89,6 +89,7 @@
abbr: 'Too many CLs in CQ.'
description: 'There are 10 CLs or more in a Skia CL. https://skia.googlesource.com/buildbot/%2B/master/cq_watcher/PROD.md#too_many_cls'
+ # Keep in sync: update CQ_TRYBOT_DURATION_SECS_THRESHOLD in go/cq/cq.go if the threshold in the expr below is changed.
- alert: CQWatcherTrybotDuration
expr: max_over_time(cq_watcher_in_flight_trybot_duration{app="cq-watcher"}[20m]) > 2700
labels:
@@ -98,6 +99,7 @@
abbr: 'CQ trybot running for too long.'
description: '{{ $labels.trybot }} ran longer than 45 mins. Playbook: https://skia.googlesource.com/buildbot/%2B/master/cq_watcher/PROD.md#trybot_duration_beyond_threshold Direct link to logs: https://pantheon.corp.google.com/logs/viewer?project=skia-public&advancedFilter=logName%3D%22projects%2Fskia-public%2Flogs%2Fcq-watcher%22%20AND%20textPayload:%20%22CQTrybotDurationError%22'
+ # Keep in sync: update CQ_TRYBOTS_COUNT_THRESHOLD in go/cq/cq.go if the threshold in the expr below is changed.
- alert: CQWatcherTrybotsCount
expr: max_over_time(cq_watcher_in_flight_trybot_num{app="cq-watcher"}[20m]) > 50
labels: