# Source blob: 514114e1db687f79772e7d3e2c0431f2eac15588
# Alerts for things in the skia-corp cluster only.
groups:
- name: general
  rules:
  # Fires once the last roll result reported for the skia-internal-autoroll
  # roller has stayed at 0 (a failed roll, per the description) for 10 minutes.
  - alert: InternalAutoRoll
    expr: autoroll_last_roll_result{roller="skia-internal-autoroll"} == 0
    for: 10m
    labels:
      category: infra
      severity: warning
      owner: borenet@google.com
    annotations:
      description: >-
        The last DEPS roll into internal_test repo failed.
        https://skia.googlesource.com/buildbot/%2B/main/autoroll/PROD.md#autoroll_failed

  # The metric is divided by 60 twice and compared against 24, i.e. it is
  # treated as seconds converted to hours (presumably seconds, per the `_s`
  # suffix). No `for:` clause, so the alert fires as soon as the threshold is
  # crossed.
  - alert: InternalAutoRoll24H
    expr: liveness_last_autoroll_landed_s{roller="skia-internal-autoroll"}/60/60 > 24
    labels:
      category: infra
      severity: warning
      owner: borenet@google.com
    annotations:
      description: >-
        The last-landed roll into internal_test was over 24h ago.
        https://skia.googlesource.com/buildbot/%2B/main/autoroll/PROD.md#no_rolls_24h

  # rate() yields a per-second average over the 1h window, so the 0.001
  # threshold corresponds to roughly 3.6 ERROR log lines per hour. The regex
  # matcher covers every app whose name starts with "trybot-updater".
  - alert: TrybotUpdaterErrorRate
    expr: rate(num_log_lines{level="ERROR",app=~"trybot-updater.*"}[1h]) > 0.001
    labels:
      category: infra
      severity: critical
      owner: rmistry@google.com
    annotations:
      abbr: '{{ $labels.app }}'
      description: >-
        The error rate on {{ $labels.app }} is too high.
        https://console.cloud.google.com/logs/viewer?project={{ $labels.project }}&minLogLevel=500&resource=container&logName=projects%2F{{ $labels.project }}%2Flogs%2F{{ $labels.app }}

  # Same threshold as TrybotUpdaterErrorRate, for the single app "skcq-be".
  # An equality matcher is used because the name contains no regex
  # metacharacters; Prometheus regex matchers are fully anchored, so this
  # selects exactly the same series as `=~"skcq-be"`.
  - alert: SkCQBackendErrorRate
    expr: rate(num_log_lines{level="ERROR",app="skcq-be"}[1h]) > 0.001
    labels:
      category: infra
      severity: critical
      owner: rmistry@google.com
    annotations:
      abbr: '{{ $labels.app }}'
      description: >-
        The error rate on {{ $labels.app }} is too high.
        https://console.cloud.google.com/logs/viewer?project={{ $labels.project }}&minLogLevel=500&resource=container&logName=projects%2F{{ $labels.project }}%2Flogs%2F{{ $labels.app }}