# Prometheus alerting rules: generic infrastructure alerts.
#
# Annotation values are templates: `$labels.<name>` reads a label of the
# alerting series and `$externalLabels.<name>` reads an external label
# configured on the Prometheus server. `abbr` is a short identifier for the
# alert and `description` is the full human-readable message.
groups:
  - name: general
    rules:
      # Fires when the `up` series has been 0 for one minute.
      - alert: JobDown
        expr: up == 0
        for: 1m
        labels:
          category: infra
          severity: critical
        annotations:
          abbr: >-
            {{ $labels.job }} - {{ $labels.hostname }} -
            {{ $externalLabels.cluster }}
          # The duration in this text mirrors `for: 1m` above; keep the two
          # in sync when changing either one.
          description: >-
            {{ $labels.job }} on {{ $labels.hostname }} in
            {{ $externalLabels.cluster }} has been down for more than 1 minute.

      # Fires when `go_goroutines` stays above 3000 for two minutes.
      - alert: TooManyGoRoutines
        expr: go_goroutines > 3000
        for: 2m
        labels:
          category: infra
          severity: warning
        annotations:
          abbr: '{{ $labels.job }} - {{ $externalLabels.cluster }}'
          description: >-
            Too many Go routines in {{ $labels.hostname }} for app
            {{ $labels.job }}.

      # Fires when `process_open_fds` stays above 5000 for two minutes.
      - alert: TooManyOpenFDs
        expr: process_open_fds > 5000
        # Same hold period as TooManyGoRoutines, so a single sample above the
        # threshold does not fire the alert on its own.
        for: 2m
        labels:
          category: infra
          severity: warning
        annotations:
          abbr: '{{ $labels.job }} - {{ $externalLabels.cluster }}'
          description: >-
            Too many open file handles on {{ $labels.hostname }} for app
            {{ $labels.job }}.