| # This file defines alerts to be triggered by the server. |
| |
| # Perf alert rule: triggers when the value read from skiaperf.alerting.new.value is above zero. |
| # NOTE(review): the query syntax looks like InfluxDB's SQL-like language - confirm against the server. |
| [[rule]] |
| name = "Perf Alerts" |
| message = "At least one perf alert has been found. Please visit https://skiaperf.com/alerts/ to triage." |
| # `limit 1` restricts the result to a single point; `x` in `condition` is presumably that value - TODO confirm. |
| query = "select value from skiaperf.alerting.new.value limit 1" |
| condition = "x > 0" |
| actions = ["Email(alerts@skia.org)"] |
| # NOTE(review): presumably lets the alert clear itself once the condition stops holding - confirm. |
| auto-dismiss = true |
| |
| # Buildslave availability rule for the client.skia master; reads the prober.master_host failure series. |
| [[rule]] |
| name = "Buildslaves offline (client.skia)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia/buildslaves" |
| # The 10m window here backs the "more than ten minutes" wording in `message`; keep the two in sync. |
| # NOTE(review): a mean of >= 1 presumably means every probe in the window reported failure - confirm. |
| query = "select mean(value) from prober.master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| # NOTE(review): false presumably means the alert stays active until someone dismisses it - confirm. |
| auto-dismiss = false |
| |
| # Buildslave availability rule for the client.skia.android master; reads the prober.android_master_host failure series. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.android)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.android/buildslaves" |
| # The 10m window here backs the "more than ten minutes" wording in `message`; keep the two in sync. |
| # NOTE(review): a mean of >= 1 presumably means every probe in the window reported failure - confirm. |
| query = "select mean(value) from prober.android_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| # NOTE(review): false presumably means the alert stays active until someone dismisses it - confirm. |
| auto-dismiss = false |
| |
| # Buildslave availability rule for the client.skia.compile master; reads the prober.compile_master_host failure series. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.compile)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.compile/buildslaves" |
| # The 10m window here backs the "more than ten minutes" wording in `message`; keep the two in sync. |
| # NOTE(review): a mean of >= 1 presumably means every probe in the window reported failure - confirm. |
| query = "select mean(value) from prober.compile_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| # NOTE(review): false presumably means the alert stays active until someone dismisses it - confirm. |
| auto-dismiss = false |
| |
| # Buildslave availability rule for the client.skia.fyi master; reads the prober.fyi_master_host failure series. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.fyi)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.fyi/buildslaves" |
| # The 10m window here backs the "more than ten minutes" wording in `message`; keep the two in sync. |
| # NOTE(review): a mean of >= 1 presumably means every probe in the window reported failure - confirm. |
| query = "select mean(value) from prober.fyi_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| # NOTE(review): false presumably means the alert stays active until someone dismisses it - confirm. |
| auto-dismiss = false |
| |
| # Ingestion staleness rule for the nano-ingest ingester; reads its time-since-last-successful-update gauge. |
| [[rule]] |
| name = "Ingestion Failure (nanobench)" |
| message = "At least two rounds of data ingestion have failed back to back." |
| # Averages the gauge over the last 10 minutes (`time > now() - 10m`). |
| query = "select mean(value) from ingest.ingester.nano-ingest.gauge.time-since-last-successful-update.value where time > now() - 10m" |
| # NOTE(review): the unit of 750 is not shown here; presumably sized to cover the two failed rounds named in |
| # `message` - confirm against the ingester's metric and schedule before changing it. |
| condition = "x >= 750" |
| actions = ["Email(infra-alerts@skia.org)"] |
| # NOTE(review): false presumably means the alert stays active until someone dismisses it - confirm. |
| auto-dismiss = false |