| # This file defines alerts to be triggered by the server. |
| |
| # Perf alerts: reads one value (limit 1) from skiaperf.alerting.new.value |
| # and triggers when the condition "x > 0" holds; x is presumably the |
| # query result. Notifies alerts@skia.org and points at the triage page. |
| [[rule]] |
| name = "Perf Alerts" |
| message = "At least one perf alert has been found. Please visit https://skiaperf.com/alerts/ to triage." |
| query = "select value from skiaperf.alerting.new.value limit 1" |
| condition = "x > 0" |
| actions = ["Email(alerts@skia.org)"] |
| # NOTE(review): auto-dismiss presumably clears the alert once the condition |
| # stops holding, and nag is presumably the reminder interval - confirm |
| # against the alert server. |
| auto-dismiss = true |
| nag = "24h" |
| |
| # Buildslave availability for the client.skia master: averages |
| # prober.master_host.failure.value over the last 10 minutes and triggers |
| # when that mean is >= 1. |
| # NOTE(review): presumably the series is 1 per failed probe, so a mean of 1 |
| # means every probe in the window failed - confirm. |
| [[rule]] |
| name = "Buildslaves offline (client.skia)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia/buildslaves" |
| query = "select mean(value) from prober.master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| auto-dismiss = false |
| nag = "1h" |
| |
| # Buildslave availability for the client.skia.android master: averages |
| # prober.android_master_host.failure.value over the last 10 minutes and |
| # triggers when that mean is >= 1. |
| # NOTE(review): presumably the series is 1 per failed probe, so a mean of 1 |
| # means every probe in the window failed - confirm. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.android)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.android/buildslaves" |
| query = "select mean(value) from prober.android_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| auto-dismiss = false |
| nag = "1h" |
| |
| # Buildslave availability for the client.skia.compile master: averages |
| # prober.compile_master_host.failure.value over the last 10 minutes and |
| # triggers when that mean is >= 1. |
| # NOTE(review): presumably the series is 1 per failed probe, so a mean of 1 |
| # means every probe in the window failed - confirm. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.compile)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.compile/buildslaves" |
| query = "select mean(value) from prober.compile_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| auto-dismiss = false |
| nag = "1h" |
| |
| # Buildslave availability for the client.skia.fyi master: averages |
| # prober.fyi_master_host.failure.value over the last 10 minutes and |
| # triggers when that mean is >= 1. |
| # NOTE(review): presumably the series is 1 per failed probe, so a mean of 1 |
| # means every probe in the window failed - confirm. |
| [[rule]] |
| name = "Buildslaves offline (client.skia.fyi)" |
| message = "At least one buildslave has been offline for more than ten minutes: http://build.chromium.org/p/client.skia.fyi/buildslaves" |
| query = "select mean(value) from prober.fyi_master_host.failure.value where time > now() - 10m" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)"] |
| auto-dismiss = false |
| nag = "1h" |
| |
| # Nanobench ingestion staleness: averages the nano-ingest |
| # time-since-last-successful-update gauge over the last 10 minutes and |
| # triggers when that mean is >= 750. |
| # NOTE(review): the unit of the gauge (and therefore of 750) is not stated |
| # here; presumably seconds, chosen to span two ingestion rounds as the |
| # message says - confirm against the ingester. |
| [[rule]] |
| name = "Ingestion Failure (nanobench)" |
| message = "At least two rounds of data ingestion have failed back to back." |
| query = "select mean(value) from ingest.ingester.nano-ingest.gauge.time-since-last-successful-update.value where time > now() - 10m" |
| condition = "x >= 750" |
| actions = ["Email(infra-alerts@skia.org)"] |
| auto-dismiss = false |
| nag = "1h" |
| |
| # skfiddle.com main-page probe: averages prober.skfiddle.failure.value over |
| # the last 10 minutes and triggers when that mean is >= 1. Notifies both |
| # infra-alerts@skia.org and humper@google.com. |
| # NOTE(review): this query ends with ';' while the buildslave and ingestion |
| # queries do not - presumably harmless to the query backend; confirm. |
| [[rule]] |
| name = "Skia Fiddle Prober (main page)" |
| message = "The main page at http://skfiddle.com has failed." |
| query = "select mean(value) from prober.skfiddle.failure.value where time > now() - 10m;" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)", "Email(humper@google.com)"] |
| auto-dismiss = true |
| nag = "1h" |
| |
| # skfiddle.com compile probe, "bad" variant: averages |
| # prober.skfiddle_compile_bad.failure.value over the last 10 minutes and |
| # triggers when that mean is >= 1. |
| # The name and message identify the skfiddle_compile_bad series so this |
| # alert can be told apart from the rule that watches skfiddle_compile_good. |
| # NOTE(review): presumably this probe submits code that should fail to |
| # compile - confirm against the prober config. |
| [[rule]] |
| name = "Skia Fiddle Prober (bad compile)" |
| message = "A test compile (skfiddle_compile_bad probe) at http://skfiddle.com has failed for more than 10 minutes." |
| query = "select mean(value) from prober.skfiddle_compile_bad.failure.value where time > now() - 10m;" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)", "Email(humper@google.com)"] |
| auto-dismiss = true |
| nag = "1h" |
| |
| # skfiddle.com compile probe, "good" variant: averages |
| # prober.skfiddle_compile_good.failure.value over the last 10 minutes and |
| # triggers when that mean is >= 1. |
| # The name and message identify the skfiddle_compile_good series so this |
| # alert can be told apart from the rule that watches skfiddle_compile_bad. |
| # NOTE(review): presumably this probe submits code that should compile |
| # cleanly - confirm against the prober config. |
| [[rule]] |
| name = "Skia Fiddle Prober (good compile)" |
| message = "A test compile (skfiddle_compile_good probe) at http://skfiddle.com has failed for more than 10 minutes." |
| query = "select mean(value) from prober.skfiddle_compile_good.failure.value where time > now() - 10m;" |
| condition = "x >= 1" |
| actions = ["Email(infra-alerts@skia.org)", "Email(humper@google.com)"] |
| auto-dismiss = true |
| nag = "1h" |