blob: a9da4faefc1ab77f69dd9808a7539314df9296d6 [file] [log] [blame]
# See https://www.cockroachlabs.com/docs/v20.1/monitor-cockroachdb-with-prometheus.html for details on CockroachDB monitoring.
#
# This file contains aggregation rules, specifically:
# "node:X" node-level aggregation of a per-store metric X
# "cluster:X" cluster-level aggregation of a per-store or per-node metric X
#
# Most aggregation rules should use the "without (label1, label2, ...)" keyword
# to keep all labels but the ones specified.
groups:
- name: perf_cockroachdb_aggregation_rules.yml
rules:
- record: node:capacity
expr: sum without(store) (capacity{app="perf-cockroachdb"})
- record: cluster:capacity
expr: sum without(instance) (node:capacity{app="perf-cockroachdb"})
- record: node:capacity_available
expr: sum without(store) (capacity_available{app="perf-cockroachdb"})
- record: cluster:capacity_available
expr: sum without(instance) (node:capacity_available{app="perf-cockroachdb"})
- record: capacity_available:ratio
expr: capacity_available{app="perf-cockroachdb"} / capacity{app="perf-cockroachdb"}
- record: node:capacity_available:ratio
expr: node:capacity_available{app="perf-cockroachdb"} / node:capacity{app="perf-cockroachdb"}
- record: cluster:capacity_available:ratio
expr: cluster:capacity_available{app="perf-cockroachdb"} / cluster:capacity{app="perf-cockroachdb"}
# Histogram rules: these are fairly expensive to compute live, so we precompute a few percetiles.
- record: txn_durations_bucket:rate1m
expr: rate(txn_durations_bucket{app="perf-cockroachdb"}[1m])
- record: txn_durations:rate1m:quantile_50
expr: histogram_quantile(0.5, txn_durations_bucket:rate1m)
- record: txn_durations:rate1m:quantile_75
expr: histogram_quantile(0.75, txn_durations_bucket:rate1m)
- record: txn_durations:rate1m:quantile_90
expr: histogram_quantile(0.9, txn_durations_bucket:rate1m)
- record: txn_durations:rate1m:quantile_95
expr: histogram_quantile(0.95, txn_durations_bucket:rate1m)
- record: txn_durations:rate1m:quantile_99
expr: histogram_quantile(0.99, txn_durations_bucket:rate1m)
- record: exec_latency_bucket:rate1m
expr: rate(exec_latency_bucket{app="perf-cockroachdb"}[1m])
- record: exec_latency:rate1m:quantile_50
expr: histogram_quantile(0.5, exec_latency_bucket:rate1m)
- record: exec_latency:rate1m:quantile_75
expr: histogram_quantile(0.75, exec_latency_bucket:rate1m)
- record: exec_latency:rate1m:quantile_90
expr: histogram_quantile(0.9, exec_latency_bucket:rate1m)
- record: exec_latency:rate1m:quantile_95
expr: histogram_quantile(0.95, exec_latency_bucket:rate1m)
- record: exec_latency:rate1m:quantile_99
expr: histogram_quantile(0.99, exec_latency_bucket:rate1m)
- record: round_trip_latency_bucket:rate1m
expr: rate(round_trip_latency_bucket{app="perf-cockroachdb"}[1m])
- record: round_trip_latency:rate1m:quantile_50
expr: histogram_quantile(0.5, round_trip_latency_bucket:rate1m)
- record: round_trip_latency:rate1m:quantile_75
expr: histogram_quantile(0.75, round_trip_latency_bucket:rate1m)
- record: round_trip_latency:rate1m:quantile_90
expr: histogram_quantile(0.9, round_trip_latency_bucket:rate1m)
- record: round_trip_latency:rate1m:quantile_95
expr: histogram_quantile(0.95, round_trip_latency_bucket:rate1m)
- record: round_trip_latency:rate1m:quantile_99
expr: histogram_quantile(0.99, round_trip_latency_bucket:rate1m)
- record: sql_exec_latency_bucket:rate1m
expr: rate(sql_exec_latency_bucket{app="perf-cockroachdb"}[1m])
- record: sql_exec_latency:rate1m:quantile_50
expr: histogram_quantile(0.5, sql_exec_latency_bucket:rate1m)
- record: sql_exec_latency:rate1m:quantile_75
expr: histogram_quantile(0.75, sql_exec_latency_bucket:rate1m)
- record: sql_exec_latency:rate1m:quantile_90
expr: histogram_quantile(0.9, sql_exec_latency_bucket:rate1m)
- record: sql_exec_latency:rate1m:quantile_95
expr: histogram_quantile(0.95, sql_exec_latency_bucket:rate1m)
- record: sql_exec_latency:rate1m:quantile_99
expr: histogram_quantile(0.99, sql_exec_latency_bucket:rate1m)
- record: raft_process_logcommit_latency_bucket:rate1m
expr: rate(raft_process_logcommit_latency_bucket{app="perf-cockroachdb"}[1m])
- record: raft_process_logcommit_latency:rate1m:quantile_50
expr: histogram_quantile(0.5, raft_process_logcommit_latency_bucket:rate1m)
- record: raft_process_logcommit_latency:rate1m:quantile_75
expr: histogram_quantile(0.75, raft_process_logcommit_latency_bucket:rate1m)
- record: raft_process_logcommit_latency:rate1m:quantile_90
expr: histogram_quantile(0.9, raft_process_logcommit_latency_bucket:rate1m)
- record: raft_process_logcommit_latency:rate1m:quantile_95
expr: histogram_quantile(0.95, raft_process_logcommit_latency_bucket:rate1m)
- record: raft_process_logcommit_latency:rate1m:quantile_99
expr: histogram_quantile(0.99, raft_process_logcommit_latency_bucket:rate1m)
- record: raft_process_commandcommit_latency_bucket:rate1m
expr: rate(raft_process_commandcommit_latency_bucket{app="perf-cockroachdb"}[1m])
- record: raft_process_commandcommit_latency:rate1m:quantile_50
expr: histogram_quantile(0.5, raft_process_commandcommit_latency_bucket:rate1m)
- record: raft_process_commandcommit_latency:rate1m:quantile_75
expr: histogram_quantile(0.75, raft_process_commandcommit_latency_bucket:rate1m)
- record: raft_process_commandcommit_latency:rate1m:quantile_90
expr: histogram_quantile(0.9, raft_process_commandcommit_latency_bucket:rate1m)
- record: raft_process_commandcommit_latency:rate1m:quantile_95
expr: histogram_quantile(0.95, raft_process_commandcommit_latency_bucket:rate1m)
- record: raft_process_commandcommit_latency:rate1m:quantile_99
expr: histogram_quantile(0.99, raft_process_commandcommit_latency_bucket:rate1m)