[machines] Refactor Maintenance Mode metrics.
Change-Id: I75261e84ada3b2ecd1548a6c6cd0b32929d7a2d2
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/608697
Reviewed-by: Eric Boren <borenet@google.com>
Commit-Queue: Joe Gregorio <jcgregorio@google.com>
diff --git a/machine/go/machine/processor/impl.go b/machine/go/machine/processor/impl.go
index 3f79b8d..0a56a44 100644
--- a/machine/go/machine/processor/impl.go
+++ b/machine/go/machine/processor/impl.go
@@ -115,13 +115,8 @@
next.IsQuarantined = true
}
- // Set the Quarantined dimension in Swarming.
- quarantinedMetric := metrics2.GetInt64Metric("machine_processor_device_quarantined", next.Dimensions.AsMetricsTags())
- if machine.SetSwarmingQuarantinedMessage(&next) {
- quarantinedMetric.Update(1)
- } else {
- quarantinedMetric.Update(0)
- }
+ _ = machine.SetSwarmingQuarantinedMessage(&next)
+ setQuarantineMetrics(next)
return next
}
@@ -139,6 +134,36 @@
return processMissingDeviceEvent(ctx, previous, event)
}
+var ( // Per-state tag sets passed to GetInt64Metric together with a machine's dimension tags.
+ maintenanceTag = map[string]string{"state": "Maintenance"} // used for the metric driven by InMaintenanceMode()
+ quarantineTag = map[string]string{"state": "Quarantined"} // used for the metric driven by IsQuarantined
+ recoveringTag = map[string]string{"state": "Recovering"} // used for the metric driven by IsRecovering()
+)
+
+// setQuarantineMetrics publishes the maintenance, quarantined, and recovering
+// status of d as three int64 metrics. All three share one metric name and the
+// dimension tags of d; they are told apart by the extra "state" tag set that
+// is passed for each.
+func setQuarantineMetrics(d machine.Description) {
+	const metricName = "machine_processor_device_quarantine_state"
+
+	// report writes 1 to the metric selected by stateTag when active is true,
+	// and 0 otherwise.
+	report := func(stateTag map[string]string, active bool) {
+		var value int64
+		if active {
+			value = 1
+		}
+		metrics2.GetInt64Metric(metricName, d.Dimensions.AsMetricsTags(), stateTag).Update(value)
+	}
+
+	report(maintenanceTag, d.InMaintenanceMode())
+	report(quarantineTag, d.IsQuarantined)
+	report(recoveringTag, d.IsRecovering())
+}
+
func processAndroidEvent(ctx context.Context, previous machine.Description, event machine.Event) machine.Description {
machineID := event.Host.Name
dimensions := dimensionsFromAndroidProperties(parseAndroidProperties(event.Android.GetProp))
diff --git a/machine/go/machine/processor/impl_test.go b/machine/go/machine/processor/impl_test.go
index 6e75570..c8a1cfa 100644
--- a/machine/go/machine/processor/impl_test.go
+++ b/machine/go/machine/processor/impl_test.go
@@ -325,7 +325,6 @@
assert.Equal(t, expected, next.Dimensions)
assert.NotEmpty(t, next.MaintenanceMode)
- assert.Equal(t, int64(1), metrics2.GetInt64Metric("machine_processor_device_quarantined", next.Dimensions.AsMetricsTags()).Get())
}
func TestProcess_RemoveMachineFromQuarantineIfDeviceReturns(t *testing.T) {
@@ -394,8 +393,6 @@
LastUpdated: serverTime,
Battery: machine.BadBatteryLevel,
}, next)
-
- assert.Equal(t, int64(0), metrics2.GetInt64Metric("machine_processor_device_quarantined", next.Dimensions.AsMetricsTags()).Get())
}
func TestProcess_RecoveryModeIfDeviceBatteryTooLow(t *testing.T) {
@@ -1269,3 +1266,78 @@
shouldPowerCycle(t, prevPowerCycleFalse, prevRunningSwarmingFalse, eventRunningSwarmingFalse, expectedNextPowerCycleFalse)
})
}
+
+// TestProcessorImpl_setQuarantineMetrics runs setQuarantineMetrics on each
+// description in the table and checks the value reported by the maintenance,
+// recovering, and quarantined metrics afterwards.
+func TestProcessorImpl_setQuarantineMetrics(t *testing.T) {
+	const metricName = "machine_processor_device_quarantine_state"
+
+	// Each want* field is the value the matching metric must hold once
+	// setQuarantineMetrics has been called with desc.
+	cases := []struct {
+		name            string
+		desc            machine.Description
+		wantMaintenance int64
+		wantRecovering  int64
+		wantQuarantined int64
+	}{
+		{
+			name: "Machine is available",
+			desc: machine.Description{
+				MaintenanceMode: "",
+				Recovering:      "",
+				IsQuarantined:   false,
+			},
+			wantMaintenance: 0,
+			wantRecovering:  0,
+			wantQuarantined: 0,
+		},
+		{
+			name: "Manually put into maintenance mode.",
+			desc: machine.Description{
+				MaintenanceMode: "alice@example.com",
+				Recovering:      "",
+				IsQuarantined:   false,
+			},
+			wantMaintenance: 1,
+			wantRecovering:  0,
+			wantQuarantined: 0,
+		},
+		{
+			name: "Machine is recovering",
+			desc: machine.Description{
+				MaintenanceMode: "",
+				Recovering:      "Too hot.",
+				IsQuarantined:   false,
+			},
+			wantMaintenance: 0,
+			wantRecovering:  1,
+			wantQuarantined: 0,
+		},
+		{
+			name: "Machine was quarantined by failing an infra step",
+			desc: machine.Description{
+				MaintenanceMode: "",
+				Recovering:      "",
+				IsQuarantined:   true,
+			},
+			wantMaintenance: 0,
+			wantRecovering:  0,
+			wantQuarantined: 1,
+		},
+		{
+			name: "Machine has multiple reasons for being quarantined",
+			desc: machine.Description{
+				MaintenanceMode: "bob@example.com",
+				Recovering:      "Low charge.",
+				IsQuarantined:   true,
+			},
+			wantMaintenance: 1,
+			wantRecovering:  1,
+			wantQuarantined: 1,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// readMetric returns the current value of the metric that carries
+			// stateTag in addition to the description's dimension tags.
+			readMetric := func(stateTag map[string]string) int64 {
+				return metrics2.GetInt64Metric(metricName, tc.desc.Dimensions.AsMetricsTags(), stateTag).Get()
+			}
+
+			setQuarantineMetrics(tc.desc)
+
+			require.Equal(t, tc.wantMaintenance, readMetric(maintenanceTag))
+			require.Equal(t, tc.wantRecovering, readMetric(recoveringTag))
+			require.Equal(t, tc.wantQuarantined, readMetric(quarantineTag))
+		})
+	}
+}