[task scheduler] Add metrics for free bot count.

Change-Id: Ib73ac2c65eb6edd5e0abc8d2975d2c71eaa239a3
Reviewed-on: https://skia-review.googlesource.com/141266
Reviewed-by: Eric Boren <borenet@google.com>
Commit-Queue: Ben Wagner <benjaminwagner@google.com>
diff --git a/task_scheduler/go/scheduling/busy_bots.go b/task_scheduler/go/scheduling/busy_bots.go
index 129cd2f..351eac3 100644
--- a/task_scheduler/go/scheduling/busy_bots.go
+++ b/task_scheduler/go/scheduling/busy_bots.go
@@ -1,29 +1,118 @@
 package scheduling
 
 import (
+	"sort"
+	"strings"
 	"sync"
 
+	"go.skia.org/infra/go/metrics2"
 	"go.skia.org/infra/go/swarming"
 	"go.skia.org/infra/go/trie"
+	"go.skia.org/infra/go/util"
 	"go.skia.org/infra/task_scheduler/go/db"
 
 	swarming_api "go.chromium.org/luci/common/api/swarming/swarming/v1"
 )
 
 const (
-	MEASUREMENT_BUSY_BOTS = "busy-bots"
+	// MEASUREMENT_FREE_BOT_COUNT is the name of the metric that reports the number of free bots.
+	MEASUREMENT_FREE_BOT_COUNT = "free_bot_count"
+
+	// FILTER_* are used as the value of the "filter" key in metrics; one count covers all free bots,
+	// the other covers the free bots remaining after allocating pending tasks to bots.
+	FILTER_ALL_FREE_BOTS       = "all_free_bots"
+	FILTER_MINUS_PENDING_TASKS = "minus_pending_tasks"
 )
 
+var (
+	// dimensionWhitelist includes all dimensions used in
+	// https://skia.googlesource.com/skia/+/42974b73cd6f3515af69c553aac8dd15e3fc1927/infra/bots/gen_tasks.go
+	// (except for "image" which has a TODO to remove). dimensionsString ignores all other keys.
+	dimensionWhitelist = []string{
+		"cpu",
+		"device",
+		"device_os",
+		"device_type",
+		"gpu",
+		"machine_type",
+		"os",
+		"release_version",
+		"valgrind",
+	}
+)
+
+func init() {
+	sort.Strings(dimensionWhitelist) // dimensionsString emits keys in this slice's order.
+}
+
 // busyBots is a struct used for marking a bot as busy while it runs a Task.
 type busyBots struct {
-	pendingTasks *trie.Trie
-	mtx          sync.Mutex
+	// freeBotMetrics caches metrics as map[<filter>]map[<dimensionsString>]<bot-count metric>.
+	freeBotMetrics map[string]map[string]metrics2.Int64Metric
+	pendingTasks   *trie.Trie
+	mtx            sync.Mutex
 }
 
 // newBusyBots returns a busyBots instance.
 func newBusyBots() *busyBots {
 	return &busyBots{
-		pendingTasks: trie.New(),
+		freeBotMetrics: map[string]map[string]metrics2.Int64Metric{}, // Filled by recordBotMetrics.
+		pendingTasks:   trie.New(),
+	}
+}
+
+// dimensionsString returns the whitelisted dimensions as a space-separated "key value key value"
+// string in dimensionWhitelist order. Similar to flatten in task_scheduler.go. When a dimension has
+// multiple values, the longest is used. (The longest value is usually the most interesting.)
+func dimensionsString(dims []*swarming_api.SwarmingRpcsStringListPair) string {
+	vals := make(map[string]string, len(dimensionWhitelist))
+	for _, dim := range dims {
+		if util.In(dim.Key, dimensionWhitelist) {
+			for _, val := range dim.Value {
+				if len(val) > len(vals[dim.Key]) { // Strictly longer: on a tie the first value seen wins.
+					vals[dim.Key] = val
+				}
+			}
+		}
+	}
+	rv := make([]string, 0, 2*len(vals))
+	for _, key := range dimensionWhitelist {
+		if vals[key] != "" { // Skip keys that are absent or whose values are all empty.
+			rv = append(rv, key, vals[key])
+		}
+	}
+	return strings.Join(rv, " ")
+}
+
+// recordBotMetrics sets MEASUREMENT_FREE_BOT_COUNT for the given filter to the number of bots per
+// dimensionsString, and zeroes dimension sets that no longer appear. Assumes b.mtx is locked.
+func (b *busyBots) recordBotMetrics(filter string, bots []*swarming_api.SwarmingRpcsBotInfo) {
+	metrics, ok := b.freeBotMetrics[filter]
+	if !ok {
+		metrics = map[string]metrics2.Int64Metric{}
+		b.freeBotMetrics[filter] = metrics
+	}
+	counts := map[string]int64{}
+	for _, bot := range bots {
+		counts[dimensionsString(bot.Dimensions)]++
+	}
+	for dims, count := range counts {
+		metric, ok := metrics[dims]
+		if !ok {
+			metric = metrics2.GetInt64Metric(MEASUREMENT_FREE_BOT_COUNT, map[string]string{
+				"filter":     filter,
+				"dimensions": dims,
+			})
+			metrics[dims] = metric
+		}
+		metric.Update(count)
+	}
+	for dims, metric := range metrics {
+		_, ok := counts[dims]
+		if !ok {
+			metric.Update(0) // No bots with these dimensions this time; report 0 and stop tracking.
+			delete(metrics, dims)
+		}
 	}
 }
 
@@ -31,6 +120,7 @@
 func (b *busyBots) Filter(bots []*swarming_api.SwarmingRpcsBotInfo) []*swarming_api.SwarmingRpcsBotInfo {
 	b.mtx.Lock()
 	defer b.mtx.Unlock()
+	b.recordBotMetrics(FILTER_ALL_FREE_BOTS, bots)
 	matched := make(map[string]bool, len(bots))
 	rv := make([]*swarming_api.SwarmingRpcsBotInfo, 0, len(bots))
 	for _, bot := range bots {
@@ -52,6 +142,7 @@
 			rv = append(rv, bot)
 		}
 	}
+	b.recordBotMetrics(FILTER_MINUS_PENDING_TASKS, rv) // Count the filtered result, not the input.
 	return rv
 }