Add --namespace_allow_filter flag to k8s-checker.

Bug: skia:13597
Change-Id: I4d212136d1ddb5da4d1da1d06aed5403158c2b57
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/566302
Reviewed-by: Eric Boren <borenet@google.com>
Commit-Queue: Joe Gregorio <jcgregorio@google.com>
diff --git a/k8s-checker/go/k8s-checker/BUILD.bazel b/k8s-checker/go/k8s-checker/BUILD.bazel
index cb9283c..50f89d8 100644
--- a/k8s-checker/go/k8s-checker/BUILD.bazel
+++ b/k8s-checker/go/k8s-checker/BUILD.bazel
@@ -1,3 +1,4 @@
+load("//bazel/go:go_test.bzl", "go_test")
 load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
 
 go_library(
@@ -30,3 +31,13 @@
     embed = [":k8s-checker_lib"],
     visibility = ["//visibility:public"],
 )
+
+go_test(
+    name = "k8s-checker_test",
+    srcs = ["main_test.go"],  # Unit tests for parseNamespaceAllowFilterFlag.
+    embed = [":k8s-checker_lib"],
+    deps = [
+        "//go/testutils/unittest",
+        "@com_github_stretchr_testify//require",
+    ],
+)
diff --git a/k8s-checker/go/k8s-checker/main.go b/k8s-checker/go/k8s-checker/main.go
index 941d7f7..b1d9a73 100644
--- a/k8s-checker/go/k8s-checker/main.go
+++ b/k8s-checker/go/k8s-checker/main.go
@@ -58,6 +58,9 @@
 // "gcr.io/${PROJECT}/${APPNAME}:${DATETIME}-${USER}-${HASH:0:7}-${REPO_STATE}" (from bash/docker_build.sh).
 var imageRegex = regexp.MustCompile(`^.+:(.+)-.+-.+-.+$`)
 
+// allowedAppsInNamespace maps a namespace to the apps allowed to run there without being checked into the repo.
+type allowedAppsInNamespace map[string][]string
+
 func main() {
 	// Flags.
 	dirtyConfigChecksPeriod := flag.Duration("dirty_config_checks_period", 2*time.Minute, "How often to check for dirty configs/images in K8s.")
@@ -65,11 +68,17 @@
 	cluster := flag.String("cluster", "skia-public", "The k8s cluster name.")
 	promPort := flag.String("prom_port", ":20000", "Metrics service address (e.g., ':20000')")
 	ignoreNamespaces := common.NewMultiStringFlag("ignore_namespace", nil, "Namespaces to ignore.")
+	namespaceAllowFilter := common.NewMultiStringFlag("namespace_allow_filter", nil, "app names to ignore in a namespace. A namespace name, colon, list of comma separated app names. Ex: gmp-system:rule-evaluator,collector")
 
 	common.InitWithMust("k8s_checker", common.PrometheusOpt(promPort))
 	defer sklog.Flush()
 	ctx := context.Background()
 
+	// Fail fast on a malformed --namespace_allow_filter value; Fatalf is needed to expand the format verbs.
+	allowedAppsByNamespace, err := parseNamespaceAllowFilterFlag(*namespaceAllowFilter)
+	if err != nil {
+		sklog.Fatalf("Failed to parse flag --namespace_allow_filter %s: %s", *namespaceAllowFilter, err)
+	}
 	clusterConfig, err := clusterconfig.New(*configFile)
 	if err != nil {
 		sklog.Fatalf("Failed to load cluster config: %s", err)
@@ -100,7 +109,7 @@
 	liveness := metrics2.NewLiveness(livenessMetric)
 	oldMetrics := map[metrics2.Int64Metric]struct{}{}
 	go util.RepeatCtx(ctx, *dirtyConfigChecksPeriod, func(ctx context.Context) {
-		newMetrics, err := performChecks(ctx, *cluster, clusterConfig.Repo, clientset, *ignoreNamespaces, gitiles.NewRepo(clusterConfig.Repo, httpClient), oldMetrics)
+		newMetrics, err := performChecks(ctx, *cluster, clusterConfig.Repo, clientset, *ignoreNamespaces, gitiles.NewRepo(clusterConfig.Repo, httpClient), oldMetrics, allowedAppsByNamespace)
 		if err != nil {
 			sklog.Errorf("Error when checking for dirty configs: %s", err)
 		} else {
@@ -112,6 +121,22 @@
 	select {}
 }
 
+// parseNamespaceAllowFilterFlag parses --namespace_allow_filter values of the form
+// "namespace:app1,app2" into a map; a value with no colon is an error.
+func parseNamespaceAllowFilterFlag(namespaceAllowFilter []string) (allowedAppsInNamespace, error) {
+	ret := allowedAppsInNamespace{}
+	for _, filter := range namespaceAllowFilter {
+		parts := strings.SplitN(filter, ":", 2)
+		if len(parts) != 2 {
+			return nil, skerr.Fmt("Missing colon in: %q", filter)
+		}
+		ns := fixupNamespace(parts[0])
+		// Append rather than assign so repeating a namespace accumulates its apps.
+		ret[ns] = append(ret[ns], strings.Split(parts[1], ",")...)
+	}
+	return ret, nil
+}
+
 // fixupNamespace sets the namespace to the default, if necessary.
 func fixupNamespace(namespace string) string {
 	if namespace == "" {
@@ -255,7 +280,7 @@
 // change. Eg: liveImage in dirtyConfigMetricTags.
 // It returns a map of newMetrics, which are all the metrics that were used during this
 // invocation of the function.
-func performChecks(ctx context.Context, cluster, repo string, clientset *kubernetes.Clientset, ignoreNamespaces []string, g *gitiles.Repo, oldMetrics map[metrics2.Int64Metric]struct{}) (map[metrics2.Int64Metric]struct{}, error) {
+func performChecks(ctx context.Context, cluster, repo string, clientset *kubernetes.Clientset, ignoreNamespaces []string, g *gitiles.Repo, oldMetrics map[metrics2.Int64Metric]struct{}, allowedAppsByNamespace allowedAppsInNamespace) (map[metrics2.Int64Metric]struct{}, error) {
 	sklog.Info("---------- New round of checking k8s ----------")
 	newMetrics := map[metrics2.Int64Metric]struct{}{}
 
@@ -470,11 +495,12 @@
 	// Find out which apps and containers are live but not found in git repo.
 	for namespace, liveAppContainerToImages := range liveAppContainerToImagesByNamespace {
 		for liveApp := range liveAppContainerToImages {
+			ns := fixupNamespace(namespace)
 			runningAppHasConfigMetricTags := map[string]string{
 				"app":       liveApp,
 				"repo":      repo,
 				"cluster":   cluster,
-				"namespace": fixupNamespace(namespace),
+				"namespace": ns,
 			}
 			runningAppHasConfigMetric := metrics2.GetInt64Metric(runningAppHasConfigMetric, runningAppHasConfigMetricTags)
 			newMetrics[runningAppHasConfigMetric] = struct{}{}
@@ -487,7 +513,7 @@
 						"container": liveContainer,
 						"repo":      repo,
 						"cluster":   cluster,
-						"namespace": fixupNamespace(namespace),
+						"namespace": ns,
 					}
 					runningContainerHasConfigMetric := metrics2.GetInt64Metric(runningContainerHasConfigMetric, runningContainerHasConfigMetricTags)
 					newMetrics[runningContainerHasConfigMetric] = struct{}{}
@@ -498,6 +524,9 @@
 						runningContainerHasConfigMetric.Update(0)
 					}
 				}
+			} else if util.In(liveApp, allowedAppsByNamespace[ns]) {
+				sklog.Infof("The running app %s is allowed in namespace %s in repo %s", liveApp, ns, repo)
+				runningAppHasConfigMetric.Update(1)
 			} else {
 				sklog.Infof("The running app %s is not checked into %s", liveApp, repo)
 				runningAppHasConfigMetric.Update(0)
diff --git a/k8s-checker/go/k8s-checker/main_test.go b/k8s-checker/go/k8s-checker/main_test.go
new file mode 100644
index 0000000..74eaf49
--- /dev/null
+++ b/k8s-checker/go/k8s-checker/main_test.go
@@ -0,0 +1,33 @@
+// k8s_checker is an application that checks for the following and alerts if necessary:
+// * Dirty images checked into K8s config files.
+// * Dirty configs running in K8s.
+package main
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"go.skia.org/infra/go/testutils/unittest"
+)
+
+func TestParseNamespaceAllowFilterFlag_MalFormed_ReturnsError(t *testing.T) {
+	unittest.SmallTest(t)
+	// A value without a colon cannot be split into a namespace and its apps.
+	malformed := []string{"this flag value contains no colon"}
+	_, err := parseNamespaceAllowFilterFlag(malformed)
+	require.Error(t, err)
+}
+
+func TestParseNamespaceAllowFilterFlag_HappyPath(t *testing.T) {
+	unittest.SmallTest(t)
+	flagValues := []string{ // Each value is "namespace:app1,app2".
+		"gmp-system:rule-evaluator,collector",
+		"kube-system:calico-node,calico-typha,fluentbit,gke-metadata-server,ip-masq-agent,kube-dns",
+	}
+	actual, err := parseNamespaceAllowFilterFlag(flagValues)
+	require.NoError(t, err)
+	require.Equal(t, allowedAppsInNamespace{
+		"gmp-system":  {"rule-evaluator", "collector"},
+		"kube-system": {"calico-node", "calico-typha", "fluentbit", "gke-metadata-server", "ip-masq-agent", "kube-dns"},
+	}, actual)
+}