[gold] reduce frequency and breadth of backup polling.

Add MinHours as an ingester config option, so the backup polling
window can be shorter than one day.

This is one step toward reducing the QPS load on GCS/buildbucket.

We now poll once per hour, covering the last 2 hours of commits/data.
Previously we polled every 20-60 minutes, covering up to the last
2 days of data.

PubSub has been pretty reliable, so we shouldn't need to
scan such a wide swath.

Bug: skia:9073
Change-Id: I91ce22757b451a71fe03b93f93d823b537b3364f
Reviewed-on: https://skia-review.googlesource.com/c/buildbot/+/235201
Commit-Queue: Kevin Lubick <kjlubick@google.com>
Reviewed-by: Ravi Mistry <rmistry@google.com>
diff --git a/go/ingestion/ingestion.go b/go/ingestion/ingestion.go
index a188cd1..5538fe4 100644
--- a/go/ingestion/ingestion.go
+++ b/go/ingestion/ingestion.go
@@ -65,11 +65,14 @@
 		return nil, skerr.Fmt("eventBus and ingestionStore cannot be nil")
 	}
 
+	minDuration := time.Duration(ingesterConf.MinDays) * time.Hour * 24
+	minDuration += time.Duration(ingesterConf.MinHours) * time.Hour
+
 	ret := &Ingester{
 		id:                  ingesterID,
 		vcs:                 vcs,
 		nCommits:            ingesterConf.NCommits,
-		minDuration:         time.Duration(ingesterConf.MinDays) * time.Hour * 24,
+		minDuration:         minDuration,
 		runEvery:            ingesterConf.RunEvery.Duration,
 		sources:             sources,
 		processor:           processor,
@@ -85,7 +88,7 @@
 	concurrentProc := make(chan bool, nConcurrentProcessors)
 	resultChan, err := i.getInputChannel(ctx)
 	if err != nil {
-		return sklog.FmtErrorf("Error retrieving input channel: %s", err)
+		return skerr.Wrapf(err, "retrieving input channel")
 	}
 
 	// Continuously catch events from all input sources and push the data to the processor.
@@ -133,7 +136,7 @@
 
 	for _, source := range i.sources {
 		if err := source.SetEventChannel(eventChan); err != nil {
-			return nil, sklog.FmtErrorf("Error setting event channel: %s", err)
+			return nil, skerr.Wrapf(err, "setting event channel for source %v", source)
 		}
 
 		// Watch the source and feed anything not found in the IngestionStore
@@ -147,7 +150,7 @@
 // storage events if the files in the source have not been ingested yet.
 func (i *Ingester) watchSource(source Source) {
 	if i.minDuration == 0 {
-		sklog.Infof("Not going to do polling because minDays = 0")
+		sklog.Infof("Not going to do polling because minDuration == 0")
 		return
 	}
 	sklog.Infof("Watching source %s", source.ID())
diff --git a/go/sharedconfig/sharedconfig.go b/go/sharedconfig/sharedconfig.go
index b6e0e20..410c84c 100644
--- a/go/sharedconfig/sharedconfig.go
+++ b/go/sharedconfig/sharedconfig.go
@@ -12,9 +12,19 @@
 }
 
 type IngesterConfig struct {
-	RunEvery    config.Duration   // How often the ingester should pull data from Google Storage.
-	NCommits    int               // Minimum number of commits that should be ingested.
-	MinDays     int               // Minimum number of days that should be covered by the ingested commits.
+	// As of 2019, the primary way to ingest data is event-driven. That is, when
+	// new files are put into a GCS bucket, PubSub fires an event and that is the
+	// primary way for an ingester to be notified about a file.
+	// The four parameters below configure the manual polling of the source, which
+	// is a backup way to ingest data in the unlikely case that a PubSub event is
+	// dropped (PubSub will try and re-try to send events for up to seven days by default).
+	// If MinDays and MinHours are both 0, polling will not happen.
+	// If MinDays and MinHours are both specified, the two will be added.
+	RunEvery config.Duration // How often the ingester should pull data from Google Storage.
+	NCommits int             // Minimum number of commits that should be ingested.
+	MinDays  int             // Minimum number of days the commits polled should span.
+	MinHours int             // Minimum number of hours the commits polled should span.
+
 	MetricName  string            // What to call this ingester's data when imported to Graphite
 	Sources     []*DataSource     // Input sources where the ingester reads from.
 	ExtraParams map[string]string // Any additional needed parameters (ingester specific)
diff --git a/golden/go/ingestion_processors/tracestore_impl.go b/golden/go/ingestion_processors/tracestore_impl.go
index 7fa316b..37dba7f 100644
--- a/golden/go/ingestion_processors/tracestore_impl.go
+++ b/golden/go/ingestion_processors/tracestore_impl.go
@@ -120,7 +120,7 @@
 		return true, nil
 	}
 
-	if err := b.vcs.Update(ctx, false /*=pull*/, false /*=all branches*/); err != nil {
+	if err := b.vcs.Update(ctx, true /*=pull*/, false /*=all branches*/); err != nil {
 		return false, skerr.Wrapf(err, "could not update VCS")
 	}
 	if i, _ := b.vcs.IndexOf(ctx, hash); i >= 0 {
diff --git a/golden/k8s-config-templates/gold-ingestion-template-bt.yaml b/golden/k8s-config-templates/gold-ingestion-template-bt.yaml
index f9b5061..ead1e5f 100644
--- a/golden/k8s-config-templates/gold-ingestion-template-bt.yaml
+++ b/golden/k8s-config-templates/gold-ingestion-template-bt.yaml
@@ -48,7 +48,6 @@
             - "--git_bt_table={{.GIT_BT_TABLE}}"
             - "--http_port=:9091"
             - "--logtostderr"
-            - "--no_cloud_log"
             - "--prom_port=:20000"
             - "--pubsub_project_id={{.PUBSUB_PROJECT}}"
           ports:
diff --git a/golden/k8s-config-templates/gold-ingestion-template.yaml b/golden/k8s-config-templates/gold-ingestion-template.yaml
deleted file mode 100644
index 2f22af8..0000000
--- a/golden/k8s-config-templates/gold-ingestion-template.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: gold-{{.INSTANCE_ID}}-ingestion-bt
-spec:
-  ports:
-    - name: http
-      port: 9091
-  selector:
-      app: gold-{{.INSTANCE_ID}}-ingestion-bt
-  type: NodePort
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: gold-{{.INSTANCE_ID}}-ingestion-bt
-spec:
-  replicas: 1 # Start with one so you don't make duplicate GCS Pubsub subscriptions
-  selector:
-    matchLabels:
-      app: gold-{{.INSTANCE_ID}}-ingestion-bt
-  strategy:
-    type: RollingUpdate
-  template:
-    metadata:
-      labels:
-        app: gold-{{.INSTANCE_ID}}-ingestion-bt  # Pod template's label selector
-        appgroup: gold-{{.INSTANCE_ID}}
-      annotations:
-        prometheus.io.scrape: "true"
-        prometheus.io.port: "20000"
-    spec:
-      automountServiceAccountToken: false
-      securityContext:
-        runAsUser: 2000 # aka skia
-        fsGroup: 2000   # aka skia
-      containers:
-        - name: gold-{{.INSTANCE_ID}}-ingestion
-          image: {{.INGESTION_IMAGE}}
-          args:
-            - "--bt_project_id={{.BIGTABLE_PROJECT}}"
-            - "--config_filename=/etc/gold-ingestion/gold-{{.INSTANCE_ID}}-ingestion-config-bt.json5"
-            - "--fs_namespace={{.INSTANCE_ID}}"
-            - "--fs_project_id={{.FIRESTORE_PROJECT}}"
-            - "--git_bt_instance={{.BIGTABLE_INSTANCE}}"
-            - "--git_bt_table={{.GIT_BT_TABLE}}"
-            - "--http_port=:9091"
-            - "--logtostderr"
-            - "--prom_port=:20000"
-            - "--pubsub_project_id={{.PUBSUB_PROJECT}}"
-          ports:
-            - containerPort: 9091
-              name: http-health
-          volumeMounts:
-            - name: gold-{{.INSTANCE_ID}}-ingestion-config-volume
-              mountPath: /etc/gold-ingestion
-            - name: gold-service-account-secrets
-              mountPath: /var/secrets/google
-          env:
-            - name: GOOGLE_APPLICATION_CREDENTIALS
-              value: /var/secrets/google/service-account.json
-          resources:
-            requests:
-              memory: "8Gi"
-              cpu: 4
-          readinessProbe:
-            httpGet:
-              path: /healthz
-              port: 9091
-            initialDelaySeconds: 30
-            periodSeconds: 10
-      volumes:
-        - name: gold-{{.INSTANCE_ID}}-ingestion-config-volume
-          configMap:
-            defaultMode: 420
-            name: gold-{{.INSTANCE_ID}}-ingestion-config-bt
-        - name: gold-service-account-secrets
-          secret:
-            secretName: gold-service-account-secrets
diff --git a/golden/k8s-config-templates/ingest-config-template.json5 b/golden/k8s-config-templates/ingest-config-template.json5
index 4b61c66..02eff9a 100644
--- a/golden/k8s-config-templates/ingest-config-template.json5
+++ b/golden/k8s-config-templates/ingest-config-template.json5
@@ -7,7 +7,7 @@
     "gold-bt": {
       RunEvery: "{{.ING_RUN_EVERY}}",
       NCommits: {{.ING_N_COMMITS}},
-      MinDays: {{.ING_MIN_DAYS}},
+      MinHours: {{.ING_MIN_HOURS}},
       MetricName: "gold-{{.INSTANCE_ID}}-ingestion",
       Sources: [
         {
@@ -26,8 +26,7 @@
     // Gold Tryjob Ingester for gold-{{.INSTANCE_ID}}.
     "gold-tryjob": {
       RunEvery: "{{.ING_TRY_RUN_EVERY}}",
-      NCommits: {{.ING_TRY_N_COMMITS}},
-      MinDays: {{.ING_TRY_MIN_DAYS}},
+      MinHours: {{.ING_TRY_MIN_HOURS}},
       MetricName: "{{.INSTANCE_ID}}-gold-ingest-tryjob",
       Sources: [
         {
diff --git a/golden/k8s-instances/chrome-gpu-instance.json5 b/golden/k8s-instances/chrome-gpu-instance.json5
index d1c8344..e04571d 100644
--- a/golden/k8s-instances/chrome-gpu-instance.json5
+++ b/golden/k8s-instances/chrome-gpu-instance.json5
@@ -30,16 +30,15 @@
     "TILE_FRESHNESS":          "1m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           100,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         true,
-    "ING_TRY_RUN_EVERY":       "20m",
-    "ING_TRY_MIN_DAYS":        2,
-    "ING_TRY_N_COMMITS":       1, // N_COMMITS doesn't really make sense for tryjob scanning
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":        2,
     "ING_TRY_BUILDBUCKET_URL": "https://cr-buildbucket.appspot.com/api/buildbucket/v1/",
     "ING_TRY_BUILDBUCKET":     "chromium/try",
     "ING_TRY_BB_INTERVAL":     "10s",
diff --git a/golden/k8s-instances/chromevr-instance.json5 b/golden/k8s-instances/chromevr-instance.json5
index c4ef3a1..644c2b1 100644
--- a/golden/k8s-instances/chromevr-instance.json5
+++ b/golden/k8s-instances/chromevr-instance.json5
@@ -30,16 +30,15 @@
     "TILE_FRESHNESS":          "1m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           100,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         true,
-    "ING_TRY_RUN_EVERY":       "60m",
-    "ING_TRY_MIN_DAYS":        2,
-    "ING_TRY_N_COMMITS":       1, // N_COMMITS doesn't really make sense for tryjob scanning
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":        2,
     "ING_TRY_BUILDBUCKET_URL": "https://cr-buildbucket.appspot.com/api/buildbucket/v1/",
     "ING_TRY_BUILDBUCKET":     "master.tryserver.chromium.linux",
     "ING_TRY_BB_INTERVAL":     "10s",
diff --git a/golden/k8s-instances/flutter-instance.json5 b/golden/k8s-instances/flutter-instance.json5
index e26a90c..e5238f0 100644
--- a/golden/k8s-instances/flutter-instance.json5
+++ b/golden/k8s-instances/flutter-instance.json5
@@ -30,16 +30,15 @@
     "TILE_FRESHNESS":          "1m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           30,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         false,
-    "ING_TRY_RUN_EVERY":       "5m",
-    "ING_TRY_MIN_DAYS":        20,
-    "ING_TRY_N_COMMITS":       10,
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":       2,
     "ING_TRY_BUILDBUCKET_URL": "",
     "ING_TRY_BUILDBUCKET":     "chromium/try",
     "ING_TRY_BB_INTERVAL":     "10s",
diff --git a/golden/k8s-instances/lottie-instance.json5 b/golden/k8s-instances/lottie-instance.json5
index e7cc354..e46cf00 100644
--- a/golden/k8s-instances/lottie-instance.json5
+++ b/golden/k8s-instances/lottie-instance.json5
@@ -30,16 +30,15 @@
     "TILE_FRESHNESS":          "1m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           30,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         false,
-    "ING_TRY_RUN_EVERY":       "5m",
-    "ING_TRY_MIN_DAYS":        20,
-    "ING_TRY_N_COMMITS":       10,
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":       2,
     "ING_TRY_BUILDBUCKET_URL": "",
     "ING_TRY_BUILDBUCKET":     "",
     "ING_TRY_BB_INTERVAL":     "10s",
diff --git a/golden/k8s-instances/pdfium-instance.json5 b/golden/k8s-instances/pdfium-instance.json5
index 7f8eb15..35f4046 100644
--- a/golden/k8s-instances/pdfium-instance.json5
+++ b/golden/k8s-instances/pdfium-instance.json5
@@ -31,16 +31,15 @@
     "TILE_FRESHNESS":          "1m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           30,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         true,
-    "ING_TRY_RUN_EVERY":       "60m",
-    "ING_TRY_MIN_DAYS":        2,
-    "ING_TRY_N_COMMITS":       1,  // N_COMMITS doesn't really make sense for tryjob scanning
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":       2,
     "ING_TRY_BUILDBUCKET_URL": "https://cr-buildbucket.appspot.com/api/buildbucket/v1/",
     "ING_TRY_BUILDBUCKET":     "master.tryserver.client.pdfium",
     "ING_TRY_BB_INTERVAL":     "10s",
diff --git a/golden/k8s-instances/skia-instance.json5 b/golden/k8s-instances/skia-instance.json5
index 826e907..8607635 100644
--- a/golden/k8s-instances/skia-instance.json5
+++ b/golden/k8s-instances/skia-instance.json5
@@ -30,16 +30,15 @@
     "TILE_FRESHNESS":          "5m",
 
     // Settings for the main ingester.
-    "ING_RUN_EVERY":           "20m",
-    "ING_N_COMMITS":           30,
-    "ING_MIN_DAYS":            1,
+    "ING_RUN_EVERY":           "1h",
+    "ING_N_COMMITS":           5,
+    "ING_MIN_HOURS":           2,
 
     // Settings for the Tryjob ingester. If ING_TRY_ENABLED is false no ingester for
     // tryjobs will be configured.
     "ING_TRY_ENABLED":         true,
-    "ING_TRY_RUN_EVERY":       "20m",
-    "ING_TRY_MIN_DAYS":        2,
-    "ING_TRY_N_COMMITS":       1,  // N_COMMITS doesn't really make sense for tryjob scanning
+    "ING_TRY_RUN_EVERY":       "1h",
+    "ING_TRY_MIN_HOURS":       2,
     "ING_TRY_BUILDBUCKET_URL": "https://cr-buildbucket.appspot.com/api/buildbucket/v1/",
     "ING_TRY_BUILDBUCKET":     "skia.primary",
     "ING_TRY_BB_INTERVAL":     "10s",