Add autotuning of loop and flush counts to VisualBench

BUG=skia:

Review URL: https://codereview.chromium.org/1201003002
diff --git a/tools/VisualBench.cpp b/tools/VisualBench.cpp
index 60174a4..cbc8994 100644
--- a/tools/VisualBench.cpp
+++ b/tools/VisualBench.cpp
@@ -24,9 +24,15 @@
 
 __SK_FORCE_IMAGE_DECODER_LINKING;
 
+// Between samples we reset context
+// Between frames we swap buffers
+// Between flushes we call flush on GrContext
+
 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag.");
-DEFINE_int32(samples, 10, "Number of times to render each skp.");
-DEFINE_int32(loops, 5, "Number of times to time.");
+DEFINE_int32(samples, 10, "Number of times to time each skp.");
+DEFINE_int32(frames, 5, "Number of frames of each skp to render per sample.");
+DEFINE_double(flushMs, 20, "Target flush time in milliseconds.");
+DEFINE_double(loopMs, 5, "Target loop time in milliseconds.");
 DEFINE_int32(msaa, 0, "Number of msaa samples.");
 DEFINE_bool2(fullscreen, f, true, "Run fullscreen.");
 
@@ -41,10 +47,12 @@
 
 VisualBench::VisualBench(void* hwnd, int argc, char** argv)
     : INHERITED(hwnd)
-    , fLoop(0)
     , fCurrentPictureIdx(-1)
     , fCurrentSample(0)
-    , fState(kPreWarm_State) {
+    , fCurrentFrame(0)
+    , fFlushes(1)
+    , fLoops(1)
+    , fState(kPreWarmLoops_State) {
     SkCommandLineFlags::Parse(argc, argv);
 
     // read all the skp file names.
@@ -66,6 +74,9 @@
 
     this->setTitle();
     this->setupBackend();
+
+    // Print header
+    SkDebugf("curr/maxrss\tloops\tflushes\tmin\tmedian\tmean\tmax\tstddev\tbench\n");
 }
 
 VisualBench::~VisualBench() {
@@ -122,8 +133,12 @@
 }
 
 inline void VisualBench::renderFrame(SkCanvas* canvas) {
-    canvas->drawPicture(fPicture);
-    canvas->flush();
+    for (int flush = 0; flush < fFlushes; flush++) {  // one frame = fFlushes flushes
+        for (int loop = 0; loop < fLoops; loop++) {  // fLoops draws of the picture per flush
+            canvas->drawPicture(fPicture);
+        }
+        canvas->flush();  // flush once after each batch of fLoops draws
+    }
     INHERITED::present();
 }
 
@@ -139,9 +154,11 @@
         SkASSERT(measurements.count());
         Stats stats(measurements.begin(), measurements.count());
         const double stdDevPercent = 100 * sqrt(stats.var) / stats.mean;
-        SkDebugf("%4d/%-4dMB\t%s\t%s\t%s\t%s\t%.0f%%\t%s\n",
+        SkDebugf("%4d/%-4dMB\t%d\t%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\n",
                  sk_tools::getCurrResidentSetSizeMB(),
                  sk_tools::getMaxResidentSetSizeMB(),
+                 fLoops,
+                 fFlushes,
                  HUMANIZE(stats.min),
                  HUMANIZE(stats.median),
                  HUMANIZE(stats.mean),
@@ -180,6 +197,17 @@
     return false;
 }
 
+void VisualBench::preWarm(State nextState) {
+    if (fCurrentFrame >= FLAGS_gpuFrameLag) {  // warm-up frame budget reached
+        // Warm-up is over: enter nextState, reset the frame counter and start the timer.
+        fState = nextState;
+        fCurrentFrame = 0;
+        fTimer.start();  // caller's nextState handler is expected to call fTimer.end()
+    } else {
+        fCurrentFrame++;  // still warming up; count this frame and stay in the current state
+    }
+}
+
 void VisualBench::draw(SkCanvas* canvas) {
     if (!this->advanceRecordIfNecessary()) {
         this->closeWindow();
@@ -187,34 +215,59 @@
     }
     this->renderFrame(canvas);
     switch (fState) {
-        case kPreWarm_State: {
-            if (fCurrentSample >= FLAGS_gpuFrameLag) {
-                // TODO we currently time across all frames to make sure we capture all GPU work
-                // We should also rendering an empty SKP to get a baseline to subtract from
-                // our timing
-                fState = kTiming_State;
-                fCurrentSample -= FLAGS_gpuFrameLag;
-                fTimer.start();
+        case kPreWarmLoops_State: {
+            this->preWarm(kTuneLoops_State);
+            break;
+        }
+        case kTuneLoops_State: {
+            if (1 << 30 == fLoops) {
+                // We're about to wrap.  Something's wrong with the bench.
+                SkDebugf("InnerLoops wrapped\n");
+                fLoops = 0;
             } else {
-                fCurrentSample++;
+                fTimer.end();
+                double elapsed = fTimer.fWall;
+                if (elapsed > FLAGS_loopMs) {
+                    fState = kPreWarmTiming_State;
+
+                    // Scale back the number of loops
+                    fLoops = (int)ceil(fLoops * FLAGS_loopMs / elapsed);
+                    fFlushes = (int)ceil(FLAGS_flushMs / elapsed);
+                } else {
+                    fState = kPreWarmLoops_State;
+                    fLoops *= 2;
+                }
+
+                fCurrentFrame = 0;
+                fTimer = WallTimer();
+                this->resetContext();
             }
             break;
         }
+        case kPreWarmTiming_State: {
+            this->preWarm(kTiming_State);
+            break;
+        }
         case kTiming_State: {
-            if (fCurrentSample >= FLAGS_samples) {
+            if (fCurrentFrame >= FLAGS_frames) {
                 fTimer.end();
-                fRecords[fCurrentPictureIdx].fMeasurements.push_back(fTimer.fWall / FLAGS_samples);
-                this->resetContext();
-                fTimer = WallTimer();
-                fState = kPreWarm_State;
-                fCurrentSample = 0;
-                if (fLoop++ > FLAGS_loops) {
+                fRecords[fCurrentPictureIdx].fMeasurements.push_back(
+                        fTimer.fWall / (FLAGS_frames * fLoops * fFlushes));
+                if (fCurrentSample++ >= FLAGS_samples) {
+                    fState = kPreWarmLoops_State;
                     this->printStats();
                     fPicture.reset(NULL);
-                    fLoop = 0;
+                    fCurrentSample = 0;
+                    fFlushes = 1;
+                    fLoops = 1;
+                } else {
+                    fState = kPreWarmTiming_State;
                 }
+                fTimer = WallTimer();
+                this->resetContext();
+                fCurrentFrame = 0;
             } else {
-                fCurrentSample++;
+                fCurrentFrame++;
             }
             break;
         }
diff --git a/tools/VisualBench.h b/tools/VisualBench.h
index 9d7b946..332fe82 100644
--- a/tools/VisualBench.h
+++ b/tools/VisualBench.h
@@ -54,14 +54,19 @@
     };
 
     enum State {
-        kPreWarm_State,
+        kPreWarmLoops_State,   // warm-up frames before a loop-tuning measurement
+        kTuneLoops_State,      // doubles fLoops until elapsed > loopMs, then sets fLoops/fFlushes
+        kPreWarmTiming_State,  // warm-up frames before each timed sample
         kTiming_State,
     };
+    void preWarm(State nextState);
 
-    int fLoop;
     int fCurrentPictureIdx;
     SkAutoTUnref<SkPicture> fPicture;
     int fCurrentSample;
+    int fCurrentFrame;
+    int fFlushes;
+    int fLoops;
     SkTArray<Record> fRecords;
     WallTimer fTimer;
     State fState;