Improve performance of highQualityFilter

Move the loop-invariant calculation out of the loop. This gives about a 15%
speedup for bitmap_BGRA_8888_A_scale_rotate_bicubic on my desktop i7-3770.

BUG=skia:
R=humper@google.com, mtklein@google.com, reed@google.com

Author: qiankun.miao@intel.com

Review URL: https://codereview.chromium.org/502953004
diff --git a/src/core/SkBitmapFilter.cpp b/src/core/SkBitmapFilter.cpp
index a9c3223..20a0514 100644
--- a/src/core/SkBitmapFilter.cpp
+++ b/src/core/SkBitmapFilter.cpp
@@ -28,6 +28,7 @@
 void highQualityFilter(ColorPacker pack, const SkBitmapProcState& s, int x, int y, Color* SK_RESTRICT colors, int count) {
     const int maxX = s.fBitmap->width();
     const int maxY = s.fBitmap->height();
+    SkAutoTMalloc<SkScalar> xWeights(maxX);
 
     while (count-- > 0) {
         SkPoint srcPt;
@@ -44,11 +45,16 @@
         int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
         int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);
 
+        for (int srcX = x0; srcX < x1 ; srcX++) {
+            // Looking these up once instead of each loop is a ~15% speedup.
+            xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
+        }
+
         for (int srcY = y0; srcY < y1; srcY++) {
             SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
 
             for (int srcX = x0; srcX < x1 ; srcX++) {
-                SkScalar xWeight = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
+                SkScalar xWeight = xWeights[srcX - x0];
 
                 SkScalar combined_weight = SkScalarMul(xWeight, yWeight);