Modify sample buffer size for larger displays.
Increases the intermediate buffer size for sample pixel indexes,
used in the sample proc function calls. If the operation is bigger
than the buffer, it's split into multiple calls, creating overhead.
This especially impacts the performance of SIMD optimizations.
Also aligns the start address of the buffer to 16 bytes, to enable
more efficient SIMD optimizations.
Author: henrik.smiding@intel.com
Signed-off-by: Henrik Smiding <henrik.smiding@intel.com>
R=reed@google.com, mtklein@google.com, tomhudson@google.com, djsollen@google.com, joakim.landberg@intel.com, scroggo@google.com, bsalomon@chromium.org, bsalomon@google.com
Author: henrik.smiding@intel.com
Review URL: https://codereview.chromium.org/240433002
git-svn-id: http://skia.googlecode.com/svn/trunk@14825 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/expectations/gm/ignored-tests.txt b/expectations/gm/ignored-tests.txt
index c134382..8f39cbd 100644
--- a/expectations/gm/ignored-tests.txt
+++ b/expectations/gm/ignored-tests.txt
@@ -51,4 +51,63 @@
downsamplebitmap_checkerboard_high_512_256
downsamplebitmap_image_high_mandrill_512.png
filterbitmap_checkerboard_192_192
-downsamplebitmap_text_high_72.00pt
\ No newline at end of file
+downsamplebitmap_text_high_72.00pt
+
+# https://codereview.chromium.org/240433002/
+# scroggo will rebaseline these tests
+shadertext3_8888
+shadertext3_565
+pictureshader_8888
+pictureshader_565
+giantbitmap_mirror_bilerp_rotate_8888
+giantbitmap_mirror_bilerp_rotate_565
+giantbitmap_repeat_bilerp_rotate_8888
+giantbitmap_repeat_bilerp_rotate_565
+filterbitmap_image_mandrill_512.png_8888
+filterbitmap_image_mandrill_512.png_565
+filterbitmap_image_mandrill_256.png_8888
+filterbitmap_image_mandrill_256.png_565
+filterbitmap_image_mandrill_128.png_8888
+filterbitmap_image_mandrill_128.png_565
+filterbitmap_image_mandrill_64.png_8888
+filterbitmap_image_mandrill_64.png_565
+filterbitmap_image_mandrill_32.png_8888
+filterbitmap_image_mandrill_32.png_565
+filterbitmap_image_mandrill_16.png_8888
+filterbitmap_image_mandrill_16.png_565
+filterbitmap_checkerboard_192_192_8888
+filterbitmap_checkerboard_192_192_565
+filterbitmap_checkerboard_32_2_8888
+filterbitmap_checkerboard_32_2_565
+filterbitmap_checkerboard_32_8_8888
+filterbitmap_checkerboard_32_8_565
+filterbitmap_checkerboard_32_32_8888
+filterbitmap_checkerboard_32_32_565
+filterbitmap_checkerboard_4_4_8888
+filterbitmap_checkerboard_4_4_565
+filterbitmap_text_10.00pt_8888
+filterbitmap_text_10.00pt_565
+filterbitmap_text_7.00pt_8888
+filterbitmap_text_7.00pt_565
+filterbitmap_text_3.00pt_8888
+filterbitmap_text_3.00pt_565
+downsamplebitmap_image_none_mandrill_512.png_8888
+downsamplebitmap_image_none_mandrill_512.png_565
+downsamplebitmap_checkerboard_none_512_256_8888
+downsamplebitmap_checkerboard_none_512_256_565
+downsamplebitmap_text_none_72.00pt_8888
+downsamplebitmap_text_none_72.00pt_565
+downsamplebitmap_image_low_mandrill_512.png_8888
+downsamplebitmap_image_low_mandrill_512.png_565
+downsamplebitmap_checkerboard_low_512_256_8888
+downsamplebitmap_checkerboard_low_512_256_565
+downsamplebitmap_text_low_72.00pt_8888
+downsamplebitmap_text_low_72.00pt_565
+downsamplebitmap_image_medium_mandrill_512.png_8888
+downsamplebitmap_image_medium_mandrill_512.png_565
+downsamplebitmap_checkerboard_medium_512_256_8888
+downsamplebitmap_checkerboard_medium_512_256_565
+downsamplebitmap_text_medium_72.00pt_8888
+downsamplebitmap_text_medium_72.00pt_565
+drawbitmapmatrix_8888
+drawbitmapmatrix_565
diff --git a/include/core/SkPostConfig.h b/include/core/SkPostConfig.h
index 88a2bfc..3253434 100644
--- a/include/core/SkPostConfig.h
+++ b/include/core/SkPostConfig.h
@@ -68,6 +68,16 @@
# endif
#endif
+/**
+ * SK_ALIGN(x): aligns a stack allocated variable/buffer to x bytes, e.g. SK_ALIGN(16).
+ * MSVC takes __declspec(align(x)); all other compilers get __attribute__((aligned(x))).
+ */
+#if defined(_MSC_VER)
+# define SK_ALIGN(x) __declspec(align(x))
+#else
+# define SK_ALIGN(x) __attribute__((aligned(x)))
+#endif
+
#if !defined(SK_SUPPORT_GPU)
# define SK_SUPPORT_GPU 1
#endif
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp
index 8e25530..ec46d42 100644
--- a/src/core/SkBitmapProcShader.cpp
+++ b/src/core/SkBitmapProcShader.cpp
@@ -183,13 +183,21 @@
fState->~SkBitmapProcState();
}
-#define BUF_MAX 128
+/* Defines the buffer size for sample pixel indexes, used in the sample proc function calls.
+ * If the operation is bigger than the buffer, it's split into multiple calls. This split is bad
+ * for the performance of SIMD optimizations.
+ * A display in portrait mode, with a width of 720 pixels, requires a buffer size of at least 721
+ * to run uninterrupted in the more basic operations; 1081 covers widths up to 1080 pixels.
+ * (Formula: Width + 1 for 'scale/translate with filter' procs.
+ * See description of SkBitmapProcState::maxCountForBufferSize for more information.)
+ */
+#define BUF_MAX 1081
#define TEST_BUFFER_OVERRITEx
#ifdef TEST_BUFFER_OVERRITE
#define TEST_BUFFER_EXTRA 32
- #define TEST_PATTERN 0x88888888
+ #define TEST_PATTERN 0x88888888
#else
#define TEST_BUFFER_EXTRA 0
#endif
@@ -202,7 +210,9 @@
return;
}
- uint32_t buffer[BUF_MAX + TEST_BUFFER_EXTRA];
+ // Align buffer to 16 bytes to enable more efficient SIMD optimizations.
+ uint32_t SK_ALIGN(16) buffer[BUF_MAX + TEST_BUFFER_EXTRA];
+
SkBitmapProcState::MatrixProc mproc = state.getMatrixProc();
SkBitmapProcState::SampleProc32 sproc = state.getSampleProc32();
int max = state.maxCountForBufferSize(sizeof(buffer[0]) * BUF_MAX);
@@ -255,7 +265,9 @@
return;
}
- uint32_t buffer[BUF_MAX];
+ // Align buffer to 16 bytes to enable more efficient SIMD optimizations.
+ uint32_t SK_ALIGN(16) buffer[BUF_MAX];
+
SkBitmapProcState::MatrixProc mproc = state.getMatrixProc();
SkBitmapProcState::SampleProc16 sproc = state.getSampleProc16();
int max = state.maxCountForBufferSize(sizeof(buffer));