[libpng15] Optimized png_combine_row() when rows are aligned. This gains a
small percentage for 16-bit and 32-bit pixels in the typical case where the
output row buffers are appropriately aligned. The optimization was not
previously possible because the png_struct buffer was always misaligned.
diff --git a/ANNOUNCE b/ANNOUNCE
index 8871e17..04c2d59 100644
--- a/ANNOUNCE
+++ b/ANNOUNCE
@@ -64,6 +64,10 @@
a bug in the code that attempted to align it; the code needs to subtract
one from the pointer to take account of the filter byte prepended to
each row.
+ Optimized png_combine_row() when rows are aligned. This gains a small
+ percentage for 16-bit and 32-bit pixels in the typical case where the
+ output row buffers are appropriately aligned. The optimization was not
+ previously possible because the png_struct buffer was always misaligned.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net:
(subscription required; visit
diff --git a/CHANGES b/CHANGES
index 066fb43..549346a 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3625,6 +3625,10 @@
a bug in the code that attempted to align it; the code needs to subtract
one from the pointer to take account of the filter byte prepended to
each row.
+ Optimized png_combine_row() when rows are aligned. This gains a small
+ percentage for 16-bit and 32-bit pixels in the typical case where the
+ output row buffers are appropriately aligned. The optimization was not
+ previously possible because the png_struct buffer was always misaligned.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit
diff --git a/pngrutil.c b/pngrutil.c
index ee06ed6..ba29886 100644
--- a/pngrutil.c
+++ b/pngrutil.c
@@ -2931,12 +2931,13 @@
/* This is a common optimization for 2 and 4 byte pixels, for other
* values rely on the toolchain memcpy being optimized.
- *
- * TBD: this should use png_isaligned, but currently something isn't
- * aligned (NOTE: to be investigated in a really serious fashion.)
*/
- else if (pixel_depth == 2)
+ else if (pixel_depth == sizeof (png_uint_16) &&
+ png_isaligned(sp, png_uint_16) && png_isaligned(dp, png_uint_16))
{
+ png_uint_16p dp16 = (png_uint_16p)dp;
+ png_uint_16p sp16 = (png_uint_16p)sp;
+
do
{
m >>= 1;
@@ -2945,16 +2946,20 @@
m = mask;
if (m & 1)
- dp[0] = sp[0], dp[1] = sp[1];
+ *dp16 = *sp16;
- dp += 2;
- sp += 2;
+ ++dp16;
+ ++sp16;
}
while (--row_width > 0);
}
- else if (pixel_depth == 4) /* as above, not optimal */
+ else if (pixel_depth == sizeof (png_uint_32) &&
+ png_isaligned(sp, png_uint_32) && png_isaligned(dp, png_uint_32))
{
+ png_uint_32p dp32 = (png_uint_32p)dp;
+ png_uint_32p sp32 = (png_uint_32p)sp;
+
do
{
m >>= 1;
@@ -2963,10 +2968,10 @@
m = mask;
if (m & 1)
- dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2], dp[3] = sp[3];
+ *dp32 = *sp32;
- dp += 4;
- sp += 4;
+ ++dp32;
+ ++sp32;
}
while (--row_width > 0);
}