[libpng15] Optimized png_combine_row() when rows are aligned. This gains a

small percentage for 16-bit and 32-bit pixels in the typical case where the
output row buffers are appropriately aligned. The optimization was not
previously possible because the png_struct buffer was always misaligned.
diff --git a/ANNOUNCE b/ANNOUNCE
index 8871e17..04c2d59 100644
--- a/ANNOUNCE
+++ b/ANNOUNCE
@@ -64,6 +64,10 @@
     a bug in the code that attempted to align it; the code needs to subtract
     one from the pointer to take account of the filter byte prepended to
     each row.
+  Optimized png_combine_row() when rows are aligned. This gains a small
+    percentage for 16-bit and 32-bit pixels in the typical case where the
+    output row buffers are appropriately aligned. The optimization was not
+    previously possible because the png_struct buffer was always misaligned.
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net:
 (subscription required; visit
diff --git a/CHANGES b/CHANGES
index 066fb43..549346a 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3625,6 +3625,10 @@
     a bug in the code that attempted to align it; the code needs to subtract
     one from the pointer to take account of the filter byte prepended to
     each row.
+  Optimized png_combine_row() when rows are aligned. This gains a small
+    percentage for 16-bit and 32-bit pixels in the typical case where the
+    output row buffers are appropriately aligned. The optimization was not
+    previously possible because the png_struct buffer was always misaligned.
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 (subscription required; visit
diff --git a/pngrutil.c b/pngrutil.c
index ee06ed6..ba29886 100644
--- a/pngrutil.c
+++ b/pngrutil.c
@@ -2931,12 +2931,13 @@
 
             /* This is a common optimization for 2 and 4 byte pixels, for other
              * values rely on the toolchain memcpy being optimized.
-             *
-             * TBD: this should use png_isaligned, but currently something isn't
-             * aligned (NOTE: to be investigated in a really serious fashion.)
              */
-            else if (pixel_depth == 2)
+            else if (pixel_depth == sizeof (png_uint_16) &&
+               png_isaligned(sp, png_uint_16) && png_isaligned(dp, png_uint_16))
             {
+               png_uint_16p dp16 = (png_uint_16p)dp;
+               png_uint_16p sp16 = (png_uint_16p)sp;
+
                do
                {
                   m >>= 1;
@@ -2945,16 +2946,20 @@
                      m = mask;
 
                   if (m & 1)
-                     dp[0] = sp[0], dp[1] = sp[1];
+                     *dp16 = *sp16;
 
-                  dp += 2;
-                  sp += 2;
+                  ++dp16;
+                  ++sp16;
                }
                while (--row_width > 0);
             }
 
-            else if (pixel_depth == 4) /* as above, not optimal */
+            else if (pixel_depth == sizeof (png_uint_32) &&
+               png_isaligned(sp, png_uint_32) && png_isaligned(dp, png_uint_32))
             {
+               png_uint_32p dp32 = (png_uint_32p)dp;
+               png_uint_32p sp32 = (png_uint_32p)sp;
+
                do
                {
                   m >>= 1;
@@ -2963,10 +2968,10 @@
                      m = mask;
 
                   if (m & 1)
-                     dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2], dp[3] = sp[3];
+                     *dp32 = *sp32;
 
-                  dp += 4;
-                  sp += 4;
+                  ++dp32;
+                  ++sp32;
                }
                while (--row_width > 0);
             }