[sfnt] Fix crash in `Load_SBit_Png` on Windows x64.

This change fixes a crash that occurs in `Load_SBit_Png` when
running on a 64-bit Windows OS.  A memory access violation exception
would be raised by `setjmp` if the `jmp_buf` is not aligned to a
16-byte memory boundary.  This is due to `setjmp` executing `movdqa`
instructions to store 128-bit XMM registers to memory, which require
correct memory alignment.  This problem occurs because
`png_create_read_struct` uses `malloc` and `free` for memory
management, which only guarantees 8-byte alignment on Windows.

Instead, to fix the problem, `png_create_read_struct_2` is used on
64-bit Windows, which allows for user-defined memory allocation and
deallocation callbacks to be specified.  These callbacks forward the
allocation and deallocation requests to `_aligned_malloc` and
`_aligned_free`, ensuring that the allocated `png_struct` and
internal `jmp_buf` have the requisite 16-byte alignment.

* src/sfnt/pngshim.c <_WIN64>: Include `malloc.h`.
(malloc_callback, free_callback) <_WIN64>: New functions.
(Load_SBit_Png) <_WIN64>: Use `png_create_read_struct_2` instead of
`png_create_read_struct`.
diff --git a/ChangeLog b/ChangeLog
index 7945b1e..3c522d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2021-02-27  Jesse Towner  <townerj@gmail.com>
+
+	[sfnt] Fix crash in `Load_SBit_Png` on Windows x64.
+
+	This change fixes a crash that occurs in `Load_SBit_Png` when
+	running on a 64-bit Windows OS.  A memory access violation exception
+	would be raised by `setjmp` if the `jmp_buf` is not aligned to a
+	16-byte memory boundary.  This is due to `setjmp` executing `movdqa`
+	instructions to store 128-bit XMM registers to memory, which require
+	correct memory alignment.  This problem occurs because
+	`png_create_read_struct` uses `malloc` and `free` for memory
+	management, which only guarantees 8-byte alignment on Windows.
+
+	Instead, to fix the problem, `png_create_read_struct_2` is used on
+	64-bit Windows, which allows for user-defined memory allocation and
+	deallocation callbacks to be specified.  These callbacks forward the
+	allocation and deallocation requests to `_aligned_malloc` and
+	`_aligned_free`, ensuring that the allocated `png_struct` and
+	internal `jmp_buf` have the requisite 16-byte alignment.
+
+	* src/sfnt/pngshim.c <_WIN64>: Include `malloc.h`.
+	(malloc_callback, free_callback) <_WIN64>: New functions.
+	(Load_SBit_Png) <_WIN64>: Use `png_create_read_struct_2` instead of
+	`png_create_read_struct`.
+
 2021-02-25  Werner Lemberg  <wl@gnu.org>
 
 	[woff2] Fix memory leak.
diff --git a/src/sfnt/pngshim.c b/src/sfnt/pngshim.c
index c7a2938..2973b9a 100644
--- a/src/sfnt/pngshim.c
+++ b/src/sfnt/pngshim.c
@@ -33,6 +33,16 @@
 
 #include "sferrors.h"
 
+  /* Use _aligned_malloc / _aligned_free on 64-bit Windows to ensure that */
+  /* the jmp_buf needed for ft_setjmp is aligned to a 16-byte boundary.   */
+  /* If the jmp_buf is not aligned to a 16-byte boundary then a memory    */
+  /* access violation exception will occur upon ft_setjmp being called.   */
+#ifdef _WIN64
+#ifndef PNG_USER_MEM_SUPPORTED
+#error "libpng user-defined memory allocation is required for 64-bit Windows"
+#endif
+#include <malloc.h>
+#endif
 
   /* This code is freely based on cairo-png.c.  There's so many ways */
   /* to call libpng, and the way cairo does it is defacto standard.  */
@@ -221,6 +231,32 @@
   }
 
 
+#ifdef _WIN64
+
+  /* Memory allocation callback to ensure that the jmp_buf that is stored */
+  /* within the png_struct has 16-byte alignment for 64-bit Windows.      */
+  static png_voidp
+  malloc_callback( png_structp       png,
+                   png_alloc_size_t  size )
+  {
+    FT_UNUSED( png );
+    return _aligned_malloc( size, 16 );
+  }
+
+
+  /* Memory deallocation callback to release memory that was allocated */
+  /* with the matching memory allocation callback above.               */
+  static void
+  free_callback( png_structp  png,
+                 png_voidp    ptr )
+  {
+    FT_UNUSED( png );
+    _aligned_free( ptr );
+  }
+
+#endif /* _WIN64 */
+
+
   static void
   read_data_from_FT_Stream( png_structp  png,
                             png_bytep    data,
@@ -292,10 +328,20 @@
 
     FT_Stream_OpenMemory( &stream, data, png_len );
 
+#ifdef _WIN64
+    png = png_create_read_struct_2( PNG_LIBPNG_VER_STRING,
+                                    &error,
+                                    error_callback,
+                                    warning_callback,
+                                    NULL,
+                                    malloc_callback,
+                                    free_callback );
+#else
     png = png_create_read_struct( PNG_LIBPNG_VER_STRING,
                                   &error,
                                   error_callback,
                                   warning_callback );
+#endif
     if ( !png )
     {
       error = FT_THROW( Out_Of_Memory );