Imported from pngcrush-1.3.0.tar
diff --git a/INSTALL b/INSTALL
index 021dd38..0c09107 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,17 +1,20 @@
 
-There's no makefile or "configure" for pngcrush.
+There's a sample makefile.gcc for pngcrush, which you can use
+by typing
 
-All you should need to do is enter the pngcrush-1.2.l
+    make -f makefile.gcc
+
+However, all you should need to do is enter the pngcrush-1.3.0
 directory and type
 
     cc -O -o pngcrush *.c -lm
     cp pngcrush /usr/local/bin  # or wherever you want
 
 You might want to create a makefile if you are planning to do
-something more complicated, like loading with your own shared
+something more complicated, like loading with your system's shared
 libraries for libpng and zlib.
 
-Here's the command for compiling on SGI IRIX:
+Here's a command for compiling on SGI IRIX:
 
     cc -n32 -fullwarn -O2 -IPA:plimit=256 -OPT:Olimit=0 -o pngcrush *.c -lm
     cp pngcrush /usr/local/bin
@@ -20,4 +23,5 @@
 
     gcc -O3 -Wall -funroll-loops -o pngcrush *.c
     copy /B pmodstub.exe + pngcrush pngcrush.exe
-    then put pngcrush.exe wherever you want.
+
+then put pngcrush.exe wherever you want.
diff --git a/README.txt b/README.txt
index 534ade4..94dcdf2 100644
--- a/README.txt
+++ b/README.txt
@@ -1,8 +1,8 @@
 
- | pngcrush 1.2.1, Copyright (C) 1998, 1999, Glenn Randers-Pehrson
+ | pngcrush 1.3.0, Copyright (C) 1998, 1999, Glenn Randers-Pehrson
  | This is a free, open-source program.  Permission is
  | granted to everyone to use pngcrush without fee.
- | This program was built with libpng version 1.0.5f,
+ | This program was built with libpng version 1.0.5j,
  |    Copyright (C) 1995, Guy Eric Schalnat, Group 42 Inc.,
  |    Copyright (C) 1996, 1997 Andreas Dilger,
  |    Copyright (C) 1998, 1999, Glenn Randers-Pehrson,
@@ -35,10 +35,25 @@
          -text b[efore_IDAT]|a[fter_IDAT] "keyword" "text"
          -trns index red green blue gray
       -verbose (write more detailed information)
-            -w compression_window_size [32, 16, 8, 4, 2, 1, 512, 256]
+            -w compression_window_size [32, 16, 8, 4, 2, 1, 512]
             -h (help)
             -p (pause)
 
+ | pngcrush 1.3.0, Copyright (C) 1998, 1999, Glenn Randers-Pehrson
+ | This is a free, open-source program.  Permission is
+ | granted to everyone to use pngcrush without fee.
+ | This program was built with libpng version 1.0.5j,
+ |    Copyright (C) 1995, Guy Eric Schalnat, Group 42 Inc.,
+ |    Copyright (C) 1996, 1997 Andreas Dilger,
+ |    Copyright (C) 1998, 1999, Glenn Randers-Pehrson,
+ | and zlib version 1.1.3, Copyright (c) 1998,
+ |    Jean-loup Gailly and Mark Adler.
+
+
+usage: pngcrush [options] infile.png outfile.png
+       pngcrush -e ext [other options] files.png ...
+       pngcrush -d dir [other options] files.png ...
+
 options:
         -brute (Use brute-force, try 114 different methods)
 
@@ -78,7 +93,7 @@
             -f user_filter [0-5]
 
                filter to use with the method specified in the
-               preceding '-m method' argument.
+               preceding '-m method' or '-brute' argument.
                0: none; 1-4: use specified filter; 5: adaptive.
 
         -force (Write a new output file even if larger than input)
@@ -96,7 +111,8 @@
             -l zlib_compression_level [0-9]
 
                zlib compression level to use with method specified
-               with the preceding '-m method' argument.
+               with the preceding '-m method' or '-brute'
+               argument.
 
             -m method [0 through 200]
 
@@ -165,10 +181,10 @@
 
                Repeat the option (use "-v -v") for even more.
 
-            -w compression_window_size [32, 16, 8, 4, 2, 1, 512, 256]
+            -w compression_window_size [32, 16, 8, 4, 2, 1, 512]
 
                Size of the sliding compression window, in kbytes
-               (or bytes, in case of 512 or 256).  It's best to
+               (or bytes, in case of 512).  It's best to
                use the default (32) unless you run out of memory.
                The program will use a smaller window anyway when
                the uncompressed file is smaller than 16k.
diff --git a/deflate.c b/deflate.c
index d6e262b..8a0d646 100644
--- a/deflate.c
+++ b/deflate.c
@@ -101,7 +101,7 @@
 /* Tail of hash chains */
 
 #ifndef TOO_FAR
-#  define TOO_FAR 32767   /* changed from 4096 for pngcrush */
+#  define TOO_FAR 4096
 #endif
 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
 
@@ -261,8 +261,10 @@
     s->hash_mask = s->hash_size - 1;
     s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
 
-    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
-    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
+    s->window = (Bytef *) ZALLOC(strm, (windowBits == 8 ? 264 : s->w_size),
+        2*sizeof(Byte));
+    s->prev   = (Posf *)  ZALLOC(strm, (windowBits == 8 ? 264 : s->w_size),
+        sizeof(Pos));
     s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
 
     s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
@@ -1277,12 +1279,9 @@
             }
             /* longest_match() sets match_start */
 
-            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
-#if (TOO_FAR <= 32767)
-                 || (s->match_length == MIN_MATCH &&
-                  s->strstart - s->match_start > TOO_FAR)
-#endif
-                 )) {
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
+                 (s->match_length == MIN_MATCH &&
+                  s->strstart - s->match_start > TOO_FAR))) {
 
                 /* If prev_match is also MIN_MATCH, match_start is garbage
                  * but we will ignore the current match anyway.
diff --git a/makefile.gcc b/makefile.gcc
new file mode 100644
index 0000000..095868b
--- /dev/null
+++ b/makefile.gcc
@@ -0,0 +1,55 @@
+# Sample makefile for pngcrush using gcc and make.
+# Glenn Randers-Pehrson
+# Last modified:  7 December 1999
+#
+# Invoke this makefile from a shell prompt in the usual way; for example:
+#
+#	make -f makefile.gcc
+#
+# This makefile builds a statically linked executable.
+
+# macros --------------------------------------------------------------------
+
+CC = gcc
+LD = gcc
+RM = rm -f
+CFLAGS = -O -Wall
+# [note that -Wall is a gcc-specific compilation flag ("all warnings on")]
+LDFLAGS =
+O = .o
+E =
+
+PNGCRUSH  = pngcrush
+
+LIBS = -lm
+
+OBJS  = $(PNGCRUSH)$(O) adler32$(O) crc32$(O) deflate$(O) gzio$(O) \
+	infblock$(O) infcodes$(O) inffast$(O) inflate$(O) inftrees$(O) \
+	infutil$(O) png$(O) pngerror$(O) pngget$(O) pngmem$(O) \
+	pngpread$(O) pngread$(O) pngrio$(O) pngrtran$(O) pngrutil$(O) \
+	pngset$(O) pngtrans$(O) pngvcrd$(O) pngwio$(O) pngwrite$(O) \
+	pngwtran$(O) pngwutil$(O) trees$(O) zutil$(O)
+
+EXES = $(PNGCRUSH)$(E)
+
+
+# implicit make rules -------------------------------------------------------
+
+.c$(O): png.h pngconf.h zlib.h pngcrush.h
+	$(CC) -c $(CFLAGS) $<
+
+
+# dependencies --------------------------------------------------------------
+
+all:  $(EXES)
+
+
+$(PNGCRUSH)$(E): $(OBJS)
+	$(LD) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+
+$(PNGCRUSH)$(O): $(PNGCRUSH).c png.h pngconf.h zlib.h pngcrush.h
+
+# maintenance ---------------------------------------------------------------
+
+clean:
+	$(RM) $(EXES) $(OBJS)
diff --git a/png.c b/png.c
index 340d261..50d7cf9 100644
--- a/png.c
+++ b/png.c
@@ -1,15 +1,16 @@
 
 /* png.c - location for general purpose libpng functions
  *
- * libpng version 1.0.5f - December 6, 1999
+ * libpng version 1.0.5j - December 21, 1999
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
  * Copyright (c) 1998, 1999 Glenn Randers-Pehrson
- * 
+ *
  */
 
 #define PNG_INTERNAL
 #define PNG_NO_EXTERN
+#include <assert.h>
 #include "png.h"
 
 /* Version information for C files.  This had better match the version
@@ -18,12 +19,12 @@
 
 #ifdef PNG_USE_GLOBAL_ARRAYS
 /* png_libpng_ver was changed to a function in version 1.0.5c */
-char png_libpng_ver[12] = "1.0.5f";
+char png_libpng_ver[12] = "1.0.5j";
 
 /* png_sig was changed to a function in version 1.0.5c */
 /* Place to hold the signature string for a PNG file. */
 png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
- 
+
 /* Invoke global declarations for constant strings for known chunk types */
 PNG_IHDR;
 PNG_IDAT;
@@ -263,27 +264,28 @@
 void
 png_free_text(png_structp png_ptr, png_infop info_ptr, int num)
 {
-    if (num != -1)
-    {
-    if (info_ptr->text[num].key)
-    {
-        png_free(png_ptr, info_ptr->text[num].key);
-        info_ptr->text[num].key = NULL;
-    }
-    if (info_ptr->text[num].lang)
-    {
-        png_free(png_ptr, info_ptr->text[num].lang);
-        info_ptr->text[num].lang = NULL;
-    }
-    }
-    else if (info_ptr->text != NULL)
-    {
-    int i;
-    for (i = 0; i < info_ptr->num_text; i++)
-        png_free_text(png_ptr, info_ptr, i);
-    png_free(png_ptr, info_ptr->text);
-    info_ptr->text = NULL;
-    }
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (num != -1)
+   {
+     if (info_ptr->text[num].key)
+     {
+         png_free(png_ptr, info_ptr->text[num].key);
+         info_ptr->text[num].key = NULL;
+     }
+   }
+   else if (info_ptr->text != NULL)
+   {
+     int i;
+     if(info_ptr->text != NULL)
+     {
+       for (i = 0; i < info_ptr->num_text; i++)
+           png_free_text(png_ptr, info_ptr, i);
+       png_free(png_ptr, info_ptr->text);
+       info_ptr->text = NULL;
+     }
+     info_ptr->num_text=0;
+   }
 }
 #endif
 
@@ -292,10 +294,18 @@
 void
 png_free_sCAL(png_structp png_ptr, png_infop info_ptr)
 {
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
    if (info_ptr->valid & PNG_INFO_sCAL)
    {
-       png_free(png_ptr, info_ptr->scal_unit);
-       info_ptr->valid &= ~PNG_INFO_sCAL;
+#if defined(PNG_FIXED_POINT_SUPPORTED)&& !defined(PNG_FLOATING_POINT_SUPPORTED)
+       png_free(png_ptr, info_ptr->scal_s_width);
+       png_free(png_ptr, info_ptr->scal_s_height);
+#else
+       if(png_ptr != NULL)
+          /* silence a compiler warning */ ;
+#endif
+          info_ptr->valid &= ~PNG_INFO_sCAL;
    }
 }
 #endif
@@ -305,6 +315,8 @@
 void
 png_free_pCAL(png_structp png_ptr, png_infop info_ptr)
 {
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
    if (info_ptr->valid & PNG_INFO_pCAL)
    {
        png_free(png_ptr, info_ptr->pcal_purpose);
@@ -313,9 +325,9 @@
        {
            int i;
            for (i = 0; i < (int)info_ptr->pcal_nparams; i++)
-           {
-               png_free(png_ptr, info_ptr->pcal_params[i]);
-           }
+             {
+             png_free(png_ptr, info_ptr->pcal_params[i]);
+             }
            png_free(png_ptr, info_ptr->pcal_params);
        }
        info_ptr->valid &= ~PNG_INFO_pCAL;
@@ -324,10 +336,12 @@
 #endif
 
 #if defined(PNG_iCCP_SUPPORTED)
-/* free any pCAL entry */
+/* free any iCCP entry */
 void
 png_free_iCCP(png_structp png_ptr, png_infop info_ptr)
 {
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
    if (info_ptr->valid & PNG_INFO_iCCP)
    {
        png_free(png_ptr, info_ptr->iccp_name);
@@ -342,17 +356,23 @@
 void
 png_free_spalettes(png_structp png_ptr, png_infop info_ptr, int num)
 {
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
    if (num != -1)
    {
        png_free(png_ptr, info_ptr->splt_palettes[num].name);
        png_free(png_ptr, info_ptr->splt_palettes[num].entries);
+       info_ptr->valid &=~ PNG_INFO_sPLT;
    }
    else
    {
-       png_uint_32 i;
+       int i;
 
-       for (i = 0; i < info_ptr->splt_palettes_num; i++)
-          png_free_spalettes(png_ptr, info_ptr, num);
+       if(info_ptr->splt_palettes_num == 0)
+          return;
+
+       for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+          png_free_spalettes(png_ptr, info_ptr, i);
 
        png_free(png_ptr, info_ptr->splt_palettes);
        info_ptr->splt_palettes_num = 0;
@@ -360,6 +380,48 @@
 }
 #endif
 
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+void
+png_free_unknown_chunks(png_structp png_ptr, png_infop info_ptr, int num)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
+   if (num != -1)
+   {
+       png_free(png_ptr, info_ptr->unknown_chunks[num].data);
+       info_ptr->unknown_chunks[num].data = NULL;
+   }
+   else
+   {
+       int i;
+
+       if(info_ptr->unknown_chunks_num == 0)
+          return;
+
+       for (i = 0; i < (int)info_ptr->unknown_chunks_num; i++)
+          png_free_unknown_chunks(png_ptr, info_ptr, i);
+
+       png_free(png_ptr, info_ptr->unknown_chunks);
+       info_ptr->unknown_chunks_num = 0;
+   }
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* free any hIST entry */
+void
+png_free_hIST(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+       return;
+   if (info_ptr->valid & PNG_INFO_hIST)
+   {
+       png_free(png_ptr, info_ptr->hist);
+       info_ptr->valid &= ~PNG_INFO_hIST;
+   }
+}
+#endif
+
 /* This is an internal routine to free any memory that the info struct is
  * pointing to before re-using it or freeing the struct itself.  Recall
  * that png_free() checks for NULL pointers for us.
@@ -383,6 +445,12 @@
 #if defined(PNG_READ_sPLT_SUPPORTED)
    png_free_spalettes(png_ptr, info_ptr, -1);
 #endif
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free_unknown_chunks(png_ptr, info_ptr, -1);
+#endif
+#if defined(PNG_hIST_SUPPORTED)
+   png_free_hIST(png_ptr, info_ptr);
+#endif
    png_info_init(info_ptr);
 }
 
@@ -460,7 +528,7 @@
 png_get_copyright(png_structp png_ptr)
 {
    if (png_ptr != NULL || png_ptr == NULL)  /* silence compiler warning */
-   return ("\n libpng version 1.0.5f - December 6, 1999\n\
+   return ("\n libpng version 1.0.5j - December 21, 1999\n\
    Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.\n\
    Copyright (c) 1996, 1997 Andreas Dilger\n\
    Copyright (c) 1998, 1999 Glenn Randers-Pehrson\n");
@@ -478,8 +546,8 @@
 {
    /* Version of *.c files used when building libpng */
    if(png_ptr != NULL) /* silence compiler warning about unused png_ptr */
-      return("1.0.5f");
-   return("1.0.5f");
+      return("1.0.5j");
+   return("1.0.5j");
 }
 
 png_charp
@@ -503,8 +571,8 @@
 /* Generate a compiler error if there is an old png.h in the search path. */
 void
 png_check_version
-   (version_1_0_5f png_h_is_not_version_1_0_5f)
+   (version_1_0_5j png_h_is_not_version_1_0_5j)
 {
-   if(png_h_is_not_version_1_0_5f == NULL)
+   if(png_h_is_not_version_1_0_5j == NULL)
      return;
 }
diff --git a/png.h b/png.h
index 6da95a2..443972a 100644
--- a/png.h
+++ b/png.h
@@ -1,7 +1,7 @@
 
 /* png.h - header file for PNG reference library
  *
- * libpng version 1.0.5f - December 6, 1999
+ * libpng version 1.0.5j - December 21, 1999
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
  * Copyright (c) 1998, 1999 Glenn Randers-Pehrson
@@ -9,34 +9,34 @@
  * Authors and maintainers:
  *  libpng versions 0.71, May 1995, through 0.89c, May 1996: Guy Schalnat
  *  libpng versions 0.90, December 1996, through 0.96, May 1997: Andreas Dilger
- *  libpng versions 0.97, January 1998, through 1.0.5f - December 6, 1999: Glenn
+ *  libpng versions 0.97, January 1998, through 1.0.5j - December 21, 1999: Glenn
  *  See also "Contributing Authors", below.
  *
  * Y2K compliance in libpng:
  * =========================
- *    
- *    December 6, 1999
- *    
+ *
+ *    December 21, 1999
+ *
  *    Since the PNG Development group is an ad-hoc body, we can't make
  *    an official declaration.
- *    
+ *
  *    This is your unofficial assurance that libpng from version 0.71 and
- *    upward through 1.0.5f are Y2K compliant.  It is my belief that earlier
+ *    upward through 1.0.5j are Y2K compliant.  It is my belief that earlier
  *    versions were also Y2K compliant.
- *    
+ *
  *    Libpng only has three year fields.  One is a 2-byte unsigned integer
  *    that will hold years up to 65535.  The other two hold the date in text
  *    format, and will hold years up to 9999.
- *    
+ *
  *    The integer is
  *        "png_uint_16 year" in png_time_struct.
- *    
+ *
  *    The strings are
  *        "png_charp time_buffer" in png_struct and
  *        "near_time_buffer", which is a local character string in png.c.
- *    
+ *
  *    There are seven time-related functions:
- *        png.c: png_convert_to_rfc_1123() in png.c 
+ *        png.c: png_convert_to_rfc_1123() in png.c
  *          (formerly png_convert_to_rfc_1152() in error)
  *        png_convert_from_struct_tm() in pngwrite.c, called in pngwrite.c
  *        png_convert_from_time_t() in pngwrite.c
@@ -44,8 +44,8 @@
  *        png_handle_tIME() in pngrutil.c, called in pngread.c
  *        png_set_tIME() in pngset.c
  *        png_write_tIME() in pngwutil.c, called in pngwrite.c
- *    
- *    All handle dates properly in a Y2K environment.  The 
+ *
+ *    All handle dates properly in a Y2K environment.  The
  *    png_convert_from_time_t() function calls gmtime() to convert from system
  *    clock time, which returns (year - 1900), which we properly convert to
  *    the full 4-digit year.  There is a possibility that applications using
@@ -55,19 +55,19 @@
  *    but this is not under our control.  The libpng documentation has always
  *    stated that it works with 4-digit years, and the APIs have been
  *    documented as such.
- *    
+ *
  *    The tIME chunk itself is also Y2K compliant.  It uses a 2-byte unsigned
  *    integer to hold the year, and can hold years as large as 65535.
- *    
+ *
  *    zlib, upon which libpng depends, is also Y2K compliant.  It contains
  *    no date-related code.
- *    
+ *
  *       Glenn Randers-Pehrson
  *       libpng maintainer
  *       PNG Development Group
- * 
+ *
  * Note about libpng version numbers:
- * 
+ *
  *    Due to various miscommunications, unforeseen code incompatibilities
  *    and occasional factors outside the authors' control, version numbering
  *    on the library has not always been consistent and straightforward.
@@ -98,7 +98,7 @@
  *    1.0.4a-f                 1.0.4a-f 10005  2.1.0.4a-f
  *    1.0.5                    1.0.5    10005  2.1.0.5
  *    1.0.5a-d                 1.0.5a-d 10006  2.1.0.5a-d
- *    1.0.5e-f                 1.0.5e-f 10100  2.1.0.5e-f
+ *    1.0.5e-j                 1.0.5e-j 10100  2.1.0.5e-j
  *    1.1.0                    1.1.0    10100  3.1.0.0
  *
  *    Henceforth the source version will match the shared-library minor
@@ -123,7 +123,7 @@
  * Copyright (c) 1996, 1997 Andreas Dilger
  * (libpng versions 0.90, December 1996, through 0.96, May 1997)
  * Copyright (c) 1998, 1999 Glenn Randers-Pehrson
- * (libpng versions 0.97, January 1998, through 1.0.5f, December 6, 1999)
+ * (libpng versions 0.97, January 1998, through 1.0.5j, December 21, 1999)
  *
  * For the purposes of this copyright and license, "Contributing Authors"
  * is defined as the following set of individuals:
@@ -177,9 +177,9 @@
 /*
  * A "png_get_copyright" function is available, for convenient use in "about"
  * boxes and the like:
- * 
+ *
  * printf("%s",png_get_copyright(NULL));
- * 
+ *
  * Also, the PNG logo (in PNG format, of course) is supplied in the
  * file "pngnow.png".
  */
@@ -224,7 +224,7 @@
  */
 
 /* Version information for png.h - this should match the version in png.c */
-#define PNG_LIBPNG_VER_STRING "1.0.5f"
+#define PNG_LIBPNG_VER_STRING "1.0.5j"
 
 /* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
  * We must not include leading zeros.
@@ -324,17 +324,23 @@
 typedef png_spalette FAR * FAR * png_spalette_pp;
 
 #ifdef PNG_TEXT_SUPPORTED
-/* png_text holds the contents of a text chunk in a PNG file, and whether 
- * that contents is compressed or not.  The "keyword" field points to a
- * regular C string.  */
+/* png_text holds the contents of a tEXt/zTXt/iTXt chunk in a PNG file,
+ * and whether those contents are compressed or not.  The "key" field
+ * points to a regular C string.  */
 typedef struct png_text_struct
 {
-   int compression;        /* compression value, see PNG_TEXT_COMPRESSION_ */
+   int  compression; /* compression value:
+                       -1: tEXt, none
+                        0: zTXt, deflate
+                        1: iTXt, none
+                        2: iTXt, deflate  */
    png_charp key;          /* keyword, 1-79 character description of "text" */
-   png_charp lang;         /* language code, 1-79 characters */
    png_charp text;         /* comment, may be an empty string (ie "") */
-   /* text_length is no longer used, and now present for compatibility only */
-   png_size_t text_length; /* length of "text" field (not used any more) */
+   png_size_t text_length; /* length of the text string */
+   png_size_t itxt_length; /* length of the itxt string */
+   png_charp lang;         /* language code, 1-79 characters */
+   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
+                              chars */
 } png_text;
 typedef png_text FAR * png_textp;
 typedef png_text FAR * FAR * png_textpp;
@@ -346,7 +352,9 @@
 #define PNG_TEXT_COMPRESSION_zTXt_WR -2
 #define PNG_TEXT_COMPRESSION_NONE    -1
 #define PNG_TEXT_COMPRESSION_zTXt     0
-#define PNG_TEXT_COMPRESSION_LAST     1  /* Not a valid value */
+#define PNG_ITXT_COMPRESSION_NONE     1
+#define PNG_ITXT_COMPRESSION_zTXt     2
+#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
 
 /* png_time is a way to hold the time in an machine independent way.
  * Two conversions are provided, both from time_t and struct tm.  There
@@ -366,6 +374,26 @@
 typedef png_time FAR * png_timep;
 typedef png_time FAR * FAR * png_timepp;
 
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* png_unknown_chunk is a structure to hold queued chunks for which there is
+ * no specific support.  The idea is that we can use this to queue
+ * up private chunks for output even though the library doesn't actually
+ * know about their semantics.
+ */
+typedef struct png_unknown_chunk_t
+{
+    png_byte name[5];
+    png_byte *data;
+    png_size_t size;
+
+    /* libpng-using applications should NOT directly modify this byte. */
+    png_byte location; /* mode of operation at read time */
+}
+png_unknown_chunk;
+typedef png_unknown_chunk FAR * png_unknown_chunkp;
+typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp;
+#endif
+
 /* png_info is a structure that holds the information in a PNG file so
  * that the application can find out the characteristics of the image.
  * If you are reading the file, this structure will tell you what is
@@ -389,16 +417,16 @@
  *
  * The following members may have allocated storage attached that should be
  * cleaned up before the structure is discarded: palette, text, pcal_purpose,
- * pcal_units, pcal_params, iccp_name, iccp_profile, splt_palettes, and 
- * scal_unit.  Of these, the text, pcal_*, iccp_*, splt_*, and scal_unit
- * members are automatically freed when the info structure is deallocated. 
+ * pcal_units, pcal_params, hist, iccp_name, iccp_profile, splt_palettes, and
+ * scal_unit.  Of these, the text, pcal_*, hist, iccp_*, splt_*, and scal_unit
+ * members are automatically freed when the info structure is deallocated.
  * The palette member is not.
  *
  * More allocation details: all the chunk-reading functions that change these
  * members go through the corresponding png_set_* functions.  Functions to
  * clear these members are available: see png_free_*.  The png_set_* functions
- * do not depend on being able to point info structure members to any of the 
- * storage they are passed (they make their own copies), EXCEPT that the 
+ * do not depend on being able to point info structure members to any of the
+ * storage they are passed (they make their own copies), EXCEPT that the
  * png_set_text function uses the same storage passed to them
  * in the text_ptr or itxt_ptr structure argument.
  */
@@ -435,8 +463,13 @@
     * on which the image was created, normally in the range [1.0, 2.5].
     * Data is valid if (valid & PNG_INFO_gAMA) is non-zero.
     */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float gamma; /* gamma value of image, if (valid & PNG_INFO_gAMA) */
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_gamma; /* gamma value of image, if (valid & PNG_INFO_gAMA) */
+#endif
+#endif
 
 #if defined(PNG_sRGB_SUPPORTED)
     /* GR-P, 0.96a */
@@ -447,7 +480,7 @@
 #if defined(PNG_TEXT_SUPPORTED)
    /* The tEXt, and zTXt chunks contain human-readable textual data in
     * uncompressed, compressed, and optionally compressed forms, respectively.
-    * The data in "text" is an array of pointers to uncompressed, 
+    * The data in "text" is an array of pointers to uncompressed,
     * null-terminated C strings. Each chunk has a keyword that describes the
     * textual data contained in that chunk.  Keywords are not required to be
     * unique, and the text string may be empty.  Any number of text chunks may
@@ -456,6 +489,9 @@
    int num_text; /* number of comments read/to write */
    int max_text; /* current size of text array */
    png_textp text; /* array of comments read/to write */
+   int num_text_old; /* number of comments read/to write */
+   png_textp text_old; /* array of comments read/to write, backward
+                          compatible  with libpng-1.0.5 and earlier */
 #endif /* PNG_TEXT_SUPPORTED */
 
 #if defined(PNG_tIME_SUPPORTED)
@@ -538,6 +574,7 @@
     * colors in the image as the creator.  Values are in the range
     * [0.0, 0.8].  Data valid if (valid & PNG_INFO_cHRM) non-zero.
     */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float x_white;
    float y_white;
    float x_red;
@@ -547,6 +584,17 @@
    float x_blue;
    float y_blue;
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_x_white;
+   png_fixed_point int_y_white;
+   png_fixed_point int_x_red;
+   png_fixed_point int_y_red;
+   png_fixed_point int_x_green;
+   png_fixed_point int_y_green;
+   png_fixed_point int_x_blue;
+   png_fixed_point int_y_blue;
+#endif
+#endif
 
 #if defined(PNG_pCAL_SUPPORTED)
    /* The pCAL chunk describes a transformation between the stored pixel
@@ -585,18 +633,30 @@
 #endif
 
 #if defined(PNG_sCAL_SUPPORTED)
-   /* The sCAL chunk describes the actual physical dimensions of the 
+   /* The sCAL chunk describes the actual physical dimensions of the
     * subject matter of the graphic.  The chunk contains a unit specification
-    * (an ASCII string), and two ASCII strings representing floating-point
+    * (a byte value), and two ASCII strings representing floating-point
     * values.  The values are width and height corresponsing to one pixel
     * in the image.  This external representation is converted to double
     * here.  Data values are valid if (valid & PNG_INFO_sCAL) is non-zero.
     */
-   png_charp scal_unit;        /* unit of physical scale */
+   png_byte scal_unit;         /* unit of physical scale */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    double scal_pixel_width;    /* width of one pixel */
    double scal_pixel_height;   /* height of one pixel */
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp scal_s_width;     /* string containing width */
+   png_charp scal_s_height;    /* string containing height */
+#endif
+#endif
 #endif
 
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   /* storage for unknown chunks that the library doesn't recognize. */
+   png_unknown_chunkp unknown_chunks;
+   png_size_t unknown_chunks_num;
+#endif
 } png_info;
 
 typedef png_info FAR * png_infop;
@@ -643,6 +703,12 @@
 #define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
 #define PNG_EQUATION_LAST         4 /* Not a valid value */
 
+/* These are for the sCAL chunk.  These values should NOT be changed. */
+#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
+#define PNG_SCALE_METER           1 /* meters per pixel */
+#define PNG_SCALE_RADIAN          2 /* radians per pixel */
+#define PNG_SCALE_LAST            3 /* Not a valid value */
+
 /* These are for the pHYs chunk.  These values should NOT be changed. */
 #define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
 #define PNG_RESOLUTION_METER      1 /* pixels/meter */
@@ -815,7 +881,9 @@
 
 #if defined(PNG_READ_bKGD_SUPPORTED)
    png_byte background_gamma_type;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float background_gamma;
+#endif
    png_color_16 background;   /* background color in screen gamma space */
 #  if defined(PNG_READ_GAMMA_SUPPORTED)
      png_color_16 background_1; /* background normalized to gamma 1.0 */
@@ -830,9 +898,14 @@
 
 #if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
    int gamma_shift;      /* number of "insignificant" bits 16-bit gamma */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float gamma;          /* file gamma value */
    float screen_gamma;   /* screen gamma value (display_exponent) */
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_gamma;
+#endif
+#endif
 
 #if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
    png_bytep gamma_table;     /* gamma table for 8-bit depth files */
@@ -937,9 +1010,9 @@
 };
 
 /* This prevents a compiler error in png_get_copyright() in png.c if png.c
-and png.h are both at * version 1.0.5f
+and png.h are both at version 1.0.5j
  */
-typedef png_structp version_1_0_5f;
+typedef png_structp version_1_0_5j;
 
 typedef png_struct FAR * FAR * png_structpp;
 
@@ -1056,8 +1129,10 @@
 
 #if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
 /* Reduce RGB to grayscale. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_rgb_to_gray) PNGARG((png_structp png_ptr,
    int error_action, double red, double green ));
+#endif
 extern PNG_EXPORT(png_byte,png_get_rgb_to_gray_status) PNGARG((png_structp
    png_ptr));
 #endif
@@ -1122,9 +1197,11 @@
 
 #if defined(PNG_READ_BACKGROUND_SUPPORTED)
 /* Handle alpha and tRNS by replacing with a background color. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_background) PNGARG((png_structp png_ptr,
    png_color_16p background_color, int background_gamma_code,
    int need_expand, double background_gamma));
+#endif
 #define PNG_BACKGROUND_GAMMA_UNKNOWN 0
 #define PNG_BACKGROUND_GAMMA_SCREEN  1
 #define PNG_BACKGROUND_GAMMA_FILE    2
@@ -1145,9 +1222,11 @@
 
 #if defined(PNG_READ_GAMMA_SUPPORTED)
 /* Handle gamma correction. Screen_gamma=(display_exponent) */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_gamma) PNGARG((png_structp png_ptr,
    double screen_gamma, double default_file_gamma));
 #endif
+#endif
 
 #if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
     defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
@@ -1312,9 +1391,11 @@
  * the weights and costs are set to 1.0, this degenerates the WEIGHTED method
  * to the UNWEIGHTED method, but with added encoding time/computation.
  */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_filter_heuristics) PNGARG((png_structp png_ptr,
    int heuristic_method, int num_weights, png_doublep filter_weights,
    png_doublep filter_costs));
+#endif
 #endif /*  PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
 
 /* Heuristic used for row filter selection.  These defines should NOT be
@@ -1550,8 +1631,10 @@
 png_ptr, png_infop info_ptr));
 
 /* Returns pixel aspect ratio, computed from pHYs chunk data.  */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(float, png_get_pixel_aspect_ratio) PNGARG((png_structp
 png_ptr, png_infop info_ptr));
+#endif
 
 /* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
 extern PNG_EXPORT(png_uint_32, png_get_x_offset_pixels) PNGARG((png_structp
@@ -1580,27 +1663,57 @@
 #endif
 
 #if defined(PNG_READ_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(png_uint_32,png_get_cHRM) PNGARG((png_structp png_ptr,
    png_infop info_ptr, double *white_x, double *white_y, double *red_x,
    double *red_y, double *green_x, double *green_y, double *blue_x,
    double *blue_y));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_white_x, png_fixed_point
+   *int_white_y, png_fixed_point *int_red_x, png_fixed_point *int_red_y,
+   png_fixed_point *int_green_x, png_fixed_point *int_green_y, png_fixed_point
+   *int_blue_x, png_fixed_point *int_blue_y));
+#endif
+#endif
 
 #if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_cHRM) PNGARG((png_structp png_ptr,
    png_infop info_ptr, double white_x, double white_y, double red_x,
    double red_y, double green_x, double green_y, double blue_x, double blue_y));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
 
 #if defined(PNG_READ_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(png_uint_32,png_get_gAMA) PNGARG((png_structp png_ptr,
    png_infop info_ptr, double *file_gamma));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_file_gamma));
+#endif
+#endif
 
 #if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_gAMA) PNGARG((png_structp png_ptr,
    png_infop info_ptr, double file_gamma));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_file_gamma));
+#endif
+#endif
 
 #if defined(PNG_READ_hIST_SUPPORTED)
 extern PNG_EXPORT(png_uint_32,png_get_hIST) PNGARG((png_structp png_ptr,
@@ -1698,6 +1811,8 @@
 extern PNG_EXPORT(void,png_set_iCCP) PNGARG((png_structp png_ptr,
    png_infop info_ptr, png_charp name, int compression_type,
    png_charp profile, int proflen));
+extern PNG_EXPORT(void,png_free_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
 #endif
 
 #if defined(PNG_READ_sPLT_SUPPORTED)
@@ -1708,14 +1823,23 @@
 #if defined(PNG_sPLT_SUPPORTED)
 extern PNG_EXPORT(void,png_set_spalettes) PNGARG((png_structp png_ptr,
    png_infop info_ptr, png_spalette_p entries, int nentries));
-#endif
-
-#if defined(PNG_sPLT_SUPPORTED)
-extern PNG_EXPORT(void,png_free_spallettes) PNGARG((png_structp png_ptr,
+extern PNG_EXPORT(void,png_free_spalettes) PNGARG((png_structp png_ptr,
    png_infop info_ptr, int num));
 #endif
 
+#if defined(PNG_READ_iTXt_SUPPORTED)
+/* png_get_itxt also returns the number of text chunks in *num_text */
+extern PNG_EXPORT(png_uint_32,png_get_itxt) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp *text_ptr, int *num_text));
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_itxt) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
 #if defined(PNG_READ_TEXT_SUPPORTED)
+/* Old interface; apps should use png_get_itxt instead */
 /* png_get_text also returns the number of text chunks in *num_text */
 extern PNG_EXPORT(png_uint_32,png_get_text) PNGARG((png_structp png_ptr,
    png_infop info_ptr, png_textp *text_ptr, int *num_text));
@@ -1725,7 +1849,7 @@
 extern PNG_EXPORT(void,png_set_text) PNGARG((png_structp png_ptr,
    png_infop info_ptr, png_textp text_ptr, int num_text));
 extern PNG_EXPORT(void,png_free_text) PNGARG((png_structp png_ptr,
-   png_infop info_ptr, int num_text));
+   png_infop info_ptr, int num_text_old));
 #endif
 
 #if defined(PNG_READ_tIME_SUPPORTED)
@@ -1751,13 +1875,26 @@
 #endif
 
 #if defined(PNG_READ_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(png_uint_32,png_get_sCAL) PNGARG((png_structp png_ptr,
-   png_infop info_ptr, png_charpp unit, double *width, double *height));
+   png_infop info_ptr, int *unit, double *width, double *height));
+#else 
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, png_charpp swidth, png_charpp sheight));
+#endif
+#endif
 #endif /* PNG_READ_sCAL_SUPPORTED */
 
-#if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 extern PNG_EXPORT(void,png_set_sCAL) PNGARG((png_structp png_ptr,
-   png_infop info_ptr, png_charp unit, double width, double height));
+   png_infop info_ptr, int unit, double width, double height));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, png_charp swidth, png_charp sheight));
+#endif
 #endif /* PNG_READ_sCAL_SUPPORTED || PNG_WRITE_sCAL_SUPPORTED */
 
 #if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
@@ -1765,6 +1902,17 @@
    png_infop info_ptr));
 #endif /* PNG_READ_sCAL_SUPPORTED || PNG_WRITE_sCAL_SUPPORTED */
 
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+extern PNG_EXPORT(void, png_set_keep_unknown_chunks) PNGARG((png_structp
+   png_ptr, int keep, png_bytep chunk_list, int num_chunks));
+extern PNG_EXPORT(void, png_set_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns));
+extern PNG_EXPORT(void,png_free_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int num));
+extern PNG_EXPORT(png_uint_32,png_get_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkpp entries));
+#endif
+
 /* Define PNG_DEBUG at compile time for debugging information.  Higher
  * numbers for PNG_DEBUG mean more debugging information.  This has
  * only been added since version 0.95 so it is not implemented throughout
@@ -1804,7 +1952,7 @@
 extern PNG_EXPORT(png_charp,png_get_header_version) PNGARG((png_structp png_ptr));
 extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr));
 
-#define PNG_HEADER_VERSION_STRING " libpng version 1.0.5f - December 6, 1999 (header)\n"
+#define PNG_HEADER_VERSION_STRING " libpng version 1.0.5j - December 21, 1999 (header)\n"
 
 #ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
 /* With these routines we avoid an integer divide, which will be slower on
@@ -1888,7 +2036,7 @@
 #define PNG_EXPAND             0x1000
 #define PNG_GAMMA              0x2000
 #define PNG_GRAY_TO_RGB        0x4000
-#define PNG_FILLER             0x8000
+#define PNG_FILLER             0x8000L
 #define PNG_PACKSWAP          0x10000L
 #define PNG_SWAP_ALPHA        0x20000L
 #define PNG_STRIP_ALPHA       0x40000L
@@ -1924,6 +2072,8 @@
 #define PNG_FLAG_FREE_PALETTE             0x1000
 #define PNG_FLAG_FREE_TRANS               0x2000
 #define PNG_FLAG_FREE_HIST                0x4000
+#define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000L
+#define PNG_FLAG_KEEP_UNSAFE_CHUNKS      0x10000L
 
 
 #define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
@@ -1948,6 +2098,7 @@
 #else
 #define png_sig png_sig_bytes(NULL)
 #endif
+#endif /* PNG_NO_EXTERN */
 
 /* Constant strings for known chunk types.  If you need to add a chunk,
  * define the name here, and add an invocation of the macro in png.c and
@@ -1999,7 +2150,6 @@
 PNG_EXPORT_VAR (const png_byte FARDATA) png_zTXt[5];
 #endif /* PNG_USE_GLOBAL_ARRAYS */
 
-#endif /* PNG_NO_EXTERN */
 
 /* Inline macros to do direct reads of bytes from the input buffer.  These
  * require that you are using an architecture that uses PNG byte ordering
@@ -2132,8 +2282,14 @@
 PNG_EXTERN void png_write_IEND PNGARG((png_structp png_ptr));
 
 #if defined(PNG_WRITE_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 PNG_EXTERN void png_write_gAMA PNGARG((png_structp png_ptr, double file_gamma));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_gAMA_fixed PNGARG((png_structp png_ptr, png_fixed_point
+    file_gamma));
+#endif
+#endif
 
 #if defined(PNG_WRITE_sBIT_SUPPORTED)
 PNG_EXTERN void png_write_sBIT PNGARG((png_structp png_ptr, png_color_8p sbit,
@@ -2141,11 +2297,20 @@
 #endif
 
 #if defined(PNG_WRITE_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 PNG_EXTERN void png_write_cHRM PNGARG((png_structp png_ptr,
    double white_x, double white_y,
    double red_x, double red_y, double green_x, double green_y,
    double blue_x, double blue_y));
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_cHRM_fixed PNGARG((png_structp png_ptr,
+   png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
 
 #if defined(PNG_WRITE_sRGB_SUPPORTED)
 PNG_EXTERN void png_write_sRGB PNGARG((png_structp png_ptr,
@@ -2159,8 +2324,8 @@
 #endif
 
 #if defined(PNG_WRITE_sPLT_SUPPORTED)
-PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr, 
-                                       png_spalette_p palette));
+PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr,
+   png_spalette_p palette));
 #endif
 
 #if defined(PNG_WRITE_tRNS_SUPPORTED)
@@ -2178,6 +2343,11 @@
    int num_hist));
 #endif
 
+#if defined(PNG_hIST_SUPPORTED)
+extern PNG_EXPORT(void,png_free_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
 #if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
     defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
 PNG_EXTERN png_size_t png_check_keyword PNGARG((png_structp png_ptr,
@@ -2195,8 +2365,9 @@
 #endif
 
 #if defined(PNG_WRITE_iTXt_SUPPORTED)
-PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr, 
-   int compression, png_charp key, png_charp lang, png_charp text));
+PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr,
+   int compression, png_charp key, png_charp lang, png_charp lang_key,
+   png_charp text));
 #endif
 
 #if defined(PNG_WRITE_oFFs_SUPPORTED)
@@ -2222,8 +2393,15 @@
 #endif
 
 #if defined(PNG_WRITE_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 PNG_EXTERN void png_write_sCAL PNGARG((png_structp png_ptr,
-   png_charp unit, double width, double height));
+   int unit, double width, double height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_sCAL_s PNGARG((png_structp png_ptr,
+   int unit, png_charp width, png_charp height));
+#endif
+#endif
 #endif
 
 /* Called when finished processing a row of data */
diff --git a/pngconf.h b/pngconf.h
index 9884d04..98f6a74 100644
--- a/pngconf.h
+++ b/pngconf.h
@@ -1,7 +1,7 @@
 
 /* pngconf.h - machine configurable file for libpng
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -17,21 +17,7 @@
 #ifndef PNGCONF_H
 #define PNGCONF_H
 
-#define PNG_NO_EASY_ACCESS
-#define PNG_NO_READ_EMPTY_PLTE
-#define PNG_NO_WRITE_TRANSFORMS
-#define PNG_READ_USER_TRANSFORM_SUPPORTED
-#define PNG_READ_STRIP_ALPHA_SUPPORTED
-#define PNG_READ_EXPAND_SUPPORTED
-#define PNG_READ_FILLER_SUPPORTED
-#define PNG_READ_GRAY_TO_RGB_SUPPORTED
-#define PNG_READ_RGB_TO_GRAY_SUPPORTED
-#define PNG_READ_BACKGROUND_SUPPORTED
-#define PNG_READ_GAMMA_SUPPORTED
-#define PNG_ZBUF_SIZE 524288
-/*
-#define PNG_NO_GLOBAL_ARRAYS
-*/
+#include "pngcrush.h" /* except for this line, this is libpng's pngconf.h */
 
 /* This is the size of the compression buffer, and thus the size of
  * an IDAT chunk.  Make this whatever size you feel is best for your
@@ -174,6 +160,7 @@
  * them inside an appropriate ifdef/endif pair for portability.
  */
 
+#if !defined(PNG_NO_FLOATING_POINT_SUPPORTED)
 #if defined(MACOS)
 /* We need to check that <math.h> hasn't already been included earlier
  * as it seems it doesn't agree with <fp.h>, yet we should really use
@@ -185,6 +172,7 @@
 #else
 #include <math.h>
 #endif
+#endif
 
 /* Codewarrior on NT has linking problems without this. */
 #if (defined(__MWERKS__) && defined(WIN32)) || defined(__STDC__)
@@ -276,6 +264,7 @@
  */
 
 
+
 #if !defined(PNG_READ_TRANSFORMS_NOT_SUPPORTED) && \
     !defined(PNG_NO_READ_TRANSFORMS)
 #define PNG_READ_TRANSFORMS_SUPPORTED
@@ -434,6 +423,14 @@
 #define PNG_ASSEMBLER_CODE_SUPPORTED
 #endif
 
+#ifndef PNG_NO_FLOATING_POINT_SUPPORTED
+#define PNG_FLOATING_POINT_SUPPORTED
+#endif
+
+#ifndef PNG_NO_FIXED_POINT_SUPPORTED
+#define PNG_FIXED_POINT_SUPPORTED
+#endif
+
 /* Do not use global arrays (helps with building DLL's)
  * They are no longer used in libpng itself, since version 1.0.5c,
  * but might be required for some pre-1.0.5c applications.
@@ -480,199 +477,220 @@
 #endif
 
 #ifdef PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_READ_TEXT
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_READ_tEXt
+#  define PNG_NO_READ_zTXt
+#endif
 #ifndef PNG_NO_READ_bKGD
-#define PNG_READ_bKGD_SUPPORTED
-#define PNG_bKGD_SUPPORTED
+#  define PNG_READ_bKGD_SUPPORTED
+#  define PNG_bKGD_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_cHRM
-#define PNG_READ_cHRM_SUPPORTED
-#define PNG_cHRM_SUPPORTED
+#  define PNG_READ_cHRM_SUPPORTED
+#  define PNG_cHRM_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_gAMA
-#define PNG_READ_gAMA_SUPPORTED
-#define PNG_gAMA_SUPPORTED
+#  define PNG_READ_gAMA_SUPPORTED
+#  define PNG_gAMA_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_hIST
-#define PNG_READ_hIST_SUPPORTED
-#define PNG_hIST_SUPPORTED
+#  define PNG_READ_hIST_SUPPORTED
+#  define PNG_hIST_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_iCCP
-#define PNG_READ_iCCP_SUPPORTED
-#define PNG_iCCP_SUPPORTED
+#  define PNG_READ_iCCP_SUPPORTED
+#  define PNG_iCCP_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_iTXt
-#define PNG_READ_iTXt_SUPPORTED
-#define PNG_iTXt_SUPPORTED
+#  define PNG_READ_iTXt_SUPPORTED
+#  define PNG_iTXt_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_oFFs
-#define PNG_READ_oFFs_SUPPORTED
-#define PNG_oFFs_SUPPORTED
+#  define PNG_READ_oFFs_SUPPORTED
+#  define PNG_oFFs_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_pCAL
-#define PNG_READ_pCAL_SUPPORTED
-#define PNG_pCAL_SUPPORTED
+#  define PNG_READ_pCAL_SUPPORTED
+#  define PNG_pCAL_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_sCAL
-#define PNG_READ_sCAL_SUPPORTED
-#define PNG_sCAL_SUPPORTED
+#  define PNG_READ_sCAL_SUPPORTED
+#  define PNG_sCAL_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_pHYs
-#define PNG_READ_pHYs_SUPPORTED
-#define PNG_pHYs_SUPPORTED
+#  define PNG_READ_pHYs_SUPPORTED
+#  define PNG_pHYs_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_sBIT
-#define PNG_READ_sBIT_SUPPORTED
-#define PNG_sBIT_SUPPORTED
+#  define PNG_READ_sBIT_SUPPORTED
+#  define PNG_sBIT_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_sPLT
-#define PNG_READ_sPLT_SUPPORTED
-#define PNG_sPLT_SUPPORTED
+#  define PNG_READ_sPLT_SUPPORTED
+#  define PNG_sPLT_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_sRGB
-#define PNG_READ_sRGB_SUPPORTED
-#define PNG_sRGB_SUPPORTED
+#  define PNG_READ_sRGB_SUPPORTED
+#  define PNG_sRGB_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_tEXt
-#define PNG_READ_tEXt_SUPPORTED
-#define PNG_tEXt_SUPPORTED
+#  define PNG_READ_tEXt_SUPPORTED
+#  define PNG_tEXt_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_tIME
-#define PNG_READ_tIME_SUPPORTED
-#define PNG_tIME_SUPPORTED
+#  define PNG_READ_tIME_SUPPORTED
+#  define PNG_tIME_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_tRNS
-#define PNG_READ_tRNS_SUPPORTED
-#define PNG_tRNS_SUPPORTED
+#  define PNG_READ_tRNS_SUPPORTED
+#  define PNG_tRNS_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_zTXt
-#define PNG_READ_zTXt_SUPPORTED
-#define PNG_zTXt_SUPPORTED
+#  define PNG_READ_zTXt_SUPPORTED
+#  define PNG_zTXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_UNKNOWN_CHUNKS
+#  define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+#  define PNG_UNKNOWN_CHUNKS_SUPPORTED
 #endif
 #ifndef PNG_NO_READ_OPT_PLTE
-#define PNG_READ_OPT_PLTE_SUPPORTED /* only affects support of the optional */
-#endif                              /* PLTE chunk in RGB and RGBA images */
+#  define PNG_READ_OPT_PLTE_SUPPORTED /* only affects support of the */
+#endif                      /* optional PLTE chunk in RGB and RGBA images */
 #if defined(PNG_READ_iTXt_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) || \
   defined(PNG_READ_zTXt_SUPPORTED)
-#define PNG_READ_TEXT_SUPPORTED
-#define PNG_TEXT_SUPPORTED
+#  define PNG_READ_TEXT_SUPPORTED
+#  define PNG_TEXT_SUPPORTED
 #endif
 #endif /* PNG_READ_ANCILLARY_CHUNKS_SUPPORTED */
 
 #ifdef PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
-#ifndef PNG_NO_WRITE_bKGD
-#define PNG_WRITE_bKGD_SUPPORTED
-#ifndef PNG_bKGD_SUPPORTED
-#  define PNG_bKGD_SUPPORTED
+#ifdef PNG_NO_WRITE_TEXT
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_WRITE_tEXt
+#  define PNG_NO_WRITE_zTXt
 #endif
+#ifndef PNG_NO_WRITE_bKGD
+#  define PNG_WRITE_bKGD_SUPPORTED
+#  ifndef PNG_bKGD_SUPPORTED
+#    define PNG_bKGD_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_cHRM
-#define PNG_WRITE_cHRM_SUPPORTED
-#ifndef PNG_cHRM_SUPPORTED
-#  define PNG_cHRM_SUPPORTED
-#endif
+#  define PNG_WRITE_cHRM_SUPPORTED
+#  ifndef PNG_cHRM_SUPPORTED
+#    define PNG_cHRM_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_gAMA
-#define PNG_WRITE_gAMA_SUPPORTED
-#ifndef PNG_gAMA_SUPPORTED
-#  define PNG_gAMA_SUPPORTED
-#endif
+#  define PNG_WRITE_gAMA_SUPPORTED
+#  ifndef PNG_gAMA_SUPPORTED
+#    define PNG_gAMA_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_hIST
-#define PNG_WRITE_hIST_SUPPORTED
-#ifndef PNG_hIST_SUPPORTED
-#  define PNG_hIST_SUPPORTED
-#endif
+#  define PNG_WRITE_hIST_SUPPORTED
+#  ifndef PNG_hIST_SUPPORTED
+#    define PNG_hIST_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_iCCP
-#define PNG_WRITE_iCCP_SUPPORTED
-#ifndef PNG_iCCP_SUPPORTED
-#  define PNG_iCCP_SUPPORTED
-#endif
+#  define PNG_WRITE_iCCP_SUPPORTED
+#  ifndef PNG_iCCP_SUPPORTED
+#    define PNG_iCCP_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_iTXt
-#define PNG_WRITE_iTXt_SUPPORTED
-#ifndef PNG_iTXt_SUPPORTED
-#  define PNG_iTXt_SUPPORTED
-#endif
+#  define PNG_WRITE_iTXt_SUPPORTED
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_oFFs
-#define PNG_WRITE_oFFs_SUPPORTED
-#ifndef PNG_oFFs_SUPPORTED
-#  define PNG_oFFs_SUPPORTED
-#endif
+#  define PNG_WRITE_oFFs_SUPPORTED
+#  ifndef PNG_oFFs_SUPPORTED
+#    define PNG_oFFs_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_pCAL
-#define PNG_WRITE_pCAL_SUPPORTED
-#ifndef PNG_pCAL_SUPPORTED
-#  define PNG_pCAL_SUPPORTED
-#endif
+#  define PNG_WRITE_pCAL_SUPPORTED
+#  ifndef PNG_pCAL_SUPPORTED
+#    define PNG_pCAL_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_sCAL
-#define PNG_WRITE_sCAL_SUPPORTED
-#ifndef PNG_sCAL_SUPPORTED
-#  define PNG_sCAL_SUPPORTED
-#endif
+#  define PNG_WRITE_sCAL_SUPPORTED
+#  ifndef PNG_sCAL_SUPPORTED
+#    define PNG_sCAL_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_pHYs
-#define PNG_WRITE_pHYs_SUPPORTED
-#ifndef PNG_pHYs_SUPPORTED
-#  define PNG_pHYs_SUPPORTED
-#endif
+#  define PNG_WRITE_pHYs_SUPPORTED
+#  ifndef PNG_pHYs_SUPPORTED
+#    define PNG_pHYs_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_sBIT
-#define PNG_WRITE_sBIT_SUPPORTED
-#ifndef PNG_sBIT_SUPPORTED
-#  define PNG_sBIT_SUPPORTED
-#endif
+#  define PNG_WRITE_sBIT_SUPPORTED
+#  ifndef PNG_sBIT_SUPPORTED
+#    define PNG_sBIT_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_sPLT
-#define PNG_WRITE_sPLT_SUPPORTED
-#ifndef PNG_sPLT_SUPPORTED
-#  define PNG_sPLT_SUPPORTED
-#endif
+#  define PNG_WRITE_sPLT_SUPPORTED
+#  ifndef PNG_sPLT_SUPPORTED
+#    define PNG_sPLT_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_sRGB
-#define PNG_WRITE_sRGB_SUPPORTED
-#ifndef PNG_sRGB_SUPPORTED
-#  define PNG_sRGB_SUPPORTED
-#endif
+#  define PNG_WRITE_sRGB_SUPPORTED
+#  ifndef PNG_sRGB_SUPPORTED
+#    define PNG_sRGB_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_tEXt
-#define PNG_WRITE_tEXt_SUPPORTED
-#ifndef PNG_tEXt_SUPPORTED
-#  define PNG_tEXt_SUPPORTED
-#endif
+#  define PNG_WRITE_tEXt_SUPPORTED
+#  ifndef PNG_tEXt_SUPPORTED
+#    define PNG_tEXt_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_tIME
-#define PNG_WRITE_tIME_SUPPORTED
-#ifndef PNG_tIME_SUPPORTED
-#  define PNG_tIME_SUPPORTED
-#endif
+#  define PNG_WRITE_tIME_SUPPORTED
+#  ifndef PNG_tIME_SUPPORTED
+#    define PNG_tIME_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_tRNS
-#define PNG_WRITE_tRNS_SUPPORTED
-#ifndef PNG_tRNS_SUPPORTED
-#  define PNG_tRNS_SUPPORTED
-#endif
+#  define PNG_WRITE_tRNS_SUPPORTED
+#  ifndef PNG_tRNS_SUPPORTED
+#    define PNG_tRNS_SUPPORTED
+#  endif
 #endif
 #ifndef PNG_NO_WRITE_zTXt
-#define PNG_WRITE_zTXt_SUPPORTED
-#ifndef PNG_zTXt_SUPPORTED
-#  define PNG_zTXt_SUPPORTED
+#  define PNG_WRITE_zTXt_SUPPORTED
+#  ifndef PNG_zTXt_SUPPORTED
+#    define PNG_zTXt_SUPPORTED
+#  endif
 #endif
+#ifndef PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
 #endif
 #if defined(PNG_WRITE_iTXt_SUPPORTED) || defined(PNG_WRITE_tEXt_SUPPORTED) || \
   defined(PNG_WRITE_zTXt_SUPPORTED)
-#define PNG_WRITE_TEXT_SUPPORTED
-#ifndef PNG_TEXT_SUPPORTED
-#  define PNG_TEXT_SUPPORTED
-#endif
+#  define PNG_WRITE_TEXT_SUPPORTED
+#  ifndef PNG_TEXT_SUPPORTED
+#    define PNG_TEXT_SUPPORTED
+#  endif
 #endif
 #endif /* PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED */
 
 /* need the time information for reading tIME chunks */
 #if defined(PNG_READ_tIME_SUPPORTED) || defined(PNG_WRITE_tIME_SUPPORTED)
-#include <time.h>
+#  include <time.h>
 #endif
 
 /* Some typedefs to get us started.  These should be safe on most of the
@@ -755,6 +773,10 @@
 #define FARDATA
 #endif
 
+/* Typedef for floating-point numbers that are converted to
+   fixed-point by multiplying by 100,000, e.g., int_gamma */
+typedef png_int_32 png_fixed_point;
+
 /* Add typedefs for pointers */
 typedef void            FAR * png_voidp;
 typedef png_byte        FAR * png_bytep;
@@ -764,7 +786,10 @@
 typedef png_int_16      FAR * png_int_16p;
 typedef PNG_CONST char  FAR * png_const_charp;
 typedef char            FAR * png_charp;
+typedef png_fixed_point FAR * png_fixed_point_p;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 typedef double          FAR * png_doublep;
+#endif
 
 /* Pointers to pointers; i.e. arrays */
 typedef png_byte        FAR * FAR * png_bytepp;
@@ -774,7 +799,10 @@
 typedef png_int_16      FAR * FAR * png_int_16pp;
 typedef PNG_CONST char  FAR * FAR * png_const_charpp;
 typedef char            FAR * FAR * png_charpp;
+typedef png_fixed_point FAR * FAR * png_fixed_point_pp;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 typedef double          FAR * FAR * png_doublepp;
+#endif
 
 /* Pointers to pointers to pointers; i.e. pointer to array */
 typedef char            FAR * FAR * FAR * png_charppp;
@@ -823,7 +851,7 @@
 #    define PNG_EXPORT_VAR(type) extern __declspec(dllexport) type
 #  endif
 #  ifdef PNG_ATTR_DLLEXP
-#    define PNG_EXPORT_VAR(type) extern type __attribute__((dllexport)) 
+#    define PNG_EXPORT_VAR(type) extern type __attribute__((dllexport))
 #  endif
 #  ifdef PNG_DECL_DLLIMP
 #    define PNG_EXPORT_VAR(type) extern __declspec(dllimport) type
@@ -863,7 +891,7 @@
 #endif
 /* End of memory model independent support */
 
-/* Just a double check that someone hasn't tried to define something
+/* Just a little check that someone hasn't tried to define something
  * contradictory.
  */
 #if (PNG_ZBUF_SIZE > 65536) && defined(PNG_MAX_MALLOC_64K)
diff --git a/pngcrush.c b/pngcrush.c
index 2a5f110..19a14cb 100644
--- a/pngcrush.c
+++ b/pngcrush.c
@@ -1,4 +1,4 @@
-/* crushpng.c - a simple program to recompress png files
+/* pngcrush.c - a simple program to recompress png files
  *
  * This program reads in a PNG image, and writes it out again, with the
  * optimum filter_type and zlib_level.  It uses brute force (trying
@@ -6,20 +6,21 @@
  * levels 3 and 9).  It does the most time-consuming method last in case
  * it turns out to be the best.
  *
- * Optionally, it can remove unwanted chunks or add gAMA and sRGB chunks.
+ * Optionally, it can remove unwanted chunks or add gAMA, sRGB, bKGD,
+ * tEXt/zTXt, and tRNS chunks.
  *
- * Uses libpng-1.0.5a.  This program was based upon libpng's pngtest.c.
+ * Uses libpng-1.0.5j.  This program was based upon libpng's pngtest.c.
  *
  * Thanks to Greg Roelofs for various bug fixes, suggestions, and
  * occasionally creating Linux executables.
  */
 
-#define PNGCRUSH_VERSION "1.2.1"
+#define PNGCRUSH_VERSION "1.2.2"
 
 /*
  * COPYRIGHT NOTICE, DISCLAIMER, AND LICENSE:
  *
- * Copyright (c) 1998, 1999, Glenn Randers-Pehrson
+ * Copyright (c) 1998, 1999, Glenn Randers-Pehrson (randeg@alum.rpi.edu)
  *
  * The pngcrush program is supplied "AS IS".  The Author disclaims all
  * warranties, expressed or implied, including, without limitation, the
@@ -45,17 +46,47 @@
  *
  * Version 1.2.*: check for unused alpha channel and ok-to-reduce-depth.
  *   Rearrange palette to put most-used color first and
- *   transparent color second.
+ *   transparent color second.  Finish pplt (partial palette) feature.
  *
- * Version 1.2.2: Add iCCP, iTXt, sCAL, and sPLT support, which are
- *   now supported by libpng.
+ * Version 1.2.*: Use an alternate write function for the trial passes, that
+ *   simply counts bytes rather than actually writing to a file, to save wear
+ *   and tear on disk drives.
+ *
+ * Version 1.2.*: Drop explicit support for pCAL, hIST, sCAL, sPLT, iCCP,
+ *   tIME, and cHRM chunks and handle them as unknown but safe-to-copy, once
+ *   libpng is able to override the unsafe-to-copy status of unknown chunks.
  *
  * Change log:
  *
+ * Version 1.2.2: Added support for handling unknown chunks.
+ *
+ *   pngcrush is now fixed-point only, unless PNG_NO_FLOATING_POINT_SUPPORTED
+ *   is undefined in pngcrush.h.
+ *
+ *   Added support for the iCCP, iTXt, sCAL, and sPLT chunks, which
+ *   are now supported by libpng (since libpng-1.0.5j).  None of these have
+ *   been adequately tested.
+ *
+ *   #ifdef'ed out more unused code (weighted filters and progressive read;
+ *   this saves about 15k in the size of the executable).
+ *
+ *   Moved the special definitions from pngconf.h into a new pngcrush.h
+ *
+ *   Disallow 256-byte compression window size when writing, to work around
+ *   an apparent zlib bug.  Either deflate was producing incorrect results in a
+ *   21x21 4-bit image or inflate was decoding it incorrectly; the uncompressed
+ *   stream is 252 bytes, which is uncomfortably close to the resulting
+ *   256-byte compression window.  This workaround can be removed when zlib
+ *   is fixed.
+ *
+ *   The "-m method" option can be used with any of the 124 methods, not just
+ *   the first 10, without having to specify the filter, level, and strategy.
+ *
  * Version 1.2.1: Fixed -srgb parameter so it really does take an argument,
  *   and so it continues to use "0" if an integer does not follow the -srgb.
  *   Added "-plte_len n" argument for truncating the PLTE.  Be sure not to
  *   truncate it to less than the greatest index actually appearing in IDAT.
+ *   Built with libpng-1.0.5f.
  *
  * Version 1.2.0: Removed registration requirement.  Added open source
  *   license.  Redefined TOO_FAR=32k in deflate.c.
@@ -128,17 +159,24 @@
 static PNG_CONST char *extension = "_C.png";
 
 static int number_of_open_files;
+static int do_pplt = 0;
+char pplt_string[1024];
 char *ip, *op, *dot;
 char in_string[256];
 char prog_string[256];
 char out_string[256];
 char in_extension[256];
-int text_inputs=0;
+static int text_inputs=0;
 int text_where[10];  /* 0: no text; 1: before PLTE; 2: after PLTE */
-int text_compression[10]; /* 0: tEXt; 1: zTXt */
+int text_compression[10]; /* -1: uncompressed tEXt; 0: compressed zTXt
+                              1: uncompressed iTXt; 2: compressed iTXt */
 char text_text[20480];  /* It would be nice to png_malloc this, but we don't
                          * have a png_ptr yet when we need it. */
 char text_keyword[800];
+#ifdef PNG_iTXt_SUPPORTED
+char text_lang[800];
+char text_lang_key[800];
+#endif
 int best;
 
 char buffer[256];
@@ -180,9 +218,11 @@
 static int methods_specified=0;
 static int intent=-1;
 static int plte_len=-1;
-static double specified_gamma=0.;
-static double force_specified_gamma=0.;
+#ifdef PNG_gAMA_SUPPORTED
+static int specified_gamma=0;
+static int force_specified_gamma=0;
 static int double_gamma=0;
+#endif
 static int names;
 static int have_trns=0;
 static png_uint_16 trns_index=0;
@@ -329,6 +369,8 @@
       char keystroke;
       fprintf(STDERR, "Press [ENTER] key to continue.\n");
       keystroke=(char)getc(stdin);
+      if (keystroke)
+        /* stifle compiler warning */ ;
    }
 }
 #define PNG_CRUSH_CLEANUP \
@@ -376,16 +418,20 @@
            (!strncmp(name,"gIFx",4) && (!strncmp(argv[i],"gifx",4) || allb)) ||
            (!strncmp(name,"hIST",4) && (!strncmp(argv[i],"hist",4) || allb)) ||
            (!strncmp(name,"iCCP",4) && (!strncmp(argv[i],"iccp",4) || allb)) ||
+           (!strncmp(name,"iTXt",4) && (!strncmp(argv[i],"itxt",4) || allb)) ||
+           (!strncmp(name,"iTXt",4) && (!strncmp(argv[i],"text",4)        )) ||
            (!strncmp(name,"oFFs",4) && (!strncmp(argv[i],"offs",4) || allb)) ||
            (!strncmp(name,"pHYs",4) && (!strncmp(argv[i],"phys",4) || allb)) ||
            (!strncmp(name,"pCAL",4) && (!strncmp(argv[i],"pcal",4) || allb)) ||
            (!strncmp(name,"sBIT",4) && (!strncmp(argv[i],"sbit",4) || allb)) ||
+           (!strncmp(name,"sCAL",4) && (!strncmp(argv[i],"scal",4) || allb)) ||
            (!strncmp(name,"sRGB",4) && (!strncmp(argv[i],"srgb",4) || allb)) ||
+           (!strncmp(name,"sPLT",4) && (!strncmp(argv[i],"splt",4) || allb)) ||
            (!strncmp(name,"tEXt",4) && (!strncmp(argv[i],"text",4) || allb)) ||
            (!strncmp(name,"tIME",4) && (!strncmp(argv[i],"time",4) || allb)) ||
-           (!strncmp(name,"tRNS",4) && (!strncmp(argv[i],"trns",4))) ||
-           (!strncmp(name,"zTXt",4) && (!strncmp(argv[i],"text",4) || allb)) ||
-           (!strncmp(name,"zTXt",4) && (!strncmp(argv[i],"ztxt",4))))
+           (!strncmp(name,"tRNS",4) && (!strncmp(argv[i],"trns",4)        )) ||
+           (!strncmp(name,"zTXt",4) && (!strncmp(argv[i],"ztxt",4) || allb)) ||
+           (!strncmp(name,"zTXt",4) && (!strncmp(argv[i],"text",4)        )))
          {
            things_have_changed=1;
            if(verbose > 0 && trial == 1)
@@ -455,7 +501,10 @@
    int lv[MAX_METHODSP1];
    int zs[MAX_METHODSP1];
    int ntrial;
-   double file_gamma=0.;
+   int lev, strat, filt;
+#ifdef PNG_gAMA_SUPPORTED
+   png_fixed_point file_gamma=0;
+#endif
    char *cp;
 
    int i;
@@ -492,7 +541,27 @@
    fm[1]=0; fm[2]=1; fm[4]=0; fm[5]=1; fm[7]=0; fm[8]=1;
    lv[1]=4; lv[2]=4; lv[3]=4; lv[9]=2;
    zs[1]=0; zs[2]=0; zs[5]= 0; zs[6]=0; zs[7]=0; zs[9]=2;
-
+   method=11;
+   for(filt=0; filt<6; filt++)
+     {
+        zs[method]=2;
+        lv[method]=2;
+        fm[method]=filt;
+        method++;
+     }
+   for(lev=1; lev<10; lev++)
+     {
+        for(strat=0; strat<2; strat++)
+        {
+           for(filt=0; filt<6; filt++)
+           {
+              zs[method]=strat;
+              lv[method]=lev;
+              fm[method]=filt;
+              method++;
+           }
+        }
+     }
 
    names=1;
    for (i=1; i<argc; i++)
@@ -503,28 +572,16 @@
       /* try two fast filters */
       {
          methods_specified=1;
-         zs[11]=0;
-         lv[11]=4;
-         fm[11]=0;
-         try_method[11]=0;
-         zs[12]=2;
-         lv[12]=1;
-         fm[12]=5;
-         try_method[12]=0;
+         try_method[16]=0;
+         try_method[53]=0;
       }
    else if(!strncmp(argv[i],"-huffman",8))
       /* try all filters with huffman */
       {
-         int filt;
          methods_specified=1;
-         method=11;
-         for(filt=0; filt<6; filt++)
+         for(method=11; method<=16; method++) /* 11..16 <=> filters 0..5 */
          {
-            zs[method]=2;
-            lv[method]=2;
-            fm[method]=filt;
             try_method[method]=0;
-            method++;
          }
       }
 
@@ -543,37 +600,18 @@
       {
          int lev, strat, filt;
          methods_specified=1;
-         method=11;
          brute_force++;
-         brute_force_strategy=0;
-         for (strat=0; strat<3; strat++)
-            brute_force_strategies[strat]=0;
-         brute_force_level=0;
-         brute_force_filter=0;
-         for(filt=0; filt<6; filt++)
-         {
-            zs[method]=2;
-            lv[method]=2;
-            fm[method]=filt;
-            try_method[method]=0;
-            brute_force_filters[filt]=0;
-            method++;
-         }
-         for(lev=1; lev<10; lev++)
-         {
-            brute_force_levels[lev]=0;
-            for(strat=0; strat<2; strat++)
-            {
-               for(filt=0; filt<6; filt++)
-               {
-                  zs[method]=strat;
-                  lv[method]=lev;
-                  fm[method]=filt;
-                  try_method[method]=0;
-                  method++;
-               }
-            }
-         }
+         for(method=11; method < 125; method++)
+              try_method[method]=0;
+         if(brute_force_filter==0)
+           for (filt=0; filt<6; filt++)
+             brute_force_filters[filt]=0;      
+         if(brute_force_level==0)
+           for (lev=0; lev<10; lev++)
+              brute_force_levels[lev]=0;      
+         if(brute_force_strategy == 0)
+           for (strat=0; strat<3; strat++)
+              brute_force_strategies[strat]=0;      
       }
    else if(!strncmp(argv[i],"-bit_depth",10))
       {
@@ -585,11 +623,13 @@
          names++;
          force_output_color_type=atoi(argv[++i]);
       }
+#ifdef PNG_gAMA_SUPPORTED
    else if(!strncmp(argv[i],"-dou",4))
       {
          double_gamma++;
          things_have_changed=1;
       }
+#endif
    else if(!strncmp(argv[i],"-d",2))
       {
          i++;
@@ -617,15 +657,14 @@
             fm[method]=specified_filter;
          else
          {
-            if(brute_force_filter == 0)
-               for (filt=0; filt<6; filt++)
-                  brute_force_filters[filt]=1;      
+            for (filt=0; filt<6; filt++)
+               brute_force_filters[filt]=1;      
             brute_force_filters[specified_filter]=0;
             method=11;
             for(filt=0; filt<6; filt++)
             {
                try_method[method]= brute_force_filters[filt] |
-                       brute_force_strategies[2];
+                  brute_force_strategies[2];
                method++;
             }
             for(lev=1; lev<10; lev++)
@@ -681,12 +720,14 @@
             brute_force_level++;
          }
       }
+#ifdef PNG_gAMA_SUPPORTED
    else if(!strncmp(argv[i],"-g",2))
       {
          names++;
          i++;
-         if (intent < 0) specified_gamma=atof(argv[i]);
+         if (intent < 0) specified_gamma=atoi(argv[i]);
       }
+#endif
    else if(!strncmp(argv[i],"-h",2))
       {
          help++;
@@ -717,16 +758,25 @@
          names++;
          plte_len=atoi(argv[++i]);
       }
+   else if(!strncmp(argv[i],"-pplt",9))
+      {
+         names++;
+         do_pplt++;
+         strcpy(pplt_string,argv[++i]);
+         things_have_changed=1;
+      }
    else if(!strncmp(argv[i],"-p",2))
       {
       pauses++;
       }
+#ifdef PNG_gAMA_SUPPORTED
    else if(!strncmp(argv[i],"-rep",4))
       {
          names++;
-         force_specified_gamma=atof(argv[++i]);
+         force_specified_gamma=atoi(argv[++i]);
          things_have_changed=1;
       }
+#endif
    else if(!strncmp(argv[i],"-res",4))
       {
          names++;
@@ -741,7 +791,9 @@
    else if( !strncmp(argv[i],"-srgb",5) ||
             !strncmp(argv[i],"-sRGB",5))
       {
-         specified_gamma=.45455;
+#ifdef PNG_gAMA_SUPPORTED
+         specified_gamma=45455L;
+#endif
          intent=0;
          i++;
          if(!strncmp(argv[i],"0",1) ||
@@ -760,21 +812,47 @@
          verbose=0;
       }
    else if( !strncmp(argv[i],"-text",5) || !strncmp(argv[i],"-tEXt",5) ||
-            !strncmp(argv[i],"-ztxt",5) || !strncmp(argv[i],"-zTXt",5))
+            !strncmp(argv[i],"-ztxt",5) || !strncmp(argv[i],"-zTXt",5) ||
+            !strncmp(argv[i],"-zitxt",6) || !strncmp(argv[i],"-ziTXt",6) ||
+            !strncmp(argv[i],"-itxt",5) || !strncmp(argv[i],"-iTXt",5))
       {
          if(strlen(argv[i+2]) < 80 && strlen(argv[i+3]) < 2048 &&
             text_inputs < 10)
          {
-         if( !strncmp(argv[i],"-z",2))
+         if( !strncmp(argv[i],"-zi",3))
+         {
+            text_compression[text_inputs] = PNG_ITXT_COMPRESSION_zTXt;
+              names+=2;
+         }
+         else if( !strncmp(argv[i],"-z",2))
             text_compression[text_inputs] = PNG_TEXT_COMPRESSION_zTXt;
-         else
+         else if( !strncmp(argv[i],"-t",2))
             text_compression[text_inputs] = PNG_TEXT_COMPRESSION_NONE;
+         else
+         {
+           text_compression[text_inputs] = PNG_ITXT_COMPRESSION_NONE;
+           names+=2;
+           printf("Adding an iTXt chunk.\n");
+         }
          names+=3;
          if( !strncmp(argv[++i],"b",1))
             text_where[text_inputs]=1;
          if( !strncmp(argv[i],"a",1))
             text_where[text_inputs]=2;
          strcpy(&text_keyword[text_inputs*80],argv[++i]);
+#ifdef PNG_iTXt_SUPPORTED
+         if(text_compression[text_inputs] <= 0)
+           {
+             text_lang[text_inputs*80] = '\0';
+             text_lang_key[text_inputs*80] = '\0';
+           }
+         else
+           {
+             strcpy(&text_lang[text_inputs*80],argv[++i]);
+             /* libpng-1.0.5j and later */
+             strcpy(&text_lang_key[text_inputs*80],argv[++i]);
+           }
+#endif
          strcpy(&text_text[text_inputs*2048],argv[++i]);
          text_inputs++;
          }
@@ -788,6 +866,11 @@
               "keyword exceeds 79 characters or text exceeds 2047 characters\n");
             i+=3;
             names+=3;
+            if( !strncmp(argv[i],"-i",2) || !strncmp(argv[i],"-zi",3))
+            {
+              i+=2;
+              names+=2;
+            }
          }
       }
    else if( !strncmp(argv[i],"-trns",5) ||
@@ -1013,7 +1096,7 @@
      fprintf(STDERR,
        "\n               filter to use with the method specified in the\n");
      fprintf(STDERR,
-       "               preceding '-m method' argument.\n");
+       "               preceding '-m method' or '-brute_force' argument.\n");
      fprintf(STDERR,
        "               0: none; 1-4: use specified filter; 5: adaptive.\n\n");
      }
@@ -1047,10 +1130,12 @@
      fprintf(STDERR,
        "\n               zlib compression level to use with method specified\n");
      fprintf(STDERR,
-       "               with the preceding '-m method' argument.\n\n");
+       "               with the preceding '-m method' or '-brute_force'\n");
+     fprintf(STDERR,
+       "               argument.\n\n");
      }
      fprintf(STDERR,
-       "            -m method [0 through 200]\n");
+       "            -m method [0 through %d]\n",MAX_METHODS);
      if(verbose > 1)
      {
      fprintf(STDERR,
@@ -1192,13 +1277,13 @@
      fprintf(STDERR,
        "\n               Repeat the option (use \"-v -v\") for even more.\n\n");
      fprintf(STDERR,
-       "            -w compression_window_size [32, 16, 8, 4, 2, 1, 512, 256]\n");
+       "            -w compression_window_size [32, 16, 8, 4, 2, 1, 512]\n");
      if(verbose > 1)
      {
      fprintf(STDERR,
        "\n               Size of the sliding compression window, in kbytes\n");
      fprintf(STDERR,
-       "               (or bytes, in case of 512 or 256).  It's best to\n");
+       "               (or bytes, in case of 512).  It's best to\n");
      fprintf(STDERR,
        "               use the default (32) unless you run out of memory.\n");
      fprintf(STDERR,
@@ -1562,15 +1647,15 @@
 #if !defined(PNG_NO_STDIO)
       png_init_io(read_ptr, fpin);
       if(nosave == 0)
-      png_init_io(write_ptr, fpout);
+         png_init_io(write_ptr, fpout);
 #else
       png_set_read_fn(read_ptr, (png_voidp)fpin, png_default_read_data);
       if(nosave == 0)
-      png_set_write_fn(write_ptr, (png_voidp)fpout,  png_default_write_data,
+         png_set_write_fn(write_ptr, (png_voidp)fpout,  png_default_write_data,
 #if defined(PNG_WRITE_FLUSH_SUPPORTED)
-         png_default_flush);
+            png_default_flush);
 #else
-         NULL);
+            NULL);
 #endif
 #endif
 
@@ -1597,15 +1682,24 @@
         (png_bytep)png_malloc(read_ptr, (png_uint_32)read_ptr->zbuf_size);
       }
       if(nosave == 0)
-      if(write_ptr->zbuf_size > (png_size_t)max_idat_size)
-      {
-      if(verbose > 2)
-         printf("reinitializing write zbuf.\n");
-      png_free(write_ptr, write_ptr->zbuf);
-      write_ptr->zbuf_size = (png_size_t)max_idat_size;
-      write_ptr->zbuf =
-        (png_bytep)png_malloc(write_ptr, (png_uint_32)write_ptr->zbuf_size);
-      }
+         if(write_ptr->zbuf_size > (png_size_t)max_idat_size)
+         {
+            if (verbose > 2)
+            printf("reinitializing write zbuf.\n");
+            png_free(write_ptr, write_ptr->zbuf);
+            write_ptr->zbuf_size = (png_size_t)max_idat_size;
+            write_ptr->zbuf =
+              (png_bytep)png_malloc(write_ptr,
+                 (png_uint_32)write_ptr->zbuf_size);
+         }
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+      png_set_keep_unknown_chunks(read_ptr, 2, (png_bytep)NULL, 0);
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+      if(nosave == 0)
+         png_set_keep_unknown_chunks(write_ptr, 1, (png_bytep)NULL, 0);
+#endif
 
       png_debug(0, "Reading info struct\n");
       png_read_info(read_ptr, read_info_ptr);
@@ -1647,9 +1741,15 @@
             if((color_type == 2 || color_type == 6 || color_type == 3) &&
               (output_color_type == 0 || output_color_type == 4))
             {
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
                png_set_rgb_to_gray(read_ptr, 1, 54./255., 183./255.);
                if(output_bit_depth < 8)output_bit_depth=8;
                if(color_type == 3) need_expand = 1;
+#else
+               printf("  Cannot reduce color image to grayscale unless\n");
+               printf("  pngcrush is rebuilt with floating point support \n");
+               output_color_type=input_color_type;
+#endif
             }
            
             if(color_type != 3 && output_color_type == 3)
@@ -1717,8 +1817,12 @@
 
                required_window=(int)(height*((width*channels*bit_depth+15)>>3));
 
+#ifdef WBITS_8_OK
                if     (required_window <=   256)compression_window =  8;
                else if(required_window <=   512)compression_window =  9;
+#else
+               if     (required_window <=   512)compression_window =  9;
+#endif
                else if(required_window <=  1024)compression_window = 10;
                else if(required_window <=  2048)compression_window = 11;
                else if(required_window <=  4096)compression_window = 12;
@@ -1776,52 +1880,57 @@
 #endif
 #if defined(PNG_READ_cHRM_SUPPORTED) && defined(PNG_WRITE_cHRM_SUPPORTED)
       {
-         double white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
+         png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y,
+            blue_x, blue_y;
 
-         if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x,
-            &red_y, &green_x, &green_y, &blue_x, &blue_y))
+         if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y,
+            &red_x, &red_y, &green_x, &green_y, &blue_x, &blue_y))
          {
             if(keep_chunk("cHRM",argv))
-            png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x,
-               red_y, green_x, green_y, blue_x, blue_y);
+            png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y,
+               red_x, red_y, green_x, green_y, blue_x, blue_y);
          }
       }
 #endif
 #if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_WRITE_gAMA_SUPPORTED)
       {
-         if(force_specified_gamma > 0.)
+         if(force_specified_gamma > 0)
          {
             if(trial == 1)
             {
                things_have_changed=1;
                if(verbose > 0)
-                 fprintf(STDERR, "   Inserting gAMA chunk with gamma=%f\n",
+                 fprintf(STDERR,
+                "   Inserting gAMA chunk with gamma=(%d/100000)\n",
                     force_specified_gamma);
             }
-            png_set_gAMA(write_ptr, write_info_ptr, force_specified_gamma);
-            file_gamma=force_specified_gamma;
+            png_set_gAMA_fixed(write_ptr, write_info_ptr, 
+               (png_fixed_point)force_specified_gamma);
+            file_gamma=(png_fixed_point)force_specified_gamma;
          }
-         else if (png_get_gAMA(read_ptr, read_info_ptr, &file_gamma))
+         else if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &file_gamma))
          {
             if(keep_chunk("gAMA",argv))
             {
                if(verbose > 1 && trial == 1)
-                 fprintf(STDERR, "   gamma=%f\n", file_gamma);
+                 fprintf(STDERR, "   gamma=(%lu/100000)\n", file_gamma);
                if(double_gamma)file_gamma+=file_gamma;
-               png_set_gAMA(write_ptr, write_info_ptr, file_gamma);
+               png_set_gAMA_fixed(write_ptr, write_info_ptr, file_gamma);
             }
          }
-         else if(specified_gamma > 0.)
+         else if(specified_gamma > 0)
          {
             if(trial == 1)
             {
                things_have_changed=1;
                if(verbose > 0)
-                 fprintf(STDERR, "   Inserting gAMA chunk with gamma=%f\n",
+                 fprintf(STDERR,
+                 "   Inserting gAMA chunk with gamma=(%d/100000)\n",
                     specified_gamma);
             }
-            png_set_gAMA(write_ptr, write_info_ptr, specified_gamma);
-            file_gamma=specified_gamma;
+            png_set_gAMA_fixed(write_ptr, write_info_ptr,
+               (png_fixed_point)specified_gamma);
+            file_gamma=(png_fixed_point)specified_gamma;
          }
       }
 #endif
@@ -1836,14 +1945,15 @@
          }
          else if(intent >= 0)
          {
-            if((int)(file_gamma*100 + .1) == 45)
+#ifdef PNG_gAMA_SUPPORTED
+            if(file_gamma > 45000L && file_gamma < 46000L)
             {
                things_have_changed=1;
                if(trial == 1)
                fprintf(STDERR, "   Inserting sRGB chunk with intent=%d\n",intent);
                png_set_sRGB(write_ptr, write_info_ptr, intent);
             }
-            else if((int)(file_gamma*100) == 0)
+            else if(file_gamma == 0)
             {
                things_have_changed=1;
                png_set_sRGB_gAMA_and_cHRM(write_ptr, write_info_ptr, intent);
@@ -1853,10 +1963,11 @@
                if(trial == 1)
                {
                   fprintf(STDERR,
-               "   Ignoring sRGB request because gamma=%f is not approx. 0.45\n",
+          "   Ignoring sRGB request; gamma=(%lu/100000) is not approx. 0.455\n",
                    file_gamma);
                }
             }
+#endif
          }
       }
 #endif
@@ -1871,6 +1982,22 @@
          }
       }
 #endif
+#if defined(PNG_iCCP_SUPPORTED)
+   {
+      png_charp name;
+      png_charp profile;
+      png_int_32 proflen;
+      int compression_type;
+
+      if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type, 
+                      &profile, &proflen))
+      {
+         if(keep_chunk("iCCP",argv))
+            png_set_iCCP(write_ptr, write_info_ptr, name, compression_type, 
+                      profile, proflen);
+      }
+   }
+#endif
 #if defined(PNG_READ_oFFs_SUPPORTED) && defined(PNG_WRITE_oFFs_SUPPORTED)
       {
          png_int_32 offset_x, offset_y;
@@ -2035,6 +2162,10 @@
      {
         if (plte_len > 0)
            num_palette=plte_len;
+        if (do_pplt != 0)
+        {
+           printf("PPLT: %s\n",pplt_string);
+        }
         if(output_color_type == 3)
            png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette);
         else if(keep_chunk("PLTE",argv))
@@ -2079,8 +2210,34 @@
          }
       }
 #endif
-#if (defined(PNG_READ_tEXt_SUPPORTED) && defined(PNG_WRITE_tEXt_SUPPORTED)) || \
-       (defined(PNG_READ_zTXt_SUPPORTED) && defined(PNG_WRITE_zTXt_SUPPORTED))
+#if defined(PNG_sCAL_SUPPORTED)
+   {
+      int unit;
+      png_charp width, height;
+
+      if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &width, &height))
+      {
+         if(keep_chunk("sCAL",argv))
+            png_set_sCAL_s(write_ptr, write_info_ptr, unit, width, height);
+      }
+   }
+#endif
+#if defined(PNG_sPLT_SUPPORTED)
+   {
+      png_spalette_p entries;
+      int num_entries;
+
+      num_entries = (int)png_get_spalettes(read_ptr, read_info_ptr, &entries);
+      if (num_entries)
+      {
+         if(keep_chunk("sPLT",argv))
+            png_set_spalettes(write_ptr, write_info_ptr, entries, num_entries);
+         png_free_spalettes(read_ptr, read_info_ptr, num_entries);
+      }
+   }
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
       {
          png_textp text_ptr;
          int num_text=0;
@@ -2093,17 +2250,26 @@
 
             if (verbose > 1 && trial == 1 && num_text > 0)
             {
-               fprintf(STDERR,"before IDAT, num_text= %d",num_text);
                for (ntext = 0; ntext < num_text; ntext++)
                {
-                  fprintf(STDERR,"%d  %s: ",ntext,text_ptr[ntext].key);
-                  fprintf(STDERR,"%s\n",text_ptr[ntext].text);
+                  fprintf(STDERR,"%d  %s",ntext,text_ptr[ntext].key);
+                  if(text_ptr[ntext].text_length != 0)
+                     fprintf(STDERR,": %s\n",text_ptr[ntext].text);
+                  else if (text_ptr[ntext].itxt_length != 0)
+                  {
+                     fprintf(STDERR," (%s: %s): \n",
+                          text_ptr[ntext].lang,
+                          text_ptr[ntext].lang_key);
+                     fprintf(STDERR,"%s\n",text_ptr[ntext].text);
+                  }
+                  else
+                     fprintf(STDERR,"\n");
                }
             }
 
             if(num_text > 0)
             {
-               if(keep_chunk("tEXt/zTXt",argv))
+               if(keep_chunk("text",argv))
                   png_set_text(write_ptr, write_info_ptr, text_ptr, num_text);
             }
             for (ntext=0; ntext<text_inputs; ntext++)
@@ -2114,12 +2280,20 @@
                     added_text = (png_textp)
                        png_malloc(write_ptr, (png_uint_32)sizeof(png_text));
                     added_text[0].key = &text_keyword[ntext*80];
+                    added_text[0].lang = &text_lang[ntext*80];
+                    added_text[0].lang_key = &text_lang_key[ntext*80];
                     added_text[0].text = &text_text[ntext*2048];
                     added_text[0].compression = text_compression[ntext];
-                    added_text[0].text_length = (png_size_t)strlen
-                        (&text_text[ntext*2048]);
                     png_set_text(write_ptr, write_info_ptr, added_text, 1);
-                    png_free(write_ptr,added_text);
+                    if(added_text[0].compression < 0)
+                       printf("Added a tEXt chunk.\n");
+                    else if(added_text[0].compression == 0)
+                       printf("Added a zTXt chunk.\n");
+                    else if(added_text[0].compression == 1)
+                       printf("Added an uncompressed iTXt chunk.\n");
+                    else
+                       printf("Added a compressed iTXt chunk.\n");
+                    png_free(write_ptr,added_text); /* free only after last read */
                   }
               }
          }
@@ -2152,6 +2326,23 @@
       else                       png_set_filter(write_ptr,0,PNG_FILTER_NONE);
 
 
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+      {
+         png_unknown_chunkp unknowns;
+         int num_unknowns = (int)png_get_unknown_chunks(read_ptr, read_info_ptr,
+            &unknowns);
+         if (num_unknowns)
+         {
+            png_size_t i;
+            png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns,
+              num_unknowns);
+            for (i = 0; i < read_info_ptr->unknown_chunks_num; i++)
+              write_info_ptr->unknown_chunks[i].location =
+                 unknowns[i].location;
+         }
+      }
+#endif
+
       if(verbose > 2)
       printf("writing info structure.\n");
       png_crush_pause();
@@ -2277,6 +2468,13 @@
       }
 #endif
 
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free_unknown_chunks(read_ptr, read_info_ptr, -1);
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free_unknown_chunks(write_ptr, write_info_ptr, -1);
+#endif
+
       png_debug(0, "Reading and writing end_info data\n");
       png_read_end(read_ptr, end_info_ptr);
 
@@ -2296,15 +2494,26 @@
             {
                for (ntext = 0; ntext < num_text; ntext++)
                {
-                  fprintf(STDERR,"%d  %s: ",ntext,text_ptr[ntext].key);
-                  fprintf(STDERR,"%s\n",text_ptr[ntext].text);
+                  fprintf(STDERR,"%d  %s",ntext,text_ptr[ntext].key);
+                  if(text_ptr[ntext].text_length != 0)
+                     fprintf(STDERR,": %s\n",text_ptr[ntext].text);
+                  else if (text_ptr[ntext].itxt_length != 0)
+                  {
+                     fprintf(STDERR," (%s: %s): \n",
+                          text_ptr[ntext].lang,
+                          text_ptr[ntext].lang_key);
+                     fprintf(STDERR,"%s\n",text_ptr[ntext].text);
+                  }
+                  else
+                     fprintf(STDERR,"\n");
                }
             }
 
             if(num_text > 0)
             {
-               if(keep_chunk("tEXt/zTXt",argv))
-                  png_set_text(write_ptr, write_end_info_ptr, text_ptr, num_text);
+               if(keep_chunk("text",argv))
+                  png_set_text(write_ptr, write_end_info_ptr, text_ptr,
+                      num_text);
             }
             for (ntext=0; ntext<text_inputs; ntext++)
               {
@@ -2314,12 +2523,20 @@
                     added_text = (png_textp)
                       png_malloc(write_ptr, (png_uint_32)sizeof(png_text));
                     added_text[0].key = &text_keyword[ntext*80];
+                    added_text[0].lang = &text_lang[ntext*80];
+                    added_text[0].lang_key = &text_lang_key[ntext*80];
                     added_text[0].text = &text_text[ntext*2048];
                     added_text[0].compression = text_compression[ntext];
-                    added_text[0].text_length = (png_size_t)strlen
-                        (&text_text[ntext*2048]);
                     png_set_text(write_ptr, write_end_info_ptr, added_text, 1);
-                    png_free(write_ptr,added_text);
+                    if(added_text[0].compression < 0)
+                       printf("Added a tEXt chunk after IDAT.\n");
+                    else if(added_text[0].compression == 0)
+                       printf("Added a zTXt chunk after IDAT.\n");
+                    else if(added_text[0].compression == 1)
+                       printf("Added an uncompressed iTXt chunk after IDAT.\n");
+                    else
+                       printf("Added a compressed iTXt chunk after IDAT.\n");
+                    png_free(write_ptr,added_text); /* free only after last read */
                   }
               }
          }
@@ -2337,6 +2554,23 @@
       }
 #endif
 
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   {
+      png_unknown_chunkp unknowns;
+      int num_unknowns = (int)png_get_unknown_chunks(read_ptr, read_info_ptr,
+         &unknowns);
+      if (num_unknowns && nosave == 0)
+      {
+         png_size_t i;
+         png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns,
+           num_unknowns);
+         for (i = 0; i < read_info_ptr->unknown_chunks_num; i++)
+           write_end_info_ptr->unknown_chunks[i].location =
+              unknowns[i].location;
+      }
+   }
+#endif
+
       if(nosave == 0)
          png_write_end(write_ptr, write_end_info_ptr);
 
@@ -2347,11 +2581,7 @@
       png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
       if(nosave == 0)
       {
-      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
-      }
-     
-      if(nosave == 0)
-      {
+         png_destroy_info_struct(write_ptr, &write_end_info_ptr);
          png_destroy_write_struct(&write_ptr, &write_info_ptr);
       }
       read_ptr=NULL;
diff --git a/pngcrush.h b/pngcrush.h
new file mode 100644
index 0000000..5e48ecf
--- /dev/null
+++ b/pngcrush.h
@@ -0,0 +1,30 @@
+/* pngcrush.h */
+
+/* Special defines for pngcrush, mostly just to reduce the size of the
+   static executable.  (Presumably consumed by libpng's headers - confirm.) */
+
+#define PNG_NO_FLOATING_POINT_SUPPORTED /* undef this if you want to be able
+                                           to reduce color to gray */
+#define PNG_NO_EASY_ACCESS
+#define PNG_NO_READ_EMPTY_PLTE
+#define PNG_NO_WRITE_TRANSFORMS
+#define PNG_NO_PROGRESSIVE_READ
+#define PNG_NO_WRITE_WEIGHTED_FILTER
+#define PNG_READ_USER_TRANSFORM_SUPPORTED
+#define PNG_READ_STRIP_ALPHA_SUPPORTED
+#define PNG_READ_EXPAND_SUPPORTED
+#define PNG_READ_FILLER_SUPPORTED
+#ifndef PNG_NO_FLOATING_POINT_SUPPORTED /* only if the define above is removed */
+#  define PNG_READ_GRAY_TO_RGB_SUPPORTED
+#  define PNG_READ_RGB_TO_GRAY_SUPPORTED
+#  define PNG_READ_BACKGROUND_SUPPORTED
+#  define PNG_READ_GAMMA_SUPPORTED
+#else /* default: PNG_NO_FLOATING_POINT_SUPPORTED is defined above */
+#  define PNG_NO_READ_RGB_TO_GRAY
+#endif /* PNG_NO_FLOATING_POINT_SUPPORTED */
+#define PNG_ZBUF_SIZE 524288       /* 512*1024; increases the IDAT size */
+/* Disabled; kept commented out:
+#define PNG_NO_GLOBAL_ARRAYS
+*/
+#define TOO_FAR 32767     /* Improves zlib/deflate compression */
+
diff --git a/pngerror.c b/pngerror.c
index b1674eb..6ebc5aa 100644
--- a/pngerror.c
+++ b/pngerror.c
@@ -1,7 +1,7 @@
 
 /* pngerror.c - stub functions for i/o and memory allocation
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pnggccrd.c b/pnggccrd.c
new file mode 100644
index 0000000..6e9db58
--- /dev/null
+++ b/pnggccrd.c
@@ -0,0 +1,4633 @@
+/* pnggccrd.c - mixed C/assembler version of utilities to read a PNG file
+ *
+ * For Intel x86 CPU (Pentium-MMX or later) and GNU C compiler.
+ *
+ *     See http://www.intel.com/drg/pentiumII/appnotes/916/916.htm
+ *     and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
+ *     for Intel's performance analysis of the MMX vs. non-MMX code.
+ *
+ * libpng 1.0.5 - October 15, 1999
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998, Intel Corporation
+ * Copyright (c) 1998, 1999 Glenn Randers-Pehrson
+ *
+ * Based on MSVC code contributed by Nirav Chhatrapati, Intel Corp., 1998.
+ * Interface to libpng contributed by Gilles Vollant, 1999.
+ * GNU C port by Greg Roelofs, 1999.
+ *
+ * Lines 2350-4300 converted in place with intel2gas 1.3.1:
+ *
+ *   intel2gas -mdI pnggccrd.c.partially-msvc -o pnggccrd.c
+ *
+ * and then cleaned up by hand.  See http://hermes.terminal.at/intel2gas/ .
+ *
+ * NOTE:  A sufficiently recent version of GNU as (or as.exe under DOS/Windows)
+ *        is required to assemble the newer MMX instructions such as movq.
+ *        For djgpp, see
+ *
+ *           ftp://ftp.cdrom.com/pub/simtelnet/gnu/djgpp/v2gnu/bnu281b.zip
+ *
+ *        (or a later version in the same directory).  For Linux, check your
+ *        distribution's web site(s) or try these links:
+ *
+ *           http://rufus.w3.org/linux/RPM/binutils.html
+ *           http://www.debian.org/Packages/stable/devel/binutils.html
+ *           ftp://ftp.cdrom.com/pub/linux/slackware/slakware/d1/binutils.tgz
+ *
+ *        For other platforms, see the main GNU site:
+ *
+ *           ftp://ftp.gnu.org/pub/gnu/binutils/
+ *
+ *        Version 2.5.2l.15 is definitely too old...
+ */
+#if 0
+// GRR NOTES
+//
+// 19991006:
+//  - fixed sign error in post-MMX cleanup code (16- & 32-bit cases)
+//
+// 19991007:
+//  - additional optimizations (possible or definite):
+//     x [DONE] write MMX code for 64-bit case (pixel_bytes == 8) [not tested]
+//     - write MMX code for 48-bit case (pixel_bytes == 6)
+//     - figure out what's up with 24-bit case (pixel_bytes == 3):
+//        why subtract 8 from width_mmx in the pass 4/5 case?
+//        (only width_mmx case)
+//     x [DONE] replace pixel_bytes within each block with the true
+//        constant value (or are compilers smart enough to do that?)
+//     - rewrite all MMX interlacing code so it's aligned with
+//        the *beginning* of the row buffer, not the end.  This
+//        would not only allow one to eliminate half of the memory
+//        writes for odd passes (i.e., pass == odd), it may also
+//        eliminate some unaligned-data-access exceptions (assuming
+//        there's a penalty for not aligning 64-bit accesses on
+//        64-bit boundaries).  The only catch is that the "leftover"
+//        pixel(s) at the end of the row would have to be saved,
+//        but there are enough unused MMX registers in every case,
+//        so this is not a problem.  A further benefit is that the
+//        post-MMX cleanup code (C code) in at least some of the
+//        cases could be done within the assembler block.
+//  x [DONE] the "v3 v2 v1 v0 v7 v6 v5 v4" comments are confusing,
+//     inconsistent, and don't match the MMX Programmer's Reference
+//     Manual conventions anyway.  They should be changed to
+//     "b7 b6 b5 b4 b3 b2 b1 b0," where b0 indicates the byte that
+//     was lowest in memory (e.g., corresponding to a left pixel)
+//     and b7 is the byte that was highest (e.g., a right pixel).
+//
+// 19991016:
+//  - Brennan's Guide notwithstanding, gcc under Linux does *not*
+//     want globals prefixed by underscores when referencing them--
+//     i.e., if the variable is const4, then refer to it as const4,
+//     not _const4.  This seems to be a djgpp-specific requirement.
+//     Also, such variables apparently *must* be declared outside
+//     of functions; neither static nor automatic variables work if
+//     defined within the scope of a single function, but both
+//     static and truly global (multi-module) variables work fine.
+//
+// 19991023:
+//  - fixed png_combine_row() non-MMX replication bug (odd passes only?)
+//  - switched from string-concatenation-with-macros to cleaner method of
+//     renaming global variables for djgpp--i.e., always use prefixes in
+//     inlined assembler code (== strings) and conditionally rename the
+//     variables, not the other way around.  Hence _const4, _mask8_0, etc.
+//
+// 19991024:
+//  - fixed mmxsupport()/png_do_interlace() first-row bug
+//     This one was severely weird:  even though mmxsupport() doesn't touch
+//     ebx (where "row" pointer was stored), it nevertheless managed to zero
+//     the register (even in static/non-fPIC code--see below), which in turn
+//     caused png_do_interlace() to return prematurely on the first row of
+//     interlaced images (i.e., without expanding the interlaced pixels).
+//     Inspection of the generated assembly code didn't turn up any clues,
+//     although it did point at a minor optimization (i.e., get rid of
+//     mmx_supported_local variable and just use eax).  Possibly the CPUID
+//     instruction is more destructive than it looks?  (Not yet checked.)
+//  - "info gcc" was next to useless, so compared fPIC and non-fPIC assembly
+//     listings...  Apparently register spillage has to do with ebx, since
+//     it's used to index the global offset table.  Commenting it out of the
+//     input-reg lists in png_combine_row() eliminated compiler barfage, so
+//     ifdef'd with __PIC__ macro:  if defined, use a global for unmask
+//
+// 19991107:
+//  - verified CPUID clobberage:  12-char string constant ("GenuineIntel",
+//     "AuthenticAMD", etc.) placed in EBX:ECX:EDX.  Still need to polish.
+//
+// 19991120:
+//  - made "diff" variable (now "_dif") global to simplify conversion of
+//     filtering routines (running out of regs, sigh).  "diff" is still used
+//     in interlacing routines, however.
+//  - fixed up both versions of mmxsupport() (ORIG_THAT_USED_TO_CLOBBER_EBX
+//     macro determines which is used); original not yet tested.
+#endif
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGGCCRD)
+
+int mmxsupport(void);
+// 2 = not probed yet: png_combine_row() then stores mmxsupport()'s result here; 0 selects the C code, nonzero the MMX code.
+static int mmx_supported = 2;
+
+// djgpp prepends an underscore to C globals itself, so give the C names none (the asm strings keep "_name"):
+#ifdef __DJGPP__
+#  define _unmask      unmask
+#  define _const4      const4
+#  define _const6      const6
+#  define _mask8_0     mask8_0  
+#  define _mask16_1    mask16_1 
+#  define _mask16_0    mask16_0 
+#  define _mask24_2    mask24_2 
+#  define _mask24_1    mask24_1 
+#  define _mask24_0    mask24_0 
+#  define _mask32_3    mask32_3 
+#  define _mask32_2    mask32_2 
+#  define _mask32_1    mask32_1 
+#  define _mask32_0    mask32_0 
+#  define _mask48_5    mask48_5 
+#  define _mask48_4    mask48_4 
+#  define _mask48_3    mask48_3 
+#  define _mask48_2    mask48_2 
+#  define _mask48_1    mask48_1 
+#  define _mask48_0    mask48_0 
+#  define _FullLength  FullLength
+#  define _MMXLength   MMXLength
+#  define _dif         dif
+#endif
+
+/* These constants are referenced by name inside the inlined MMX assembly
+   strings, so ignore gcc's "At top level: defined but not used" warnings. */
+
+#ifdef __PIC__
+static int _unmask;     // holds ~mask for png_combine_row(): ebx is not passed as an asm input under -fPIC
+#endif
+// _maskNN_k: mask for 8-byte chunk k (k = 0 is lowest in memory) of 8 NN-bit pixels; each byte = its pixel's mask bit, 0x80 = 1st pixel.
+static unsigned long long _mask8_0  = 0x0102040810204080LL;   // 8-bit pixels: one chunk per 8 pixels
+// 16-bit pixels (2 bytes each): two chunks per 8 pixels
+static unsigned long long _mask16_1 = 0x0101020204040808LL;
+static unsigned long long _mask16_0 = 0x1010202040408080LL;
+// 24-bit pixels (3 bytes each): three chunks per 8 pixels
+static unsigned long long _mask24_2 = 0x0101010202020404LL;
+static unsigned long long _mask24_1 = 0x0408080810101020LL;
+static unsigned long long _mask24_0 = 0x2020404040808080LL;
+// 32-bit pixels (4 bytes each): four chunks per 8 pixels
+static unsigned long long _mask32_3 = 0x0101010102020202LL;
+static unsigned long long _mask32_2 = 0x0404040408080808LL;
+static unsigned long long _mask32_1 = 0x1010101020202020LL;
+static unsigned long long _mask32_0 = 0x4040404080808080LL;
+// 48-bit pixels (6 bytes each): six chunks per 8 pixels
+static unsigned long long _mask48_5 = 0x0101010101010202LL;
+static unsigned long long _mask48_4 = 0x0202020204040404LL;
+static unsigned long long _mask48_3 = 0x0404080808080808LL;
+static unsigned long long _mask48_2 = 0x1010101010102020LL;
+static unsigned long long _mask48_1 = 0x2020202040404040LL;
+static unsigned long long _mask48_0 = 0x4040808080808080LL;
+
+static unsigned long long _const4   = 0x0000000000FFFFFFLL;
+//static unsigned long long _const5 = 0x000000FFFFFF0000LL;     // NOT USED
+static unsigned long long _const6   = 0x00000000000000FFLL;
+
+// These are used in the row-filter routines and should/would be local
+//  variables if not for gcc addressing limitations.
+
+static png_uint_32  _FullLength;
+static png_uint_32  _MMXLength;
+static int          _dif;
+
+
+void
+png_read_filter_row_c(png_structp png_ptr, png_row_infop row_info,
+   png_bytep row, png_bytep prev_row, int filter);
+
+
+#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)
+
+/* Combines the row recently read in with the previous row.
+   This routine takes care of alpha and transparency if requested.
+   This routine also handles the two methods of progressive display
+   of interlaced images, depending on the mask value.
+   The mask value describes which pixels are to be combined with
+   the row.  The pattern always repeats every 8 pixels, so just 8
+   bits are needed.  A one indicates the pixel is to be combined; a
+   zero indicates the pixel is to be skipped.  This is in addition
+   to any alpha or transparency value associated with the pixel.
+   If you want all pixels to be combined, pass 0xff (255) in mask. */
+
+/* Use this routine for the x86 platform - it uses a faster MMX routine
+   if the machine supports MMX. */
+
+void
+png_combine_row(png_structp png_ptr, png_bytep row, int mask)
+{
+   png_debug(1,"in png_combine_row_asm\n");
+
+   if (mmx_supported == 2)
+       mmx_supported = mmxsupport();
+
+/*
+fprintf(stderr, "GRR DEBUG:  png_combine_row() pixel_depth = %d, mask = 0x%02x, unmask = 0x%02x\n", png_ptr->row_info.pixel_depth, mask, ~mask);
+fflush(stderr);
+ */
+   if (mask == 0xff)
+   {
+      png_memcpy(row, png_ptr->row_buf + 1,
+       (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
+   }
+   /* GRR:  add "else if (mask == 0)" case?
+    *       or does png_combine_row() not even get called in that case? */
+   else
+   {
+      switch (png_ptr->row_info.pixel_depth)
+      {
+         case 1:        // png_ptr->row_info.pixel_depth
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_inc, s_start, s_end;
+            int m;
+            int shift;
+            png_uint_32 i;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+            else
+#endif
+            {
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  int value;
+
+                  value = (*sp >> shift) & 0x1;
+                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 2:        // png_ptr->row_info.pixel_depth
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_start, s_end, s_inc;
+            int m;
+            int shift;
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+            else
+#endif
+            {
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0x3;
+                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 4:        // png_ptr->row_info.pixel_depth
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_start, s_end, s_inc;
+            int m;
+            int shift;
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+            else
+#endif
+            {
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0xf;
+                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 8:        // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            if (mmx_supported)
+            {
+               png_uint_32 len;
+               int diff;
+#ifndef __PIC__
+               int unmask = ~mask;
+#else
+               _unmask = ~mask;            // global variable for -fPIC version
+#endif
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width &~7;  // reduce to multiple of 8
+               diff = png_ptr->width & 7;  // amount lost
+
+               __asm__ (
+#ifdef __PIC__
+                  "movd      _unmask, %%mm7  \n\t" // load bit pattern
+#else
+// preload        "movd      unmask, %%mm7   \n\t" // (unmask is in ebx)
+                  "movd      %%ebx, %%mm7    \n\t" // load bit pattern (unmask)
+#endif
+                  "psubb     %%mm6, %%mm6    \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7    \n\t"
+                  "punpcklwd %%mm7, %%mm7    \n\t"
+                  "punpckldq %%mm7, %%mm7    \n\t" // fill reg with 8 masks
+
+                  "movq      _mask8_0, %%mm0 \n\t"
+                  "pand      %%mm7, %%mm0    \n\t" // nonzero if keep byte
+                  "pcmpeqb   %%mm6, %%mm0    \n\t" // zeros->1s, v versa
+
+// preload        "movl      len, %%ecx      \n\t" // load length of line
+// preload        "movl      srcptr, %%esi   \n\t" // load source
+// preload        "movl      dstptr, %%edi   \n\t" // load dest
+
+                  "cmpl      $0, %%ecx       \n\t" // len == 0 ?
+                  "je        mainloop8end    \n\t"
+
+                "mainloop8:                  \n\t"
+                  "movq      (%%esi), %%mm4  \n\t" // *srcptr
+                  "pand      %%mm0, %%mm4    \n\t"
+                  "movq      %%mm0, %%mm6    \n\t"
+                  "pandn     (%%edi), %%mm6  \n\t" // *dstptr
+                  "por       %%mm6, %%mm4    \n\t"
+                  "movq      %%mm4, (%%edi)  \n\t"
+                  "addl      $8, %%esi       \n\t" // inc by 8 bytes processed
+                  "addl      $8, %%edi       \n\t"
+                  "subl      $8, %%ecx       \n\t" // dec by 8 pixels processed
+                  "ja        mainloop8       \n\t"
+
+                "mainloop8end:               \n\t"
+// preload        "movl      diff, %%ecx     \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx    \n\t"
+                  "cmpl      $0, %%ecx       \n\t"
+                  "jz        end8            \n\t"
+// preload        "movl      mask, %%edx     \n\t"
+                  "sall      $24, %%edx      \n\t" // make low byte, high byte
+
+                "secondloop8:                \n\t"
+                  "sall      %%edx           \n\t" // move high bit to CF
+                  "jnc       skip8           \n\t" // if CF = 0
+                  "movb      (%%esi), %%al   \n\t"
+                  "movb      %%al, (%%edi)   \n\t"
+
+                "skip8:                      \n\t"
+                  "incl      %%esi           \n\t"
+                  "incl      %%edi           \n\t"
+                  "decl      %%ecx           \n\t"
+                  "jnz       secondloop8     \n\t"
+
+                "end8:                       \n\t"
+                  "EMMS                      \n\t"  // DONE
+
+                  :                                 // output regs (none)
+
+                  : "S" (srcptr),      // esi       // input regs
+                    "D" (dstptr),      // edi
+                    "a" (diff),        // eax
+#ifndef __PIC__
+                    "b" (unmask),      // ebx       // Global Offset Table idx
+#endif
+                    "c" (len),         // ecx
+                    "d" (mask)         // edx
+
+                  : "%esi", "%edi", "%eax",         // clobber list
+                    "%ecx", "%edx"
+#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+                  , "%mm0", "%mm4", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* mmx _not supported - Use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = png_pass_start[png_ptr->pass];
+                 // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+               register int stride = png_pass_inc[png_ptr->pass];
+                 // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+               register int rep_bytes = png_pass_width[png_ptr->pass];
+                 // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+               register png_uint_32 final_val = png_ptr->width;
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               } 
+            } /* end of else */
+
+            break;
+         }       // end 8 bpp
+
+         case 16:       // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            if (mmx_supported)
+            {
+               png_uint_32 len;
+               int diff;
+#ifndef __PIC__
+               int unmask = ~mask;
+#else
+               _unmask = ~mask;            // global variable for -fPIC version
+#endif
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width &~7;  // reduce to multiple of 8
+               diff = png_ptr->width & 7;  // amount lost
+
+               __asm__ (
+#ifdef __PIC__
+                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+#else
+// preload        "movd      unmask, %%mm7    \n\t" // (unmask is in ebx)
+                  "movd      %%ebx, %%mm7     \n\t" // load bit pattern (unmask)
+#endif
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  "movq      _mask16_0, %%mm0 \n\t"
+                  "movq      _mask16_1, %%mm1 \n\t"
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %%esi    \n\t" // load source
+// preload        "movl      dstptr, %%edi    \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        mainloop16end    \n\t"
+
+                "mainloop16:                  \n\t"
+                  "movq      (%%esi), %%mm4   \n\t"
+                  "pand      %%mm0, %%mm4     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "movq      (%%edi), %%mm7   \n\t"
+                  "pandn     %%mm7, %%mm6     \n\t"
+                  "por       %%mm6, %%mm4     \n\t"
+                  "movq      %%mm4, (%%edi)   \n\t"
+
+                  "movq      8(%%esi), %%mm5  \n\t"
+                  "pand      %%mm1, %%mm5     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "movq      8(%%edi), %%mm6  \n\t"
+                  "pandn     %%mm6, %%mm7     \n\t"
+                  "por       %%mm7, %%mm5     \n\t"
+                  "movq      %%mm5, 8(%%edi)  \n\t"
+
+                  "addl      $16, %%esi       \n\t" // inc by 16 bytes processed
+                  "addl      $16, %%edi       \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+                  "ja        mainloop16       \n\t"
+
+                "mainloop16end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end16            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // make low byte, high byte
+
+                "secondloop16:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip16           \n\t" // if CF = 0
+                  "movw      (%%esi), %%ax    \n\t"
+                  "movw      %%ax, (%%edi)    \n\t"
+
+                "skip16:                      \n\t"
+                  "addl      $2, %%esi        \n\t"
+                  "addl      $2, %%edi        \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop16     \n\t"
+
+                "end16:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  :                                 // output regs (none)
+
+                  : "S" (srcptr),      // esi       // input regs
+                    "D" (dstptr),      // edi
+                    "a" (diff),        // eax
+#ifndef __PIC__
+                    "b" (unmask),      // ebx       // Global Offset Table idx
+#endif
+                    "c" (len),         // ecx
+                    "d" (mask)         // edx
+
+                  : "%esi", "%edi", "%eax",         // clobber list
+                    "%ecx", "%edx"
+#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+                  , "%mm0", "%mm1",
+                    "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* mmx _not supported - Use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = 2 * png_pass_start[png_ptr->pass];
+                 // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+               register int stride = 2 * png_pass_inc[png_ptr->pass];
+                 // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+               register int rep_bytes = 2 * png_pass_width[png_ptr->pass];
+                 // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+               register png_uint_32 final_val = 2 * png_ptr->width;
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               } 
+            } /* end of else */
+
+            break;
+         }       // end 16 bpp
+
+         case 24:       // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            if (mmx_supported)
+            {
+               png_uint_32 len;
+               int diff;
+#ifndef __PIC__
+               int unmask = ~mask;
+#else
+               _unmask = ~mask;            // global variable for -fPIC version
+#endif
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width &~7;  // reduce to multiple of 8
+               diff = png_ptr->width & 7;  // amount lost
+
+               __asm__ (
+#ifdef __PIC__
+                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+#else
+// preload        "movd      unmask, %%mm7    \n\t" // (unmask is in ebx)
+                  "movd      %%ebx, %%mm7     \n\t" // load bit pattern (unmask)
+#endif
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  "movq      _mask24_0, %%mm0 \n\t"
+                  "movq      _mask24_1, %%mm1 \n\t"
+                  "movq      _mask24_2, %%mm2 \n\t"
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+                  "pand      %%mm7, %%mm2     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+                  "pcmpeqb   %%mm6, %%mm2     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %%esi    \n\t" // load source
+// preload        "movl      dstptr, %%edi    \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        mainloop24end    \n\t"
+
+                "mainloop24:                  \n\t"
+                  "movq      (%%esi), %%mm4   \n\t"
+                  "pand      %%mm0, %%mm4     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "movq      (%%edi), %%mm7   \n\t"
+                  "pandn     %%mm7, %%mm6     \n\t"
+                  "por       %%mm6, %%mm4     \n\t"
+                  "movq      %%mm4, (%%edi)   \n\t"
+
+                  "movq      8(%%esi), %%mm5  \n\t"
+                  "pand      %%mm1, %%mm5     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "movq      8(%%edi), %%mm6  \n\t"
+                  "pandn     %%mm6, %%mm7     \n\t"
+                  "por       %%mm7, %%mm5     \n\t"
+                  "movq      %%mm5, 8(%%edi)  \n\t"
+
+                  "movq      16(%%esi), %%mm6 \n\t"
+                  "pand      %%mm2, %%mm6     \n\t"
+                  "movq      %%mm2, %%mm4     \n\t"
+                  "movq      16(%%edi), %%mm7 \n\t"
+                  "pandn     %%mm7, %%mm4     \n\t"
+                  "por       %%mm4, %%mm6     \n\t"
+                  "movq      %%mm6, 16(%%edi) \n\t"
+
+                  "addl      $24, %%esi       \n\t" // inc by 24 bytes processed
+                  "addl      $24, %%edi       \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+
+                  "ja        mainloop24       \n\t"
+
+                "mainloop24end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end24            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // make low byte, high byte
+
+                "secondloop24:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip24           \n\t" // if CF = 0
+                  "movw      (%%esi), %%ax    \n\t"
+                  "movw      %%ax, (%%edi)    \n\t"
+                  "xorl      %%eax, %%eax     \n\t"
+                  "movb      2(%%esi), %%al   \n\t"
+                  "movb      %%al, 2(%%edi)   \n\t"
+
+                "skip24:                      \n\t"
+                  "addl      $3, %%esi        \n\t"
+                  "addl      $3, %%edi        \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop24     \n\t"
+
+                "end24:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  :                                 // output regs (none)
+
+                  : "S" (srcptr),      // esi       // input regs
+                    "D" (dstptr),      // edi
+                    "a" (diff),        // eax
+#ifndef __PIC__
+                    "b" (unmask),      // ebx       // Global Offset Table idx
+#endif
+                    "c" (len),         // ecx
+                    "d" (mask)         // edx
+
+                  : "%esi", "%edi", "%eax",         // clobber list
+                    "%ecx", "%edx"
+#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+                  , "%mm0", "%mm1", "%mm2",
+                    "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* mmx _not supported - Use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = 3 * png_pass_start[png_ptr->pass];
+                 // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+               register int stride = 3 * png_pass_inc[png_ptr->pass];
+                 // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+               register int rep_bytes = 3 * png_pass_width[png_ptr->pass];
+                 // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+               register png_uint_32 final_val = 3 * png_ptr->width;
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               } 
+            } /* end of else */
+
+            break;
+         }       // end 24 bpp
+
+         case 32:       // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            if (mmx_supported)
+            {
+               png_uint_32 len;
+               int diff;
+#ifndef __PIC__
+               int unmask = ~mask;
+#else
+               _unmask = ~mask;            // global variable for -fPIC version
+#endif
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width &~7;  // reduce to multiple of 8
+               diff = png_ptr->width & 7;  // amount lost
+
+               __asm__ (
+#ifdef __PIC__
+                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+#else
+// preload        "movd      unmask, %%mm7    \n\t" // (unmask is in ebx)
+                  "movd      %%ebx, %%mm7     \n\t" // load bit pattern (unmask)
+#endif
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  "movq      _mask32_0, %%mm0 \n\t"
+                  "movq      _mask32_1, %%mm1 \n\t"
+                  "movq      _mask32_2, %%mm2 \n\t"
+                  "movq      _mask32_3, %%mm3 \n\t"
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+                  "pand      %%mm7, %%mm2     \n\t"
+                  "pand      %%mm7, %%mm3     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+                  "pcmpeqb   %%mm6, %%mm2     \n\t"
+                  "pcmpeqb   %%mm6, %%mm3     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %%esi    \n\t" // load source
+// preload        "movl      dstptr, %%edi    \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t" // lcr
+                  "jz        mainloop32end    \n\t"
+
+                "mainloop32:                  \n\t"
+                  "movq      (%%esi), %%mm4   \n\t"
+                  "pand      %%mm0, %%mm4     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "movq      (%%edi), %%mm7   \n\t"
+                  "pandn     %%mm7, %%mm6     \n\t"
+                  "por       %%mm6, %%mm4     \n\t"
+                  "movq      %%mm4, (%%edi)   \n\t"
+
+                  "movq      8(%%esi), %%mm5  \n\t"
+                  "pand      %%mm1, %%mm5     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "movq      8(%%edi), %%mm6  \n\t"
+                  "pandn     %%mm6, %%mm7     \n\t"
+                  "por       %%mm7, %%mm5     \n\t"
+                  "movq      %%mm5, 8(%%edi)  \n\t"
+
+                  "movq      16(%%esi), %%mm6 \n\t"
+                  "pand      %%mm2, %%mm6     \n\t"
+                  "movq      %%mm2, %%mm4     \n\t"
+                  "movq      16(%%edi), %%mm7 \n\t"
+                  "pandn     %%mm7, %%mm4     \n\t"
+                  "por       %%mm4, %%mm6     \n\t"
+                  "movq      %%mm6, 16(%%edi) \n\t"
+
+                  "movq      24(%%esi), %%mm7 \n\t"
+                  "pand      %%mm3, %%mm7     \n\t"
+                  "movq      %%mm3, %%mm5     \n\t"
+                  "movq      24(%%edi), %%mm4 \n\t"
+                  "pandn     %%mm4, %%mm5     \n\t"
+                  "por       %%mm5, %%mm7     \n\t"
+                  "movq      %%mm7, 24(%%edi) \n\t"
+
+                  "addl      $32, %%esi       \n\t" // inc by 32 bytes processed
+                  "addl      $32, %%edi       \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+                  "ja        mainloop32       \n\t"
+
+                "mainloop32end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end32            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // low byte => high byte
+
+                "secondloop32:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip32           \n\t" // if CF = 0
+                  "movl      (%%esi), %%eax   \n\t"
+                  "movl      %%eax, (%%edi)   \n\t"
+
+                "skip32:                      \n\t"
+                  "addl      $4, %%esi        \n\t"
+                  "addl      $4, %%edi        \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop32     \n\t"
+
+                "end32:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  :                                 // output regs (none)
+
+                  : "S" (srcptr),      // esi       // input regs
+                    "D" (dstptr),      // edi
+                    "a" (diff),        // eax
+#ifndef __PIC__
+                    "b" (unmask),      // ebx       // Global Offset Table idx
+#endif
+                    "c" (len),         // ecx
+                    "d" (mask)         // edx
+
+                  : "%esi", "%edi", "%eax",         // clobber list
+                    "%ecx", "%edx"
+#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+                  , "%mm0", "%mm1", "%mm2", "%mm3",
+                    "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* mmx _not supported - Use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = 4 * png_pass_start[png_ptr->pass];
+                 // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+               register int stride = 4 * png_pass_inc[png_ptr->pass];
+                 // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+               register int rep_bytes = 4 * png_pass_width[png_ptr->pass];
+                 // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+               register png_uint_32 final_val = 4 * png_ptr->width;
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+            } /* end of else */
+
+            break;
+         }       // end 32 bpp
+
+         case 48:       // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            if (mmx_supported)
+            {
+               png_uint_32 len;
+               int diff;
+#ifndef __PIC__
+               int unmask = ~mask;
+#else
+               _unmask = ~mask;            // global variable for -fPIC version
+#endif
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width &~7;  // reduce to multiple of 8
+               diff = png_ptr->width & 7;  // amount lost
+
+               __asm__ (
+#ifdef __PIC__
+                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+#else
+// preload        "movd      unmask, %%mm7    \n\t" // (unmask is in ebx)
+                  "movd      %%ebx, %%mm7     \n\t" // load bit pattern (unmask)
+#endif
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  "movq      _mask48_0, %%mm0 \n\t"
+                  "movq      _mask48_1, %%mm1 \n\t"
+                  "movq      _mask48_2, %%mm2 \n\t"
+                  "movq      _mask48_3, %%mm3 \n\t"
+                  "movq      _mask48_4, %%mm4 \n\t"
+                  "movq      _mask48_5, %%mm5 \n\t"
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+                  "pand      %%mm7, %%mm2     \n\t"
+                  "pand      %%mm7, %%mm3     \n\t"
+                  "pand      %%mm7, %%mm4     \n\t"
+                  "pand      %%mm7, %%mm5     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+                  "pcmpeqb   %%mm6, %%mm2     \n\t"
+                  "pcmpeqb   %%mm6, %%mm3     \n\t"
+                  "pcmpeqb   %%mm6, %%mm4     \n\t"
+                  "pcmpeqb   %%mm6, %%mm5     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %%esi    \n\t" // load source
+// preload        "movl      dstptr, %%edi    \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        mainloop48end    \n\t"
+
+                "mainloop48:                  \n\t"
+                  "movq      (%%esi), %%mm7   \n\t"
+                  "pand      %%mm0, %%mm7     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "pandn     (%%edi), %%mm6   \n\t"
+                  "por       %%mm6, %%mm7     \n\t"
+                  "movq      %%mm7, (%%edi)   \n\t"
+
+                  "movq      8(%%esi), %%mm6  \n\t"
+                  "pand      %%mm1, %%mm6     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "pandn     8(%%edi), %%mm7  \n\t"
+                  "por       %%mm7, %%mm6     \n\t"
+                  "movq      %%mm6, 8(%%edi)  \n\t"
+
+                  "movq      16(%%esi), %%mm6 \n\t"
+                  "pand      %%mm2, %%mm6     \n\t"
+                  "movq      %%mm2, %%mm7     \n\t"
+                  "pandn     16(%%edi), %%mm7 \n\t"
+                  "por       %%mm7, %%mm6     \n\t"
+                  "movq      %%mm6, 16(%%edi) \n\t"
+
+                  "movq      24(%%esi), %%mm7 \n\t"
+                  "pand      %%mm3, %%mm7     \n\t"
+                  "movq      %%mm3, %%mm6     \n\t"
+                  "pandn     24(%%edi), %%mm6 \n\t"
+                  "por       %%mm6, %%mm7     \n\t"
+                  "movq      %%mm7, 24(%%edi) \n\t"
+
+                  "movq      32(%%esi), %%mm6 \n\t"
+                  "pand      %%mm4, %%mm6     \n\t"
+                  "movq      %%mm4, %%mm7     \n\t"
+                  "pandn     32(%%edi), %%mm7 \n\t"
+                  "por       %%mm7, %%mm6     \n\t"
+                  "movq      %%mm6, 32(%%edi) \n\t"
+
+                  "movq      40(%%esi), %%mm7 \n\t"
+                  "pand      %%mm5, %%mm7     \n\t"
+                  "movq      %%mm5, %%mm6     \n\t"
+                  "pandn     40(%%edi), %%mm6 \n\t"
+                  "por       %%mm6, %%mm7     \n\t"
+                  "movq      %%mm7, 40(%%edi) \n\t"
+
+                  "addl      $48, %%esi       \n\t" // inc by 48 bytes processed
+                  "addl      $48, %%edi       \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+
+                  "ja        mainloop48       \n\t"
+
+                "mainloop48end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end48            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // make low byte, high byte
+
+                "secondloop48:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip48           \n\t" // if CF = 0
+                  "movl      (%%esi), %%eax   \n\t"
+                  "movl      %%eax, (%%edi)   \n\t"
+
+                "skip48:                      \n\t"
+                  "addl      $4, %%esi        \n\t"
+                  "addl      $4, %%edi        \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop48     \n\t"
+
+                "end48:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  :                                 // output regs (none)
+
+                  : "S" (srcptr),      // esi       // input regs
+                    "D" (dstptr),      // edi
+                    "a" (diff),        // eax
+#ifndef __PIC__
+                    "b" (unmask),      // ebx       // Global Offset Table idx
+#endif
+                    "c" (len),         // ecx
+                    "d" (mask)         // edx
+
+                  : "%esi", "%edi", "%eax",         // clobber list
+                    "%ecx", "%edx"
+#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+                  , "%mm0", "%mm1", "%mm2", "%mm3",
+                    "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* mmx _not supported - Use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = 6 * png_pass_start[png_ptr->pass];
+                 // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+               register int stride = 6 * png_pass_inc[png_ptr->pass];
+                 // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+               register int rep_bytes = 6 * png_pass_width[png_ptr->pass];
+                 // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+               register png_uint_32 final_val = 6 * png_ptr->width;
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               } 
+            } /* end of else */
+
+            break;
+         }       // end 48 bpp
+
+         case 64:       // png_ptr->row_info.pixel_depth
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            register png_uint_32 i;
+            png_uint_32 initial_val = 8 * png_pass_start[png_ptr->pass];
+              // png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+            register int stride = 8 * png_pass_inc[png_ptr->pass];
+              // png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+            register int rep_bytes = 8 * png_pass_width[png_ptr->pass];
+              // png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+            register png_uint_32 final_val = 8 * png_ptr->width;
+
+            srcptr = png_ptr->row_buf + 1 + initial_val;
+            dstptr = row + initial_val;
+
+            for (i = initial_val; i < final_val; i += stride)
+            {
+               png_memcpy(dstptr, srcptr, rep_bytes);
+               srcptr += stride;
+               dstptr += stride;
+            } 
+            break;
+         }       // end 64 bpp
+
+         default:   // png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64
+         {
+            // this should never happen
+            fprintf(stderr,
+              "libpng internal error:  png_ptr->row_info.pixel_depth = %d\n",
+              png_ptr->row_info.pixel_depth);
+            fflush(stderr);
+            break;
+         }
+      } /* end switch (png_ptr->row_info.pixel_depth) */
+
+   } /* end if (non-trivial mask) */
+
+} /* end png_combine_row() */
+
+#endif /* PNG_HAVE_ASSEMBLER_COMBINE_ROW */
+
+
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)
+
+/* png_do_read_interlace() is called after any 16-bit to 8-bit conversion
+ * has taken place.  [GRR: what other steps come before and/or after?]
+ */
+
+void
+png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
+   png_uint_32 transformations)
+{
+/*
+fprintf(stderr, "GRR DEBUG:  entering png_do_read_interlace()\n");
+if (row == NULL) fprintf(stderr, "GRR DEBUG:  row == NULL\n");
+if (row_info == NULL) fprintf(stderr, "GRR DEBUG:  row_info == NULL\n");
+fflush(stderr);
+ */
+   png_debug(1,"in png_do_read_interlace\n");
+
+   if (mmx_supported == 2)
+       mmx_supported = mmxsupport();
+/*
+{
+fprintf(stderr, "GRR DEBUG:  calling mmxsupport()\n");
+fprintf(stderr, "GRR DEBUG:  done with mmxsupport() (mmx_supported = %d)\n", mmx_supported);
+}
+ */
+
+/*
+this one happened on first row due to weirdness with mmxsupport():
+if (row == NULL) fprintf(stderr, "GRR DEBUG:  now row == NULL!!!\n");
+  row was in ebx, and even though nothing touched ebx, it still got wiped...
+  [weird side effect of CPUID instruction?]
+if (row_info == NULL) fprintf(stderr, "GRR DEBUG:  now row_info == NULL!!!\n");
+ */
+   if (row != NULL && row_info != NULL)
+   {
+      png_uint_32 final_width;
+
+      final_width = row_info->width * png_pass_inc[pass];
+
+/*
+fprintf(stderr, "GRR DEBUG:  png_do_read_interlace() row_info->width = %d, final_width = %d\n", row_info->width, final_width);
+fprintf(stderr, "GRR DEBUG:  png_do_read_interlace() pixel_depth = %d\n", row_info->pixel_depth);
+fflush(stderr);
+ */
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_byte v;
+            png_uint_32 i;
+            int j;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 3);
+            dp = row + (png_size_t)((final_width - 1) >> 3);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)((row_info->width + 7) & 7);
+               dshift = (int)((final_width + 7) & 7);
+               s_start = 7;
+               s_end = 0;
+               s_inc = -1;
+            }
+            else
+#endif
+            {
+               sshift = 7 - (int)((row_info->width + 7) & 7);
+               dshift = 7 - (int)((final_width + 7) & 7);
+               s_start = 0;
+               s_end = 7;
+               s_inc = 1;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               v = (png_byte)((*sp >> sshift) & 0x1);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+         case 2:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 2);
+            dp = row + (png_size_t)((final_width - 1) >> 2);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);
+               dshift = (png_size_t)(((final_width + 3) & 3) << 1);
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+            else
+#endif
+            {
+               sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);
+               dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0x3);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+         case 4:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 1);
+            dp = row + (png_size_t)((final_width - 1) >> 1);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);
+               dshift = (png_size_t)(((final_width + 1) & 1) << 2);
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            else
+#endif
+            {
+               sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);
+               dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0xf);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+         //====================================================================
+
+         default:  // 8-bit or larger (this is where the routine is modified)
+         {
+//          static unsigned long long _const4 = 0x0000000000FFFFFFLL;  no good
+//          static unsigned long long const4 = 0x0000000000FFFFFFLL;   no good
+//          unsigned long long _const4 = 0x0000000000FFFFFFLL;         no good
+//          unsigned long long const4 = 0x0000000000FFFFFFLL;          no good
+            png_bytep sptr, dp;
+            png_uint_32 i;
+            png_size_t pixel_bytes;
+            int width = row_info->width;
+
+            pixel_bytes = (row_info->pixel_depth >> 3);
+
+            // point sptr at the last pixel in the pre-expanded row:
+            sptr = row + (width - 1) * pixel_bytes;
+
+            // point dp at the last pixel position in the expanded row:
+            dp = row + (final_width - 1) * pixel_bytes;
+
+            // New code by Nirav Chhatrapati - Intel Corporation
+
+            if (mmx_supported)  // use MMX code if machine supports it
+            {
+               //--------------------------------------------------------------
+               if (pixel_bytes == 3)
+               {
+                  if (((pass == 0) || (pass == 1)) && width)
+                  {
+                     __asm__ (
+                        "subl $21, %%edi         \n\t"
+                                     // (png_pass_inc[pass] - 1)*pixel_bytes
+
+                     ".loop3_pass0:              \n\t"
+                        "movd (%%esi), %%mm0     \n\t" // x x x x x 2 1 0
+                        "pand _const4, %%mm0     \n\t" // z z z z z 2 1 0
+                        "movq %%mm0, %%mm1       \n\t" // z z z z z 2 1 0
+                        "psllq $16, %%mm0        \n\t" // z z z 2 1 0 z z
+                        "movq %%mm0, %%mm2       \n\t" // z z z 2 1 0 z z
+                        "psllq $24, %%mm0        \n\t" // 2 1 0 z z z z z
+                        "psrlq $8, %%mm1         \n\t" // z z z z z z 2 1
+                        "por %%mm2, %%mm0        \n\t" // 2 1 0 2 1 0 z z
+                        "por %%mm1, %%mm0        \n\t" // 2 1 0 2 1 0 2 1
+                        "movq %%mm0, %%mm3       \n\t" // 2 1 0 2 1 0 2 1
+                        "psllq $16, %%mm0        \n\t" // 0 2 1 0 2 1 z z
+                        "movq %%mm3, %%mm4       \n\t" // 2 1 0 2 1 0 2 1
+                        "punpckhdq %%mm0, %%mm3  \n\t" // 0 2 1 0 2 1 0 2
+                        "movq %%mm4, 16(%%edi)   \n\t"
+                        "psrlq $32, %%mm0        \n\t" // z z z z 0 2 1 0
+                        "movq %%mm3, 8(%%edi)    \n\t"
+                        "punpckldq %%mm4, %%mm0  \n\t" // 1 0 2 1 0 2 1 0
+                        "subl $3, %%esi          \n\t"
+                        "movq %%mm0, (%%edi)     \n\t"
+                        "subl $24, %%edi         \n\t"
+                        "decl %%ecx              \n\t"
+                        "jnz .loop3_pass0        \n\t"
+                        "EMMS                    \n\t" // DONE
+
+                        :                              // output regs (none)
+
+                        : "S" (sptr),      // esi      // input regs
+                          "D" (dp),        // edi
+                          "c" (width)      // ecx
+// doesn't work           "i" (0x0000000000FFFFFFLL)   // %1 (a.k.a. _const4)
+
+                        : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                        , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+                     );
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     __asm__ (
+                        "subl $9, %%edi          \n\t"
+                                     // (png_pass_inc[pass] - 1)*pixel_bytes
+
+                     ".loop3_pass2:              \n\t"
+                        "movd (%%esi), %%mm0     \n\t" // x x x x x 2 1 0
+                        "pand _const4, %%mm0     \n\t" // z z z z z 2 1 0
+                        "movq %%mm0, %%mm1       \n\t" // z z z z z 2 1 0
+                        "psllq $16, %%mm0        \n\t" // z z z 2 1 0 z z
+                        "movq %%mm0, %%mm2       \n\t" // z z z 2 1 0 z z
+                        "psllq $24, %%mm0        \n\t" // 2 1 0 z z z z z
+                        "psrlq $8, %%mm1         \n\t" // z z z z z z 2 1
+                        "por %%mm2, %%mm0        \n\t" // 2 1 0 2 1 0 z z
+                        "por %%mm1, %%mm0        \n\t" // 2 1 0 2 1 0 2 1
+                        "movq %%mm0, 4(%%edi)    \n\t"
+                        "psrlq $16, %%mm0        \n\t" // z z 2 1 0 2 1 0
+                        "subl $3, %%esi          \n\t"
+                        "movd %%mm0, (%%edi)     \n\t"
+                        "subl $12, %%edi         \n\t"
+                        "decl %%ecx              \n\t"
+                        "jnz .loop3_pass2        \n\t"
+                        "EMMS                    \n\t" // DONE
+
+                        :                              // output regs (none)
+
+                        : "S" (sptr),      // esi      // input regs
+                          "D" (dp),        // edi
+                          "c" (width)      // ecx
+
+                        : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                        , "%mm0", "%mm1", "%mm2"
+#endif
+                     );
+                  }
+                  else if (width) /* && ((pass == 4) || (pass == 5)) */
+                  {
+                     int width_mmx = ((width >> 1) << 1) - 8;   // GRR:  huh?
+                     if (width_mmx < 0)
+                         width_mmx = 0;
+                     width -= width_mmx;        // 8 or 9 pix, 24 or 27 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $3, %%esi          \n\t"
+                           "subl $9, %%edi          \n\t"
+                                        // (png_pass_inc[pass] - 1)*pixel_bytes
+
+                        ".loop3_pass4:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // x x 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // x x 5 4 3 2 1 0
+                           "movq %%mm0, %%mm2       \n\t" // x x 5 4 3 2 1 0
+                           "psllq $24, %%mm0        \n\t" // 4 3 2 1 0 z z z
+                           "pand _const4, %%mm0     \n\t" // z z z z z 2 1 0
+                           "psrlq $24, %%mm2        \n\t" // z z z x x 5 4 3
+                           "por %%mm1, %%mm0        \n\t" // 4 3 2 1 0 2 1 0
+                           "movq %%mm2, %%mm3       \n\t" // z z z x x 5 4 3
+                           "psllq $8, %%mm2         \n\t" // z z x x 5 4 3 z
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "psrlq $16, %%mm3        \n\t" // z z z z z x x 5
+                           "pand _const6, %%mm3     \n\t" // z z z z z z z 5
+                           "por %%mm3, %%mm2        \n\t" // z z x x 5 4 3 5
+                           "subl $6, %%esi          \n\t"
+                           "movd %%mm2, 8(%%edi)    \n\t"
+                           "subl $12, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop3_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1", "%mm2", "%mm3"
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx*3;
+                     dp -= width_mmx*6;
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+
+                        png_memcpy(v, sptr, 3);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           png_memcpy(dp, v, 3);
+                           dp -= 3;
+                        }
+                        sptr -= 3;
+                     }
+                  }
+               } /* end of pixel_bytes == 3 */
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == 1)
+               {
+                  if (((pass == 0) || (pass == 1)) && width)
+                  {
+                     int width_mmx = ((width >> 2) << 2);
+                     width -= width_mmx;        // 0-3 pixels => 0-3 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $3, %%esi          \n\t"
+                           "subl $31, %%edi         \n\t"
+
+                        ".loop1_pass0:              \n\t"
+                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // x x x x 3 2 1 0
+                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
+                           "movq %%mm0, %%mm2       \n\t" // 3 3 2 2 1 1 0 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 1 1 1 1 0 0 0 0
+                           "movq %%mm0, %%mm3       \n\t" // 1 1 1 1 0 0 0 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 0 0 0 0 0 0 0 0
+                           "punpckhdq %%mm3, %%mm3  \n\t" // 1 1 1 1 1 1 1 1
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "punpckhwd %%mm2, %%mm2  \n\t" // 3 3 3 3 2 2 2 2
+                           "movq %%mm3, 8(%%edi)    \n\t"
+                           "movq %%mm2, %%mm4       \n\t" // 3 3 3 3 2 2 2 2
+                           "punpckldq %%mm2, %%mm2  \n\t" // 2 2 2 2 2 2 2 2
+                           "punpckhdq %%mm4, %%mm4  \n\t" // 3 3 3 3 3 3 3 3
+                           "movq %%mm2, 16(%%edi)   \n\t"
+                           "subl $4, %%esi          \n\t"
+                           "movq %%mm4, 24(%%edi)   \n\t"
+                           "subl $32, %%edi         \n\t"
+                           "subl $4, %%ecx          \n\t"
+                           "jnz .loop1_pass0        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*8;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                       /* I simplified this part in version 1.0.4e
+                        * here and in several other instances where
+                        * pixel_bytes == 1  -- GR-P
+                        *
+                        * Original code:
+                        *
+                        * png_byte v[8];
+                        * png_memcpy(v, sptr, pixel_bytes);
+                        * for (j = 0; j < png_pass_inc[pass]; j++)
+                        * {
+                        *    png_memcpy(dp, v, pixel_bytes);
+                        *    dp -= pixel_bytes;
+                        * }
+                        * sptr -= pixel_bytes;
+                        *
+                        * Replacement code is in the next three lines:
+                        */
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                           *dp-- = *sptr;
+                        --sptr;
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     int width_mmx = ((width >> 2) << 2);
+                     width -= width_mmx;        // 0-3 pixels => 0-3 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $3, %%esi          \n\t"
+                           "subl $15, %%edi         \n\t"
+
+                        ".loop1_pass2:              \n\t"
+                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
+                           "movq %%mm0, %%mm1       \n\t" // 3 3 2 2 1 1 0 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 1 1 1 1 0 0 0 0
+                           "punpckhwd %%mm1, %%mm1  \n\t" // 3 3 3 3 2 2 2 2
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $4, %%esi          \n\t"
+                           "movq %%mm1, 8(%%edi)    \n\t"
+                           "subl $16, %%edi         \n\t"
+                           "subl $4, %%ecx          \n\t"
+                           "jnz .loop1_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*4;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                           *dp-- = *sptr;
+                        --sptr;
+                     }
+                  }
+                  else if (width)  /* && ((pass == 4) || (pass == 5)) */
+                  {
+                     int width_mmx = ((width >> 3) << 3);
+                     width -= width_mmx;        // 0-7 pixels => 0-7 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $7, %%esi          \n\t"
+                           "subl $15, %%edi         \n\t"
+
+                        ".loop1_pass4:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
+                           "punpckhbw %%mm1, %%mm1  \n\t" // 7 7 6 6 5 5 4 4
+                           "movq %%mm1, 8(%%edi)    \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $16, %%edi         \n\t"
+                           "subl $8, %%ecx          \n\t"
+                           "jnz .loop1_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*2;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                           *dp-- = *sptr;
+                        --sptr;
+                     }
+                  }
+               } /* end of pixel_bytes == 1 */
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == 2)
+               {
+                  if (((pass == 0) || (pass == 1)) && width)
+                  {
+                     int width_mmx = ((width >> 1) << 1);
+                     width -= width_mmx;        // 0,1 pixels => 0,2 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $2, %%esi          \n\t"
+                           "subl $30, %%edi         \n\t"
+
+                        ".loop2_pass0:              \n\t"
+                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 3 2 3 2 1 0 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 1 0 1 0 1 0 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 3 2 3 2 3 2 3 2
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "movq %%mm1, 16(%%edi)   \n\t"
+                           "subl $4, %%esi          \n\t"
+                           "movq %%mm1, 24(%%edi)   \n\t"
+                           "subl $32, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop2_pass0        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*2 - 2); // sign fixed
+                     dp -= (width_mmx*16 - 2);  // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 2;
+                        png_memcpy(v, sptr, 2);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 2;
+                           png_memcpy(dp, v, 2);
+                        }
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;        // 0,1 pixels => 0,2 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $2, %%esi          \n\t"
+                           "subl $14, %%edi         \n\t"
+
+                        ".loop2_pass2:              \n\t"
+                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 3 2 3 2 1 0 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 1 0 1 0 1 0 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 3 2 3 2 3 2 3 2
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $4, %%esi          \n\t"
+                           "movq %%mm1, 8(%%edi)    \n\t"
+                           "subl $16, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop2_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*2 - 2); // sign fixed
+                     dp -= (width_mmx*8 - 2);   // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 2;
+                        png_memcpy(v, sptr, 2);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 2;
+                           png_memcpy(dp, v, 2);
+                        }
+                     }
+                  }
+                  else if (width)  // pass == 4 or 5
+                  {
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;        // 0,1 pixels => 0,2 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $2, %%esi          \n\t"
+                           "subl $6, %%edi          \n\t"
+
+                        ".loop2_pass4:              \n\t"
+                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
+                           "subl $4, %%esi          \n\t"
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $8, %%edi          \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop2_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*2 - 2); // sign fixed
+                     dp -= (width_mmx*4 - 2);   // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 2;
+                        png_memcpy(v, sptr, 2);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 2;
+                           png_memcpy(dp, v, 2);
+                        }
+                     }
+                  }
+               } /* end of pixel_bytes == 2 */
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == 4)
+               {
+                  if (((pass == 0) || (pass == 1)) && width)
+                  {
+                     int width_mmx = ((width >> 1) << 1);
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
+/*
+fprintf(stderr, "GRR DEBUG:  png_do_read_interlace() pass = %d, width_mmx = %d, width = %d\n", pass, width_mmx, width);
+fprintf(stderr, "            sptr = 0x%08lx, dp = 0x%08lx\n", (unsigned long)sptr, (unsigned long)dp);
+fflush(stderr);
+ */
+                     if (width_mmx)
+                     {
+#ifdef GRR_DEBUG
+                        FILE *junk = fopen("junk.4bytes", "wb");
+                        if (junk)
+                           fclose(junk);
+#endif /* GRR_DEBUG */
+                        __asm__ (
+                           "subl $4, %%esi          \n\t"
+                           "subl $60, %%edi         \n\t"
+
+                        ".loop4_pass0:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "movq %%mm0, 16(%%edi)   \n\t"
+                           "movq %%mm0, 24(%%edi)   \n\t"
+                           "movq %%mm1, 32(%%edi)   \n\t"
+                           "movq %%mm1, 40(%%edi)   \n\t"
+                           "movq %%mm1, 48(%%edi)   \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm1, 56(%%edi)   \n\t"
+                           "subl $64, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop4_pass0        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*4 - 4); // sign fixed
+                     dp -= (width_mmx*32 - 4);  // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 4;
+                        png_memcpy(v, sptr, 4);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 4;
+                           png_memcpy(dp, v, 4);
+                        }
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     int width_mmx = ((width >> 1) << 1);
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $4, %%esi          \n\t"
+                           "subl $28, %%edi         \n\t"
+
+                        ".loop4_pass2:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "movq %%mm1, 16(%%edi)   \n\t"
+                           "movq %%mm1, 24(%%edi)   \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "subl $32, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop4_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*4 - 4); // sign fixed
+                     dp -= (width_mmx*16 - 4);  // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 4;
+                        png_memcpy(v, sptr, 4);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 4;
+                           png_memcpy(dp, v, 4);
+                        }
+                     }
+                  }
+                  else if (width)  // pass == 4 or 5
+                  {
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $4, %%esi          \n\t"
+                           "subl $12, %%edi         \n\t"
+
+                        ".loop4_pass4:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm1, 8(%%edi)    \n\t"
+                           "subl $16, %%edi         \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop4_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width_mmx)  // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0", "%mm1"
+#endif
+                        );
+                     }
+
+                     sptr -= (width_mmx*4 - 4); // sign fixed
+                     dp -= (width_mmx*8 - 4);   // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 4;
+                        png_memcpy(v, sptr, 4);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 4;
+                           png_memcpy(dp, v, 4);
+                        }
+                     }
+                  }
+               } /* end of pixel_bytes == 4 */
+
+#define STILL_WORKING_ON_THIS
+#ifdef STILL_WORKING_ON_THIS  // GRR: should work, but needs testing
+                              //      (special 64-bit version of rpng2)
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == 8)
+               {
+                  // GRR NOTE:  no need to combine passes here!
+                  if (((pass == 0) || (pass == 1)) && width)
+                  {
+                     // source is 8-byte RRGGBBAA
+                     // dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ...
+#ifdef GRR_DEBUG
+                        FILE *junk = fopen("junk.8bytes", "wb");
+                        if (junk)
+                            fclose(junk);
+#endif /* GRR_DEBUG */
+                        __asm__ (
+                           "subl $56, %%edi         \n\t" // start of last block
+
+                        ".loop8_pass0:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "movq %%mm0, 16(%%edi)   \n\t"
+                           "movq %%mm0, 24(%%edi)   \n\t"
+                           "movq %%mm0, 32(%%edi)   \n\t"
+                           "movq %%mm0, 40(%%edi)   \n\t"
+                           "movq %%mm0, 48(%%edi)   \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm0, 56(%%edi)   \n\t"
+                           "subl $64, %%edi         \n\t"
+                           "decl %%ecx              \n\t"
+                           "jnz .loop8_pass0        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width)      // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0"
+#endif
+                        );
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     // source is 8-byte RRGGBBAA; dest is 32-byte (4 x RRGGBBAA)
+                     // NOTE(review): ecx below gets width *after* width_mmx is subtracted (0 or 1) -- confirm loop count
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $24, %%edi         \n\t" // start of last block
+
+                        ".loop8_pass2:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "movq %%mm0, 16(%%edi)   \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm0, 24(%%edi)   \n\t"
+                           "subl $32, %%edi         \n\t"
+                           "decl %%ecx              \n\t"
+                           "jnz .loop8_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width)      // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0"
+#endif
+                        );
+                     }
+                  }
+                  else if (width)  // pass == 4 or 5
+                  {
+                     // source is 8-byte RRGGBBAA; dest is 16-byte (2 x RRGGBBAA)
+                     // NOTE(review): ecx below gets width *after* width_mmx is subtracted (0 or 1) -- confirm loop count
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;
+                     if (width_mmx)
+                     {
+                        __asm__ (
+                           "subl $8, %%edi          \n\t" // start of last block
+
+                        ".loop8_pass4:              \n\t"
+                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, (%%edi)     \n\t"
+                           "subl $8, %%esi          \n\t"
+                           "movq %%mm0, 8(%%edi)    \n\t"
+                           "subl $16, %%edi         \n\t"
+                           "decl %%ecx              \n\t"
+                           "jnz .loop8_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           :                              // output regs (none)
+
+                           : "S" (sptr),      // esi      // input regs
+                             "D" (dp),        // edi
+                             "c" (width)      // ecx
+
+                           : "%esi", "%edi", "%ecx"       // clobber list
+#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
+                           , "%mm0"
+#endif
+                        );
+                     }
+                  }
+
+               } /* end of pixel_bytes == 8 */
+
+#endif /* STILL_WORKING_ON_THIS */
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == 6)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 6);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 6);
+                        dp -= 6;
+                     }
+                     sptr -= 6;
+                  }
+               } /* end of pixel_bytes == 6 */
+
+               //--------------------------------------------------------------
+               else
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, pixel_bytes);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, pixel_bytes);
+                        dp -= pixel_bytes;
+                     }
+                     sptr-= pixel_bytes;
+                  }
+               }
+            } // end of mmx_supported =========================================
+
+            else /* MMX not supported:  use modified C code - takes advantage
+                  *   of inlining of memcpy for a constant */
+                 /* GRR 19991007:  does it?  or should pixel_bytes in each
+                  *   block be replaced with immediate value (e.g., 1)? */
+                 /* GRR 19991017:  replaced with constants in each case */
+            {
+               if (pixel_bytes == 1)
+               {
+                  for (i = width; i; i--)
+                  {
+                     int j;
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                        *dp-- = *sptr;
+                     --sptr;
+                  }
+               }
+               else if (pixel_bytes == 3)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 3);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 3);
+                        dp -= 3;
+                     }
+                     sptr -= 3;
+                  }
+               }
+               else if (pixel_bytes == 2)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 2);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 2);
+                        dp -= 2;
+                     }
+                     sptr -= 2;
+                  }
+               }
+               else if (pixel_bytes == 4)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 4);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 4);
+                        dp -= 4;
+                     }
+                     sptr -= 4;
+                  }
+               }
+               else if (pixel_bytes == 6)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 6);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 6);
+                        dp -= 6;
+                     }
+                     sptr -= 6;
+                  }
+               }
+               else if (pixel_bytes == 8)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 8);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 8);
+                        dp -= 8;
+                     }
+                     sptr -= 8;
+                  }
+               }
+               else     // GRR:  should never be reached
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, pixel_bytes);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, pixel_bytes);
+                        dp -= pixel_bytes;
+                     }
+                     sptr -= pixel_bytes;
+                  }
+               }
+
+            } /* end if (MMX not supported) */
+            break;
+         }
+      } /* end switch (row_info->pixel_depth) */
+
+      row_info->width = final_width;
+      row_info->rowbytes = ((final_width *
+         (png_uint_32)row_info->pixel_depth + 7) >> 3);
+   }
+
+} /* end png_do_read_interlace() */
+
+#endif /* PNG_HAVE_ASSEMBLER_READ_INTERLACE */
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+
+
+// 64-bit masks and shift counts used by the MMX filter routines below.  They
+// are declared globally here with the intent of 8-byte alignment.
+// NOTE(review): whether the double member yields 8-byte alignment depends on the target ABI -- confirm.
+union uAll {
+   long long use;                           // the 64-bit value that the code reads and writes
+   double  align;                           // never accessed in the code visible here; presumably alignment-only
+} LBCarryMask = {0x0101010101010101LL},     // bit 0 of every byte: isolates per-byte LSBs for the averaging carry
+  HBClearMask = {0x7f7f7f7f7f7f7f7fLL},     // bits 0-6 of every byte: clears bit 7 after a 64-bit "psrlq $1"
+  ActiveMask, ActiveMask2, ActiveMaskEnd, ShiftBpp, ShiftRem;  // no initializer (zero at load); ActiveMask/ShiftBpp/ShiftRem are assigned per bpp before use
+
+
+// Optimized PNG Average-filter decoder: in place, Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
+void
+png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row,
+                            png_bytep prev_row)  // row: Avg-filtered bytes, overwritten with Raw; prev_row: Prior(x), only read
+{  // row_info supplies pixel_depth (-> bpp) and rowbytes (-> _FullLength)
+   int bpp;
+// int diff;  GRR: global now (shortened to dif/_dif)
+
+   bpp = (row_info->pixel_depth + 7) >> 3;  // Get # bytes per pixel
+   _FullLength  = row_info->rowbytes;        // # of bytes to filter
+   __asm__ (  // Phase 1: scalar decode of the first bpp bytes and of the bytes up to _dif; computes _dif and _MMXLength
+      // Init address pointers and offset
+//GRR "movl row, %%edi             \n\t" // edi ==> Avg(x)
+      "xorl %%ebx, %%ebx           \n\t" // ebx ==> x
+      "movl %%edi, %%edx           \n\t"
+//GRR "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+//GRR "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
+      "subl %%ecx, %%edx           \n\t" // edx ==> Raw(x-bpp)
+
+      "xorl %%eax,%%eax            \n\t"
+
+      // Compute the Raw value for the first bpp bytes
+      //    Raw(x) = Avg(x) + (Prior(x)/2)
+   "avg_rlp:                       \n\t"
+      "movb (%%esi,%%ebx,),%%al    \n\t" // Load al with Prior(x)
+      "incl %%ebx                  \n\t"
+      "shrb %%al                   \n\t" // divide by 2
+      "addb -1(%%edi,%%ebx,),%%al  \n\t" // add Avg(x); -1 to offset inc ebx
+//GRR "cmpl bpp, %%ebx             \n\t" // (bpp is preloaded into ecx)
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movb %%al,-1(%%edi,%%ebx,)  \n\t" // write Raw(x); -1 to offset inc ebx
+      "jb avg_rlp                  \n\t" // mov does not affect flags
+
+      // get # of bytes to alignment
+      "movl %%edi, _dif            \n\t" // take start of row
+      "addl %%ebx, _dif            \n\t" // add bpp
+      "addl $0xf, _dif             \n\t" // add 7+8 to incr past alignment bdry
+      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
+      "subl %%edi, _dif            \n\t" // subtract from start => value ebx at alignment
+      "jz avg_go                   \n\t"
+
+      // fix alignment
+      // Compute the Raw value for the bytes up to the alignment boundary
+      //    Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
+      "xorl %%ecx, %%ecx           \n\t"
+   "avg_lp1:                       \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
+      "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
+      "addw %%cx, %%ax             \n\t"
+      "incl %%ebx                  \n\t"
+      "shrw %%ax                   \n\t" // divide by 2
+      "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset inc ebx
+      "cmpl _dif, %%ebx            \n\t" // check if at alignment boundary
+      "movb %%al, -1(%%edi,%%ebx,) \n\t" // write Raw(x); -1 to offset inc ebx
+      "jb avg_lp1                  \n\t" // repeat until at alignment boundary
+
+   "avg_go:                        \n\t"
+      "movl _FullLength, %%eax     \n\t"
+      "movl %%eax, %%ecx           \n\t"
+      "subl %%ebx, %%eax           \n\t" // subtract alignment fix
+      "andl $0x00000007, %%eax     \n\t" // calc bytes over mult of 8
+      "subl %%eax, %%ecx           \n\t" // drop over bytes from original length
+      "movl %%ecx, _MMXLength      \n\t"
+
+      : // output regs/vars here, e.g., "=m" (_MMXLength) instead of final instr
+
+      : "S" (prev_row),  // esi          // input regs
+        "D" (row),       // edi
+        "c" (bpp)        // ecx
+
+      : "%eax", "%ebx", "%ecx",          // clobber list
+        "%edx", "%edi", "%esi"
+// NOTE(review): asm writes row[], _dif, _MMXLength => add "memory" clobber; esi/edi/ecx are inputs AND clobbers (GCC docs forbid that overlap)
+   );
+
+#ifdef GRR_GCC_MMX_CONVERTED
+   // Now do the math for the rest of the row (this section is presumably not yet converted to GCC syntax and stays compiled out)
+   switch ( bpp )  // Phase 2: MMX loops, 8 bytes per iteration, from offset _dif up to _MMXLength
+   {
+      case 3:
+      {
+         ActiveMask.use  = 0x0000000000ffffff;
+         ShiftBpp.use = 24;    // == 3 * 8
+         ShiftRem.use = 40;    // == 64 - 24
+         __asm__ (
+            // Re-init address pointers and offset
+            "movq $ActiveMask, %%mm7     \n\t"
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+            "movq $LBCarryMask, %%mm5    \n\t"
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "movq $HBClearMask, %%mm4    \n\t"
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            // PRIME the pump (load the first Raw(x-bpp) data set)
+            "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes
+                                          // (we correct position in loop below)
+         "avg_3lp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // Load mm0 with Avg(x)
+            // Add (Prev_row/2) to Average
+            "movq %%mm5, %%mm3           \n\t"
+            "psrlq $ShiftRem, %%mm2      \n\t" // Correct position Raw(x-bpp) data
+            "movq (%%esi,%%ebx,), %%mm1  \n\t" // Load mm1 with Prior(x)
+            "movq %%mm7, %%mm6           \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for each byte
+            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                               // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 1 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active
+                               //  byte
+            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
+            "psllq $ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover bytes 3-5
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                               // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 2 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active
+                               //  byte
+
+            // Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
+            "psllq $ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover the last two
+                                 // bytes
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+                              // Data only needs to be shifted once here to
+                              // get the correct x-bpp offset.
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                              // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 3 bytes to add to Avg
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active
+                                               // byte
+            // Now ready to write back to memory
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
+            // Move updated Raw(x) to use as Raw(x-bpp) for next loop
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raw(x) to mm2
+            "jb avg_3lp                  \n\t"
+
+            : // output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : "S" (prev_row),  // esi          // input regs
+              "D" (row)        // edi
+
+            : "%ebx", "%edi", "%esi"           // clobber list
+//            GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength)   PROBABLY
+//          , "%mm0", "%mm1", "%mm2", "%mm3",
+//            "%mm4", "%mm5", "%mm6", "%mm7"
+         );
+      }
+      break;  // end 3 bpp
+
+      case 6:
+      case 4:
+      //case 7:   // who wrote this?  PNG doesn't support 5 or 7 bytes/pixel
+      //case 5:
+      {
+         ActiveMask.use  = 0xffffffffffffffff;  // use shift below to clear
+                                                // appropriate inactive bytes
+         ShiftBpp.use = bpp << 3;
+         ShiftRem.use = 64 - ShiftBpp.use;
+         __asm__ (
+            "movq $HBClearMask, %%mm4    \n\t"
+
+            // Re-init address pointers and offset
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+
+            // Load ActiveMask and clear all bytes except for 1st active group
+            "movq $ActiveMask, %%mm7     \n\t"
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "psrlq $ShiftRem, %%mm7      \n\t"
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            "movq %%mm7, %%mm6           \n\t"
+            "movq $LBCarryMask, %%mm5    \n\t"
+            "psllq $ShiftBpp, %%mm6      \n\t" // Create mask for 2nd active group
+
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes
+                                          // (we correct position in loop below)
+         "avg_4lp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t"
+            "psrlq $ShiftRem, %%mm2      \n\t" // shift data to position correctly
+            "movq (%%esi,%%ebx,), %%mm1  \n\t"
+            // Add (Prev_row/2) to Average
+            "movq %%mm5, %%mm3           \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for each byte
+            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                              // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm7, %%mm2           \n\t" // Leave only Active Group 1 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active
+                              // byte
+            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+            "addl $8, %%ebx              \n\t"
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                              // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 2 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active
+                              // byte
+            "cmpl _MMXLength, %%ebx      \n\t"
+            // Now ready to write back to memory
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
+            // Prep Raw(x-bpp) for next loop
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "jb avg_4lp                  \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;  // end 4,6 bpp
+
+      case 2:
+      {
+         ActiveMask.use  = 0x000000000000ffff;
+         ShiftBpp.use = 16;   // == 2 * 8  (was 24, copied from the 3-bpp case)
+         ShiftRem.use = 48;   // == 64 - 16  (was 40)
+         __asm__ (
+            // Load ActiveMask
+            "movq $ActiveMask, %%mm7     \n\t"
+            // Re-init address pointers and offset
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+            "movq $LBCarryMask, %%mm5    \n\t"
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "movq $HBClearMask, %%mm4    \n\t"
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes
+                              // (we correct position in loop below)
+         "avg_2lp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t"
+            "psrlq $ShiftRem, %%mm2      \n\t" // shift data to position correctly (was psllq; the 3- and 4/6-bpp loops use psrlq)
+            "movq (%%esi,%%ebx,), %%mm1  \n\t"
+            // Add (Prev_row/2) to Average
+            "movq %%mm5, %%mm3           \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each byte
+            "movq %%mm7, %%mm6           \n\t"
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for each byte
+            // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                              // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 1 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte
+            // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
+            "psllq $ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover bytes 2 & 3
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                                // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 2 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte
+
+            // Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry
+            "psllq $ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover bytes 4 & 5
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+                                // Data only needs to be shifted once here to
+                                // get the correct x-bpp offset.
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                                // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 3 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte
+
+            // Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry
+            "psllq $ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover bytes 6 & 7
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $ShiftBpp, %%mm2      \n\t" // shift data to position correctly
+                                 // Data only needs to be shifted once here to
+                                 // get the correct x-bpp offset.
+            "addl $8, %%ebx              \n\t"
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte where both
+                             // lsb's were == 1 (Only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte
+            "pand %%mm6, %%mm2           \n\t" // Leave only Active Group 4 bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte
+
+            "cmpl _MMXLength, %%ebx      \n\t"
+            // Now ready to write back to memory
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
+            // Prep Raw(x-bpp) for next loop
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "jb avg_2lp                  \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;  // end 2 bpp
+
+      case 1:
+      {
+         __asm__ (
+            // Re-init address pointers and offset
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "cmpl _FullLength, %%ebx     \n\t" // Test if offset at end of array
+            "jnb avg_1end                \n\t"
+            // Do Avg decode for remaining bytes
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            "movl %%edi, %%edx           \n\t"
+            "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx in loop below
+            "subl bpp, %%edx             \n\t" // edx ==> Raw(x-bpp)
+         "avg_1lp:                       \n\t"
+            // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
+            "xorl %%eax, %%eax           \n\t"
+            "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
+            "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
+            "addw %%cx, %%ax             \n\t"
+            "incl %%ebx                  \n\t"
+            "shrw %%ax                   \n\t" // divide by 2
+            "addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx
+            "cmpl _FullLength, %%ebx     \n\t" // Check if at end of array
+            "movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x);
+                         // mov does not affect flags; -1 to offset inc ebx
+            "jb avg_1lp                  \n\t"
+         "avg_1end:                      \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+         );
+      }
+      return;  // end 1 bpp (scalar loop above already ran to _FullLength and used no MMX, so the cleanup block is skipped)
+
+      case 8:
+      {
+         __asm__ (
+            // Re-init address pointers and offset
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+            "movq $LBCarryMask, %%mm5    \n\t"
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "movq $HBClearMask, %%mm4    \n\t"
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes
+                                // (NO NEED to correct position in loop below)
+         "avg_8lp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t"
+            "movq %%mm5, %%mm3           \n\t"
+            "movq (%%esi,%%ebx,), %%mm1  \n\t"
+            "addl $8, %%ebx              \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand %%mm2, %%mm3           \n\t" // get LBCarrys for each byte where both
+                                // lsb's were == 1
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm3, %%mm0          \n\t" // add LBCarrys to Avg for each byte
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for each byte
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) to Avg for each byte
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
+            "movq %%mm0, %%mm2           \n\t" // reuse as Raw(x-bpp)
+            "jb avg_8lp                  \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" // CHECKASM: clobber list
+         );
+      }
+      break;  // end 8 bpp
+
+      default:                  // bpp greater than 8 (!= 1,2,3,4,6,8)
+      {
+
+      GRR:  PRINT ERROR HERE:  SHOULD NEVER BE REACHED (unless smaller than 1?)
+
+        __asm__ (
+            "movq $LBCarryMask, %%mm5    \n\t"
+            // Re-init address pointers and offset
+            "movl _dif, %%ebx            \n\t" // ebx ==> x = offset to alignment boundary
+            "movl row, %%edi             \n\t" // edi ==> Avg(x)
+            "movq $HBClearMask, %%mm4    \n\t"
+            "movl %%edi, %%edx           \n\t"
+            "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+            "subl bpp, %%edx             \n\t" // edx ==> Raw(x-bpp)
+         "avg_Alp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t"
+            "movq %%mm5, %%mm3           \n\t"
+            "movq (%%esi,%%ebx,), %%mm1  \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "movq (%%edx,%%ebx,), %%mm2  \n\t"
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand %%mm2, %%mm3           \n\t" // get LBCarrys for each byte where both
+                                // lsb's were == 1
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm3, %%mm0          \n\t" // add LBCarrys to Avg for each byte
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each byte
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for each byte
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) to Avg for each byte
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
+            "jb avg_Alp                  \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" // CHECKASM: clobber list
+         );
+      }
+      break;
+   }                         // end switch ( bpp )
+
+   __asm__ (  // Phase 3: scalar decode of the tail bytes from _MMXLength up to _FullLength, then emms
+      // MMX acceleration complete now do clean-up
+      // Check if any remaining bytes left to decode
+      "movl _MMXLength, %%ebx      \n\t" // ebx ==> x = offset bytes remaining after MMX
+      "movl row, %%edi             \n\t" // edi ==> Avg(x)
+      "cmpl _FullLength, %%ebx     \n\t" // Test if offset at end of array
+      "jnb avg_end                 \n\t"
+      // Do Avg decode for remaining bytes
+      "movl prev_row, %%esi        \n\t" // esi ==> Prior(x)
+      "movl %%edi, %%edx           \n\t"
+      "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx in loop below
+      "subl bpp, %%edx             \n\t" // edx ==> Raw(x-bpp)
+   "avg_lp2:                       \n\t"
+      // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
+      "xorl %%eax, %%eax           \n\t"
+      "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
+      "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
+      "addw %%cx, %%ax             \n\t"
+      "incl %%ebx                  \n\t"
+      "shrw %%ax                   \n\t" // divide by 2
+      "addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx
+      "cmpl _FullLength, %%ebx     \n\t" // Check if at end of array
+      "movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x);
+                       // mov does not affect flags; -1 to offset inc ebx
+      "jb avg_lp2                  \n\t"
+   "avg_end:                       \n\t"
+      "emms                        \n\t" // End MMX instructions; prep for possible FP instrs.
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+   );
+#endif /* GRR_GCC_MMX_CONVERTED */
+}
+
+// Optimized code for PNG Paeth filter decoder
+void
+png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,
+                              png_bytep prev_row)
+{
+#ifdef GRR_GCC_MMX_CONVERTED
+   int bpp;
+   int patemp, pbtemp, pctemp;
+
+   bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
+   _FullLength  = row_info->rowbytes; // # of bytes to filter
+   __asm__ (
+      "xorl %%ebx, %%ebx           \n\t" // ebx ==> x offset
+      "movl row, %%edi             \n\t"
+      "xorl %%edx, %%edx           \n\t" // edx ==> x-bpp offset
+      "movl prev_row, %%esi        \n\t"
+      "xorl %%eax, %%eax           \n\t"
+
+      // Compute the Raw value for the first bpp bytes
+      // Note: the formula works out to be always
+      //   Paeth(x) = Raw(x) + Prior(x)      where x < bpp
+   "paeth_rlp:                     \n\t"
+      "movb (%%edi,%%ebx,), %%al   \n\t"
+      "addb (%%esi,%%ebx,), %%al   \n\t"
+      "incl %%ebx                  \n\t"
+      "cmpl bpp, %%ebx             \n\t"
+      "movb %%al, -1(%%edi,%%ebx,) \n\t"
+      "jb paeth_rlp                \n\t"
+      // get # of bytes to alignment
+      "movl %%edi, _dif            \n\t" // take start of row
+      "addl %%ebx, _dif            \n\t" // add bpp
+      "xorl %%ecx, %%ecx           \n\t"
+      "addl $0xf, _dif             \n\t" // add 7 + 8 to incr past alignment boundary
+      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
+      "subl %%edi, _dif            \n\t" // subtract from start ==> value ebx at alignment
+      "jz paeth_go                 \n\t"
+      // fix alignment
+   "paeth_lp1:                     \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      // pav = p - a = (a + b - c) - a = b - c
+      "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+      "movl %%eax, patemp          \n\t" // Save pav for later use
+      "xorl %%eax, %%eax           \n\t"
+      // pbv = p - b = (a + b - c) - b = a - c
+      "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
+      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+      "movl %%eax, %%ecx           \n\t"
+      // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+      "addl patemp, %%eax          \n\t" // pcv = pav + pbv
+      // pc = abs(pcv)
+      "testl $0x80000000, %%eax    \n\t"
+      "jz paeth_pca                \n\t"
+      "negl %%eax                  \n\t" // reverse sign of neg values
+   "paeth_pca:                     \n\t"
+      "movl %%eax, pctemp          \n\t" // save pc for later use
+      // pb = abs(pbv)
+      "testl $0x80000000, %%ecx    \n\t"
+      "jz paeth_pba                \n\t"
+      "negl %%ecx                  \n\t" // reverse sign of neg values
+   "paeth_pba:                     \n\t"
+      "movl %%ecx, pbtemp          \n\t" // save pb for later use
+      // pa = abs(pav)
+      "movl patemp, %%eax          \n\t"
+      "testl $0x80000000, %%eax    \n\t"
+      "jz paeth_paa                \n\t"
+      "negl %%eax                  \n\t" // reverse sign of neg values
+   "paeth_paa:                     \n\t"
+      "movl %%eax, patemp          \n\t" // save pa for later use
+      // test if pa <= pb
+      "cmpl %%ecx, %%eax           \n\t"
+      "jna paeth_abb               \n\t"
+      // pa > pb; now test if pb <= pc
+      "cmpl pctemp, %%ecx          \n\t"
+      "jna paeth_bbc               \n\t"
+      // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "jmp paeth_paeth             \n\t"
+   "paeth_bbc:                     \n\t"
+      // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
+      "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
+      "jmp paeth_paeth             \n\t"
+   "paeth_abb:                     \n\t"
+      // pa <= pb; now test if pa <= pc
+      "cmpl pctemp, %%eax          \n\t"
+      "jna paeth_abc               \n\t"
+      // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "jmp paeth_paeth             \n\t"
+   "paeth_abc:                     \n\t"
+      // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
+      "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
+   "paeth_paeth:                   \n\t"
+      "incl %%ebx                  \n\t"
+      "incl %%edx                  \n\t"
+      // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
+      "addb %%cl, -1(%%edi,%%ebx,) \n\t"
+      "cmpl _dif, %%ebx            \n\t"
+      "jb paeth_lp1                \n\t"
+   "paeth_go:                      \n\t"
+      "movl _FullLength, %%ecx     \n\t"
+      "movl %%ecx, %%eax           \n\t"
+      "subl %%ebx, %%eax           \n\t" // subtract alignment fix
+      "andl $0x00000007, %%eax     \n\t" // calc bytes over mult of 8
+      "subl %%eax, %%ecx           \n\t" // drop over bytes from original length
+      "movl %%ecx, _MMXLength      \n\t"
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+   );
+
+   // Now do the math for the rest of the row
+   switch ( bpp )
+   {
+      case 3:
+      {
+         ActiveMask.use = 0x0000000000ffffff;
+         ActiveMaskEnd.use = 0xffff000000000000;
+         ShiftBpp.use = 24;    // == bpp(3) * 8
+         ShiftRem.use = 40;    // == 64 - 24
+         __asm__ (
+            "movl _dif, %%ebx            \n\t"
+            "movl row, %%edi             \n\t"
+            "movl prev_row, %%esi        \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t"
+         "paeth_3lp:                     \n\t"
+            "psrlq $ShiftRem, %%mm1      \n\t" // shift last 3 bytes to 1st 3 bytes
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "punpcklbw %%mm0, %%mm1      \n\t" // Unpack High bytes of a
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // Prep c=Prior(x-bpp) bytes
+            "punpcklbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            "psrlq $ShiftRem, %%mm3      \n\t" // shift last 3 bytes to 1st 3 bytes
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "punpcklbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq (%%esi,%%ebx,), %%mm3  \n\t" // load c=Prior(x-bpp)
+            "pand $ActiveMask, %%mm7     \n\t"
+            "movq %%mm3, %%mm2           \n\t" // load b=Prior(x) step 1
+            "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "punpcklbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            "movq %%mm7, (%%edi,%%ebx,)  \n\t" // write back updated value
+            "movq %%mm7, %%mm1           \n\t" // Now mm1 will be used as Raw(x-bpp)
+            // Now do Paeth for 2nd set of bytes (3-5)
+            "psrlq $ShiftBpp, %%mm2      \n\t" // load b=Prior(x) step 2
+            "punpcklbw %%mm0, %%mm1      \n\t" // Unpack High bytes of a
+            "pxor %%mm7, %%mm7           \n\t"
+            "punpcklbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) =
+            //       pav + pbv = pbv + pav
+            "movq %%mm5, %%mm6           \n\t"
+            "paddw %%mm4, %%mm6          \n\t"
+
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm5, %%mm0        \n\t" // Create mask pbv bytes < 0
+            "pcmpgtw %%mm4, %%mm7        \n\t" // Create mask pav bytes < 0
+            "pand %%mm5, %%mm0           \n\t" // Only pbv bytes < 0 in mm0
+            "pand %%mm4, %%mm7           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm0, %%mm5          \n\t"
+            "psubw %%mm7, %%mm4          \n\t"
+            "psubw %%mm0, %%mm5          \n\t"
+            "psubw %%mm7, %%mm4          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq %%mm2, %%mm3           \n\t" // load c=Prior(x-bpp) step 1
+            "pand $ActiveMask, %%mm7     \n\t"
+            "punpckhbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            "psllq $ShiftBpp, %%mm7      \n\t" // Shift bytes to 2nd group of 3 bytes
+             // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "psllq $ShiftBpp, %%mm3      \n\t" // load c=Prior(x-bpp) step 2
+            "movq %%mm7, (%%edi,%%ebx,)  \n\t" // write back updated value
+            "movq %%mm7, %%mm1           \n\t"
+            "punpckhbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            "psllq $ShiftBpp, %%mm1      \n\t" // Shift bytes
+                                    // Now mm1 will be used as Raw(x-bpp)
+            // Now do Paeth for 3rd, and final, set of bytes (6-7)
+            "pxor %%mm7, %%mm7           \n\t"
+            "punpckhbw %%mm0, %%mm1      \n\t" // Unpack High bytes of a
+            "psubw %%mm3, %%mm4          \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "paddw %%mm5, %%mm6          \n\t"
+
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "packuswb %%mm7, %%mm1       \n\t"
+            // Step ebx to next set of 8 bytes and repeat loop til done
+            "addl $8, %%ebx              \n\t"
+            "pand $ActiveMaskEnd, %%mm1  \n\t"
+            "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
+
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "pxor %%mm0, %%mm0           \n\t" // pxor does not affect flags
+            "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value
+                                 // mm1 will be used as Raw(x-bpp) next loop
+                           // mm3 ready to be used as Prior(x-bpp) next loop
+            "jb paeth_3lp                \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      case 6:
+      //case 7:   // GRR BOGUS
+      //case 5:   // GRR BOGUS
+      {
+         ActiveMask.use  = 0x00000000ffffffff;
+         ActiveMask2.use = 0xffffffff00000000;
+         ShiftBpp.use = bpp << 3;    // == bpp * 8
+         ShiftRem.use = 64 - ShiftBpp.use;
+         __asm__ (
+            "movl _dif, %%ebx            \n\t"
+            "movl row, %%edi             \n\t"
+            "movl prev_row, %%esi        \n\t"
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+         "paeth_6lp:                     \n\t"
+            // Must shift to position Raw(x-bpp) data
+            "psrlq $ShiftRem, %%mm1      \n\t"
+            // Do first set of 4 bytes
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "punpcklbw %%mm0, %%mm1      \n\t" // Unpack Low bytes of a
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "punpcklbw %%mm0, %%mm2      \n\t" // Unpack Low bytes of b
+            // Must shift to position Prior(x-bpp) data
+            "psrlq $ShiftRem, %%mm3      \n\t"
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "punpcklbw %%mm0, %%mm3      \n\t" // Unpack Low bytes of c
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp)
+            "pand $ActiveMask, %%mm7     \n\t"
+            "psrlq $ShiftRem, %%mm3      \n\t"
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x) step 1
+            "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "movq %%mm2, %%mm6           \n\t"
+            "movq %%mm7, (%%edi,%%ebx,)  \n\t" // write back updated value
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t"
+            "psllq $ShiftBpp, %%mm6      \n\t"
+            "movq %%mm7, %%mm5           \n\t"
+            "psrlq $ShiftRem, %%mm1      \n\t"
+            "por %%mm6, %%mm3            \n\t"
+            "psllq $ShiftBpp, %%mm5      \n\t"
+            "punpckhbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            "por %%mm5, %%mm1            \n\t"
+            // Do second set of 4 bytes
+            "punpckhbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            "punpckhbw %%mm0, %%mm1      \n\t" // Unpack High bytes of a
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // Step ebx to next set of 8 bytes and repeat loop til done
+            "addl $8, %%ebx              \n\t"
+            "packuswb %%mm7, %%mm1       \n\t"
+            "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value
+                                // mm1 will be used as Raw(x-bpp) next loop
+            "jb paeth_6lp                \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      case 4:
+      {
+         ActiveMask.use  = 0x00000000ffffffff;
+         __asm__ (
+            "movl _dif, %%ebx            \n\t"
+            "movl row, %%edi             \n\t"
+            "movl prev_row, %%esi        \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read
+                                     //  a=Raw(x-bpp) bytes
+         "paeth_4lp:                     \n\t"
+            // Do first set of 4 bytes
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "punpckhbw %%mm0, %%mm1      \n\t" // Unpack Low bytes of a
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "punpcklbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "punpckhbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq (%%esi,%%ebx,), %%mm3  \n\t" // load c=Prior(x-bpp)
+            "pand $ActiveMask, %%mm7     \n\t"
+            "movq %%mm3, %%mm2           \n\t" // load b=Prior(x) step 1
+            "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "punpcklbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            "movq %%mm7, (%%edi,%%ebx,)  \n\t" // write back updated value
+            "movq %%mm7, %%mm1           \n\t" // Now mm1 will be used as Raw(x-bpp)
+            // Do second set of 4 bytes
+            "punpckhbw %%mm0, %%mm2      \n\t" // Unpack Low bytes of b
+            "punpcklbw %%mm0, %%mm1      \n\t" // Unpack Low bytes of a
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // Step ebx to next set of 8 bytes and repeat loop til done
+            "addl $8, %%ebx              \n\t"
+            "packuswb %%mm7, %%mm1       \n\t"
+            "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value
+                                // mm1 will be used as Raw(x-bpp) next loop
+            "jb paeth_4lp                \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+      case 8:                          // bpp == 8
+      {
+         ActiveMask.use  = 0x00000000ffffffff;
+         __asm__ (
+            "movl _dif, %%ebx            \n\t"
+            "movl row, %%edi             \n\t"
+            "movl prev_row, %%esi        \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // PRIME the pump (load the first Raw(x-bpp) data set
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read
+                                       //  a=Raw(x-bpp) bytes
+         "paeth_8lp:                     \n\t"
+            // Do first set of 4 bytes
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "punpcklbw %%mm0, %%mm1      \n\t" // Unpack Low bytes of a
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "punpcklbw %%mm0, %%mm2      \n\t" // Unpack Low bytes of b
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "punpcklbw %%mm0, %%mm3      \n\t" // Unpack Low bytes of c
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "pand $ActiveMask, %%mm7     \n\t"
+            "movq (%%esi,%%ebx,), %%mm2  \n\t" // load b=Prior(x)
+            "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "punpckhbw %%mm0, %%mm3      \n\t" // Unpack High bytes of c
+            "movq %%mm7, (%%edi,%%ebx,)  \n\t" // write back updated value
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // read a=Raw(x-bpp) bytes
+
+            // Do second set of 4 bytes
+            "punpckhbw %%mm0, %%mm2      \n\t" // Unpack High bytes of b
+            "punpckhbw %%mm0, %%mm1      \n\t" // Unpack High bytes of a
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // Create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "pcmpgtw %%mm5, %%mm7        \n\t" // Create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // Only pbv bytes < 0 in mm0
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // Create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // Only pav bytes < 0 in mm7
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // Step ex to next set of 8 bytes and repeat loop til done
+            "addl $8, %%ebx              \n\t"
+            "packuswb %%mm7, %%mm1       \n\t"
+            "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value
+                            // mm1 will be used as Raw(x-bpp) next loop
+            "jb paeth_8lp                \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      case 1:                // bpp = 1
+      case 2:                // bpp = 2
+      default:               // bpp > 8
+      {
+         __asm__ (
+            "movl _dif, %%ebx            \n\t"
+            "cmpl _FullLength, %%ebx     \n\t"
+            "jnb paeth_dend              \n\t"
+            "movl row, %%edi             \n\t"
+            "movl prev_row, %%esi        \n\t"
+            // Do Paeth decode for remaining bytes
+            "movl %%ebx, %%edx           \n\t"
+            "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx in loop below
+            "subl bpp, %%edx             \n\t" // Set edx = ebx - bpp
+         "paeth_dlp:                     \n\t"
+            "xorl %%eax, %%eax           \n\t"
+            // pav = p - a = (a + b - c) - a = b - c
+            "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
+            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+            "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+            "movl %%eax, patemp          \n\t" // Save pav for later use
+            "xorl %%eax, %%eax           \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
+            "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+            "movl %%eax, %%ecx           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "addl patemp, %%eax          \n\t" // pcv = pav + pbv
+            // pc = abs(pcv)
+            "testl $0x80000000, %%eax    \n\t"
+            "jz paeth_dpca               \n\t"
+            "negl %%eax                  \n\t" // reverse sign of neg values
+         "paeth_dpca:                    \n\t"
+            "movl %%eax, pctemp          \n\t" // save pc for later use
+            // pb = abs(pbv)
+            "testl $0x80000000, %%ecx    \n\t"
+            "jz paeth_dpba               \n\t"
+            "negl %%ecx                  \n\t" // reverse sign of neg values
+         "paeth_dpba:                    \n\t"
+            "movl %%ecx, pbtemp          \n\t" // save pb for later use
+            // pa = abs(pav)
+            "movl patemp, %%eax          \n\t"
+            "testl $0x80000000, %%eax    \n\t"
+            "jz paeth_dpaa               \n\t"
+            "negl %%eax                  \n\t" // reverse sign of neg values
+         "paeth_dpaa:                    \n\t"
+            "movl %%eax, patemp          \n\t" // save pa for later use
+            // test if pa <= pb
+            "cmpl %%ecx, %%eax           \n\t"
+            "jna paeth_dabb              \n\t"
+            // pa > pb; now test if pb <= pc
+            "cmpl pctemp, %%ecx          \n\t"
+            "jna paeth_dbbc              \n\t"
+            // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+            "jmp paeth_dpaeth            \n\t"
+         "paeth_dbbc:                    \n\t"
+            // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
+            "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
+            "jmp paeth_dpaeth            \n\t"
+         "paeth_dabb:                    \n\t"
+            // pa <= pb; now test if pa <= pc
+            "cmpl pctemp, %%eax          \n\t"
+            "jna paeth_dabc              \n\t"
+            // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+            "jmp paeth_dpaeth            \n\t"
+         "paeth_dabc:                    \n\t"
+            // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
+            "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
+         "paeth_dpaeth:                  \n\t"
+            "incl %%ebx                  \n\t"
+            "incl %%edx                  \n\t"
+            // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
+            "addb %%cl, -1(%%edi,%%ebx,) \n\t"
+            "cmpl _FullLength, %%ebx     \n\t"
+            "jb paeth_dlp                \n\t"
+         "paeth_dend:                    \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+         );
+      }
+      return;                   // No need to go further with this one
+   }                         // end switch ( bpp )
+   __asm__ (
+      // MMX acceleration complete now do clean-up
+      // Check if any remaining bytes left to decode
+      "movl _MMXLength, %%ebx      \n\t"
+      "cmpl _FullLength, %%ebx     \n\t"
+      "jnb paeth_end               \n\t"
+      "movl row, %%edi             \n\t"
+      "movl prev_row, %%esi        \n\t"
+      // Do Paeth decode for remaining bytes
+      "movl %%ebx, %%edx           \n\t"
+      "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx in loop below
+      "subl bpp, %%edx             \n\t" // Set edx = ebx - bpp
+   "paeth_lp2:                     \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      // pav = p - a = (a + b - c) - a = b - c
+      "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+      "movl %%eax, patemp          \n\t" // Save pav for later use
+      "xorl %%eax, %%eax           \n\t"
+      // pbv = p - b = (a + b - c) - b = a - c
+      "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
+      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
+      "movl %%eax, %%ecx           \n\t"
+      // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+      "addl patemp, %%eax          \n\t" // pcv = pav + pbv
+      // pc = abs(pcv)
+      "testl $0x80000000, %%eax    \n\t"
+      "jz paeth_pca2               \n\t"
+      "negl %%eax                  \n\t" // reverse sign of neg values
+   "paeth_pca2:                    \n\t"
+      "movl %%eax, pctemp          \n\t" // save pc for later use
+      // pb = abs(pbv)
+      "testl $0x80000000, %%ecx    \n\t"
+      "jz paeth_pba2               \n\t"
+      "negl %%ecx                  \n\t" // reverse sign of neg values
+   "paeth_pba2:                    \n\t"
+      "movl %%ecx, pbtemp          \n\t" // save pb for later use
+      // pa = abs(pav)
+      "movl patemp, %%eax          \n\t"
+      "testl $0x80000000, %%eax    \n\t"
+      "jz paeth_paa2               \n\t"
+      "negl %%eax                  \n\t" // reverse sign of neg values
+   "paeth_paa2:                    \n\t"
+      "movl %%eax, patemp          \n\t" // save pa for later use
+      // test if pa <= pb
+      "cmpl %%ecx, %%eax           \n\t"
+      "jna paeth_abb2              \n\t"
+      // pa > pb; now test if pb <= pc
+      "cmpl pctemp, %%ecx          \n\t"
+      "jna paeth_bbc2              \n\t"
+      // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "jmp paeth_paeth2            \n\t"
+   "paeth_bbc2:                    \n\t"
+      // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
+      "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
+      "jmp paeth_paeth2            \n\t"
+   "paeth_abb2:                    \n\t"
+      // pa <= pb; now test if pa <= pc
+      "cmpl pctemp, %%eax          \n\t"
+      "jna paeth_abc2              \n\t"
+      // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "jmp paeth_paeth2            \n\t"
+   "paeth_abc2:                    \n\t"
+      // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
+      "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
+   "paeth_paeth2:                  \n\t"
+      "incl %%ebx                  \n\t"
+      "incl %%edx                  \n\t"
+      // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
+      "addb %%cl, -1(%%edi,%%ebx,) \n\t"
+      "cmpl _FullLength, %%ebx     \n\t"
+      "jb paeth_lp2                \n\t"
+   "paeth_end:                     \n\t"
+      "emms                        \n\t" // End MMX instructions; prep for possible FP instrs.
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+   );
+#endif /* GRR_GCC_MMX_CONVERTED */
+}
+
+// Optimized code for PNG Sub filter decoder: in place, every byte of row from offset bpp onward has row[x-bpp] added to it
+void
+png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)
+{
+#ifdef GRR_GCC_MMX_CONVERTED
+   int bpp;
+   // (everything down to the matching #endif is compiled only when GRR_GCC_MMX_CONVERTED is defined)
+   bpp = (row_info->pixel_depth + 7) >> 3; // bytes per pixel, rounded up from the bit depth
+   _FullLength  = row_info->rowbytes - bpp; // # of bytes to filter (the first bpp bytes are not modified)
+   __asm__ (
+      "movl row, %%edi             \n\t"
+      "movl %%edi, %%esi           \n\t" // lp = row
+      "addl bpp, %%edi             \n\t" // rp = row + bpp
+      "xorl %%eax, %%eax           \n\t" // clear eax; al carries the byte in sub_lp1
+      // get # of bytes to alignment: _dif = ((rp + 15) & ~7) - rp
+      "movl %%edi, _dif            \n\t" // take rp (row + bpp, start of the filtered bytes)
+      "addl $0xf, _dif             \n\t" // add 7 + 8 to incr past
+                                         // alignment boundary
+      "xorl %%ebx, %%ebx           \n\t" // ebx = byte index, starting at 0
+      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
+      "subl %%edi, _dif            \n\t" // subtract from start ==> value
+                                         //  ebx at alignment
+      "jz sub_go                   \n\t" // skip the byte loop if _dif == 0
+      // fix alignment: filter the first _dif bytes one at a time
+   "sub_lp1:                       \n\t"
+      "movb (%%esi,%%ebx,), %%al   \n\t" // al = lp[ebx]
+      "addb %%al, (%%edi,%%ebx,)   \n\t" // rp[ebx] += al
+      "incl %%ebx                  \n\t"
+      "cmpl _dif, %%ebx            \n\t"
+      "jb sub_lp1                  \n\t" // loop while ebx < _dif (unsigned compare)
+   "sub_go:                        \n\t"
+      "movl _FullLength, %%ecx     \n\t"
+      "movl %%ecx, %%edx           \n\t"
+      "subl %%ebx, %%edx           \n\t" // subtract alignment fix
+      "andl $0x00000007, %%edx     \n\t" // calc bytes over mult of 8
+      "subl %%edx, %%ecx           \n\t" // drop over bytes from length
+      "movl %%ecx, _MMXLength      \n\t" // end index used by the 8-byte loops below
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
+   );
+   // NOTE(review): "$ActiveMask"/"$ShiftBpp"/"$ShiftRem" below read as immediates in AT&T syntax; memory operands look intended - confirm with the FIXASM items
+   // Now do the math for the rest of the row: 8 bytes per step from index _dif up to _MMXLength
+   switch ( bpp )
+   {
+        case 3:
+        {
+         ActiveMask.use  = 0x0000ffffff000000; // bytes 3-5 of a quadword (byte 0 = least significant)
+         ShiftBpp.use = 24;       // == 3 * 8
+         ShiftRem.use  = 40;      // == 64 - 24
+         __asm__ (
+            "movl row, %%edi             \n\t"
+            "movq $ActiveMask, %%mm7     \n\t" // Load ActiveMask for 2nd active byte group
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+            "movq %%mm7, %%mm6           \n\t"
+            "movl _dif, %%ebx            \n\t" // resume at the index where the alignment loop stopped
+            "psllq $ShiftBpp, %%mm6      \n\t" // Move mask in mm6 to cover 3rd active
+                                  // byte group
+            // PRIME the pump (load the first Raw(x-bpp) data set)
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // mm1 = the 8 bytes just before rp[ebx]
+         "sub_3lp:                       \n\t"
+            "psrlq $ShiftRem, %%mm1      \n\t" // Shift data for adding 1st bpp bytes
+                          // no need for mask; shift clears inactive bytes
+            // Add 1st active group
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // mm0 = next 8 bytes at rp[ebx]
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 2nd active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+            "pand %%mm7, %%mm1           \n\t" // mask to use only 2nd active group
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 3rd active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+            "pand %%mm6, %%mm1           \n\t" // mask to use only 3rd active group
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm1, %%mm0          \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t" // compare now; the result is consumed by jb below
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array
+            // Prep for doing 1st add at top of loop
+            "movq %%mm0, %%mm1           \n\t"
+            "jb sub_3lp                  \n\t" // loop while ebx < _MMXLength
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      case 1:
+      {
+         // Placed here just in case this is a duplicate of the
+         // non-MMX code for the SUB filter in png_read_filter_row above
+         // (C equivalent of the byte loop below, kept for reference):
+         //         png_bytep rp;
+         //         png_bytep lp;
+         //         png_uint_32 i;
+         //         bpp = (row_info->pixel_depth + 7) >> 3;
+         //         for (i = (png_uint_32)bpp, rp = row + bpp, lp = row;
+         //            i < row_info->rowbytes; i++, rp++, lp++)
+         //      {
+         //            *rp = (png_byte)(((int)(*rp) + (int)(*lp)) & 0xff);
+         //      }
+         __asm__ (
+            "movl _dif, %%ebx            \n\t" // first _dif bytes were already filtered above
+            "movl row, %%edi             \n\t"
+            "cmpl _FullLength, %%ebx     \n\t"
+            "jnb sub_1end                \n\t" // nothing left if ebx >= _FullLength
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "xorl %%eax, %%eax           \n\t"
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+         "sub_1lp:                       \n\t"
+            "movb (%%esi,%%ebx,), %%al   \n\t" // al = lp[ebx]
+            "addb %%al, (%%edi,%%ebx,)   \n\t" // rp[ebx] += al
+            "incl %%ebx                  \n\t"
+            "cmpl _FullLength, %%ebx     \n\t"
+            "jb sub_1lp                  \n\t" // loop while ebx < _FullLength
+         "sub_1end:                      \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list
+         );
+      }
+      return; // byte loop above ran to _FullLength; the tail loop and emms below are skipped
+
+      case 6:
+      case 7:
+      case 4:
+      case 5:
+      {
+         ShiftBpp.use = bpp << 3; // bpp expressed in bits
+         ShiftRem.use = 64 - ShiftBpp.use; // remaining bits of a quadword
+         __asm__ (
+            "movl row, %%edi             \n\t"
+            "movl _dif, %%ebx            \n\t" // resume at the index where the alignment loop stopped
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+            // PRIME the pump (load the first Raw(x-bpp) data set)
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // mm1 = the 8 bytes just before rp[ebx]
+         "sub_4lp:                       \n\t"
+            "psrlq $ShiftRem, %%mm1      \n\t" // Shift data for adding 1st bpp bytes
+                          // no need for mask; shift clears inactive bytes
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // mm0 = next 8 bytes at rp[ebx]
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 2nd active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+                                   // there is no need for any mask
+                                   // since shift clears inactive bits/bytes
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm1, %%mm0          \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t" // compare now; the result is consumed by jb below
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // write back; -8 offsets the early add to ebx
+            "movq %%mm0, %%mm1           \n\t" // Prep for doing 1st add at top of loop
+            "jb sub_4lp                  \n\t" // loop while ebx < _MMXLength
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      case 2:
+      {
+         ActiveMask.use  = 0x00000000ffff0000; // bytes 2-3 of a quadword (byte 0 = least significant)
+         ShiftBpp.use = 16;       // == 2 * 8
+         ShiftRem.use = 48;       // == 64 - 16
+         __asm__ (
+            "movq $ActiveMask, %%mm7     \n\t" // Load ActiveMask for 2nd active byte group
+            "movl _dif, %%ebx            \n\t" // resume at the index where the alignment loop stopped
+            "movq %%mm7, %%mm6           \n\t"
+            "movl row, %%edi             \n\t"
+            "psllq $ShiftBpp, %%mm6      \n\t" // Move mask in mm6 to cover 3rd active
+                                    //  byte group
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "movq %%mm6, %%mm5           \n\t"
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+            "psllq $ShiftBpp, %%mm5      \n\t" // Move mask in mm5 to cover 4th active
+                                    //  byte group
+            // PRIME the pump (load the first Raw(x-bpp) data set)
+            "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // mm1 = the 8 bytes just before rp[ebx]
+         "sub_2lp:                       \n\t"
+            // Add 1st active group
+            "psrlq $ShiftRem, %%mm1      \n\t" // Shift data for adding 1st bpp bytes
+                                    // no need for mask; shift clears inactive
+                                    //  bytes
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // mm0 = next 8 bytes at rp[ebx]
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 2nd active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+            "pand %%mm7, %%mm1           \n\t" // mask to use only 2nd active group
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 3rd active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+            "pand %%mm6, %%mm1           \n\t" // mask to use only 3rd active group
+            "paddb %%mm1, %%mm0          \n\t"
+            // Add 4th active group
+            "movq %%mm0, %%mm1           \n\t" // mov updated Raws to mm1
+            "psllq $ShiftBpp, %%mm1      \n\t" // shift data to position correctly
+            "pand %%mm5, %%mm1           \n\t" // mask to use only 4th active group
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm1, %%mm0          \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t" // compare now; the result is consumed by jb below
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array
+            "movq %%mm0, %%mm1           \n\t" // Prep for doing 1st add at top of loop
+            "jb sub_2lp                  \n\t" // loop while ebx < _MMXLength
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+      case 8:
+      {
+         __asm__ (
+            "movl row, %%edi             \n\t"
+            "movl _dif, %%ebx            \n\t" // resume at the index where the alignment loop stopped
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+            "movl _MMXLength, %%ecx      \n\t"
+            "movq -8(%%edi,%%ebx,), %%mm7 \n\t" // PRIME the pump (load the first
+                                    // Raw(x-bpp) data set)
+            "andl $0x0000003f, %%ecx     \n\t" // calc bytes over mult of 64 (ecx = _MMXLength mod 64)
+         "sub_8lp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // Load Sub(x) for 1st 8 bytes
+            "paddb %%mm7, %%mm0          \n\t"
+            "movq 8(%%edi,%%ebx,), %%mm1 \n\t" // Load Sub(x) for 2nd 8 bytes
+            "movq %%mm0, (%%edi,%%ebx,)  \n\t" // Write Raw(x) for 1st 8 bytes
+                                   // Now mm0 will be used as Raw(x-bpp) for
+                                   // the 2nd group of 8 bytes.  This will be
+                                   // repeated for each group of 8 bytes with
+                                   // the 8th group being used as the Raw(x-bpp)
+                                   // for the 1st group of the next loop.
+            "paddb %%mm0, %%mm1          \n\t"
+            "movq 16(%%edi,%%ebx,), %%mm2 \n\t" // Load Sub(x) for 3rd 8 bytes
+            "movq %%mm1, 8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 2nd 8 bytes
+            "paddb %%mm1, %%mm2          \n\t"
+            "movq 24(%%edi,%%ebx,), %%mm3 \n\t" // Load Sub(x) for 4th 8 bytes
+            "movq %%mm2, 16(%%edi,%%ebx,) \n\t" // Write Raw(x) for 3rd 8 bytes
+            "paddb %%mm2, %%mm3          \n\t"
+            "movq 32(%%edi,%%ebx,), %%mm4 \n\t" // Load Sub(x) for 5th 8 bytes
+            "movq %%mm3, 24(%%edi,%%ebx,) \n\t" // Write Raw(x) for 4th 8 bytes
+            "paddb %%mm3, %%mm4          \n\t"
+            "movq 40(%%edi,%%ebx,), %%mm5 \n\t" // Load Sub(x) for 6th 8 bytes
+            "movq %%mm4, 32(%%edi,%%ebx,) \n\t" // Write Raw(x) for 5th 8 bytes
+            "paddb %%mm4, %%mm5          \n\t"
+            "movq 48(%%edi,%%ebx,), %%mm6 \n\t" // Load Sub(x) for 7th 8 bytes
+            "movq %%mm5, 40(%%edi,%%ebx,) \n\t" // Write Raw(x) for 6th 8 bytes
+            "paddb %%mm5, %%mm6          \n\t"
+            "movq 56(%%edi,%%ebx,), %%mm7 \n\t" // Load Sub(x) for 8th 8 bytes
+            "movq %%mm6, 48(%%edi,%%ebx,) \n\t" // Write Raw(x) for 7th 8 bytes
+            "addl $64, %%ebx             \n\t"
+            "paddb %%mm6, %%mm7          \n\t"
+            "cmpl %%ecx, %%ebx           \n\t" // NOTE(review): ecx <= 63 but ebx >= 64 here, so the unrolled pass seems to run exactly once, unconditionally - confirm the bound
+            "movq %%mm7, -8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 8th 8 bytes
+            "jb sub_8lp                  \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "jnb sub_8lt8                \n\t" // done if ebx >= _MMXLength
+         "sub_8lpA:                      \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // one quadword per pass from here to _MMXLength
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm7, %%mm0          \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // use -8 to offset early add to ebx
+            "movq %%mm0, %%mm7           \n\t" // Move calculated Raw(x) data to mm7 to
+                                    // be the new Raw(x-bpp) for the next loop
+            "jb sub_8lpA                 \n\t"
+         "sub_8lt8:                      \n\t"
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%ecx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+      default:                // bpp greater than 8 bytes
+      {
+         __asm__ (
+            "movl _dif, %%ebx            \n\t" // resume at the index where the alignment loop stopped
+            "movl row, %%edi             \n\t"
+            "movl %%edi, %%esi           \n\t" // lp = row
+            "addl bpp, %%edi             \n\t" // rp = row + bpp
+         "sub_Alp:                       \n\t"
+            "movq (%%edi,%%ebx,), %%mm0  \n\t" // mm0 = 8 bytes at rp[ebx]
+            "movq (%%esi,%%ebx,), %%mm1  \n\t" // mm1 = 8 bytes at lp[ebx]; with bpp > 8 these do not overlap the bytes written below
+            "addl $8, %%ebx              \n\t"
+            "paddb %%mm1, %%mm0          \n\t"
+            "cmpl _MMXLength, %%ebx      \n\t"
+            "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // mov does not affect flags; -8 to offset
+                                   //  add ebx
+            "jb sub_Alp                  \n\t" // loop while ebx < _MMXLength
+
+            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+            : "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list
+         );
+      }
+      break;
+
+   } // end switch ( bpp )
+   // Tail: filter the bytes from _MMXLength up to _FullLength one at a time, then leave MMX state
+   __asm__ (
+      "movl _MMXLength, %%ebx      \n\t"
+      "movl row, %%edi             \n\t"
+      "cmpl _FullLength, %%ebx     \n\t"
+      "jnb sub_end                 \n\t" // no leftover bytes if _MMXLength >= _FullLength
+      "movl %%edi, %%esi           \n\t" // lp = row
+      "xorl %%eax, %%eax           \n\t"
+      "addl bpp, %%edi             \n\t" // rp = row + bpp
+   "sub_lp2:                       \n\t"
+      "movb (%%esi,%%ebx,), %%al   \n\t" // al = lp[ebx]
+      "addb %%al, (%%edi,%%ebx,)   \n\t" // rp[ebx] += al
+      "incl %%ebx                  \n\t"
+      "cmpl _FullLength, %%ebx     \n\t"
+      "jb sub_lp2                  \n\t" // loop while ebx < _FullLength
+   "sub_end:                       \n\t"
+      "emms                        \n\t" // end MMX instructions
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list
+   );
+#endif /* GRR_GCC_MMX_CONVERTED */
+}
+
+// MMX decoder for the PNG "Up" filter: row[i] += prev_row[i] (mod 256), in place
+void
+png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
+                           png_bytep prev_row)
+{  // body exists only when GRR_GCC_MMX_CONVERTED is defined; otherwise the function is empty
+#ifdef GRR_GCC_MMX_CONVERTED
+   png_uint_32 len;
+
+   len = row_info->rowbytes;       // # of bytes to filter
+   __asm__ (  // NOTE(review): row, prev_row and len are written into the template by name while the operand lists below are empty -- presumably what the FIXASM markers mean; confirm
+      "movl row, %%edi             \n\t" // edi = row
+      // get # of bytes to alignment: ecx = (row rounded up to a multiple of 8) - row
+      "movl %%edi, %%ecx           \n\t"
+      "xorl %%ebx, %%ebx           \n\t" // ebx = 0: byte offset used to index both rows
+      "addl $0x7, %%ecx            \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      "andl $0xfffffff8, %%ecx     \n\t"
+      "movl prev_row, %%esi        \n\t" // esi = prev_row
+      "subl %%edi, %%ecx           \n\t"
+      "jz up_go                    \n\t" // row is already 8-byte aligned
+      // fix alignment: add one byte at a time until ebx reaches ecx
+   "up_lp1:                        \n\t"
+      "movb (%%edi,%%ebx,), %%al   \n\t"
+      "addb (%%esi,%%ebx,), %%al   \n\t"
+      "incl %%ebx                  \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx
+      "jb up_lp1                   \n\t"
+   "up_go:                         \n\t"
+      "movl len, %%ecx             \n\t" // ecx = len (row_info->rowbytes)
+      "movl %%ecx, %%edx           \n\t"
+      "subl %%ebx, %%edx           \n\t" // subtract alignment fix
+      "andl $0x0000003f, %%edx     \n\t" // calc bytes over mult of 64
+      "subl %%edx, %%ecx           \n\t" // drop over bytes from length
+      // Unrolled loop - use all MMX registers and interleave to reduce
+      // number of branch instructions (loops) and reduce partial stalls
+   "up_loop:                       \n\t" // each pass handles 64 bytes: eight 8-byte load/paddb/store groups
+      "movq (%%esi,%%ebx,), %%mm1  \n\t"
+      "movq (%%edi,%%ebx,), %%mm0  \n\t"
+      "movq 8(%%esi,%%ebx,), %%mm3 \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "movq 8(%%edi,%%ebx,), %%mm2 \n\t"
+      "movq %%mm0, (%%edi,%%ebx,)  \n\t"
+      "paddb %%mm3, %%mm2          \n\t"
+      "movq 16(%%esi,%%ebx,), %%mm5 \n\t"
+      "movq %%mm2, 8(%%edi,%%ebx,) \n\t"
+      "movq 16(%%edi,%%ebx,), %%mm4 \n\t"
+      "movq 24(%%esi,%%ebx,), %%mm7 \n\t"
+      "paddb %%mm5, %%mm4          \n\t"
+      "movq 24(%%edi,%%ebx,), %%mm6 \n\t"
+      "movq %%mm4, 16(%%edi,%%ebx,) \n\t"
+      "paddb %%mm7, %%mm6          \n\t"
+      "movq 32(%%esi,%%ebx,), %%mm1 \n\t"
+      "movq %%mm6, 24(%%edi,%%ebx,) \n\t"
+      "movq 32(%%edi,%%ebx,), %%mm0 \n\t"
+      "movq 40(%%esi,%%ebx,), %%mm3 \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "movq 40(%%edi,%%ebx,), %%mm2 \n\t"
+      "movq %%mm0, 32(%%edi,%%ebx,) \n\t"
+      "paddb %%mm3, %%mm2          \n\t"
+      "movq 48(%%esi,%%ebx,), %%mm5 \n\t"
+      "movq %%mm2, 40(%%edi,%%ebx,) \n\t"
+      "movq 48(%%edi,%%ebx,), %%mm4 \n\t"
+      "movq 56(%%esi,%%ebx,), %%mm7 \n\t"
+      "paddb %%mm5, %%mm4          \n\t"
+      "movq 56(%%edi,%%ebx,), %%mm6 \n\t"
+      "movq %%mm4, 48(%%edi,%%ebx,) \n\t"
+      "addl $64, %%ebx             \n\t"
+      "paddb %%mm7, %%mm6          \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movq %%mm6, -8(%%edi,%%ebx,) \n\t" // (+56)movq does not affect flags;
+                                     // -8 to offset add ebx
+      "jb up_loop                  \n\t"
+
+      "cmpl $0, %%edx              \n\t" // Test for bytes over mult of 64
+      "jz up_end                   \n\t"
+
+
+      // 2 lines added by lcreeve@netins.net
+      // (mail 11 Jul 98 in png-implement list)
+      "cmpl $8, %%edx              \n\t" //test for less than 8 bytes
+      "jb up_lt8                   \n\t"
+
+
+      "addl %%edx, %%ecx           \n\t" // ecx = end offset including the leftover bytes
+      "andl $0x00000007, %%edx     \n\t" // calc bytes over mult of 8
+      "subl %%edx, %%ecx           \n\t" // drop over bytes from length
+      "jz up_lt8                   \n\t"
+      // Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously
+   "up_lpA:                        \n\t"
+      "movq (%%esi,%%ebx,), %%mm1  \n\t"
+      "movq (%%edi,%%ebx,), %%mm0  \n\t"
+      "addl $8, %%ebx              \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // movq does not affect flags; -8 to offset add ebx
+      "jb up_lpA                   \n\t"
+      "cmpl $0, %%edx              \n\t" // Test for bytes over mult of 8
+      "jz up_end                   \n\t"
+   "up_lt8:                        \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      "addl %%edx, %%ecx           \n\t" // move over byte count into counter
+      // Loop using x86 registers to update remaining bytes
+   "up_lp2:                        \n\t"
+      "movb (%%edi,%%ebx,), %%al   \n\t"
+      "addb (%%esi,%%ebx,), %%al   \n\t"
+      "incl %%ebx                  \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx
+      "jb up_lp2                   \n\t"
+   "up_end:                        \n\t"
+      // Conversion of filtered row completed
+      "emms                        \n\t" // End MMX instructions; prep for possible FP instrs.
+
+      : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
+
+      : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
+
+      : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list
+   );
+#endif /* GRR_GCC_MMX_CONVERTED */
+}
+
+
+#if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW)
+
+// png_read_filter_row: undo PNG filter type `filter` on `row` in place.  Uses png_read_filter_row_c unless MMX is
+// usable; otherwise MMX helpers take rows with pixel_depth > 8 and rowbytes >= 128, inline C loops the rest.
+void
+png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
+   row, png_bytep prev_row, int filter)
+{
+#ifdef PNG_DEBUG
+   char filnm[6];                  // holds a 5-character filter name plus NUL for the debug trace
+#endif
+   #define UseMMX 1
+   // UseMMX is a compile-time switch: 1 keeps the MMX branches in the switch below
+   if (mmx_supported == 2)         // 2 is treated as "not probed yet" (variable is declared outside this function)
+       mmx_supported = mmxsupport();
+
+#ifdef GRR_GCC_MMX_CONVERTED
+   if (!mmx_supported)
+#endif
+   {  // without GRR_GCC_MMX_CONVERTED this block is unconditional: always defer to the C version
+       png_read_filter_row_c(png_ptr, row_info, row, prev_row, filter);
+       return ;
+   }
+
+#ifdef PNG_DEBUG
+   png_debug(1, "in png_read_filter_row\n");
+#if (UseMMX == 1)
+   png_debug1(0,"%s, ", "MMX");
+#else
+   png_debug1(0,"%s, ", "x86");
+#endif
+   switch (filter)                 // map the filter number to a fixed-width name for the trace
+   {
+      case 0: sprintf(filnm, "None ");
+         break;
+      case 1: sprintf(filnm, "Sub  ");
+         break;
+      case 2: sprintf(filnm, "Up   ");
+         break;
+      case 3: sprintf(filnm, "Avg  ");
+         break;
+      case 4: sprintf(filnm, "Paeth");
+         break;
+      default: sprintf(filnm, "Unknw");
+         break;
+   }
+   png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm);
+   png_debug2(0, "pd=%2d, b=%d, ", (int)row_info->pixel_depth,
+      (int)((row_info->pixel_depth + 7) >> 3));
+   png_debug1(0,"len=%8d, ", row_info->rowbytes);
+#endif
+
+   switch (filter)
+   {
+      case PNG_FILTER_VALUE_NONE:
+         break;
+
+      case PNG_FILTER_VALUE_SUB:
+#if (UseMMX == 1)
+         if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))
+         {  // MMX helper only for pixels wider than one byte and rows of at least 128 bytes
+            png_read_filter_row_mmx_sub(row_info, row);
+         }
+         else
+#endif
+         {
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;   // bytes per pixel, rounded up
+            png_bytep rp = row + bpp;
+            png_bytep lp = row;
+
+            for (i = bpp; i < istop; i++)   // each byte adds the byte bpp positions to its left
+            {
+               *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
+               rp++;
+            }
+         }  //end !UseMMX
+         break;
+
+      case PNG_FILTER_VALUE_UP:
+#if (UseMMX == 1)
+         if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))
+         {
+            png_read_filter_row_mmx_up(row_info, row, prev_row);
+         }
+         else
+#endif
+         {
+            png_bytep rp;
+            png_bytep pp;
+            png_uint_32 i;
+            for (i = 0, rp = row, pp = prev_row;
+               i < row_info->rowbytes; i++, rp++, pp++)
+            {  // each byte adds the byte at the same position in the previous row
+                  *rp = (png_byte)(((int)(*rp) + (int)(*pp)) & 0xff);
+            }
+         }  //end !UseMMX
+         break;
+
+      case PNG_FILTER_VALUE_AVG:
+#if (UseMMX == 1)
+         if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))
+         {
+            png_read_filter_row_mmx_avg(row_info, row, prev_row);
+         }
+         else
+#endif
+         {
+            png_uint_32 i;
+            png_bytep rp = row;
+            png_bytep pp = prev_row;
+            png_bytep lp = row;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+            png_uint_32 istop = row_info->rowbytes - bpp;
+
+            for (i = 0; i < bpp; i++)   // first pixel: no left neighbour, add half of the byte above
+            {
+               *rp = (png_byte)(((int)(*rp) +
+                  ((int)(*pp++) >> 1)) & 0xff);
+               rp++;
+            }
+
+            for (i = 0; i < istop; i++)   // remaining bytes: add (above + left) >> 1
+            {
+               *rp = (png_byte)(((int)(*rp) +
+                  ((int)(*pp++ + *lp++) >> 1)) & 0xff);
+               rp++;
+            }
+         }  //end !UseMMX
+         break;
+
+      case PNG_FILTER_VALUE_PAETH:
+#if (UseMMX == 1)
+         if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128))
+         {
+            png_read_filter_row_mmx_paeth(row_info, row, prev_row);
+         }
+         else
+#endif
+         {
+            png_uint_32 i;
+            png_bytep rp = row;
+            png_bytep pp = prev_row;
+            png_bytep lp = row;
+            png_bytep cp = prev_row;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+            png_uint_32 istop=row_info->rowbytes - bpp;
+
+            for (i = 0; i < bpp; i++)   // first pixel: the predictor is simply the byte above
+            {
+               *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+               rp++;
+            }
+
+            for (i = 0; i < istop; i++)   // use leftover rp,pp
+            {
+               int a, b, c, pa, pb, pc, p;
+
+               a = *lp++;   // left neighbour (lp trails rp by bpp bytes)
+               b = *pp++;   // byte above
+               c = *cp++;   // upper-left neighbour (cp trails pp by bpp bytes)
+
+               p = b - c;
+               pc = a - c;
+
+#ifdef PNG_USE_ABS
+               pa = abs(p);
+               pb = abs(pc);
+               pc = abs(p + pc);
+#else
+               pa = p < 0 ? -p : p;
+               pb = pc < 0 ? -pc : pc;
+               pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+               /*
+                  if (pa <= pb && pa <= pc)
+                     p = a;
+                  else if (pb <= pc)
+                     p = b;
+                  else
+                     p = c;
+                */
+
+               p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;   // pick the candidate with the smallest distance; ties favour a, then b
+
+               *rp = (png_byte)(((int)(*rp) + p) & 0xff);
+               rp++;
+            }
+         }  //end !UseMMX
+         break;
+
+      default:
+         png_error(png_ptr, "Bad adaptive filter type");
+         break;
+   }
+}
+
+#endif /* PNG_HAVE_ASSEMBLER_READ_FILTER_ROW */
+
+
+// GRR NOTES:  (1) the following code assumes 386 or better (pushfl/popfl)
+//             (2) all instructions compile with gcc 2.7.2.3 and later
+//             (3) the function is moved down here to prevent gcc from
+//                  inlining it in multiple places and then barfing be-
+//                  cause the ".NOT_SUPPORTED" label is multiply defined
+//             [is there a way to signal that a *single* function should
+//              not be inlined?  is there a way to modify the label for
+//              each inlined instance, e.g., by appending _1, _2, etc.?
+//              maybe if don't use leading "." in label name? (not tested)]
+
+#ifdef ORIG_THAT_USED_TO_CLOBBER_EBX
+
+int mmxsupport(void)                 // returns 1 if CPUID reports MMX, else 0
+{
+    int mmx_supported_local = 0;     // default result, kept when any check below fails
+
+    __asm__ (
+//      ".byte  0x66          \n\t"  // convert 16-bit pushf to 32-bit pushfd
+//      "pushf                \n\t"  // save Eflag to stack
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack into eax
+        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushl %%eax          \n\t"  // save modified Eflag back to stack
+//      ".byte  0x66          \n\t"  // convert 16-bit popf to 32-bit popfd
+//      "popf                 \n\t"  // restore modified value to Eflag reg
+        "popfl                \n\t"  // restore modified value to Eflag reg
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack
+        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
+        "jz .NOT_SUPPORTED    \n\t"  // if same, CPUID instr. is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero
+//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
+        "cpuid                \n\t"  // get the CPU identification info
+        "cmpl $1, %%eax       \n\t"  // make sure eax return non-zero value
+        "jl .NOT_SUPPORTED    \n\t"  // if eax is zero, MMX is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
+        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
+                                     // faster than the instruction "mov eax, 1"
+        "cpuid                \n\t"  // get the CPU identification info again
+        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
+        "jz .NOT_SUPPORTED    \n\t"  // non-zero = yes, MMX IS supported
+
+        "movl $1, %0          \n\t"  // set return value to 1 and fall through
+
+    ".NOT_SUPPORTED:          \n\t"  // target label for jump instructions
+        "movl %0, %%eax       \n\t"  // move return value to eax
+                                     // DONE
+
+        : "=m" (mmx_supported_local) // %0 (output list:  memory only)
+
+        : "m" (mmx_supported_local)  // %1: same variable as input, so its "= 0" stays live
+
+        : "%eax", "%ebx",            // clobber list
+          "%ecx", "%edx"
+//      , "memory"   // if write to a variable gcc thought was in a reg
+//      , "cc"       // "condition codes" (flag bits)
+    );
+
+    //mmx_supported_local=0; // test code for force don't support MMX
+    //printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);
+
+    return mmx_supported_local;
+}
+
+#else /* !ORIG_THAT_USED_TO_CLOBBER_EBX */
+
+int mmxsupport(void)                 // returns 1 if CPUID reports MMX, else 0
+{
+    int mmx_supported_local;         // receives eax through the "=a" output below
+
+    // The result is returned through a normal C return statement.  A "ret"
+    // inside the asm would bypass whatever epilogue gcc emits (e.g. when a
+    // frame pointer is set up), and the function would have no return value
+    // as far as the compiler is concerned.  Numeric local labels ("0:", "1:")
+    // may be emitted any number of times, so inlining cannot duplicate them.
+    __asm__ __volatile__ (
+        "pushl %%ebx          \n\t"  // ebx gets clobbered by CPUID instruction
+//      ".byte  0x66          \n\t"  // convert 16-bit pushf to 32-bit pushfd
+//      "pushf                \n\t"  // save Eflag to stack
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack into eax
+        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushl %%eax          \n\t"  // save modified Eflag back to stack
+//      ".byte  0x66          \n\t"  // convert 16-bit popf to 32-bit popfd
+//      "popf                 \n\t"  // restore modified value to Eflag reg
+        "popfl                \n\t"  // restore modified value to Eflag reg
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack
+        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
+        "jz 0f                \n\t"  // if same, CPUID instr. is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero
+//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
+        "cpuid                \n\t"  // get the CPU identification info
+        "cmpl $1, %%eax       \n\t"  // make sure eax return non-zero value
+        "jl 0f                \n\t"  // if eax is zero, MMX is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
+        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
+                                     // faster than the instruction "mov eax, 1"
+        "cpuid                \n\t"  // get the CPU identification info again
+        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
+        "jz 0f                \n\t"  // non-zero = yes, MMX IS supported
+
+        "movl $1, %%eax       \n\t"  // set return value to 1
+        "jmp 1f               \n\t"  // DONE:  have MMX support
+
+    "0:                       \n\t"  // target label for "not supported" jumps
+        "movl $0, %%eax       \n\t"  // set return value to 0
+
+    "1:                       \n\t"  // common exit for both outcomes
+        "popl %%ebx           \n\t"  // restore ebx ("row" in png_do_interlace)
+
+        : "=a" (mmx_supported_local) // %0 (output list:  result in eax)
+
+        :                            // any variables used on input (none)
+
+        : "%ecx", "%edx"             // overwritten by CPUID / used as scratch
+        , "cc"                       // "condition codes" (flag bits)
+//      , "%ebx"                     // GRR:  we handle this one manually
+//      , "memory"   // if write to a variable gcc thought was in a reg
+    );
+
+    //mmx_supported_local=0; // test code for force don't support MMX
+    //printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);
+
+    return mmx_supported_local;
+}
+
+#endif /* ?ORIG_THAT_USED_TO_CLOBBER_EBX */
+
+#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */
+
+
diff --git a/pngget.c b/pngget.c
index dd92fb0..04504b1 100644
--- a/pngget.c
+++ b/pngget.c
@@ -1,7 +1,7 @@
 
 /* pngget.c - retrieval of values from info struct
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -150,6 +150,7 @@
    return (0);
 }
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 float
 png_get_pixel_aspect_ratio(png_structp png_ptr, png_infop info_ptr)
    {
@@ -167,6 +168,7 @@
 #endif
       return ((float)0.0);
 }
+#endif
 
 png_uint_32
 png_get_x_offset_microns(png_structp png_ptr, png_infop info_ptr)
@@ -232,7 +234,7 @@
    return (0);
 }
 
-#ifdef PNG_INCH_CONVERSIONS
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
 png_uint_32
 png_get_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
 {
@@ -302,7 +304,7 @@
    return (retval);
 }
 #endif /* PNG_READ_pHYs_SUPPORTED */
-#endif  /* PNG_INCH_CONVERSIONS */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
 
 /* png_get_channels really belongs in here, too, but it's been around longer */
 
@@ -343,6 +345,7 @@
 #endif
 
 #if defined(PNG_READ_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 png_uint_32
 png_get_cHRM(png_structp png_ptr, png_infop info_ptr,
    double *white_x, double *white_y, double *red_x, double *red_y,
@@ -372,8 +375,41 @@
    return (0);
 }
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+png_uint_32   /* PNG_INFO_cHRM when chromaticities were available, else 0 */
+png_get_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
+   png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
+   png_fixed_point *blue_x, png_fixed_point *blue_y)
+{
+   /* Needs both structs and a cHRM entry flagged valid in the info struct. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   if (!(info_ptr->valid & PNG_INFO_cHRM))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "cHRM");
+
+   /* Every destination is optional; a NULL pointer skips that value. */
+   if (white_x != NULL) *white_x = info_ptr->int_x_white;
+   if (white_y != NULL) *white_y = info_ptr->int_y_white;
+
+   if (red_x != NULL)   *red_x = info_ptr->int_x_red;
+   if (red_y != NULL)   *red_y = info_ptr->int_y_red;
+
+   if (green_x != NULL) *green_x = info_ptr->int_x_green;
+   if (green_y != NULL) *green_y = info_ptr->int_y_green;
+
+   if (blue_x != NULL)  *blue_x = info_ptr->int_x_blue;
+   if (blue_y != NULL)  *blue_y = info_ptr->int_y_blue;
+
+   return (PNG_INFO_cHRM);
+}
+#endif
+#endif
 
 #if defined(PNG_READ_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 png_uint_32
 png_get_gAMA(png_structp png_ptr, png_infop info_ptr, double *file_gamma)
 {
@@ -387,6 +423,22 @@
    return (0);
 }
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+png_uint_32   /* PNG_INFO_gAMA when a gamma value was stored, else 0 */
+png_get_gAMA_fixed(png_structp png_ptr, png_infop info_ptr,
+    png_fixed_point *int_file_gamma)
+{
+   /* Needs both structs, a destination, and a gAMA entry flagged valid. */
+   if (png_ptr == NULL || info_ptr == NULL || int_file_gamma == NULL)
+      return (0);
+   if (!(info_ptr->valid & PNG_INFO_gAMA))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "gAMA");
+   *int_file_gamma = info_ptr->int_gamma;
+   return (PNG_INFO_gAMA);
+}
+#endif
+#endif
 
 #if defined(PNG_READ_sRGB_SUPPORTED)
 png_uint_32
@@ -425,9 +477,9 @@
 }
 #endif
 
-#if defined(PNG_READ_sPLT_SUPPORTED) || defined(PNG_READ_zTXt_SUPPORTED)
+#if defined(PNG_READ_sPLT_SUPPORTED)
 png_uint_32
-png_get_spalettes(png_structp png_ptr, png_infop info_ptr, 
+png_get_spalettes(png_structp png_ptr, png_infop info_ptr,
              png_spalette_pp spalettes)
 {
    if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL)
@@ -540,9 +592,10 @@
 #endif
 
 #if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 png_uint_32
 png_get_sCAL(png_structp png_ptr, png_infop info_ptr,
-             png_charpp unit, double *width, double *height)
+             int *unit, double *width, double *height)
 {
     if (png_ptr != NULL && info_ptr != NULL && info_ptr->valid & PNG_INFO_sCAL)
     {
@@ -553,6 +606,23 @@
     }
     return(0);
 }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+png_uint_32   /* PNG_INFO_sCAL when the sCAL fields were copied out, else 0 */
+png_get_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int *unit, png_charpp width, png_charpp height)
+{
+    /* Needs both structs and an sCAL entry flagged valid in the info struct. */
+    if (png_ptr == NULL || info_ptr == NULL ||
+        (info_ptr->valid & PNG_INFO_sCAL) == 0)
+        return(0);
+    *unit = info_ptr->scal_unit;
+    *width = info_ptr->scal_s_width;     /* pointer handed out as-is, no copy */
+    *height = info_ptr->scal_s_height;   /* pointer handed out as-is, no copy */
+    return (PNG_INFO_sCAL);
+}
+#endif
+#endif
 #endif
 
 #if defined(PNG_READ_pHYs_SUPPORTED)
@@ -690,6 +760,17 @@
 }
 #endif
 
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+png_uint_32   /* count of stored unknown chunks; 0 when info_ptr is NULL */
+png_get_unknown_chunks(png_structp png_ptr, png_infop info_ptr,
+             png_unknown_chunkpp unknowns)
+{
+   if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL)
+     *unknowns = info_ptr->unknown_chunks;   /* list pointer handed out as-is */
+   return (info_ptr == NULL ? 0 : (png_uint_32)info_ptr->unknown_chunks_num);
+}
+#endif
+
 #if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
 png_byte
 png_get_rgb_to_gray_status (png_structp png_ptr)
diff --git a/pngmem.c b/pngmem.c
index 18f7d98..12bfaea 100644
--- a/pngmem.c
+++ b/pngmem.c
@@ -1,7 +1,7 @@
 
 /* pngmem.c - stub functions for memory allocation
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pngpread.c b/pngpread.c
index 691936c..cd1c82c 100644
--- a/pngpread.c
+++ b/pngpread.c
@@ -1,7 +1,7 @@
 
 /* pngpread.c - read a png file in push mode
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -151,6 +151,12 @@
 #if defined(PNG_READ_hIST_SUPPORTED)
       PNG_hIST;
 #endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_iTXt;
+#endif
 #if defined(PNG_READ_oFFs_SUPPORTED)
       PNG_oFFs;
 #endif
@@ -163,9 +169,15 @@
 #if defined(PNG_READ_sBIT_SUPPORTED)
       PNG_sBIT;
 #endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_sCAL;
+#endif
 #if defined(PNG_READ_sRGB_SUPPORTED)
       PNG_sRGB;
 #endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_sPLT;
+#endif
 #if defined(PNG_READ_tEXt_SUPPORTED)
       PNG_tEXt;
 #endif
@@ -305,6 +317,30 @@
       png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length);
    }
 #endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
 #if defined(PNG_READ_tRNS_SUPPORTED)
    else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
    {
@@ -377,6 +413,18 @@
       png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length);
    }
 #endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
 #if defined(PNG_READ_tIME_SUPPORTED)
    else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
    {
@@ -402,7 +450,7 @@
    }
 #endif
 #if defined(PNG_READ_iTXt_SUPPORTED)
-   else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+   else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
    {
       png_push_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length);
    }
@@ -841,30 +889,30 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    const int png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    const int png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
-   
+
    /* start of interlace block in the y direction */
    const int png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
-   
+
    /* offset to next interlace block in the y direction */
    const int png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
-   
+
    /* Width of interlace block.  This is not currently used - if you need
     * it, uncomment it here and in png.h
    const int png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
    */
-   
+
    /* Height of interlace block.  This is not currently used - if you need
     * it, uncomment it here and in png.h
    const int png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
    */
 #endif
-   
+
    png_ptr->row_number++;
    if (png_ptr->row_number < png_ptr->num_rows)
       return;
@@ -978,6 +1026,7 @@
       text_ptr->compression = PNG_TEXT_COMPRESSION_NONE;
       text_ptr->lang = (char *)NULL;
       text_ptr->key = key;
+      text_ptr->lang_key = (char *)NULL;
       text_ptr->text = text;
 
       png_set_text(png_ptr, info_ptr, text_ptr, 1);
@@ -1156,8 +1205,9 @@
 
       text_ptr = (png_textp)png_malloc(png_ptr, (png_uint_32)sizeof(png_text));
       text_ptr->compression = PNG_TEXT_COMPRESSION_zTXt;
-      text_ptr->lang = (char *)NULL;
       text_ptr->key = key;
+      text_ptr->lang = (char *)NULL;
+      text_ptr->lang_key = (char *)NULL;
       text_ptr->text = text;
 
       png_set_text(png_ptr, info_ptr, text_ptr, 1);
@@ -1201,6 +1251,7 @@
 void
 png_push_read_iTXt(png_structp png_ptr, png_infop info_ptr)
 {
+
    if (png_ptr->buffer_size && png_ptr->current_text_left)
    {
       png_size_t text_size;
@@ -1216,9 +1267,11 @@
    if (!(png_ptr->current_text_left))
    {
       png_textp text_ptr;
-      png_charp text;
-      png_charp lang;
       png_charp key;
+      int comp_flag = 0;
+      png_charp lang;
+      png_charp lang_key;
+      png_charp text;
 
       if (png_ptr->buffer_size < 4)
       {
@@ -1233,26 +1286,36 @@
          return;
 #endif
 
-      lang = png_ptr->current_text;
+      key = png_ptr->current_text;
       png_ptr->current_text = 0;
 
-      for (key = lang; *key; key++)
+      for (lang = key; *lang; lang++)
          /* empty loop */ ;
 
-      if (key != lang + png_ptr->current_text_size)
-         key++;
+      if (lang != key + png_ptr->current_text_size)
+         lang++;
 
-      for (text = key; *text; text++)
+      comp_flag = *lang++;
+      lang++;     /* skip comp_type, always zero */
+
+      for (lang_key = lang; *lang_key; lang_key++)
+         /* empty loop */ ;
+      lang_key++;        /* skip NUL separator */
+
+      for (text = lang_key; *text; text++)
          /* empty loop */ ;
 
       if (text != key + png_ptr->current_text_size)
          text++;
 
       text_ptr = (png_textp)png_malloc(png_ptr, (png_uint_32)sizeof(png_text));
-      text_ptr->compression = PNG_TEXT_COMPRESSION_NONE;
-      text_ptr->lang = lang;
+      text_ptr->compression = comp_flag + 2;
       text_ptr->key = key;
+      text_ptr->lang = lang;
+      text_ptr->lang_key = lang_key;
       text_ptr->text = text;
+      text_ptr->text_length = 0;
+      text_ptr->itxt_length = png_strlen(text);
 
       png_set_text(png_ptr, info_ptr, text_ptr, 1);
 
@@ -1280,6 +1343,30 @@
       if(info_ptr == NULL) return;
    }
 
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS)
+   {
+       png_unknown_chunk chunk;
+
+#ifdef PNG_MAX_MALLOC_64K
+       if (length > (png_uint_32)65535L)
+       {
+           png_warning(png_ptr, "unknown chunk too large to fit in memory");
+           skip = length - (png_uint_32)65535L;
+           length = (png_uint_32)65535L;
+       }
+#endif
+
+       strcpy((png_charp)chunk.name, (png_charp)png_ptr->chunk_name);
+       chunk.data = (png_bytep)png_malloc(png_ptr, length);
+       png_crc_read(png_ptr, chunk.data, length);
+       chunk.size = length;
+       png_set_unknown_chunks(png_ptr, info_ptr, &chunk, 1);
+       png_free(png_ptr, chunk.data);
+   }
+   else
+#endif
+
    png_push_crc_skip(png_ptr, length);
 }
 
diff --git a/pngread.c b/pngread.c
index ddad581..c128ecc 100644
--- a/pngread.c
+++ b/pngread.c
@@ -1,7 +1,7 @@
 
 /* pngread.c - read a PNG file
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -214,7 +214,7 @@
 #if defined(PNG_READ_sBIT_SUPPORTED)
       PNG_sBIT;
 #endif
-#if defined(PNG_READ_pCAL_SUPPORTED)
+#if defined(PNG_READ_sCAL_SUPPORTED)
       PNG_sCAL;
 #endif
 #if defined(PNG_READ_sPLT_SUPPORTED)
@@ -607,7 +607,7 @@
  * not called png_set_interlace_handling(), the display_row buffer will
  * be ignored, so pass NULL to it.
  *
- * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.5f.
+ * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.5j.
  */
 
 void
@@ -656,7 +656,7 @@
  * only call this function once.  If you desire to have an image for
  * each pass of a interlaced image, use png_read_rows() instead.
  *
- * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.5f.
+ * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.5j.
  */
 void
 png_read_image(png_structp png_ptr, png_bytepp image)
@@ -743,7 +743,7 @@
 #if defined(PNG_READ_sBIT_SUPPORTED)
       PNG_sBIT;
 #endif
-#if defined(PNG_READ_pCAL_SUPPORTED)
+#if defined(PNG_READ_sCAL_SUPPORTED)
       PNG_sCAL;
 #endif
 #if defined(PNG_READ_sPLT_SUPPORTED)
diff --git a/pngrio.c b/pngrio.c
index a8c1657..01306e1 100644
--- a/pngrio.c
+++ b/pngrio.c
@@ -1,7 +1,7 @@
 
 /* pngrio.c - functions for data input
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pngrtran.c b/pngrtran.c
index 9215bfa..33367c3 100644
--- a/pngrtran.c
+++ b/pngrtran.c
@@ -1,7 +1,7 @@
 
 /* pngrtran.c - transforms the data in a row for PNG readers
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -69,7 +69,8 @@
    }
 }
 
-#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_FLOATING_POINT_SUPPORTED)
 /* handle alpha and tRNS via a background color */
 void
 png_set_background(png_structp png_ptr,
@@ -504,7 +505,7 @@
 }
 #endif
 
-#if defined(PNG_READ_GAMMA_SUPPORTED)
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
 /* Transform the image from the file_gamma to the screen_gamma.  We
  * only do transformations on images where the file_gamma and screen_gamma
  * are not close reciprocals, otherwise it slows things down slightly, and
@@ -581,7 +582,8 @@
 }
 #endif
 
-#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) && \
+    defined(PNG_FLOATING_POINT_SUPPORTED)
 /* Convert a RGB image to a grayscale of the same width.  This allows us,
  * for example, to convert a 24 bpp RGB image into an 8 bpp grayscale image.
  */
@@ -718,7 +720,7 @@
 #if defined(PNG_READ_BACKGROUND_SUPPORTED)
    png_ptr->background_1 = png_ptr->background;
 #endif
-#if defined(PNG_READ_GAMMA_SUPPORTED)
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
    if (png_ptr->transformations & (PNG_GAMMA | PNG_RGB_TO_GRAY))
    {
       png_build_gamma_table(png_ptr);
@@ -731,7 +733,6 @@
             png_colorp palette = png_ptr->palette;
             int num_palette = png_ptr->num_palette;
             int i;
-
             if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
             {
                back.red = png_ptr->gamma_table[png_ptr->background.red];
@@ -789,7 +790,6 @@
                back_1.blue = (png_byte)(pow(
                   (double)png_ptr->background.blue/255, g) * 255.0 + .5);
             }
-
             for (i = 0; i < num_palette; i++)
             {
                if (i < (int)png_ptr->num_trans && png_ptr->trans[i] != 0xff)
@@ -998,8 +998,15 @@
 
 #if defined(PNG_READ_GAMMA_SUPPORTED)
    if (png_ptr->transformations & PNG_GAMMA)
+   {
+#ifdef PNG_FLOATING_POINT_SUPPORTED
       info_ptr->gamma = png_ptr->gamma;
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      info_ptr->int_gamma = png_ptr->int_gamma;
+#endif
+   }
+#endif
 
 #if defined(PNG_READ_16_TO_8_SUPPORTED)
    if ((png_ptr->transformations & PNG_16_TO_8) && info_ptr->bit_depth == 16)
@@ -1290,7 +1297,7 @@
          png_ptr->row_info.channels = png_ptr->user_transform_channels;
       png_ptr->row_info.pixel_depth = (png_byte)(png_ptr->row_info.bit_depth *
          png_ptr->row_info.channels);
-      png_ptr->row_info.rowbytes = (png_ptr->row_info.width * 
+      png_ptr->row_info.rowbytes = (png_ptr->row_info.width *
          png_ptr->row_info.pixel_depth+7)>>3;
    }
 #endif
@@ -1988,7 +1995,7 @@
 #endif
 
 #if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-/* reduce RGB files to grayscale, with or without alpha 
+/* reduce RGB files to grayscale, with or without alpha
  * using the equation given in Poynton's ColorFAQ at
  * <http://www.inforamp.net/~poynton/>
  * Copyright (c) 1998-01-04 Charles Poynton poynton@inforamp.net
@@ -1996,7 +2003,7 @@
  *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
  *
  *  We approximate this with
- * 
+ *
  *     Y = 0.211 * R    + 0.715 * G    + 0.074 * B
  *
  *  which can be expressed with integers as
@@ -2072,7 +2079,7 @@
                }
             }
          }
- 
+
          else /* RGB bit_depth == 16 */
          {
 #if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
@@ -2097,7 +2104,7 @@
                                   png_ptr->gamma_shift][red>>8];
                      png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
                                   png_ptr->gamma_shift][green>>8];
-                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >> 
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
                                   png_ptr->gamma_shift][blue>>8];
                      png_uint_16 gray16  = (png_uint_16)((rc*red_1 + gc*green_1
                                   + bc*blue_1)>>8);
@@ -2105,7 +2112,7 @@
                          png_ptr->gamma_shift][gray16 >> 8];
                      rgb_error |= 1;
                   }
-                  
+
                   *(dp++) = (png_byte)((w>>8) & 0xff);
                   *(dp++) = (png_byte)(w & 0xff);
                }
@@ -2194,7 +2201,7 @@
                                   png_ptr->gamma_shift][red>>8];
                      png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
                                   png_ptr->gamma_shift][green>>8];
-                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >> 
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
                                   png_ptr->gamma_shift][blue>>8];
                      png_uint_16 gray16  = (png_uint_16)((rc * red_1
                                   + gc * green_1 + bc * blue_1)>>8);
@@ -2202,7 +2209,7 @@
                          png_ptr->gamma_shift][gray16 >> 8];
                      rgb_error |= 1;
                   }
-                  
+
                   *(dp++) = (png_byte)((w>>8) & 0xff);
                   *(dp++) = (png_byte)(w & 0xff);
                   *(dp++) = *(sp++);  /* alpha */
@@ -2298,7 +2305,8 @@
    int num_palette)
 {
    png_debug(1, "in png_correct_palette\n");
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
    if (png_ptr->transformations & (PNG_GAMMA | PNG_BACKGROUND))
    {
       png_color back, back_1;
@@ -3764,6 +3772,7 @@
 }
 #endif
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 #if defined(PNG_READ_GAMMA_SUPPORTED)
 static int png_gamma_shift[] =
    {0x10, 0x21, 0x42, 0x84, 0x110, 0x248, 0x550, 0xff0};
@@ -3814,7 +3823,7 @@
                g) * 255.0 + .5);
          }
 
-         
+
          png_ptr->gamma_from_1 = (png_bytep)png_malloc(png_ptr,
             (png_uint_32)256);
 
@@ -3983,4 +3992,6 @@
  }
 }
 #endif
+/* To do: install integer version of png_build_gamma_table here */
+#endif
 
diff --git a/pngrutil.c b/pngrutil.c
index 4669b4e..94db0af 100644
--- a/pngrutil.c
+++ b/pngrutil.c
@@ -1,7 +1,7 @@
 
 /* pngrutil.c - utilities to read a PNG file
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -145,7 +145,7 @@
  * holding the original prefix part and an uncompressed version of the
  * trailing part (the malloc area passed in is freed).
  */
-png_charp png_decompress_chunk(png_structp png_ptr, int comp_type, 
+png_charp png_decompress_chunk(png_structp png_ptr, int comp_type,
                               png_charp chunkdata, png_size_t chunklength,
                               png_size_t prefix_size)
 {
@@ -196,7 +196,7 @@
          {
             if (text == NULL)
             {
-               text_size = prefix_size + 
+               text_size = prefix_size +
                    png_ptr->zbuf_size - png_ptr->zstream.avail_out;
                text = (png_charp)png_malloc(png_ptr, text_size + 1);
                png_memcpy(text + prefix_size, png_ptr->zbuf,
@@ -236,7 +236,6 @@
    }
    else /* if (comp_type >= PNG_TEXT_COMPRESSION_LAST) */
    {
-      png_size_t text_size;
 #if !defined(PNG_NO_STDIO)
       char umsg[50];
 
@@ -496,8 +495,10 @@
 void
 png_handle_gAMA(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
-   png_uint_32 igamma;
+   png_fixed_point igamma;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float file_gamma;
+#endif
    png_byte buf[4];
 
    png_debug(1, "in png_handle_gAMA\n");
@@ -536,14 +537,14 @@
    if (png_crc_finish(png_ptr, 0))
       return;
 
-   igamma = png_get_uint_32(buf);
+   igamma = (png_fixed_point)png_get_uint_32(buf);
    /* check for zero gamma */
    if (igamma == 0)
       return;
 
 #if defined(PNG_READ_sRGB_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_sRGB)
-      if(fabs((float)igamma - 45455.)>500.)
+      if(igamma < 45000L || igamma > 46000L)
       {
          png_warning(png_ptr,
            "Ignoring incorrect gAMA value when sRGB is also present");
@@ -554,11 +555,16 @@
       }
 #endif /* PNG_READ_sRGB_SUPPORTED */
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    file_gamma = (float)igamma / (float)100000.0;
 #ifdef PNG_READ_GAMMA_SUPPORTED
    png_ptr->gamma = file_gamma;
 #endif
    png_set_gAMA(png_ptr, info_ptr, file_gamma);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_gAMA_fixed(png_ptr, info_ptr, igamma);
+#endif
 }
 #endif
 
@@ -633,8 +639,11 @@
 png_handle_cHRM(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
    png_byte buf[4];
-   png_uint_32 val;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
+#endif
+   png_fixed_point int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue;
 
    png_debug(1, "in png_handle_cHRM\n");
 
@@ -669,15 +678,13 @@
    }
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   white_x = (float)val / (float)100000.0;
+   int_x_white = (png_fixed_point)png_get_uint_32(buf);
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   white_y = (float)val / (float)100000.0;
+   int_y_white = (png_fixed_point)png_get_uint_32(buf);
 
-   if (white_x < 0 || white_x > 0.8 || white_y < 0 || white_y > 0.8 ||
-       white_x + white_y > 1.0)
+   if (int_x_white > 80000L || int_y_white > 80000L ||
+      int_x_white + int_y_white > 100000L)
    {
       png_warning(png_ptr, "Invalid cHRM white point");
       png_crc_finish(png_ptr, 24);
@@ -685,15 +692,13 @@
    }
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   red_x = (float)val / (float)100000.0;
+   int_x_red = (png_fixed_point)png_get_uint_32(buf);
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   red_y = (float)val / (float)100000.0;
+   int_y_red = (png_fixed_point)png_get_uint_32(buf);
 
-   if (red_x < 0 || red_x > 0.8 || red_y < 0 || red_y > 0.8 ||
-       red_x + red_y > 1.0)
+   if (int_x_red > 80000L || int_y_red > 80000L ||
+      int_x_red + int_y_red > 100000L)
    {
       png_warning(png_ptr, "Invalid cHRM red point");
       png_crc_finish(png_ptr, 16);
@@ -701,15 +706,13 @@
    }
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   green_x = (float)val / (float)100000.0;
+   int_x_green = (png_fixed_point)png_get_uint_32(buf);
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   green_y = (float)val / (float)100000.0;
+   int_y_green = (png_fixed_point)png_get_uint_32(buf);
 
-   if (green_x < 0 || green_x > 0.8 || green_y < 0 || green_y > 0.8 ||
-       green_x + green_y > 1.0)
+   if (int_x_green > 80000L || int_y_green > 80000L ||
+      int_x_green + int_y_green > 100000L)
    {
       png_warning(png_ptr, "Invalid cHRM green point");
       png_crc_finish(png_ptr, 8);
@@ -717,52 +720,74 @@
    }
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   blue_x = (float)val / (float)100000.0;
+   int_x_blue = (png_fixed_point)png_get_uint_32(buf);
 
    png_crc_read(png_ptr, buf, 4);
-   val = png_get_uint_32(buf);
-   blue_y = (float)val / (float)100000.0;
+   int_y_blue = (png_fixed_point)png_get_uint_32(buf);
 
-   if (blue_x < (float)0 || blue_x > (float)0.8 || blue_y < (float)0 ||
-       blue_y > (float)0.8 || blue_x + blue_y > (float)1.0)
+   if (int_x_blue > 80000L || int_y_blue > 80000L ||
+      int_x_blue + int_y_blue > 100000L)
    {
       png_warning(png_ptr, "Invalid cHRM blue point");
       png_crc_finish(png_ptr, 0);
       return;
    }
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   white_x = (float)int_x_white / (float)100000.0;
+   white_y = (float)int_y_white / (float)100000.0;
+   red_x   = (float)int_x_red   / (float)100000.0;
+   red_y   = (float)int_y_red   / (float)100000.0;
+   green_x = (float)int_x_green / (float)100000.0;
+   green_y = (float)int_y_green / (float)100000.0;
+   blue_x  = (float)int_x_blue  / (float)100000.0;
+   blue_y  = (float)int_y_blue  / (float)100000.0;
+#endif
 
 #if defined(PNG_READ_sRGB_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_sRGB)
       {
-      if (fabs(white_x - (float).3127) > (float).001 ||
-          fabs(white_y - (float).3290) > (float).001 ||
-          fabs(  red_x - (float).6400) > (float).001 ||
-          fabs(  red_y - (float).3300) > (float).001 ||
-          fabs(green_x - (float).3000) > (float).001 ||
-          fabs(green_y - (float).6000) > (float).001 ||
-          fabs( blue_x - (float).1500) > (float).001 ||
-          fabs( blue_y - (float).0600) > (float).001)
+      if (abs(int_x_white - 31270L) > 1000 ||
+          abs(int_y_white - 32900L) > 1000 ||
+          abs(  int_x_red - 64000L) > 1000 ||
+          abs(  int_y_red - 33000L) > 1000 ||
+          abs(int_x_green - 30000L) > 1000 ||
+          abs(int_y_green - 60000L) > 1000 ||
+          abs( int_x_blue - 15000L) > 1000 ||
+          abs( int_y_blue -  6000L) > 1000)
          {
 
             png_warning(png_ptr,
               "Ignoring incorrect cHRM value when sRGB is also present");
 #ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_FLOATING_POINT_SUPPORTED
             fprintf(stderr,"wx=%f, wy=%f, rx=%f, ry=%f\n",
                white_x, white_y, red_x, red_y);
             fprintf(stderr,"gx=%f, gy=%f, bx=%f, by=%f\n",
                green_x, green_y, blue_x, blue_y);
+#else
+            fprintf(stderr,"wx=%ld, wy=%ld, rx=%ld, ry=%ld\n",
+               int_x_white, int_y_white, int_x_red, int_y_red);
+            fprintf(stderr,"gx=%ld, gy=%ld, bx=%ld, by=%ld\n",
+               int_x_green, int_y_green, int_x_blue, int_y_blue);
 #endif
+#endif /* PNG_NO_CONSOLE_IO */
          }
+         png_crc_finish(png_ptr, 0);
          return;
       }
 #endif /* PNG_READ_sRGB_SUPPORTED */
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    png_set_cHRM(png_ptr, info_ptr,
       white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+      int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue);
+#endif
+   if (png_crc_finish(png_ptr, 0))
+      return;
 }
 #endif
 
@@ -771,6 +796,9 @@
 png_handle_sRGB(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
    int intent;
+#if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+   png_fixed_point igamma; /* gamma * 100000; too wide for a 16-bit int */
+#endif
    png_byte buf[1];
 
    png_debug(1, "in png_handle_sRGB\n");
@@ -814,27 +842,38 @@
    }
 
 #if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   igamma=(png_fixed_point)(png_ptr->gamma * 100000.);
+#else
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+   igamma=(png_fixed_point)png_ptr->int_gamma;
+#  endif
+#endif
    if ((info_ptr->valid & PNG_INFO_gAMA))
-      if(fabs((png_ptr->gamma*(float)100000.+.5)-45455.) > 500.)
+      if(igamma < 45000L || igamma > 46000L)
       {
          png_warning(png_ptr,
            "Ignoring incorrect gAMA value when sRGB is also present");
 #ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_FLOATING_POINT_SUPPORTED
            fprintf(stderr,"gamma=%f\n",png_ptr->gamma);
+#else
+           fprintf(stderr,"gamma=(%ld/100000)\n",(long)png_ptr->int_gamma);
+#endif
 #endif
       }
 #endif /* PNG_READ_gAMA_SUPPORTED */
 
 #ifdef PNG_READ_cHRM_SUPPORTED
    if (info_ptr->valid & PNG_INFO_cHRM)
-      if (fabs(info_ptr->x_white - (float).3127) > (float).001 ||
-          fabs(info_ptr->y_white - (float).3290) > (float).001 ||
-          fabs(  info_ptr->x_red - (float).6400) > (float).001 ||
-          fabs(  info_ptr->y_red - (float).3300) > (float).001 ||
-          fabs(info_ptr->x_green - (float).3000) > (float).001 ||
-          fabs(info_ptr->y_green - (float).6000) > (float).001 ||
-          fabs( info_ptr->x_blue - (float).1500) > (float).001 ||
-          fabs( info_ptr->y_blue - (float).0600) > (float).001)
+      if (abs(info_ptr->int_x_white - 31270L) > 1000 ||
+          abs(info_ptr->int_y_white - 32900L) > 1000 ||
+          abs(  info_ptr->int_x_red - 64000L) > 1000 ||
+          abs(  info_ptr->int_y_red - 33000L) > 1000 ||
+          abs(info_ptr->int_x_green - 30000L) > 1000 ||
+          abs(info_ptr->int_y_green - 60000L) > 1000 ||
+          abs( info_ptr->int_x_blue - 15000L) > 1000 ||
+          abs( info_ptr->int_y_blue -  6000L) > 1000)
          {
             png_warning(png_ptr,
               "Ignoring incorrect cHRM value when sRGB is also present");
@@ -851,7 +890,7 @@
 /* Note: this does not properly handle chunks that are > 64K under DOS */
 {
    png_charp chunkdata;
-   png_byte compression_type; 
+   png_byte compression_type;
    png_charp profile;
    png_uint_32 skip = 0;
    png_size_t slength, prefix_length;
@@ -1480,8 +1519,15 @@
 void
 png_handle_sCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
-   png_charp unit, ep, vp;
-   double width, height;
+   png_charp buffer, ep;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double width=0., height=0.;
+   png_charp vp;
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp swidth, sheight;
+#endif
+#endif
    png_size_t slength;
 
    png_debug(1, "in png_handle_sCAL\n");
@@ -1503,46 +1549,75 @@
 
    png_debug1(2, "Allocating and reading sCAL chunk data (%d bytes)\n",
       length + 1);
-   unit = (png_charp)png_malloc(png_ptr, length + 1);
+   buffer = (png_charp)png_malloc(png_ptr, length + 1);
    slength = (png_size_t)length;
-   png_crc_read(png_ptr, (png_bytep)unit, slength);
+   png_crc_read(png_ptr, (png_bytep)buffer, slength);
 
    if (png_crc_finish(png_ptr, 0))
    {
-      png_free(png_ptr, unit);
+      png_free(png_ptr, buffer);
       return;
    }
 
-   unit[slength] = 0x00; /* null terminate the last string */
+   buffer[slength] = 0x00; /* null terminate the last string */
 
-   png_debug(3, "Finding end of sCAL unit string\n");
-   for (ep = unit; *ep; ep++)
-      /* empty loop */ ;
-   ep++;
+   ep = buffer + 1;	/* skip unit byte */
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    width = strtod(ep, &vp);
    if (*vp)
        png_error(png_ptr, "malformed width string in sCAL chunk");
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   swidth = (png_charp)png_malloc(png_ptr, strlen(ep) + 1);
+   png_memcpy(swidth, ep, (png_size_t)strlen(ep) + 1); /* copy the NUL too */
+#endif
+#endif
 
-   for (ep = unit; *ep; ep++)
+   for (ep = buffer; *ep; ep++)
       /* empty loop */ ;
    ep++;
 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    height = strtod(ep, &vp);
    if (*vp)
        png_error(png_ptr, "malformed height string in sCAL chunk");
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   sheight = (png_charp)png_malloc(png_ptr, strlen(ep) + 1);
+   png_memcpy(sheight, ep, (png_size_t)strlen(ep) + 1); /* copy the NUL too */
+#endif
+#endif
 
-   if (unit + slength < ep || width <= 0. || height <= 0.)
+   if (buffer + slength < ep
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      || width <= 0. || height <= 0.
+#endif
+      )
    {
       png_warning(png_ptr, "Invalid sCAL data");
-      png_free(png_ptr, unit);
+      png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED)&& !defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_free(png_ptr, swidth);
+      png_free(png_ptr, sheight);
+#endif
       return;
    }
 
 
-   png_set_sCAL(png_ptr, info_ptr, unit, width, height);
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   png_set_sCAL(png_ptr, info_ptr, buffer[0], width, height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_sCAL_s(png_ptr, info_ptr, buffer[0], swidth, sheight);
+#endif
+#endif
 
-   png_free(png_ptr, unit);
+   png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED)&& !defined(PNG_FLOATING_POINT_SUPPORTED)
+   png_free(png_ptr, swidth);
+   png_free(png_ptr, sheight);
+#endif
 }
 #endif
 
@@ -1637,9 +1712,12 @@
 
    text_ptr = (png_textp)png_malloc(png_ptr, (png_uint_32)sizeof(png_text));
    text_ptr->compression = PNG_TEXT_COMPRESSION_NONE;
-   text_ptr->lang = NULL;
    text_ptr->key = key;
+   text_ptr->lang = NULL;
+   text_ptr->lang_key = NULL;
    text_ptr->text = text;
+   text_ptr->text_length = png_strlen(text);
+   text_ptr->itxt_length = 0;
 
    png_set_text(png_ptr, info_ptr, text_ptr, 1);
 
@@ -1700,7 +1778,7 @@
    else
    {
        comp_type = *(++text);
-       text++;        /* skip the compression byte */
+       text++;        /* skip the compression_method byte */
    }
    prefix_len = text - chunkdata;
 
@@ -1711,7 +1789,10 @@
    text_ptr->compression = comp_type;
    text_ptr->lang = NULL;
    text_ptr->key = chunkdata;
+   text_ptr->lang_key = NULL;
    text_ptr->text = chunkdata + prefix_len;
+   text_ptr->text_length = png_strlen(text_ptr->text); /* current buffer */
+   text_ptr->itxt_length = 0;
 
    png_set_text(png_ptr, info_ptr, text_ptr, 1);
 
@@ -1727,9 +1808,9 @@
 {
    png_textp text_ptr;
    png_charp chunkdata;
-   png_charp lang, text;
-   int comp_type = PNG_TEXT_COMPRESSION_NONE;
-   int comp_flag = 0;
+   png_charp key, lang, text, lang_key;
+   int comp_flag = PNG_TEXT_COMPRESSION_NONE;
+   int comp_type = 0;
    png_size_t slength, prefix_len;
 
    png_debug(1, "in png_handle_iTXt\n");
@@ -1766,10 +1847,13 @@
       /* empty loop */ ;
    lang++;        /* skip NUL separator */
 
-   /* iTXt must have a language tag and some text after the keyword */
+   /* iTXt must have a language tag (possibly empty), two compression bytes,
+      translated keyword (possibly empty), and possibly some text after the
+      keyword */
+
    if (lang >= chunkdata + slength)
    {
-      comp_type = PNG_TEXT_COMPRESSION_NONE;
+      comp_flag = PNG_TEXT_COMPRESSION_NONE;
       png_warning(png_ptr, "Zero length iTXt chunk");
    }
    else
@@ -1778,25 +1862,31 @@
        comp_type = *lang++;
    }
 
-   for (text = lang; *text; text++)
+   for (lang_key = lang; *lang_key; lang_key++)
+      /* empty loop */ ;
+   lang_key++;        /* skip NUL separator */
+
+   for (text = lang_key; *text; text++)
       /* empty loop */ ;
    text++;        /* skip NUL separator */
 
    prefix_len = text - chunkdata;
 
+   key=chunkdata;
    if (comp_flag)
        chunkdata = png_decompress_chunk(png_ptr, comp_type, chunkdata,
-                                        (size_t)length, prefix_len);
-
+          (size_t)length, prefix_len);
    text_ptr = (png_textp)png_malloc(png_ptr, (png_uint_32)sizeof(png_text));
-   text_ptr->compression = (png_byte)comp_type;
-   text_ptr->lang = NULL;
+   text_ptr->compression = (int)comp_flag + 1;
+   text_ptr->lang_key = chunkdata+(lang_key-key);
+   text_ptr->lang = chunkdata+(lang-key);
    text_ptr->key = chunkdata;
    text_ptr->text = chunkdata + prefix_len;
+   text_ptr->text_length = 0;
+   text_ptr->itxt_length = png_strlen(text_ptr->text);
 
    png_set_text(png_ptr, info_ptr, text_ptr, 1);
 
-   png_free(png_ptr, text_ptr->key);
    png_free(png_ptr, text_ptr);
    png_free(png_ptr, chunkdata);
 }
@@ -1804,12 +1894,19 @@
 
 /* This function is called when we haven't found a handler for a
    chunk.  If there isn't a problem with the chunk itself (ie bad
-   chunk name, CRC, or a critical chunk), the chunk is silently ignored. */
+   chunk name, CRC, or a critical chunk), the chunk is silently ignored
+   -- unless the PNG_FLAG_UNKNOWN_CHUNKS_SUPPORTED flag is on in which
+   case it will be saved away to be written out later. */
 void
 png_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
+   png_uint_32 skip = 0;
+
    png_debug(1, "in png_handle_unknown\n");
 
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
    /* In the future we can have code here that calls user-supplied
     * callback functions for unknown chunks before they are ignored or
     * cause an error.
@@ -1825,11 +1922,33 @@
          return;
    }
 
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-      png_ptr->mode |= PNG_AFTER_IDAT;
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS)
+   {
+       png_unknown_chunk chunk;
 
-   png_crc_finish(png_ptr, length);
 
+#ifdef PNG_MAX_MALLOC_64K
+       if (length > (png_uint_32)65535L)
+       {
+           png_warning(png_ptr, "unknown chunk too large to fit in memory");
+           skip = length - (png_uint_32)65535L;
+           length = (png_uint_32)65535L;
+       }
+#endif
+
+       strcpy((png_charp)chunk.name, (png_charp)png_ptr->chunk_name);
+       chunk.data = (png_bytep)png_malloc(png_ptr, length);
+       png_crc_read(png_ptr, chunk.data, length);
+       chunk.size = length;
+       png_set_unknown_chunks(png_ptr, info_ptr, &chunk, 1);
+       png_free(png_ptr, chunk.data);
+   }
+   else
+#endif
+       skip = length;
+
+   png_crc_finish(png_ptr, skip);
 }
 
 /* This function is called to verify that a chunk name is valid.
@@ -2080,11 +2199,11 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* offset to next interlace block */
    const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 #endif
-   
+
    png_debug(1,"in png_do_read_interlace\n");
    if (row != NULL && row_info != NULL)
    {
@@ -2425,20 +2544,20 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-   
+
    /* start of interlace block in the y direction */
    const int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-   
+
    /* offset to next interlace block in the y direction */
    const int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
-   
+
    png_debug(1, "in png_read_finish_row\n");
    png_ptr->row_number++;
    if (png_ptr->row_number < png_ptr->num_rows)
@@ -2547,20 +2666,20 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-   
+
    /* start of interlace block in the y direction */
    const int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-   
+
    /* offset to next interlace block in the y direction */
    const int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
-   
+
    int max_pixel_depth;
    png_uint_32 row_bytes;
 
diff --git a/pngset.c b/pngset.c
index 8c79109..37aaf52 100644
--- a/pngset.c
+++ b/pngset.c
@@ -1,7 +1,7 @@
 
 /* pngset.c - storage of image information into info struct
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -30,6 +30,7 @@
 #endif
 
 #if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
 png_set_cHRM(png_structp png_ptr, png_infop info_ptr,
    double white_x, double white_y, double red_x, double red_y,
@@ -47,11 +48,55 @@
    info_ptr->y_green = (float)green_y;
    info_ptr->x_blue  = (float)blue_x;
    info_ptr->y_blue  = (float)blue_y;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_x_white = (png_fixed_point)(white_x*100000.+0.5);
+   info_ptr->int_y_white = (png_fixed_point)(white_y*100000.+0.5);
+   info_ptr->int_x_red   = (png_fixed_point)(red_x*100000.+0.5);
+   info_ptr->int_y_red   = (png_fixed_point)(red_y*100000.+0.5);
+   info_ptr->int_x_green = (png_fixed_point)(green_x*100000.+0.5);
+   info_ptr->int_y_green = (png_fixed_point)(green_y*100000.+0.5);
+   info_ptr->int_x_blue  = (png_fixed_point)(blue_x*100000.+0.5);
+   info_ptr->int_y_blue  = (png_fixed_point)(blue_y*100000.+0.5);
+#endif
    info_ptr->valid |= PNG_INFO_cHRM;
 }
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void
+png_set_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
+   png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
+   png_fixed_point blue_x, png_fixed_point blue_y)
+{
+   png_debug1(1, "in %s storage function\n", "cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->int_x_white = white_x;
+   info_ptr->int_y_white = white_y;
+   info_ptr->int_x_red   = red_x;
+   info_ptr->int_y_red   = red_y;
+   info_ptr->int_x_green = green_x;
+   info_ptr->int_y_green = green_y;
+   info_ptr->int_x_blue  = blue_x;
+   info_ptr->int_y_blue  = blue_y;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   info_ptr->x_white = (float)white_x/100000.;
+   info_ptr->y_white = (float)white_y/100000.;
+   info_ptr->x_red   = (float)red_x/100000.;
+   info_ptr->y_red   = (float)red_y/100000.;
+   info_ptr->x_green = (float)green_x/100000.;
+   info_ptr->y_green = (float)green_y/100000.;
+   info_ptr->x_blue  = (float)blue_x/100000.;
+   info_ptr->y_blue  = (float)blue_y/100000.;
+#endif
+   info_ptr->valid |= PNG_INFO_cHRM;
+}
+#endif
+#endif
 
 #if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
 png_set_gAMA(png_structp png_ptr, png_infop info_ptr, double file_gamma)
 {
@@ -60,6 +105,26 @@
       return;
 
    info_ptr->gamma = (float)file_gamma;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_gamma = (png_fixed_point)(file_gamma*100000.+.5);
+#endif
+   info_ptr->valid |= PNG_INFO_gAMA;
+}
+#endif
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void
+png_set_gAMA_fixed(png_structp png_ptr, png_infop info_ptr, png_fixed_point
+   int_gamma)
+{
+   png_debug1(1, "in %s storage function\n", "gAMA");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   info_ptr->gamma = (float)int_gamma/100000.;
+#endif
+   info_ptr->int_gamma = int_gamma;
    info_ptr->valid |= PNG_INFO_gAMA;
 }
 #endif
@@ -72,7 +137,8 @@
    if (png_ptr == NULL || info_ptr == NULL)
       return;
 
-   info_ptr->hist = hist;
+   info_ptr->hist = png_malloc(png_ptr, sizeof(png_uint_16) * info_ptr->num_palette);
+   memcpy(info_ptr->hist, hist, sizeof(png_uint_16) * info_ptr->num_palette);
    info_ptr->valid |= PNG_INFO_hIST;
 }
 #endif
@@ -179,9 +245,26 @@
 #endif
 
 #if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
 png_set_sCAL(png_structp png_ptr, png_infop info_ptr,
-             png_charp unit, double width, double height)
+             int unit, double width, double height)
+{
+   png_debug1(1, "in %s storage function\n", "sCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->scal_unit = (png_byte)unit;
+   info_ptr->scal_pixel_width = width;
+   info_ptr->scal_pixel_height = height;
+
+   info_ptr->valid |= PNG_INFO_sCAL;
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void
+png_set_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int unit, png_charp swidth, png_charp sheight)
 {
    png_uint_32 length;
 
@@ -189,16 +272,23 @@
    if (png_ptr == NULL || info_ptr == NULL)
       return;
 
-   length = png_strlen(unit) + 1;
+   info_ptr->scal_unit = (png_byte)unit;
+
+   length = png_strlen(swidth) + 1;
    png_debug1(3, "allocating unit for info (%d bytes)\n", length);
-   info_ptr->scal_unit = (png_charp)png_malloc(png_ptr, length);
-   png_memcpy(info_ptr->scal_unit, unit, (png_size_t)length);
-   info_ptr->scal_pixel_width = width;
-   info_ptr->scal_pixel_height = height;
+   info_ptr->scal_s_width = (png_charp)png_malloc(png_ptr, length);
+   png_memcpy(info_ptr->scal_s_width, swidth, (png_size_t)length);
+   /* the height string gets its own buffer; it must not reuse scal_s_width */
+   length = png_strlen(sheight) + 1;
+   png_debug1(3, "allocating unit for info (%d bytes)\n", length);
+   info_ptr->scal_s_height = (png_charp)png_malloc(png_ptr, length);
+   png_memcpy(info_ptr->scal_s_height, sheight, (png_size_t)length);
 
    info_ptr->valid |= PNG_INFO_sCAL;
 }
 #endif
+#endif
+#endif
 
 #if defined(PNG_pHYs_SUPPORTED)
 void
@@ -254,16 +344,28 @@
    info_ptr->srgb_intent = (png_byte)intent;
    info_ptr->valid |= PNG_INFO_sRGB;
 }
+
 void
 png_set_sRGB_gAMA_and_cHRM(png_structp png_ptr, png_infop info_ptr,
    int intent)
 {
 #if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float file_gamma;
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_file_gamma;
+#endif
+#endif
 #if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    float white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_white_x, int_white_y, int_red_x, int_red_y, int_green_x,
+      int_green_y, int_blue_x, int_blue_y;
+#endif
+#endif
    png_debug1(1, "in %s storage function\n", "sRGB_gAMA_and_cHRM");
    if (png_ptr == NULL || info_ptr == NULL)
       return;
@@ -271,11 +373,32 @@
    png_set_sRGB(png_ptr, info_ptr, intent);
 
 #if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    file_gamma = (float).45455;
    png_set_gAMA(png_ptr, info_ptr, file_gamma);
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   int_file_gamma = 45455L;
+   png_set_gAMA_fixed(png_ptr, info_ptr, int_file_gamma);
+#endif
+#endif
 
 #if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   int_white_x = 31270L;
+   int_white_y = 32900L;
+   int_red_x   = 64000L;
+   int_red_y   = 33000L;
+   int_green_x = 30000L;
+   int_green_y = 60000L;
+   int_blue_x  = 15000L;
+   int_blue_y  =  6000L;
+
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+      int_white_x, int_white_y, int_red_x, int_red_y, int_green_x, int_green_y,
+      int_blue_x, int_blue_y);
+#endif
+#ifdef PNG_FLOATING_POINT_SUPPORTED
    white_x = (float).3127;
    white_y = (float).3290;
    red_x   = (float).64;
@@ -287,11 +410,12 @@
 
    png_set_cHRM(png_ptr, info_ptr,
       white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
-
+#endif
 #endif
 }
 #endif
 
+
 #if defined(PNG_iCCP_SUPPORTED)
 void
 png_set_iCCP(png_structp png_ptr, png_infop info_ptr,
@@ -359,52 +483,87 @@
 
    for (i = 0; i < num_text; i++)
    {
+      png_size_t text_length,key_len,lang_len,lang_key_len;
       png_textp textp = &(info_ptr->text[info_ptr->num_text]);
-      png_charp key,text;
 
       if (text_ptr[i].key == (png_charp)NULL)
           continue;
 
-#ifdef PNG_iTXt_SUPPORTED
-      textp->lang = text_ptr[i].lang;
-#else
-      textp->lang = NULL;
-#endif
+      key_len = png_strlen(text_ptr[i].key);
+
+      if(text_ptr[i].compression > 0)
+      {
+        /* set iTXt data */
+        lang_len = png_strlen(text_ptr[i].lang);
+        lang_key_len = png_strlen(text_ptr[i].lang_key);
+      }
+      else
+      {
+        lang_len = 0;
+        lang_key_len = 0;
+      }
 
       if (text_ptr[i].text[0] == '\0')
       {
-         textp->text_length = 0;
-         textp->compression = PNG_TEXT_COMPRESSION_NONE;
+         text_length = 0;
+         if(text_ptr[i].compression > 0)
+            textp->compression = PNG_ITXT_COMPRESSION_NONE;
+         else
+            textp->compression = PNG_TEXT_COMPRESSION_NONE;
       }
       else
       {
-         textp->text_length = png_strlen(text_ptr[i].text);
+         text_length = png_strlen(text_ptr[i].text);
          textp->compression = text_ptr[i].compression;
       }
-      key=text_ptr[i].key;
-      for (text = key; *text++;)
-        /* empty loop to find the byte after the zero byte after the
-           end of key */ ;
 
       textp->key = (png_charp)png_malloc(png_ptr,
-         (png_uint_32)(text+textp->text_length - key)+1);
+         (png_uint_32)(key_len + lang_len + lang_key_len + text_length + 4));
       /* Caution: the calling program, not libpng, is responsible for
          freeing this, if libpng wasn't the caller. */
       png_debug2(2, "Allocated %d bytes at %x in png_set_text\n",
-         text+textp->text_length-key+1, textp->key);
+         key_len + lang_len + lang_key_len + text_length + 4, textp->key);
 
       png_memcpy(textp->key, text_ptr[i].key,
-         (png_size_t)(text - key));  /* includes the zero-byte separator */
+         (png_size_t)(key_len));
+      *(textp->key+key_len) = '\0';
+      if (text_ptr[i].compression > 0)
+      {
+         textp->lang=textp->key + key_len + 1;
+         png_memcpy(textp->lang, text_ptr[i].lang, lang_len);
+         *(textp->lang+lang_len) = '\0';
+         textp->lang_key=textp->lang + lang_len + 1;
+         png_memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
+         *(textp->lang_key+lang_key_len) = '\0';
+         textp->text=textp->lang_key + lang_key_len + 1;
+      }
+      else
+      {
+         textp->lang=NULL;
+         textp->lang_key=NULL;
+         textp->text=textp->key + key_len + 1;
+      }
 
-      textp->text = textp->key + (text-key);
-      if(textp->text_length)
+      if(text_length)
       {
          png_memcpy(textp->text, text_ptr[i].text,
-            (png_size_t)(textp->text_length));
-         *(textp->text+textp->text_length) = '\0';
+            (png_size_t)(text_length));
+         *(textp->text+text_length) = '\0';
       }
       else
          textp->text--;
+
+      if(textp->compression > 0)
+      {
+         textp->text_length = 0;
+         textp->itxt_length = text_length;
+      }
+      else
+      {
+         textp->text_length = text_length;
+         textp->itxt_length = 0;
+      }
+
       info_ptr->text[info_ptr->num_text]= *textp;
       info_ptr->num_text++;
       png_debug1(3, "transferred text chunk %d\n", info_ptr->num_text);
@@ -463,13 +622,13 @@
     np = (png_spalette_p)png_malloc(png_ptr,
         (info_ptr->splt_palettes_num + nentries) * sizeof(png_spalette));
 
-    memcpy(np, info_ptr->splt_palettes, 
+    memcpy(np, info_ptr->splt_palettes,
            info_ptr->splt_palettes_num * sizeof(png_spalette));
     png_free(png_ptr, info_ptr->splt_palettes);
 
     for (i = 0; i < nentries; i++)
     {
-        png_spalette_p to = np + i;
+        png_spalette_p to = np + info_ptr->splt_palettes_num + i;
         png_spalette_p from = entries + i;
 
         to->name = (png_charp)png_malloc(png_ptr,
@@ -477,15 +636,56 @@
         png_strcpy(to->name, from->name);
         to->entries = (png_spalette_entryp)png_malloc(png_ptr,
                                  from->nentries * sizeof(png_spalette));
-        memcpy(to->entries, from->entries, 
-               from->nentries * sizeof(png_spalette));
+        memcpy(to->entries, from->entries,
+               from->nentries * sizeof(*from->entries));
+	to->nentries = from->nentries;
+	to->depth = from->depth;
     }
 
     info_ptr->splt_palettes = np;
     info_ptr->splt_palettes_num += nentries;
+    info_ptr->valid |= PNG_INFO_sPLT;
 }
 #endif /* PNG_sPLT_SUPPORTED */
 
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+void
+png_set_unknown_chunks(png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns)
+{
+    png_unknown_chunkp np;
+    int i;
+
+    if (png_ptr == NULL || info_ptr == NULL || num_unknowns == 0)
+        return;
+
+    np = (png_unknown_chunkp)png_malloc(png_ptr,
+        (info_ptr->unknown_chunks_num + num_unknowns) *
+        sizeof(png_unknown_chunk));
+
+    memcpy(np, info_ptr->unknown_chunks,
+           info_ptr->unknown_chunks_num * sizeof(png_unknown_chunk));
+    png_free(png_ptr, info_ptr->unknown_chunks);
+
+    for (i = 0; i < num_unknowns; i++)
+    {
+        png_unknown_chunkp to = np + info_ptr->unknown_chunks_num + i;
+        png_unknown_chunkp from = unknowns + i;
+
+        png_strcpy((png_charp)to->name, (png_charp)from->name);
+        to->data = (png_bytep)png_malloc(png_ptr, from->size);
+        memcpy(to->data, from->data, from->size);
+        to->size = from->size;
+
+        /* note our location in the read or write sequence */
+        to->location = (png_byte)(png_ptr->mode & 0xff);
+    }
+
+    info_ptr->unknown_chunks = np;
+    info_ptr->unknown_chunks_num += num_unknowns;
+}
+#endif
+
 #if defined(PNG_READ_EMPTY_PLTE_SUPPORTED)
 void
 png_permit_empty_plte (png_structp png_ptr, int empty_plte_permitted)
@@ -497,9 +697,45 @@
 }
 #endif
 
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+void
+png_set_keep_unknown_chunks(png_structp png_ptr, int keep, png_bytep chunk_list,
+    int num_chunks)
+{
 
+/*
+    png_set_keep_unknown_chunks(png_ptr, keep, chunk_list,
+        num_chunks);
+    keep       - 0: do not keep
+                 1: keep only if safe-to-copy
+                 2: keep even if unsafe-to-copy
+    The following are not yet implemented
+                    in libpng version 1.0.5j:
+    chunk_list - list of chunks affected, NULL if
+                 num_chunks is 0.
+    num_chunks - number of chunks affected.  If 0, all
+                 unknown chunks are affected.
+*/
 
+    if (num_chunks == 0)
+    {
+      if(keep)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
+      else
+        png_ptr->flags &= ~PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
 
-
-
-
+      if(keep == 2)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+      else
+        png_ptr->flags &= ~PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+    }
+    else
+    {
+      /* to do: set up chunk_list processing */
+      png_warning(png_ptr,
+        "chunk_list not yet implemented in png_set_keep_unknown_chunks");
+      if (chunk_list == NULL || num_chunks == 0)
+        /* do nothing right now */ ; 
+    }
+}
+#endif
diff --git a/pngtrans.c b/pngtrans.c
index c0f29fc..eabd096 100644
--- a/pngtrans.c
+++ b/pngtrans.c
@@ -1,7 +1,7 @@
 
 /* pngtrans.c - transforms the data in a row (used by both readers and writers)
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pngvcrd.c b/pngvcrd.c
index 3398fff..5a9d119 100644
--- a/pngvcrd.c
+++ b/pngvcrd.c
@@ -2,7 +2,7 @@
  *
  * For Intel x86 CPU and Microsoft Visual C++ compiler
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998, Intel Corporation
  * Copyright (c) 1998, 1999 Glenn Randers-Pehrson
@@ -3653,7 +3653,7 @@
 #ifdef PNG_DEBUG
    char filnm[6];
 #endif
-   #define UseMMX 1
+#define UseMMX 1
 
    if (mmx_supported == 2)
        mmx_supported = mmxsupport();
diff --git a/pngwio.c b/pngwio.c
index 91c3fbb..307476e 100644
--- a/pngwio.c
+++ b/pngwio.c
@@ -1,7 +1,7 @@
 
 /* pngwio.c - functions for data output
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pngwrite.c b/pngwrite.c
index 1f836ad..62ec55a 100644
--- a/pngwrite.c
+++ b/pngwrite.c
@@ -1,7 +1,7 @@
 
 /* pngwrite.c - general routines to write a PNG file
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -24,9 +24,6 @@
 void
 png_write_info_before_PLTE(png_structp png_ptr, png_infop info_ptr)
 {
-#if defined(PNG_WRITE_sPLT_SUPPORTED)
-   int i;
-#endif
    png_debug(1, "in png_write_info_before_PLTE\n");
    if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
    {
@@ -44,7 +41,15 @@
       flag set, and if it does, writes the chunk. */
 #if defined(PNG_WRITE_gAMA_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_gAMA)
+   {
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
       png_write_gAMA(png_ptr, info_ptr->gamma);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_gAMA_fixed(png_ptr, info_ptr->int_gamma);
+#  endif
+#endif
+   }
 #endif
 #if defined(PNG_WRITE_sRGB_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_sRGB)
@@ -55,22 +60,49 @@
       png_write_iCCP(png_ptr, info_ptr->iccp_name, PNG_TEXT_COMPRESSION_NONE,
                      info_ptr->iccp_profile, (int)info_ptr->iccp_proflen);
 #endif
-#if defined(PNG_WRITE_sPLT_SUPPORTED)
-   if (info_ptr->valid & PNG_INFO_sPLT)
-     for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
-       png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
-#endif
 #if defined(PNG_WRITE_sBIT_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_sBIT)
       png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
 #endif
 #if defined(PNG_WRITE_cHRM_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_cHRM)
+   {
+#ifdef PNG_FLOATING_POINT_SUPPORTED
       png_write_cHRM(png_ptr,
          info_ptr->x_white, info_ptr->y_white,
          info_ptr->x_red, info_ptr->y_red,
          info_ptr->x_green, info_ptr->y_green,
          info_ptr->x_blue, info_ptr->y_blue);
+#else
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_cHRM_fixed(png_ptr,
+         info_ptr->int_x_white, info_ptr->int_y_white,
+         info_ptr->int_x_red, info_ptr->int_y_red,
+         info_ptr->int_x_green, info_ptr->int_y_green,
+         info_ptr->int_x_blue, info_ptr->int_y_blue);
+#  endif
+#endif
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         if (up->location && (!(up->location & PNG_HAVE_PLTE)) &&
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS ||
+            up->name[3] & 0x20))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
 #endif
       png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
    }
@@ -79,7 +111,7 @@
 void
 png_write_info(png_structp png_ptr, png_infop info_ptr)
 {
-#if defined(PNG_WRITE_TEXT_SUPPORTED)
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
    int i;
 #endif
 
@@ -131,8 +163,15 @@
 #endif
 #if defined(PNG_WRITE_sCAL_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_sCAL)
-      png_write_sCAL(png_ptr, info_ptr->scal_unit, 
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      png_write_sCAL(png_ptr, (int)info_ptr->scal_unit,
           info_ptr->scal_pixel_width, info_ptr->scal_pixel_height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_s_width, info_ptr->scal_s_height);
+#endif
+#endif
 #endif
 #if defined(PNG_WRITE_pHYs_SUPPORTED)
    if (info_ptr->valid & PNG_INFO_pHYs)
@@ -146,6 +185,11 @@
       png_ptr->mode |= PNG_WROTE_tIME;
    }
 #endif
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sPLT)
+     for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+       png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
+#endif
 #if defined(PNG_WRITE_TEXT_SUPPORTED)
    /* Check to see if we need to write text chunks */
    for (i = 0; i < info_ptr->num_text; i++)
@@ -153,14 +197,15 @@
       png_debug2(2, "Writing header text chunk %d, type %d\n", i,
          info_ptr->text[i].compression);
       /* an internationalized chunk? */
-      if (info_ptr->text[i].lang)
+      if (info_ptr->text[i].compression > 0)
       {
 #if defined(PNG_WRITE_iTXt_SUPPORTED)
           /* write international chunk */
-          png_write_iTXt(png_ptr, 
+          png_write_iTXt(png_ptr,
                          info_ptr->text[i].compression,
-                         info_ptr->text[i].lang,
                          info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
                          info_ptr->text[i].text);
 #else
           png_warning(png_ptr, "Unable to write international text\n");
@@ -169,12 +214,12 @@
           info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
       }
       /* If we want a compressed text chunk */
-      else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
       {
 #if defined(PNG_WRITE_zTXt_SUPPORTED)
          /* write compressed chunk */
          png_write_zTXt(png_ptr, info_ptr->text[i].key,
-            info_ptr->text[i].text, info_ptr->text[i].text_length,
+            info_ptr->text[i].text, 0,
             info_ptr->text[i].compression);
 #else
          png_warning(png_ptr, "Unable to write compressed text\n");
@@ -187,8 +232,8 @@
 #if defined(PNG_WRITE_tEXt_SUPPORTED)
          /* write uncompressed chunk */
          png_write_tEXt(png_ptr, info_ptr->text[i].key,
-                         info_ptr->text[i].text, 
-                         info_ptr->text[i].text_length);
+                         info_ptr->text[i].text,
+                         0);
 #else
          png_warning(png_ptr, "Unable to write uncompressed text\n");
 #endif
@@ -197,6 +242,27 @@
       }
    }
 #endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         if (up->location && (up->location& PNG_HAVE_PLTE) &&
+            !(up->location& PNG_HAVE_IDAT) &&
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS ||
+            up->name[3] & 0x20))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
 }
 
 /* Writes the end of the PNG file.  If you don't want to write comments or
@@ -229,12 +295,29 @@
       {
          png_debug2(2, "Writing trailer text chunk %d, type %d\n", i,
             info_ptr->text[i].compression);
-         if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
+         /* an internationalized chunk? */
+         if (info_ptr->text[i].compression > 0)
+         {
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+             /* write international chunk */
+             png_write_iTXt(png_ptr,
+                         info_ptr->text[i].compression,
+                         info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
+                         info_ptr->text[i].text);
+#else
+             png_warning(png_ptr, "Unable to write international text\n");
+#endif
+             /* Mark this chunk as written */
+             info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         }
+         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
          {
 #if defined(PNG_WRITE_zTXt_SUPPORTED)
             /* write compressed chunk */
             png_write_zTXt(png_ptr, info_ptr->text[i].key,
-               info_ptr->text[i].text, info_ptr->text[i].text_length,
+               info_ptr->text[i].text, 0,
                info_ptr->text[i].compression);
 #else
             png_warning(png_ptr, "Unable to write compressed text\n");
@@ -247,7 +330,7 @@
 #if defined(PNG_WRITE_tEXt_SUPPORTED)
             /* write uncompressed chunk */
             png_write_tEXt(png_ptr, info_ptr->text[i].key,
-               info_ptr->text[i].text, info_ptr->text[i].text_length);
+               info_ptr->text[i].text, 0);
 #else
             png_warning(png_ptr, "Unable to write uncompressed text\n");
 #endif
@@ -257,6 +340,26 @@
          }
       }
 #endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         if ((up->location && (up->location & PNG_AFTER_IDAT)) &&
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS ||
+            up->name[3] & 0x20))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
    }
 
    png_ptr->mode |= PNG_AFTER_IDAT;
@@ -679,40 +782,27 @@
    if (info_ptr != NULL)
    {
 #if defined(PNG_WRITE_TEXT_SUPPORTED)
-   png_debug(1, "in png_info_destroy\n");
-   if (info_ptr->text != NULL)
-   {
-      int i;
-      for (i = 0; i < info_ptr->num_text; i++)
-      {
-         if(info_ptr->text[i].key != NULL)
-         {
-           png_free(png_ptr, info_ptr->text[i].key);
-           info_ptr->text[i].key = NULL;
-         }
-         if(info_ptr->text[i].lang != NULL)
-         {
-           png_free(png_ptr, info_ptr->text[i].lang);
-           info_ptr->text[i].lang = NULL;
-         }
-      }
-      png_free(png_ptr, info_ptr->text);
-      info_ptr->text = NULL;
-   }
+      png_free_text(png_ptr, info_ptr, -1);
 #endif
-#if defined(PNG_READ_pCAL_SUPPORTED)
-      png_free(png_ptr, info_ptr->pcal_purpose);
-      png_free(png_ptr, info_ptr->pcal_units);
-      if (info_ptr->pcal_params != NULL)
-      {
-         int i;
-         for (i = 0; i < (int)info_ptr->pcal_nparams; i++)
-         {
-            png_free(png_ptr, info_ptr->pcal_params[i]);
-         }
-         png_free(png_ptr, info_ptr->pcal_params);
-      }
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+      png_free_sCAL(png_ptr, info_ptr);
 #endif
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+      png_free_pCAL(png_ptr, info_ptr);
+#endif
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+      png_free_iCCP(png_ptr, info_ptr);
+#endif
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+      png_free_spalettes(png_ptr, info_ptr, -1);
+#endif
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+      png_free_unknown_chunks(png_ptr, info_ptr, -1);
+#endif
+#if defined(PNG_hIST_SUPPORTED)
+      png_free_hIST(png_ptr, info_ptr);
+#endif
+
 #ifdef PNG_USER_MEM_SUPPORTED
       png_destroy_struct_2((png_voidp)info_ptr, free_fn);
 #else
@@ -1042,6 +1132,14 @@
       png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
    else if (window_bits < 8)
       png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
+#ifndef WBITS_8_OK
+   /* avoid libpng bug with 256-byte windows */
+   if (window_bits == 8)
+     {
+       png_warning(png_ptr, "Compression window is being reset to 512");
+       window_bits=9;
+     }
+#endif
    png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS;
    png_ptr->zlib_window_bits = window_bits;
 }
diff --git a/pngwtran.c b/pngwtran.c
index 310064e..66282a2 100644
--- a/pngwtran.c
+++ b/pngwtran.c
@@ -1,7 +1,7 @@
 
 /* pngwtran.c - transforms the data in a row for PNG writers
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
diff --git a/pngwutil.c b/pngwutil.c
index 37a4f50..050f1a8 100644
--- a/pngwutil.c
+++ b/pngwutil.c
@@ -1,7 +1,7 @@
 
 /* pngwutil.c - utilities to write a PNG file
  *
- * libpng 1.0.5f - December 6, 1999
+ * libpng 1.0.5j - December 21, 1999
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
  * Copyright (c) 1996, 1997 Andreas Dilger
@@ -133,7 +133,7 @@
       (png_size_t)8 - png_ptr->sig_bytes);
 }
 
-#if defined(PNG_WRITE_TEXT_SUPPORTED)
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_iCCP_SUPPORTED)
 /*
  * This pair of functions encapsulates the operation of (a) compressing a
  * text string, and (b) issuing it later as a series of chunk data writes.
@@ -151,8 +151,8 @@
 } compression_state;
 
 /* compress given text into storage in the png_ptr structure */
-static int 
-png_text_compress(png_structp png_ptr, 
+static int
+png_text_compress(png_structp png_ptr,
         png_charp text, png_size_t text_len, int compression,
         compression_state *comp)
 {
@@ -233,7 +233,7 @@
                old_ptr = comp->output_ptr;
                comp->output_ptr = (png_charpp)png_malloc(png_ptr,
                   (png_uint_32)(comp->max_output_ptr * sizeof (png_charpp)));
-               png_memcpy(comp->output_ptr, old_ptr, 
+               png_memcpy(comp->output_ptr, old_ptr,
            old_max * sizeof (png_charp));
                png_free(png_ptr, old_ptr);
             }
@@ -288,7 +288,7 @@
                /* This could be optimized to realloc() */
                comp->output_ptr = (png_charpp)png_malloc(png_ptr,
                   (png_uint_32)(comp->max_output_ptr * sizeof (png_charpp)));
-               png_memcpy(comp->output_ptr, old_ptr, 
+               png_memcpy(comp->output_ptr, old_ptr,
            old_max * sizeof (png_charp));
                png_free(png_ptr, old_ptr);
             }
@@ -319,7 +319,7 @@
 }
 
 /* ship the compressed text out via chunk writes */
-static void 
+static void
 png_write_compressed_data_out(png_structp png_ptr, compression_state *comp)
 {
    int i;
@@ -334,7 +334,8 @@
    /* write saved output buffers, if any */
    for (i = 0; i < comp->num_output_ptr; i++)
    {
-      png_write_chunk_data(png_ptr,(png_bytep)comp->output_ptr[i],png_ptr->zbuf_size);
+      png_write_chunk_data(png_ptr,(png_bytep)comp->output_ptr[i],
+         png_ptr->zbuf_size);
       png_free(png_ptr, comp->output_ptr[i]);
    }
    if (comp->max_output_ptr != 0)
@@ -566,6 +567,7 @@
 
 #if defined(PNG_WRITE_gAMA_SUPPORTED)
 /* write a gAMA chunk */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
 png_write_gAMA(png_structp png_ptr, double file_gamma)
 {
@@ -576,12 +578,28 @@
    png_byte buf[4];
 
    png_debug(1, "in png_write_gAMA\n");
-   /* file_gamma is saved in 1/1000000ths */
+   /* file_gamma is saved in 1/100,000ths */
    igamma = (png_uint_32)(file_gamma * 100000.0 + 0.5);
    png_save_uint_32(buf, igamma);
    png_write_chunk(png_ptr, (png_bytep)png_gAMA, buf, (png_size_t)4);
 }
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void
+png_write_gAMA_fixed(png_structp png_ptr, png_fixed_point file_gamma)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_gAMA;
+#endif
+   png_byte buf[4];
+
+   png_debug(1, "in png_write_gAMA\n");
+   /* file_gamma is already in 1/100,000ths, so it is stored unscaled */
+   png_save_uint_32(buf, (png_uint_32)file_gamma);
+   png_write_chunk(png_ptr, (png_bytep)png_gAMA, buf, (png_size_t)4);
+}
+#endif
+#endif
 
 #if defined(PNG_WRITE_sRGB_SUPPORTED)
 /* write a sRGB chunk */
@@ -634,7 +652,7 @@
                    PNG_TEXT_COMPRESSION_zTXt, &comp);
 
    /* make sure we include the NULL after the name and the compression type */
-   png_write_chunk_start(png_ptr, (png_bytep)png_iCCP, 
+   png_write_chunk_start(png_ptr, (png_bytep)png_iCCP,
           (png_uint_32)name_len+profile_len+2);
    png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 2);
 
@@ -669,9 +687,10 @@
    }
 
    /* make sure we include the NULL after the name */
-   png_write_chunk_start(png_ptr, (png_bytep) png_sPLT, 
-          (png_uint_32)(name_len + 1 + palette_size));
+   png_write_chunk_start(png_ptr, (png_bytep) png_sPLT,
+          (png_uint_32)(name_len + 2 + palette_size));
    png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 1);
+   png_write_chunk_data(png_ptr, (png_bytep)&spalette->depth, 1);
 
    /* loop through each palette entry, writing appropriately */
    for (ep = spalette->entries; ep<spalette->entries+spalette->nentries; ep++)
@@ -758,6 +777,7 @@
 
 #if defined(PNG_WRITE_cHRM_SUPPORTED)
 /* write the cHRM chunk */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
 png_write_cHRM(png_structp png_ptr, double white_x, double white_y,
    double red_x, double red_y, double green_x, double green_y,
@@ -766,15 +786,16 @@
 #ifdef PNG_USE_LOCAL_ARRAYS
    PNG_cHRM;
 #endif
-   png_uint_32 itemp;
    png_byte buf[32];
+   png_uint_32 itemp;
 
    png_debug(1, "in png_write_cHRM\n");
-   /* each value is saved int 1/1000000ths */
+   /* each value is saved in 1/100,000ths */
    if (white_x < 0 || white_x > 0.8 || white_y < 0 || white_y > 0.8 ||
        white_x + white_y > 1.0)
    {
       png_warning(png_ptr, "Invalid cHRM white point specified");
+      png_debug2(1, "white_x=%f, white_y=%f\n",white_x, white_y);
       return;
    }
    itemp = (png_uint_32)(white_x * 100000.0 + 0.5);
@@ -818,6 +839,57 @@
    png_write_chunk(png_ptr, (png_bytep)png_cHRM, buf, (png_size_t)32);
 }
 #endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void
+png_write_cHRM_fixed(png_structp png_ptr, png_fixed_point white_x,
+   png_fixed_point white_y, png_fixed_point red_x, png_fixed_point red_y,
+   png_fixed_point green_x, png_fixed_point green_y, png_fixed_point blue_x,
+   png_fixed_point blue_y)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_cHRM;
+#endif
+   png_byte buf[32];
+
+   png_debug(1, "in png_write_cHRM\n");
+   /* each value is saved in 1/100,000ths; a bad point aborts the chunk */
+   if (white_x > 80000L || white_y > 80000L || white_x + white_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM white point specified");
+      png_debug2(1, "white_x=%ld, white_y=%ld\n",white_x, white_y);
+      return;
+   }
+   png_save_uint_32(buf, white_x);
+   png_save_uint_32(buf + 4, white_y);
+
+   if (red_x > 80000L || red_y > 80000L || red_x + red_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM fixed red point specified");
+      return;
+   }
+   png_save_uint_32(buf + 8, red_x);
+   png_save_uint_32(buf + 12, red_y);
+
+   if (green_x > 80000L || green_y > 80000L || green_x + green_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM green point specified");
+      return;
+   }
+   png_save_uint_32(buf + 16, green_x);
+   png_save_uint_32(buf + 20, green_y);
+
+   if (blue_x > 80000L || blue_y > 80000L || blue_x + blue_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM blue point specified");
+      return;
+   }
+   png_save_uint_32(buf + 24, blue_x);
+   png_save_uint_32(buf + 28, blue_y);
+
+   png_write_chunk(png_ptr, (png_bytep)png_cHRM, buf, (png_size_t)32);
+}
+#endif
+#endif
 
 #if defined(PNG_WRITE_tRNS_SUPPORTED)
 /* write the tRNS chunk */
@@ -1039,7 +1111,7 @@
    {
       png_free(png_ptr, *new_key);
       *new_key=NULL;
-      png_chunk_warning(png_ptr, "zero length keyword");
+      png_chunk_warning(png_ptr, "Zero length keyword");
    }
 
    if (key_len > 79)
@@ -1074,6 +1146,8 @@
 
    if (text == NULL || *text == '\0')
       text_len = 0;
+   else
+      text_len = png_strlen(text);
 
    /* make sure we include the 0 after the key */
    png_write_chunk_start(png_ptr, (png_bytep)png_tEXt, (png_uint_32)key_len+text_len+1);
@@ -1120,10 +1194,12 @@
       png_free(png_ptr, new_key);
       return;
    }
+   
+   text_len = png_strlen(text);
 
    png_free(png_ptr, new_key);
 
-   /* compute the compressed data; do it now for the length */ 
+   /* compute the compressed data; do it now for the length */
    text_len = png_text_compress(png_ptr, text, text_len, compression, &comp);
 
    /* write start of chunk */
@@ -1145,53 +1221,49 @@
 #if defined(PNG_WRITE_iTXt_SUPPORTED)
 /* write an iTXt chunk */
 void
-png_write_iTXt(png_structp png_ptr, int compression, 
-          png_charp key, png_charp lang, png_charp text)
+png_write_iTXt(png_structp png_ptr, int compression, png_charp key,
+    png_charp lang, png_charp lang_key, png_charp text)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    PNG_iTXt;
 #endif
-   png_size_t lang_len, key_len, text_len = png_strlen(text);
+   png_size_t lang_len, key_len, lang_key_len, text_len;
    png_charp new_lang, new_key;
    png_byte cbuf[2];
    compression_state comp;
 
    png_debug(1, "in png_write_iTXt\n");
+
+   if (key == NULL || (key_len = png_check_keyword(png_ptr, key, &new_key))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in iTXt chunk");
+      return;
+   }
    if (lang == NULL || (lang_len = png_check_keyword(png_ptr, lang,
       &new_lang))==0)
    {
       png_warning(png_ptr, "Empty language field in iTXt chunk");
       return;
    }
-   if (key == NULL || (key_len = png_check_keyword(png_ptr, key, &new_key))==0)
-   {
-      png_warning(png_ptr, "Empty keyword in iTXt chunk");
-      return;
-   }
+   lang_key_len = png_strlen(lang_key);
+   text_len = png_strlen(text);
 
    if (text == NULL || *text == '\0')
       text_len = 0;
 
-   /* compute the compressed data; do it now for the length */ 
-   text_len = png_text_compress(png_ptr, text, text_len, compression, &comp);
+   /* compute the compressed data; do it now for the length */
+   text_len = png_text_compress(png_ptr, text, text_len, compression-2, &comp);
 
-   /* make sure we include the compression flag, the compression byte, 
-    * and the NULs after the lang and key parts */
-   png_write_chunk_start(png_ptr, (png_bytep)png_iTXt, 
-          (png_uint_32)(2 + lang_len+1 + key_len+1 + text_len));
+   /* make sure we include the compression flag, the compression byte,
+    * and the NULs after the key, lang, and lang_key parts */
 
-   /* set the compression bits */
-   if (compression == PNG_TEXT_COMPRESSION_NONE)
-   {
-       cbuf[0] = 0;
-       cbuf[1] = 0;
-   }
-   else /* compression == PNG_TEXT_COMPRESSION_zTXt */
-   {
-       cbuf[0] = 1;
-       cbuf[1] = 0;
-   }
-   png_write_chunk_data(png_ptr, cbuf, 2);
+   png_write_chunk_start(png_ptr, (png_bytep)png_iTXt,
+          (png_uint_32)(
+        5 /* comp byte, comp flag, terminators for key, lang and lang_key */
+        + key_len
+        + lang_len
+        + lang_key_len
+        + text_len));
 
    /*
     * We leave it to the application to meet PNG-1.0 requirements on the
@@ -1199,9 +1271,22 @@
     * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
     * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
     */
-   png_write_chunk_data(png_ptr, (png_bytep)new_lang, lang_len + 1);
    png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
 
+   /* set the compression flag */
+   if (compression == PNG_ITXT_COMPRESSION_NONE || \
+       compression == PNG_TEXT_COMPRESSION_NONE)
+       cbuf[0] = 0;
+   else /* compression == PNG_ITXT_COMPRESSION_zTXt */
+       cbuf[0] = 1;
+   /* set the compression method */
+   cbuf[1] = 0;
+   png_write_chunk_data(png_ptr, cbuf, 2);
+
+   png_write_chunk_data(png_ptr, (png_bytep)new_lang, lang_len + 1);
+   png_write_chunk_data(png_ptr, (png_bytep)lang_key, lang_key_len+1);
+   png_write_chunk_data(png_ptr, '\0', 1);
+
    png_write_compressed_data_out(png_ptr, &comp);
 
    png_write_chunk_end(png_ptr);
@@ -1296,8 +1381,9 @@
 
 #if defined(PNG_WRITE_sCAL_SUPPORTED)
 /* write the sCAL chunk */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
 void
-png_write_sCAL(png_structp png_ptr, png_charp unit, double width,double height)
+png_write_sCAL(png_structp png_ptr, int unit, double width,double height)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    PNG_sCAL;
@@ -1309,16 +1395,44 @@
 
    sprintf(wbuf, "%12.12e", width);
    sprintf(hbuf, "%12.12e", height);
-   total_len = png_strlen(unit)+1 + png_strlen(wbuf)+1 + png_strlen(hbuf);
+   total_len = 1 + png_strlen(wbuf)+1 + png_strlen(hbuf);
 
    png_debug1(3, "sCAL total length = %d\n", total_len);
    png_write_chunk_start(png_ptr, (png_bytep)png_sCAL, (png_uint_32)total_len);
-   png_write_chunk_data(png_ptr, (png_bytep)unit, png_strlen(unit)+1);
+   png_write_chunk_data(png_ptr, (png_bytep)&unit, 1);
    png_write_chunk_data(png_ptr, (png_bytep)wbuf, strlen(wbuf)+1);
    png_write_chunk_data(png_ptr, (png_bytep)hbuf, strlen(hbuf));
 
    png_write_chunk_end(png_ptr);
 }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Write the sCAL chunk from width and height supplied as ASCII strings:
+ * one unit byte, then width, a NUL separator, and height. */
+void
+png_write_sCAL_s(png_structp png_ptr, int unit, png_charp width,
+   png_charp height)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sCAL;
+#endif
+   png_size_t wlen, hlen, total_len;
+   png_byte unit_byte = (png_byte)unit; /* &unit is byte-order dependent */
+
+   png_debug(1, "in png_write_sCAL_s\n");
+   /* measure and send the caller's strings directly: no 32-byte copy */
+   wlen = png_strlen(width);
+   hlen = png_strlen(height);
+   total_len = 1 + wlen+1 + hlen;
+   png_debug1(3, "sCAL total length = %d\n", (int)total_len);
+   png_write_chunk_start(png_ptr, (png_bytep)png_sCAL, (png_uint_32)total_len);
+   png_write_chunk_data(png_ptr, &unit_byte, 1);
+   png_write_chunk_data(png_ptr, (png_bytep)width, wlen+1);
+   png_write_chunk_data(png_ptr, (png_bytep)height, hlen);
+   png_write_chunk_end(png_ptr);
+}
+#endif
+#endif
 #endif
 
 #if defined(PNG_WRITE_pHYs_SUPPORTED)
@@ -1383,20 +1497,20 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-   
+
    /* start of interlace block in the y direction */
    int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-   
+
    /* offset to next interlace block in the y direction */
    int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
-   
+
    png_size_t buf_size;
 
    png_debug(1, "in png_write_start_row\n");
@@ -1477,20 +1591,20 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-   
+
    /* start of interlace block in the y direction */
    int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-   
+
    /* offset to next interlace block in the y direction */
    int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
-   
+
    int ret;
 
    png_debug(1, "in png_write_finish_row\n");
@@ -1591,14 +1705,14 @@
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   
+
    /* start of interlace block */
    int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-   
+
    /* offset to next interlace block */
    int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 #endif
-   
+
    png_debug(1, "in png_do_write_interlace\n");
    /* we don't have to do anything on the last pass (6) */
 #if defined(PNG_USELESS_TESTS_SUPPORTED)
diff --git a/uncompr.c b/uncompr.c
new file mode 100644
index 0000000..d103321
--- /dev/null
+++ b/uncompr.c
@@ -0,0 +1,58 @@
+/* uncompr.c -- decompress a memory buffer
+ * Copyright (C) 1995-1998 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id$ */
+
+#include "zlib.h"
+
+/* ===========================================================================
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon exit, destLen is the actual size of the uncompressed data.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    z_stream strm;
+    int status;
+
+    /* Input side: reject a length that does not fit in uInt (16-bit hosts). */
+    strm.next_in = (Bytef*)source;
+    strm.avail_in = (uInt)sourceLen;
+    if (sourceLen != (uLong)strm.avail_in) return Z_BUF_ERROR;
+
+    /* Output side: the same narrowing check on the destination capacity. */
+    strm.next_out = dest;
+    strm.avail_out = (uInt)*destLen;
+    if (*destLen != (uLong)strm.avail_out) return Z_BUF_ERROR;
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+
+    status = inflateInit(&strm);
+    if (status != Z_OK) return status;
+
+    /* One pass only: Z_OK here is reported to the caller as Z_BUF_ERROR. */
+    status = inflate(&strm, Z_FINISH);
+    if (status == Z_STREAM_END) {
+        *destLen = strm.total_out;
+        return inflateEnd(&strm);
+    }
+    inflateEnd(&strm);
+    return status == Z_OK ? Z_BUF_ERROR : status;
+}