diff --git a/Makefile.in b/Makefile.in
index d9a3664..5654c78 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.13.1 from Makefile.am.
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
 # @configure_input@
 
-# Copyright (C) 1994-2012 Free Software Foundation, Inc.
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
@@ -23,23 +23,51 @@
 
 
 VPATH = @srcdir@
-am__make_dryrun = \
-  { \
-    am__dry=no; \
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
     case $$MAKEFLAGS in \
       *\\[\ \	]*) \
-        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
-          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
-      *) \
-        for am__flg in $$MAKEFLAGS; do \
-          case $$am__flg in \
-            *=*|--*) ;; \
-            *n*) am__dry=yes; break;; \
-          esac; \
-        done;; \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
     esac; \
-    test $$am__dry = yes; \
-  }
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
 pkgdatadir = $(datadir)/@PACKAGE@
 pkgincludedir = $(includedir)/@PACKAGE@
 pkglibdir = $(libdir)/@PACKAGE@
@@ -484,8 +512,8 @@
 $(am__aclocal_m4_deps):
 
 jconfig.h: stamp-h1
-	@if test ! -f $@; then rm -f stamp-h1; else :; fi
-	@if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
+	@test -f $@ || rm -f stamp-h1
+	@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
 
 stamp-h1: $(srcdir)/jconfig.cfg $(top_builddir)/config.status
 	@rm -f stamp-h1
@@ -532,6 +560,7 @@
 	  echo rm -f $${locs}; \
 	  rm -f $${locs}; \
 	}
+
 libjpeg.la: $(libjpeg_la_OBJECTS) $(libjpeg_la_DEPENDENCIES) $(EXTRA_libjpeg_la_DEPENDENCIES) 
 	$(AM_V_CCLD)$(libjpeg_la_LINK) -rpath $(libdir) $(libjpeg_la_OBJECTS) $(libjpeg_la_LIBADD) $(LIBS)
 install-binPROGRAMS: $(bin_PROGRAMS)
@@ -583,18 +612,23 @@
 	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
 	echo " rm -f" $$list; \
 	rm -f $$list
+
 cjpeg$(EXEEXT): $(cjpeg_OBJECTS) $(cjpeg_DEPENDENCIES) $(EXTRA_cjpeg_DEPENDENCIES) 
 	@rm -f cjpeg$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(cjpeg_OBJECTS) $(cjpeg_LDADD) $(LIBS)
+
 djpeg$(EXEEXT): $(djpeg_OBJECTS) $(djpeg_DEPENDENCIES) $(EXTRA_djpeg_DEPENDENCIES) 
 	@rm -f djpeg$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(djpeg_OBJECTS) $(djpeg_LDADD) $(LIBS)
+
 jpegtran$(EXEEXT): $(jpegtran_OBJECTS) $(jpegtran_DEPENDENCIES) $(EXTRA_jpegtran_DEPENDENCIES) 
 	@rm -f jpegtran$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(jpegtran_OBJECTS) $(jpegtran_LDADD) $(LIBS)
+
 rdjpgcom$(EXEEXT): $(rdjpgcom_OBJECTS) $(rdjpgcom_DEPENDENCIES) $(EXTRA_rdjpgcom_DEPENDENCIES) 
 	@rm -f rdjpgcom$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(rdjpgcom_OBJECTS) $(rdjpgcom_LDADD) $(LIBS)
+
 wrjpgcom$(EXEEXT): $(wrjpgcom_OBJECTS) $(wrjpgcom_DEPENDENCIES) $(EXTRA_wrjpgcom_DEPENDENCIES) 
 	@rm -f wrjpgcom$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(wrjpgcom_OBJECTS) $(wrjpgcom_LDADD) $(LIBS)
@@ -676,14 +710,14 @@
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c $<
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
 .c.obj:
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c `$(CYGPATH_W) '$<'`
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
 .c.lo:
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
diff --git a/README b/README
index fc5ca5d..014ad30 100644
--- a/README
+++ b/README
@@ -1,8 +1,8 @@
 The Independent JPEG Group's JPEG software
 ==========================================
 
-README for release 9 of 13-Jan-2013
-===================================
+README for release 9a of 19-Jan-2014
+====================================
 
 This distribution contains the ninth public release of the Independent JPEG
 Group's free JPEG software.  You are welcome to redistribute this software and
@@ -14,7 +14,7 @@
 and other members of the Independent JPEG Group.
 
 IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
-(also known as JPEG, together with ITU-T SG16).
+(previously known as JPEG, together with ITU-T SG16).
 
 
 DOCUMENTATION ROADMAP
@@ -115,7 +115,7 @@
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
 
-This software is copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2014, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 
 Permission is hereby granted to use, copy, modify, and distribute this
@@ -153,11 +153,11 @@
 but is also freely distributable.
 
 The IJG distribution formerly included code to read and write GIF files.
-To avoid entanglement with the Unisys LZW patent, GIF reading support has
-been removed altogether, and the GIF writer has been simplified to produce
-"uncompressed GIFs".  This technique does not use the LZW algorithm; the
-resulting GIF files are larger than usual, but are readable by all standard
-GIF decoders.
+To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
+support has been removed altogether, and the GIF writer has been simplified
+to produce "uncompressed GIFs".  This technique does not use the LZW
+algorithm; the resulting GIF files are larger than usual, but are readable
+by all standard GIF decoders.
 
 We are required to state that
     "The Graphics Interchange Format(c) is the Copyright property of
@@ -252,8 +252,8 @@
 The "official" archive site for this software is www.ijg.org.
 The most recent released version can always be found there in
 directory "files".  This particular version will be archived as
-http://www.ijg.org/files/jpegsrc.v9.tar.gz, and in Windows-compatible
-"zip" archive format as http://www.ijg.org/files/jpegsr9.zip.
+http://www.ijg.org/files/jpegsrc.v9a.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr9a.zip.
 
 The JPEG FAQ (Frequently Asked Questions) article is a source of some
 general information about JPEG.
@@ -280,7 +280,7 @@
 Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland.
 
 Thank to Thomas Richter and Daniel Lee for inviting me to the
-ISO/IEC JTC1/SC29/WG1 (also known as JPEG, together with ITU-T SG16)
+ISO/IEC JTC1/SC29/WG1 (previously known as JPEG, together with ITU-T SG16)
 meeting in Berlin, Germany.
 
 Thank to John Korejwa and Massimo Ballerini for inviting me to
@@ -306,10 +306,10 @@
 FILE FORMAT WARS
 ================
 
-The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together
-with ITU-T SG16) currently promotes different formats containing the name
-"JPEG" which is misleading because these formats are incompatible with
-original DCT-based JPEG and are based on faulty technologies.
+The ISO/IEC JTC1/SC29/WG1 standards committee (previously known as JPEG,
+together with ITU-T SG16) currently promotes different formats containing
+the name "JPEG" which is misleading because these formats are incompatible
+with original DCT-based JPEG and are based on faulty technologies.
 IJG therefore does not and will not support such momentary mistakes
 (see REFERENCES).
 There exist also distributions under the name "OpenJPEG" promoting such
@@ -322,9 +322,13 @@
 (In any case, our decoder will remain capable of reading existing JPEG
 image files indefinitely.)
 
-Furthermore, the ISO committee pretends to be "responsible for the popular
-JPEG" in their public reports which is not true because they don't respond to
-actual requirements for the maintenance of the original JPEG specification.
+The ISO committee pretends to be "responsible for the popular JPEG" in their
+public reports which is not true because they don't respond to actual
+requirements for the maintenance of the original JPEG specification.
+Furthermore, the ISO committee pretends to "ensure interoperability" with
+their standards which is not true because their "standards" support only
+application-specific and proprietary use cases and contain mathematically
+incorrect code.
 
 There are currently different distributions in circulation containing the
 name "libjpeg" which is misleading because they don't have the features and
@@ -332,19 +336,46 @@
 One of those fakes is released by members of the ISO committee and just uses
 the name of libjpeg for misdirection of people, similar to the abuse of the
 name JPEG as described above, while having nothing in common with actual IJG
-libjpeg distributions.
-The other one claims to be a "derivative" or "fork" of the original libjpeg
-and violates the license conditions as described under LEGAL ISSUES above.
-We have no sympathy for the release of misleading and illegal distributions
-derived from obsolete code bases.
+libjpeg distributions and containing mathematically incorrect code.
+The other one claims to be a "derivative" or "fork" of the original libjpeg,
+but violates the license conditions as described under LEGAL ISSUES above
+and violates basic C programming properties.
+We have no sympathy for the release of misleading, incorrect and illegal
+distributions derived from obsolete code bases.
 Don't use an obsolete code base!
 
+According to the UCC (Uniform Commercial Code) law, IJG has the lawful and
+legal right to foreclose on certain standardization bodies and other
+institutions or corporations that knowingly perform substantial and
+systematic deceptive acts and practices, fraud, theft, and damaging of the
+value of the people of this planet without their knowing, willing and
+intentional consent.
+The titles, ownership, and rights of these institutions and all their assets
+are now duly secured and held in trust for the free people of this planet.
+People of the planet, on every country, may have a financial interest in
+the assets of these former principals, agents, and beneficiaries of the
+foreclosed institutions and corporations.
+IJG asserts what is: that each man, woman, and child has unalienable value
+and rights granted and deposited in them by the Creator and not any one of
+the people is subordinate to any artificial principality, corporate fiction
+or the special interest of another without their appropriate knowing,
+willing and intentional consent made by contract or accommodation agreement.
+IJG expresses that which already was.
+The people have already determined and demanded that public administration
+entities, national governments, and their supporting judicial systems must
+be fully transparent, accountable, and liable.
+IJG has secured the value for all concerned free people of the planet.
+
+A partial list of foreclosed institutions and corporations ("Hall of Shame")
+is currently prepared and will be published later.
+
 
 TO DO
 =====
 
 Version 9 is the second release of a new generation JPEG standard
-to overcome the limitations of the original JPEG specification.
+to overcome the limitations of the original JPEG specification,
+and is the first true source reference JPEG codec.
 More features are being prepared for coming releases...
 
 Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/aclocal.m4 b/aclocal.m4
index 9f63e17..9e947ab 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.13.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
 
-# Copyright (C) 1996-2012 Free Software Foundation, Inc.
+# Copyright (C) 1996-2013 Free Software Foundation, Inc.
 
 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
@@ -8619,10 +8619,10 @@
 # generated from the m4 files accompanying Automake X.Y.
 # (This private macro should not be called outside this file.)
 AC_DEFUN([AM_AUTOMAKE_VERSION],
-[am__api_version='1.13'
+[am__api_version='1.14'
 dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
 dnl require some minimum version.  Point them to the right macro.
-m4_if([$1], [1.13.1], [],
+m4_if([$1], [1.14.1], [],
       [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
 ])
 
@@ -8638,7 +8638,7 @@
 # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
 # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.13.1])dnl
+[AM_AUTOMAKE_VERSION([1.14.1])dnl
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
 _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
@@ -8663,7 +8663,8 @@
 : ${AR=ar}
 
 AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface],
-  [am_cv_ar_interface=ar
+  [AC_LANG_PUSH([C])
+   am_cv_ar_interface=ar
    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])],
      [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD'
       AC_TRY_EVAL([am_ar_try])
@@ -8680,7 +8681,7 @@
       fi
       rm -f conftest.lib libconftest.a
      ])
-   ])
+   AC_LANG_POP([C])])
 
 case $am_cv_ar_interface in
 ar)
@@ -9019,7 +9020,7 @@
     DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
     test -z "$DEPDIR" && continue
     am__include=`sed -n 's/^am__include = //p' < "$mf"`
-    test -z "am__include" && continue
+    test -z "$am__include" && continue
     am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
     # Find all dependency output files, they are included files with
     # $(DEPDIR) in their names.  We invoke sed twice because it is the
@@ -9064,6 +9065,12 @@
 # This macro actually does too much.  Some checks are only needed if
 # your package does certain things.  But this isn't really a big deal.
 
+dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O.
+m4_define([AC_PROG_CC],
+m4_defn([AC_PROG_CC])
+[_AM_PROG_CC_C_O
+])
+
 # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
 # AM_INIT_AUTOMAKE([OPTIONS])
 # -----------------------------------------------
@@ -9172,7 +9179,48 @@
 AC_CONFIG_COMMANDS_PRE(dnl
 [m4_provide_if([_AM_COMPILER_EXEEXT],
   [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
-])
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
+  fi
+fi])
 
 dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
 dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@@ -9180,7 +9228,6 @@
 m4_define([_AC_COMPILER_EXEEXT],
 m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
 
-
 # When config.status generates a header, we must update the stamp-h file.
 # This file resides in the same directory as the config header
 # that is generated.  The stamp files are numbered to have different names.
@@ -9398,6 +9445,70 @@
 AC_DEFUN([_AM_IF_OPTION],
 [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
 
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC
+# to automatically call this.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND, save the exit status in ac_status, and log it.
+# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
 # Check to make sure that the build environment is sane.    -*- Autoconf -*-
 
 # Copyright (C) 1996-2013 Free Software Foundation, Inc.
@@ -9607,76 +9718,114 @@
 # Substitute a variable $(am__untar) that extract such
 # a tarball read from stdin.
 #     $(am__untar) < result.tar
+#
 AC_DEFUN([_AM_PROG_TAR],
 [# Always define AMTAR for backward compatibility.  Yes, it's still used
 # in the wild :-(  We should find a proper way to deprecate it ...
 AC_SUBST([AMTAR], ['$${TAR-tar}'])
-m4_if([$1], [v7],
-     [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
-     [m4_case([$1], [ustar],, [pax],,
-              [m4_fatal([Unknown tar format])])
-AC_MSG_CHECKING([how to create a $1 tar archive])
-# Loop over all known methods to create a tar archive until one works.
+
+# We'll loop over all known methods to create a tar archive until one works.
 _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
-_am_tools=${am_cv_prog_tar_$1-$_am_tools}
-# Do not fold the above two line into one, because Tru64 sh and
-# Solaris sh will not grok spaces in the rhs of '-'.
-for _am_tool in $_am_tools
-do
-  case $_am_tool in
-  gnutar)
-    for _am_tar in tar gnutar gtar;
-    do
-      AM_RUN_LOG([$_am_tar --version]) && break
-    done
-    am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
-    am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
-    am__untar="$_am_tar -xf -"
-    ;;
-  plaintar)
-    # Must skip GNU tar: if it does not support --format= it doesn't create
-    # ustar tarball either.
-    (tar --version) >/dev/null 2>&1 && continue
-    am__tar='tar chf - "$$tardir"'
-    am__tar_='tar chf - "$tardir"'
-    am__untar='tar xf -'
-    ;;
-  pax)
-    am__tar='pax -L -x $1 -w "$$tardir"'
-    am__tar_='pax -L -x $1 -w "$tardir"'
-    am__untar='pax -r'
-    ;;
-  cpio)
-    am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
-    am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
-    am__untar='cpio -i -H $1 -d'
-    ;;
-  none)
-    am__tar=false
-    am__tar_=false
-    am__untar=false
-    ;;
-  esac
 
-  # If the value was cached, stop now.  We just wanted to have am__tar
-  # and am__untar set.
-  test -n "${am_cv_prog_tar_$1}" && break
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
 
-  # tar/untar a dummy directory, and stop if the command works
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
   rm -rf conftest.dir
-  mkdir conftest.dir
-  echo GrepMe > conftest.dir/file
-  AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
-  rm -rf conftest.dir
-  if test -s conftest.tar; then
-    AM_RUN_LOG([$am__untar <conftest.tar])
-    grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
-  fi
-done
-rm -rf conftest.dir
 
-AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
-AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
 AC_SUBST([am__tar])
 AC_SUBST([am__untar])
 ]) # _AM_PROG_TAR
diff --git a/change.log b/change.log
index a1d94ff..97cde1d 100644
--- a/change.log
+++ b/change.log
@@ -1,6 +1,33 @@
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 9a  19-Jan-2014
+-----------------------
+
+Add support for wide gamut color spaces (JFIF version 2).
+Improve clarity and accuracy in color conversion modules.
+Note: Requires rebuild of test images.
+
+Extend the bit depth support to all values from 8 to 12
+(BITS_IN_JSAMPLE configuration option in jmorecfg.h).
+jpegtran now supports N bits sample data precision with all N from 8 to 12
+in a single instance.  Thank to Roland Fassauer for inspiration.
+
+Try to resolve issues with new boolean type definition.
+Thank also to v4hn for suggestion.
+
+Enable option to use default Huffman tables for lossless compression
+(for hardware solution), and in this case improve lossless RGB compression
+with reversible color transform.  Thank to Benny Alexandar for hint.
+
+Extend the entropy decoding structure, so that extraneous bytes between
+compressed scan data and following marker can be reported correctly.
+Thank to Nigel Tao for hint.
+
+Add jpegtran -wipe option and extension for -crop.
+Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
+
+
 Version 9  13-Jan-2013
 ----------------------
 
diff --git a/cjpeg.1 b/cjpeg.1
index e0b8d43..8684331 100644
--- a/cjpeg.1
+++ b/cjpeg.1
@@ -1,4 +1,4 @@
-.TH CJPEG 1 "4 May 2012"
+.TH CJPEG 1 "23 November 2013"
 .SH NAME
 cjpeg \- compress an image file to a JPEG file
 .SH SYNOPSIS
@@ -199,6 +199,26 @@
 widely implemented, so many decoders will be unable to view
 a reversible color transformed JPEG file at all.
 .TP
+.B \-bgycc
+Create big gamut YCC JPEG file.
+In this type of encoding the color difference components are quantized
+further by a factor of 2 compared to the normal Cb/Cr values, thus creating
+space to allow larger color values with higher saturation than the normal
+gamut limits to be encoded.  In order to compensate for the loss of color
+fidelity compared to a normal YCC encoded file, the color quantization
+tables can be adjusted accordingly.  For example,
+.B cjpeg \-bgycc \-quality
+80,90 will give similar results as
+.B cjpeg \-quality
+80.
+.B Caution:
+For correct decompression a decoder with big gamut YCC support (JFIF
+version 2) is required.  An old decoder may or may not display a big
+gamut YCC encoded JPEG file, depending on JFIF version check and
+corresponding warning/error configuration.  In case of a granted
+decompression the old decoder will display the image with half
+saturated colors.
+.TP
 .B \-dct int
 Use integer DCT method (default).
 .TP
@@ -355,7 +375,8 @@
 .SH AUTHOR
 Independent JPEG Group
 .SH BUGS
-GIF input files are no longer supported, to avoid the Unisys LZW patent.
+GIF input files are no longer supported, to avoid the Unisys LZW patent
+(now expired).
 (Conversion of GIF files to JPEG is usually a bad idea anyway.)
 .PP
 Not all variants of BMP and Targa file formats are supported.
diff --git a/cjpeg.c b/cjpeg.c
index d05cdd0..3cb07fa 100644
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -174,6 +174,7 @@
 #endif
 #if JPEG_LIB_VERSION_MAJOR >= 9
   fprintf(stderr, "  -rgb1          Create RGB JPEG file with reversible color transform\n");
+  fprintf(stderr, "  -bgycc         Create big gamut YCC JPEG file\n");
 #endif
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
@@ -323,6 +324,17 @@
 #endif
       jpeg_set_colorspace(cinfo, JCS_RGB);
 
+    } else if (keymatch(arg, "bgycc", 5)) {
+      /* Force a big gamut YCC JPEG file to be generated. */
+#if JPEG_LIB_VERSION_MAJOR >= 9 && \
+      (JPEG_LIB_VERSION_MAJOR > 9 || JPEG_LIB_VERSION_MINOR >= 1)
+      jpeg_set_colorspace(cinfo, JCS_BG_YCC);
+#else
+      fprintf(stderr, "%s: sorry, BG_YCC colorspace not supported\n",
+	      progname);
+      exit(EXIT_FAILURE);
+#endif
+
     } else if (keymatch(arg, "maxmemory", 3)) {
       /* Maximum memory in Kb (or Mb with 'm'). */
       long lval;
diff --git a/compile b/compile
new file mode 100755
index 0000000..531136b
--- /dev/null
+++ b/compile
@@ -0,0 +1,347 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2012-10-14.11; # UTC
+
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+
+file_conv=
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+	# lazily determine how to convert abs files
+	case `uname -s` in
+	  MINGW*)
+	    file_conv=mingw
+	    ;;
+	  CYGWIN*)
+	    file_conv=cygwin
+	    ;;
+	  *)
+	    file_conv=wine
+	    ;;
+	esac
+      fi
+      case $file_conv/,$2, in
+	*,$file_conv,*)
+	  ;;
+	mingw/*)
+	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+	  ;;
+	cygwin/*)
+	  file=`cygpath -m "$file" || echo "$file"`
+	  ;;
+	wine/*)
+	  file=`winepath -w "$file" || echo "$file"`
+	  ;;
+      esac
+      ;;
+  esac
+}
+
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+  func_file_conv "$1"
+  if test -z "$lib_path"; then
+    lib_path=$file
+  else
+    lib_path="$lib_path;$file"
+  fi
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    IFS=$save_IFS
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      found=yes
+      lib=$dir/$lib.dll.lib
+      break
+    fi
+    if test -f "$dir/$lib.lib"; then
+      found=yes
+      lib=$dir/$lib.lib
+      break
+    fi
+    if test -f "$dir/lib$lib.a"; then
+      found=yes
+      lib=$dir/lib$lib.a
+      break
+    fi
+  done
+  IFS=$save_IFS
+
+  if test "$found" != yes; then
+    lib=$lib.lib
+  fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:
+  linker_opts=
+  for arg
+  do
+    if test -n "$eat"; then
+      eat=
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"
+	      shift
+	      ;;
+	    *)
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  eat=1
+	  func_cl_dashL "$2"
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  shared=false
+	  ;;
+	-Wl,*)
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    shift
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  exit 1
+}
+
+eat=
+
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac
+
+ofile=
+cfile=
+
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2
+	    ;;
+	  *)
+	    set x "$@" -o "$2"
+	    shift
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  shift
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file.  Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?
+
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/config.guess b/config.guess
index 1804e9f..9afd676 100755
--- a/config.guess
+++ b/config.guess
@@ -1,10 +1,8 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-#   2011, 2012, 2013 Free Software Foundation, Inc.
+#   Copyright 1992-2013 Free Software Foundation, Inc.
 
-timestamp='2012-12-29'
+timestamp='2013-11-29'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -26,7 +24,7 @@
 # program.  This Exception is an additional permission under section 7
 # of the GNU General Public License, version 3 ("GPLv3").
 #
-# Originally written by Per Bothner. 
+# Originally written by Per Bothner.
 #
 # You can get the latest version of this script from:
 # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
@@ -52,9 +50,7 @@
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
-2012, 2013 Free Software Foundation, Inc.
+Copyright 1992-2013 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -136,6 +132,27 @@
 UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
 UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 
+case "${UNAME_SYSTEM}" in
+Linux|GNU|GNU/*)
+	# If the system lacks a compiler, then just pick glibc.
+	# We could probably try harder.
+	LIBC=gnu
+
+	eval $set_cc_for_build
+	cat <<-EOF > $dummy.c
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+	;;
+esac
+
 # Note: order is significant - the case branches are not exclusive.
 
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
@@ -857,21 +874,21 @@
 	exit ;;
     *:GNU:*:*)
 	# the GNU system
-	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
 	exit ;;
     *:GNU/*:*:*)
 	# other systems with GNU libc and userland
-	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
 	exit ;;
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
     aarch64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     aarch64_be:Linux:*:*)
 	UNAME_MACHINE=aarch64_be
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
@@ -884,59 +901,54 @@
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
 	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     arm*:Linux:*:*)
 	eval $set_cc_for_build
 	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
 	    | grep -q __ARM_EABI__
 	then
-	    echo ${UNAME_MACHINE}-unknown-linux-gnu
+	    echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	else
 	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
 		| grep -q __ARM_PCS_VFP
 	    then
-		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
 	    else
-		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
 	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     cris:Linux:*:*)
-	echo ${UNAME_MACHINE}-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
 	exit ;;
     crisv32:Linux:*:*)
-	echo ${UNAME_MACHINE}-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
 	exit ;;
     frv:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     hexagon:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     i*86:Linux:*:*)
-	LIBC=gnu
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#ifdef __dietlibc__
-	LIBC=dietlibc
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
-	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+	echo ${UNAME_MACHINE}-pc-linux-${LIBC}
 	exit ;;
     ia64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     m32r*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     m68*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     mips:Linux:*:* | mips64:Linux:*:*)
 	eval $set_cc_for_build
@@ -955,54 +967,63 @@
 	#endif
 EOF
 	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
-	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
 	;;
+    or1k:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
     or32:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     padre:Linux:*:*)
-	echo sparc-unknown-linux-gnu
+	echo sparc-unknown-linux-${LIBC}
 	exit ;;
     parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-gnu
+	echo hppa64-unknown-linux-${LIBC}
 	exit ;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
 	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
-	  PA7*) echo hppa1.1-unknown-linux-gnu ;;
-	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
-	  *)    echo hppa-unknown-linux-gnu ;;
+	  PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
+	  PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
+	  *)    echo hppa-unknown-linux-${LIBC} ;;
 	esac
 	exit ;;
     ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-gnu
+	echo powerpc64-unknown-linux-${LIBC}
 	exit ;;
     ppc:Linux:*:*)
-	echo powerpc-unknown-linux-gnu
+	echo powerpc-unknown-linux-${LIBC}
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-${LIBC}
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-${LIBC}
 	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
-	echo ${UNAME_MACHINE}-ibm-linux
+	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
 	exit ;;
     sh64*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     sh*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     tile*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     vax:Linux:*:*)
-	echo ${UNAME_MACHINE}-dec-linux-gnu
+	echo ${UNAME_MACHINE}-dec-linux-${LIBC}
 	exit ;;
     x86_64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     xtensa*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -1235,19 +1256,31 @@
 	exit ;;
     *:Darwin:*:*)
 	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
-	case $UNAME_PROCESSOR in
-	    i386)
-		eval $set_cc_for_build
-		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
-		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
-		      grep IS_64BIT_ARCH >/dev/null
-		  then
-		      UNAME_PROCESSOR="x86_64"
-		  fi
-		fi ;;
-	    unknown) UNAME_PROCESSOR=powerpc ;;
-	esac
+	eval $set_cc_for_build
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
+	    if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		    (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		    grep IS_64BIT_ARCH >/dev/null
+		then
+		    case $UNAME_PROCESSOR in
+			i386) UNAME_PROCESSOR=x86_64 ;;
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		    esac
+		fi
+	    fi
+	elif test "$UNAME_PROCESSOR" = i386 ; then
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
+	    # that puts up a graphical alert prompting to install
+	    # developer tools.  Any system running Mac OS X 10.7 or
+	    # later (Darwin 11 and later) is required to have a 64-bit
+	    # processor. This is not true of the ARM version of Darwin
+	    # that Apple uses in portable devices.
+	    UNAME_PROCESSOR=x86_64
+	fi
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
 	exit ;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
diff --git a/config.sub b/config.sub
index 802a224..61cb4bc 100755
--- a/config.sub
+++ b/config.sub
@@ -1,10 +1,8 @@
 #! /bin/sh
 # Configuration validation subroutine script.
-#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-#   2011, 2012, 2013 Free Software Foundation, Inc.
+#   Copyright 1992-2013 Free Software Foundation, Inc.
 
-timestamp='2012-12-29'
+timestamp='2013-10-01'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -70,9 +68,7 @@
 version="\
 GNU config.sub ($timestamp)
 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
-2012, 2013 Free Software Foundation, Inc.
+Copyright 1992-2013 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -256,12 +252,12 @@
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
-	| arc \
+	| arc | arceb \
 	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
 	| avr | avr32 \
 	| be32 | be64 \
 	| bfin \
-	| c4x | clipper \
+	| c4x | c8051 | clipper \
 	| d10v | d30v | dlx | dsp16xx \
 	| epiphany \
 	| fido | fr30 | frv \
@@ -269,6 +265,7 @@
 	| hexagon \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| k1om \
 	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
@@ -290,16 +287,17 @@
 	| mipsisa64r2 | mipsisa64r2el \
 	| mipsisa64sb1 | mipsisa64sb1el \
 	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipsr5900 | mipsr5900el \
 	| mipstx39 | mipstx39el \
 	| mn10200 | mn10300 \
 	| moxie \
 	| mt \
 	| msp430 \
 	| nds32 | nds32le | nds32be \
-	| nios | nios2 \
+	| nios | nios2 | nios2eb | nios2el \
 	| ns16k | ns32k \
 	| open8 \
-	| or32 \
+	| or1k | or32 \
 	| pdp10 | pdp11 | pj | pjl \
 	| powerpc | powerpc64 | powerpc64le | powerpcle \
 	| pyramid \
@@ -327,7 +325,7 @@
 	c6x)
 		basic_machine=tic6x-unknown
 		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
@@ -369,13 +367,13 @@
 	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
-	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
 	| avr-* | avr32-* \
 	| be32-* | be64-* \
 	| bfin-* | bs2000-* \
 	| c[123]* | c30-* | [cjt]90-* | c4x-* \
-	| clipper-* | craynv-* | cydra-* \
+	| c8051-* | clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
 	| elxsi-* \
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
@@ -384,6 +382,7 @@
 	| hexagon-* \
 	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
+	| k1om-* \
 	| le32-* | le64-* \
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
@@ -407,12 +406,13 @@
 	| mipsisa64r2-* | mipsisa64r2el-* \
 	| mipsisa64sb1-* | mipsisa64sb1el-* \
 	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipsr5900-* | mipsr5900el-* \
 	| mipstx39-* | mipstx39el-* \
 	| mmix-* \
 	| mt-* \
 	| msp430-* \
 	| nds32-* | nds32le-* | nds32be-* \
-	| nios-* | nios2-* \
+	| nios-* | nios2-* | nios2eb-* | nios2el-* \
 	| none-* | np1-* | ns16k-* | ns32k-* \
 	| open8-* \
 	| orion-* \
@@ -796,7 +796,7 @@
 		os=-mingw64
 		;;
 	mingw32)
-		basic_machine=i386-pc
+		basic_machine=i686-pc
 		os=-mingw32
 		;;
 	mingw32ce)
@@ -832,7 +832,7 @@
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
 		;;
 	msys)
-		basic_machine=i386-pc
+		basic_machine=i686-pc
 		os=-msys
 		;;
 	mvs)
@@ -1354,7 +1354,7 @@
 	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
 	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
 	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
-	      | -sym* | -kopensolaris* \
+	      | -sym* | -kopensolaris* | -plan9* \
 	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
 	      | -aos* | -aros* \
 	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
@@ -1500,9 +1500,6 @@
 	-aros*)
 		os=-aros
 		;;
-	-kaos*)
-		os=-kaos
-		;;
 	-zvmoe)
 		os=-zvmoe
 		;;
@@ -1551,6 +1548,9 @@
 	c4x-* | tic4x-*)
 		os=-coff
 		;;
+	c8051-*)
+		os=-elf
+		;;
 	hexagon-*)
 		os=-elf
 		;;
@@ -1594,6 +1594,9 @@
 	mips*-*)
 		os=-elf
 		;;
+	or1k-*)
+		os=-elf
+		;;
 	or32-*)
 		os=-coff
 		;;
diff --git a/configure b/configure
index c7b1387..77a5df3 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for libjpeg 9.0.0.
+# Generated by GNU Autoconf 2.69 for libjpeg 9.1.0.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@
 # Identity of this package.
 PACKAGE_NAME='libjpeg'
 PACKAGE_TARNAME='libjpeg'
-PACKAGE_VERSION='9.0.0'
-PACKAGE_STRING='libjpeg 9.0.0'
+PACKAGE_VERSION='9.1.0'
+PACKAGE_STRING='libjpeg 9.1.0'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1321,7 +1321,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures libjpeg 9.0.0 to adapt to many kinds of systems.
+\`configure' configures libjpeg 9.1.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1392,7 +1392,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of libjpeg 9.0.0:";;
+     short | recursive ) echo "Configuration of libjpeg 9.1.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1504,7 +1504,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-libjpeg configure 9.0.0
+libjpeg configure 9.1.0
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1869,7 +1869,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by libjpeg $as_me 9.0.0, which was
+It was created by libjpeg $as_me 9.1.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2370,7 +2370,7 @@
 
 # Initialize Automake
 # Don't require all the GNU mandated files
-am__api_version='1.13'
+am__api_version='1.14'
 
 # Find a good install program.  We prefer a C program (faster),
 # so one script is as good as another.  But avoid the broken or
@@ -2856,7 +2856,7 @@
 
 # Define the identity of the package.
  PACKAGE='libjpeg'
- VERSION='9.0.0'
+ VERSION='9.1.0'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2896,6 +2896,10 @@
 # in the wild :-(  We should find a proper way to deprecate it ...
 AMTAR='$${TAR-tar}'
 
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
 am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
 
 
@@ -2903,6 +2907,48 @@
 
 
 
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
 # Make --enable-silent-rules the default.
 # To get verbose build output you may configure
 # with --disable-silent-rules or use "make V=1".
@@ -3762,6 +3808,65 @@
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
+$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
+if ${am_cv_prog_cc_c_o+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+$as_echo "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
 DEPDIR="${am__leading_dot}deps"
 
 ac_config_commands="$ac_config_commands depfiles"
@@ -4534,7 +4639,13 @@
 if ${am_cv_ar_interface+:} false; then :
   $as_echo_n "(cached) " >&6
 else
-  am_cv_ar_interface=ar
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+   am_cv_ar_interface=ar
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 int some_variable = 0;
@@ -4565,6 +4676,11 @@
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5
@@ -13507,7 +13623,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by libjpeg $as_me 9.0.0, which was
+This file was extended by libjpeg $as_me 9.1.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -13573,7 +13689,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-libjpeg config.status 9.0.0
+libjpeg config.status 9.1.0
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
@@ -14632,7 +14748,7 @@
     DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
     test -z "$DEPDIR" && continue
     am__include=`sed -n 's/^am__include = //p' < "$mf"`
-    test -z "am__include" && continue
+    test -z "$am__include" && continue
     am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
     # Find all dependency output files, they are included files with
     # $(DEPDIR) in their names.  We invoke sed twice because it is the
diff --git a/configure.ac b/configure.ac
index e0cc36b..934d07e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@
 # Configure script for IJG libjpeg
 #
 
-AC_INIT([libjpeg], [9.0.0])
+AC_INIT([libjpeg], [9.1.0])
 
 # Directory where autotools helper scripts lives.
 AC_CONFIG_AUX_DIR([.])
diff --git a/depcomp b/depcomp
index 06b0882..4ebd5b3 100755
--- a/depcomp
+++ b/depcomp
@@ -1,7 +1,7 @@
 #! /bin/sh
 # depcomp - compile a program generating dependencies as side-effects
 
-scriptversion=2012-10-18.11; # UTC
+scriptversion=2013-05-30.07; # UTC
 
 # Copyright (C) 1999-2013 Free Software Foundation, Inc.
 
@@ -552,6 +552,7 @@
   G
   p
 }' >> "$depfile"
+  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
   rm -f "$tmpdepfile"
   ;;
 
diff --git a/djpeg.1 b/djpeg.1
index f3722d1..67ee5cb 100644
--- a/djpeg.1
+++ b/djpeg.1
@@ -1,4 +1,4 @@
-.TH DJPEG 1 "3 October 2009"
+.TH DJPEG 1 "23 November 2013"
 .SH NAME
 djpeg \- decompress a JPEG file to an image file
 .SH SYNOPSIS
@@ -246,7 +246,7 @@
 .SH AUTHOR
 Independent JPEG Group
 .SH BUGS
-To avoid the Unisys LZW patent,
+To avoid the Unisys LZW patent (now expired),
 .B djpeg
 produces uncompressed GIF files.  These are larger than they should be, but
 are readable by standard GIF decoders.
diff --git a/filelist.txt b/filelist.txt
index 86633bc..adfd14f 100644
--- a/filelist.txt
+++ b/filelist.txt
@@ -1,6 +1,6 @@
 IJG JPEG LIBRARY:  FILE LIST
 
-Copyright (C) 1994-2012, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -198,6 +198,7 @@
 depcomp
 missing
 ar-lib
+compile
 install-sh	Install shell script for those Unix systems lacking one.
 Makefile.in	Makefile input for configure.
 Makefile.am	Source file for use with Automake to generate Makefile.in.
diff --git a/install.txt b/install.txt
index 7ca92cb..0405306 100644
--- a/install.txt
+++ b/install.txt
@@ -1,6 +1,6 @@
 INSTALLATION INSTRUCTIONS for the Independent JPEG Group's JPEG software
 
-Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -418,54 +418,58 @@
 	    the directory containing the URT "librle.a" file (typically the
 	    "lib" subdirectory of the URT distribution).
 
-Support for 12-bit-deep pixel data:
+Support for 9-bit to 12-bit deep pixel data:
 
-The JPEG standard allows either 8-bit or 12-bit data precision.  (For color,
-this means 8 or 12 bits per channel, of course.)  If you need to work with
-deeper than 8-bit data, you can compile the IJG code for 12-bit operation.
+The IJG code currently allows 8, 9, 10, 11, or 12 bits sample data precision.
+(For color, this means 8 to 12 bits per channel, of course.)  If you need to
+work with deeper than 8-bit data, you can compile the IJG code for 9-bit to
+12-bit operation.
 To do so:
-  1. In jmorecfg.h, define BITS_IN_JSAMPLE as 12 rather than 8.
+  1. In jmorecfg.h, define BITS_IN_JSAMPLE as 9, 10, 11, or 12 rather than 8.
   2. In jconfig.h, undefine BMP_SUPPORTED, RLE_SUPPORTED, and TARGA_SUPPORTED,
-     because the code for those formats doesn't handle 12-bit data and won't
-     even compile.  (The PPM code does work, as explained below.  The GIF
-     code works too; it scales 8-bit GIF data to and from 12-bit depth
-     automatically.)
+     because the code for those formats doesn't handle deeper than 8-bit data
+     and won't even compile.  (The PPM code does work, as explained below.
+     The GIF code works too; it scales 8-bit GIF data to and from 12-bit
+     depth automatically.)
   3. Compile.  Don't expect "make test" to pass, since the supplied test
      files are for 8-bit data.
 
-Currently, 12-bit support does not work on 16-bit-int machines.
+Currently, 9-bit to 12-bit support does not work on 16-bit-int machines.
 
-Note that a 12-bit version will not read 8-bit JPEG files, nor vice versa;
-so you'll want to keep around a regular 8-bit compilation as well.
-(Run-time selection of data depth, to allow a single copy that does both,
-is possible but would probably slow things down considerably; it's very low
-on our to-do list.)
+Run-time selection and conversion of data precision are currently not
+supported and may be added later.
+Exception:  The transcoding part (jpegtran) supports all settings in a
+single instance, since it operates on the level of DCT coefficients and
+not sample values.
 
-The PPM reader (rdppm.c) can read 12-bit data from either text-format or
-binary-format PPM and PGM files.  Binary-format PPM/PGM files which have a
-maxval greater than 255 are assumed to use 2 bytes per sample, MSB first
-(big-endian order).  As of early 1995, 2-byte binary format is not
+The PPM reader (rdppm.c) can read deeper than 8-bit data from either
+text-format or binary-format PPM and PGM files.  Binary-format PPM/PGM files
+which have a maxval greater than 255 are assumed to use 2 bytes per sample,
+MSB first (big-endian order).  As of early 1995, 2-byte binary format is not
 officially supported by the PBMPLUS library, but it is expected that a
 future release of PBMPLUS will support it.  Note that the PPM reader will
 read files of any maxval regardless of the BITS_IN_JSAMPLE setting; incoming
-data is automatically rescaled to either maxval=255 or maxval=4095 as
-appropriate for the cjpeg bit depth.
+data is automatically rescaled to maxval=MAXJSAMPLE as appropriate for the
+cjpeg bit depth.
 
 The PPM writer (wrppm.c) will normally write 2-byte binary PPM or PGM
-format, maxval 4095, when compiled with BITS_IN_JSAMPLE=12.  Since this
+format, maxval=MAXJSAMPLE, when compiled with BITS_IN_JSAMPLE>8.  Since this
 format is not yet widely supported, you can disable it by compiling wrppm.c
 with PPM_NORAWWORD defined; then the data is scaled down to 8 bits to make a
 standard 1-byte/sample PPM or PGM file.  (Yes, this means still another copy
 of djpeg to keep around.  But hopefully you won't need it for very long.
 Poskanzer's supposed to get that new PBMPLUS release out Real Soon Now.)
 
-Of course, if you are working with 12-bit data, you probably have it stored
-in some other, nonstandard format.  In that case you'll probably want to
-write your own I/O modules to read and write your format.
+Of course, if you are working with 9-bit to 12-bit data, you probably have
+it stored in some other, nonstandard format.  In that case you'll probably
+want to write your own I/O modules to read and write your format.
 
-Note that a 12-bit version of cjpeg always runs in "-optimize" mode, in
-order to generate valid Huffman tables.  This is necessary because our
-default Huffman tables only cover 8-bit data.
+Note:
+The standard Huffman tables are only valid for 8-bit data precision.  If
+you selected more than 8-bit data precision, cjpeg uses arithmetic coding
+by default.  The Huffman encoder normally uses entropy optimization to
+compute usable tables for higher precision.  Otherwise, you'll have to
+supply different default Huffman tables.
 
 Removing code:
 
@@ -859,6 +863,12 @@
 	#ifndef __RPCNDR_H__	/* don't conflict if rpcndr.h already read */
 	typedef unsigned char boolean;
 	#endif
+	#ifndef FALSE		/* in case these macros already exist */
+	#define FALSE	0	/* values of boolean */
+	#endif
+	#ifndef TRUE
+	#define TRUE	1
+	#endif
 	#define HAVE_BOOLEAN	/* prevent jmorecfg.h from redefining it */
 (This is already in jconfig.vc, by the way.)
 
diff --git a/jcapistd.c b/jcapistd.c
index c0320b1..0917afa 100644
--- a/jcapistd.c
+++ b/jcapistd.c
@@ -2,6 +2,7 @@
  * jcapistd.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -145,7 +146,7 @@
     (*cinfo->master->pass_startup) (cinfo);
 
   /* Verify that at least one iMCU row has been passed. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
   if (num_lines < lines_per_iMCU_row)
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
diff --git a/jcarith.c b/jcarith.c
index a7f9ff7..a64190e 100644
--- a/jcarith.c
+++ b/jcarith.c
@@ -1,7 +1,7 @@
 /*
  * jcarith.c
  *
- * Developed 1997-2012 by Guido Vollbeding.
+ * Developed 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -362,7 +362,6 @@
 encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
   unsigned char *st;
   int blkn, ci, tbl;
   int v, v2, m;
@@ -381,14 +380,13 @@
 
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
     ci = cinfo->MCU_membership[blkn];
     tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
 
     /* Compute the DC value after the required point transform by Al.
      * This is simply an arithmetic right shift.
      */
-    m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
+    m = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
 
     /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
 
@@ -453,11 +451,11 @@
 encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
   int tbl, k, ke;
   int v, v2, m;
-  const int * natural_order;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -552,6 +550,8 @@
 
 /*
  * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -593,11 +593,11 @@
 encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
   int tbl, k, ke, kex;
   int v;
-  const int * natural_order;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -692,12 +692,13 @@
 encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
-  int blkn, ci, tbl, k, ke;
+  int tbl, k, ke;
   int v, v2, m;
-  const int * natural_order;
+  int blkn, ci;
+  jpeg_component_info * compptr;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
diff --git a/jccolor.c b/jccolor.c
index 81d74ef..f6b4a49 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -2,7 +2,7 @@
  * jccolor.c
  *
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2012 by Guido Vollbeding.
+ * Modified 2011-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -29,13 +29,25 @@
 /**************** RGB -> YCbCr conversion: most common case **************/
 
 /*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
- *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
+ *	Y  = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *	Cb = 0.5 * (B - Y) / (1 - Kb)
+ *	Cr = 0.5 * (R - Y) / (1 - Kr)
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C),
+ * the conversion equations to be implemented are therefore
+ *	Y  =  0.299 * R + 0.587 * G + 0.114 * B
+ *	Cb = -0.168735892 * R - 0.331264108 * G + 0.5 * B + CENTERJSAMPLE
+ *	Cr =  0.5 * R - 0.418687589 * G - 0.081312411 * B + CENTERJSAMPLE
  * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
  * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
  * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
@@ -49,9 +61,9 @@
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times R,G,B for all possible values.
  * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage you
+ * shouldn't be changing colorspace anyway.
  * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
  * in the tables to save adding them separately in the inner loop.
  */
@@ -96,21 +108,21 @@
 				(TABLE_SIZE * SIZEOF(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
-    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
-    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.299) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.587) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.114) * i   + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.168735892)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.331264108)) * i;
     /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
      * This ensures that the maximum output will round to MAXJSAMPLE
      * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
      */
-    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.5) * i    + CBCR_OFFSET + ONE_HALF-1;
 /*  B=>Cb and R=>Cr tables are the same
-    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.5) * i    + CBCR_OFFSET + ONE_HALF-1;
 */
-    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
-    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.418687589)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.081312411)) * i;
   }
 }
 
@@ -274,6 +286,9 @@
  * Convert some rows of samples to the JPEG colorspace.
  * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
  * (forward reversible color transform).
+ * This can be seen as an adaption of the general RGB->YCbCr
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
  */
 
 METHODDEF(void)
@@ -312,7 +327,7 @@
 /*
  * Convert some rows of samples to the JPEG colorspace.
  * This version handles grayscale output with no conversion.
- * The source can be either plain grayscale or YCbCr (since Y == gray).
+ * The source can be either plain grayscale or YCC (since Y == gray).
  */
 
 METHODDEF(void)
@@ -439,11 +454,13 @@
     break;
 
   case JCS_RGB:
+  case JCS_BG_RGB:
     if (cinfo->input_components != RGB_PIXELSIZE)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
 
   case JCS_YCbCr:
+  case JCS_BG_YCC:
     if (cinfo->input_components != 3)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
@@ -460,8 +477,10 @@
     break;
   }
 
-  /* Support color transform only for RGB colorspace */
-  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
     ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
 
   /* Check num_components, set conversion method based on requested space */
@@ -469,20 +488,26 @@
   case JCS_GRAYSCALE:
     if (cinfo->num_components != 1)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_GRAYSCALE ||
-	cinfo->in_color_space == JCS_YCbCr)
+    switch (cinfo->in_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
       cconvert->pub.color_convert = grayscale_convert;
-    else if (cinfo->in_color_space == JCS_RGB) {
+      break;
+    case JCS_RGB:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = rgb_gray_convert;
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   case JCS_RGB:
+  case JCS_BG_RGB:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
+    if (cinfo->in_color_space == cinfo->jpeg_color_space) {
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_convert;
@@ -492,7 +517,6 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-	break;
       }
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -501,13 +525,48 @@
   case JCS_YCbCr:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = rgb_ycc_convert;
-    } else if (cinfo->in_color_space == JCS_YCbCr)
+      break;
+    case JCS_YCbCr:
       cconvert->pub.color_convert = null_convert;
-    else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_YCC:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
+      /* For conversion from normal RGB input to BG_YCC representation,
+       * the Cb/Cr values are first computed as usual, and then
+       * quantized further after DCT processing by a factor of
+       * 2 in reference to the nominal quantization factor.
+       */
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /* compute normal YCC first */
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+      break;
+    case JCS_YCbCr:
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /*FALLTHROUGH*/
+    case JCS_BG_YCC:
+      /* Pass through for BG_YCC input */
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   case JCS_CMYK:
@@ -522,13 +581,17 @@
   case JCS_YCCK:
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK) {
+    switch (cinfo->in_color_space) {
+    case JCS_CMYK:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = cmyk_ycck_convert;
-    } else if (cinfo->in_color_space == JCS_YCCK)
+      break;
+    case JCS_YCCK:
       cconvert->pub.color_convert = null_convert;
-    else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   default:			/* allow null conversion of JCS_UNKNOWN */
diff --git a/jcdctmgr.c b/jcdctmgr.c
index 0bbdbb6..fafab91 100644
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -2,6 +2,7 @@
  * jcdctmgr.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -25,22 +26,30 @@
   /* Pointer to the DCT routine actually in use */
   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
 
-  /* The actual post-DCT divisors --- not identical to the quant table
-   * entries, because of scaling (especially for an unnormalized DCT).
-   * Each table is given in normal array order.
-   */
-  DCTELEM * divisors[NUM_QUANT_TBLS];
-
 #ifdef DCT_FLOAT_SUPPORTED
   /* Same as above for the floating-point case. */
   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
-  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
 #endif
 } my_fdct_controller;
 
 typedef my_fdct_controller * my_fdct_ptr;
 
 
+/* The allocated post-DCT divisor tables -- big enough for any
+ * supported variant and not identical to the quant table entries,
+ * because of scaling (especially for an unnormalized DCT) --
+ * are pointed to by dct_table in the per-component comp_info
+ * structures.  Each table is given in normal array order.
+ */
+
+typedef union {
+  DCTELEM int_array[DCTSIZE2];
+#ifdef DCT_FLOAT_SUPPORTED
+  FAST_FLOAT float_array[DCTSIZE2];
+#endif
+} divisor_table;
+
+
 /* The current scaled-DCT routines require ISLOW-style divisor tables,
  * so be sure to compile that code if either ISLOW or SCALING is requested.
  */
@@ -71,7 +80,7 @@
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
-  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
   JDIMENSION bi;
 
@@ -134,7 +143,7 @@
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
-  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
   JDIMENSION bi;
 
@@ -352,22 +361,17 @@
 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     qtbl = cinfo->quant_tbl_ptrs[qtblno];
-    /* Compute divisors for this quant table */
-    /* We may do this more than once for same table, but it's not a big deal */
+    /* Create divisor table from quant table */
     switch (method) {
 #ifdef PROVIDE_ISLOW_TABLES
     case JDCT_ISLOW:
       /* For LL&M IDCT method, divisors are equal to raw quantization
        * coefficients multiplied by 8 (to counteract scaling).
        */
-      if (fdct->divisors[qtblno] == NULL) {
-	fdct->divisors[qtblno] = (DCTELEM *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      DCTSIZE2 * SIZEOF(DCTELEM));
-      }
-      dtbl = fdct->divisors[qtblno];
+      dtbl = (DCTELEM *) compptr->dct_table;
       for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+	dtbl[i] =
+	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
       }
       fdct->pub.forward_DCT[ci] = forward_DCT;
       break;
@@ -395,17 +399,12 @@
 	};
 	SHIFT_TEMPS
 
-	if (fdct->divisors[qtblno] == NULL) {
-	  fdct->divisors[qtblno] = (DCTELEM *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(DCTELEM));
-	}
-	dtbl = fdct->divisors[qtblno];
+	dtbl = (DCTELEM *) compptr->dct_table;
 	for (i = 0; i < DCTSIZE2; i++) {
 	  dtbl[i] = (DCTELEM)
 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 				  (INT32) aanscales[i]),
-		    CONST_BITS-3);
+		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
 	}
       }
       fdct->pub.forward_DCT[ci] = forward_DCT;
@@ -422,25 +421,20 @@
 	 * What's actually stored is 1/divisor so that the inner loop can
 	 * use a multiplication rather than a division.
 	 */
-	FAST_FLOAT * fdtbl;
+	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
 	int row, col;
 	static const double aanscalefactor[DCTSIZE] = {
 	  1.0, 1.387039845, 1.306562965, 1.175875602,
 	  1.0, 0.785694958, 0.541196100, 0.275899379
 	};
 
-	if (fdct->float_divisors[qtblno] == NULL) {
-	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(FAST_FLOAT));
-	}
-	fdtbl = fdct->float_divisors[qtblno];
 	i = 0;
 	for (row = 0; row < DCTSIZE; row++) {
 	  for (col = 0; col < DCTSIZE; col++) {
 	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / (((double) qtbl->quantval[i] *
-		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	      (1.0 / ((double) qtbl->quantval[i] *
+		      aanscalefactor[row] * aanscalefactor[col] *
+		      (compptr->component_needed ? 16.0 : 8.0)));
 	    i++;
 	  }
 	}
@@ -464,19 +458,20 @@
 jinit_forward_dct (j_compress_ptr cinfo)
 {
   my_fdct_ptr fdct;
-  int i;
+  int ci;
+  jpeg_component_info *compptr;
 
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_fdct_controller));
-  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
+  cinfo->fdct = &fdct->pub;
   fdct->pub.start_pass = start_pass_fdctmgr;
 
-  /* Mark divisor tables unallocated */
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    fdct->divisors[i] = NULL;
-#ifdef DCT_FLOAT_SUPPORTED
-    fdct->float_divisors[i] = NULL;
-#endif
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate a divisor table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(divisor_table));
   }
 }
diff --git a/jchuff.c b/jchuff.c
index 257d7aa..d1313f6 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -2,7 +2,7 @@
  * jchuff.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2009 by Guido Vollbeding.
+ * Modified 2006-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -308,24 +308,27 @@
 /* Emit some bits; return TRUE if successful, FALSE if must suspend */
 {
   /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = state->cur.put_bits;
+  register INT32 put_buffer;
+  register int put_bits;
 
   /* if size is 0, caller used an invalid Huffman table entry */
   if (size == 0)
     ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
 
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
+
+  /* new number of bits in buffer */
+  put_bits = size + state->cur.put_bits;
+
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
-  
+  /* and merge with old buffer contents */
+  put_buffer |= state->cur.put_buffer;
+
   while (put_bits >= 8) {
     int c = (int) ((put_buffer >> 16) & 0xFF);
-    
+
     emit_byte_s(state, c, return FALSE);
     if (c == 0xFF) {		/* need to stuff a zero byte? */
       emit_byte_s(state, 0, return FALSE);
@@ -347,8 +350,8 @@
 /* Emit some bits, unless we are in gather mode */
 {
   /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = entropy->saved.put_bits;
+  register INT32 put_buffer;
+  register int put_bits;
 
   /* if size is 0, caller used an invalid Huffman table entry */
   if (size == 0)
@@ -357,9 +360,11 @@
   if (entropy->gather_statistics)
     return;			/* do nothing if we're only getting stats */
 
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
+
+  /* new number of bits in buffer */
+  put_bits = size + entropy->saved.put_bits;
 
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
@@ -543,10 +548,7 @@
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
   register int temp, temp2;
   register int nbits;
-  int blkn, ci;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
-  jpeg_component_info * compptr;
+  int blkn, ci, tbl;
   ISHIFT_TEMPS
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -559,28 +561,27 @@
 
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
     ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
 
     /* Compute the DC value after the required point transform by Al.
      * This is simply an arithmetic right shift.
      */
-    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+    temp = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
 
     /* DC differences are figured on the point-transformed values. */
-    temp = temp2 - entropy->saved.last_dc_val[ci];
-    entropy->saved.last_dc_val[ci] = temp2;
+    temp2 = temp - entropy->saved.last_dc_val[ci];
+    entropy->saved.last_dc_val[ci] = temp;
 
     /* Encode the DC coefficient difference per section G.1.2.1 */
-    temp2 = temp;
+    temp = temp2;
     if (temp < 0) {
       temp = -temp;		/* temp is abs value of input */
       /* For a negative input, want temp2 = bitwise complement of abs(input) */
       /* This code assumes we are on a two's complement machine */
       temp2--;
     }
-    
+
     /* Find the number of bits needed for the magnitude of the coefficient */
     nbits = 0;
     while (temp) {
@@ -592,10 +593,10 @@
      */
     if (nbits > MAX_COEF_BITS+1)
       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-    
+
     /* Count/emit the Huffman-coded symbol for the number of bits */
-    emit_dc_symbol(entropy, compptr->dc_tbl_no, nbits);
-    
+    emit_dc_symbol(entropy, tbl, nbits);
+
     /* Emit that number of bits of the value, if positive, */
     /* or the complement of its magnitude, if negative. */
     if (nbits)			/* emit_bits rejects calls with size 0 */
@@ -628,12 +629,12 @@
 encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
   register int temp, temp2;
   register int nbits;
   register int r, k;
   int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -731,18 +732,15 @@
 
 /*
  * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
 encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp;
-  int blkn;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
+  int Al, blkn;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -752,13 +750,12 @@
     if (entropy->restarts_to_go == 0)
       emit_restart_e(entropy, entropy->next_restart_num);
 
+  Al = cinfo->Al;
+
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
     /* We simply emit the Al'th bit of the DC coefficient value. */
-    temp = (*block)[0];
-    emit_bits_e(entropy, (unsigned int) (temp >> Al), 1);
+    emit_bits_e(entropy, (unsigned int) (MCU_data[blkn][0][0] >> Al), 1);
   }
 
   cinfo->dest->next_output_byte = entropy->next_output_byte;
@@ -786,14 +783,14 @@
 encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
   register int temp;
   register int r, k;
+  int Se, Al;
   int EOB;
   char *BR_buffer;
   unsigned int BR;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
   int absvalues[DCTSIZE2];
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -918,7 +915,7 @@
 {
   register int temp, temp2;
   register int nbits;
-  register int k, r, i;
+  register int r, k;
   int Se = state->cinfo->lim_Se;
   const int * natural_order = state->cinfo->natural_order;
 
@@ -960,7 +957,7 @@
   r = 0;			/* r = run length of zeros */
 
   for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
+    if ((temp2 = block[natural_order[k]]) == 0) {
       r++;
     } else {
       /* if run length > 15, must emit special run-length-16 codes (0xF0) */
@@ -970,7 +967,7 @@
 	r -= 16;
       }
 
-      temp2 = temp;
+      temp = temp2;
       if (temp < 0) {
 	temp = -temp;		/* temp is abs value of input */
 	/* This code assumes we are on a two's complement machine */
@@ -986,8 +983,8 @@
 	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
 
       /* Emit Huffman symbol for run length / number of bits */
-      i = (r << 4) + nbits;
-      if (! emit_bits_s(state, actbl->ehufco[i], actbl->ehufsi[i]))
+      temp = (r << 4) + nbits;
+      if (! emit_bits_s(state, actbl->ehufco[temp], actbl->ehufsi[temp]))
 	return FALSE;
 
       /* Emit that number of bits of the value, if positive, */
@@ -1124,16 +1121,16 @@
 {
   register int temp;
   register int nbits;
-  register int k, r;
+  register int r, k;
   int Se = cinfo->lim_Se;
   const int * natural_order = cinfo->natural_order;
-  
+
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = block[0] - last_dc_val;
   if (temp < 0)
     temp = -temp;
-  
+
   /* Find the number of bits needed for the magnitude of the coefficient */
   nbits = 0;
   while (temp) {
@@ -1148,11 +1145,11 @@
 
   /* Count the Huffman symbol for the number of bits */
   dc_counts[nbits]++;
-  
+
   /* Encode the AC coefficients per section F.1.2.2 */
-  
+
   r = 0;			/* r = run length of zeros */
-  
+
   for (k = 1; k <= Se; k++) {
     if ((temp = block[natural_order[k]]) == 0) {
       r++;
@@ -1162,11 +1159,11 @@
 	ac_counts[0xF0]++;
 	r -= 16;
       }
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       if (temp < 0)
 	temp = -temp;
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       nbits = 1;		/* there must be at least one 1 bit */
       while ((temp >>= 1))
@@ -1174,10 +1171,10 @@
       /* Check for out-of-range coefficient values */
       if (nbits > MAX_COEF_BITS)
 	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-      
+
       /* Count Huffman symbol for run length / number of bits */
       ac_counts[(r << 4) + nbits]++;
-      
+
       r = 0;
     }
   }
@@ -1562,7 +1559,7 @@
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(huff_entropy_encoder));
-  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
+  cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass_huff;
 
   /* Mark tables unallocated */
diff --git a/jcinit.c b/jcinit.c
index 0ba310f..1e13e34 100644
--- a/jcinit.c
+++ b/jcinit.c
@@ -2,6 +2,7 @@
  * jcinit.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -29,6 +30,24 @@
 GLOBAL(void)
 jinit_compress_master (j_compress_ptr cinfo)
 {
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Sanity check on image dimensions */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
   /* Initialize master control (includes parameter checking/processing) */
   jinit_c_master_control(cinfo, FALSE /* full compression */);
 
diff --git a/jcmarker.c b/jcmarker.c
index 92aa65f..ca2bb39 100644
--- a/jcmarker.c
+++ b/jcmarker.c
@@ -2,7 +2,7 @@
  * jcmarker.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -508,8 +508,8 @@
  * Write datastream header.
  * This consists of an SOI and optional APPn markers.
  * We recommend use of the JFIF marker, but not the Adobe marker,
- * when using YCbCr or grayscale data.  The JFIF marker should NOT
- * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * when using YCbCr or grayscale data.  The JFIF marker is also used
+ * for other standard JPEG colorspaces.  The Adobe marker is helpful
  * to distinguish RGB, CMYK, and YCCK colorspaces.
  * Note that an application can write additional header markers after
  * jpeg_start_compress returns.
diff --git a/jcmaster.c b/jcmaster.c
index caf80a5..2a8ae63 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -2,7 +2,7 @@
  * jcmaster.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2011 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -222,8 +222,6 @@
 {
   int ci, ssize;
   jpeg_component_info *compptr;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
 
   if (transcode_only)
     jpeg_calc_trans_dimensions(cinfo);
@@ -251,7 +249,7 @@
 
   /* Sanity check on image dimensions */
   if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
-      cinfo->num_components <= 0 || cinfo->input_components <= 0)
+      cinfo->num_components <= 0)
     ERREXIT(cinfo, JERR_EMPTY_IMAGE);
 
   /* Make sure image isn't bigger than I can handle */
@@ -259,14 +257,8 @@
       (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
 
-  /* Width of an input scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -339,8 +331,10 @@
       jdiv_round_up((long) cinfo->jpeg_height *
 		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
 		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Mark component needed (this flag isn't actually used for compression) */
-    compptr->component_needed = TRUE;
+    /* Don't need quantization scale after DCT,
+     * until color conversion says otherwise.
+     */
+    compptr->component_needed = FALSE;
   }
 
   /* Compute number of fully interleaved MCU rows (number of times that
@@ -811,7 +805,7 @@
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(my_comp_master));
-  cinfo->master = (struct jpeg_comp_master *) master;
+  cinfo->master = &master->pub;
   master->pub.prepare_for_pass = prepare_for_pass;
   master->pub.pass_startup = pass_startup;
   master->pub.finish_pass = finish_pass_master;
@@ -833,10 +827,14 @@
     cinfo->num_scans = 1;
   }
 
-  if ((cinfo->progressive_mode || cinfo->block_size < DCTSIZE) &&
-      !cinfo->arith_code)			/*  TEMPORARY HACK ??? */
-    /* assume default tables no good for progressive or downscale mode */
-    cinfo->optimize_coding = TRUE;
+  if (cinfo->optimize_coding)
+    cinfo->arith_code = FALSE; /* disable arithmetic coding */
+  else if (! cinfo->arith_code &&
+	   (cinfo->progressive_mode ||
+	    (cinfo->block_size > 1 && cinfo->block_size < DCTSIZE)))
+    /* TEMPORARY HACK ??? */
+    /* assume default tables no good for progressive or reduced AC mode */
+    cinfo->optimize_coding = TRUE; /* force Huffman optimization */
 
   /* Initialize my private state */
   if (transcode_only) {
diff --git a/jconfig.cfg b/jconfig.cfg
index bb7435c..c4548fc 100644
--- a/jconfig.cfg
+++ b/jconfig.cfg
@@ -17,11 +17,17 @@
 /* Define this if you get warnings about undefined structures. */
 #undef INCOMPLETE_TYPES_BROKEN
 
-/* Define "boolean" as unsigned char, not int, on Windows systems. */
+/* Define "boolean" as unsigned char, not enum, on Windows systems. */
 #ifdef _WIN32
 #ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
 typedef unsigned char boolean;
 #endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
 #define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
 #endif
 
diff --git a/jconfig.txt b/jconfig.txt
index b96d312..d1710ae 100644
--- a/jconfig.txt
+++ b/jconfig.txt
@@ -2,6 +2,7 @@
  * jconfig.txt
  *
  * Copyright (C) 1991-1994, Thomas G. Lane.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -91,12 +92,18 @@
  */
 #undef INCOMPLETE_TYPES_BROKEN
 
-/* Define "boolean" as unsigned char, not int, on Windows systems.
+/* Define "boolean" as unsigned char, not enum, on Windows systems.
  */
 #ifdef _WIN32
 #ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
 typedef unsigned char boolean;
 #endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
 #define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
 #endif
 
diff --git a/jconfig.vc b/jconfig.vc
index f15b418..7d88482 100644
--- a/jconfig.vc
+++ b/jconfig.vc
@@ -19,6 +19,12 @@
 #ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
 typedef unsigned char boolean;
 #endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
 #define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
 
 
diff --git a/jcparam.c b/jcparam.c
index e530c3c..4b2bee2 100644
--- a/jcparam.c
+++ b/jcparam.c
@@ -2,7 +2,7 @@
  * jcparam.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -323,18 +323,17 @@
   /* Expect normal source image, not raw downsampled data */
   cinfo->raw_data_in = FALSE;
 
-  /* Use Huffman coding, not arithmetic coding, by default */
-  cinfo->arith_code = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, use arithmetic coding.
+   * (Alternatively, using Huffman coding would be possible with forcing
+   * optimization on so that usable tables will be computed, or by
+   * supplying default tables that are valid for the desired precision.)
+   * Otherwise, use Huffman coding by default.
+   */
+  cinfo->arith_code = cinfo->data_precision > 8 ? TRUE : FALSE;
 
   /* By default, don't do extra passes to optimize entropy coding */
   cinfo->optimize_coding = FALSE;
-  /* The standard Huffman tables are only valid for 8-bit data precision.
-   * If the precision is higher, force optimization on so that usable
-   * tables will be computed.  This test can be removed if default tables
-   * are supplied that are valid for the desired precision.
-   */
-  if (cinfo->data_precision > 8)
-    cinfo->optimize_coding = TRUE;
 
   /* By default, use the simpler non-cosited sampling alignment */
   cinfo->CCIR601_sampling = FALSE;
@@ -360,6 +359,9 @@
    * JFIF_minor_version to 2.  We could probably get away with just defaulting
    * to 1.02, but there may still be some decoders in use that will complain
    * about that; saying 1.01 should minimize compatibility problems.
+   *
+   * For wide gamut colorspaces (BG_RGB and BG_YCC), the major version will be
+   * overridden by jpeg_set_colorspace and set to 2.
    */
   cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
   cinfo->JFIF_minor_version = 1;
@@ -384,6 +386,9 @@
 jpeg_default_colorspace (j_compress_ptr cinfo)
 {
   switch (cinfo->in_color_space) {
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
   case JCS_GRAYSCALE:
     jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
     break;
@@ -399,8 +404,12 @@
   case JCS_YCCK:
     jpeg_set_colorspace(cinfo, JCS_YCCK);
     break;
-  case JCS_UNKNOWN:
-    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+  case JCS_BG_RGB:
+    /* No translation for now -- conversion to BG_YCC not yet supportet */
+    jpeg_set_colorspace(cinfo, JCS_BG_RGB);
+    break;
+  case JCS_BG_YCC:
+    jpeg_set_colorspace(cinfo, JCS_BG_YCC);
     break;
   default:
     ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
@@ -441,29 +450,40 @@
   cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
 
   switch (colorspace) {
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);
+    }
+    break;
   case JCS_GRAYSCALE:
     cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
     cinfo->num_components = 1;
     /* JFIF specifies component ID 1 */
-    SET_COMP(0, 1, 1,1, 0, 0,0);
+    SET_COMP(0, 0x01, 1,1, 0, 0,0);
     break;
   case JCS_RGB:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
     cinfo->num_components = 3;
-    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0,
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0,
 		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
 		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
     break;
   case JCS_YCbCr:
     cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
     cinfo->num_components = 3;
     /* JFIF specifies component IDs 1,2,3 */
     /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
     break;
   case JCS_CMYK:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
@@ -476,19 +496,33 @@
   case JCS_YCCK:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
     cinfo->num_components = 4;
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
-    SET_COMP(3, 4, 2,2, 0, 0,0);
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
+    SET_COMP(3, 0x04, 2,2, 0, 0,0);
     break;
-  case JCS_UNKNOWN:
-    cinfo->num_components = cinfo->input_components;
-    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      SET_COMP(ci, ci, 1,1, 0, 0,0);
-    }
+  case JCS_BG_RGB:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal R/G/B component IDs */
+    SET_COMP(0, 0x72 /* 'r' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    SET_COMP(1, 0x67 /* 'g' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x62 /* 'b' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    break;
+  case JCS_BG_YCC:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal Cb/Cr component IDs */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x22, 1,1, 1, 1,1);
+    SET_COMP(2, 0x23, 1,1, 1, 1,1);
     break;
   default:
     ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
@@ -572,8 +606,10 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   /* Figure space needed for script.  Calculation must match code below! */
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
     nscans = 10;
   } else {
     /* All-purpose script for other color spaces. */
@@ -588,7 +624,7 @@
    * multiple compressions without changing the settings.  To avoid a memory
    * leak if jpeg_simple_progression is called repeatedly for the same JPEG
    * object, we try to re-use previously allocated space, and we allocate
-   * enough space to handle YCbCr even if initially asked for grayscale.
+   * enough space to handle YCC even if initially asked for grayscale.
    */
   if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
     cinfo->script_space_size = MAX(nscans, 10);
@@ -600,8 +636,10 @@
   cinfo->scan_info = scanptr;
   cinfo->num_scans = nscans;
 
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
     /* Initial DC scan */
     scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
     /* Initial AC scan: get some luma data out in a hurry */
diff --git a/jctrans.c b/jctrans.c
index 8813c3e..7cd077e 100644
--- a/jctrans.c
+++ b/jctrans.c
@@ -2,7 +2,7 @@
  * jctrans.c
  *
  * Copyright (C) 1995-1998, Thomas G. Lane.
- * Modified 2000-2012 by Guido Vollbeding.
+ * Modified 2000-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -143,10 +143,10 @@
    * if the application chooses to copy JFIF 1.02 extension markers from
    * the source file, we need to copy the version to make sure we don't
    * emit a file that has 1.02 extensions but a claimed version of 1.01.
-   * We will *not*, however, copy version info from mislabeled "2.01" files.
    */
   if (srcinfo->saw_JFIF_marker) {
-    if (srcinfo->JFIF_major_version == 1) {
+    if (srcinfo->JFIF_major_version == 1 ||
+	srcinfo->JFIF_major_version == 2) {
       dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
       dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
     }
diff --git a/jdapimin.c b/jdapimin.c
index 7f1ce4c..a6e0dd9 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -2,7 +2,7 @@
  * jdapimin.c
  *
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -114,8 +114,9 @@
 LOCAL(void)
 default_decompress_parms (j_decompress_ptr cinfo)
 {
+  int cid0, cid1, cid2;
+
   /* Guess the input colorspace, and set output colorspace accordingly. */
-  /* (Wish JPEG committee had provided a real way to specify this...) */
   /* Note application may override our guesses. */
   switch (cinfo->num_components) {
   case 1:
@@ -124,9 +125,22 @@
     break;
     
   case 3:
-    if (cinfo->saw_JFIF_marker) {
-      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
-    } else if (cinfo->saw_Adobe_marker) {
+    cid0 = cinfo->comp_info[0].component_id;
+    cid1 = cinfo->comp_info[1].component_id;
+    cid2 = cinfo->comp_info[2].component_id;
+
+    /* First try to guess from the component IDs */
+    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
+      cinfo->jpeg_color_space = JCS_BG_YCC;
+    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
+      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
+    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
+      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
+    else if (cinfo->saw_JFIF_marker)
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    else if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
 	cinfo->jpeg_color_space = JCS_RGB;
@@ -136,23 +150,12 @@
 	break;
       default:
 	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
 	break;
       }
     } else {
-      /* Saw no special markers, try to guess from the component IDs */
-      int cid0 = cinfo->comp_info[0].component_id;
-      int cid1 = cinfo->comp_info[1].component_id;
-      int cid2 = cinfo->comp_info[2].component_id;
-
-      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
-      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
-	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
-      else {
-	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-      }
+      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
     }
     /* Always guess RGB is proper output colorspace. */
     cinfo->out_color_space = JCS_RGB;
@@ -169,7 +172,7 @@
 	break;
       default:
 	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
 	break;
       }
     } else {
diff --git a/jdapistd.c b/jdapistd.c
index 9d74537..7f3a78b 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -2,6 +2,7 @@
  * jdapistd.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
diff --git a/jdarith.c b/jdarith.c
index 6e32d23..efdb26d 100644
--- a/jdarith.c
+++ b/jdarith.c
@@ -1,7 +1,7 @@
 /*
  * jdarith.c
  *
- * Developed 1997-2012 by Guido Vollbeding.
+ * Developed 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -395,6 +395,8 @@
 
 /*
  * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -744,6 +746,17 @@
 
 
 /*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
  * Module initialization routine for arithmetic entropy decoding.
  */
 
@@ -758,6 +771,7 @@
 				SIZEOF(arith_entropy_decoder));
   cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
 
   /* Mark tables unallocated */
   for (i = 0; i < NUM_ARITH_TBLS; i++) {
diff --git a/jdcolor.c b/jdcolor.c
index 939df75..a31c286 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -2,7 +2,7 @@
  * jdcolor.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2012 by Guido Vollbeding.
+ * Modified 2011-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -19,12 +19,15 @@
 typedef struct {
   struct jpeg_color_deconverter pub; /* public fields */
 
-  /* Private state for YCC->RGB conversion */
+  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion */
   int * Cr_r_tab;		/* => table for Cr to R conversion */
   int * Cb_b_tab;		/* => table for Cb to B conversion */
   INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
   INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
 
+  JSAMPLE * range_limit; /* pointer to normal sample range limit table, */
+		     /* or extended sample range limit table for BG_YCC */
+
   /* Private state for RGB->Y conversion */
   INT32 * rgb_y_tab;		/* => table for RGB to Y conversion */
 } my_color_deconverter;
@@ -32,22 +35,44 @@
 typedef my_color_deconverter * my_cconvert_ptr;
 
 
-/**************** YCbCr -> RGB conversion: most common case **************/
-/****************   RGB -> Y   conversion: less common case **************/
+/***************  YCbCr -> RGB conversion: most common case **************/
+/*************** BG_YCC -> RGB conversion: less common case **************/
+/***************    RGB -> Y   conversion: less common case **************/
 
 /*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
  *
- *	R = Y                + 1.40200 * Cr
- *	G = Y - 0.34414 * Cb - 0.71414 * Cr
- *	B = Y + 1.77200 * Cb
+ *	R = Y + K * (1 - Kr) * Cr
+ *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
+ *	B = Y + K * (1 - Kb) * Cb
  *
- *	Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
+ * the conversion equations to be implemented are therefore
+ *
+ *	R = Y + 1.402 * Cr
+ *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
+ *	B = Y + 1.772 * Cb
+ *
+ *	Y = 0.299 * R + 0.587 * G + 0.114 * B
  *
  * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * For bg-sYCC, with K = 4, the equations are
+ *
+ *	R = Y + 2.804 * Cr
+ *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
+ *	B = Y + 3.544 * Cb
  *
  * To avoid floating-point arithmetic, we represent the fractional constants
  * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
@@ -58,9 +83,9 @@
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times Cb and Cr for all possible values.
  * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage you
+ * shouldn't be changing colorspace anyway.
  * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
  * values for the G calculation are left scaled up, since we must add them
  * together before rounding.
@@ -84,11 +109,12 @@
 
 
 /*
- * Initialize tables for YCC->RGB colorspace conversion.
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
  */
 
 LOCAL(void)
 build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC */
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   int i;
@@ -108,24 +134,84 @@
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				(MAXJSAMPLE+1) * SIZEOF(INT32));
 
+  cconvert->range_limit = cinfo->sample_range_limit;
+
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
+    /* Cr=>R value is nearest int to 1.402 * x */
     cconvert->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
+		    RIGHT_SHIFT(FIX(1.402) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
     cconvert->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
+		    RIGHT_SHIFT(FIX(1.772) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
     /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+    cconvert->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
   }
 }
 
 
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC */
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  cconvert->range_limit = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				5 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 2.804 * x */
+    cconvert->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(2.804) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 3.544 * x */
+    cconvert->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(3.544) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -1.428272572 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
+    /* Cb=>G value is scaled-up -0.688272572 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
+  }
+
+  /* Cb and Cr portions can extend to double range in wide gamut case,
+   * so we prepare an appropriate extended range limit table.
+   */
+
+  /* First segment of range limit table: limit[x] = 0 for x < 0 */
+  MEMZERO(cconvert->range_limit, 2 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  cconvert->range_limit += 2 * (MAXJSAMPLE+1);
+  /* Main part of range limit table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    cconvert->range_limit[i] = (JSAMPLE) i;
+  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
+  for (; i < 3 * (MAXJSAMPLE+1); i++)
+    cconvert->range_limit[i] = MAXJSAMPLE;
+}
+
+
 /*
  * Convert some rows of samples to the output colorspace.
  *
@@ -149,7 +235,7 @@
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register JSAMPLE * range_limit = cconvert->range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
   register INT32 * Crgtab = cconvert->Cr_g_tab;
@@ -166,19 +252,21 @@
       y  = GETJSAMPLE(inptr0[col]);
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
+       */
+      outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
       outptr[RGB_GREEN] = range_limit[y +
 			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
 						 SCALEBITS))];
-      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
       outptr += RGB_PIXELSIZE;
     }
   }
 }
 
 
-/**************** Cases other than YCbCr -> RGB **************/
+/**************** Cases other than YCC -> RGB ****************/
 
 
 /*
@@ -198,9 +286,9 @@
 				(TABLE_SIZE * SIZEOF(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+    rgb_y_tab[i+R_Y_OFF] = FIX(0.299) * i;
+    rgb_y_tab[i+G_Y_OFF] = FIX(0.587) * i;
+    rgb_y_tab[i+B_Y_OFF] = FIX(0.114) * i + ONE_HALF;
   }
 }
 
@@ -244,6 +332,9 @@
 /*
  * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
  * (inverse color transform).
+ * This can be seen as an adaption of the general YCbCr->RGB
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
  */
 
 METHODDEF(void)
@@ -387,7 +478,7 @@
 
 /*
  * Color conversion for grayscale: just copy the data.
- * This also works for YCbCr -> grayscale conversion, in which
+ * This also works for YCC -> grayscale conversion, in which
  * we just copy the Y (luminance) component and ignore chrominance.
  */
 
@@ -466,7 +557,9 @@
       y  = GETJSAMPLE(inptr0[col]);
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * and for extended gamut encodings (sYCC).
+       */
       outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
       outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
 			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -516,6 +609,8 @@
 
   case JCS_RGB:
   case JCS_YCbCr:
+  case JCS_BG_RGB:
+  case JCS_BG_YCC:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
@@ -532,8 +627,10 @@
     break;
   }
 
-  /* Support color transform only for RGB colorspace */
-  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
     ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
 
   /* Set out_color_components and conversion method based on requested space.
@@ -544,13 +641,16 @@
   switch (cinfo->out_color_space) {
   case JCS_GRAYSCALE:
     cinfo->out_color_components = 1;
-    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
-	cinfo->jpeg_color_space == JCS_YCbCr) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
       cconvert->pub.color_convert = grayscale_convert;
       /* For color->grayscale conversion, only the Y (0) component is needed */
       for (ci = 1; ci < cinfo->num_components; ci++)
 	cinfo->comp_info[ci].component_needed = FALSE;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      break;
+    case JCS_RGB:
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_gray_convert;
@@ -560,21 +660,29 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-	break;
       }
       build_rgb_y_table(cinfo);
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   case JCS_RGB:
     cinfo->out_color_components = RGB_PIXELSIZE;
-    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+      cconvert->pub.color_convert = gray_rgb_convert;
+      break;
+    case JCS_YCbCr:
       cconvert->pub.color_convert = ycc_rgb_convert;
       build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-      cconvert->pub.color_convert = gray_rgb_convert;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      break;
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_bg_ycc_rgb_table(cinfo);
+      break;
+    case JCS_RGB:
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_convert;
@@ -584,7 +692,25 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_BG_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
 	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
       }
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -592,13 +718,17 @@
 
   case JCS_CMYK:
     cinfo->out_color_components = 4;
-    if (cinfo->jpeg_color_space == JCS_YCCK) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_YCCK:
       cconvert->pub.color_convert = ycck_cmyk_convert;
       build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      break;
+    case JCS_CMYK:
       cconvert->pub.color_convert = null_convert;
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   default:
diff --git a/jddctmgr.c b/jddctmgr.c
index 0ded9d5..9ecfbb5 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -2,7 +2,7 @@
  * jddctmgr.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2010 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -368,7 +368,7 @@
   idct = (my_idct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_idct_controller));
-  cinfo->idct = (struct jpeg_inverse_dct *) idct;
+  cinfo->idct = &idct->pub;
   idct->pub.start_pass = start_pass;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
diff --git a/jdhuff.c b/jdhuff.c
index e548e68..6920e20 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -2,7 +2,7 @@
  * jdhuff.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2012 by Guido Vollbeding.
+ * Modified 2006-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -628,6 +628,22 @@
 
 
 /*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+}
+
+
+/*
  * Check for a restart marker & resynchronize decoder.
  * Returns FALSE if must suspend.
  */
@@ -638,10 +654,7 @@
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
   int ci;
 
-  /* Throw away any unused bits remaining in bit buffer; */
-  /* include any full bytes in next_marker's count of discarded bytes */
-  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
-  entropy->bitstate.bits_left = 0;
+  finish_pass_huff(cinfo);
 
   /* Advance past the RSTn marker */
   if (! (*cinfo->marker->read_restart_marker) (cinfo))
@@ -846,17 +859,15 @@
 
 /*
  * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
 decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {   
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-  int blkn;
-  JBLOCKROW block;
+  int p1, blkn;
   BITREAD_STATE_VARS;
 
   /* Process restart marker if needed; may have to suspend */
@@ -873,15 +884,15 @@
   /* Load up working state */
   BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
 
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+
   /* Outer loop handles each block in the MCU */
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
     /* Encoded data is simply the next bit of the two's-complement DC value */
     CHECK_BIT_BUFFER(br_state, 1, return FALSE);
     if (GET_BITS(1))
-      (*block)[0] |= p1;
+      MCU_data[blkn][0][0] |= p1;
     /* Note: since we use |=, repeating the assignment later is safe */
   }
 
@@ -1517,6 +1528,7 @@
 				SIZEOF(huff_entropy_decoder));
   cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.finish_pass = finish_pass_huff;
 
   if (cinfo->progressive_mode) {
     /* Create progression status table */
diff --git a/jdinput.c b/jdinput.c
index 2c5c717..0199553 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -2,7 +2,7 @@
  * jdinput.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2009 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -196,7 +196,7 @@
   /* Hardwire it to "no scaling" */
   cinfo->output_width = cinfo->image_width;
   cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size,
+  /* initial_setup has already initialized DCT_scaled_size,
    * and has computed unscaled downsampled_width and downsampled_height.
    */
 
@@ -216,8 +216,8 @@
       (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
 
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -537,6 +537,7 @@
 METHODDEF(void)
 finish_input_pass (j_decompress_ptr cinfo)
 {
+  (*cinfo->entropy->finish_pass) (cinfo);
   cinfo->inputctl->consume_input = consume_markers;
 }
 
@@ -646,7 +647,7 @@
   inputctl = (my_inputctl_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
 				SIZEOF(my_input_controller));
-  cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
+  cinfo->inputctl = &inputctl->pub;
   /* Initialize method pointers */
   inputctl->pub.consume_input = consume_markers;
   inputctl->pub.reset_input_controller = reset_input_controller;
diff --git a/jdmarker.c b/jdmarker.c
index ce8b713..3fbe5c1 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -2,7 +2,7 @@
  * jdmarker.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2009-2012 by Guido Vollbeding.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -269,8 +269,8 @@
   /* We don't support files in which the image height is initially specified */
   /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
   /* might as well have a general sanity check. */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
-      || cinfo->num_components <= 0)
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->num_components <= 0)
     ERREXIT(cinfo, JERR_EMPTY_IMAGE);
 
   if (length != (cinfo->num_components * 3))
@@ -350,6 +350,9 @@
 
     /* Detect the case where component id's are not unique, and, if so, */
     /* create a fake component id using the same logic as in get_sof.   */
+    /* Note:  This also ensures that all of the SOF components are      */
+    /* referenced in the single scan case, which prevents access to     */
+    /* uninitialized memory in later decoding stages. */
     for (ci = 0; ci < i; ci++) {
       if (c == cinfo->cur_comp_info[ci]->component_id) {
 	c = cinfo->cur_comp_info[0]->component_id;
@@ -493,6 +496,8 @@
     if (count > 256 || ((INT32) count) > length)
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
 
+    MEMZERO(huffval, SIZEOF(huffval)); /* pre-zero array for later copy */
+
     for (i = 0; i < count; i++)
       INPUT_BYTE(cinfo, huffval[i], return FALSE);
 
@@ -735,12 +740,13 @@
     cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
     cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
     /* Check version.
-     * Major version must be 1, anything else signals an incompatible change.
+     * Major version must be 1 or 2, anything else signals an incompatible
+     * change.
      * (We used to treat this as an error, but now it's a nonfatal warning,
      * because some bozo at Hijaak couldn't read the spec.)
      * Minor version should be 0..2, but process anyway if newer.
      */
-    if (cinfo->JFIF_major_version != 1)
+    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
       WARNMS2(cinfo, JWRN_JFIF_MAJOR,
 	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
     /* Generate trace messages */
diff --git a/jdmaster.c b/jdmaster.c
index fef72a2..6f42d3c 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -2,7 +2,7 @@
  * jdmaster.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2011 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -51,7 +51,8 @@
   /* jdmerge.c only supports YCC=>RGB color conversion */
   if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
       cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE)
+      cinfo->out_color_components != RGB_PIXELSIZE ||
+      cinfo->color_transform)
     return FALSE;
   /* and it only handles 2h1v or 2h2v sampling ratios */
   if (cinfo->comp_info[0].h_samp_factor != 2 ||
@@ -158,9 +159,11 @@
     cinfo->out_color_components = 1;
     break;
   case JCS_RGB:
+  case JCS_BG_RGB:
     cinfo->out_color_components = RGB_PIXELSIZE;
     break;
   case JCS_YCbCr:
+  case JCS_BG_YCC:
     cinfo->out_color_components = 3;
     break;
   case JCS_CMYK:
@@ -273,10 +276,19 @@
   long samplesperrow;
   JDIMENSION jd_samplesperrow;
 
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   /* Initialize dimensions and other stuff */
   jpeg_calc_output_dimensions(cinfo);
   prepare_range_limit_table(cinfo);
 
+  /* Sanity check on image dimensions */
+  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
+      cinfo->out_color_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
   /* Width of an output scanline must be representable as JDIMENSION. */
   samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
   jd_samplesperrow = (JDIMENSION) samplesperrow;
@@ -521,7 +533,7 @@
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(my_decomp_master));
-  cinfo->master = (struct jpeg_decomp_master *) master;
+  cinfo->master = &master->pub;
   master->pub.prepare_for_output_pass = prepare_for_output_pass;
   master->pub.finish_output_pass = finish_output_pass;
 
diff --git a/jdmerge.c b/jdmerge.c
index 3744446..a6bde33 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -2,6 +2,7 @@
  * jdmerge.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -103,17 +104,17 @@
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
+    /* Cr=>R value is nearest int to 1.402 * x */
     upsample->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
+		    RIGHT_SHIFT(FIX(1.402) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
     upsample->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
+		    RIGHT_SHIFT(FIX(1.772) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
     /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
   }
 }
 
diff --git a/jfdctint.c b/jfdctint.c
index 1dde58c..a2ef203 100644
--- a/jfdctint.c
+++ b/jfdctint.c
@@ -2,7 +2,7 @@
  * jfdctint.c
  *
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2009 by Guido Vollbeding.
+ * Modification developed 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -165,16 +165,18 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -196,47 +198,49 @@
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS);
+
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
 
     dataptr += DCTSIZE;		/* advance pointer to next row */
   }
@@ -244,12 +248,13 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -271,47 +276,49 @@
     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
     dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
 
     dataptr++;			/* advance pointer to next column */
   }
@@ -338,10 +345,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/14). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/14).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 7; ctr++) {
@@ -472,10 +480,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -585,12 +594,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/10). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/10).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 5; ctr++) {
@@ -695,11 +705,12 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
@@ -737,6 +748,7 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
    */
 
   dataptr = data;
@@ -787,12 +799,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2**2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/6). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2**2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/6).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 3; ctr++) {
@@ -869,8 +882,9 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   */
 
   /* Row 0 */
   elemptr = sample_data[0] + start_col;
@@ -935,11 +949,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/18). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/18).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1084,11 +1099,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/20). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/20).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1248,11 +1264,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/22). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/22).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1430,9 +1447,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/24). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/24).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1596,9 +1614,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/26). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/26).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1794,9 +1813,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/28). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/28).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1995,9 +2015,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/30). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/30).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2173,10 +2194,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/32). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/32).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2275,6 +2297,7 @@
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
    * We must also scale the output by (8/16)**2 = 1/2**2.
+   * cK represents sqrt(2) * cos(K*pi/32).
    */
 
   dataptr = data;
@@ -2380,10 +2403,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2475,12 +2499,13 @@
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
    * We must also scale the output by 8/16 = 1/2.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -2501,43 +2526,43 @@
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS+1);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS+PASS1_BITS+1);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
 
     dataptr++;			/* advance pointer to next column */
   }
@@ -2564,10 +2589,11 @@
   /* Zero bottom row of output coefficient block. */
   MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 7; ctr++) {
@@ -2727,10 +2753,11 @@
   /* Zero 2 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -2866,10 +2893,11 @@
   /* Zero 3 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 5; ctr++) {
@@ -2999,17 +3027,19 @@
   /* Zero 4 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -3032,47 +3062,49 @@
       ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS-1);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS-1);
+
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-1);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-1);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
 
     dataptr += DCTSIZE;		/* advance pointer to next row */
   }
@@ -3080,7 +3112,8 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
-   * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
    */
 
   dataptr = data;
@@ -3099,7 +3132,7 @@
 
     /* Odd part */
 
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);   /* c6 */
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
 
@@ -3134,12 +3167,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 3; ctr++) {
@@ -3234,12 +3268,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
-  /* 4-point FDCT kernel, */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 2; ctr++) {
@@ -3323,10 +3358,12 @@
    */
 
   /* Even part */
+
   /* Apply unsigned->signed conversion */
   data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
 
   /* Odd part */
+
   data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
 }
 
@@ -3350,9 +3387,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3360,7 +3399,7 @@
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -3382,39 +3421,43 @@
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS-PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
 
     ctr++;
 
@@ -3541,10 +3584,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3721,10 +3765,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3870,10 +3915,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -4015,11 +4061,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
-  /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -4057,12 +4105,13 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -4084,47 +4133,49 @@
     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
     dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
 
     dataptr++;			/* advance pointer to next column */
   }
@@ -4150,12 +4201,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -4255,9 +4307,10 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here.
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
@@ -4329,18 +4382,23 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
-  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+  /* Pass 1: empty. */
 
-  /* We leave the results scaled up by an overall factor of 8.
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
    * We must also scale the output by (8/1)*(8/2) = 2**5.
    */
 
   /* Even part */
+
+  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
   /* Apply unsigned->signed conversion */
   data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
 
   /* Odd part */
+
   data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
 }
 
diff --git a/jidctint.c b/jidctint.c
index dcdf7ce..76fe5d9 100644
--- a/jidctint.c
+++ b/jidctint.c
@@ -2,7 +2,7 @@
  * jidctint.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
+ * Modification developed 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -165,6 +165,8 @@
 
 /*
  * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents sqrt(2) * cos(K*pi/16).
  */
 
 GLOBAL(void)
@@ -184,9 +186,10 @@
   int workspace[DCTSIZE2];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -223,15 +226,16 @@
       continue;
     }
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
@@ -256,25 +260,25 @@
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -288,15 +292,16 @@
     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
+
     inptr++;			/* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
 
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
 
   wsptr = workspace;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -330,15 +335,16 @@
     }
 #endif
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
 
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
@@ -346,7 +352,7 @@
 
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -364,21 +370,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -2835,9 +2841,11 @@
   int workspace[8*8];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -2851,14 +2859,14 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 	inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -2867,23 +2875,24 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
+
       inptr++;			/* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -2893,44 +2902,44 @@
 
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
     tmp12 = tmp1 - tmp3;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
@@ -2939,7 +2948,7 @@
     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
+
     inptr++;			/* advance pointers to next column */
     quantptr++;
     wsptr++;
@@ -2948,6 +2957,7 @@
   /* Pass 2: process 8 rows from work array, store into output array.
    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 8; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3109,6 +3119,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3164,6 +3175,7 @@
   /* Pass 2: process 7 rows from work array, store into output array.
    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 7; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3304,6 +3316,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3346,6 +3359,7 @@
   /* Pass 2: process 6 rows from work array, store into output array.
    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 6; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3480,6 +3494,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3520,6 +3535,7 @@
   /* Pass 2: process 5 rows from work array, store into output array.
    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 5; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3639,8 +3655,10 @@
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3675,31 +3693,34 @@
     wsptr[8*2] = (int) (tmp12 - tmp2);
   }
 
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   wsptr = workspace;
   for (ctr = 0; ctr < 4; ctr++) {
     outptr = output_buf[ctr] + output_col;
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
 
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = (INT32) wsptr[4];
-    
+
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -3717,21 +3738,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -3793,6 +3814,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3823,6 +3845,7 @@
   /* Pass 2: process 3 rows from work array, store into output array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 3; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3924,6 +3947,7 @@
    * 4-point IDCT kernel,
    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 2; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3979,7 +4003,7 @@
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp10;
+  INT32 tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
@@ -3994,18 +4018,18 @@
 
   /* Even part */
 
-  tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
+  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
   /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
+  tmp0 += ONE << 2;
 
   /* Odd part */
 
-  tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
+  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
 
   /* Final output stage */
 
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
+  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
 }
 
 
@@ -4036,6 +4060,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4134,69 +4159,72 @@
     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
   }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   wsptr = workspace;
   for (ctr = 0; ctr < 16; ctr++) {
     outptr = output_buf[ctr] + output_col;
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = (INT32) wsptr[4];
-    
+
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
     tmp12 = tmp1 - tmp3;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = (INT32) wsptr[7];
     tmp1 = (INT32) wsptr[5];
     tmp2 = (INT32) wsptr[3];
     tmp3 = (INT32) wsptr[1];
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
@@ -4221,7 +4249,7 @@
     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
-    
+
     wsptr += DCTSIZE;		/* advance pointer to next row */
   }
 }
@@ -4254,6 +4282,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4341,6 +4370,7 @@
   /* Pass 2: process 14 rows from work array, store into output array.
    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 14; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4437,6 +4467,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4520,6 +4551,7 @@
   /* Pass 2: process 12 rows from work array, store into output array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 12; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4601,6 +4633,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4676,6 +4709,7 @@
   /* Pass 2: process 10 rows from work array, store into output array.
    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 10; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4750,9 +4784,11 @@
   int workspace[4*8];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -4789,16 +4825,17 @@
       continue;
     }
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
 
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -4808,7 +4845,7 @@
 
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -4826,21 +4863,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -4861,8 +4898,10 @@
   }
 
   /* Pass 2: process 8 rows from work array, store into output array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 8; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4900,7 +4939,7 @@
     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
-    
+
     wsptr += 4;		/* advance pointer to next row */
   }
 }
@@ -4932,6 +4971,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4974,6 +5014,7 @@
   /* Pass 2: process 6 rows from work array, store into output array.
    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 6; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -5037,6 +5078,7 @@
    * 4-point IDCT kernel,
    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -5106,7 +5148,7 @@
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp10;
+  INT32 tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   SHIFT_TEMPS
@@ -5117,19 +5159,19 @@
 
   /* Even part */
     
-  tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
   /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
+  tmp0 += ONE << 2;
 
   /* Odd part */
 
-  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
 
   /* Final output stage */
 
-  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
+  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3)
 					  & RANGE_MASK];
-  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
+  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3)
 					  & RANGE_MASK];
 }
 
diff --git a/jmorecfg.h b/jmorecfg.h
index 2407edb..679d68b 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -2,7 +2,7 @@
  * jmorecfg.h
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2012 by Guido Vollbeding.
+ * Modified 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -15,13 +15,22 @@
 /*
  * Define BITS_IN_JSAMPLE as either
  *   8   for 8-bit sample values (the usual setting)
+ *   9   for 9-bit sample values
+ *   10  for 10-bit sample values
+ *   11  for 11-bit sample values
  *   12  for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
+ * Only 8, 9, 10, 11, and 12 bits sample data precision are supported for
+ * full-feature DCT processing.  Further depths up to 16-bit may be added
+ * later for the lossless modes of operation.
+ * Run-time selection and conversion of data precision will be added later
+ * and are currently not supported, sorry.
+ * Exception:  The transcoding part (jpegtran) supports all settings in a
+ * single instance, since it operates on the level of DCT coefficients and
+ * not sample values.  The DCT coefficients are of the same type (16 bits)
+ * in all cases (see below).
  */
 
-#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
 
 
 /*
@@ -77,6 +86,48 @@
 #endif /* BITS_IN_JSAMPLE == 8 */
 
 
+#if BITS_IN_JSAMPLE == 9
+/* JSAMPLE should be the smallest type that will hold the values 0..511.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	511
+#define CENTERJSAMPLE	256
+
+#endif /* BITS_IN_JSAMPLE == 9 */
+
+
+#if BITS_IN_JSAMPLE == 10
+/* JSAMPLE should be the smallest type that will hold the values 0..1023.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	1023
+#define CENTERJSAMPLE	512
+
+#endif /* BITS_IN_JSAMPLE == 10 */
+
+
+#if BITS_IN_JSAMPLE == 11
+/* JSAMPLE should be the smallest type that will hold the values 0..2047.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	2047
+#define CENTERJSAMPLE	1024
+
+#endif /* BITS_IN_JSAMPLE == 11 */
+
+
 #if BITS_IN_JSAMPLE == 12
 /* JSAMPLE should be the smallest type that will hold the values 0..4095.
  * On nearly all machines "short" will do nicely.
@@ -252,7 +303,10 @@
  * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
  */
 
-#ifdef HAVE_BOOLEAN
+#ifndef HAVE_BOOLEAN
+#if defined FALSE || defined TRUE || defined QGLOBAL_H
+/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
+typedef int boolean;
 #ifndef FALSE			/* in case these macros already exist */
 #define FALSE	0		/* values of boolean */
 #endif
@@ -262,6 +316,7 @@
 #else
 typedef enum { FALSE = 0, TRUE = 1 } boolean;
 #endif
+#endif
 
 
 /*
@@ -299,11 +354,12 @@
 #define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
 #define DCT_SCALING_SUPPORTED	    /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
 #define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
-/* Note: if you selected 12-bit data precision, it is dangerous to turn off
- * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
- * precision, so jchuff.c normally uses entropy optimization to compute
- * usable tables for higher precision.  If you don't want to do optimization,
- * you'll have to supply different default Huffman tables.
+/* Note: if you selected more than 8-bit data precision, it is dangerous to
+ * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
+ * good for 8-bit precision, so arithmetic coding is recommended for higher
+ * precision.  The Huffman encoder normally uses entropy optimization to
+ * compute usable tables for higher precision.  Otherwise, you'll have to
+ * supply different default Huffman tables.
  * The exact same statements apply for progressive JPEG: the default tables
  * don't work for progressive mode.  (This may get fixed, however.)
  */
@@ -314,7 +370,7 @@
 #define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? (Requires DCT_ISLOW)*/
 #define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
 #define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
 #undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
diff --git a/jpegint.h b/jpegint.h
index c0d5c14..18bb887 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -2,7 +2,7 @@
  * jpegint.h
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2011 by Guido Vollbeding.
+ * Modified 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -211,8 +211,8 @@
 /* Entropy decoding */
 struct jpeg_entropy_decoder {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
-				JBLOCKROW *MCU_data));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
 };
 
 /* Inverse DCT (also performs dequantization) */
diff --git a/jpeglib.h b/jpeglib.h
index 0a6dac4..f4fbf23 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -2,7 +2,7 @@
  * jpeglib.h
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2002-2012 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -39,12 +39,12 @@
 
 #define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
 #define JPEG_LIB_VERSION_MAJOR  9
-#define JPEG_LIB_VERSION_MINOR  0
+#define JPEG_LIB_VERSION_MINOR  1
 
 
 /* Various constants determining the sizes of things.
- * All of these are specified by the JPEG standard, so don't change them
- * if you want to be compatible.
+ * All of these are specified by the JPEG standard,
+ * so don't change them if you want to be compatible.
  */
 
 #define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
@@ -157,16 +157,21 @@
   /* The downsampled dimensions are the component's actual, unpadded number
    * of samples at the main buffer (preprocessing/compression interface);
    * DCT scaling is included, so
-   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_h_scaled_size/DCTSIZE)
+   * downsampled_width =
+   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
    * and similarly for height.
    */
   JDIMENSION downsampled_width;	 /* actual width in samples */
   JDIMENSION downsampled_height; /* actual height in samples */
-  /* This flag is used only for decompression.  In cases where some of the
-   * components will be ignored (eg grayscale output from YCbCr image),
-   * we can skip most computations for the unused components.
+  /* For decompression, in cases where some of the components will be
+   * ignored (eg grayscale output from YCbCr image), we can skip most
+   * computations for the unused components.
+   * For compression, some of the components will need further quantization
+   * scale by factor of 2 after DCT (eg BG_YCC output from normal RGB input).
+   * The field is first set TRUE for decompression, FALSE for compression
+   * in initial_setup, and then adapted in color conversion setup.
    */
-  boolean component_needed;	/* do we need the value of this component? */
+  boolean component_needed;
 
   /* These values are computed before starting a scan of the component. */
   /* The decompressor output side may not use these variables. */
@@ -215,10 +220,12 @@
 typedef enum {
 	JCS_UNKNOWN,		/* error/unspecified */
 	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
 	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK		/* Y/Cb/Cr/K */
+	JCS_YCCK,		/* Y/Cb/Cr/K */
+	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
+	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
 } J_COLOR_SPACE;
 
 /* Supported color transforms. */
diff --git a/jpegtran.1 b/jpegtran.1
index 0ad1bbc..2220f7a 100644
--- a/jpegtran.1
+++ b/jpegtran.1
@@ -1,4 +1,4 @@
-.TH JPEGTRAN 1 "28 December 2009"
+.TH JPEGTRAN 1 "13 September 2013"
 .SH NAME
 jpegtran \- lossless transformation of JPEG files
 .SH SYNOPSIS
@@ -156,15 +156,23 @@
 flip transforms, lossless crop is restricted by the current JPEG format: the
 upper left corner of the selected region must fall on an iMCU boundary.  If
 this does not hold for the given crop parameters, we silently move the upper
-left corner up and/or left to make it so, simultaneously increasing the region
-dimensions to keep the lower right crop corner unchanged.  (Thus, the output
-image covers at least the requested region, but may cover more.)
+left corner up and/or left to make it so, simultaneously increasing the
+region dimensions to keep the lower right crop corner unchanged.  (Thus, the
+output image covers at least the requested region, but may cover more.)
+The adjustment of the region dimensions may be optionally disabled.
 
 The image can be losslessly cropped by giving the switch:
 .TP
 .B \-crop WxH+X+Y
 Crop to a rectangular subarea of width W, height H starting at point X,Y.
 .PP
+A complementary lossless-wipe option is provided to discard (gray out) data
+inside a given image region while losslessly preserving what is outside:
+.TP
+.B \-wipe WxH+X+Y
+Wipe (gray out) a rectangular subarea of width W, height H starting at point
+X,Y.
+.PP
 Other not-strictly-lossless transformation switches are:
 .TP
 .B \-grayscale
diff --git a/jpegtran.c b/jpegtran.c
index 7e8ef0c..5269344 100644
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -1,7 +1,7 @@
 /*
  * jpegtran.c
  *
- * Copyright (C) 1995-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1995-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -66,8 +66,8 @@
   fprintf(stderr, "Switches for modifying the image:\n");
 #if TRANSFORMS_SUPPORTED
   fprintf(stderr, "  -crop WxH+X+Y  Crop to a rectangular subarea\n");
-  fprintf(stderr, "  -grayscale     Reduce to grayscale (omit color data)\n");
   fprintf(stderr, "  -flip [horizontal|vertical]  Mirror image (left-right or top-bottom)\n");
+  fprintf(stderr, "  -grayscale     Reduce to grayscale (omit color data)\n");
   fprintf(stderr, "  -perfect       Fail if there is non-transformable edge blocks\n");
   fprintf(stderr, "  -rotate [90|180|270]         Rotate image (degrees clockwise)\n");
 #endif
@@ -76,6 +76,7 @@
   fprintf(stderr, "  -transpose     Transpose image\n");
   fprintf(stderr, "  -transverse    Transverse transpose image\n");
   fprintf(stderr, "  -trim          Drop non-transformable edge blocks\n");
+  fprintf(stderr, "  -wipe WxH+X+Y  Wipe (gray out) a rectangular subarea\n");
 #endif
   fprintf(stderr, "Switches for advanced users:\n");
 #ifdef C_ARITH_CODING_SUPPORTED
@@ -187,7 +188,8 @@
 #if TRANSFORMS_SUPPORTED
       if (++argn >= argc)	/* advance to next argument */
 	usage();
-      if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+      if (transformoption.crop /* reject multiple crop/wipe requests */ ||
+	  ! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
 	fprintf(stderr, "%s: bogus -crop argument '%s'\n",
 		progname, argv[argn]);
 	exit(EXIT_FAILURE);
@@ -336,6 +338,21 @@
       /* Trim off any partial edge MCUs that the transform can't handle. */
       transformoption.trim = TRUE;
 
+    } else if (keymatch(arg, "wipe", 1)) {
+#if TRANSFORMS_SUPPORTED
+      if (++argn >= argc)	/* advance to next argument */
+	usage();
+      if (transformoption.crop /* reject multiple crop/wipe requests */ ||
+	  ! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+	fprintf(stderr, "%s: bogus -wipe argument '%s'\n",
+		progname, argv[argn]);
+	exit(EXIT_FAILURE);
+      }
+      select_transform(JXFORM_WIPE);
+#else
+      select_transform(JXFORM_NONE);	/* force an error */
+#endif
+
     } else {
       usage();			/* bogus switch */
     }
diff --git a/jversion.h b/jversion.h
index 232085f..a6e3ac7 100644
--- a/jversion.h
+++ b/jversion.h
@@ -1,7 +1,7 @@
 /*
  * jversion.h
  *
- * Copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1991-2014, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -9,6 +9,6 @@
  */
 
 
-#define JVERSION	"9  13-Jan-2013"
+#define JVERSION	"9a  19-Jan-2014"
 
-#define JCOPYRIGHT	"Copyright (C) 2013, Thomas G. Lane, Guido Vollbeding"
+#define JCOPYRIGHT	"Copyright (C) 2014, Thomas G. Lane, Guido Vollbeding"
diff --git a/libjpeg.txt b/libjpeg.txt
index 292a127..4243c24 100644
--- a/libjpeg.txt
+++ b/libjpeg.txt
@@ -95,8 +95,8 @@
 	* Lossless JPEG
 	* DNL marker
 	* Nonintegral subsampling ratios
-We support both 8- and 12-bit data precision, but this is a compile-time
-choice rather than a run-time choice; hence it is difficult to use both
+We support 8-bit to 12-bit data precision, but this is a compile-time choice
+rather than a run-time choice; hence it is difficult to use different
 precisions in a single application.
 
 By itself, the library handles only interchange JPEG datastreams --- in
@@ -225,7 +225,7 @@
 BITS_IN_JSAMPLE (normally 8 bits).  For instance, if you choose to compress
 data that's only 6 bits/channel, you should left-justify each value in a
 byte before passing it to the compressor.  If you need to compress data
-that has more than 8 bits/channel, compile with BITS_IN_JSAMPLE = 12.
+that has more than 8 bits/channel, compile with BITS_IN_JSAMPLE = 9 to 12.
 (See "Library compile-time options", later.)
 
 
@@ -1273,9 +1273,10 @@
 The JPEG standard itself is "color blind" and doesn't specify any particular
 color space.  It is customary to convert color data to a luminance/chrominance
 color space before compressing, since this permits greater compression.  The
-existing de-facto JPEG file format standards specify YCbCr or grayscale data
-(JFIF), or grayscale, RGB, YCbCr, CMYK, or YCCK (Adobe).  For special
-applications such as multispectral images, other color spaces can be used,
+existing JPEG file interchange format standards specify YCbCr or GRAYSCALE
+data (JFIF version 1), GRAYSCALE, RGB, YCbCr, CMYK, or YCCK (Adobe), or BG_RGB
+or BG_YCC (big gamut color spaces, JFIF version 2).  For special applications
+such as multispectral images, other color spaces can be used,
 but it must be understood that such files will be unportable.
 
 The JPEG library can handle the most common colorspace conversions (namely
@@ -1292,22 +1293,25 @@
 jccolor.c currently supports the following transformations:
 	RGB => YCbCr
 	RGB => GRAYSCALE
+	RGB => BG_YCC
 	YCbCr => GRAYSCALE
+	YCbCr => BG_YCC
 	CMYK => YCCK
 plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB,
-YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN.
+BG_RGB => BG_RGB, YCbCr => YCbCr, BG_YCC => BG_YCC, CMYK => CMYK,
+YCCK => YCCK, and UNKNOWN => UNKNOWN.
 
-The de-facto file format standards (JFIF and Adobe) specify APPn markers that
-indicate the color space of the JPEG file.  It is important to ensure that
-these are written correctly, or omitted if the JPEG file's color space is not
-one of the ones supported by the de-facto standards.  jpeg_set_colorspace()
-will set the compression parameters to include or omit the APPn markers
-properly, so long as it is told the truth about the JPEG color space.
-For example, if you are writing some random 3-component color space without
-conversion, don't try to fake out the library by setting in_color_space and
-jpeg_color_space to JCS_YCbCr; use JCS_UNKNOWN.  You may want to write an
-APPn marker of your own devising to identify the colorspace --- see "Special
-markers", below.
+The file interchange format standards (JFIF and Adobe) specify APPn markers
+that indicate the color space of the JPEG file.  It is important to ensure
+that these are written correctly, or omitted if the JPEG file's color space
+is not one of the ones supported by the interchange standards.
+jpeg_set_colorspace() will set the compression parameters to include or omit
+the APPn markers properly, so long as it is told the truth about the JPEG
+color space.  For example, if you are writing some random 3-component color
+space without conversion, don't try to fake out the library by setting
+in_color_space and jpeg_color_space to JCS_YCbCr; use JCS_UNKNOWN.
+You may want to write an APPn marker of your own devising to identify
+the colorspace --- see "Special markers", below.
 
 When told that the color space is UNKNOWN, the library will default to using
 luminance-quality compression parameters for all color components.  You may
@@ -1325,6 +1329,8 @@
 transformation.  jdcolor.c currently supports
 	YCbCr => RGB
 	YCbCr => GRAYSCALE
+	BG_YCC => RGB
+	BG_YCC => GRAYSCALE
 	RGB => GRAYSCALE
 	GRAYSCALE => RGB
 	YCCK => CMYK
@@ -2585,10 +2591,10 @@
 you must also pad the data correctly (usually, this is done by replicating
 the last column and/or row).  The data must be padded to a multiple of a DCT
 block in each component: that is, each downsampled row must contain a
-multiple of 8 valid samples, and there must be a multiple of 8 sample rows
-for each component.  (For applications such as conversion of digital TV
-images, the standard image size is usually a multiple of the DCT block size,
-so that no padding need actually be done.)
+multiple of block_size valid samples, and there must be a multiple of
+block_size sample rows for each component.  (For applications such as
+conversion of digital TV images, the standard image size is usually a
+multiple of the DCT block size, so that no padding need actually be done.)
 
 The procedure for compression of raw data is basically the same as normal
 compression, except that you call jpeg_write_raw_data() in place of
@@ -2614,22 +2620,22 @@
 measured in terms of the component with the largest v_samp_factor.
 
 jpeg_write_raw_data() processes one MCU row per call, which is to say
-v_samp_factor*DCTSIZE sample rows of each component.  The passed num_lines
-value must be at least max_v_samp_factor*DCTSIZE, and the return value will
-be exactly that amount (or possibly some multiple of that amount, in future
-library versions).  This is true even on the last call at the bottom of the
-image; don't forget to pad your data as necessary.
+v_samp_factor*block_size sample rows of each component.  The passed num_lines
+value must be at least max_v_samp_factor*block_size, and the return value
+will be exactly that amount (or possibly some multiple of that amount, in
+future library versions).  This is true even on the last call at the bottom
+of the image; don't forget to pad your data as necessary.
 
 The required dimensions of the supplied data can be computed for each
 component as
-	cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
-	cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
+	cinfo->comp_info[i].width_in_blocks*block_size  samples per row
+	cinfo->comp_info[i].height_in_blocks*block_size rows in image
 after jpeg_start_compress() has initialized those fields.  If the valid data
 is smaller than this, it must be padded appropriately.  For some sampling
 factors and image sizes, additional dummy DCT blocks are inserted to make
 the image a multiple of the MCU dimensions.  The library creates such dummy
 blocks itself; it does not read them from your supplied data.  Therefore you
-need never pad by more than DCTSIZE samples.  An example may help here.
+need never pad by more than block_size samples.  An example may help here.
 Assume 2h2v downsampling of YCbCr data, that is
 	cinfo->comp_info[0].h_samp_factor = 2		for Y
 	cinfo->comp_info[0].v_samp_factor = 2
@@ -2671,8 +2677,8 @@
 decompression process is otherwise the same as usual.
 
 jpeg_read_raw_data() returns one MCU row per call, and thus you must pass a
-buffer of at least max_v_samp_factor*DCTSIZE scanlines (scanline counting is
-the same as for raw-data compression).  The buffer you pass must be large
+buffer of at least max_v_samp_factor*block_size scanlines (scanline counting
+is the same as for raw-data compression).  The buffer you pass must be large
 enough to hold the actual data plus padding to DCT-block boundaries.  As with
 compression, any entirely dummy DCT blocks are not processed so you need not
 allocate space for them, but the total scanline count includes them.  The
@@ -2928,10 +2934,10 @@
 buffer to hold the final output image.
 
 The above figures are valid for 8-bit JPEG data precision and a machine with
-32-bit ints.  For 12-bit JPEG data, double the size of the strip buffers and
-quantization pixel buffer.  The "fixed-size" data will be somewhat smaller
-with 16-bit ints, larger with 64-bit ints.  Also, CMYK or other unusual
-color spaces will require different amounts of space.
+32-bit ints.  For 9-bit to 12-bit JPEG data, double the size of the strip
+buffers and quantization pixel buffer.  The "fixed-size" data will be
+somewhat smaller with 16-bit ints, larger with 64-bit ints.  Also, CMYK
+or other unusual color spaces will require different amounts of space.
 
 The full-image coefficient and pixel buffers, if needed at all, do not
 have to be fully RAM resident; you can have the library use temporary
@@ -2953,27 +2959,34 @@
 
 A number of compile-time options are available by modifying jmorecfg.h.
 
-The JPEG standard provides for both the baseline 8-bit DCT process and
-a 12-bit DCT process.  The IJG code supports 12-bit JPEG if you define
-BITS_IN_JSAMPLE as 12 rather than 8.  Note that this causes JSAMPLE to be
-larger than a char, so it affects the surrounding application's image data.
-The sample applications cjpeg and djpeg can support 12-bit mode only for PPM
-and GIF file formats; you must disable the other file formats to compile a
-12-bit cjpeg or djpeg.  (install.txt has more information about that.)
-At present, a 12-bit library can handle *only* 12-bit images, not both
-precisions.  (If you need to include both 8- and 12-bit libraries in a single
-application, you could probably do it by defining NEED_SHORT_EXTERNAL_NAMES
-for just one of the copies.  You'd have to access the 8-bit and 12-bit copies
-from separate application source files.  This is untested ... if you try it,
-we'd like to hear whether it works!)
+The IJG code currently supports 8-bit to 12-bit sample data precision by
+defining BITS_IN_JSAMPLE as 8, 9, 10, 11, or 12.
+Note that a value larger than 8 causes JSAMPLE to be larger than a char,
+so it affects the surrounding application's image data.
+The sample applications cjpeg and djpeg can support deeper than 8-bit data
+only for PPM and GIF file formats; you must disable the other file formats
+to compile a 9-bit to 12-bit cjpeg or djpeg.  (install.txt has more
+information about that.)
+Run-time selection and conversion of data precision are currently not
+supported and may be added later.
+Exception:  The transcoding part (jpegtran) supports all settings in a
+single instance, since it operates on the level of DCT coefficients and
+not sample values.
+(If you need to include an 8-bit library and a 9-bit to 12-bit library for
+compression or decompression in a single application, you could probably do
+it by defining NEED_SHORT_EXTERNAL_NAMES for just one of the copies.  You'd
+have to access the 8-bit and the 9-bit to 12-bit copies from separate
+application source files.  This is untested ... if you try it, we'd like to
+hear whether it works!)
 
-Note that a 12-bit library always compresses in Huffman optimization mode,
-in order to generate valid Huffman tables.  This is necessary because our
-default Huffman tables only cover 8-bit data.  If you need to output 12-bit
-files in one pass, you'll have to supply suitable default Huffman tables.
-You may also want to supply your own DCT quantization tables; the existing
-quality-scaling code has been developed for 8-bit use, and probably doesn't
-generate especially good tables for 12-bit.
+Note that the standard Huffman tables are only valid for 8-bit data precision.
+If you selected more than 8-bit data precision, cjpeg uses arithmetic coding
+by default.  The Huffman encoder normally uses entropy optimization to
+compute usable tables for higher precision.  Otherwise, you'll have to
+supply different default Huffman tables.  You may also want to supply your
+own DCT quantization tables; the existing quality-scaling code has been
+developed for 8-bit use, and probably doesn't generate especially good tables
+for 9-bit to 12-bit.
 
 The maximum number of components (color channels) in the image is determined
 by MAX_COMPONENTS.  The JPEG standard allows up to 255 components, but we
diff --git a/missing b/missing
index cdea514..db98974 100755
--- a/missing
+++ b/missing
@@ -1,7 +1,7 @@
 #! /bin/sh
 # Common wrapper for a few potentially missing GNU programs.
 
-scriptversion=2012-06-26.16; # UTC
+scriptversion=2013-10-28.13; # UTC
 
 # Copyright (C) 1996-2013 Free Software Foundation, Inc.
 # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
@@ -160,7 +160,7 @@
       ;;
    autom4te*)
       echo "You might have modified some maintainer files that require"
-      echo "the 'automa4te' program to be rebuilt."
+      echo "the 'autom4te' program to be rebuilt."
       program_details 'autom4te'
       ;;
     bison*|yacc*)
diff --git a/rdjpgcom.1 b/rdjpgcom.1
index 97611df..d7741fb 100644
--- a/rdjpgcom.1
+++ b/rdjpgcom.1
@@ -1,4 +1,4 @@
-.TH RDJPGCOM 1 "02 April 2009"
+.TH RDJPGCOM 1 "13 September 2013"
 .SH NAME
 rdjpgcom \- display text comments from a JPEG file
 .SH SYNOPSIS
diff --git a/structure.txt b/structure.txt
index 44e48ca..98e20c7 100644
--- a/structure.txt
+++ b/structure.txt
@@ -1,6 +1,6 @@
 IJG JPEG LIBRARY:  SYSTEM ARCHITECTURE
 
-Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -170,16 +170,16 @@
 have special cases at the right and bottom edges.  Therefore the interface
 buffer is always an integral number of blocks wide and high, and we expect
 compression preprocessing to pad the source data properly.  Padding will occur
-only to the next block (N-sample) boundary.  In an interleaved-scan situation,
-additional dummy blocks may be used to fill out MCUs, but the MCU assembly and
-disassembly logic will create or discard these blocks internally.  (This is
-advantageous for speed reasons, since we avoid DCTing the dummy blocks.
-It also permits a small reduction in file size, because the compressor can
-choose dummy block contents so as to minimize their size in compressed form.
-Finally, it makes the interface buffer specification independent of whether
-the file is actually interleaved or not.)  Applications that wish to deal
-directly with the downsampled data must provide similar buffering and padding
-for odd-sized images.
+only to the next block (block_size-sample) boundary.  In an interleaved-scan
+situation, additional dummy blocks may be used to fill out MCUs, but the MCU
+assembly and disassembly logic will create or discard these blocks internally.
+(This is advantageous for speed reasons, since we avoid DCTing the dummy
+blocks.  It also permits a small reduction in file size, because the
+compressor can choose dummy block contents so as to minimize their size
+in compressed form.  Finally, it makes the interface buffer specification
+independent of whether the file is actually interleaved or not.)
+Applications that wish to deal directly with the downsampled data must
+provide similar buffering and padding for odd-sized images.
 
 
 *** Poor man's object-oriented programming ***
@@ -345,9 +345,10 @@
   require context rows above and below the current row group; the
   preprocessing controller is responsible for supplying these rows via proper
   buffering.  The downsampler is responsible for edge expansion at the right
-  edge (i.e., extending each sample row to a multiple of N samples); but the
-  preprocessing controller is responsible for vertical edge expansion (i.e.,
-  duplicating the bottom sample row as needed to make a multiple of N rows).
+  edge (i.e., extending each sample row to a multiple of block_size samples);
+  but the preprocessing controller is responsible for vertical edge expansion
+  (i.e., duplicating the bottom sample row as needed to make a multiple of
+  block_size rows).
 
 * Coefficient controller: buffer controller for the DCT-coefficient data.
   This controller handles MCU assembly, including insertion of dummy DCT
@@ -651,8 +652,8 @@
 quantization a la JPEG Part 3.)
 
 Notice that the allocation unit is now a row of 8x8 coefficient blocks,
-corresponding to N rows of samples.  Otherwise the structure is much the same
-as for samples, and for the same reasons.
+corresponding to block_size rows of samples.  Otherwise the structure
+is much the same as for samples, and for the same reasons.
 
 On machines where malloc() can't handle a request bigger than 64Kb, this data
 structure limits us to rows of less than 512 JBLOCKs, or a picture width of
diff --git a/testimg.bmp b/testimg.bmp
index 012223e..11aa0cb 100644
--- a/testimg.bmp
+++ b/testimg.bmp
Binary files differ
diff --git a/testimg.ppm b/testimg.ppm
index bd78ef8..1377eef 100644
--- a/testimg.ppm
+++ b/testimg.ppm
Binary files differ
diff --git a/transupp.c b/transupp.c
index d0013cc..525932a 100644
--- a/transupp.c
+++ b/transupp.c
@@ -1,7 +1,7 @@
 /*
  * transupp.c
  *
- * Copyright (C) 1997-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1997-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -114,6 +114,116 @@
 
 
 LOCAL(void)
+do_crop_ext (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	     JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	     jvirt_barray_ptr *src_coef_arrays,
+	     jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: If the destination size is larger than the source, we fill in
+ * the extra area with zero (neutral gray).  Note we also have to zero partial
+ * iMCUs at the right and bottom edge of the source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (dstinfo->jpeg_height > srcinfo->output_height) {
+	if (dst_blk_y < y_crop_blocks ||
+	    dst_blk_y >= comp_height + y_crop_blocks) {
+	  for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	    FMEMZERO(dst_buffer[offset_y],
+		     compptr->width_in_blocks * SIZEOF(JBLOCK));
+	  }
+	  continue;
+	}
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y - y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      } else {
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y + y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	if (dstinfo->jpeg_width > srcinfo->output_width) {
+	  if (x_crop_blocks > 0) {
+	    FMEMZERO(dst_buffer[offset_y],
+		     x_crop_blocks * SIZEOF(JBLOCK));
+	  }
+	  jcopy_block_row(src_buffer[offset_y],
+			  dst_buffer[offset_y] + x_crop_blocks,
+			  comp_width);
+	  if (compptr->width_in_blocks > comp_width + x_crop_blocks) {
+	    FMEMZERO(dst_buffer[offset_y] +
+		       comp_width + x_crop_blocks,
+		     (compptr->width_in_blocks -
+		       comp_width - x_crop_blocks) * SIZEOF(JBLOCK));
+	  }
+	} else {
+	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+			  dst_buffer[offset_y],
+			  compptr->width_in_blocks);
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_wipe (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	 jvirt_barray_ptr *src_coef_arrays,
+	 JDIMENSION drop_width, JDIMENSION drop_height)
+/* Wipe - drop content of specified area, fill with zero (neutral gray) */
+{
+  JDIMENSION comp_width, comp_height;
+  JDIMENSION blk_y, x_wipe_blocks, y_wipe_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = drop_width * compptr->h_samp_factor;
+    comp_height = drop_height * compptr->v_samp_factor;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (blk_y = 0; blk_y < comp_height; blk_y += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y + y_wipe_blocks,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	FMEMZERO(buffer[offset_y] + x_wipe_blocks,
+		 comp_width * SIZEOF(JBLOCK));
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
 do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
 		   JDIMENSION x_crop_offset,
 		   jvirt_barray_ptr *src_coef_arrays)
@@ -888,7 +998,8 @@
 
   /* Determine number of components in output image */
   if (info->force_grayscale &&
-      srcinfo->jpeg_color_space == JCS_YCbCr &&
+      (srcinfo->jpeg_color_space == JCS_YCbCr ||
+       srcinfo->jpeg_color_space == JCS_BG_YCC) &&
       srcinfo->num_components == 3)
     /* We'll only process the first component */
     info->num_components = 1;
@@ -965,39 +1076,81 @@
       info->crop_xoffset = 0;	/* default to +0 */
     if (info->crop_yoffset_set == JCROP_UNSET)
       info->crop_yoffset = 0;	/* default to +0 */
-    if (info->crop_xoffset >= info->output_width ||
-	info->crop_yoffset >= info->output_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-    if (info->crop_width_set == JCROP_UNSET)
+    if (info->crop_width_set == JCROP_UNSET) {
+      if (info->crop_xoffset >= info->output_width)
+	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
       info->crop_width = info->output_width - info->crop_xoffset;
-    if (info->crop_height_set == JCROP_UNSET)
+    } else {
+      /* Check for crop extension */
+      if (info->crop_width > info->output_width) {
+	/* Crop extension does not work when transforming! */
+	if (info->transform != JXFORM_NONE ||
+	    info->crop_xoffset >= info->crop_width ||
+	    info->crop_xoffset > info->crop_width - info->output_width)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+	if (info->crop_xoffset >= info->output_width ||
+	    info->crop_width <= 0 ||
+	    info->crop_xoffset > info->output_width - info->crop_width)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    if (info->crop_height_set == JCROP_UNSET) {
+      if (info->crop_yoffset >= info->output_height)
+	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
       info->crop_height = info->output_height - info->crop_yoffset;
-    /* Ensure parameters are valid */
-    if (info->crop_width <= 0 || info->crop_width > info->output_width ||
-	info->crop_height <= 0 || info->crop_height > info->output_height ||
-	info->crop_xoffset > info->output_width - info->crop_width ||
-	info->crop_yoffset > info->output_height - info->crop_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    } else {
+      /* Check for crop extension */
+      if (info->crop_height > info->output_height) {
+	/* Crop extension does not work when transforming! */
+	if (info->transform != JXFORM_NONE ||
+	    info->crop_yoffset >= info->crop_height ||
+	    info->crop_yoffset > info->crop_height - info->output_height)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+	if (info->crop_yoffset >= info->output_height ||
+	    info->crop_height <= 0 ||
+	    info->crop_yoffset > info->output_height - info->crop_height)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
     /* Convert negative crop offsets into regular offsets */
-    if (info->crop_xoffset_set == JCROP_NEG)
-      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
-    else
+    if (info->crop_xoffset_set != JCROP_NEG)
       xoffset = info->crop_xoffset;
-    if (info->crop_yoffset_set == JCROP_NEG)
-      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+    else if (info->crop_width > info->output_width) /* crop extension */
+      xoffset = info->crop_width - info->output_width - info->crop_xoffset;
     else
+      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    if (info->crop_yoffset_set != JCROP_NEG)
       yoffset = info->crop_yoffset;
+    else if (info->crop_height > info->output_height) /* crop extension */
+      yoffset = info->crop_height - info->output_height - info->crop_yoffset;
+    else
+      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
     /* Now adjust so that upper left corner falls at an iMCU boundary */
-    if (info->crop_width_set == JCROP_FORCE)
-      info->output_width = info->crop_width;
-    else
-      info->output_width =
-        info->crop_width + (xoffset % info->iMCU_sample_width);
-    if (info->crop_height_set == JCROP_FORCE)
-      info->output_height = info->crop_height;
-    else
-      info->output_height =
-        info->crop_height + (yoffset % info->iMCU_sample_height);
+    if (info->transform == JXFORM_WIPE) {
+      /* Ensure the effective wipe region will cover the requested */
+      info->drop_width = (JDIMENSION) jdiv_round_up
+	((long) (info->crop_width + (xoffset % info->iMCU_sample_width)),
+	 (long) info->iMCU_sample_width);
+      info->drop_height = (JDIMENSION) jdiv_round_up
+	((long) (info->crop_height + (yoffset % info->iMCU_sample_height)),
+	 (long) info->iMCU_sample_height);
+    } else {
+      /* Ensure the effective crop region will cover the requested */
+      if (info->crop_width_set == JCROP_FORCE ||
+	  info->crop_width > info->output_width)
+	info->output_width = info->crop_width;
+      else
+	info->output_width =
+	  info->crop_width + (xoffset % info->iMCU_sample_width);
+      if (info->crop_height_set == JCROP_FORCE ||
+	  info->crop_height > info->output_height)
+	info->output_height = info->crop_height;
+      else
+	info->output_height =
+	  info->crop_height + (yoffset % info->iMCU_sample_height);
+    }
     /* Save x/y offsets measured in iMCUs */
     info->x_crop_offset = xoffset / info->iMCU_sample_width;
     info->y_crop_offset = yoffset / info->iMCU_sample_height;
@@ -1013,7 +1166,9 @@
   transpose_it = FALSE;
   switch (info->transform) {
   case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0 ||
+	info->output_width > srcinfo->output_width ||
+	info->output_height > srcinfo->output_height)
       need_workspace = TRUE;
     /* No workspace needed if neither cropping nor transforming */
     break;
@@ -1067,6 +1222,8 @@
     need_workspace = TRUE;
     transpose_it = TRUE;
     break;
+  case JXFORM_WIPE:
+    break;
   }
 
   /* Allocate workspace if needed.
@@ -1327,12 +1484,13 @@
 {
   /* If force-to-grayscale is requested, adjust destination parameters */
   if (info->force_grayscale) {
-    /* First, ensure we have YCbCr or grayscale data, and that the source's
+    /* First, ensure we have YCC or grayscale data, and that the source's
      * Y channel is full resolution.  (No reasonable person would make Y
      * be less than full resolution, so actually coping with that case
      * isn't worth extra code space.  But we check it to avoid crashing.)
      */
-    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+    if ((((dstinfo->jpeg_color_space == JCS_YCbCr ||
+	   dstinfo->jpeg_color_space == JCS_BG_YCC) &&
 	  dstinfo->num_components == 3) ||
 	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
 	  dstinfo->num_components == 1)) &&
@@ -1427,7 +1585,11 @@
    */
   switch (info->transform) {
   case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+    if (info->output_width > srcinfo->output_width ||
+	info->output_height > srcinfo->output_height)
+      do_crop_ext(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+		  src_coef_arrays, dst_coef_arrays);
+    else if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
       do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
 	      src_coef_arrays, dst_coef_arrays);
     break;
@@ -1463,6 +1625,10 @@
     do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
 	       src_coef_arrays, dst_coef_arrays);
     break;
+  case JXFORM_WIPE:
+    do_wipe(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	    src_coef_arrays, info->drop_width, info->drop_height);
+    break;
   }
 }
 
diff --git a/transupp.h b/transupp.h
index 9aa0af3..eee6931 100644
--- a/transupp.h
+++ b/transupp.h
@@ -1,7 +1,7 @@
 /*
  * transupp.h
  *
- * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1997-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -51,14 +51,17 @@
  *
  * We also offer a lossless-crop option, which discards data outside a given
  * image region but losslessly preserves what is inside.  Like the rotate and
- * flip transforms, lossless crop is restricted by the JPEG format: the upper
- * left corner of the selected region must fall on an iMCU boundary.  If this
- * does not hold for the given crop parameters, we silently move the upper left
- * corner up and/or left to make it so, simultaneously increasing the region
- * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * flip transforms, lossless crop is restricted by the current JPEG format: the
+ * upper left corner of the selected region must fall on an iMCU boundary.  If
+ * this does not hold for the given crop parameters, we silently move the upper
+ * left corner up and/or left to make it so, simultaneously increasing the
+ * region dimensions to keep the lower right crop corner unchanged.  (Thus, the
  * output image covers at least the requested region, but may cover more.)
  * The adjustment of the region dimensions may be optionally disabled.
  *
+ * A complementary lossless-wipe option is provided to discard (gray out) data
+ * inside a given image region while losslessly preserving what is outside.
+ *
  * We also provide a lossless-resize option, which is kind of a lossless-crop
  * operation in the DCT coefficient block domain - it discards higher-order
  * coefficients and losslessly preserves lower-order coefficients of a
@@ -102,7 +105,8 @@
 	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
 	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
 	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
+	JXFORM_ROT_270,		/* 270-degree clockwise (or 90 ccw) */
+	JXFORM_WIPE		/* wipe */
 } JXFORM_CODE;
 
 /*
@@ -130,7 +134,7 @@
   boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
   boolean trim;			/* if TRUE, trim partial MCUs as needed */
   boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop source image */
+  boolean crop;			/* if TRUE, crop or wipe source image */
 
   /* Crop parameters: application need not set these unless crop is TRUE.
    * These can be filled in by jtransform_parse_crop_spec().
@@ -151,6 +155,8 @@
   JDIMENSION output_height;
   JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
   JDIMENSION y_crop_offset;
+  JDIMENSION drop_width;	/* drop/wipe dimensions measured in iMCUs */
+  JDIMENSION drop_height;
   int iMCU_sample_width;	/* destination iMCU size */
   int iMCU_sample_height;
 } jpeg_transform_info;
diff --git a/usage.txt b/usage.txt
index 88158d6..75140e5 100644
--- a/usage.txt
+++ b/usage.txt
@@ -158,10 +158,10 @@
 
 Switches for advanced users:
 
-	-arithmetic	Use arithmetic coding.  CAUTION: arithmetic coded JPEG
-			is not yet widely implemented, so many decoders will
-			be unable to view an arithmetic coded JPEG file at
-			all.
+	-arithmetic	Use arithmetic coding.
+			CAUTION: arithmetic coded JPEG is not yet widely
+			implemented, so many decoders will be unable to
+			view an arithmetic coded JPEG file at all.
 
 	-block N	Set DCT block size.  All N from 1 to 16 are possible.
 			Default is 8 (baseline format).
@@ -188,6 +188,25 @@
 			so many decoders will be unable to view a reversible
 			color transformed JPEG file at all.
 
+	-bgycc		Create big gamut YCC JPEG file.
+			In this type of encoding the color difference
+			components are quantized further by a factor of 2
+			compared to the normal Cb/Cr values, thus creating
+			space to allow larger color values with higher
+			saturation than the normal gamut limits to be encoded.
+			In order to compensate for the loss of color fidelity
+			compared to a normal YCC encoded file, the color
+			quantization tables can be adjusted accordingly.
+			For example, cjpeg -bgycc -quality 80,90 will give
+			similar results as cjpeg -quality 80.
+			CAUTION: For correct decompression a decoder with big
+			gamut YCC support (JFIF version 2) is required.
+			An old decoder may or may not display a big gamut YCC
+			encoded JPEG file, depending on JFIF version check
+			and corresponding warning/error configuration.
+			In case of a granted decompression the old decoder
+			will display the image with half saturated colors.
+
 	-dct int	Use integer DCT method (default).
 	-dct fast	Use fast integer DCT (less accurate).
 	-dct float	Use floating-point DCT method.
@@ -387,7 +406,8 @@
 is often a lot more than it is on larger files.  (At present, -optimize
 mode is always selected when generating progressive JPEG files.)
 
-GIF input files are no longer supported, to avoid the Unisys LZW patent.
+GIF input files are no longer supported, to avoid the Unisys LZW patent
+(now expired).
 (Conversion of GIF files to JPEG is usually a bad idea anyway.)
 
 
@@ -415,8 +435,9 @@
 decompress, with some loss of image quality, by specifying -onepass for
 one-pass quantization.
 
-To avoid the Unisys LZW patent, djpeg produces uncompressed GIF files.  These
-are larger than they should be, but are readable by standard GIF decoders.
+To avoid the Unisys LZW patent (now expired), djpeg produces uncompressed GIF
+files.  These are larger than they should be, but are readable by standard GIF
+decoders.
 
 
 HINTS FOR BOTH PROGRAMS
@@ -533,14 +554,20 @@
 flip transforms, lossless crop is restricted by the current JPEG format: the
 upper left corner of the selected region must fall on an iMCU boundary.  If
 this does not hold for the given crop parameters, we silently move the upper
-left corner up and/or left to make it so, simultaneously increasing the region
-dimensions to keep the lower right crop corner unchanged.  (Thus, the output
-image covers at least the requested region, but may cover more.)
+left corner up and/or left to make it so, simultaneously increasing the
+region dimensions to keep the lower right crop corner unchanged.  (Thus, the
+output image covers at least the requested region, but may cover more.)
+The adjustment of the region dimensions may be optionally disabled.
 
 The image can be losslessly cropped by giving the switch:
 	-crop WxH+X+Y	Crop to a rectangular subarea of width W, height H
 			starting at point X,Y.
 
+A complementary lossless-wipe option is provided to discard (gray out) data
+inside a given image region while losslessly preserving what is outside:
+	-wipe WxH+X+Y	Wipe (gray out) a rectangular subarea of
+			width W, height H starting at point X,Y.
+
 Other not-strictly-lossless transformation switches are:
 
 	-grayscale	Force grayscale output.
