Update x86[-64] assembler recommendations
NASM 2.11.08 has a bug that prevents it from properly assembling a
macho64 version of libjpeg-turbo (the resulting binary generates corrupt
images.)  2.11.09 works properly.  YASM also works properly and has been
a supported alternative since libjpeg-turbo 1.2.
diff --git a/BUILDING.txt b/BUILDING.txt
index 97bd521..169a1be 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -1,5 +1,5 @@
 *******************************************************************************
-**     Building on Unix Platforms (including Cygwin)
+**     Building on Un*x Platforms (including Cygwin and OS X)
 *******************************************************************************
 
 
@@ -10,6 +10,9 @@
 -- autoconf 2.56 or later
 -- automake 1.7 or later
 -- libtool 1.4 or later
+   * If using Xcode 4.3 or later on OS X, autoconf and automake are no longer
+     provided.  The easiest way to obtain them is from MacPorts
+     (http://www.macports.org/).
 
 -- NASM or YASM (if building x86 or x86-64 SIMD extensions)
    * NASM 0.98, or 2.01 or later is required for an x86 build (0.99 and 2.00 do
@@ -36,14 +39,26 @@
    NOTE: the NASM build will fail if texinfo is not installed.
 
 -- GCC v4.1 or later recommended for best performance
+   * Beginning with Xcode 4, Apple stopped distributing GCC and switched to
+     the LLVM compiler.  Xcode v4.0 through v4.6 provides a GCC front end
+     called LLVM-GCC.  Unfortunately, as of this writing, neither LLVM-GCC nor
+     the LLVM (clang) compiler produces optimal performance with libjpeg-turbo.
+     Building libjpeg-turbo with LLVM-GCC v4.2 results in a 10% performance
+     degradation when compressing using 64-bit code, relative to building
+     libjpeg-turbo with GCC v4.2.  Building libjpeg-turbo with LLVM (clang)
+     results in a 20% performance degradation when compressing using 64-bit
+     code, relative to building libjpeg-turbo with GCC v4.2.  If you are
+     running Snow Leopard or earlier, it is suggested that you continue to use
+     Xcode v3.2.6, which provides GCC v4.2.  If you are using Lion or later, it
+     is suggested that you install Apple GCC v4.2 through MacPorts.
 
--- If building the TurboJPEG/OSS Java wrapper, JDK or OpenJDK 1.5 or later is
+-- If building the TurboJPEG Java wrapper, JDK or OpenJDK 1.5 or later is
    required.  Some systems, such as OS X 10.4, Solaris 10 and later, and Red
    Hat Enterprise Linux 5 and later, have this pre-installed.  On OS X 10.5 and
    later, it will be necessary to install the Java Developer Package, which can
-   be downloaded from http://connect.apple.com.  For systems that do not have a
-   JDK installed, you can obtain the Oracle Java Development Kit from
-   http://www.java.com.
+   be downloaded from http://developer.apple.com/downloads (Apple ID required.)
+   For systems that do not have a JDK installed, you can obtain the Oracle Java
+   Development Kit from http://www.java.com.
 
 
 ==================
@@ -64,9 +79,9 @@
 Building libjpeg-turbo
 ======================
 
-The following procedure will build libjpeg-turbo on Linux, FreeBSD, 32-bit
-OS X, Cygwin, and Solaris/x86 systems (on Solaris, this generates a 32-bit
-library.  See below for 64-bit build instructions.)
+The following procedure will build libjpeg-turbo on Linux, FreeBSD, Cygwin, and
+Solaris/x86 systems (on Solaris, this generates a 32-bit library.  See below
+for 64-bit build instructions.)
 
   cd {source_directory}
   autoreconf -fiv
@@ -74,40 +89,44 @@
   sh {source_directory}/configure [additional configure flags]
   make
 
-NOTE: Running autoreconf in the source directory is only necessary if building
-libjpeg-turbo from the SVN repository.
+NOTE: Running autoreconf in the source directory is usually only necessary if
+building libjpeg-turbo from the SVN repository.
 
 This will generate the following files under .libs/
 
   libjpeg.a
-      Static link library for libjpeg-turbo
+      Static link library for the libjpeg API
 
-  libjpeg.so.{version} (Linux, Solaris)
+  libjpeg.so.{version} (Linux, Unix)
   libjpeg.{version}.dylib (OS X)
   cygjpeg-{version}.dll (Cygwin)
-      Shared library for libjpeg-turbo
+      Shared library for the libjpeg API
 
-  libjpeg.so (Linux, Solaris)
+  By default, {version} is 62.1.0, 7.1.0, or 8.0.2, depending on whether
+  libjpeg v6b (default), v7, or v8 emulation is enabled.  If using Cygwin,
+  {version} is 62, 7, or 8.
+
+  libjpeg.so (Linux, Unix)
   libjpeg.dylib (OS X)
+      Development symlink for the libjpeg API
+
   libjpeg.dll.a (Cygwin)
-      Development stub for libjpeg-turbo shared library
+      Import library for the libjpeg API
 
   libturbojpeg.a
-      Static link library for TurboJPEG/OSS
+      Static link library for the TurboJPEG API
 
-  libturbojpeg.so (Linux, Solaris)
+  libturbojpeg.so.0.0.0 (Linux, Unix)
+  libturbojpeg.0.0.0.dylib (OS X)
+  cygturbojpeg-0.dll (Cygwin)
+      Shared library for the TurboJPEG API
+
+  libturbojpeg.so (Linux, Unix)
   libturbojpeg.dylib (OS X)
-      Shared library and development stub for TurboJPEG/OSS
-
-  cygturbojpeg.dll (Cygwin)
-      Shared library for TurboJPEG/OSS
+      Development symlink for the TurboJPEG API
 
   libturbojpeg.dll.a (Cygwin)
-      Development stub for TurboJPEG/OSS shared library
-
-{version} is 62.0.0, 7.0.0, or 8.0.2, depending on whether libjpeg v6b
-(default), v7, or v8 emulation is enabled.  If using Cygwin, {version} is
-62, 7, or 8.
+      Import library for the TurboJPEG API
 
 
 libjpeg v7 or v8 API/ABI Emulation
@@ -120,6 +139,16 @@
 on libjpeg v7 and v8 emulation.
 
 
+In-Memory Source/Destination Managers
+-------------------------------------
+
+When using libjpeg v6b or v7 API/ABI emulation, add --without-mem-srcdst to the
+configure command line to build a version of libjpeg-turbo that lacks the
+jpeg_mem_src() and jpeg_mem_dest() functions.  These functions were not part of
+the original libjpeg v6b and v7 APIs, so removing them ensures strict
+conformance with those APIs.  See README-turbo.txt for more information.
+
+
 Arithmetic Coding Support
 -------------------------
 
@@ -132,13 +161,12 @@
 disable encoding or decoding (respectively.)
 
 
-TurboJPEG/OSS Java Wrapper
---------------------------
+TurboJPEG Java Wrapper
+----------------------
 Add --with-java to the configure command line to incorporate an optional Java
-Native Interface wrapper into the TurboJPEG/OSS dynamic library and build the
-Java front-end classes to support it.  This allows the TurboJPEG/OSS dynamic
-library to be used directly from Java applications.  See java/README for more
-details.
+Native Interface wrapper into the TurboJPEG shared library and build the Java
+front-end classes to support it.  This allows the TurboJPEG shared library to
+be used directly from Java applications.  See java/README for more details.
 
 You can set the JAVAC, JAR, and JAVA configure variables to specify
 alternate commands for javac, jar, and java (respectively.)  You can also
@@ -164,13 +192,14 @@
 will install the header files in /usr/local/include and the library files in
 /usr/local/lib64.  If 'prefix' and 'libdir' are not specified, then the default
 is to install the header files in /opt/libjpeg-turbo/include and the library
-files in /opt/libjpeg-turbo/lib.
+files in /opt/libjpeg-turbo/lib32 (32-bit) or /opt/libjpeg-turbo/lib64
+(64-bit.)
 
 NOTE: You can specify a prefix of /usr and a libdir of, for instance,
 /usr/lib64 to overwrite the system's version of libjpeg.  If you do this,
 however, then be sure to BACK UP YOUR SYSTEM'S INSTALLATION OF LIBJPEG before
 overwriting it.  It is recommended that you instead install libjpeg-turbo into
-a non-system directory and manipulate the LD_LIBRARY_PATH or create sym links
+a non-system directory and manipulate the LD_LIBRARY_PATH or create symlinks
 to force applications to use libjpeg-turbo instead of libjpeg.  See
 README-turbo.txt for more information.
 
@@ -180,8 +209,8 @@
 =============
 
 
-32-bit Library Build on 64-bit Linux
-------------------------------------
+32-bit Build on 64-bit Linux
+----------------------------
 
 Add
 
@@ -190,8 +219,8 @@
 to the configure command line.
 
 
-64-bit Library Build on 64-bit OS X
------------------------------------
+64-bit Build on 64-bit OS X
+---------------------------
 
 Add
 
@@ -201,8 +230,8 @@
 installed.
 
 
-32-bit Library Build on 64-bit OS X
------------------------------------
+32-bit Build on 64-bit OS X
+---------------------------
 
 Add
 
@@ -211,8 +240,8 @@
 to the configure command line.
 
 
-64-bit Backward-Compatible Library Build on 64-bit OS X
--------------------------------------------------------
+64-bit Backward-Compatible Build on 64-bit OS X
+-----------------------------------------------
 
 Add
 
@@ -226,8 +255,8 @@
 MacPorts, must be installed.
 
 
-32-bit Backward-Compatible Library Build on OS X
-------------------------------------------------
+32-bit Backward-Compatible Build on OS X
+----------------------------------------
 
 Add
 
@@ -250,8 +279,8 @@
 to the configure command line.
 
 
-32-bit Library Build on 64-bit FreeBSD
---------------------------------------
+32-bit Build on 64-bit FreeBSD
+------------------------------
 
 Add
 
@@ -293,34 +322,57 @@
 JPEG compression/decompression by approximately 2-4x on ARMv7 and later
 platforms.  If libjpeg-turbo is configured on an ARM Linux platform, then the
 build system will automatically include the NEON SIMD routines, if they are
-supported.
+supported.  Build instructions for other ARM-based platforms follow.
 
 
 Building libjpeg-turbo for iOS
 ------------------------------
 
-iOS platforms, such as the iPhone and iPad, also use ARM processors, some of
-which support NEON instructions.  Additional steps are required to build
-libjpeg-turbo for these platforms.  The steps below assume iOS SDK v4.3.  If
-you are using a different SDK version, then you will need to modify the
-examples accordingly.
+iOS platforms, such as the iPhone and iPad, use ARM processors, some of which
+support NEON instructions.  Additional steps are required in order to build
+libjpeg-turbo for these platforms.
 
 Additional build requirements:
 
-  gas-preprocessor.pl (https://github.com/yuvi/gas-preprocessor) should be
-  installed in your PATH.
+  gas-preprocessor.pl
+  (https://sourceforge.net/p/libjpeg-turbo/code/HEAD/tree/gas-preprocessor)
+  should be installed in your PATH.
 
 Set the following shell variables for simplicity:
 
-  IOS_PLATFORMDIR="/Developer/Platforms/iPhoneOS.platform"
-  IOS_SYSROOT="$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS4.3.sdk"
-  IOS_GCC="$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2"
+  Xcode 4.2 and earlier:
+  IOS_PLATFORMDIR=/Developer/Platforms/iPhoneOS.platform
+  Xcode 4.3 and later:
+  IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
+
+  IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
+
+  Xcode 4.6.x and earlier:
+  IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
+  Xcode 5.0.x and later:
+  IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
+
+  Xcode 5.1.x / iOS 7.1 SDK:
+  IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
+  IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS7.1.sdk
+  IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
 
   ARM v6 only (up to and including iPhone 3G):
+  [NOTE: Requires Xcode 4.4.x or earlier]
   IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp"
 
-  ARM v7 only (iPhone 3GS and newer, iPad):
+  ARM v7 only (iPhone 3GS-4S, iPad 1st-3rd Generation):
+  Xcode 4.6.x and earlier:
   IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
+  Xcode 5.0.x and later:
+  IOS_CFLAGS="-arch armv7"
+
+  ARM v7s only (iPhone 5, iPad 4th Generation):
+  [NOTE: Requires Xcode 4.5 or later]
+  Xcode 4.6.x and earlier:
+  IOS_CFLAGS="-march=armv7s -mcpu=swift -mtune=swift -mfpu=neon"
+  Xcode 5.0.x and later:
+  IOS_CFLAGS="-arch armv7s"
 
 Follow the procedure under "Building libjpeg-turbo" above, adding
 
@@ -329,10 +381,63 @@
     CFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
     LDFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT $IOS_CFLAGS"
 
+to the configure command line.  If using Xcode 5.0.x or later, also add
+
+    CCASFLAGS="-no-integrated-as $IOS_CFLAGS"
+
 to the configure command line.
 
-Once built, lipo can be used to combine the ARM v6 and v7 variants into a
-universal library.
+NOTE:  You can also add -miphoneos-version-min={version} to $IOS_CFLAGS above
+in order to support older versions of iOS than the default version supported by
+the SDK.
+
+Once built, lipo can be used to combine the ARM v6, v7, and/or v7s variants
+into a universal library.
+
+NOTE: If you are building libjpeg-turbo from the "official" project tarball,
+then it is highly likely that you will need to run 'autoreconf -fiv' in the
+source tree prior to building ARM v7 or v7s iOS binaries using the techniques
+described above.  Otherwise, you may get a libtool error such as "unable to
+infer tagged configuration."
+
+
+Building libjpeg-turbo for Android
+----------------------------------
+
+Building libjpeg-turbo for Android platforms requires the Android NDK
+(https://developer.android.com/tools/sdk/ndk) and autotools.  The following is
+a general recipe script that can be modified for your specific needs.
+
+  # Set these variables to suit your needs
+  NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/ndk}
+  BUILD_PLATFORM={the platform name for the NDK package you installed--
+    for example, "windows-x86" or "linux-x86_64"}
+  TOOLCHAIN_VERSION={"4.6", "4.8", etc.  This corresponds to a toolchain
+    directory under ${NDK_PATH}/toolchains/.}
+  ANDROID_VERSION={The minimum version of Android to support-- for example,
+    "9", "19", etc.}
+
+  HOST=arm-linux-androideabi
+  TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
+  SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm
+  ANDROID_INCLUDES="-I${SYSROOT}/usr/include -I${TOOLCHAIN}/include"
+  ANDROID_CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \
+    -fstrict-aliasing --sysroot=${SYSROOT}"
+  export CPP=${TOOLCHAIN}/bin/${HOST}-cpp
+  export AR=${TOOLCHAIN}/bin/${HOST}-ar
+  export AS=${TOOLCHAIN}/bin/${HOST}-as
+  export NM=${TOOLCHAIN}/bin/${HOST}-nm
+  export CC=${TOOLCHAIN}/bin/${HOST}-gcc
+  export LD=${TOOLCHAIN}/bin/${HOST}-ld
+  export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib
+  export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump
+  export STRIP=${TOOLCHAIN}/bin/${HOST}-strip
+  cd {build_directory}
+  sh {source_directory}/configure --host=${HOST} \
+    CFLAGS="${ANDROID_INCLUDES} ${ANDROID_CFLAGS} -O3" \
+    CPPFLAGS="${ANDROID_INCLUDES} ${ANDROID_CFLAGS}" \
+    LDFLAGS="${ANDROID_CFLAGS}" --with-simd ${1+"$@"}
+  make
 
 
 *******************************************************************************
@@ -344,7 +449,7 @@
 Build Requirements
 ==================
 
--- CMake (http://www.cmake.org) v2.6 or later
+-- CMake (http://www.cmake.org) v2.8.8 or later
 
 -- Microsoft Visual C++ 2005 or later
 
@@ -356,8 +461,9 @@
    The Windows SDK includes both 32-bit and 64-bit Visual C++ compilers and
    everything necessary to build libjpeg-turbo.
 
-   * For 32-bit builds, you can also use Microsoft Visual C++ Express
-     Edition.  Visual C++ Express Edition is a free download.
+   * You can also use Microsoft Visual Studio Express Edition, which is a free
+     download.  (NOTE: versions prior to 2012 can only be used to build 32-bit
+     code.)
    * If you intend to build libjpeg-turbo from the command line, then add the
      appropriate compiler and SDK directories to the INCLUDE, LIB, and PATH
      environment variables.  This is generally accomplished by executing
@@ -371,13 +477,17 @@
 
 -- MinGW
 
-   GCC v4.1 or later recommended for best performance
+   MinGW-builds (http://sourceforge.net/projects/mingwbuilds/) or
+   tdm-gcc (http://tdm-gcc.tdragon.net/) recommended if building on a Windows
+   machine.  Both distributions install a Start Menu link that can be used to
+   launch a command prompt with the appropriate compiler paths automatically
+   set.
 
 -- NASM (http://www.nasm.us/) 0.98 or later (NASM 2.05 or later is required for
    a 64-bit build)
 
--- If building the TurboJPEG/OSS Java wrapper, JDK 1.5 or later is required.
-   This can be downloaded from http://www.java.com.
+-- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required.  This
+   can be downloaded from http://www.java.com.
 
 
 ==================
@@ -412,17 +522,17 @@
 The following files will be generated under {build_directory}:
 
   jpeg-static.lib
-      Static link library for libjpeg-turbo
+      Static link library for the libjpeg API
   sharedlib/jpeg{version}.dll
-      DLL for libjpeg-turbo
+      DLL for the libjpeg API
   sharedlib/jpeg.lib
-      Import library for libjpeg-turbo DLL
+      Import library for the libjpeg API
   turbojpeg-static.lib
-      Static link library for TurboJPEG/OSS
+      Static link library for the TurboJPEG API
   turbojpeg.dll
-      DLL for TurboJPEG/OSS
+      DLL for the TurboJPEG API
   turbojpeg.lib
-      Import library for TurboJPEG/OSS DLL
+      Import library for the TurboJPEG API
 
 {version} is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or
 v8 emulation is enabled.
@@ -436,7 +546,12 @@
 instance:
 
   cd {build_directory}
-  cmake -G "Visual Studio 9 2008" {source_directory}
+  cmake -G "Visual Studio 10" {source_directory}
+
+NOTE:  Add "Win64" to the generator name (for example, "Visual Studio 10
+Win64") to build a 64-bit version of libjpeg-turbo.  Recent versions of CMake
+no longer document this suffix.  Separate build directories must be used for
+32-bit and 64-bit builds.
 
 You can then open ALL_BUILD.vcproj in Visual Studio and build one of the
 configurations in that project ("Debug", "Release", etc.) to generate a full
@@ -445,17 +560,17 @@
 This will generate the following files under {build_directory}:
 
   {configuration}/jpeg-static.lib
-      Static link library for libjpeg-turbo
+      Static link library for the libjpeg API
   sharedlib/{configuration}/jpeg{version}.dll
-      DLL for libjpeg-turbo
+      DLL for the libjpeg API
   sharedlib/{configuration}/jpeg.lib
-      Import library for libjpeg-turbo DLL
+      Import library for the libjpeg API
   {configuration}/turbojpeg-static.lib
-      Static link library for TurboJPEG/OSS
+      Static link library for the TurboJPEG API
   {configuration}/turbojpeg.dll
-      DLL for TurboJPEG/OSS
+      DLL for the TurboJPEG API
   {configuration}/turbojpeg.lib
-      Import library for TurboJPEG/OSS DLL
+      Import library for the TurboJPEG API
 
 {configuration} is Debug, Release, RelWithDebInfo, or MinSizeRel, depending on
 the configuration you built in the IDE, and {version} is 62, 7, or 8,
@@ -465,24 +580,27 @@
 MinGW
 -----
 
+NOTE: This assumes that you are building on a Windows machine.  If you are
+cross-compiling on a Linux/Unix machine, then see "Build Recipes" below.
+
   cd {build_directory}
-  cmake -G "MSYS Makefiles" {source_directory}
-  make
+  cmake -G "MinGW Makefiles" {source_directory}
+  mingw32-make
 
 This will generate the following files under {build_directory}
 
   libjpeg.a
-      Static link library for libjpeg-turbo
+      Static link library for the libjpeg API
   sharedlib/libjpeg-{version}.dll
-      DLL for libjpeg-turbo
+      DLL for the libjpeg API
   sharedlib/libjpeg.dll.a
-      Import library for libjpeg-turbo DLL
+      Import library for the libjpeg API
   libturbojpeg.a
-      Static link library for TurboJPEG/OSS
+      Static link library for the TurboJPEG API
   libturbojpeg.dll
-      DLL for TurboJPEG/OSS
+      DLL for the TurboJPEG API
   libturbojpeg.dll.a
-      Import library for TurboJPEG/OSS DLL
+      Import library for the TurboJPEG API
 
 {version} is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or
 v8 emulation is enabled.
@@ -506,6 +624,16 @@
 on libjpeg v7 and v8 emulation.
 
 
+In-Memory Source/Destination Managers
+-------------------------------------
+
+When using libjpeg v6b or v7 API/ABI emulation, add -DWITH_MEM_SRCDST=0 to the
+CMake command line to build a version of libjpeg-turbo that lacks the
+jpeg_mem_src() and jpeg_mem_dest() functions.  These functions were not part of
+the original libjpeg v6b and v7 APIs, so removing them ensures strict
+conformance with those APIs.  See README-turbo.txt for more information.
+
+
 Arithmetic Coding Support
 -------------------------
 
@@ -518,13 +646,12 @@
 disable encoding or decoding (respectively.)
 
 
-TurboJPEG/OSS Java Wrapper
---------------------------
+TurboJPEG Java Wrapper
+----------------------
 Add "-DWITH_JAVA=1" to the cmake command line to incorporate an optional Java
-Native Interface wrapper into the TurboJPEG/OSS dynamic library and build the
-Java front-end classes to support it.  This allows the TurboJPEG/OSS dynamic
-library to be used directly from Java applications.  See java/README for more
-details.
+Native Interface wrapper into the TurboJPEG shared library and build the Java
+front-end classes to support it.  This allows the TurboJPEG shared library to
+be used directly from Java applications.  See java/README for more details.
 
 If you are using CMake 2.8, you can set the Java_JAVAC_EXECUTABLE,
 Java_JAVA_EXECUTABLE, and Java_JAR_EXECUTABLE CMake variables to specify
@@ -567,8 +694,8 @@
   cd {build_directory}
   CC=/usr/bin/x86_64-w64-mingw32-gcc \
     cmake -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=Windows \
-    -DCMAKE_AR=/usr/bin/x86_64-w64-mingw32-ar \
-    -DCMAKE_RANLIB=/usr/bin/x86_64-w64-mingw32-ranlib {source_directory}
+    -DCMAKE_RC_COMPILER=/usr/bin/x86_64-w64-mingw32-windres.exe \
+    {source_directory}
   make
 
 This produces a 64-bit build of libjpeg-turbo that does not depend on
@@ -582,8 +709,8 @@
   cd {build_directory}
   CC=/usr/bin/i686-w64-mingw32-gcc \
     cmake -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=Windows \
-    -DDCMAKE_AR=/usr/bin/i686-w64-mingw32-ar \
-    -DCMAKE_RANLIB=/usr/bin/i686-w64-mingw32-ranlib {source_directory}
+    -DCMAKE_RC_COMPILER=/usr/bin/i686-w64-mingw32-windres.exe \
+    {source_directory}
   make
 
 This produces a 32-bit build of libjpeg-turbo that does not depend on
@@ -591,21 +718,6 @@
 mingw64-i686-gcc-g++ packages (and their dependencies) must be installed.
 
 
-MinGW-w64 Build on Windows
---------------------------
-
-This produces a 64-bit build of libjpeg-turbo using the "native" MinGW-w64
-toolchain (which is faster than the Cygwin version):
-
-  cd {build_directory}
-  CC={mingw-w64_binary_path}/x86_64-w64-mingw32-gcc \
-    cmake -G "MSYS Makefiles" \
-    -DCMAKE_AR={mingw-w64_binary_path}/x86_64-w64-mingw32-ar \
-    -DCMAKE_RANLIB={mingw-w64_binary_path}/x86_64-w64-mingw32-ranlib \
-    {source_directory}
-  make
-
-
 MinGW Build on Linux
 --------------------
 
@@ -625,8 +737,8 @@
 The following commands can be used to create various types of release packages:
 
 
-Unix
-----
+Unix/Linux
+----------
 
 make rpm
 
@@ -645,6 +757,9 @@
 
   Create Macintosh package/disk image.  This requires the PackageMaker
   application, which must be installed in /Developer/Applications/Utilities.
+  Note that PackageMaker is not included in recent releases of Xcode, but it
+  can be obtained by downloading the "Auxiliary Tools for Xcode" package from
+  http://developer.apple.com/downloads.
 
 make udmg [BUILDDIR32={32-bit build directory}]
 
@@ -659,35 +774,24 @@
 make iosdmg [BUILDDIR32={32-bit build directory}] \
   [BUILDDIRARMV6={ARM v6 build directory}] \
   [BUILDDIRARMV7={ARM v7 build directory}] \
+  [BUILDDIRARMV7S={ARM v7s build directory}]
 
   On OS X systems, this creates a Macintosh package and disk image in which the
   libjpeg-turbo static libraries contain ARM architectures necessary to build
   iOS applications.  If building on an x86-64 system, the binaries will also
   contain the i386 architecture, as with 'make udmg' above.  You should first
-  configure ARM v6 and ARM v7 out-of-tree builds of libjpeg-turbo (see
-  "Building libjpeg-turbo for iOS" above.)  If you are building an x86-64
+  configure ARM v6, ARM v7, and/or ARM v7s out-of-tree builds of libjpeg-turbo
+  (see "Building libjpeg-turbo for iOS" above.)  If you are building an x86-64
   version of libjpeg-turbo, you should configure a 32-bit out-of-tree build as
   well.  Next, build libjpeg-turbo as you would normally, using an out-of-tree
   build.  When it is built, run 'make iosdmg' from the build directory.  The
   build system will look for the ARM v6 build under {source_directory}/iosarmv6
   by default, the ARM v7 build under {source_directory}/iosarmv7 by default,
-  and (if applicable) the 32-bit build under {source_directory}/osxx86 by
-  default, but you can override this by setting the BUILDDIR32, BUILDDIRARMV6,
-  and/or BUILDDIRARMV7 variables on the make command line as shown above.
-
-make sunpkg
-
-  Build a Solaris package.  This requires pkgmk, pkgtrans, and bzip2.
-
-make csunpkg [BUILDDIR32={32-bit build directory}]
-
-  On 64-bit Solaris systems, this creates a combined package that contains
-  both 32-bit and 64-bit libraries.  You should first configure a 32-bit
-  out-of-tree build of libjpeg-turbo, then configure a 64-bit out-of-tree
-  build, then run 'make csunpkg' from the 64-bit build directory.  The build
-  system will look for the 32-bit build under {source_directory}/solx86 by
-  default, but you can override this by setting the BUILDDIR32 variable on the
-  make command line as shown above.
+  the ARM v7s build under {source_directory}/iosarmv7s by default, and (if
+  applicable) the 32-bit build under {source_directory}/osxx86 by default, but
+  you can override this by setting the BUILDDIR32, BUILDDIRARMV6,
+  BUILDDIRARMV7, and/or BUILDDIRARMV7S variables on the make command line as
+  shown above.
 
 make cygwinpkg
 
@@ -736,11 +840,11 @@
 the 'testclean' target (if using the Visual Studio IDE) will clean up the
 output images generated by 'make test'.
 
-On Unix/Linux platforms, more extensive tests of the TurboJPEG/OSS C and Java
+On Unix/Linux platforms, more extensive tests of the TurboJPEG C and Java
 wrappers can be run by invoking 'make tjtest'.  These extended TurboJPEG tests
 essentially iterate through all of the available features of the TurboJPEG APIs
 that are not covered by the TurboJPEG unit tests (this includes the lossless
 transform options) and compare the images generated by each feature to images
 generated using the equivalent feature in the libjpeg API.  The extended
 TurboJPEG tests are meant to test for regressions in the TurboJPEG wrappers,
-not in the underlying libjpeg-turbo library.
+not in the underlying libjpeg API library.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8448eeb..91a92cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,12 +2,16 @@
 # Setup
 #
 
-cmake_minimum_required(VERSION 2.6)
+cmake_minimum_required(VERSION 2.8.8)
+# Use LINK_INTERFACE_LIBRARIES instead of INTERFACE_LINK_LIBRARIES
+if(POLICY CMP0022)
+  cmake_policy(SET CMP0022 OLD)
+endif()
 
 project(libjpeg-turbo C)
-set(VERSION 1.2.2)
+set(VERSION 1.3.2)
 
-if(MINGW OR CYGWIN)
+if(CYGWIN OR NOT CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
   execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
   string(REGEX REPLACE "\n" "" BUILD ${BUILD})
 elseif(WIN32)
@@ -45,7 +49,8 @@
 option(WITH_ARITH_DEC "Include arithmetic decoding support" TRUE)
 option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE)
 option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE)
-option(WITH_JAVA "Build Java wrapper for the TurboJPEG/OSS library" FALSE)
+option(WITH_MEM_SRCDST "Include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI" TRUE)
+option(WITH_JAVA "Build Java wrapper for the TurboJPEG library" FALSE)
 
 if(WITH_ARITH_ENC)
   set(C_ARITH_CODING_SUPPORTED 1)
@@ -62,14 +67,19 @@
 endif()
 
 if(WITH_JAVA)
-  message(STATUS "TurboJPEG/OSS Java wrapper enabled")
+  message(STATUS "TurboJPEG Java wrapper enabled")
 else()
-  message(STATUS "TurboJPEG/OSS Java wrapper disabled")
+  message(STATUS "TurboJPEG Java wrapper disabled")
+endif()
+
+set(SO_AGE 0)
+if(WITH_MEM_SRCDST)
+  set(SO_AGE 1)
 endif()
 
 set(JPEG_LIB_VERSION 62)
 set(DLL_VERSION ${JPEG_LIB_VERSION})
-set(FULLVERSION ${DLL_VERSION}.0.0)
+set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0)
 if(WITH_JPEG8)
   set(JPEG_LIB_VERSION 80)
   set(DLL_VERSION 8)
@@ -78,10 +88,17 @@
 elseif(WITH_JPEG7)
   set(JPEG_LIB_VERSION 70)
   set(DLL_VERSION 7)
-  set(FULLVERSION ${DLL_VERSION}.0.0)
+  set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0)
   message(STATUS "Emulating libjpeg v7 API/ABI")
 endif(WITH_JPEG8)
 
+if(WITH_MEM_SRCDST)
+  set(MEM_SRCDST_SUPPORTED 1)
+  message(STATUS "In-memory source/destination managers enabled")
+else()
+  message(STATUS "In-memory source/destination managers disabled")
+endif()
+
 if(MSVC)
   # Use the static C library for all build types
   foreach(var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
@@ -109,8 +126,24 @@
   message(STATUS "32-bit build")
 endif()
 
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  if(MSVC)
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME})
+  else()
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME}-gcc)
+  endif()
+  if(64BIT)
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_INSTALL_PREFIX_DEFAULT}64)
+  endif()
+  set(CMAKE_INSTALL_PREFIX "c:/${CMAKE_INSTALL_PREFIX_DEFAULT}" CACHE PATH
+    "Directory into which to install libjpeg-turbo (default: c:/${CMAKE_INSTALL_PREFIX_DEFAULT})"
+    FORCE)
+endif()
+
+message(STATUS "Install directory = ${CMAKE_INSTALL_PREFIX}")
+
 configure_file(win/jconfig.h.in jconfig.h)
-configure_file(win/config.h.in config.h)
+configure_file(win/jconfigint.h.in jconfigint.h)
 
 include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR})
 
@@ -249,6 +282,40 @@
 endif()
 
 enable_testing()
+
+set(MD5_JPEG_INT 9a68f56bc76e466aa7e52f415d0f4a5f)
+set(MD5_JPEG_FAST 0e1502e7fa421835e376a314fac2a39f)
+set(MD5_JPEG_FAST_100 7bf72a8e741d64eecb960c97323af77c)
+set(MD5_JPEG_FLOAT d1623885ffafcd40c684af09e3d65cd5)
+set(MD5_JPEG_FLOAT_NOSIMD fb4884c35f8273f498cb32879de5c455)
+set(MD5_JPEG_INT_GRAY 72b51f894b8f4a10b3ee3066770aa38d)
+set(MD5_PPM_INT d1ed0d11f076b842525271647716aeb8)
+set(MD5_PPM_FAST 048298a2d2410261c0533cb97bcfef23)
+set(MD5_PPM_FLOAT 7f5b446ee36b2630e06785b8d42af15f)
+set(MD5_PPM_FLOAT_NOSIMD 64072f1dbdc5b3a187777788604971a5)
+set(MD5_PPM_INT_2_1 9f9de8c0612f8d06869b960b05abf9c9)
+set(MD5_PPM_INT_15_8 b6875bc070720b899566cc06459b63b7)
+set(MD5_PPM_INT_7_4 06a177eae05f164fac57f7a2c346ee87)
+set(MD5_PPM_INT_13_8 bc3452573c8152f6ae552939ee19f82f)
+set(MD5_PPM_INT_3_2 f5a8b88a8a7f96016f04d259cf82ed67)
+set(MD5_PPM_INT_11_8 d8cc73c0aaacd4556569b59437ba00a5)
+set(MD5_PPM_INT_5_4 32775dd9ad2ab90f4c5b219b53e0c86c)
+set(MD5_PPM_INT_9_8 d25e61bc7eac0002f5b393aa223747b6)
+set(MD5_PPM_INT_7_8 ddb564b7c74a09494016d6cd7502a946)
+set(MD5_PPM_INT_3_4 8ed8e68808c3fbc4ea764fc9d2968646)
+set(MD5_PPM_INT_5_8 a3363274999da2366a024efae6d16c9b)
+set(MD5_PPM_INT_1_2 e692a315cea26b988c8e8b29a5dbcd81)
+set(MD5_PPM_INT_3_8 79eca9175652ced755155c90e785a996)
+set(MD5_PPM_INT_1_4 79cd778f8bf1a117690052cacdd54eca)
+set(MD5_PPM_INT_1_8 391b3d4aca640c8567d6f8745eb2142f)
+set(MD5_PPM_FAST_1_2 f30bcf6d32ccd44cbdd9aeaacbd9454f)
+set(MD5_BMP_256 4980185e3776e89bd931736e1cddeee6)
+set(MD5_JPEG_ARI e986fb0a637a8d833d96e8a6d6d84ea1)
+set(MD5_PPM_ARI 72b59a99bcf1de24c5b27d151bde2437)
+set(MD5_JPEG_PROG 1c4afddc05c0a43489ee54438a482d92)
+set(MD5_JPEG_PROG_ARI 0a8f1c8f66e113c3cf635df0a475a617)
+set(MD5_JPEG_CROP b4197f377e621c4e9b1d20471432610d)
+
 if(WITH_JAVA)
 add_test(TJUnitTest ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest)
 add_test(TJUnitTest-yuv ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -yuv)
@@ -259,125 +326,115 @@
 add_test(tjunittest-alloc tjunittest -alloc)
 add_test(tjunittest-yuv tjunittest -yuv)
 add_test(cjpeg-int sharedlib/cjpeg -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testoutint.jpg)
+add_test(cjpeg-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutint.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-fast sharedlib/cjpeg -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-fast-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst.jpg testoutfst.jpg)
+add_test(cjpeg-fast-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FAST} -DFILE=testoutfst.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-fast-100 sharedlib/cjpeg -dct fast -quality 100 -opt -outfile testoutfst100.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-fast-100-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst100.jpg testoutfst100.jpg)
+add_test(cjpeg-fast-100-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FAST_100} -DFILE=testoutfst100.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-float sharedlib/cjpeg -dct float -outfile testoutflt.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
 if(WITH_SIMD)
-add_test(cjpeg-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt.jpg testoutflt.jpg)
+add_test(cjpeg-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FLOAT} -DFILE=testoutflt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 else()
-add_test(cjpeg-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt-nosimd.jpg testoutflt.jpg)
+add_test(cjpeg-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FLOAT_NOSIMD} -DFILE=testoutflt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 add_test(cjpeg-int-gray sharedlib/cjpeg -dct int -grayscale -outfile testoutgray.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-int-gray-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimggray.jpg testoutgray.jpg)
+add_test(cjpeg-int-gray-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT_GRAY} -DFILE=testoutgray.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-int sharedlib/djpeg -dct int -fast -ppm -outfile testoutint.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.ppm testoutint.ppm)
+add_test(djpeg-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_INT} -DFILE=testoutint.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-fast sharedlib/djpeg -dct fast -ppm -outfile testoutfst.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-fast-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst.ppm testoutfst.ppm)
+add_test(djpeg-fast-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FAST} -DFILE=testoutfst.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-float sharedlib/djpeg -dct float -ppm -outfile testoutflt.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
 if(WITH_SIMD)
-add_test(djpeg-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt.ppm testoutflt.ppm)
+add_test(djpeg-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FLOAT} -DFILE=testoutflt.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 else()
-add_test(djpeg-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm testoutflt.ppm)
+add_test(djpeg-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FLOAT_NOSIMD} -DFILE=testoutflt.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
-add_test(djpeg-int-1_2 sharedlib/djpeg -dct int -scale 1/2 -ppm -outfile testoutint1_2.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-int-1_2-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_2.ppm testoutint1_2.ppm)
+foreach(scale 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8)
+string(REGEX REPLACE "_" "/" scalearg ${scale})
+add_test(djpeg-int-${scale} sharedlib/djpeg -dct int -nosmooth -scale ${scalearg} -ppm -outfile testoutint${scale}.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
+add_test(djpeg-int-${scale}-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_INT_${scale}} -DFILE=testoutint${scale}.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+endforeach()
 add_test(djpeg-fast-1_2 sharedlib/djpeg -dct fast -scale 1/2 -ppm -outfile testoutfst1_2.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-fast-1_2-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_2.ppm testoutfst1_2.ppm)
-add_test(djpeg-int-1_4 sharedlib/djpeg -dct int -scale 1/4 -ppm -outfile testoutint1_4.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-int-1_4-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_4.ppm testoutint1_4.ppm)
-add_test(djpeg-fast-1_4 sharedlib/djpeg -dct fast -scale 1/4 -ppm -outfile testoutfst1_4.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-fast-1_4-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_4.ppm testoutfst1_4.ppm)
-add_test(djpeg-int-1_8 sharedlib/djpeg -dct int -scale 1/8 -ppm -outfile testoutint1_8.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-int-1_8-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_8.ppm testoutint1_8.ppm)
-add_test(djpeg-fast-1_8 sharedlib/djpeg -dct fast -scale 1/8 -ppm -outfile testoutfst1_8.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-fast-1_8-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_8.ppm testoutfst1_8.ppm)
+add_test(djpeg-fast-1_2-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FAST_1_2} -DFILE=testoutfst1_2.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-256 sharedlib/djpeg -dct int -bmp -colors 256 -outfile testout.bmp  ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-256-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimg.bmp testout.bmp)
+add_test(djpeg-256-cmp ${CMAKE_COMMAND} -DMD5=${MD5_BMP_256} -DFILE=testout.bmp -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-prog sharedlib/cjpeg -dct int -progressive -outfile testoutp.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-prog-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgp.jpg testoutp.jpg)
+add_test(cjpeg-prog-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_PROG} -DFILE=testoutp.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-prog sharedlib/jpegtran -outfile testoutt.jpg testoutp.jpg)
-add_test(jpegtran-prog-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testoutt.jpg)
+add_test(jpegtran-prog-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 if(WITH_ARITH_ENC)
 add_test(cjpeg-ari sharedlib/cjpeg -dct int -arithmetic -outfile testoutari.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg testoutari.jpg)
+add_test(cjpeg-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_ARI} -DFILE=testoutari.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-toari sharedlib/jpegtran -arithmetic -outfile testouta.jpg ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg)
-add_test(jpegtran-toari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg testouta.jpg)
+add_test(jpegtran-toari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_ARI} -DFILE=testouta.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-prog-ari sharedlib/cjpeg -dct int -progressive -arithmetic -sample 1x1 -outfile testoutpa.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-prog-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgpa.jpg testoutpa.jpg)
+add_test(cjpeg-prog-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_PROG_ARI} -DFILE=testoutpa.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 if(WITH_ARITH_DEC)
 add_test(djpeg-ari sharedlib/djpeg -dct int -fast -ppm -outfile testoutari.ppm ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
-add_test(djpeg-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.ppm testoutari.ppm)
+add_test(djpeg-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_ARI} -DFILE=testoutari.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-fromari	sharedlib/jpegtran -outfile testouta.jpg ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
-add_test(jpegtran-fromari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testouta.jpg)
+add_test(jpegtran-fromari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testouta.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 add_test(jpegtran-crop sharedlib/jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testoutcrop.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(jpegtran-crop-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgcrop.jpg testoutcrop.jpg)
+add_test(jpegtran-crop-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_CROP} -DFILE=testoutcrop.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 
 add_test(tjunittest-static tjunittest-static)
 add_test(tjunittest-static-alloc tjunittest-static -alloc)
 add_test(tjunittest-static-yuv tjunittest-static -yuv)
 add_test(cjpeg-static-int cjpeg-static -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testoutint.jpg)
+add_test(cjpeg-static-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutint.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-static-fast cjpeg-static -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-fast-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst.jpg testoutfst.jpg)
+add_test(cjpeg-static-fast-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FAST} -DFILE=testoutfst.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-static-fast-100 cjpeg-static -dct fast -quality 100 -opt -outfile testoutfst100.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-fast-100-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst100.jpg testoutfst100.jpg)
+add_test(cjpeg-static-fast-100-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FAST_100} -DFILE=testoutfst100.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-static-float cjpeg-static -dct float -outfile testoutflt.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
 if(WITH_SIMD)
-add_test(cjpeg-static-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt.jpg testoutflt.jpg)
+add_test(cjpeg-static-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FLOAT} -DFILE=testoutflt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 else()
-add_test(cjpeg-static-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt-nosimd.jpg testoutflt.jpg)
+add_test(cjpeg-static-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_FLOAT_NOSIMD} -DFILE=testoutflt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 add_test(cjpeg-static-int-gray cjpeg-static -dct int -grayscale -outfile testoutgray.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-int-gray-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimggray.jpg testoutgray.jpg)
+add_test(cjpeg-static-int-gray-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT_GRAY} -DFILE=testoutgray.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-static-int djpeg-static -dct int -fast -ppm -outfile testoutint.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.ppm testoutint.ppm)
+add_test(djpeg-static-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_INT} -DFILE=testoutint.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-static-fast djpeg-static -dct fast -ppm -outfile testoutfst.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-fast-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst.ppm testoutfst.ppm)
+add_test(djpeg-static-fast-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FAST} -DFILE=testoutfst.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-static-float djpeg-static -dct float -ppm -outfile testoutflt.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
 if(WITH_SIMD)
-add_test(djpeg-static-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgflt.ppm testoutflt.ppm)
+add_test(djpeg-static-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FLOAT} -DFILE=testoutflt.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 else()
-add_test(djpeg-static-float-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm testoutflt.ppm)
+add_test(djpeg-static-float-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FLOAT_NOSIMD} -DFILE=testoutflt.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
-add_test(djpeg-static-int-1_2 djpeg-static -dct int -scale 1/2 -ppm -outfile testoutint1_2.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-int-1_2-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_2.ppm testoutint1_2.ppm)
+foreach(scale 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8)
+string(REGEX REPLACE "_" "/" scalearg ${scale})
+add_test(djpeg-static-int-${scale} djpeg-static -dct int -nosmooth -scale ${scalearg} -ppm -outfile testoutint${scale}.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
+add_test(djpeg-static-int-${scale}-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_INT_${scale}} -DFILE=testoutint${scale}.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+endforeach()
 add_test(djpeg-static-fast-1_2 djpeg-static -dct fast -scale 1/2 -ppm -outfile testoutfst1_2.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-fast-1_2-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_2.ppm testoutfst1_2.ppm)
-add_test(djpeg-static-int-1_4 djpeg-static -dct int -scale 1/4 -ppm -outfile testoutint1_4.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-int-1_4-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_4.ppm testoutint1_4.ppm)
-add_test(djpeg-static-fast-1_4 djpeg-static -dct fast -scale 1/4 -ppm -outfile testoutfst1_4.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-fast-1_4-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_4.ppm testoutfst1_4.ppm)
-add_test(djpeg-static-int-1_8 djpeg-static -dct int -scale 1/8 -ppm -outfile testoutint1_8.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-int-1_8-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint1_8.ppm testoutint1_8.ppm)
-add_test(djpeg-static-fast-1_8 djpeg-static -dct fast -scale 1/8 -ppm -outfile testoutfst1_8.ppm ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-fast-1_8-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgfst1_8.ppm testoutfst1_8.ppm)
+add_test(djpeg-static-fast-1_2-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_FAST_1_2} -DFILE=testoutfst1_2.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(djpeg-static-256 djpeg-static -dct int -bmp -colors 256 -outfile testout.bmp  ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(djpeg-static-256-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimg.bmp testout.bmp)
+add_test(djpeg-static-256-cmp ${CMAKE_COMMAND} -DMD5=${MD5_BMP_256} -DFILE=testout.bmp -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-static-prog cjpeg-static -dct int -progressive -outfile testoutp.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-prog-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgp.jpg testoutp.jpg)
+add_test(cjpeg-static-prog-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_PROG} -DFILE=testoutp.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-static-prog jpegtran-static -outfile testoutt.jpg testoutp.jpg)
-add_test(jpegtran-static-prog-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testoutt.jpg)
+add_test(jpegtran-static-prog-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutt.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 if(WITH_ARITH_ENC)
 add_test(cjpeg-static-ari cjpeg-static -dct int -arithmetic -outfile testoutari.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg testoutari.jpg)
+add_test(cjpeg-static-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_ARI} -DFILE=testoutari.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-static-toari jpegtran-static -arithmetic -outfile testouta.jpg ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg)
-add_test(jpegtran-static-toari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg testouta.jpg)
+add_test(jpegtran-static-toari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_ARI} -DFILE=testouta.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(cjpeg-static-prog-ari cjpeg-static -dct int -progressive -arithmetic -sample 1x1 -outfile testoutpa.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
-add_test(cjpeg-static-prog-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgpa.jpg testoutpa.jpg)
+add_test(cjpeg-static-prog-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_PROG_ARI} -DFILE=testoutpa.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 if(WITH_ARITH_DEC)
 add_test(djpeg-static-ari djpeg-static -dct int -fast -ppm -outfile testoutari.ppm ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
-add_test(djpeg-static-ari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgari.ppm testoutari.ppm)
+add_test(djpeg-static-ari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_PPM_ARI} -DFILE=testoutari.ppm -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 add_test(jpegtran-static-fromari	jpegtran-static -outfile testouta.jpg ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
-add_test(jpegtran-static-fromari-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg testouta.jpg)
+add_test(jpegtran-static-fromari-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testouta.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 endif()
 add_test(jpegtran-static-crop jpegtran-static -crop 120x90+20+50 -transpose -perfect -outfile testoutcrop.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.jpg)
-add_test(jpegtran-static-crop-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimages/testimgcrop.jpg testoutcrop.jpg)
+add_test(jpegtran-static-crop-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_CROP} -DFILE=testoutcrop.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
 
 add_custom_target(testclean COMMAND ${CMAKE_COMMAND} -P
   ${CMAKE_SOURCE_DIR}/cmakescripts/testclean.cmake)
@@ -390,18 +447,18 @@
 if(MSVC)
   set(INST_PLATFORM "Visual C++")
   set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-vc)
-  set(INST_DIR ${CMAKE_PROJECT_NAME})
+  set(INST_REG_NAME ${CMAKE_PROJECT_NAME})
 elseif(MINGW)
   set(INST_PLATFORM GCC)
   set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-gcc)
-  set(INST_DIR ${CMAKE_PROJECT_NAME}-gcc)
+  set(INST_REG_NAME ${CMAKE_PROJECT_NAME}-gcc)
   set(INST_DEFS -DGCC)
 endif()
 
 if(64BIT)
   set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
   set(INST_NAME ${INST_NAME}64)
-  set(INST_DIR ${INST_DIR}64)
+  set(INST_REG_NAME ${INST_REG_NAME}64)  # append to the name chosen in the MSVC/MINGW branch above
   set(INST_DEFS ${INST_DEFS} -DWIN64)
 endif()
 
@@ -415,12 +472,17 @@
   set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=")
 endif()
 
+STRING(REGEX REPLACE "/" "\\\\" INST_DIR ${CMAKE_INSTALL_PREFIX})
+
 configure_file(release/libjpeg-turbo.nsi.in libjpeg-turbo.nsi @ONLY)
 
+if(WITH_JAVA)
+  set(JAVA_DEPEND java)
+endif()
 add_custom_target(installer
   makensis -nocd ${INST_DEFS} libjpeg-turbo.nsi
   DEPENDS jpeg jpeg-static turbojpeg turbojpeg-static rdjpgcom wrjpgcom
-    cjpeg djpeg jpegtran tjbench
+    cjpeg djpeg jpegtran tjbench ${JAVA_DEPEND}
   SOURCES libjpeg-turbo.nsi)
 
 install(TARGETS jpeg-static turbojpeg turbojpeg-static rdjpgcom wrjpgcom tjbench
@@ -430,7 +492,7 @@
 )
 
 install(FILES ${CMAKE_SOURCE_DIR}/README ${CMAKE_SOURCE_DIR}/README-turbo.txt
-  ${CMAKE_SOURCE_DIR}/example.c ${CMAKE_SOURCE_DIR}/libjpeg.txt 
+  ${CMAKE_SOURCE_DIR}/example.c ${CMAKE_SOURCE_DIR}/libjpeg.txt
   ${CMAKE_SOURCE_DIR}/structure.txt ${CMAKE_SOURCE_DIR}/usage.txt
   ${CMAKE_SOURCE_DIR}/wizard.txt
   DESTINATION doc)
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 28aca8a..984b91d 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,44 +1,198 @@
-1.2.2
+1.3.2
 =====
 
-[1] The tjDecompressToYUV() function now supports the TJFLAG_FASTDCT flag.
-
-[2] The 32-bit supplementary package for amd64 Debian systems now provides
-symlinks in /usr/lib/i386-linux-gnu for the TurboJPEG libraries in /usr/lib32.
-This allows those libraries to be used on MultiArch-compatible systems (such as
-Ubuntu 11 and later) without setting the linker path.
-
-[3] Fixed a regression caused by 1.2.1[7] whereby the build would fail with
-multiple "Mismatch in operand sizes" errors when attempting to build the x86
-SIMD code with NASM 0.98.
-
-[4] Fixed a Huffman encoder bug that prevented I/O suspension from working
-properly.
-
-[5] Fixed a bug whereby attempting to encode a progressive JPEG with arithmetic
-entropy coding (by passing arguments of -progressive -arithmetic to cjpeg or
-jpegtran, for instance) would result in an error, "Requested feature was
-omitted at compile time".
-
-[6] Fixed a couple of issues whereby malformed JPEG images would cause
-libjpeg-turbo to use uninitialized memory during decompression.
-
-[7] Fixed an error ("Buffer passed to JPEG library is too small") that occurred
-when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
-source image, and added a unit test to check for this error.
-
-[8] Worked around an issue with Visual C++ 2010 and later that caused incorrect
+[1] Worked around an issue with Visual C++ 2010 and later that caused incorrect
 pixels to be generated when decompressing a JPEG image to a 256-color bitmap,
 if compiler optimization was enabled when libjpeg-turbo was built.  This caused
 the regression tests to fail when doing a release build under Visual C++ 2010
 and later.
 
-[9] Fixed a segfault that occurred when calling output_message() with msg_code
+[2] Fixed a segfault that occurred when calling output_message() with msg_code
 set to JMSG_COPYRIGHT.
 
-[10] Fixed a bug in the build system that was causing the Windows version of
+[3] Fixed an issue whereby wrjpgcom was allowing comments longer than 65k
+characters to be passed on the command line, which was causing it to generate
+incorrect JPEG files.
+
+[4] Fixed a bug in the build system that was causing the Windows version of
 wrjpgcom to be built using the rdjpgcom code.
 
+[5] Fixed an extremely rare bug that could cause the Huffman encoder's local
+buffer to overrun when a very high-frequency MCU is compressed using quality
+100 and no subsampling, and when the JPEG output buffer is being dynamically
+resized by the destination manager.  This issue was so rare that, even with a
+test program specifically designed to make the bug occur (by injecting random
+high-frequency YUV data into the compressor), it was reproducible only once in
+about every 25 million iterations.
+
+[6] Fixed a build issue on OS X PowerPC platforms (md5cmp failed to build
+because OS X does not provide the le32toh() and htole32() functions.)
+
+[7] The close() method in the TJCompressor and TJDecompressor Java classes is
+now idempotent.  Previously, that method would call the native tjDestroy()
+function even if the TurboJPEG instance had already been destroyed.  This
+caused an exception to be thrown during finalization, if the close() method had
+already been called.  The exception was caught, but it was still an expensive
+operation.
+
+[8] The TurboJPEG API previously generated an error ("Could not determine
+subsampling type for JPEG image") when attempting to decompress grayscale JPEG
+images that were compressed with a sampling factor other than 1 (for instance,
+with 'cjpeg -grayscale -sample 2x2').  Subsampling technically has no meaning
+with grayscale JPEGs, and thus the horizontal and vertical sampling factors
+for such images are ignored by the decompressor.  However, the TurboJPEG API
+was being too rigid and was expecting the sampling factors to be equal to 1
+before it treated the image as a grayscale JPEG.
+
+[9] Referring to [5] above, another extremely rare circumstance was discovered
+under which the Huffman encoder's local buffer can be overrun when a buffered
+destination manager is being used and an extremely-high-frequency block
+(basically junk image data) is being encoded.  Even though the Huffman local
+buffer was increased from 128 bytes to 136 bytes to address the previous
+issue, the new issue caused even the larger buffer to be overrun.  Further
+analysis reveals that, in the absolute worst case (such as setting alternating
+AC coefficients to 32767 and -32768 in the JPEG scanning order), the Huffman
+encoder can produce encoded blocks that approach double the size of the
+unencoded blocks.  Thus, the Huffman local buffer was increased to 256 bytes,
+which should prevent any such issue from re-occurring in the future.
+
+
+1.3.1
+=====
+
+[1] On Un*x systems, 'make install' now installs the libjpeg-turbo libraries
+into /opt/libjpeg-turbo/lib32 by default on any 32-bit system, not just x86,
+and into /opt/libjpeg-turbo/lib64 by default on any 64-bit system, not just
+x86-64.  You can override this by overriding either the 'prefix' or 'libdir'
+configure variables.
+
+[2] The Windows installer now places a copy of the TurboJPEG DLLs in the same
+directory as the rest of the libjpeg-turbo binaries.  This was mainly done
+to support TurboVNC 1.3, which bundles the DLLs in its Windows installation.
+When using a 32-bit version of CMake on 64-bit Windows, it is impossible to
+access the c:\WINDOWS\system32 directory, which made it impossible for the
+TurboVNC build scripts to bundle the 64-bit TurboJPEG DLL.
+
+[3] Fixed a bug whereby attempting to encode a progressive JPEG with arithmetic
+entropy coding (by passing arguments of -progressive -arithmetic to cjpeg or
+jpegtran, for instance) would result in an error, "Requested feature was
+omitted at compile time".
+
+[4] Fixed a couple of issues whereby malformed JPEG images would cause
+libjpeg-turbo to use uninitialized memory during decompression.
+
+[5] Fixed an error ("Buffer passed to JPEG library is too small") that occurred
+when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
+source image, and added a unit test to check for this error.
+
+[6] The Java classes should now build properly under Visual Studio 2010 and
+later.
+
+[7] Fixed an issue that prevented SRPMs generated using the in-tree packaging
+tools from being rebuilt on certain newer Linux distributions.
+
+[8] Numerous minor fixes to eliminate compilation and build/packaging system
+warnings, fix cosmetic issues, improve documentation clarity, and other general
+source cleanup.
+
+
+1.3.0
+=====
+
+[1] 'make test' now works properly on FreeBSD, and it no longer requires the
+md5sum executable to be present on other Un*x platforms.
+
+[2] Overhauled the packaging system:
+-- To avoid conflict with vendor-supplied libjpeg-turbo packages, the
+official RPMs and DEBs for libjpeg-turbo have been renamed to
+"libjpeg-turbo-official".
+-- The TurboJPEG libraries are now located under /opt/libjpeg-turbo in the
+official Linux and Mac packages, to avoid conflict with vendor-supplied
+packages and also to streamline the packaging system.
+-- Release packages are now created with the directory structure defined
+by the configure variables "prefix", "bindir", "libdir", etc. (Un*x) or by the
+CMAKE_INSTALL_PREFIX variable (Windows.)  The exception is that the docs are
+always located under the system default documentation directory on Un*x and Mac
+systems, and on Windows, the TurboJPEG DLL is always located in the Windows
+system directory.
+-- To avoid confusion, official libjpeg-turbo packages on Linux/Unix platforms
+(except for Mac) will always install the 32-bit libraries in
+/opt/libjpeg-turbo/lib32 and the 64-bit libraries in /opt/libjpeg-turbo/lib64.
+-- Fixed an issue whereby, in some cases, the libjpeg-turbo executables on Un*x
+systems were not properly linking with the shared libraries installed by the
+same package.
+-- Fixed an issue whereby building the "installer" target on Windows when
+WITH_JAVA=1 would fail if the TurboJPEG JAR had not been previously built.
+-- Building the "install" target on Windows now installs files into the same
+places that the installer does.
+
+[3] Fixed a Huffman encoder bug that prevented I/O suspension from working
+properly.
+
+
+1.2.90 (1.3 beta1)
+==================
+
+[1] Added support for additional scaling factors (3/8, 5/8, 3/4, 7/8, 9/8, 5/4,
+11/8, 3/2, 13/8, 7/4, 15/8, and 2) when decompressing.  Note that the IDCT will
+not be SIMD-accelerated when using any of these new scaling factors.
+
+[2] The TurboJPEG dynamic library is now versioned.  It was not strictly
+necessary to do so, because TurboJPEG uses versioned symbols, and if a function
+changes in an ABI-incompatible way, that function is renamed and a legacy
+function is provided to maintain backward compatibility.  However, certain
+Linux distro maintainers have a policy against accepting any library that isn't
+versioned.
+
+[3] Extended the TurboJPEG Java API so that it can be used to compress a JPEG
+image from and decompress a JPEG image to an arbitrary position in a large
+image buffer.
+
+[4] The tjDecompressToYUV() function now supports the TJFLAG_FASTDCT flag.
+
+[5] The 32-bit supplementary package for amd64 Debian systems now provides
+symlinks in /usr/lib/i386-linux-gnu for the TurboJPEG libraries in /usr/lib32.
+This allows those libraries to be used on MultiArch-compatible systems (such as
+Ubuntu 11 and later) without setting the linker path.
+
+[6] The TurboJPEG Java wrapper should now find the JNI library on Mac systems
+without having to pass -Djava.library.path=/usr/lib to java.
+
+[7] TJBench has been ported to Java to provide a convenient way of validating
+the performance of the TurboJPEG Java API.  It can be run with
+'java -cp turbojpeg.jar TJBench'.
+
+[8] cjpeg can now be used to generate JPEG files with the RGB colorspace
+(feature ported from jpeg-8d.)
+
+[9] The width and height in the -crop argument passed to jpegtran can now be
+suffixed with "f" to indicate that, when the upper left corner of the cropping
+region is automatically moved to the nearest iMCU boundary, the bottom right
+corner should be moved by the same amount.  In other words, this feature causes
+jpegtran to strictly honor the specified width/height rather than the specified
+bottom right corner (feature ported from jpeg-8d.)
+
+[10] JPEG files using the RGB colorspace can now be decompressed into grayscale
+images (feature ported from jpeg-8d.)
+
+[11] Fixed a regression caused by 1.2.1[7] whereby the build would fail with
+multiple "Mismatch in operand sizes" errors when attempting to build the x86
+SIMD code with NASM 0.98.
+
+[12] The in-memory source/destination managers (jpeg_mem_src() and
+jpeg_mem_dest()) are now included by default when building libjpeg-turbo with
+libjpeg v6b or v7 emulation, so that programs can take advantage of these
+functions without requiring the use of the backward-incompatible libjpeg v8
+ABI.  The "age number" of the libjpeg-turbo library on Un*x systems has been
+incremented by 1 to reflect this.  You can disable this feature with a
+configure/CMake switch in order to retain strict API/ABI compatibility with the
+libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.)  See
+README-turbo.txt for more details.
+
+[13] Added ARM v7s architecture to libjpeg.a and libturbojpeg.a in the official
+libjpeg-turbo binary package for OS X, so that those libraries can be used to
+build applications that leverage the faster CPUs in the iPhone 5 and iPad 4.
+
 
 1.2.1
 =====
@@ -119,8 +273,8 @@
 correctly.
 
 [7] Added ARM v6 and ARM v7 architectures to libjpeg.a and libturbojpeg.a in
-the official OS X distribution package, so that those libraries can be used to
-build both OS X and iOS applications.
+the official libjpeg-turbo binary package for OS X, so that those libraries can
+be used to build both OS X and iOS applications.
 
 
 1.1.90 (1.2 beta1)
@@ -166,7 +320,7 @@
 TurboJPEG 1.2 API uses pixel formats to define the size and component order of
 the uncompressed source/destination images, and it includes a more efficient
 version of TJBUFSIZE() that computes a worst-case JPEG size based on the level
-of chrominance subsampling.  The refactored implementation of TurboJPEG/OSS
+of chrominance subsampling.  The refactored implementation of the TurboJPEG API
 now uses the libjpeg memory source and destination managers, which allows the
 TurboJPEG compressor to grow the JPEG buffer as necessary.
 
@@ -242,12 +396,12 @@
 cases.
 
 [2] Despite the above, the fast integer forward DCT still degrades somewhat for
-JPEG qualities greater than 95, so TurboJPEG/OSS will now automatically use the
-slow integer forward DCT when generating JPEG images of quality 96 or greater.
-This reduces compression performance by as much as 15% for these high-quality
-images but is necessary to ensure that the images are perceptually lossless.
-It also ensures that the library can avoid the performance pitfall created by
-[1].
+JPEG qualities greater than 95, so the TurboJPEG wrapper will now automatically
+use the slow integer forward DCT when generating JPEG images of quality 96 or
+greater.  This reduces compression performance by as much as 15% for these
+high-quality images but is necessary to ensure that the images are perceptually
+lossless.  It also ensures that the library can avoid the performance pitfall
+created by [1].
 
 [3] Ported jpgtest.cxx to pure C to avoid the need for a C++ compiler.
 
@@ -320,7 +474,7 @@
 [1] 2983700: Further FreeBSD build tweaks (no longer necessary to specify
 --host when configuring on a 64-bit system)
 
-[2] Created sym. links in the Unix/Linux packages so that the TurboJPEG
+[2] Created symlinks in the Unix/Linux packages so that the TurboJPEG
 include file can always be found in /opt/libjpeg-turbo/include, the 32-bit
 static libraries can always be found in /opt/libjpeg-turbo/lib32, and the
 64-bit static libraries can always be found in /opt/libjpeg-turbo/lib64.
diff --git a/Makefile.am b/Makefile.am
index 35050e2..425e4e2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,9 +1,16 @@
-lib_LTLIBRARIES = libjpeg.la libturbojpeg.la
-libjpeg_la_LDFLAGS = -version-info ${SO_MAJOR_VERSION}:${SO_MINOR_VERSION} -no-undefined
-libturbojpeg_la_LDFLAGS = -avoid-version -no-undefined
-include_HEADERS = jerror.h jmorecfg.h jpeglib.h turbojpeg.h
+lib_LTLIBRARIES = libjpeg.la
+libjpeg_la_LDFLAGS = -version-info ${LIBTOOL_CURRENT}:${SO_MINOR_VERSION}:${SO_AGE} -no-undefined
+include_HEADERS = jerror.h jmorecfg.h jpeglib.h
+
+if WITH_TURBOJPEG
+lib_LTLIBRARIES += libturbojpeg.la
+libturbojpeg_la_LDFLAGS = -version-info 0:0 -no-undefined
+include_HEADERS += turbojpeg.h
+endif
+
 nodist_include_HEADERS = jconfig.h
 
+
 HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
 	jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h
 
@@ -17,28 +24,26 @@
 	jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c
 
 if WITH_ARITH
-
 libjpeg_la_SOURCES += jaricom.c
-
 endif
 
 if WITH_ARITH_ENC
-
 libjpeg_la_SOURCES += jcarith.c
-
 endif
 
 if WITH_ARITH_DEC
-
 libjpeg_la_SOURCES += jdarith.c
-
 endif
 
+
+SUBDIRS = java
+
+
+if WITH_TURBOJPEG
+
 libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpeg.c turbojpeg.h \
 	transupp.c transupp.h jdatadst-tj.c jdatasrc-tj.c
 
-SUBDIRS = java
-
 if WITH_JAVA
 
 libturbojpeg_la_SOURCES += turbojpeg-jni.c
@@ -54,12 +59,17 @@
 libturbojpeg_la_SOURCES += $(TJMAPFILE)
 
 if VERSION_SCRIPT
-
 libturbojpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE)
-libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map
+endif
 
 endif
 
+
+if VERSION_SCRIPT
+libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map
+endif
+
+
 if WITH_SIMD
 
 SUBDIRS += simd
@@ -72,8 +82,16 @@
 
 endif
 
-bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom tjbench
-noinst_PROGRAMS = tjunittest jcstest
+
+bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom
+noinst_PROGRAMS = jcstest
+
+
+if WITH_TURBOJPEG
+
+bin_PROGRAMS += tjbench
+
+noinst_PROGRAMS += tjunittest
 
 tjbench_SOURCES = tjbench.c bmp.h bmp.c tjutil.h tjutil.c rdbmp.c rdppm.c \
 	wrbmp.c wrppm.c
@@ -86,8 +104,11 @@
 
 tjunittest_LDADD = libturbojpeg.la
 
+endif
+
+
 cjpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c cjpeg.c rdbmp.c rdgif.c \
-	rdppm.c rdswitch.c rdtarga.c 
+	rdppm.c rdswitch.c rdtarga.c
 
 cjpeg_LDADD = libjpeg.la
 
@@ -120,12 +141,12 @@
 
 dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1
 
-DOCS= install.txt coderules.txt filelist.txt jconfig.txt change.log \
-	rdrle.c wrrle.c BUILDING.txt ChangeLog.txt
+DOCS= coderules.txt jconfig.txt change.log rdrle.c wrrle.c BUILDING.txt \
+	ChangeLog.txt
 
 docdir = $(datadir)/doc
 dist_doc_DATA = README README-turbo.txt libjpeg.txt structure.txt usage.txt \
-	wizard.txt 
+	wizard.txt
 
 exampledir = $(datadir)/doc
 dist_example_DATA = example.c
@@ -139,7 +160,43 @@
 	rm -rf `find $(distdir) -name .svn`
 
 
+SUBDIRS += md5
+
+MD5_JPEG_INT = 9a68f56bc76e466aa7e52f415d0f4a5f
+MD5_JPEG_FAST = 0e1502e7fa421835e376a314fac2a39f
+MD5_JPEG_FAST_100 = 7bf72a8e741d64eecb960c97323af77c
+MD5_JPEG_FLOAT = d1623885ffafcd40c684af09e3d65cd5
+MD5_JPEG_FLOAT_NOSIMD = fb4884c35f8273f498cb32879de5c455
+MD5_JPEG_INT_GRAY = 72b51f894b8f4a10b3ee3066770aa38d
+MD5_PPM_INT = d1ed0d11f076b842525271647716aeb8
+MD5_PPM_FAST = 048298a2d2410261c0533cb97bcfef23
+MD5_PPM_FLOAT = 7f5b446ee36b2630e06785b8d42af15f
+MD5_PPM_FLOAT_NOSIMD = 64072f1dbdc5b3a187777788604971a5
+MD5_PPM_INT_2_1 = 9f9de8c0612f8d06869b960b05abf9c9
+MD5_PPM_INT_15_8 = b6875bc070720b899566cc06459b63b7
+MD5_PPM_INT_7_4 = 06a177eae05f164fac57f7a2c346ee87
+MD5_PPM_INT_13_8 = bc3452573c8152f6ae552939ee19f82f
+MD5_PPM_INT_3_2 = f5a8b88a8a7f96016f04d259cf82ed67
+MD5_PPM_INT_11_8 = d8cc73c0aaacd4556569b59437ba00a5
+MD5_PPM_INT_5_4 = 32775dd9ad2ab90f4c5b219b53e0c86c
+MD5_PPM_INT_9_8 = d25e61bc7eac0002f5b393aa223747b6
+MD5_PPM_INT_7_8 = ddb564b7c74a09494016d6cd7502a946
+MD5_PPM_INT_3_4 = 8ed8e68808c3fbc4ea764fc9d2968646
+MD5_PPM_INT_5_8 = a3363274999da2366a024efae6d16c9b
+MD5_PPM_INT_1_2 = e692a315cea26b988c8e8b29a5dbcd81
+MD5_PPM_INT_3_8 = 79eca9175652ced755155c90e785a996
+MD5_PPM_INT_1_4 = 79cd778f8bf1a117690052cacdd54eca
+MD5_PPM_INT_1_8 = 391b3d4aca640c8567d6f8745eb2142f
+MD5_PPM_FAST_1_2 = f30bcf6d32ccd44cbdd9aeaacbd9454f
+MD5_BMP_256 = 4980185e3776e89bd931736e1cddeee6
+MD5_JPEG_ARI = e986fb0a637a8d833d96e8a6d6d84ea1
+MD5_PPM_ARI = 72b59a99bcf1de24c5b27d151bde2437
+MD5_JPEG_PROG = 1c4afddc05c0a43489ee54438a482d92
+MD5_JPEG_PROG_ARI = 0a8f1c8f66e113c3cf635df0a475a617
+MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d
+
 test: testclean all
+if WITH_TURBOJPEG
 if WITH_JAVA
 	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest
 	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi
@@ -149,64 +206,85 @@
 	./tjunittest
 	./tjunittest -alloc
 	./tjunittest -yuv
+endif
 	./cjpeg -dct int -outfile testoutint.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgint.jpg testoutint.jpg
+	md5/md5cmp $(MD5_JPEG_INT) testoutint.jpg
 	./cjpeg -dct fast -opt -outfile testoutfst.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgfst.jpg testoutfst.jpg
+	md5/md5cmp $(MD5_JPEG_FAST) testoutfst.jpg
 	./cjpeg -dct fast -quality 100 -opt -outfile testoutfst100.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgfst100.jpg testoutfst100.jpg
+	md5/md5cmp $(MD5_JPEG_FAST_100) testoutfst100.jpg
 	./cjpeg -dct float -outfile testoutflt.jpg $(srcdir)/testimages/testorig.ppm
 if WITH_SSE_FLOAT_DCT
-	cmp $(srcdir)/testimages/testimgflt.jpg testoutflt.jpg
+	md5/md5cmp $(MD5_JPEG_FLOAT) testoutflt.jpg
 else
-	cmp $(srcdir)/testimages/testimgflt-nosimd.jpg testoutflt.jpg
+	md5/md5cmp $(MD5_JPEG_FLOAT_NOSIMD) testoutflt.jpg
 endif
 	./cjpeg -dct int -grayscale -outfile testoutgray.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimggray.jpg testoutgray.jpg
+	md5/md5cmp $(MD5_JPEG_INT_GRAY) testoutgray.jpg
 	./djpeg -dct int -fast -ppm -outfile testoutint.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgint.ppm testoutint.ppm
+	md5/md5cmp $(MD5_PPM_INT) testoutint.ppm
 	./djpeg -dct fast -ppm -outfile testoutfst.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgfst.ppm testoutfst.ppm
+	md5/md5cmp $(MD5_PPM_FAST) testoutfst.ppm
 	./djpeg -dct float -ppm -outfile testoutflt.ppm $(srcdir)/testimages/testorig.jpg
 if WITH_SSE_FLOAT_DCT
-	cmp $(srcdir)/testimages/testimgflt.ppm testoutflt.ppm
+	md5/md5cmp $(MD5_PPM_FLOAT) testoutflt.ppm
 else
-	cmp $(srcdir)/testimages/testorig.ppm testoutflt.ppm
+	md5/md5cmp $(MD5_PPM_FLOAT_NOSIMD) testoutflt.ppm
 endif
-	./djpeg -dct int -scale 1/2 -ppm -outfile testoutint1_2.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgint1_2.ppm testoutint1_2.ppm
+	./djpeg -dct int -nosmooth -scale 2/1 -ppm -outfile testoutint2_1.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_2_1) testoutint2_1.ppm;
+	./djpeg -dct int -nosmooth -scale 15/8 -ppm -outfile testoutint15_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_15_8) testoutint15_8.ppm;
+	./djpeg -dct int -nosmooth -scale 7/4 -ppm -outfile testoutint7_4.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_7_4) testoutint7_4.ppm;
+	./djpeg -dct int -nosmooth -scale 13/8 -ppm -outfile testoutint13_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_13_8) testoutint13_8.ppm;
+	./djpeg -dct int -nosmooth -scale 3/2 -ppm -outfile testoutint3_2.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_3_2) testoutint3_2.ppm;
+	./djpeg -dct int -nosmooth -scale 11/8 -ppm -outfile testoutint11_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_11_8) testoutint11_8.ppm;
+	./djpeg -dct int -nosmooth -scale 5/4 -ppm -outfile testoutint5_4.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_5_4) testoutint5_4.ppm;
+	./djpeg -dct int -nosmooth -scale 9/8 -ppm -outfile testoutint9_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_9_8) testoutint9_8.ppm;
+	./djpeg -dct int -nosmooth -scale 7/8 -ppm -outfile testoutint7_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_7_8) testoutint7_8.ppm;
+	./djpeg -dct int -nosmooth -scale 3/4 -ppm -outfile testoutint3_4.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_3_4) testoutint3_4.ppm;
+	./djpeg -dct int -nosmooth -scale 5/8 -ppm -outfile testoutint5_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_5_8) testoutint5_8.ppm;
+	./djpeg -dct int -nosmooth -scale 1/2 -ppm -outfile testoutint1_2.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_1_2) testoutint1_2.ppm;
+	./djpeg -dct int -nosmooth -scale 3/8 -ppm -outfile testoutint3_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_3_8) testoutint3_8.ppm;
+	./djpeg -dct int -nosmooth -scale 1/4 -ppm -outfile testoutint1_4.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_1_4) testoutint1_4.ppm;
+	./djpeg -dct int -nosmooth -scale 1/8 -ppm -outfile testoutint1_8.ppm $(srcdir)/testimages/testorig.jpg;
+	md5/md5cmp $(MD5_PPM_INT_1_8) testoutint1_8.ppm;
 	./djpeg -dct fast -scale 1/2 -ppm -outfile testoutfst1_2.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgfst1_2.ppm testoutfst1_2.ppm
-	./djpeg -dct int -scale 1/4 -ppm -outfile testoutint1_4.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgint1_4.ppm testoutint1_4.ppm
-	./djpeg -dct fast -scale 1/4 -ppm -outfile testoutfst1_4.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgfst1_4.ppm testoutfst1_4.ppm
-	./djpeg -dct int -scale 1/8 -ppm -outfile testoutint1_8.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgint1_8.ppm testoutint1_8.ppm
-	./djpeg -dct fast -scale 1/8 -ppm -outfile testoutfst1_8.ppm $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgfst1_8.ppm testoutfst1_8.ppm
-	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimg.bmp testout.bmp
+	md5/md5cmp $(MD5_PPM_FAST_1_2) testoutfst1_2.ppm
+	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)/testimages/testorig.jpg
+	md5/md5cmp $(MD5_BMP_256) testout.bmp
 if WITH_ARITH_ENC
 	./cjpeg -dct int -arithmetic -outfile testoutari.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgari.jpg testoutari.jpg
+	md5/md5cmp $(MD5_JPEG_ARI) testoutari.jpg
 	./jpegtran -arithmetic -outfile testouta.jpg $(srcdir)/testimages/testimgint.jpg
-	cmp $(srcdir)/testimages/testimgari.jpg testouta.jpg
+	md5/md5cmp $(MD5_JPEG_ARI) testouta.jpg
 	./cjpeg -dct int -progressive -arithmetic -sample 1x1 -outfile testoutpa.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgpa.jpg testoutpa.jpg
+	md5/md5cmp $(MD5_JPEG_PROG_ARI) testoutpa.jpg
 endif
 if WITH_ARITH_DEC
 	./djpeg -dct int -fast -ppm -outfile testoutari.ppm $(srcdir)/testimages/testimgari.jpg
-	cmp $(srcdir)/testimages/testimgari.ppm testoutari.ppm
+	md5/md5cmp $(MD5_PPM_ARI) testoutari.ppm
 	./jpegtran -outfile testouta.jpg $(srcdir)/testimages/testimgari.jpg
-	cmp $(srcdir)/testimages/testimgint.jpg testouta.jpg
+	md5/md5cmp $(MD5_JPEG_INT) testouta.jpg
 endif
 	./cjpeg -dct int -progressive -outfile testoutp.jpg $(srcdir)/testimages/testorig.ppm
-	cmp $(srcdir)/testimages/testimgp.jpg testoutp.jpg
+	md5/md5cmp $(MD5_JPEG_PROG) testoutp.jpg
 	./jpegtran -outfile testoutt.jpg testoutp.jpg
-	cmp $(srcdir)/testimages/testimgint.jpg testoutt.jpg
+	md5/md5cmp $(MD5_JPEG_INT) testoutt.jpg
 	./jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testoutcrop.jpg $(srcdir)/testimages/testorig.jpg
-	cmp $(srcdir)/testimages/testimgcrop.jpg testoutcrop.jpg
+	md5/md5cmp $(MD5_JPEG_CROP) testoutcrop.jpg
 
 
 testclean:
@@ -240,94 +318,89 @@
 
 tjtest:
 	sh ./tjbenchtest
+	sh ./tjbenchtest -alloc
 if WITH_JAVA
-	sh ./tjexampletest
+	sh ./tjbenchtest.java
 endif
 
 
-if X86_64
+pkgscripts/libjpeg-turbo.spec: pkgscripts/libjpeg-turbo.spec.tmpl
+	cat pkgscripts/libjpeg-turbo.spec.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__includedir}@$(includedir)@g | \
+		sed s@%{__libdir}@$(libdir)@g | sed s@%{__mandir}@$(mandir)@g \
+		> pkgscripts/libjpeg-turbo.spec
 
-install-exec-hook:
-	__PREFIX=`echo ${prefix} | sed -e 's@\/*$$@@'`;  \
-	if [ "$$__PREFIX" = "/opt/libjpeg-turbo" ]; then  \
-		cd $(DESTDIR)/${prefix};  \
-		if [ -d lib -a ! -d lib64 -a ! -h lib64 ]; then  \
-			$(LN_S) lib lib64;  \
-		fi  \
-	fi
-
-else
-
-install-exec-hook:
-	__PREFIX=`echo ${prefix} | sed -e 's@\/*$$@@'`;  \
-	if [ "$$__PREFIX" = "/opt/libjpeg-turbo" ]; then  \
-		cd $(DESTDIR)/${prefix};  \
-		if [ -d lib -a ! -d lib32 -a ! -h lib32 ]; then  \
-			$(LN_S) lib lib32;  \
-		fi  \
-	fi
-
-endif
-
-rpm: all
+rpm: all pkgscripts/libjpeg-turbo.spec
 	TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \
 	mkdir -p $$TMPDIR/RPMS; \
 	ln -fs `pwd` $$TMPDIR/BUILD; \
-	rm -f ${PACKAGE_NAME}-${VERSION}.${RPMARCH}.rpm; \
+	rm -f ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \
 	rpmbuild -bb --define "_blddir $$TMPDIR/buildroot"  \
 		--define "_topdir $$TMPDIR" \
 		--target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \
-	cp $$TMPDIR/RPMS/${RPMARCH}/${PACKAGE_NAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \
-		${PACKAGE_NAME}-${VERSION}.${RPMARCH}.rpm; \
+	cp $$TMPDIR/RPMS/${RPMARCH}/${PKGNAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \
+		${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \
 	rm -rf $$TMPDIR
 
-srpm: dist-gzip
+srpm: dist-gzip pkgscripts/libjpeg-turbo.spec
 	TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \
 	mkdir -p $$TMPDIR/RPMS; \
 	mkdir -p $$TMPDIR/SRPMS; \
 	mkdir -p $$TMPDIR/BUILD; \
 	mkdir -p $$TMPDIR/SOURCES; \
 	mkdir -p $$TMPDIR/SPECS; \
-	rm -f ${PACKAGE_NAME}-${VERSION}.src.rpm; \
+	rm -f ${PKGNAME}-${VERSION}.src.rpm; \
 	cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \
 	cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \
 		| sed s/#--\>//g \
 		> $$TMPDIR/SPECS/libjpeg-turbo.spec; \
 	rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \
-	cp $$TMPDIR/SRPMS/${PACKAGE_NAME}-${VERSION}-${BUILD}.src.rpm \
-		${PACKAGE_NAME}-${VERSION}.src.rpm; \
+	cp $$TMPDIR/SRPMS/${PKGNAME}-${VERSION}-${BUILD}.src.rpm \
+		${PKGNAME}-${VERSION}.src.rpm; \
 	rm -rf $$TMPDIR
 
-deb: all
+pkgscripts/makedpkg: pkgscripts/makedpkg.tmpl
+	cat pkgscripts/makedpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \
+		> pkgscripts/makedpkg
+
+deb: all pkgscripts/makedpkg
 	sh pkgscripts/makedpkg
 
+pkgscripts/uninstall: pkgscripts/uninstall.tmpl
+	cat pkgscripts/uninstall.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \
+		sed s@%{__includedir}@$(includedir)@g | sed s@%{__libdir}@$(libdir)@g | \
+		sed s@%{__mandir}@$(mandir)@g > pkgscripts/uninstall
+
+pkgscripts/makemacpkg: pkgscripts/makemacpkg.tmpl
+	cat pkgscripts/makemacpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__docdir}@$(docdir)@g | \
+		sed s@%{__libdir}@$(libdir)@g > pkgscripts/makemacpkg
+
 if X86_64
 
-udmg: all
+udmg: all pkgscripts/makemacpkg pkgscripts/uninstall
 	sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
 
-iosdmg: all
-	sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7}
+iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
 
 else
 
-iosdmg: all
-	sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7}
+iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
 
 endif
 
-dmg: all
+dmg: all pkgscripts/makemacpkg pkgscripts/uninstall
 	sh pkgscripts/makemacpkg
 
-if X86_64
+pkgscripts/makecygwinpkg: pkgscripts/makecygwinpkg.tmpl
+	cat pkgscripts/makecygwinpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \
+		> pkgscripts/makecygwinpkg
 
-csunpkg: all
-	sh pkgscripts/makesunpkg combined ${BUILDDIR32}
-
-endif
-
-sunpkg: all
-	sh pkgscripts/makesunpkg
-
-cygwinpkg: all
+cygwinpkg: all pkgscripts/makecygwinpkg
 	sh pkgscripts/makecygwinpkg
diff --git a/README b/README
index eb9ebb1..44e69d3 100644
--- a/README
+++ b/README
@@ -17,7 +17,8 @@
 Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
 and other members of the Independent JPEG Group.
 
-IJG is not affiliated with the official ISO JPEG standards committee.
+IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
+(also known as JPEG, together with ITU-T SG16).
 
 
 DOCUMENTATION ROADMAP
@@ -45,7 +46,6 @@
   libjpeg.txt       How to use the JPEG library in your own programs.
   example.c         Sample code for calling the JPEG library.
   structure.txt     Overview of the JPEG library's internal structure.
-  filelist.txt      Road map of IJG files.
   coderules.txt     Coding style rules --- please read if you contribute code.
 
 Please read at least the files install.txt and usage.txt.  Some information
@@ -62,7 +62,7 @@
 
 This package contains C software to implement JPEG image encoding, decoding,
 and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
-method for full-color and gray-scale images.  JPEG's strong suit is compressing
+method for full-color and grayscale images.  JPEG's strong suit is compressing
 photographic images or other types of images that have smooth color and
 brightness transitions between neighboring pixels.  Images with sharp lines or
 other abrupt features may not compress well with JPEG, and a higher JPEG
@@ -129,7 +129,7 @@
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
 
-This software is copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 
 Permission is hereby granted to use, copy, modify, and distribute this
@@ -160,15 +160,6 @@
 assumed by the product vendor.
 
 
-ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
-sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
-ansi2knr.c is NOT covered by the above copyright and conditions, but instead
-by the usual distribution terms of the Free Software Foundation; principally,
-that you must include source code if you redistribute it.  (See the file
-ansi2knr.c for full details.)  However, since ansi2knr.c is not needed as part
-of any program generated from the IJG code, this does not limit you more than
-the foregoing paragraphs do.
-
 The Unix configuration script "configure" was produced with GNU Autoconf.
 It is copyright by the Free Software Foundation but is freely distributable.
 The same holds for its supporting scripts (config.guess, config.sub,
@@ -274,11 +265,12 @@
 FILE FORMAT WARS
 ================
 
-The ISO JPEG standards committee actually promotes different formats like
-"JPEG 2000" or "JPEG XR", which are incompatible with original DCT-based
-JPEG.  IJG therefore does not support these formats (see REFERENCES).  Indeed,
-one of the original reasons for developing this free software was to help
-force convergence on common, interoperable format standards for JPEG files.
+The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together
+with ITU-T SG16) currently promotes different formats containing the name
+"JPEG" which are incompatible with original DCT-based JPEG.  IJG therefore does
+not support these formats (see REFERENCES).  Indeed, one of the original
+reasons for developing this free software was to help force convergence on
+common, interoperable format standards for JPEG files.
 Don't use an incompatible file format!
 (In any case, our decoder will remain capable of reading existing JPEG
 image files indefinitely.)
diff --git a/README-turbo.txt b/README-turbo.txt
index b8c83a8..a94ff97 100755
--- a/README-turbo.txt
+++ b/README-turbo.txt
@@ -2,24 +2,26 @@
 **     Background
 *******************************************************************************
 
-libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX,
-SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86,
-x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as
-fast as the unmodified version of libjpeg, all else being equal.
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64,
+and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as
+libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can
+still outperform libjpeg by a significant amount, by virtue of its
+highly-optimized Huffman coding routines.  In many cases, the performance of
+libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
 
-libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but
-the TigerVNC and VirtualGL projects made numerous enhancements to the codec in
-2009, including improved support for Mac OS X, 64-bit support, support for
-32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman
-encoding/decoding, and various bug fixes.  The goal was to produce a fully
-open-source codec that could replace the partially closed-source TurboJPEG/IPP
-codec used by VirtualGL and TurboVNC.  libjpeg-turbo generally achieves 80-120%
-of the performance of TurboJPEG/IPP.  It is faster in some areas but slower in
-others.
+libjpeg-turbo implements both the traditional libjpeg API and the less
+powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+colorspace extensions that allow it to compress from/decompress to 32-bit and
+big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+interface.
 
-In early 2010, libjpeg-turbo spun off into its own independent project, with
-the goal of making high-speed JPEG compression/decompression technology
-available to a broader range of users and developers.
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+VirtualGL projects made numerous enhancements to the codec in 2009, and in
+early 2010, libjpeg-turbo spun off into an independent project, with the goal
+of making high-speed JPEG compression/decompression technology available to a
+broader range of users and developers.
 
 
 *******************************************************************************
@@ -27,7 +29,7 @@
 *******************************************************************************
 
 Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by
-libjpeg (see README.)  The TurboJPEG/OSS wrapper (both C and Java versions) and
+libjpeg (see README.)  The TurboJPEG wrapper (both C and Java versions) and
 associated test programs bear a similar license, which is reproduced below:
 
 Redistribution and use in source and binary forms, with or without
@@ -71,16 +73,32 @@
 
   libjpeg API:  This is the de facto industry-standard API for compressing and
   decompressing JPEG images.  It is more difficult to use than the TurboJPEG
-  API but also more powerful.  libjpeg-turbo is both API/ABI-compatible and
-  mathematically compatible with libjpeg v6b.  It can also optionally be
-  configured to be API/ABI-compatible with libjpeg v7 and v8 (see below.)
+  API but also more powerful.  The libjpeg API implementation in libjpeg-turbo
+  is both API/ABI-compatible and mathematically compatible with libjpeg v6b.
+  It can also optionally be configured to be API/ABI-compatible with libjpeg v7
+  and v8 (see below.)
 
+There is no significant performance advantage to either API when both are used
+to perform similar operations.
+
+======================
+Installation Directory
+======================
+
+This document assumes that libjpeg-turbo will be installed in the default
+directory (/opt/libjpeg-turbo on Un*x and Mac systems and
+c:\libjpeg-turbo[-gcc][64] on Windows systems.)  If your installation of
+libjpeg-turbo resides in a different directory, then adjust the instructions
+accordingly.
 
 =============================
 Replacing libjpeg at Run Time
 =============================
 
-If a Unix application is dynamically linked with libjpeg, then you can replace
+Un*x
+----
+
+If a Un*x application is dynamically linked with libjpeg, then you can replace
 libjpeg with libjpeg-turbo at run time by manipulating LD_LIBRARY_PATH.
 For instance:
 
@@ -97,67 +115,51 @@
   user  0m0.029s
   sys   0m0.010s
 
-NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and
-architecture.
+({lib} = lib32 or lib64, depending on whether you wish to use the 32-bit or the
+64-bit version of libjpeg-turbo.)
 
-System administrators can also replace the libjpeg sym links in /usr/{lib} with
+System administrators can also replace the libjpeg symlinks in /usr/lib* with
 links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}.
 This will effectively accelerate every application that uses the libjpeg
 dynamic library on the system.
 
-The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL
-(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with
-libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH
-environment variable can be modified such that this directory is searched
-before any others that might contain a libjpeg DLL.  However, if a libjpeg
-DLL exists in an application's install directory, then Windows will load this
-DLL first whenever the application is launched.  Thus, if an application ships
-with jpeg62.dll, jpeg7.dll, or jpeg8.dll, then back up the application's
-version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the
-application's install directory to accelerate it.
+Windows
+-------
 
-The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
-Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll).
+If a Windows application is dynamically linked with libjpeg, then you can
+replace libjpeg with libjpeg-turbo at run time by backing up the application's
+copy of jpeg62.dll, jpeg7.dll, or jpeg8.dll (assuming the application has its
+own local copy of this library) and copying the corresponding DLL from
+libjpeg-turbo into the application's install directory.  The official
+libjpeg-turbo binary packages only provide jpeg62.dll.  If the application uses
+jpeg7.dll or jpeg8.dll instead, then it will be necessary to build
+libjpeg-turbo from source (see "libjpeg v7 and v8 API/ABI Emulation" below.)
+
+The following information is specific to the official libjpeg-turbo binary
+packages for Visual C++:
+
+-- jpeg62.dll requires the Visual C++ 2008 C run-time DLL (msvcr90.dll).
 msvcr90.dll ships with more recent versions of Windows, but users of older
 Windows releases can obtain it from the Visual C++ 2008 Redistributable
 Package, which is available as a free download from Microsoft's web site.
 
-NOTE:  Features of libjpeg that require passing a C run-time structure, such
-as a file handle, from an application to libjpeg will probably not work with
-the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
-Visual C++, unless the application is also built to use the Visual C++ 2008 C
-run-time DLL.  In particular, this affects jpeg_stdio_dest() and
+-- Features of the libjpeg API that require passing a C run-time structure,
+such as a file handle, from an application to the library will probably not
+work with jpeg62.dll, unless the application is also built to use the Visual
+C++ 2008 C run-time DLL.  In particular, this affects jpeg_stdio_dest() and
 jpeg_stdio_src().
 
+Mac
+---
+
 Mac applications typically embed their own copies of the libjpeg dylib inside
 the (hidden) application bundle, so it is not possible to globally replace
-libjpeg on OS X systems.  If an application uses a shared library version of
-libjpeg, then it may be possible to replace the application's version of it.
-This would generally involve copying libjpeg.*.dylib from libjpeg-turbo into
+libjpeg on OS X systems.  Replacing the application's version of the libjpeg
+dylib would generally involve copying libjpeg.*.dylib from libjpeg-turbo into
 the appropriate place in the application bundle and using install_name_tool to
-repoint the dylib to the new directory.  This requires an advanced knowledge of
-OS X and would not survive an upgrade or a re-install of the application.
-Thus, it is not recommended for most users.
-
-=======================
-Replacing TurboJPEG/IPP
-=======================
-
-libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by
-VirtualGL 2.1.x and TurboVNC 0.6 (and prior.)  libjpeg-turbo contains a wrapper
-library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo
-instead of the closed-source Intel Performance Primitives.  You can replace the
-TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order
-to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec
-at run time.  Note that the 64-bit libjpeg-turbo packages contain only 64-bit
-binaries, whereas the TurboJPEG/IPP 64-bit packages contained both 64-bit and
-32-bit binaries.  Thus, to replace a TurboJPEG/IPP 64-bit package, install
-both the 64-bit and 32-bit versions of libjpeg-turbo.
-
-You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with
-the libjpeg-turbo SDK instead of TurboJPEG/IPP.  It should work identically.
-libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which
-are used to build VirtualGL 2.2 and TurboVNC 1.0 and later.
+repoint the libjpeg-turbo dylib to its new directory.  This requires an
+advanced knowledge of OS X and would not survive an upgrade or a re-install of
+the application.  Thus, it is not recommended for most users.
 
 ========================================
 Using libjpeg-turbo in Your Own Programs
@@ -165,8 +167,8 @@
 
 For the most part, libjpeg-turbo should work identically to libjpeg, so in
 most cases, an application can be built against libjpeg and then run against
-libjpeg-turbo.  On Unix systems (including Cygwin), you can build against
-libjpeg-turbo instead of libjpeg by setting
+libjpeg-turbo.  On Un*x systems and Cygwin, you can build against libjpeg-turbo
+instead of libjpeg by setting
 
   CPATH=/opt/libjpeg-turbo/include
   and
@@ -183,20 +185,20 @@
 
 Building against libjpeg-turbo is useful, for instance, if you want to build an
 application that leverages the libjpeg-turbo colorspace extensions (see below.)
-On Linux and Solaris systems, you would still need to manipulate
-LD_LIBRARY_PATH or create appropriate sym links to use libjpeg-turbo at run
-time.  On such systems, you can pass -R /opt/libjpeg-turbo/{lib} to the linker
-to force the use of libjpeg-turbo at run time rather than libjpeg (also useful
-if you want to leverage the colorspace extensions), or you can link against the
-libjpeg-turbo static library.
+On Un*x systems, you would still need to manipulate LD_LIBRARY_PATH or create
+appropriate symlinks to use libjpeg-turbo at run time.  On such systems, you
+can pass -R /opt/libjpeg-turbo/{lib} to the linker to force the use of
+libjpeg-turbo at run time rather than libjpeg (also useful if you want to
+leverage the colorspace extensions), or you can link against the libjpeg-turbo
+static library.
 
-To force a Linux, Solaris, or MinGW application to link against the static
-version of libjpeg-turbo, you can use the following linker options:
+To force a Un*x or MinGW application to link against the static version of
+libjpeg-turbo, you can use the following linker options:
 
   -Wl,-Bstatic -ljpeg -Wl,-Bdynamic
 
 On OS X, simply add /opt/libjpeg-turbo/lib/libjpeg.a to the linker command
-line (this also works on Linux and Solaris.)
+line.
 
 To build Visual C++ applications using libjpeg-turbo, add
 c:\libjpeg-turbo[64]\include to the system or user INCLUDE environment
@@ -234,8 +236,10 @@
 
   #ifdef JCS_EXTENSIONS
 
-At run time, attempting to use these extensions with a version of libjpeg
-that doesn't support them will result in a "Bogus input colorspace" error.
+At run time, attempting to use these extensions with a libjpeg implementation
+that does not support them will result in a "Bogus input colorspace" error.
+Applications can trap this error in order to test whether run-time support is
+available for the colorspace extensions.
 
 When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the
 X byte is undefined, and in order to ensure the best performance, libjpeg-turbo
@@ -259,14 +263,17 @@
 With libjpeg v7 and v8, new features were added that necessitated extending the
 compression and decompression structures.  Unfortunately, due to the exposed
 nature of those structures, extending them also necessitated breaking backward
-ABI compatibility with previous libjpeg releases.  Thus, programs that are
+ABI compatibility with previous libjpeg releases.  Thus, programs that were
 built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is
 based on the libjpeg v6b code base.  Although libjpeg v7 and v8 are still not
-as widely used as v6b, enough programs (including a few Linux distros) have
-made the switch that it was desirable to provide support for the libjpeg v7/v8
-ABI in libjpeg-turbo.  Although libjpeg-turbo can now be configured as a
-drop-in replacement for libjpeg v7 or v8, it should be noted that not all of
-the features in libjpeg v7 and v8 are supported (see below.)
+as widely used as v6b, enough programs (including a few Linux distros) made
+the switch that there was a demand to emulate the libjpeg v7 and v8 ABIs
+in libjpeg-turbo.  It should be noted, however, that this feature was added
+primarily so that applications that had already been compiled to use libjpeg
+v7+ could take advantage of accelerated baseline JPEG encoding/decoding
+without recompiling.  libjpeg-turbo does not claim to support all of the
+libjpeg v7+ features, nor to produce identical output to libjpeg v7+ in all
+cases (see below.)
 
 By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an
 argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version
@@ -275,13 +282,21 @@
 following section describes which libjpeg v7+ features are supported and which
 aren't.
 
-libjpeg v7 and v8 Features:
----------------------------
+Support for libjpeg v7 and v8 Features:
+---------------------------------------
 
 Fully supported:
 
+-- libjpeg: IDCT scaling extensions in decompressor
+   libjpeg-turbo supports IDCT scaling with scaling factors of 1/8, 1/4, 3/8,
+   1/2, 5/8, 3/4, 7/8, 9/8, 5/4, 11/8, 3/2, 13/8, 7/4, 15/8, and 2/1 (only 1/4
+   and 1/2 are SIMD-accelerated.)
+
 -- libjpeg: arithmetic coding
 
+-- libjpeg: In-memory source and destination managers
+   See notes below.
+
 -- cjpeg: Separate quality settings for luminance and chrominance
    Note that the libpjeg v7+ API was extended to accommodate this feature only
    for convenience purposes.  It has always been possible to implement this
@@ -289,38 +304,47 @@
 
 -- cjpeg: 32-bit BMP support
 
+-- cjpeg: -rgb option
+
 -- jpegtran: lossless cropping
 
 -- jpegtran: -perfect option
 
+-- jpegtran: forcing width/height when performing lossless crop
+
 -- rdjpgcom: -raw option
 
 -- rdjpgcom: locale awareness
 
 
-Fully supported when using libjpeg v7/v8 emulation:
-
--- libjpeg: In-memory source and destination managers
-
-
 Not supported:
 
+NOTE:  As of this writing, extensive research has been conducted into the
+usefulness of DCT scaling as a means of data reduction and SmartScale as a
+means of quality improvement.  The reader is invited to peruse the research at
+http://www.libjpeg-turbo.org/About/SmartScale and draw his/her own conclusions,
+but it is the general belief of our project that these features have not
+demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
+
 -- libjpeg: DCT scaling in compressor
    cinfo.scale_num and cinfo.scale_denom are silently ignored.
-   There is no technical reason why DCT scaling cannot be supported, but
-   without the SmartScale extension (see below), it would only be able to
-   down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9,
-   which is of limited usefulness.
+   There is no technical reason why DCT scaling could not be supported when
+   emulating the libjpeg v7+ API/ABI, but without the SmartScale extension (see
+   below), only scaling factors of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and
+   8/9 would be available, which is of limited usefulness.
 
 -- libjpeg: SmartScale
    cinfo.block_size is silently ignored.
    SmartScale is an extension to the JPEG format that allows for DCT block
-   sizes other than 8x8.  It would be difficult to support this feature while
-   retaining backward compatibility with libjpeg v6b.
-
--- libjpeg: IDCT scaling extensions in decompressor
-   libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4,
-   and 1/8 (same as libjpeg v6b.)
+   sizes other than 8x8.  Providing support for this new format would be
+   feasible (particularly without full acceleration.)  However, until/unless
+   the format becomes either an official industry standard or, at minimum, an
+   accepted solution in the community, we are hesitant to implement it, as
+   there is no sense of whether or how it might change in the future.  It is
+   our belief that SmartScale has not demonstrated sufficient usefulness as a
+   lossless format nor as a means of quality enhancement, and thus, our primary
+   interest in providing this feature would be as a means of supporting
+   additional DCT scaling factors.
 
 -- libjpeg: Fancy downsampling in compressor
    cinfo.do_fancy_downsampling is silently ignored.
@@ -333,9 +357,105 @@
 -- Lossless RGB JPEG files
    This requires the SmartScale feature, which is not supported.
 
+What About libjpeg v9?
+----------------------
+
+libjpeg v9 introduced yet another field to the JPEG compression structure
+(color_transform), thus making the ABI backward incompatible with that of
+libjpeg v8.  This new field was introduced solely for the purpose of supporting
+lossless SmartScale encoding.  Further, there was actually no reason to extend
+the API in this manner, as the color transform could have just as easily been
+activated by way of a new JPEG colorspace constant, thus preserving backward
+ABI compatibility.
+
+Our research (see link above) has shown that lossless SmartScale does not
+generally accomplish anything that can't already be accomplished better with
+existing, standard lossless formats.  Thus, at this time, it is our belief that
+there is not sufficient technical justification for software to upgrade from
+libjpeg v8 to libjpeg v9, and therefore, not sufficient technical justification
+for us to emulate the libjpeg v9 ABI.
+
+=====================================
+In-Memory Source/Destination Managers
+=====================================
+
+By default, libjpeg-turbo 1.3 and later includes the jpeg_mem_src() and
+jpeg_mem_dest() functions, even when not emulating the libjpeg v8 API/ABI.
+Previously, it was necessary to build libjpeg-turbo from source with libjpeg v8
+API/ABI emulation in order to use the in-memory source/destination managers,
+but several projects requested that those functions be included when emulating
+the libjpeg v6b API/ABI as well.  This allows the use of those functions by
+programs that need them without breaking ABI compatibility for programs that
+don't, and it allows those functions to be provided in the "official"
+libjpeg-turbo binaries.
+
+Those who are concerned about maintaining strict conformance with the libjpeg
+v6b or v7 API can pass an argument of --without-mem-srcdst to configure or
+an argument of -DWITH_MEM_SRCDST=0 to CMake prior to building libjpeg-turbo.
+This will restore the pre-1.3 behavior, in which jpeg_mem_src() and
+jpeg_mem_dest() are only included when emulating the libjpeg v8 API/ABI.
+
+On Un*x systems, including the in-memory source/destination managers changes
+the dynamic library version from 62.0.0 to 62.1.0 if using libjpeg v6b API/ABI
+emulation and from 7.0.0 to 7.1.0 if using libjpeg v7 API/ABI emulation.
+
+Note that, on most Un*x systems, the dynamic linker will not look for a
+function in a library until that function is actually used.  Thus, if a program
+is built against libjpeg-turbo 1.3+ and uses jpeg_mem_src() or jpeg_mem_dest(),
+that program will not fail if run against an older version of libjpeg-turbo or
+against libjpeg v7- until the program actually tries to call jpeg_mem_src() or
+jpeg_mem_dest().  Such is not the case on Windows.  If a program is built
+against the libjpeg-turbo 1.3+ DLL and uses jpeg_mem_src() or jpeg_mem_dest(),
+then it must use the libjpeg-turbo 1.3+ DLL at run time.
+
+Both cjpeg and djpeg have been extended to allow testing the in-memory
+source/destination manager functions.  See their respective man pages for more
+details.
+
 
 *******************************************************************************
-**     Performance pitfalls
+**     Mathematical Compatibility
+*******************************************************************************
+
+For the most part, libjpeg-turbo should produce identical output to libjpeg
+v6b.  The one exception to this is when using the floating point DCT/IDCT, in
+which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the
+following reasons:
+
+-- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so
+   slightly more accurate than the implementation in libjpeg v6b, but not by
+   any amount perceptible to human vision (generally in the range of 0.01 to
+   0.08 dB gain in PSNR.)
+-- When not using the SIMD extensions, the accuracy of the floating point
+   DCT/IDCT can depend on the compiler and compiler settings.
+
+
+While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood, it is
+still using the same algorithms as libjpeg v6b, so there are several specific
+cases in which libjpeg-turbo cannot be expected to produce the same output as
+libjpeg v8:
+
+-- When decompressing using scaling factors of 1/2 and 1/4, because libjpeg v8
+   implements those scaling algorithms differently than libjpeg v6b does, and
+   libjpeg-turbo's SIMD extensions are based on the libjpeg v6b behavior.
+
+-- When using chrominance subsampling, because libjpeg v8 implements this
+   with its DCT/IDCT scaling algorithms rather than with a separate
+   downsampling/upsampling algorithm.  In our testing, the subsampled/upsampled
+   output of libjpeg v8 is less accurate than that of libjpeg v6b for this
+   reason.
+
+-- When using the floating point IDCT, for the reasons stated above and also
+   because the floating point IDCT algorithm was modified in libjpeg v8a to
+   improve accuracy.
+
+-- When decompressing using a scaling factor > 1 and merged (AKA "non-fancy" or
+   "non-smooth") chrominance upsampling, because libjpeg v8 does not support
+   merged upsampling with scaling factors > 1.
+
+
+*******************************************************************************
+**     Performance Pitfalls
 *******************************************************************************
 
 ===============
diff --git a/acinclude.m4 b/acinclude.m4
index 70e2555..afb4359 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -40,7 +40,7 @@
         ;;
     esac
   ;;
-  freebsd* | netbsd* | openbsd*)
+  kfreebsd* | freebsd* | netbsd* | openbsd*)
     if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
       objfmt='BSD-a.out'
     else
@@ -144,26 +144,26 @@
   ac_save_CFLAGS="$CFLAGS"
   CFLAGS="$CCASFLAGS -x assembler-with-cpp"
   CC="$CCAS"
-  AC_COMPILE_IFELSE([[
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
     .text
     .fpu neon
     .arch armv7a
     .object_arch armv4
     .arm
     pld [r0]
-    vmovn.u16 d0, q0]], ac_good_gnu_arm_assembler=yes)
+    vmovn.u16 d0, q0]])], ac_good_gnu_arm_assembler=yes)
 
   ac_use_gas_preprocessor=no
   if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
     CC="gas-preprocessor.pl $CCAS"
-    AC_COMPILE_IFELSE([[
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
       .text
       .fpu neon
       .arch armv7a
       .object_arch armv4
       .arm
       pld [r0]
-      vmovn.u16 d0, q0]], ac_use_gas_preprocessor=yes)
+      vmovn.u16 d0, q0]])], ac_use_gas_preprocessor=yes)
   fi
   CFLAGS="$ac_save_CFLAGS"
   CC="$ac_save_CC"
diff --git a/bmp.c b/bmp.c
index 4caefb4..4986055 100644
--- a/bmp.c
+++ b/bmp.c
@@ -98,7 +98,7 @@
 }
 
 
-int loadbmp(char *filename, unsigned char **buf, int *w, int *h, 
+int loadbmp(char *filename, unsigned char **buf, int *w, int *h,
 	int dstpf, int bottomup)
 {
 	int retval=0, dstps, srcpf, tempc;
@@ -169,7 +169,7 @@
 				nlines);
 		}
 		cinfo.next_scanline+=nlines;
-  }
+	}
 
 	(*src->finish_input)(&cinfo, src);
 
@@ -236,7 +236,7 @@
 			_throw("savebmp(): Could not initialize PPM writer");
 	}
 
-  dst->output_file=file;
+	dst->output_file=file;
 	(*dst->start_output)(&dinfo, dst);
 	(*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo);
 
@@ -258,7 +258,7 @@
 		}
 		(*dst->put_pixel_rows)(&dinfo, dst, nlines);
 		dinfo.output_scanline+=nlines;
-  }
+	}
 
 	(*dst->finish_output)(&dinfo, dst);
 
diff --git a/bmp.h b/bmp.h
index 0d1e4dc..c50c260 100644
--- a/bmp.h
+++ b/bmp.h
@@ -39,8 +39,4 @@
 
 const char *bmpgeterr(void);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/cderror.h b/cderror.h
index e19c475..6ccb37c 100644
--- a/cderror.h
+++ b/cderror.h
@@ -33,7 +33,7 @@
 
 typedef enum {
 
-#define JMESSAGE(code,string)	code ,
+#define JMESSAGE(code,string)   code ,
 
 #endif /* JMAKE_ENUM_LIST */
 
@@ -62,7 +62,7 @@
 JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
 JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
 JMESSAGE(JTRC_GIF_BADVERSION,
-	 "Warning: unexpected GIF version number '%c%c%c'")
+         "Warning: unexpected GIF version number '%c%c%c'")
 JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
 JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
 JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
@@ -110,13 +110,13 @@
 #endif /* TARGA_SUPPORTED */
 
 JMESSAGE(JERR_BAD_CMAP_FILE,
-	 "Color map file is invalid or of unsupported format")
+         "Color map file is invalid or of unsupported format")
 JMESSAGE(JERR_TOO_MANY_COLORS,
-	 "Output file format cannot handle %d colormap entries")
+         "Output file format cannot handle %d colormap entries")
 JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
 #ifdef TARGA_SUPPORTED
 JMESSAGE(JERR_UNKNOWN_FORMAT,
-	 "Unrecognized input file format --- perhaps you need -targa")
+         "Unrecognized input file format --- perhaps you need -targa")
 #else
 JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
 #endif
diff --git a/cdjpeg.c b/cdjpeg.c
index b6250ff..17f1de7 100644
--- a/cdjpeg.c
+++ b/cdjpeg.c
@@ -9,15 +9,15 @@
  * programs (cjpeg, djpeg, jpegtran).
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include <ctype.h>		/* to declare isupper(), tolower() */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef NEED_SIGNAL_CATCHER
-#include <signal.h>		/* to declare signal() */
+#include <signal.h>             /* to declare signal() */
 #endif
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
+#include <io.h>                 /* to declare setmode() */
 #endif
 
 
@@ -31,13 +31,13 @@
 
 static j_common_ptr sig_cinfo;
 
-void				/* must be global for Manx C */
+void                            /* must be global for Manx C */
 signal_catcher (int signum)
 {
   if (sig_cinfo != NULL) {
     if (sig_cinfo->err != NULL) /* turn off trace output */
       sig_cinfo->err->trace_level = 0;
-    jpeg_destroy(sig_cinfo);	/* clean up memory allocation & temp files */
+    jpeg_destroy(sig_cinfo);    /* clean up memory allocation & temp files */
   }
   exit(EXIT_FAILURE);
 }
@@ -47,10 +47,10 @@
 enable_signal_catcher (j_common_ptr cinfo)
 {
   sig_cinfo = cinfo;
-#ifdef SIGINT			/* not all systems have SIGINT */
+#ifdef SIGINT                   /* not all systems have SIGINT */
   signal(SIGINT, signal_catcher);
 #endif
-#ifdef SIGTERM			/* not all systems have SIGTERM */
+#ifdef SIGTERM                  /* not all systems have SIGTERM */
   signal(SIGTERM, signal_catcher);
 #endif
 }
@@ -75,8 +75,8 @@
     prog->percent_done = percent_done;
     if (total_passes > 1) {
       fprintf(stderr, "\rPass %d/%d: %3d%% ",
-	      prog->pub.completed_passes + prog->completed_extra_passes + 1,
-	      total_passes, percent_done);
+              prog->pub.completed_passes + prog->completed_extra_passes + 1,
+              total_passes, percent_done);
     } else {
       fprintf(stderr, "\r %3d%% ", percent_done);
     }
@@ -126,17 +126,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return FALSE;		/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return FALSE;             /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return FALSE;		/* no good */
-    nmatched++;			/* count matched characters */
+      return FALSE;             /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return FALSE;
-  return TRUE;			/* A-OK */
+  return TRUE;                  /* A-OK */
 }
 
 
@@ -150,10 +150,10 @@
 {
   FILE * input_file = stdin;
 
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((input_file = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
     fprintf(stderr, "Cannot reopen stdin\n");
     exit(EXIT_FAILURE);
@@ -168,10 +168,10 @@
 {
   FILE * output_file = stdout;
 
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdout), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((output_file = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) {
     fprintf(stderr, "Cannot reopen stdout\n");
     exit(EXIT_FAILURE);
diff --git a/cdjpeg.h b/cdjpeg.h
index ed024ac..0a8f197 100644
--- a/cdjpeg.h
+++ b/cdjpeg.h
@@ -9,12 +9,12 @@
  * cjpeg and djpeg.  It is NOT used by the core JPEG library.
  */
 
-#define JPEG_CJPEG_DJPEG	/* define proper options in jconfig.h */
-#define JPEG_INTERNAL_OPTIONS	/* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
+#define JPEG_CJPEG_DJPEG        /* define proper options in jconfig.h */
+#define JPEG_INTERNAL_OPTIONS   /* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jerror.h"		/* get library error codes too */
-#include "cderror.h"		/* get application-specific error codes */
+#include "jerror.h"             /* get library error codes too */
+#include "cderror.h"            /* get application-specific error codes */
 
 
 /*
@@ -25,11 +25,11 @@
 
 struct cjpeg_source_struct {
   JMETHOD(void, start_input, (j_compress_ptr cinfo,
-			      cjpeg_source_ptr sinfo));
+                              cjpeg_source_ptr sinfo));
   JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
+                                       cjpeg_source_ptr sinfo));
   JMETHOD(void, finish_input, (j_compress_ptr cinfo,
-			       cjpeg_source_ptr sinfo));
+                               cjpeg_source_ptr sinfo));
 
   FILE *input_file;
 
@@ -49,14 +49,14 @@
    * The color map will be ready at this time, if one is needed.
    */
   JMETHOD(void, start_output, (j_decompress_ptr cinfo,
-			       djpeg_dest_ptr dinfo));
+                               djpeg_dest_ptr dinfo));
   /* Emit the specified number of pixel rows from the buffer. */
   JMETHOD(void, put_pixel_rows, (j_decompress_ptr cinfo,
-				 djpeg_dest_ptr dinfo,
-				 JDIMENSION rows_supplied));
+                                 djpeg_dest_ptr dinfo,
+                                 JDIMENSION rows_supplied));
   /* Finish up at the end of the image. */
   JMETHOD(void, finish_output, (j_decompress_ptr cinfo,
-				djpeg_dest_ptr dinfo));
+                                djpeg_dest_ptr dinfo));
 
   /* Target file spec; filled in by djpeg.c after object is created. */
   FILE * output_file;
@@ -79,9 +79,9 @@
  */
 
 struct cdjpeg_progress_mgr {
-  struct jpeg_progress_mgr pub;	/* fields known to JPEG library */
-  int completed_extra_passes;	/* extra passes completed */
-  int total_extra_passes;	/* total extra */
+  struct jpeg_progress_mgr pub; /* fields known to JPEG library */
+  int completed_extra_passes;   /* extra passes completed */
+  int total_extra_passes;       /* total extra */
   /* last printed percentage stored here to avoid multiple printouts */
   int percent_done;
 };
@@ -92,34 +92,34 @@
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_read_bmp		jIRdBMP
-#define jinit_write_bmp		jIWrBMP
-#define jinit_read_gif		jIRdGIF
-#define jinit_write_gif		jIWrGIF
-#define jinit_read_ppm		jIRdPPM
-#define jinit_write_ppm		jIWrPPM
-#define jinit_read_rle		jIRdRLE
-#define jinit_write_rle		jIWrRLE
-#define jinit_read_targa	jIRdTarga
-#define jinit_write_targa	jIWrTarga
-#define read_quant_tables	RdQTables
-#define read_scan_script	RdScnScript
+#define jinit_read_bmp          jIRdBMP
+#define jinit_write_bmp         jIWrBMP
+#define jinit_read_gif          jIRdGIF
+#define jinit_write_gif         jIWrGIF
+#define jinit_read_ppm          jIRdPPM
+#define jinit_write_ppm         jIWrPPM
+#define jinit_read_rle          jIRdRLE
+#define jinit_write_rle         jIWrRLE
+#define jinit_read_targa        jIRdTarga
+#define jinit_write_targa       jIWrTarga
+#define read_quant_tables       RdQTables
+#define read_scan_script        RdScnScript
 #define set_quality_ratings     SetQRates
-#define set_quant_slots		SetQSlots
-#define set_sample_factors	SetSFacts
-#define read_color_map		RdCMap
-#define enable_signal_catcher	EnSigCatcher
-#define start_progress_monitor	StProgMon
-#define end_progress_monitor	EnProgMon
-#define read_stdin		RdStdin
-#define write_stdout		WrStdout
+#define set_quant_slots         SetQSlots
+#define set_sample_factors      SetSFacts
+#define read_color_map          RdCMap
+#define enable_signal_catcher   EnSigCatcher
+#define start_progress_monitor  StProgMon
+#define end_progress_monitor    EnProgMon
+#define read_stdin              RdStdin
+#define write_stdout            WrStdout
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 /* Module selection routines for I/O modules. */
 
 EXTERN(cjpeg_source_ptr) jinit_read_bmp JPP((j_compress_ptr cinfo));
 EXTERN(djpeg_dest_ptr) jinit_write_bmp JPP((j_decompress_ptr cinfo,
-					    boolean is_os2));
+                                            boolean is_os2));
 EXTERN(cjpeg_source_ptr) jinit_read_gif JPP((j_compress_ptr cinfo));
 EXTERN(djpeg_dest_ptr) jinit_write_gif JPP((j_decompress_ptr cinfo));
 EXTERN(cjpeg_source_ptr) jinit_read_ppm JPP((j_compress_ptr cinfo));
@@ -132,10 +132,10 @@
 /* cjpeg support routines (in rdswitch.c) */
 
 EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
-				       boolean force_baseline));
+                                       boolean force_baseline));
 EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
 EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
-					 boolean force_baseline));
+                                         boolean force_baseline));
 EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
 EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
 
@@ -147,7 +147,7 @@
 
 EXTERN(void) enable_signal_catcher JPP((j_common_ptr cinfo));
 EXTERN(void) start_progress_monitor JPP((j_common_ptr cinfo,
-					 cd_progress_ptr progress));
+                                         cd_progress_ptr progress));
 EXTERN(void) end_progress_monitor JPP((j_common_ptr cinfo));
 EXTERN(boolean) keymatch JPP((char * arg, const char * keyword, int minchars));
 EXTERN(FILE *) read_stdin JPP((void));
@@ -155,32 +155,32 @@
 
 /* miscellaneous useful macros */
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#define WRITE_BINARY    "w"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
+#ifdef VMS                      /* VMS is very nonstandard */
+#define READ_BINARY     "rb", "ctx=stm"
+#define WRITE_BINARY    "wb", "ctx=stm"
+#else                           /* standard ANSI-compliant case */
+#define READ_BINARY     "rb"
+#define WRITE_BINARY    "wb"
 #endif
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
 #ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
+#define EXIT_SUCCESS  1         /* VMS is very nonstandard */
 #else
 #define EXIT_SUCCESS  0
 #endif
 #endif
 #ifndef EXIT_WARNING
 #ifdef VMS
-#define EXIT_WARNING  1		/* VMS is very nonstandard */
+#define EXIT_WARNING  1         /* VMS is very nonstandard */
 #else
 #define EXIT_WARNING  2
 #endif
diff --git a/change.log b/change.log
index c7885a7..b60ddd6 100644
--- a/change.log
+++ b/change.log
@@ -4,6 +4,18 @@
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 8d  15-Jan-2012
+-----------------------
+
+Add cjpeg -rgb option to create RGB JPEG files.
+Using this switch suppresses the conversion from RGB
+colorspace input to the default YCbCr JPEG colorspace.
+Thanks to Michael Koch for the initial suggestion.
+
+Add option to disable the region adjustment in the transupp crop code.
+Thanks to Jeffrey Friedl for the suggestion.
+
+
 Version 8b  16-May-2010
 -----------------------
 
@@ -34,6 +46,9 @@
 Version 7  27-Jun-2009
 ----------------------
 
+New scaled DCTs implemented.
+djpeg now supports scalings N/8 with all N from 1 to 16.
+
 cjpeg -quality option has been extended for support of separate quality
 settings for luminance and chrominance (or in general, for every provided
 quantization table slot).
diff --git a/cjpeg.1 b/cjpeg.1
index f59d290..5f5090c 100644
--- a/cjpeg.1
+++ b/cjpeg.1
@@ -1,4 +1,4 @@
-.TH CJPEG 1 "1 January 2013"
+.TH CJPEG 1 "11 May 2014"
 .SH NAME
 cjpeg \- compress an image file to a JPEG file
 .SH SYNOPSIS
@@ -16,7 +16,7 @@
 compresses the named image file, or the standard input if no file is
 named, and produces a JPEG/JFIF file on the standard output.
 The currently supported input file formats are: PPM (PBMPLUS color
-format), PGM (PBMPLUS gray-scale format), BMP, Targa, and RLE (Utah Raster
+format), PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster
 Toolkit format).  (RLE is supported only if the URT library is available.)
 .SH OPTIONS
 All switch names may be abbreviated; for example,
@@ -49,6 +49,11 @@
 .BR \-grayscale ,
 you'll get a smaller JPEG file that takes less time to process.
 .TP
+.B \-rgb
+Create RGB JPEG file.
+Using this switch suppresses the conversion from RGB
+colorspace input to the default YCbCr JPEG colorspace.
+.TP
 .B \-optimize
 Perform optimization of entropy encoding parameters.  Without this, default
 encoding parameters are used.
@@ -161,14 +166,27 @@
 .TP
 .B \-dct fast
 Use fast integer DCT (less accurate).
+In libjpeg-turbo, the fast method is generally about 5-15% faster than the int
+method when using the x86/x86-64 SIMD extensions (results may vary with other
+SIMD implementations, or when using libjpeg-turbo without SIMD extensions.)
+For quality levels of 90 and below, there should be little or no perceptible
+difference between the two algorithms.  For quality levels above 90, however,
+the difference between the fast and the int methods becomes more pronounced.
+With quality=97, for instance, the fast method generally incurs about a 1-3 dB
+loss (in PSNR) relative to the int method, but this can be larger for some
+images.  Do not use the fast method with quality levels above 97.  The
+algorithm often degenerates at quality=98 and above and can actually produce a
+more lossy image than if lower quality levels had been used.  Also, in
+libjpeg-turbo, the fast method is not fully accelerated for quality levels
+above 97, so it will be slower than the int method.
 .TP
 .B \-dct float
 Use floating-point DCT method.
-The float method is very slightly more accurate than the int method, but is
-much slower unless your machine has very fast floating-point hardware.  Also
-note that results of the floating-point method may vary slightly across
-machines, while the integer methods should give the same results everywhere.
-The fast integer method is much less accurate than the other two.
+The float method is mainly a legacy feature.  It does not produce significantly
+more accurate results than the int method, and it is much slower.  The float
+method may also give different results on different machines due to varying
+roundoff behavior, whereas the integer methods should give the same results on
+all machines.
 .TP
 .BI \-restart " N"
 Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is
@@ -190,6 +208,11 @@
 .BI \-outfile " name"
 Send output image to the named file, not to standard output.
 .TP
+.B \-memdst
+Compress to memory instead of a file.  This feature was implemented mainly as a
+way of testing the in-memory destination manager (jpeg_mem_dest()), but it is
+also useful for benchmarking, since it reduces the I/O overhead.
+.TP
 .B \-verbose
 Enable debug printout.  More
 .BR \-v 's
@@ -310,7 +333,8 @@
 Independent JPEG Group
 .PP
 This file was modified by The libjpeg-turbo Project to include only information
-relevant to libjpeg-turbo and to wordsmith certain sections.
+relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
+features not present in libjpeg.
 .SH BUGS
 Support for GIF input files was removed in cjpeg v6b due to concerns over
 the Unisys LZW patent.  Although this patent expired in 2006, cjpeg still
diff --git a/cjpeg.c b/cjpeg.c
index 703ce10..4429c49 100644
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -3,9 +3,9 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2008 by Guido Vollbeding.
- * Modifications:
- * Copyright (C) 2010, D. R. Commander.
+ * Modified 2003-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a command-line user interface for the JPEG compressor.
@@ -13,8 +13,8 @@
  *
  * Two different command line styles are permitted, depending on the
  * compile-time switch TWO_FILE_COMMANDLINE:
- *	cjpeg [options]  inputfile outputfile
- *	cjpeg [options]  [inputfile]
+ *      cjpeg [options]  inputfile outputfile
+ *      cjpeg [options]  [inputfile]
  * In the second style, output is always to standard output, which you'd
  * normally redirect to a file or pipe to some other program.  Input is
  * either from a named file or from standard input (typically redirected).
@@ -22,28 +22,28 @@
  * don't support pipes.  Also, you MUST use the first style if your system
  * doesn't do binary I/O to stdin/stdout.
  * To simplify script writing, the "-outfile" switch is provided.  The syntax
- *	cjpeg [options]  -outfile outputfile  inputfile
+ *      cjpeg [options]  -outfile outputfile  inputfile
  * works regardless of which command line style is used.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "jversion.h"		/* for version message */
-#include "config.h"
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
 
 /* Create the add-on message string table. */
 
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 static const char * const cdjpeg_message_table[] = {
 #include "cderror.h"
@@ -77,7 +77,7 @@
  * seldom-used ID field), so we provide a switch to force Targa input mode.
  */
 
-static boolean is_targa;	/* records user -targa switch */
+static boolean is_targa;        /* records user -targa switch */
 
 
 LOCAL(cjpeg_source_ptr)
@@ -124,7 +124,7 @@
     break;
   }
 
-  return NULL;			/* suppress compiler warnings */
+  return NULL;                  /* suppress compiler warnings */
 }
 
 
@@ -137,8 +137,9 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+boolean memdst;  /* for -memdst switch */
 
 
 LOCAL(void)
@@ -155,6 +156,7 @@
   fprintf(stderr, "Switches (names may be abbreviated):\n");
   fprintf(stderr, "  -quality N[,...]   Compression quality (0..100; 5-95 is useful range)\n");
   fprintf(stderr, "  -grayscale     Create monochrome JPEG file\n");
+  fprintf(stderr, "  -rgb           Create RGB JPEG file\n");
 #ifdef ENTROPY_OPT_SUPPORTED
   fprintf(stderr, "  -optimize      Optimize Huffman table (smaller file, but slow compression)\n");
 #endif
@@ -170,15 +172,15 @@
 #endif
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   fprintf(stderr, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
-	  (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   fprintf(stderr, "  -dct float     Use floating-point DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
 #endif
   fprintf(stderr, "  -restart N     Set restart interval in rows, or in blocks with B\n");
 #ifdef INPUT_SMOOTHING_SUPPORTED
@@ -186,6 +188,9 @@
 #endif
   fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
   fprintf(stderr, "  -outfile name  Specify name for output file\n");
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  fprintf(stderr, "  -memdst        Compress to memory instead of file (useful for benchmarking)\n");
+#endif
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   fprintf(stderr, "Switches for wizards:\n");
   fprintf(stderr, "  -baseline      Force baseline quantization tables\n");
@@ -201,7 +206,7 @@
 
 LOCAL(int)
 parse_switches (j_compress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -215,18 +220,19 @@
   char * arg;
   boolean force_baseline;
   boolean simple_progressive;
-  char * qualityarg = NULL;	/* saves -quality parm if any */
-  char * qtablefile = NULL;	/* saves -qtables filename if any */
-  char * qslotsarg = NULL;	/* saves -qslots parm if any */
-  char * samplearg = NULL;	/* saves -sample parm if any */
-  char * scansarg = NULL;	/* saves -scans parm if any */
+  char * qualityarg = NULL;     /* saves -quality parm if any */
+  char * qtablefile = NULL;     /* saves -qtables filename if any */
+  char * qslotsarg = NULL;      /* saves -qslots parm if any */
+  char * samplearg = NULL;      /* saves -sample parm if any */
+  char * scansarg = NULL;       /* saves -scans parm if any */
 
   /* Set up default JPEG parameters. */
 
-  force_baseline = FALSE;	/* by default, allow 16-bit quantizers */
+  force_baseline = FALSE;       /* by default, allow 16-bit quantizers */
   simple_progressive = FALSE;
   is_targa = FALSE;
   outfilename = NULL;
+  memdst = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -236,12 +242,12 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "arithmetic", 1)) {
       /* Use arithmetic coding. */
@@ -249,7 +255,7 @@
       cinfo->arith_code = TRUE;
 #else
       fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -259,16 +265,16 @@
 
     } else if (keymatch(arg, "dct", 2)) {
       /* Select DCT algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "int", 1)) {
-	cinfo->dct_method = JDCT_ISLOW;
+        cinfo->dct_method = JDCT_ISLOW;
       } else if (keymatch(argv[argn], "fast", 2)) {
-	cinfo->dct_method = JDCT_IFAST;
+        cinfo->dct_method = JDCT_IFAST;
       } else if (keymatch(argv[argn], "float", 2)) {
-	cinfo->dct_method = JDCT_FLOAT;
+        cinfo->dct_method = JDCT_FLOAT;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
       /* Enable debug printouts. */
@@ -276,12 +282,12 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "%s version %s (build %s)\n",
-		PACKAGE_NAME, VERSION, BUILD);
-	fprintf(stderr, "%s\n\n", JCOPYRIGHT);
-	fprintf(stderr, "Emulating The Independent JPEG Group's libjpeg, version %s\n\n",
-		JVERSION);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
@@ -289,17 +295,21 @@
       /* Force a monochrome JPEG file to be generated. */
       jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
 
+    } else if (keymatch(arg, "rgb", 3)) {
+      /* Force an RGB JPEG file to be generated. */
+      jpeg_set_colorspace(cinfo, JCS_RGB);
+
     } else if (keymatch(arg, "maxmemory", 3)) {
       /* Maximum memory in Kb (or Mb with 'm'). */
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
@@ -307,16 +317,16 @@
 #ifdef ENTROPY_OPT_SUPPORTED
       cinfo->optimize_coding = TRUE;
 #else
-      fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
 
     } else if (keymatch(arg, "progressive", 1)) {
       /* Select simple progressive mode. */
@@ -324,21 +334,31 @@
       simple_progressive = TRUE;
       /* We must postpone execution until num_components is known. */
 #else
-      fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, progressive output was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "memdst", 2)) {
+      /* Use in-memory destination manager */
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+      memdst = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "quality", 1)) {
       /* Quality ratings (quantization table scaling factors). */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       qualityarg = argv[argn];
 
     } else if (keymatch(arg, "qslots", 2)) {
       /* Quantization table slot numbers. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       qslotsarg = argv[argn];
       /* Must delay setting qslots until after we have processed any
        * colorspace-determining switches, since jpeg_set_colorspace sets
@@ -347,8 +367,8 @@
 
     } else if (keymatch(arg, "qtables", 2)) {
       /* Quantization tables fetched from file. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       qtablefile = argv[argn];
       /* We postpone actually reading the file in case -quality comes later. */
 
@@ -357,24 +377,24 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (lval < 0 || lval > 65535L)
-	usage();
+        usage();
       if (ch == 'b' || ch == 'B') {
-	cinfo->restart_interval = (unsigned int) lval;
-	cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+        cinfo->restart_interval = (unsigned int) lval;
+        cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
       } else {
-	cinfo->restart_in_rows = (int) lval;
-	/* restart_interval will be computed during startup */
+        cinfo->restart_in_rows = (int) lval;
+        /* restart_interval will be computed during startup */
       }
 
     } else if (keymatch(arg, "sample", 2)) {
       /* Set sampling factors. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       samplearg = argv[argn];
       /* Must delay setting sample factors until after we have processed any
        * colorspace-determining switches, since jpeg_set_colorspace sets
@@ -384,13 +404,13 @@
     } else if (keymatch(arg, "scans", 4)) {
       /* Set scan script. */
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       scansarg = argv[argn];
       /* We must postpone reading the file in case -progressive appears. */
 #else
-      fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -398,12 +418,12 @@
       /* Set input smoothing factor. */
       int val;
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d", &val) != 1)
-	usage();
+        usage();
       if (val < 0 || val > 100)
-	usage();
+        usage();
       cinfo->smoothing_factor = val;
 
     } else if (keymatch(arg, "targa", 1)) {
@@ -411,7 +431,7 @@
       is_targa = TRUE;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
@@ -421,35 +441,35 @@
 
     /* Set quantization tables for selected quality. */
     /* Some or all may be overridden if -qtables is present. */
-    if (qualityarg != NULL)	/* process -quality if it was present */
+    if (qualityarg != NULL)     /* process -quality if it was present */
       if (! set_quality_ratings(cinfo, qualityarg, force_baseline))
-	usage();
+        usage();
 
-    if (qtablefile != NULL)	/* process -qtables if it was present */
+    if (qtablefile != NULL)     /* process -qtables if it was present */
       if (! read_quant_tables(cinfo, qtablefile, force_baseline))
-	usage();
+        usage();
 
-    if (qslotsarg != NULL)	/* process -qslots if it was present */
+    if (qslotsarg != NULL)      /* process -qslots if it was present */
       if (! set_quant_slots(cinfo, qslotsarg))
-	usage();
+        usage();
 
-    if (samplearg != NULL)	/* process -sample if it was present */
+    if (samplearg != NULL)      /* process -sample if it was present */
       if (! set_sample_factors(cinfo, samplearg))
-	usage();
+        usage();
 
 #ifdef C_PROGRESSIVE_SUPPORTED
-    if (simple_progressive)	/* process -progressive; -scans can override */
+    if (simple_progressive)     /* process -progressive; -scans can override */
       jpeg_simple_progression(cinfo);
 #endif
 
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-    if (scansarg != NULL)	/* process -scans if it was present */
+    if (scansarg != NULL)       /* process -scans if it was present */
       if (! read_scan_script(cinfo, scansarg))
-	usage();
+        usage();
 #endif
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -468,7 +488,9 @@
   int file_index;
   cjpeg_source_ptr src_mgr;
   FILE * input_file;
-  FILE * output_file;
+  FILE * output_file = NULL;
+  unsigned char *outbuffer = NULL;
+  unsigned long outsize = 0;
   JDIMENSION num_scanlines;
 
   /* On Mac, fetch a command line. */
@@ -478,7 +500,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "cjpeg";		/* in case C library doesn't provide it */
+    progname = "cjpeg";         /* in case C library doesn't provide it */
 
   /* Initialize the JPEG compression object with default error handling. */
   cinfo.err = jpeg_std_error(&jerr);
@@ -511,19 +533,21 @@
   file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
 
 #ifdef TWO_FILE_COMMANDLINE
-  /* Must have either -outfile switch or explicit output file name */
-  if (outfilename == NULL) {
-    if (file_index != argc-2) {
-      fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
-      usage();
-    }
-    outfilename = argv[file_index+1];
-  } else {
-    if (file_index != argc-1) {
-      fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
-      usage();
+  if (!memdst) {
+    /* Must have either -outfile switch or explicit output file name */
+    if (outfilename == NULL) {
+      if (file_index != argc-2) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
+      outfilename = argv[file_index+1];
+    } else {
+      if (file_index != argc-1) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
     }
   }
 #else
@@ -551,7 +575,7 @@
       fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
       exit(EXIT_FAILURE);
     }
-  } else {
+  } else if (!memdst) {
     /* default output file is stdout */
     output_file = write_stdout();
   }
@@ -574,7 +598,12 @@
   file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
 
   /* Specify data destination for compression */
-  jpeg_stdio_dest(&cinfo, output_file);
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  if (memdst)
+    jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
+  else
+#endif
+    jpeg_stdio_dest(&cinfo, output_file);
 
   /* Start compressor */
   jpeg_start_compress(&cinfo, TRUE);
@@ -593,14 +622,20 @@
   /* Close files, if we opened them */
   if (input_file != stdin)
     fclose(input_file);
-  if (output_file != stdout)
+  if (output_file != stdout && output_file != NULL)
     fclose(output_file);
 
 #ifdef PROGRESS_REPORT
   end_progress_monitor((j_common_ptr) &cinfo);
 #endif
 
+  if (memdst) {
+    fprintf(stderr, "Compressed size:  %lu bytes\n", outsize);
+    if (outbuffer != NULL)
+      free(outbuffer);
+  }
+
   /* All done. */
   exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/cmakescripts/md5cmp.cmake b/cmakescripts/md5cmp.cmake
new file mode 100644
index 0000000..c315aa8
--- /dev/null
+++ b/cmakescripts/md5cmp.cmake
@@ -0,0 +1,15 @@
+if(NOT MD5)  # MD5: the checksum FILE is expected to have; never set by this script
+  message(FATAL_ERROR "MD5 not specified")
+endif()
+
+if(NOT FILE)  # FILE: path of the file to checksum; never set by this script
+  message(FATAL_ERROR "FILE not specified")
+endif()
+
+file(MD5 "${FILE}" MD5FILE)  # quoted so that a path containing ';' stays one argument
+
+if(NOT MD5 STREQUAL MD5FILE)  # any mismatch aborts with a fatal error
+  message(FATAL_ERROR "MD5 of ${FILE} should be ${MD5}, not ${MD5FILE}.")
+else()
+  message(STATUS "${MD5}: OK")
+endif()
diff --git a/coderules.txt b/coderules.txt
index 357929f..ea8fcc0 100644
--- a/coderules.txt
+++ b/coderules.txt
@@ -21,11 +21,11 @@
  */
 
 We indent statements in K&R style, e.g.,
-	if (test) {
-	  then-part;
-	} else {
-	  else-part;
-	}
+        if (test) {
+          then-part;
+        } else {
+          else-part;
+        }
 with two spaces per indentation level.  (This indentation convention is
 handled automatically by GNU Emacs and many other text editors.)
 
@@ -57,16 +57,16 @@
 
 ansi2knr does not transform method declarations (function pointers in
 structs).  We handle these with a macro JMETHOD, defined as
-	#ifdef HAVE_PROTOTYPES
-	#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-	#else
-	#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-	#endif
+        #ifdef HAVE_PROTOTYPES
+        #define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+        #else
+        #define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+        #endif
 which is used like this:
-	struct function_pointers {
-	  JMETHOD(void, init_entropy_encoder, (int somearg, jparms *jp));
-	  JMETHOD(void, term_entropy_encoder, (void));
-	};
+        struct function_pointers {
+          JMETHOD(void, init_entropy_encoder, (int somearg, jparms *jp));
+          JMETHOD(void, term_entropy_encoder, (void));
+        };
 Note the set of parentheses surrounding the parameter list.
 
 A similar solution is used for forward and external function declarations
diff --git a/configure.ac b/configure.ac
index 6c8dd6d..9fd2948 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ([2.56])
-AC_INIT([libjpeg-turbo], [1.2.2])
+AC_INIT([libjpeg-turbo], [1.3.2])
 BUILD=`date +%Y%m%d`
 
 AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
@@ -11,22 +11,71 @@
 # Always build with prototypes
 AC_DEFINE([HAVE_PROTOTYPES], 1, [Define if your compiler supports prototypes])
 
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
 # Checks for programs.
 SAVED_CFLAGS=${CFLAGS}
 SAVED_CPPFLAGS=${CPPFLAGS}
 AC_PROG_CPP
 AC_PROG_CC
+m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 AM_PROG_AS
+AM_PROG_CC_C_O
 AC_PROG_INSTALL
 AC_PROG_LIBTOOL
 AC_PROG_LN_S
 
+# When the prefix is /opt/libjpeg-turbo, we assume that an "official" binary is
+# being created, and thus we install things into specific locations.
+
+old_prefix=${prefix}
+if test "x$prefix" = "xNONE" -a "x$ac_default_prefix" != "x"; then
+  prefix=$ac_default_prefix
+fi
+DATADIR=`eval echo ${datadir}`
+DATADIR=`eval echo $DATADIR`
+if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then
+  datadir='${prefix}'
+fi
+DATADIR=`eval echo ${datarootdir}`
+DATADIR=`eval echo $DATADIR`
+if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then
+  datarootdir='${prefix}'
+fi
+
+old_exec_prefix=${exec_prefix}
+if test "x$exec_prefix" = "xNONE"; then
+  exec_prefix=${prefix}
+fi
+
+if test "x${libdir}" = 'x${exec_prefix}/lib' -o "x${libdir}" = 'x${prefix}/lib'; then
+  LIBDIR=`eval echo ${libdir}`
+  LIBDIR=`eval echo $LIBDIR`
+  if test "$LIBDIR" = "/opt/libjpeg-turbo/lib"; then
+    case $host_os in
+      darwin*)
+        ;;
+      *)
+        AC_CHECK_SIZEOF(long)
+        if test "${ac_cv_sizeof_long}" = "8"; then
+          libdir='${exec_prefix}/lib64'
+        elif test "${ac_cv_sizeof_long}" = "4"; then
+          libdir='${exec_prefix}/lib32'
+        fi
+        ;;
+    esac
+  fi
+fi
+exec_prefix=${old_exec_prefix}
+prefix=${old_prefix}
+
 # Check whether compiler supports pointers to undefined structures
 AC_MSG_CHECKING(whether compiler supports pointers to undefined structures)
 AC_TRY_COMPILE([ typedef struct undefined_structure * undef_struct_ptr; ], ,
-AC_MSG_RESULT(yes),
-[AC_MSG_RESULT(no)
-AC_DEFINE([INCOMPLETE_TYPES_BROKEN],[1],[Compiler does not support pointers to undefined structures.])])
+  AC_MSG_RESULT(yes),
+  [AC_MSG_RESULT(no)
+   AC_DEFINE([INCOMPLETE_TYPES_BROKEN], [1],
+     [Compiler does not support pointers to undefined structures.])])
 
 if test "x${GCC}" = "xyes"; then
   if test "x${SAVED_CFLAGS}" = "x"; then
@@ -48,8 +97,9 @@
 
 # Checks for header files.
 AC_HEADER_STDC
-AC_CHECK_HEADERS([stddef.h stdlib.h string.h])
-AC_CHECK_HEADER([sys/types.h], AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you have sys/types.h]))
+AC_CHECK_HEADERS([stddef.h stdlib.h locale.h string.h])
+AC_CHECK_HEADER([sys/types.h],
+  AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you need to include <sys/types.h> to get size_t.]))
 
 # Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
@@ -60,85 +110,119 @@
 
 AC_MSG_CHECKING([if right shift is signed])
 AC_TRY_RUN(
-	[#include <stdio.h>
-	 int is_shifting_signed (long arg) {
-	 long res = arg >> 4;
+  [#include <stdio.h>
+   int is_shifting_signed (long arg) {
+     long res = arg >> 4;
 
-	 if (res == -0x7F7E80CL)
-		return 1; /* right shift is signed */
+     if (res == -0x7F7E80CL)
+       return 1; /* right shift is signed */
 
-	 /* see if unsigned-shift hack will fix it. */
-	 /* we can't just test exact value since it depends on width of long... */
-	 res |= (~0L) << (32-4);
-	 if (res == -0x7F7E80CL)
-		return 0; /* right shift is unsigned */
+     /* see if unsigned-shift hack will fix it. */
+     /* we can't just test exact value since it depends on width of long... */
+     res |= (~0L) << (32-4);
+     if (res == -0x7F7E80CL)
+       return 0; /* right shift is unsigned */
 
-	 printf("Right shift isn't acting as I expect it to.\n");
-	 printf("I fear the JPEG software will not work at all.\n\n");
-	 return 0; /* try it with unsigned anyway */
-	 }
-	 int main (void) {
-		exit(is_shifting_signed(-0x7F7E80B1L));
-	 }],
-	[AC_MSG_RESULT(no)
-	 AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1, [Define if shift is unsigned])],
-	[AC_MSG_RESULT(yes)],
-	[AC_MSG_RESULT(Assuming that right shift is signed on target machine.)])
+     printf("Right shift isn't acting as I expect it to.\n");
+     printf("I fear the JPEG software will not work at all.\n\n");
+     return 0; /* try it with unsigned anyway */
+   }
+   int main (void) {
+     exit(is_shifting_signed(-0x7F7E80B1L));
+   }],
+  [AC_MSG_RESULT(no)
+   AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1,
+     [Define if your (broken) compiler shifts signed values as if they were unsigned.])],
+  [AC_MSG_RESULT(yes)],
+  [AC_MSG_RESULT(Assuming that right shift is signed on target machine.)])
 
 # test whether global names are unique to at least 15 chars
 AC_MSG_CHECKING([for short external names])
 AC_TRY_LINK(
-	[int possibly_duplicate_function () { return 0; }
-	 int possibly_dupli_function () { return 1; }], [ ],
-	[AC_MSG_RESULT(ok)],
-	[AC_MSG_RESULT(short)
-	 AC_DEFINE([NEED_SHORT_EXTERNAL_NAMES], 1, [Define if you need short function names])])
+  [int possibly_duplicate_function () { return 0; }
+   int possibly_dupli_function () { return 1; }], [ ],
+  [AC_MSG_RESULT(ok)],
+  [AC_MSG_RESULT(short)
+   AC_DEFINE([NEED_SHORT_EXTERNAL_NAMES], 1,
+     [Define if linker requires that the first 15 characters of global names be unique.])])
 
 # Checks for library functions.
 AC_CHECK_FUNCS([memset memcpy], [],
-	[AC_DEFINE([NEED_BSD_STRINGS], 1,
-		   [Define if you have BSD-like bzero and bcopy])])
+  [AC_DEFINE([NEED_BSD_STRINGS], 1,
+     [Define if you have BSD-like bzero and bcopy in <strings.h> rather than memset/memcpy in <string.h>.])])
 
 AC_MSG_CHECKING([libjpeg API version])
 AC_ARG_VAR(JPEG_LIB_VERSION, [libjpeg API version (62, 70, or 80)])
 if test "x$JPEG_LIB_VERSION" = "x"; then
-    AC_ARG_WITH([jpeg7],
-        AC_HELP_STRING([--with-jpeg7], [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
-    AC_ARG_WITH([jpeg8],
-        AC_HELP_STRING([--with-jpeg8], [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
-    if test "x${with_jpeg8}" = "xyes"; then
-        JPEG_LIB_VERSION=80
+  AC_ARG_WITH([jpeg7],
+    AC_HELP_STRING([--with-jpeg7],
+      [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
+  AC_ARG_WITH([jpeg8],
+    AC_HELP_STRING([--with-jpeg8],
+      [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
+  if test "x${with_jpeg8}" = "xyes"; then
+    JPEG_LIB_VERSION=80
+  else
+    if test "x${with_jpeg7}" = "xyes"; then
+      JPEG_LIB_VERSION=70
     else
-        if test "x${with_jpeg7}" = "xyes"; then
-            JPEG_LIB_VERSION=70
-        else
-            JPEG_LIB_VERSION=62
-        fi
+      JPEG_LIB_VERSION=62
     fi
+  fi
 fi
 JPEG_LIB_VERSION_DECIMAL=`expr $JPEG_LIB_VERSION / 10`.`expr $JPEG_LIB_VERSION % 10`
 AC_SUBST(JPEG_LIB_VERSION_DECIMAL)
 AC_MSG_RESULT([$JPEG_LIB_VERSION_DECIMAL])
-AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION], [libjpeg API version])
+AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION],
+  [libjpeg API version])
 
-AC_MSG_CHECKING([libjpeg shared library version])
-AC_ARG_VAR(SO_MAJOR_VERSION, [Major version of the libjpeg-turbo shared library (default is determined by the API version)])
-AC_ARG_VAR(SO_MINOR_VERSION, [Minor version of the libjpeg-turbo shared library (default is determined by the API version)])
+AC_ARG_VAR(SO_MAJOR_VERSION,
+  [Major version of the libjpeg-turbo shared library (default is determined by the API version)])
+AC_ARG_VAR(SO_MINOR_VERSION,
+  [Minor version of the libjpeg-turbo shared library (default is determined by the API version)])
 if test "x$SO_MAJOR_VERSION" = "x"; then
-    case "$JPEG_LIB_VERSION" in
-        62)  SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;;
-        *)   SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;;
-    esac
+  case "$JPEG_LIB_VERSION" in
+    62)  SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;;
+    *)   SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;;
+  esac
 fi
 if test "x$SO_MINOR_VERSION" = "x"; then
-    case "$JPEG_LIB_VERSION" in
-        80)  SO_MINOR_VERSION=2 ;;
-        *)   SO_MINOR_VERSION=0 ;;
-    esac
+  case "$JPEG_LIB_VERSION" in
+    80)  SO_MINOR_VERSION=2 ;;
+    *)   SO_MINOR_VERSION=0 ;;
+  esac
 fi
-AC_MSG_RESULT([$SO_MAJOR_VERSION:$SO_MINOR_VERSION])
+
+RPM_CONFIG_ARGS=
+
+# Memory source/destination managers
+SO_AGE=0
+MEM_SRCDST_FUNCTIONS=
+if test "x${with_jpeg8}" != "xyes"; then
+  AC_MSG_CHECKING([whether to include in-memory source/destination managers])
+  AC_ARG_WITH([mem-srcdst],
+    AC_HELP_STRING([--without-mem-srcdst],
+      [Do not include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI]))
+  if test "x$with_mem_srcdst" != "xno"; then
+    AC_MSG_RESULT(yes)
+    AC_DEFINE([MEM_SRCDST_SUPPORTED], [1],
+      [Support in-memory source/destination managers])
+    SO_AGE=1
+    MEM_SRCDST_FUNCTIONS="global:  jpeg_mem_dest;  jpeg_mem_src;";
+  else
+    AC_MSG_RESULT(no)
+    RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-mem-srcdst"
+  fi
+fi
+
+AC_MSG_CHECKING([libjpeg shared library version])
+AC_MSG_RESULT([$SO_MAJOR_VERSION.$SO_AGE.$SO_MINOR_VERSION])
+LIBTOOL_CURRENT=`expr $SO_MAJOR_VERSION + $SO_AGE`
+AC_SUBST(LIBTOOL_CURRENT)
 AC_SUBST(SO_MAJOR_VERSION)
 AC_SUBST(SO_MINOR_VERSION)
+AC_SUBST(SO_AGE)
+AC_SUBST(MEM_SRCDST_FUNCTIONS)
 
 AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION, [$VERSION], [libjpeg-turbo version])
 
@@ -146,7 +230,7 @@
 AC_ARG_ENABLE([ld-version-script],
   AS_HELP_STRING([--disable-ld-version-script],
     [Disable linker version script for libjpeg-turbo (default is to use linker version script if the linker supports it)]),
-    [VERSION_SCRIPT=$enableval], [])
+  [VERSION_SCRIPT=$enableval], [])
 
 AC_MSG_CHECKING([whether the linker supports version scripts])
 SAVED_LDFLAGS="$LDFLAGS"
@@ -156,12 +240,16 @@
   global: *;
 };
 EOF
-AC_LINK_IFELSE(AC_LANG_PROGRAM([], []),
-  [VERSION_SCRIPT_FLAG=-Wl,--version-script,; AC_MSG_RESULT([yes (GNU style)])], [])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
+  [VERSION_SCRIPT_FLAG=-Wl,--version-script,;
+   AC_MSG_RESULT([yes (GNU style)])],
+  [])
 if test "x$VERSION_SCRIPT_FLAG" = "x"; then
   LDFLAGS="$SAVED_LDFLAGS -Wl,-M,conftest.map"
-  AC_LINK_IFELSE(AC_LANG_PROGRAM([], []),
-    [VERSION_SCRIPT_FLAG=-Wl,-M,; AC_MSG_RESULT([yes (Sun style)])], [])
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
+    [VERSION_SCRIPT_FLAG=-Wl,-M,;
+     AC_MSG_RESULT([yes (Sun style)])],
+    [])
 fi
 if test "x$VERSION_SCRIPT_FLAG" = "x"; then
   VERSION_SCRIPT=no
@@ -178,8 +266,8 @@
 # Check for non-broken inline under various spellings
 AC_MSG_CHECKING(for inline)
 ljt_cv_inline=""
-AC_TRY_COMPILE(, [} __attribute__((always_inline)) int foo() { return 0; }
-int bar() { return foo();], ljt_cv_inline="__attribute__((always_inline))",
+AC_TRY_COMPILE(, [} inline __attribute__((always_inline)) int foo() { return 0; }
+int bar() { return foo();], ljt_cv_inline="inline __attribute__((always_inline))",
 AC_TRY_COMPILE(, [} __inline__ int foo() { return 0; }
 int bar() { return foo();], ljt_cv_inline="__inline__",
 AC_TRY_COMPILE(, [} __inline int foo() { return 0; }
@@ -189,30 +277,49 @@
 AC_MSG_RESULT($ljt_cv_inline)
 AC_DEFINE_UNQUOTED([INLINE],[$ljt_cv_inline],[How to obtain function inlining.])
 
+# Arithmetic coding support
 AC_MSG_CHECKING([whether to include arithmetic encoding support])
 AC_ARG_WITH([arith-enc],
-    AC_HELP_STRING([--without-arith-enc], [Omit arithmetic encoding support]))
+  AC_HELP_STRING([--without-arith-enc],
+    [Do not include arithmetic encoding support]))
 if test "x$with_arith_enc" = "xno"; then
-    AC_MSG_RESULT(no)
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-arith-enc"
 else
-    AC_DEFINE([C_ARITH_CODING_SUPPORTED], [1], [Support arithmetic encoding])
-    AC_MSG_RESULT(yes)
+  AC_DEFINE([C_ARITH_CODING_SUPPORTED], [1], [Support arithmetic encoding])
+  AC_MSG_RESULT(yes)
 fi
 AM_CONDITIONAL([WITH_ARITH_ENC], [test "x$with_arith_enc" != "xno"])
 
 AC_MSG_CHECKING([whether to include arithmetic decoding support])
 AC_ARG_WITH([arith-dec],
-    AC_HELP_STRING([--without-arith-dec], [Omit arithmetic decoding support]))
+  AC_HELP_STRING([--without-arith-dec],
+    [Do not include arithmetic decoding support]))
 if test "x$with_arith_dec" = "xno"; then
-    AC_MSG_RESULT(no)
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-arith-dec"
 else
-    AC_DEFINE([D_ARITH_CODING_SUPPORTED], [1], [Support arithmetic decoding])
-    AC_MSG_RESULT(yes)
+  AC_DEFINE([D_ARITH_CODING_SUPPORTED], [1], [Support arithmetic decoding])
+  AC_MSG_RESULT(yes)
 fi
 AM_CONDITIONAL([WITH_ARITH_DEC], [test "x$with_arith_dec" != "xno"])
 
-AM_CONDITIONAL([WITH_ARITH], [test "x$with_arith_dec" != "xno" -o "x$with_arith_enc" != "xno"])
+AM_CONDITIONAL([WITH_ARITH],
+  [test "x$with_arith_dec" != "xno" -o "x$with_arith_enc" != "xno"])
 
+# TurboJPEG support
+AC_MSG_CHECKING([whether to build TurboJPEG C wrapper])
+AC_ARG_WITH([turbojpeg],
+  AC_HELP_STRING([--without-turbojpeg],
+    [Do not include the TurboJPEG wrapper library and associated test programs]))
+if test "x$with_turbojpeg" = "xno"; then
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-turbojpeg"
+else
+  AC_MSG_RESULT(yes)
+fi
+
+# Java support
 AC_ARG_VAR(JAVAC, [Java compiler command (default: javac)])
 if test "x$JAVAC" = "x"; then
   JAVAC=javac
@@ -230,55 +337,58 @@
   JAVA=java
 fi
 AC_SUBST(JAVA)
-AC_ARG_VAR(JNI_CFLAGS, [C compiler flags needed to include jni.h (default: -I/System/Library/Frameworks/JavaVM.framework/Headers on OS X, '-I/usr/java/include -I/usr/java/include/solaris' on Solaris, and '-I/usr/java/default/include -I/usr/java/default/include/linux' on Linux)])
+AC_ARG_VAR(JNI_CFLAGS,
+  [C compiler flags needed to include jni.h (default: -I/System/Library/Frameworks/JavaVM.framework/Headers on OS X, '-I/usr/java/include -I/usr/java/include/solaris' on Solaris, and '-I/usr/java/default/include -I/usr/java/default/include/linux' on Linux)])
 
-AC_MSG_CHECKING([whether to build TurboJPEG/OSS Java wrapper])
+AC_MSG_CHECKING([whether to build TurboJPEG Java wrapper])
 AC_ARG_WITH([java],
-    AC_HELP_STRING([--with-java],[Build Java wrapper for the TurboJPEG/OSS library]))
+  AC_HELP_STRING([--with-java], [Build Java wrapper for the TurboJPEG library]))
+if test "x$with_turbojpeg" = "xno"; then
+  with_java=no
+fi
 
-RPM_CONFIG_ARGS=
 WITH_JAVA=0
 if test "x$with_java" = "xyes"; then
-    AC_MSG_RESULT(yes)
+  AC_MSG_RESULT(yes)
 
-    case $host_os in
-      darwin*)
-        DEFAULT_JNI_CFLAGS=-I/System/Library/Frameworks/JavaVM.framework/Headers
-        ;;
-      solaris*)
-        DEFAULT_JNI_CFLAGS='-I/usr/java/include -I/usr/java/include/solaris'
-        ;;
-      linux*)
-        DEFAULT_JNI_CFLAGS='-I/usr/java/default/include -I/usr/java/default/include/linux'
-        ;;
-    esac
-    if test "x$JNI_CFLAGS" = "x"; then
-        JNI_CFLAGS=$DEFAULT_JNI_CFLAGS
-    fi
+  case $host_os in
+    darwin*)
+      DEFAULT_JNI_CFLAGS=-I/System/Library/Frameworks/JavaVM.framework/Headers
+      ;;
+    solaris*)
+      DEFAULT_JNI_CFLAGS='-I/usr/java/include -I/usr/java/include/solaris'
+      ;;
+    linux*)
+      DEFAULT_JNI_CFLAGS='-I/usr/java/default/include -I/usr/java/default/include/linux'
+      ;;
+  esac
+  if test "x$JNI_CFLAGS" = "x"; then
+    JNI_CFLAGS=$DEFAULT_JNI_CFLAGS
+  fi
 
-    SAVE_CPPFLAGS=${CPPFLAGS}
-    CPPFLAGS="${CPPFLAGS} ${JNI_CFLAGS}"
-    AC_CHECK_HEADERS([jni.h], [DUMMY=1],
-        [AC_MSG_ERROR([Could not find JNI header file])])
-    CPPFLAGS=${SAVE_CPPFLAGS}
-    AC_SUBST(JNI_CFLAGS)
+  SAVE_CPPFLAGS=${CPPFLAGS}
+  CPPFLAGS="${CPPFLAGS} ${JNI_CFLAGS}"
+  AC_CHECK_HEADERS([jni.h], [DUMMY=1],
+    [AC_MSG_ERROR([Could not find JNI header file])])
+  CPPFLAGS=${SAVE_CPPFLAGS}
+  AC_SUBST(JNI_CFLAGS)
 
-    RPM_CONFIG_ARGS=--with-java
-    JAVA_RPM_CONTENTS_1='%dir /opt/%{name}/classes'
-    JAVA_RPM_CONTENTS_2=/opt/%{name}/classes/turbojpeg.jar
-    WITH_JAVA=1
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --with-java"
+  JAVA_RPM_CONTENTS_1='%dir %{_datadir}/classes'
+  JAVA_RPM_CONTENTS_2=%{_datadir}/classes/turbojpeg.jar
+  WITH_JAVA=1
 else
-    AC_MSG_RESULT(no)
+  AC_MSG_RESULT(no)
 fi
 AM_CONDITIONAL([WITH_JAVA], [test "x$with_java" = "xyes"])
 AC_SUBST(WITH_JAVA)
 AC_SUBST(JAVA_RPM_CONTENTS_1)
 AC_SUBST(JAVA_RPM_CONTENTS_2)
-AC_SUBST(RPM_CONFIG_ARGS)
 
 # optionally force using gas-preprocessor.pl for compatibility testing
 AC_ARG_WITH([gas-preprocessor],
-    AC_HELP_STRING([--with-gas-preprocessor],[Force using gas-preprocessor.pl on ARM.]))
+  AC_HELP_STRING([--with-gas-preprocessor],
+    [Force using gas-preprocessor.pl on ARM.]))
 if test "x${with_gas_preprocessor}" = "xyes"; then
   case $host_os in
     darwin*)
@@ -293,8 +403,12 @@
 
 # SIMD is optional
 AC_ARG_WITH([simd],
-    AC_HELP_STRING([--without-simd],[Omit SIMD extensions.]))
+  AC_HELP_STRING([--without-simd], [Do not include SIMD extensions]))
 if test "x${with_simd}" != "xno"; then
+  require_simd=no
+  if test "x${with_simd}" = "xyes"; then
+    require_simd=yes
+  fi
   # Check if we're on a supported CPU
   AC_MSG_CHECKING([if we have SIMD optimisations for cpu type])
   case "$host_cpu" in
@@ -302,12 +416,12 @@
       AC_MSG_RESULT([yes (x86_64)])
       AC_PROG_NASM
       simd_arch=x86_64
-    ;;
+      ;;
     i*86 | x86 | ia32)
       AC_MSG_RESULT([yes (i386)])
       AC_PROG_NASM
       simd_arch=i386
-    ;;
+      ;;
     arm*)
       AC_MSG_RESULT([yes (arm)])
       AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
@@ -315,19 +429,31 @@
         [AC_MSG_RESULT([yes])
          simd_arch=arm],
         [AC_MSG_RESULT([no])
-         with_simd=no
-         AC_MSG_WARN([SIMD support can't be enabled.  Performance will suffer.])])
-    ;;
+         with_simd=no])
+      if test "x${with_simd}" = "xno"; then
+        if test "x${require_simd}" = "xyes"; then
+          AC_MSG_ERROR([SIMD support can't be enabled.])
+        else
+          AC_MSG_WARN([SIMD support can't be enabled.  Performance will suffer.])
+        fi
+      fi
+      ;;
     *)
       AC_MSG_RESULT([no ("$host_cpu")])
-      AC_MSG_WARN([SIMD support not available for this CPU.  Performance will suffer.])
       with_simd=no;
-    ;;
+      if test "x${require_simd}" = "xyes"; then
+        AC_MSG_ERROR([SIMD support not available for this CPU.])
+      else
+        AC_MSG_WARN([SIMD support not available for this CPU.  Performance will suffer.])
+      fi
+      ;;
   esac
 
   if test "x${with_simd}" != "xno"; then
     AC_DEFINE([WITH_SIMD], [1], [Use accelerated SIMD routines.])
   fi
+else
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-simd"
 fi
 
 AM_CONDITIONAL([WITH_SIMD], [test "x$with_simd" != "xno"])
@@ -336,6 +462,13 @@
 AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
 AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
 AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
+AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])
+
+AC_ARG_VAR(PKGNAME, [distribution package name (default: libjpeg-turbo)])
+if test "x$PKGNAME" = "x"; then
+  PKGNAME=$PACKAGE_NAME
+fi
+AC_SUBST(PKGNAME)
 
 case "$host_cpu" in
   x86_64)
@@ -349,9 +482,10 @@
 esac
 
 AC_SUBST(RPMARCH)
+AC_SUBST(RPM_CONFIG_ARGS)
 AC_SUBST(DEBARCH)
 AC_SUBST(BUILD)
-AC_DEFINE_UNQUOTED([BUILD], "$BUILD", [Build number])
+AC_DEFINE_UNQUOTED([BUILD], "$BUILD", [libjpeg-turbo build number])
 
 # jconfig.h is the file we use, but we have another before that to
 # fool autoheader. the reason is that we include this header in our
@@ -359,18 +493,23 @@
 # jconfig.h is a minimal version that allows this package to be built
 AC_CONFIG_HEADERS([config.h])
 AC_CONFIG_HEADERS([jconfig.h])
-AC_CONFIG_FILES([pkgscripts/libjpeg-turbo.spec:release/libjpeg-turbo.spec.in])
-AC_CONFIG_FILES([pkgscripts/makecygwinpkg:release/makecygwinpkg.in])
-AC_CONFIG_FILES([pkgscripts/makedpkg:release/makedpkg.in])
-AC_CONFIG_FILES([pkgscripts/makemacpkg:release/makemacpkg.in])
+AC_CONFIG_HEADERS([jconfigint.h])
+AC_CONFIG_FILES([pkgscripts/libjpeg-turbo.spec.tmpl:release/libjpeg-turbo.spec.in])
+AC_CONFIG_FILES([pkgscripts/makecygwinpkg.tmpl:release/makecygwinpkg.in])
+AC_CONFIG_FILES([pkgscripts/makedpkg.tmpl:release/makedpkg.in])
+AC_CONFIG_FILES([pkgscripts/makemacpkg.tmpl:release/makemacpkg.in])
 AC_CONFIG_FILES([pkgscripts/Description.plist:release/Description.plist.in])
 AC_CONFIG_FILES([pkgscripts/Info.plist:release/Info.plist.in])
-AC_CONFIG_FILES([pkgscripts/uninstall:release/uninstall.in])
-AC_CONFIG_FILES([pkgscripts/makesunpkg:release/makesunpkg.in])
-AC_CONFIG_FILES([pkgscripts/pkginfo:release/pkginfo.in])
-AC_CONFIG_FILES([tjbenchtest])
-AC_CONFIG_FILES([tjexampletest])
+AC_CONFIG_FILES([pkgscripts/uninstall.tmpl:release/uninstall.in])
+if test "x$with_turbojpeg" != "xno"; then
+  AC_CONFIG_FILES([tjbenchtest])
+fi
+if test "x$with_java" = "xyes"; then
+  AC_CONFIG_FILES([tjbenchtest.java])
+  AC_CONFIG_FILES([tjexampletest])
+fi
 AC_CONFIG_FILES([libjpeg.map])
 AC_CONFIG_FILES([Makefile simd/Makefile])
 AC_CONFIG_FILES([java/Makefile])
+AC_CONFIG_FILES([md5/Makefile])
 AC_OUTPUT
diff --git a/djpeg.1 b/djpeg.1
index d9804e3..9b040ae 100644
--- a/djpeg.1
+++ b/djpeg.1
@@ -1,4 +1,4 @@
-.TH DJPEG 1 "1 January 2013"
+.TH DJPEG 1 "11 May 2014"
 .SH NAME
 djpeg \- decompress a JPEG file to an image file
 .SH SYNOPSIS
@@ -55,15 +55,19 @@
 equivalent to \fB\-dct fast \-nosmooth \-onepass \-dither ordered\fR.
 .TP
 .B \-grayscale
-Force gray-scale output even if JPEG file is color.  Useful for viewing on
+Force grayscale output even if JPEG file is color.  Useful for viewing on
 monochrome displays; also,
 .B djpeg
 runs noticeably faster in this mode.
 .TP
+.B \-rgb
+Force RGB output even if JPEG file is grayscale.
+.TP
 .BI \-scale " M/N"
 Scale the output image by a factor M/N.  Currently the scale factor must be
-1/1, 1/2, 1/4, or 1/8.  Scaling is handy if the image is larger than your
-screen; also,
+M/8, where M is an integer between 1 and 16 inclusive, or any reduced fraction
+thereof (such as 1/2, 3/4, etc.).  Scaling is handy if the image is larger than
+your screen; also,
 .B djpeg
 runs much faster when scaling down the output.
 .TP
@@ -73,7 +77,7 @@
 .B \-colors
 or
 .B \-grayscale
-is specified, or if the JPEG file is gray-scale; otherwise, 24-bit full-color
+is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
 format is emitted.
 .TP
 .B \-gif
@@ -87,12 +91,12 @@
 .B \-colors
 or
 .B \-grayscale
-is specified, or if the JPEG file is gray-scale; otherwise, 24-bit full-color
+is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
 format is emitted.
 .TP
 .B \-pnm
 Select PBMPLUS (PPM/PGM) output format (this is the default format).
-PGM is emitted if the JPEG file is gray-scale or if
+PGM is emitted if the JPEG file is grayscale or if
 .B \-grayscale
 is specified; otherwise PPM is emitted.
 .TP
@@ -100,8 +104,8 @@
 Select RLE output format.  (Requires URT library.)
 .TP
 .B \-targa
-Select Targa output format.  Gray-scale format is emitted if the JPEG file is
-gray-scale or if
+Select Targa output format.  Grayscale format is emitted if the JPEG file is
+grayscale or if
 .B \-grayscale
 is specified; otherwise, colormapped format is emitted if
 .B \-colors
@@ -114,14 +118,28 @@
 .TP
 .B \-dct fast
 Use fast integer DCT (less accurate).
+In libjpeg-turbo, the fast method is generally about 5-15% faster than the int
+method when using the x86/x86-64 SIMD extensions (results may vary with other
+SIMD implementations, or when using libjpeg-turbo without SIMD extensions).  If
+the JPEG image was compressed using a quality level of 85 or below, then there
+should be little or no perceptible difference between the two algorithms.  When
+decompressing images that were compressed using quality levels above 85,
+however, the difference between the fast and int methods becomes more
+pronounced.  With images compressed using quality=97, for instance, the fast
+method generally incurs about a 4-6 dB loss (in PSNR) relative to the int
+method, but this can be larger for some images.  If you can avoid it, do not
+use the fast method when decompressing images that were compressed using
+quality levels above 97.  The algorithm often degenerates for such images and
+can actually produce a more lossy output image than if the JPEG image had been
+compressed using lower quality levels.
 .TP
 .B \-dct float
 Use floating-point DCT method.
-The float method is very slightly more accurate than the int method, but is
-much slower unless your machine has very fast floating-point hardware.  Also
-note that results of the floating-point method may vary slightly across
-machines, while the integer methods should give the same results everywhere.
-The fast integer method is much less accurate than the other two.
+The float method is mainly a legacy feature.  It does not produce significantly
+more accurate results than the int method, and it is much slower.  The float
+method may also give different results on different machines due to varying
+roundoff behavior, whereas the integer methods should give the same results on
+all machines.
 .TP
 .B \-dither fs
 Use Floyd-Steinberg dithering in color quantization.
@@ -159,7 +177,7 @@
 is ignored unless you also say
 .B \-colors
 .IR N .
-Also, the one-pass method is always used for gray-scale output (the two-pass
+Also, the one-pass method is always used for grayscale output (the two-pass
 method is no improvement then).
 .TP
 .BI \-maxmemory " N"
@@ -172,6 +190,10 @@
 .BI \-outfile " name"
 Send output image to the named file, not to standard output.
 .TP
+.B \-memsrc
+Load input file into memory before decompressing.  This feature was implemented
+mainly as a way of testing the in-memory source manager (jpeg_mem_src()).
+.TP
 .B \-verbose
 Enable debug printout.  More
 .BR \-v 's
@@ -244,7 +266,8 @@
 Independent JPEG Group
 .PP
 This file was modified by The libjpeg-turbo Project to include only information
-relevant to libjpeg-turbo and to wordsmith certain sections.
+relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
+features not present in libjpeg.
 .SH BUGS
 To avoid the Unisys LZW patent,
 .B djpeg
diff --git a/djpeg.c b/djpeg.c
index b8f1187..7a2eaa0 100644
--- a/djpeg.c
+++ b/djpeg.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
- * Copyright (C) 2010-2011, D. R. Commander.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010-2011, 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a command-line user interface for the JPEG decompressor.
@@ -12,8 +12,8 @@
  *
  * Two different command line styles are permitted, depending on the
  * compile-time switch TWO_FILE_COMMANDLINE:
- *	djpeg [options]  inputfile outputfile
- *	djpeg [options]  [inputfile]
+ *      djpeg [options]  inputfile outputfile
+ *      djpeg [options]  [inputfile]
  * In the second style, output is always to standard output, which you'd
  * normally redirect to a file or pipe to some other program.  Input is
  * either from a named file or from standard input (typically redirected).
@@ -21,30 +21,30 @@
  * don't support pipes.  Also, you MUST use the first style if your system
  * doesn't do binary I/O to stdin/stdout.
  * To simplify script writing, the "-outfile" switch is provided.  The syntax
- *	djpeg [options]  -outfile outputfile  inputfile
+ *      djpeg [options]  -outfile outputfile  inputfile
  * works regardless of which command line style is used.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "jversion.h"		/* for version message */
-#include "config.h"
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#include <ctype.h>		/* to declare isprint() */
+#include <ctype.h>              /* to declare isprint() */
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
 
 /* Create the add-on message string table. */
 
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 static const char * const cdjpeg_message_table[] = {
 #include "cderror.h"
@@ -60,17 +60,17 @@
  */
 
 typedef enum {
-	FMT_BMP,		/* BMP format (Windows flavor) */
-	FMT_GIF,		/* GIF format */
-	FMT_OS2,		/* BMP format (OS/2 flavor) */
-	FMT_PPM,		/* PPM/PGM (PBMPLUS formats) */
-	FMT_RLE,		/* RLE format */
-	FMT_TARGA,		/* Targa format */
-	FMT_TIFF		/* TIFF format */
+        FMT_BMP,                /* BMP format (Windows flavor) */
+        FMT_GIF,                /* GIF format */
+        FMT_OS2,                /* BMP format (OS/2 flavor) */
+        FMT_PPM,                /* PPM/PGM (PBMPLUS formats) */
+        FMT_RLE,                /* RLE format */
+        FMT_TARGA,              /* Targa format */
+        FMT_TIFF                /* TIFF format */
 } IMAGE_FORMATS;
 
-#ifndef DEFAULT_FMT		/* so can override from CFLAGS in Makefile */
-#define DEFAULT_FMT	FMT_PPM
+#ifndef DEFAULT_FMT             /* so can override from CFLAGS in Makefile */
+#define DEFAULT_FMT     FMT_PPM
 #endif
 
 static IMAGE_FORMATS requested_fmt;
@@ -85,8 +85,10 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+boolean memsrc;  /* for -memsrc switch */
+#define INPUT_BUF_SIZE  4096
 
 
 LOCAL(void)
@@ -110,40 +112,40 @@
 #endif
 #ifdef BMP_SUPPORTED
   fprintf(stderr, "  -bmp           Select BMP output format (Windows style)%s\n",
-	  (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
 #endif
 #ifdef GIF_SUPPORTED
   fprintf(stderr, "  -gif           Select GIF output format%s\n",
-	  (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
 #endif
 #ifdef BMP_SUPPORTED
   fprintf(stderr, "  -os2           Select BMP output format (OS/2 style)%s\n",
-	  (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
 #endif
 #ifdef PPM_SUPPORTED
   fprintf(stderr, "  -pnm           Select PBMPLUS (PPM/PGM) output format%s\n",
-	  (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
 #endif
 #ifdef RLE_SUPPORTED
   fprintf(stderr, "  -rle           Select Utah RLE output format%s\n",
-	  (DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
 #endif
 #ifdef TARGA_SUPPORTED
   fprintf(stderr, "  -targa         Select Targa output format%s\n",
-	  (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
 #endif
   fprintf(stderr, "Switches for advanced users:\n");
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   fprintf(stderr, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
-	  (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   fprintf(stderr, "  -dct float     Use floating-point DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
 #endif
   fprintf(stderr, "  -dither fs     Use F-S dithering (default)\n");
   fprintf(stderr, "  -dither none   Don't use dithering in quantization\n");
@@ -157,6 +159,10 @@
 #endif
   fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
   fprintf(stderr, "  -outfile name  Specify name for output file\n");
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  fprintf(stderr, "  -memsrc        Load input file into memory before decompressing\n");
+#endif
+
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   exit(EXIT_FAILURE);
 }
@@ -164,7 +170,7 @@
 
 LOCAL(int)
 parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -178,8 +184,9 @@
   char * arg;
 
   /* Set up default JPEG parameters. */
-  requested_fmt = DEFAULT_FMT;	/* set default output file format */
+  requested_fmt = DEFAULT_FMT;  /* set default output file format */
   outfilename = NULL;
+  memsrc = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -189,54 +196,54 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "bmp", 1)) {
       /* BMP output format. */
       requested_fmt = FMT_BMP;
 
     } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) ||
-	       keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
+               keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
       /* Do color quantization. */
       int val;
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d", &val) != 1)
-	usage();
+        usage();
       cinfo->desired_number_of_colors = val;
       cinfo->quantize_colors = TRUE;
 
     } else if (keymatch(arg, "dct", 2)) {
       /* Select IDCT algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "int", 1)) {
-	cinfo->dct_method = JDCT_ISLOW;
+        cinfo->dct_method = JDCT_ISLOW;
       } else if (keymatch(argv[argn], "fast", 2)) {
-	cinfo->dct_method = JDCT_IFAST;
+        cinfo->dct_method = JDCT_IFAST;
       } else if (keymatch(argv[argn], "float", 2)) {
-	cinfo->dct_method = JDCT_FLOAT;
+        cinfo->dct_method = JDCT_FLOAT;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "dither", 2)) {
       /* Select dithering algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "fs", 2)) {
-	cinfo->dither_mode = JDITHER_FS;
+        cinfo->dither_mode = JDITHER_FS;
       } else if (keymatch(argv[argn], "none", 2)) {
-	cinfo->dither_mode = JDITHER_NONE;
+        cinfo->dither_mode = JDITHER_NONE;
       } else if (keymatch(argv[argn], "ordered", 2)) {
-	cinfo->dither_mode = JDITHER_ORDERED;
+        cinfo->dither_mode = JDITHER_ORDERED;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
       /* Enable debug printouts. */
@@ -244,12 +251,12 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "%s version %s (build %s)\n",
-		PACKAGE_NAME, VERSION, BUILD);
-	fprintf(stderr, "%s\n\n", JCOPYRIGHT);
-	fprintf(stderr, "Emulating The Independent JPEG Group's libjpeg, version %s\n\n",
-		JVERSION);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
@@ -258,7 +265,7 @@
       cinfo->two_pass_quantize = FALSE;
       cinfo->dither_mode = JDITHER_ORDERED;
       if (! cinfo->quantize_colors) /* don't override an earlier -colors */
-	cinfo->desired_number_of_colors = 216;
+        cinfo->desired_number_of_colors = 216;
       cinfo->dct_method = JDCT_FASTEST;
       cinfo->do_fancy_upsampling = FALSE;
 
@@ -276,21 +283,21 @@
 
     } else if (keymatch(arg, "map", 3)) {
       /* Quantize to a color map taken from an input file. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      if (for_real) {		/* too expensive to do twice! */
-#ifdef QUANT_2PASS_SUPPORTED	/* otherwise can't quantize to supplied map */
-	FILE * mapfile;
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (for_real) {           /* too expensive to do twice! */
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
+        FILE * mapfile;
 
-	if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
-	  fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
-	  exit(EXIT_FAILURE);
-	}
-	read_color_map(cinfo, mapfile);
-	fclose(mapfile);
-	cinfo->quantize_colors = TRUE;
+        if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
+          fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+          exit(EXIT_FAILURE);
+        }
+        read_color_map(cinfo, mapfile);
+        fclose(mapfile);
+        cinfo->quantize_colors = TRUE;
 #else
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
       }
 
@@ -299,12 +306,12 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "nosmooth", 3)) {
@@ -321,9 +328,19 @@
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
+
+    } else if (keymatch(arg, "memsrc", 2)) {
+      /* Use in-memory source manager */
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+      memsrc = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
 
     } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
       /* PPM/PGM output format. */
@@ -335,22 +352,22 @@
 
     } else if (keymatch(arg, "scale", 1)) {
       /* Scale the output image by a fraction M/N. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d/%d",
-		 &cinfo->scale_num, &cinfo->scale_denom) != 2)
-	usage();
+                 &cinfo->scale_num, &cinfo->scale_denom) != 2)
+        usage();
 
     } else if (keymatch(arg, "targa", 1)) {
       /* Targa output format. */
       requested_fmt = FMT_TARGA;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -386,14 +403,14 @@
 
   length = jpeg_getc(cinfo) << 8;
   length += jpeg_getc(cinfo);
-  length -= 2;			/* discount the length word itself */
+  length -= 2;                  /* discount the length word itself */
 
   if (traceit) {
     if (cinfo->unread_marker == JPEG_COM)
       fprintf(stderr, "Comment, length %ld:\n", (long) length);
-    else			/* assume it is an APPn otherwise */
+    else                        /* assume it is an APPn otherwise */
       fprintf(stderr, "APP%d, length %ld:\n",
-	      cinfo->unread_marker - JPEG_APP0, (long) length);
+              cinfo->unread_marker - JPEG_APP0, (long) length);
   }
 
   while (--length >= 0) {
@@ -405,16 +422,16 @@
        * Newlines in CR, CR/LF, or LF form will be printed as one newline.
        */
       if (ch == '\r') {
-	fprintf(stderr, "\n");
+        fprintf(stderr, "\n");
       } else if (ch == '\n') {
-	if (lastch != '\r')
-	  fprintf(stderr, "\n");
+        if (lastch != '\r')
+          fprintf(stderr, "\n");
       } else if (ch == '\\') {
-	fprintf(stderr, "\\\\");
+        fprintf(stderr, "\\\\");
       } else if (isprint(ch)) {
-	putc(ch, stderr);
+        putc(ch, stderr);
       } else {
-	fprintf(stderr, "\\%03o", ch);
+        fprintf(stderr, "\\%03o", ch);
       }
       lastch = ch;
     }
@@ -443,6 +460,8 @@
   djpeg_dest_ptr dest_mgr = NULL;
   FILE * input_file;
   FILE * output_file;
+  unsigned char *inbuffer = NULL;
+  unsigned long insize = 0;
   JDIMENSION num_scanlines;
 
   /* On Mac, fetch a command line. */
@@ -452,7 +471,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "djpeg";		/* in case C library doesn't provide it */
+    progname = "djpeg";         /* in case C library doesn't provide it */
 
   /* Initialize the JPEG decompression object with default error handling. */
   cinfo.err = jpeg_std_error(&jerr);
@@ -491,14 +510,14 @@
   if (outfilename == NULL) {
     if (file_index != argc-2) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
     outfilename = argv[file_index+1];
   } else {
     if (file_index != argc-1) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
   }
@@ -537,7 +556,30 @@
 #endif
 
   /* Specify data source for decompression */
-  jpeg_stdio_src(&cinfo, input_file);
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  if (memsrc) {
+    size_t nbytes;
+    do {
+      inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE);
+      if (inbuffer == NULL) {
+        fprintf(stderr, "%s: memory allocation failure\n", progname);
+        exit(EXIT_FAILURE);
+      }
+      nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE);
+      if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) {
+        /* A failed read leaves the buffer truncated; stop here rather than */
+        /* decode partial data as though it were the whole file. */
+        fprintf(stderr, "%s: can't read from %s\n", progname,
+                file_index < argc ? argv[file_index] : "stdin");
+        exit(EXIT_FAILURE);
+      }
+      insize += (unsigned long)nbytes;
+    } while (nbytes == INPUT_BUF_SIZE);
+    fprintf(stderr, "Compressed size:  %lu bytes\n", insize);
+    jpeg_mem_src(&cinfo, inbuffer, insize);
+  } else
+#endif
+    jpeg_stdio_src(&cinfo, input_file);
 
   /* Read file header, set default decompression parameters */
   (void) jpeg_read_header(&cinfo, TRUE);
@@ -592,7 +634,7 @@
   /* Process data */
   while (cinfo.output_scanline < cinfo.output_height) {
     num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
-					dest_mgr->buffer_height);
+                                        dest_mgr->buffer_height);
     (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
   }
 
@@ -621,7 +663,10 @@
   end_progress_monitor((j_common_ptr) &cinfo);
 #endif
 
+  if (memsrc && inbuffer != NULL)
+    free(inbuffer);
+
   /* All done. */
   exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/doc/html/annotated.html b/doc/html/annotated.html
index 16a9c48..7ceaf3e 100644
--- a/doc/html/annotated.html
+++ b/doc/html/annotated.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Data Structures</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -55,24 +66,13 @@
       <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="headertitle">
-<div class="title">Data Structures</div>  </div>
-</div>
-<div class="contents">
-<div class="textblock">Here are the data structures with brief descriptions:</div><table>
-  <tr><td class="indexkey"><a class="el" href="structtjregion.html">tjregion</a></td><td class="indexvalue">Cropping region </td></tr>
-  <tr><td class="indexkey"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a></td><td class="indexvalue">Scaling factor </td></tr>
-  <tr><td class="indexkey"><a class="el" href="structtjtransform.html">tjtransform</a></td><td class="indexvalue">Lossless transform </td></tr>
-</table>
-</div>
+</div><!-- top -->
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -81,8 +81,24 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+<div class="header">
+  <div class="headertitle">
+<div class="title">Data Structures</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="textblock">Here are the data structures with brief descriptions:</div><div class="directory">
+<table class="directory">
+<tr id="row_0_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjregion.html" target="_self">tjregion</a></td><td class="desc">Cropping region</td></tr>
+<tr id="row_1_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjscalingfactor.html" target="_self">tjscalingfactor</a></td><td class="desc">Scaling factor</td></tr>
+<tr id="row_2_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjtransform.html" target="_self">tjtransform</a></td><td class="desc">Lossless transform</td></tr>
+</table>
+</div><!-- directory -->
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/bc_s.png b/doc/html/bc_s.png
index e401862..224b29a 100644
--- a/doc/html/bc_s.png
+++ b/doc/html/bc_s.png
Binary files differ
diff --git a/doc/html/bdwn.png b/doc/html/bdwn.png
new file mode 100644
index 0000000..940a0b9
--- /dev/null
+++ b/doc/html/bdwn.png
Binary files differ
diff --git a/doc/html/classes.html b/doc/html/classes.html
index 9327b4f..613ca2b 100644
--- a/doc/html/classes.html
+++ b/doc/html/classes.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Data Structure Index</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -55,23 +66,13 @@
       <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="headertitle">
-<div class="title">Data Structure Index</div>  </div>
-</div>
-<div class="contents">
-<div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
-<table align="center" width="95%" border="0" cellspacing="0" cellpadding="0">
-<tr><td><a name="letter_T"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;T&#160;&#160;</div></td></tr></table>
-</td><td><a class="el" href="structtjregion.html">tjregion</a>&#160;&#160;&#160;</td><td><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a>&#160;&#160;&#160;</td><td><a class="el" href="structtjtransform.html">tjtransform</a>&#160;&#160;&#160;</td></tr></table><div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
-</div>
+</div><!-- top -->
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -80,8 +81,26 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+<div class="header">
+  <div class="headertitle">
+<div class="title">Data Structure Index</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
+<table style="margin: 10px; white-space: nowrap;" align="center" width="95%" border="0" cellspacing="0" cellpadding="0">
+<tr><td rowspan="2" valign="bottom"><a name="letter_T"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;T&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a>&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtjtransform.html">tjtransform</a>&#160;&#160;&#160;</td><td></td></tr>
+<tr><td></td><td></td><td></td></tr>
+<tr><td valign="top"><a class="el" href="structtjregion.html">tjregion</a>&#160;&#160;&#160;</td><td></td><td></td><td></td></tr>
+<tr><td></td><td></td><td></td><td></td></tr>
+</table>
+<div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/closed.png b/doc/html/closed.png
index b7d4bd9..98cc2c9 100644
--- a/doc/html/closed.png
+++ b/doc/html/closed.png
Binary files differ
diff --git a/doc/html/doxygen-extra.css b/doc/html/doxygen-extra.css
new file mode 100644
index 0000000..5abbcc2
--- /dev/null
+++ b/doc/html/doxygen-extra.css
@@ -0,0 +1,3 @@
+code {
+	color: #4665A2; 
+}
diff --git a/doc/html/doxygen.css b/doc/html/doxygen.css
index 74445fe..dabaff2 100644
--- a/doc/html/doxygen.css
+++ b/doc/html/doxygen.css
@@ -1,13 +1,12 @@
-/* The standard CSS for doxygen */
+/* The standard CSS for doxygen 1.8.3.1 */
 
 body, table, div, p, dl {
-	font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif;
-	font-size: 12px;
+	font: 400 14px/19px Roboto,sans-serif;
 }
 
 /* @group Heading Levels */
 
-h1 {
+h1.groupheader {
 	font-size: 150%;
 }
 
@@ -17,14 +16,34 @@
 	margin: 10px 2px;
 }
 
-h2 {
-	font-size: 120%;
+h2.groupheader {
+	border-bottom: 1px solid #879ECB;
+	color: #354C7B;
+	font-size: 150%;
+	font-weight: normal;
+	margin-top: 1.75em;
+	padding-top: 8px;
+	padding-bottom: 4px;
+	width: 100%;
 }
 
-h3 {
+h3.groupheader {
 	font-size: 100%;
 }
 
+h1, h2, h3, h4, h5, h6 {
+	-webkit-transition: text-shadow 0.5s linear;
+	-moz-transition: text-shadow 0.5s linear;
+	-ms-transition: text-shadow 0.5s linear;
+	-o-transition: text-shadow 0.5s linear;
+	transition: text-shadow 0.5s linear;
+	margin-right: 15px;
+}
+
+h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow {
+	text-shadow: 0 0 15px cyan;
+}
+
 dt {
 	font-weight: bold;
 }
@@ -72,8 +91,6 @@
 	background-color: #EBEFF6;
 	border: 1px solid #A3B4D7;
 	text-align: center;
-	margin: 2px;
-	padding: 2px;
 }
 
 div.qindex, div.navpath {
@@ -123,12 +140,12 @@
 a.elRef {
 }
 
-a.code {
-	color: #4665A2;
+a.code, a.code:visited {
+	color: #4665A2; 
 }
 
-a.codeRef {
-	color: #4665A2;
+a.codeRef, a.codeRef:visited {
+	color: #4665A2; 
 }
 
 /* @end */
@@ -137,20 +154,72 @@
 	margin-left: -1cm;
 }
 
-.fragment {
-	font-family: monospace, fixed;
-	font-size: 105%;
+pre.fragment {
+        border: 1px solid #C4CFE5;
+        background-color: #FBFCFD;
+        padding: 4px 6px;
+        margin: 4px 8px 4px 2px;
+        overflow: auto;
+        word-wrap: break-word;
+        font-size:  9pt;
+        line-height: 125%;
+        font-family: monospace, fixed;
+        font-size: 105%;
 }
 
-pre.fragment {
-	border: 1px solid #C4CFE5;
+div.fragment {
+        padding: 4px;
+        margin: 4px;
 	background-color: #FBFCFD;
-	padding: 4px 6px;
-	margin: 4px 8px 4px 2px;
-	overflow: auto;
-	word-wrap: break-word;
-	font-size:  9pt;
-	line-height: 125%;
+	border: 1px solid #C4CFE5;
+}
+
+div.line {
+	font-family: monospace, fixed;
+        font-size: 13px;
+	min-height: 13px;
+	line-height: 1.0;
+	text-wrap: unrestricted;
+	white-space: -moz-pre-wrap; /* Moz */
+	white-space: -pre-wrap;     /* Opera 4-6 */
+	white-space: -o-pre-wrap;   /* Opera 7 */
+	white-space: pre-wrap;      /* CSS3  */
+	word-wrap: break-word;      /* IE 5.5+ */
+	text-indent: -53px;
+	padding-left: 53px;
+	padding-bottom: 0px;
+	margin: 0px;
+	-webkit-transition-property: background-color, box-shadow;
+	-webkit-transition-duration: 0.5s;
+	-moz-transition-property: background-color, box-shadow;
+	-moz-transition-duration: 0.5s;
+	-ms-transition-property: background-color, box-shadow;
+	-ms-transition-duration: 0.5s;
+	-o-transition-property: background-color, box-shadow;
+	-o-transition-duration: 0.5s;
+	transition-property: background-color, box-shadow;
+	transition-duration: 0.5s;
+}
+
+div.line.glow {
+	background-color: cyan;
+	box-shadow: 0 0 10px cyan;
+}
+
+
+span.lineno {
+	padding-right: 4px;
+	text-align: right;
+	border-right: 2px solid #0F0;
+	background-color: #E8E8E8;
+        white-space: pre;
+}
+span.lineno a {
+	background-color: #D8D8D8;
+}
+
+span.lineno a:hover {
+	background-color: #C8C8C8;
 }
 
 div.ah {
@@ -183,15 +252,15 @@
 }
 
 body {
-	background: white;
+	background-color: white;
 	color: black;
         margin: 0;
 }
 
 div.contents {
 	margin-top: 10px;
-	margin-left: 10px;
-	margin-right: 5px;
+	margin-left: 12px;
+	margin-right: 8px;
 }
 
 td.indexkey {
@@ -200,6 +269,8 @@
 	border: 1px solid #C4CFE5;
 	margin: 2px 0px 2px 0;
 	padding: 2px 10px;
+        white-space: nowrap;
+        vertical-align: top;
 }
 
 td.indexvalue {
@@ -292,6 +363,13 @@
 	color: #ff0000 
 }
 
+blockquote {
+        background-color: #F7F8FB;
+        border-left: 2px solid #9CAFD4;
+        margin: 0 24px 0 4px;
+        padding: 0 12px 0 16px;
+}
+
 /* @end */
 
 /*
@@ -345,6 +423,24 @@
 	padding: 0px;
 }
 
+.memberdecls td, .fieldtable tr {
+	-webkit-transition-property: background-color, box-shadow;
+	-webkit-transition-duration: 0.5s;
+	-moz-transition-property: background-color, box-shadow;
+	-moz-transition-duration: 0.5s;
+	-ms-transition-property: background-color, box-shadow;
+	-ms-transition-duration: 0.5s;
+	-o-transition-property: background-color, box-shadow;
+	-o-transition-duration: 0.5s;
+	transition-property: background-color, box-shadow;
+	transition-duration: 0.5s;
+}
+
+.memberdecls td.glow, .fieldtable tr.glow {
+	background-color: cyan;
+	box-shadow: 0 0 15px cyan;
+}
+
 .mdescLeft, .mdescRight,
 .memItemLeft, .memItemRight,
 .memTemplItemLeft, .memTemplItemRight, .memTemplParams {
@@ -359,8 +455,11 @@
 	color: #555;
 }
 
-.memItemLeft, .memItemRight, .memTemplParams {
-	border-top: 1px solid #C4CFE5;
+.memSeparator {
+        border-bottom: 1px solid #DEE4F0;
+        line-height: 1px;
+        margin: 0px;
+        padding: 0px;
 }
 
 .memItemLeft, .memTemplItemLeft {
@@ -374,6 +473,7 @@
 .memTemplParams {
 	color: #4665A2;
         white-space: nowrap;
+	font-size: 80%;
 }
 
 /* @end */
@@ -406,15 +506,29 @@
 	padding: 0;
 	margin-bottom: 10px;
 	margin-right: 5px;
+        -webkit-transition: box-shadow 0.5s linear;
+        -moz-transition: box-shadow 0.5s linear;
+        -ms-transition: box-shadow 0.5s linear;
+        -o-transition: box-shadow 0.5s linear;
+        transition: box-shadow 0.5s linear;
+        display: table !important;
+        width: 100%;
+}
+
+.memitem.glow {
+         box-shadow: 0 0 15px cyan;
 }
 
 .memname {
-        white-space: nowrap;
         font-weight: bold;
         margin-left: 6px;
 }
 
-.memproto {
+.memname td {
+	vertical-align: bottom;
+}
+
+.memproto, dl.reflist dt {
         border-top: 1px solid #A8B8D9;
         border-left: 1px solid #A8B8D9;
         border-right: 1px solid #A8B8D9;
@@ -422,45 +536,55 @@
         color: #253555;
         font-weight: bold;
         text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9);
-        /* opera specific markup */
-        box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
-        border-top-right-radius: 8px;
-        border-top-left-radius: 8px;
-        /* firefox specific markup */
-        -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px;
-        -moz-border-radius-topright: 8px;
-        -moz-border-radius-topleft: 8px;
-        /* webkit specific markup */
-        -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
-        -webkit-border-top-right-radius: 8px;
-        -webkit-border-top-left-radius: 8px;
         background-image:url('nav_f.png');
         background-repeat:repeat-x;
         background-color: #E2E8F2;
+        /* opera specific markup */
+        box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+        border-top-right-radius: 4px;
+        border-top-left-radius: 4px;
+        /* firefox specific markup */
+        -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px;
+        -moz-border-radius-topright: 4px;
+        -moz-border-radius-topleft: 4px;
+        /* webkit specific markup */
+        -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+        -webkit-border-top-right-radius: 4px;
+        -webkit-border-top-left-radius: 4px;
 
 }
 
-.memdoc {
+.memdoc, dl.reflist dd {
         border-bottom: 1px solid #A8B8D9;      
         border-left: 1px solid #A8B8D9;      
         border-right: 1px solid #A8B8D9; 
-        padding: 2px 5px;
+        padding: 6px 10px 2px 10px;
         background-color: #FBFCFD;
         border-top-width: 0;
+        background-image:url('nav_g.png');
+        background-repeat:repeat-x;
+        background-color: #FFFFFF;
         /* opera specific markup */
-        border-bottom-left-radius: 8px;
-        border-bottom-right-radius: 8px;
+        border-bottom-left-radius: 4px;
+        border-bottom-right-radius: 4px;
         box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
         /* firefox specific markup */
-        -moz-border-radius-bottomleft: 8px;
-        -moz-border-radius-bottomright: 8px;
+        -moz-border-radius-bottomleft: 4px;
+        -moz-border-radius-bottomright: 4px;
         -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px;
-        background-image: -moz-linear-gradient(center top, #FFFFFF 0%, #FFFFFF 60%, #F7F8FB 95%, #EEF1F7);
         /* webkit specific markup */
-        -webkit-border-bottom-left-radius: 8px;
-        -webkit-border-bottom-right-radius: 8px;
+        -webkit-border-bottom-left-radius: 4px;
+        -webkit-border-bottom-right-radius: 4px;
         -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
-        background-image: -webkit-gradient(linear,center top,center bottom,from(#FFFFFF), color-stop(0.6,#FFFFFF), color-stop(0.60,#FFFFFF), color-stop(0.95,#F7F8FB), to(#EEF1F7));
+}
+
+dl.reflist dt {
+        padding: 5px;
+}
+
+dl.reflist dd {
+        margin: 0px 0px 10px 0px;
+        padding: 5px;
 }
 
 .paramkey {
@@ -478,9 +602,13 @@
 .paramname em {
 	font-style: normal;
 }
+.paramname code {
+        line-height: 14px;
+}
 
 .params, .retval, .exception, .tparams {
-        border-spacing: 6px 2px;
+        margin-left: 0px;
+        padding-left: 0px;
 }       
 
 .params .paramname, .retval .paramname {
@@ -498,105 +626,116 @@
         vertical-align: top;
 }
 
+table.mlabels {
+	border-spacing: 0px;
+}
+
+td.mlabels-left {
+	width: 100%;
+	padding: 0px;
+}
+
+td.mlabels-right {
+	vertical-align: bottom;
+	padding: 0px;
+	white-space: nowrap;
+}
+
+span.mlabels {
+        margin-left: 8px;
+}
+
+span.mlabel {
+        background-color: #728DC1;
+        border-top:1px solid #5373B4;
+        border-left:1px solid #5373B4;
+        border-right:1px solid #C4CFE5;
+        border-bottom:1px solid #C4CFE5;
+	text-shadow: none;
+	color: white;
+	margin-right: 4px;
+	padding: 2px 3px;
+	border-radius: 3px;
+	font-size: 7pt;
+	white-space: nowrap;
+	vertical-align: middle;
+}
 
 
 
 /* @end */
 
-/* @group Directory (tree) */
+/* these are for tree view when not used as main index */
 
-/* for the tree view */
-
-.ftvtree {
-	font-family: sans-serif;
-	margin: 0px;
+div.directory {
+        margin: 10px 0px;
+        border-top: 1px solid #A8B8D9;
+        border-bottom: 1px solid #A8B8D9;
+        width: 100%;
 }
 
-/* these are for tree view when used as main index */
-
-.directory {
-	font-size: 9pt;
-	font-weight: bold;
-	margin: 5px;
+.directory table {
+        border-collapse:collapse;
 }
 
-.directory h3 {
-	margin: 0px;
-	margin-top: 1em;
-	font-size: 11pt;
+.directory td {
+        margin: 0px;
+        padding: 0px;
+	vertical-align: top;
 }
 
-/*
-The following two styles can be used to replace the root node title
-with an image of your choice.  Simply uncomment the next two styles,
-specify the name of your image and be sure to set 'height' to the
-proper pixel height of your image.
-*/
-
-/*
-.directory h3.swap {
-	height: 61px;
-	background-repeat: no-repeat;
-	background-image: url("yourimage.gif");
-}
-.directory h3.swap span {
-	display: none;
-}
-*/
-
-.directory > h3 {
-	margin-top: 0;
+.directory td.entry {
+        white-space: nowrap;
+        padding-right: 6px;
 }
 
-.directory p {
-	margin: 0px;
-	white-space: nowrap;
+.directory td.entry a {
+        outline:none;
 }
 
-.directory div {
-	display: none;
-	margin: 0px;
+.directory td.entry a img {
+        border: none;
+}
+
+.directory td.desc {
+        width: 100%;
+        padding-left: 6px;
+	padding-right: 6px;
+	padding-top: 3px;
+	border-left: 1px solid rgba(0,0,0,0.05);
+}
+
+.directory tr.even {
+	padding-left: 6px;
+	background-color: #F7F8FB;
 }
 
 .directory img {
 	vertical-align: -30%;
 }
 
-/* these are for tree view when not used as main index */
-
-.directory-alt {
-	font-size: 100%;
-	font-weight: bold;
+.directory .levels {
+        white-space: nowrap;
+        width: 100%;
+        text-align: right;
+        font-size: 9pt;
 }
 
-.directory-alt h3 {
-	margin: 0px;
-	margin-top: 1em;
-	font-size: 11pt;
+.directory .levels span {
+        cursor: pointer;
+        padding-left: 2px;
+        padding-right: 2px;
+	color: #3D578C;
 }
 
-.directory-alt > h3 {
-	margin-top: 0;
-}
-
-.directory-alt p {
-	margin: 0px;
-	white-space: nowrap;
-}
-
-.directory-alt div {
-	display: none;
-	margin: 0px;
-}
-
-.directory-alt img {
-	vertical-align: -30%;
-}
-
-/* @end */
-
 div.dynheader {
         margin-top: 8px;
+	-webkit-touch-callout: none;
+	-webkit-user-select: none;
+	-khtml-user-select: none;
+	-moz-user-select: none;
+	-ms-user-select: none;
+	user-select: none;
 }
 
 address {
@@ -606,6 +745,8 @@
 
 table.doxtable {
 	border-collapse:collapse;
+        margin-top: 4px;
+        margin-bottom: 4px;
 }
 
 table.doxtable td, table.doxtable th {
@@ -619,9 +760,72 @@
 	font-size: 110%;
 	padding-bottom: 4px;
 	padding-top: 5px;
-	text-align:left;
 }
 
+table.fieldtable {
+        /*width: 100%;*/
+        margin-bottom: 10px;
+        border: 1px solid #A8B8D9;
+        border-spacing: 0px;
+        -moz-border-radius: 4px;
+        -webkit-border-radius: 4px;
+        border-radius: 4px;
+        -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px;
+        -webkit-box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15);
+        box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15);
+}
+
+.fieldtable td, .fieldtable th {
+        padding: 3px 7px 2px;
+}
+
+.fieldtable td.fieldtype, .fieldtable td.fieldname {
+        white-space: nowrap;
+        border-right: 1px solid #A8B8D9;
+        border-bottom: 1px solid #A8B8D9;
+        vertical-align: top;
+}
+
+.fieldtable td.fieldname {
+        padding-top: 5px;
+}
+
+.fieldtable td.fielddoc {
+        border-bottom: 1px solid #A8B8D9;
+        /*width: 100%;*/
+}
+
+.fieldtable td.fielddoc p:first-child {
+        margin-top: 2px;
+}       
+        
+.fieldtable td.fielddoc p:last-child {
+        margin-bottom: 2px;
+}
+
+.fieldtable tr:last-child td {
+        border-bottom: none;
+}
+
+.fieldtable th {
+        background-image:url('nav_f.png');
+        background-repeat:repeat-x;
+        background-color: #E2E8F2;
+        font-size: 90%;
+        color: #253555;
+        padding-bottom: 4px;
+        padding-top: 5px;
+        text-align:left;
+        -moz-border-radius-topleft: 4px;
+        -moz-border-radius-topright: 4px;
+        -webkit-border-top-left-radius: 4px;
+        -webkit-border-top-right-radius: 4px;
+        border-top-left-radius: 4px;
+        border-top-right-radius: 4px;
+        border-bottom: 1px solid #A8B8D9;
+}
+
+
 .tabsearch {
 	top: 0px;
 	left: 10px;
@@ -637,6 +841,7 @@
 	font-size: 11px;
 	background-image:url('tab_b.png');
 	background-repeat:repeat-x;
+	background-position: 0 -5px;
 	height:30px;
 	line-height:30px;
 	color:#8AA0CC;
@@ -664,6 +869,10 @@
 	display:block;
 	text-decoration: none;
 	outline: none;
+	color: #283A5D;
+	font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif;
+	text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9);
+	text-decoration: none;        
 }
 
 .navpath li.navelem a:hover
@@ -702,7 +911,6 @@
 div.ingroups
 {
 	font-size: 8pt;
-	padding-left: 5px;
 	width: 50%;
 	text-align: left;
 }
@@ -731,47 +939,74 @@
         padding: 0 0 0 10px;
 }
 
-dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug
+/* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug */
+dl.section
 {
-        border-left:4px solid;
-        padding: 0 0 0 6px;
+	margin-left: 0px;
+	padding-left: 0px;
 }
 
 dl.note
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #D0C000;
 }
 
 dl.warning, dl.attention
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #FF0000;
 }
 
 dl.pre, dl.post, dl.invariant
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #00D000;
 }
 
 dl.deprecated
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #505050;
 }
 
 dl.todo
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #00C0E0;
 }
 
 dl.test
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #3030E0;
 }
 
 dl.bug
 {
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
         border-color: #C08050;
 }
 
+dl.section dd {
+	margin-bottom: 6px;
+}
+
+
 #projectlogo
 {
 	text-align: center;
@@ -833,3 +1068,117 @@
 	font-weight: bold;
 }
 
+div.zoom
+{
+	border: 1px solid #90A5CE;
+}
+
+dl.citelist {
+        margin-bottom:50px;
+}
+
+dl.citelist dt {
+        color:#334975;
+        float:left;
+        font-weight:bold;
+        margin-right:10px;
+        padding:5px;
+}
+
+dl.citelist dd {
+        margin:2px 0;
+        padding:5px 0;
+}
+
+div.toc {
+        padding: 14px 25px;
+        background-color: #F4F6FA;
+        border: 1px solid #D8DFEE;
+        border-radius: 7px 7px 7px 7px;
+        float: right;
+        height: auto;
+        margin: 0 20px 10px 10px;
+        width: 200px;
+}
+
+div.toc li {
+        background: url("bdwn.png") no-repeat scroll 0 5px transparent;
+        font: 10px/1.2 Verdana,DejaVu Sans,Geneva,sans-serif;
+        margin-top: 5px;
+        padding-left: 10px;
+        padding-top: 2px;
+}
+
+div.toc h3 {
+        font: bold 12px/1.2 Arial,FreeSans,sans-serif;
+	color: #4665A2;
+        border-bottom: 0 none;
+        margin: 0;
+}
+
+div.toc ul {
+        list-style: none outside none;
+        border: medium none;
+        padding: 0px;
+}       
+
+div.toc li.level1 {
+        margin-left: 0px;
+}
+
+div.toc li.level2 {
+        margin-left: 15px;
+}
+
+div.toc li.level3 {
+        margin-left: 30px;
+}
+
+div.toc li.level4 {
+        margin-left: 45px;
+}
+
+.inherit_header {
+        font-weight: bold;
+        color: gray;
+        cursor: pointer;
+	-webkit-touch-callout: none;
+	-webkit-user-select: none;
+	-khtml-user-select: none;
+	-moz-user-select: none;
+	-ms-user-select: none;
+	user-select: none;
+}
+
+.inherit_header td {
+        padding: 6px 0px 2px 5px;
+}
+
+.inherit {
+        display: none;
+}
+
+tr.heading h2 {
+        margin-top: 12px;
+        margin-bottom: 4px;
+}
+
+@media print
+{
+  #top { display: none; }
+  #side-nav { display: none; }
+  #nav-path { display: none; }
+  body { overflow:visible; }
+  h1, h2, h3, h4, h5, h6 { page-break-after: avoid; }
+  .summary { display: none; }
+  .memitem { page-break-inside: avoid; }
+  #doc-content
+  {
+    margin-left:0 !important;
+    height:auto !important;
+    width:auto !important;
+    overflow:inherit;
+    display:inline;
+  }
+}
+
diff --git a/doc/html/doxygen.png b/doc/html/doxygen.png
index 635ed52..3ff17d8 100644
--- a/doc/html/doxygen.png
+++ b/doc/html/doxygen.png
Binary files differ
diff --git a/doc/html/dynsections.js b/doc/html/dynsections.js
new file mode 100644
index 0000000..ed092c7
--- /dev/null
+++ b/doc/html/dynsections.js
@@ -0,0 +1,97 @@
+function toggleVisibility(linkObj)
+{
+ var base = $(linkObj).attr('id');
+ var summary = $('#'+base+'-summary');
+ var content = $('#'+base+'-content');
+ var trigger = $('#'+base+'-trigger');
+ var src=$(trigger).attr('src');
+ if (content.is(':visible')===true) {
+   content.hide();
+   summary.show();
+   $(linkObj).addClass('closed').removeClass('opened');
+   $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png');
+ } else {
+   content.show();
+   summary.hide();
+   $(linkObj).removeClass('closed').addClass('opened');
+   $(trigger).attr('src',src.substring(0,src.length-10)+'open.png');
+ } 
+ return false;
+}
+
+function updateStripes()
+{
+  $('table.directory tr').
+       removeClass('even').filter(':visible:even').addClass('even');
+}
+function toggleLevel(level)
+{
+  $('table.directory tr').each(function(){ 
+    var l = this.id.split('_').length-1;
+    var i = $('#img'+this.id.substring(3));
+    var a = $('#arr'+this.id.substring(3));
+    if (l<level+1) {
+      i.attr('src','ftv2folderopen.png');
+      a.attr('src','ftv2mnode.png');
+      $(this).show();
+    } else if (l==level+1) {
+      i.attr('src','ftv2folderclosed.png');
+      a.attr('src','ftv2pnode.png');
+      $(this).show();
+    } else {
+      $(this).hide();
+    }
+  });
+  updateStripes();
+}
+
+function toggleFolder(id)
+{
+  //The clicked row
+  var currentRow = $('#row_'+id);
+  var currentRowImages = currentRow.find("img");
+
+  //All rows after the clicked row
+  var rows = currentRow.nextAll("tr");
+
+  //Only match elements AFTER this one (can't hide elements before)
+  var childRows = rows.filter(function() {
+    var re = new RegExp('^row_'+id+'\\d+_$', "i"); //only one sub
+    return this.id.match(re);
+  });
+
+  //First row is visible we are HIDING
+  if (childRows.filter(':first').is(':visible')===true) {
+    currentRowImages.filter("[id^=arr]").attr('src', 'ftv2pnode.png');
+    currentRowImages.filter("[id^=img]").attr('src', 'ftv2folderclosed.png');
+    rows.filter("[id^=row_"+id+"]").hide();
+  } else { //We are SHOWING
+    //All sub images
+    var childImages = childRows.find("img");
+    var childImg = childImages.filter("[id^=img]");
+    var childArr = childImages.filter("[id^=arr]");
+
+    currentRow.find("[id^=arr]").attr('src', 'ftv2mnode.png'); //open row
+    currentRow.find("[id^=img]").attr('src', 'ftv2folderopen.png'); //open row
+    childImg.attr('src','ftv2folderclosed.png'); //children closed
+    childArr.attr('src','ftv2pnode.png'); //children closed
+    childRows.show(); //show all children
+  }
+  updateStripes();
+}
+
+
+function toggleInherit(id)
+{
+  var rows = $('tr.inherit.'+id);
+  var img = $('tr.inherit_header.'+id+' img');
+  var src = $(img).attr('src');
+  if (rows.filter(':first').is(':visible')===true) {
+    rows.css('display','none');
+    $(img).attr('src',src.substring(0,src.length-8)+'closed.png');
+  } else {
+    rows.css('display','table-row'); // using show() causes jump in firefox
+    $(img).attr('src',src.substring(0,src.length-10)+'open.png');
+  }
+}
+
diff --git a/doc/html/ftv2blank.png b/doc/html/ftv2blank.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2blank.png
Binary files differ
diff --git a/doc/html/ftv2cl.png b/doc/html/ftv2cl.png
new file mode 100644
index 0000000..132f657
--- /dev/null
+++ b/doc/html/ftv2cl.png
Binary files differ
diff --git a/doc/html/ftv2doc.png b/doc/html/ftv2doc.png
new file mode 100644
index 0000000..17edabf
--- /dev/null
+++ b/doc/html/ftv2doc.png
Binary files differ
diff --git a/doc/html/ftv2folderclosed.png b/doc/html/ftv2folderclosed.png
new file mode 100644
index 0000000..bb8ab35
--- /dev/null
+++ b/doc/html/ftv2folderclosed.png
Binary files differ
diff --git a/doc/html/ftv2folderopen.png b/doc/html/ftv2folderopen.png
new file mode 100644
index 0000000..d6c7f67
--- /dev/null
+++ b/doc/html/ftv2folderopen.png
Binary files differ
diff --git a/doc/html/ftv2lastnode.png b/doc/html/ftv2lastnode.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2lastnode.png
Binary files differ
diff --git a/doc/html/ftv2link.png b/doc/html/ftv2link.png
new file mode 100644
index 0000000..17edabf
--- /dev/null
+++ b/doc/html/ftv2link.png
Binary files differ
diff --git a/doc/html/ftv2mlastnode.png b/doc/html/ftv2mlastnode.png
new file mode 100644
index 0000000..0b63f6d
--- /dev/null
+++ b/doc/html/ftv2mlastnode.png
Binary files differ
diff --git a/doc/html/ftv2mnode.png b/doc/html/ftv2mnode.png
new file mode 100644
index 0000000..0b63f6d
--- /dev/null
+++ b/doc/html/ftv2mnode.png
Binary files differ
diff --git a/doc/html/ftv2mo.png b/doc/html/ftv2mo.png
new file mode 100644
index 0000000..4bfb80f
--- /dev/null
+++ b/doc/html/ftv2mo.png
Binary files differ
diff --git a/doc/html/ftv2node.png b/doc/html/ftv2node.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2node.png
Binary files differ
diff --git a/doc/html/ftv2ns.png b/doc/html/ftv2ns.png
new file mode 100644
index 0000000..72e3d71
--- /dev/null
+++ b/doc/html/ftv2ns.png
Binary files differ
diff --git a/doc/html/ftv2plastnode.png b/doc/html/ftv2plastnode.png
new file mode 100644
index 0000000..c6ee22f
--- /dev/null
+++ b/doc/html/ftv2plastnode.png
Binary files differ
diff --git a/doc/html/ftv2pnode.png b/doc/html/ftv2pnode.png
new file mode 100644
index 0000000..c6ee22f
--- /dev/null
+++ b/doc/html/ftv2pnode.png
Binary files differ
diff --git a/doc/html/ftv2splitbar.png b/doc/html/ftv2splitbar.png
new file mode 100644
index 0000000..fe895f2
--- /dev/null
+++ b/doc/html/ftv2splitbar.png
Binary files differ
diff --git a/doc/html/ftv2vertline.png b/doc/html/ftv2vertline.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2vertline.png
Binary files differ
diff --git a/doc/html/functions.html b/doc/html/functions.html
index a3470a4..276a55c 100644
--- a/doc/html/functions.html
+++ b/doc/html/functions.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Data Fields</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -61,7 +72,21 @@
       <li><a href="functions_vars.html"><span>Variables</span></a></li>
     </ul>
   </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
 </div>
+
 <div class="contents">
 <div class="textblock">Here is a list of all documented struct and union fields with links to the struct/union documentation for each field:</div><ul>
 <li>customFilter
@@ -98,23 +123,12 @@
 : <a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">tjregion</a>
 </li>
 </ul>
-</div>
-<!-- window showing the filter options -->
-<div id="MSearchSelectWindow"
-     onmouseover="return searchBox.OnSearchSelectShow()"
-     onmouseout="return searchBox.OnSearchSelectHide()"
-     onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
-
-<!-- iframe showing the search results (closed by default) -->
-<div id="MSearchResultsWindow">
-<iframe src="javascript:void(0)" frameborder="0" 
-        name="MSearchResults" id="MSearchResults">
-</iframe>
-</div>
-
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/functions_vars.html b/doc/html/functions_vars.html
index ea240cc..c5140f6 100644
--- a/doc/html/functions_vars.html
+++ b/doc/html/functions_vars.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Data Fields - Variables</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -61,7 +72,21 @@
       <li class="current"><a href="functions_vars.html"><span>Variables</span></a></li>
     </ul>
   </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
 </div>
+
 <div class="contents">
 &#160;<ul>
 <li>customFilter
@@ -98,23 +123,12 @@
 : <a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">tjregion</a>
 </li>
 </ul>
-</div>
-<!-- window showing the filter options -->
-<div id="MSearchSelectWindow"
-     onmouseover="return searchBox.OnSearchSelectShow()"
-     onmouseout="return searchBox.OnSearchSelectHide()"
-     onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
-
-<!-- iframe showing the search results (closed by default) -->
-<div id="MSearchResultsWindow">
-<iframe src="javascript:void(0)" frameborder="0" 
-        name="MSearchResults" id="MSearchResults">
-</iframe>
-</div>
-
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/group___turbo_j_p_e_g.html b/doc/html/group___turbo_j_p_e_g.html
index cd14f44..fa6bf0c 100644
--- a/doc/html/group___turbo_j_p_e_g.html
+++ b/doc/html/group___turbo_j_p_e_g.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: TurboJPEG</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -48,80 +59,121 @@
       </li>
     </ul>
   </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
 </div>
+
 <div class="header">
   <div class="summary">
 <a href="#nested-classes">Data Structures</a> &#124;
-<a href="#define-members">Defines</a> &#124;
+<a href="#define-members">Macros</a> &#124;
 <a href="#typedef-members">Typedefs</a> &#124;
 <a href="#enum-members">Enumerations</a> &#124;
 <a href="#func-members">Functions</a> &#124;
 <a href="#var-members">Variables</a>  </div>
   <div class="headertitle">
 <div class="title">TurboJPEG</div>  </div>
-</div>
+</div><!--header-->
 <div class="contents">
 
 <p>TurboJPEG API.  
 <a href="#details">More...</a></p>
 <table class="memberdecls">
-<tr><td colspan="2"><h2><a name="nested-classes"></a>
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
 Data Structures</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Scaling factor.  <a href="structtjscalingfactor.html#details">More...</a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html">tjregion</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="structtjregion.html#details">More...</a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html">tjtransform</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="structtjtransform.html#details">More...</a><br/></td></tr>
-<tr><td colspan="2"><h2><a name="define-members"></a>
-Defines</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of chrominance subsampling options.  <a href="#ga5ef3d169162ce77ce348e292a0b7477c"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of pixel formats.  <a href="#ga7010a4402f54a45ba822ad8675a4655e"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">TJFLAG_BOTTOMUP</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order.  <a href="#ga72ecf4ebe6eb702d3c6f5ca27455e1ec"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4e872f11c82f241736fa8297920f24e5">TJFLAG_FORCEMMX</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the underlying codec supports it.)  <a href="#ga4e872f11c82f241736fa8297920f24e5"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae17e63189e8cd730feed3efbd2454f38">TJFLAG_FORCESSE</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the underlying codec supports it.)  <a href="#gae17e63189e8cd730feed3efbd2454f38"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8cf0bca96ea4d472563f4b0ebf8c48e7">TJFLAG_FORCESSE2</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the underlying codec supports it.)  <a href="#ga8cf0bca96ea4d472563f4b0ebf8c48e7"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaf9d49066633404da4386d70820295dd2">TJFLAG_FORCESSE3</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the underlying codec supports it.)  <a href="#gaf9d49066633404da4386d70820295dd2"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d">TJFLAG_FASTUPSAMPLE</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">When decompressing, use the fastest chrominance upsampling algorithm available in the underlying codec.  <a href="#ga4ee4506c81177a06f77e2504a22efd2d"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963">TJFLAG_NOREALLOC</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Disable buffer (re)allocation.  <a href="#ga8808d403c68b62aaa58a4c1e58e98963"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2">TJFLAG_FASTDCT</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the fastest DCT/IDCT algorithm available in the underlying codec.  <a href="#gaabce235db80d3f698b27f36cbd453da2"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0">TJFLAG_ACCURATEDCT</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the most accurate DCT/IDCT algorithm available in the underlying codec.  <a href="#gacb233cfd722d66d1ccbf48a7de81f0e0"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c">TJ_NUMXOP</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Number of transform operations.  <a href="#ga0f6dbd18adf38b7d46ac547f0f4d562c"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00">TJXOPT_PERFECT</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to return an error if the transform is not perfect.  <a href="#ga50e03cb5ed115330e212417429600b00"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709">TJXOPT_TRIM</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to discard any partial MCU blocks that cannot be transformed.  <a href="#ga319826b7eb1583c0595bbe7b95428709"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">TJXOPT_CROP</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will enable lossless cropping.  <a href="#ga9c771a757fc1294add611906b89ab2d2"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589">TJXOPT_GRAY</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will discard the color data in the input image and produce a grayscale output image.  <a href="#ga3acee7b48ade1b99e5588736007c2589"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31">TJXOPT_NOOUTPUT</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will prevent <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)  <a href="#gafbf992bbf6e006705886333703ffab31"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511">TJPAD</a>(width)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Pad the given width to the nearest 32-bit boundary.  <a href="#ga0aba955473315e405295d978f0c16511"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df">TJSCALED</a>(dimension, scalingFactor)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Compute the scaled value of <code>dimension</code> using the given scaling factor.  <a href="#ga84878bb65404204743aa18cac02781df"></a><br/></td></tr>
-<tr><td colspan="2"><h2><a name="typedef-members"></a>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Scaling factor.  <a href="structtjscalingfactor.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html">tjregion</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="structtjregion.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html">tjtransform</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="structtjtransform.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="define-members"></a>
+Macros</h2></td></tr>
+<tr class="memitem:ga5ef3d169162ce77ce348e292a0b7477c"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a></td></tr>
+<tr class="memdesc:ga5ef3d169162ce77ce348e292a0b7477c"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of chrominance subsampling options.  <a href="#ga5ef3d169162ce77ce348e292a0b7477c">More...</a><br/></td></tr>
+<tr class="separator:ga5ef3d169162ce77ce348e292a0b7477c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga7010a4402f54a45ba822ad8675a4655e"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a></td></tr>
+<tr class="memdesc:ga7010a4402f54a45ba822ad8675a4655e"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of pixel formats.  <a href="#ga7010a4402f54a45ba822ad8675a4655e">More...</a><br/></td></tr>
+<tr class="separator:ga7010a4402f54a45ba822ad8675a4655e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">TJFLAG_BOTTOMUP</a></td></tr>
+<tr class="memdesc:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="mdescLeft">&#160;</td><td class="mdescRight">The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order.  <a href="#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">More...</a><br/></td></tr>
+<tr class="separator:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga4e872f11c82f241736fa8297920f24e5"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4e872f11c82f241736fa8297920f24e5">TJFLAG_FORCEMMX</a></td></tr>
+<tr class="memdesc:ga4e872f11c82f241736fa8297920f24e5"><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the underlying codec supports it.)  <a href="#ga4e872f11c82f241736fa8297920f24e5">More...</a><br/></td></tr>
+<tr class="separator:ga4e872f11c82f241736fa8297920f24e5"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gae17e63189e8cd730feed3efbd2454f38"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae17e63189e8cd730feed3efbd2454f38">TJFLAG_FORCESSE</a></td></tr>
+<tr class="memdesc:gae17e63189e8cd730feed3efbd2454f38"><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the underlying codec supports it.)  <a href="#gae17e63189e8cd730feed3efbd2454f38">More...</a><br/></td></tr>
+<tr class="separator:gae17e63189e8cd730feed3efbd2454f38"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8cf0bca96ea4d472563f4b0ebf8c48e7">TJFLAG_FORCESSE2</a></td></tr>
+<tr class="memdesc:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the underlying codec supports it.)  <a href="#ga8cf0bca96ea4d472563f4b0ebf8c48e7">More...</a><br/></td></tr>
+<tr class="separator:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaf9d49066633404da4386d70820295dd2"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaf9d49066633404da4386d70820295dd2">TJFLAG_FORCESSE3</a></td></tr>
+<tr class="memdesc:gaf9d49066633404da4386d70820295dd2"><td class="mdescLeft">&#160;</td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the underlying codec supports it.)  <a href="#gaf9d49066633404da4386d70820295dd2">More...</a><br/></td></tr>
+<tr class="separator:gaf9d49066633404da4386d70820295dd2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d">TJFLAG_FASTUPSAMPLE</a></td></tr>
+<tr class="memdesc:ga4ee4506c81177a06f77e2504a22efd2d"><td class="mdescLeft">&#160;</td><td class="mdescRight">When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec.  <a href="#ga4ee4506c81177a06f77e2504a22efd2d">More...</a><br/></td></tr>
+<tr class="separator:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga8808d403c68b62aaa58a4c1e58e98963"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963">TJFLAG_NOREALLOC</a></td></tr>
+<tr class="memdesc:ga8808d403c68b62aaa58a4c1e58e98963"><td class="mdescLeft">&#160;</td><td class="mdescRight">Disable buffer (re)allocation.  <a href="#ga8808d403c68b62aaa58a4c1e58e98963">More...</a><br/></td></tr>
+<tr class="separator:ga8808d403c68b62aaa58a4c1e58e98963"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaabce235db80d3f698b27f36cbd453da2"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2">TJFLAG_FASTDCT</a></td></tr>
+<tr class="memdesc:gaabce235db80d3f698b27f36cbd453da2"><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the fastest DCT/IDCT algorithm available in the underlying codec.  <a href="#gaabce235db80d3f698b27f36cbd453da2">More...</a><br/></td></tr>
+<tr class="separator:gaabce235db80d3f698b27f36cbd453da2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0">TJFLAG_ACCURATEDCT</a></td></tr>
+<tr class="memdesc:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the most accurate DCT/IDCT algorithm available in the underlying codec.  <a href="#gacb233cfd722d66d1ccbf48a7de81f0e0">More...</a><br/></td></tr>
+<tr class="separator:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c">TJ_NUMXOP</a></td></tr>
+<tr class="memdesc:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of transform operations.  <a href="#ga0f6dbd18adf38b7d46ac547f0f4d562c">More...</a><br/></td></tr>
+<tr class="separator:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga50e03cb5ed115330e212417429600b00"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00">TJXOPT_PERFECT</a></td></tr>
+<tr class="memdesc:ga50e03cb5ed115330e212417429600b00"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to return an error if the transform is not perfect.  <a href="#ga50e03cb5ed115330e212417429600b00">More...</a><br/></td></tr>
+<tr class="separator:ga50e03cb5ed115330e212417429600b00"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga319826b7eb1583c0595bbe7b95428709"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709">TJXOPT_TRIM</a></td></tr>
+<tr class="memdesc:ga319826b7eb1583c0595bbe7b95428709"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to discard any partial MCU blocks that cannot be transformed.  <a href="#ga319826b7eb1583c0595bbe7b95428709">More...</a><br/></td></tr>
+<tr class="separator:ga319826b7eb1583c0595bbe7b95428709"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga9c771a757fc1294add611906b89ab2d2"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">TJXOPT_CROP</a></td></tr>
+<tr class="memdesc:ga9c771a757fc1294add611906b89ab2d2"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will enable lossless cropping.  <a href="#ga9c771a757fc1294add611906b89ab2d2">More...</a><br/></td></tr>
+<tr class="separator:ga9c771a757fc1294add611906b89ab2d2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga3acee7b48ade1b99e5588736007c2589"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589">TJXOPT_GRAY</a></td></tr>
+<tr class="memdesc:ga3acee7b48ade1b99e5588736007c2589"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will discard the color data in the input image and produce a grayscale output image.  <a href="#ga3acee7b48ade1b99e5588736007c2589">More...</a><br/></td></tr>
+<tr class="separator:ga3acee7b48ade1b99e5588736007c2589"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gafbf992bbf6e006705886333703ffab31"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31">TJXOPT_NOOUTPUT</a></td></tr>
+<tr class="memdesc:gafbf992bbf6e006705886333703ffab31"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will prevent <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)  <a href="#gafbf992bbf6e006705886333703ffab31">More...</a><br/></td></tr>
+<tr class="separator:gafbf992bbf6e006705886333703ffab31"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0aba955473315e405295d978f0c16511"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511">TJPAD</a>(width)</td></tr>
+<tr class="memdesc:ga0aba955473315e405295d978f0c16511"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pad the given width to the nearest 32-bit boundary.  <a href="#ga0aba955473315e405295d978f0c16511">More...</a><br/></td></tr>
+<tr class="separator:ga0aba955473315e405295d978f0c16511"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga84878bb65404204743aa18cac02781df"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df">TJSCALED</a>(dimension, scalingFactor)</td></tr>
+<tr class="memdesc:ga84878bb65404204743aa18cac02781df"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compute the scaled value of <code>dimension</code> using the given scaling factor.  <a href="#ga84878bb65404204743aa18cac02781df">More...</a><br/></td></tr>
+<tr class="separator:ga84878bb65404204743aa18cac02781df"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="typedef-members"></a>
 Typedefs</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">typedef struct <a class="el" href="structtjtransform.html">tjtransform</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31">tjtransform</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="#gaa29f3189c41be12ec5dee7caec318a31"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">typedef void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">TurboJPEG instance handle.  <a href="#ga758d2634ecb4949de7815cba621f5763"></a><br/></td></tr>
-<tr><td colspan="2"><h2><a name="enum-members"></a>
+<tr class="memitem:gaa29f3189c41be12ec5dee7caec318a31"><td class="memItemLeft" align="right" valign="top">typedef struct <a class="el" href="structtjtransform.html">tjtransform</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31">tjtransform</a></td></tr>
+<tr class="memdesc:gaa29f3189c41be12ec5dee7caec318a31"><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="#gaa29f3189c41be12ec5dee7caec318a31">More...</a><br/></td></tr>
+<tr class="separator:gaa29f3189c41be12ec5dee7caec318a31"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga758d2634ecb4949de7815cba621f5763"><td class="memItemLeft" align="right" valign="top">typedef void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a></td></tr>
+<tr class="memdesc:ga758d2634ecb4949de7815cba621f5763"><td class="mdescLeft">&#160;</td><td class="mdescRight">TurboJPEG instance handle.  <a href="#ga758d2634ecb4949de7815cba621f5763">More...</a><br/></td></tr>
+<tr class="separator:ga758d2634ecb4949de7815cba621f5763"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="enum-members"></a>
 Enumerations</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">TJSAMP</a> { <br/>
+<tr class="memitem:ga1d047060ea80bb9820d540bb928e9074"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">TJSAMP</a> { <br/>
 &#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3">TJSAMP_444</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404">TJSAMP_422</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737">TJSAMP_420</a>, 
@@ -130,8 +182,9 @@
 &#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974">TJSAMP_440</a>
 <br/>
  }</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Chrominance subsampling options.  <a href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">More...</a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">TJPF</a> { <br/>
+<tr class="memdesc:ga1d047060ea80bb9820d540bb928e9074"><td class="mdescLeft">&#160;</td><td class="mdescRight">Chrominance subsampling options.  <a href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">More...</a><br/></td></tr>
+<tr class="separator:ga1d047060ea80bb9820d540bb928e9074"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gac916144e26c3817ac514e64ae5d12e2a"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">TJPF</a> { <br/>
 &#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c">TJPF_RGB</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839">TJPF_BGR</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01">TJPF_RGBX</a>, 
@@ -147,8 +200,9 @@
 <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c">TJPF_ARGB</a>
 <br/>
  }</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel formats.  <a href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">More...</a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a> { <br/>
+<tr class="memdesc:gac916144e26c3817ac514e64ae5d12e2a"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel formats.  <a href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">More...</a><br/></td></tr>
+<tr class="separator:gac916144e26c3817ac514e64ae5d12e2a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga2de531af4e7e6c4f124908376b354866"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a> { <br/>
 &#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27">TJXOP_NONE</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce">TJXOP_HFLIP</a>, 
 <a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d">TJXOP_VFLIP</a>, 
@@ -160,61 +214,86 @@
 <a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08">TJXOP_ROT270</a>
 <br/>
  }</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Transform operations for <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>  <a href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">More...</a><br/></td></tr>
-<tr><td colspan="2"><h2><a name="func-members"></a>
+<tr class="memdesc:ga2de531af4e7e6c4f124908376b354866"><td class="mdescLeft">&#160;</td><td class="mdescRight">Transform operations for <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>  <a href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">More...</a><br/></td></tr>
+<tr class="separator:ga2de531af4e7e6c4f124908376b354866"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
 Functions</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a">tjInitCompress</a> (void)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG compressor instance.  <a href="#ga3d10c47fbe4a2489a2b30c931551d01a"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2">tjCompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Compress an RGB or grayscale image into a JPEG image.  <a href="#gaba62b7a98f960839b588579898495cf2"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b">tjBufSize</a> (int width, int height, int jpegSubsamp)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters.  <a href="#gaccc5bca7f12fcdcc302e6e1c6d4b311b"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c">tjBufSizeYUV</a> (int width, int height, int subsamp)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters.  <a href="#ga9d0cb06fd5052d21b6f2b382db8b219c"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35">tjEncodeYUV2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int subsamp, int flags)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Encode an RGB or grayscale image into a YUV planar image.  <a href="#ga0fa4e7b1943687c6a0c0304529c55d35"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e">tjInitDecompress</a> (void)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG decompressor instance.  <a href="#gae5408179d041e2a2f7199c8283cf649e"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa">tjDecompressHeader2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Retrieve information about a JPEG image without decompressing it.  <a href="#gac5675fceb7997b385516cdffdb34e6aa"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="structtjscalingfactor.html">tjscalingfactor</a> *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8">tjGetScalingFactors</a> (int *numscalingfactors)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports.  <a href="#ga6449044b9af402999ccf52f401333be8"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9">tjDecompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to an RGB or grayscale image.  <a href="#gada69cc6443d1bb493b40f1626259e5e9"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8">tjDecompressToYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int flags)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to a YUV planar image.  <a href="#gad7810af095624a4016e72957a50f77d8"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902">tjInitTransform</a> (void)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a new TurboJPEG transformer instance.  <a href="#ga3155b775bfbac9dbba869b95a0367902"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616">tjTransform</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, <a class="el" href="structtjtransform.html">tjtransform</a> *transforms, int flags)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Losslessly transform a JPEG image into another JPEG image.  <a href="#gae403193ceb4aafb7e0f56ab587b48616"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540">tjDestroy</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Destroy a TurboJPEG compressor, decompressor, or transformer instance.  <a href="#ga674adee917b95ad4a896f1ba39e12540"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff">tjAlloc</a> (int bytes)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Allocate an image buffer for use with TurboJPEG.  <a href="#ga5c9234bda6d993cdaffdd89bf81a00ff"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT void DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137">tjFree</a> (unsigned char *buffer)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Free an image buffer previously allocated by TurboJPEG.  <a href="#ga8c4a1231dc06a450514c835f6471f137"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">DLLEXPORT char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf">tjGetErrorStr</a> (void)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a descriptive error message explaining why the last command failed.  <a href="#ga9af79c908ec131b1ae8d52fe40375abf"></a><br/></td></tr>
-<tr><td colspan="2"><h2><a name="var-members"></a>
+<tr class="memitem:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a">tjInitCompress</a> (void)</td></tr>
+<tr class="memdesc:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG compressor instance.  <a href="#ga3d10c47fbe4a2489a2b30c931551d01a">More...</a><br/></td></tr>
+<tr class="separator:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaba62b7a98f960839b588579898495cf2"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2">tjCompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)</td></tr>
+<tr class="memdesc:gaba62b7a98f960839b588579898495cf2"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compress an RGB or grayscale image into a JPEG image.  <a href="#gaba62b7a98f960839b588579898495cf2">More...</a><br/></td></tr>
+<tr class="separator:gaba62b7a98f960839b588579898495cf2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b">tjBufSize</a> (int width, int height, int jpegSubsamp)</td></tr>
+<tr class="memdesc:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="mdescLeft">&#160;</td><td class="mdescRight">The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters.  <a href="#gaccc5bca7f12fcdcc302e6e1c6d4b311b">More...</a><br/></td></tr>
+<tr class="separator:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c">tjBufSizeYUV</a> (int width, int height, int subsamp)</td></tr>
+<tr class="memdesc:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="mdescLeft">&#160;</td><td class="mdescRight">The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters.  <a href="#ga9d0cb06fd5052d21b6f2b382db8b219c">More...</a><br/></td></tr>
+<tr class="separator:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35">tjEncodeYUV2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int subsamp, int flags)</td></tr>
+<tr class="memdesc:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="mdescLeft">&#160;</td><td class="mdescRight">Encode an RGB or grayscale image into a YUV planar image.  <a href="#ga0fa4e7b1943687c6a0c0304529c55d35">More...</a><br/></td></tr>
+<tr class="separator:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gae5408179d041e2a2f7199c8283cf649e"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e">tjInitDecompress</a> (void)</td></tr>
+<tr class="memdesc:gae5408179d041e2a2f7199c8283cf649e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG decompressor instance.  <a href="#gae5408179d041e2a2f7199c8283cf649e">More...</a><br/></td></tr>
+<tr class="separator:gae5408179d041e2a2f7199c8283cf649e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gac5675fceb7997b385516cdffdb34e6aa"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa">tjDecompressHeader2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp)</td></tr>
+<tr class="memdesc:gac5675fceb7997b385516cdffdb34e6aa"><td class="mdescLeft">&#160;</td><td class="mdescRight">Retrieve information about a JPEG image without decompressing it.  <a href="#gac5675fceb7997b385516cdffdb34e6aa">More...</a><br/></td></tr>
+<tr class="separator:gac5675fceb7997b385516cdffdb34e6aa"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga6449044b9af402999ccf52f401333be8"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="structtjscalingfactor.html">tjscalingfactor</a> *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8">tjGetScalingFactors</a> (int *numscalingfactors)</td></tr>
+<tr class="memdesc:ga6449044b9af402999ccf52f401333be8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports.  <a href="#ga6449044b9af402999ccf52f401333be8">More...</a><br/></td></tr>
+<tr class="separator:ga6449044b9af402999ccf52f401333be8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gada69cc6443d1bb493b40f1626259e5e9"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9">tjDecompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
+<tr class="memdesc:gada69cc6443d1bb493b40f1626259e5e9"><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to an RGB or grayscale image.  <a href="#gada69cc6443d1bb493b40f1626259e5e9">More...</a><br/></td></tr>
+<tr class="separator:gada69cc6443d1bb493b40f1626259e5e9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gad7810af095624a4016e72957a50f77d8"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8">tjDecompressToYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int flags)</td></tr>
+<tr class="memdesc:gad7810af095624a4016e72957a50f77d8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to a YUV planar image.  <a href="#gad7810af095624a4016e72957a50f77d8">More...</a><br/></td></tr>
+<tr class="separator:gad7810af095624a4016e72957a50f77d8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga3155b775bfbac9dbba869b95a0367902"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902">tjInitTransform</a> (void)</td></tr>
+<tr class="memdesc:ga3155b775bfbac9dbba869b95a0367902"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a new TurboJPEG transformer instance.  <a href="#ga3155b775bfbac9dbba869b95a0367902">More...</a><br/></td></tr>
+<tr class="separator:ga3155b775bfbac9dbba869b95a0367902"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gae403193ceb4aafb7e0f56ab587b48616"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616">tjTransform</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, <a class="el" href="structtjtransform.html">tjtransform</a> *transforms, int flags)</td></tr>
+<tr class="memdesc:gae403193ceb4aafb7e0f56ab587b48616"><td class="mdescLeft">&#160;</td><td class="mdescRight">Losslessly transform a JPEG image into another JPEG image.  <a href="#gae403193ceb4aafb7e0f56ab587b48616">More...</a><br/></td></tr>
+<tr class="separator:gae403193ceb4aafb7e0f56ab587b48616"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga674adee917b95ad4a896f1ba39e12540"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540">tjDestroy</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle)</td></tr>
+<tr class="memdesc:ga674adee917b95ad4a896f1ba39e12540"><td class="mdescLeft">&#160;</td><td class="mdescRight">Destroy a TurboJPEG compressor, decompressor, or transformer instance.  <a href="#ga674adee917b95ad4a896f1ba39e12540">More...</a><br/></td></tr>
+<tr class="separator:ga674adee917b95ad4a896f1ba39e12540"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff">tjAlloc</a> (int bytes)</td></tr>
+<tr class="memdesc:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="mdescLeft">&#160;</td><td class="mdescRight">Allocate an image buffer for use with TurboJPEG.  <a href="#ga5c9234bda6d993cdaffdd89bf81a00ff">More...</a><br/></td></tr>
+<tr class="separator:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga8c4a1231dc06a450514c835f6471f137"><td class="memItemLeft" align="right" valign="top">DLLEXPORT void DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137">tjFree</a> (unsigned char *buffer)</td></tr>
+<tr class="memdesc:ga8c4a1231dc06a450514c835f6471f137"><td class="mdescLeft">&#160;</td><td class="mdescRight">Free an image buffer previously allocated by TurboJPEG.  <a href="#ga8c4a1231dc06a450514c835f6471f137">More...</a><br/></td></tr>
+<tr class="separator:ga8c4a1231dc06a450514c835f6471f137"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga9af79c908ec131b1ae8d52fe40375abf"><td class="memItemLeft" align="right" valign="top">DLLEXPORT char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf">tjGetErrorStr</a> (void)</td></tr>
+<tr class="memdesc:ga9af79c908ec131b1ae8d52fe40375abf"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a descriptive error message explaining why the last command failed.  <a href="#ga9af79c908ec131b1ae8d52fe40375abf">More...</a><br/></td></tr>
+<tr class="separator:ga9af79c908ec131b1ae8d52fe40375abf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="var-members"></a>
 Variables</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c">tjMCUWidth</a> [TJ_NUMSAMP]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block width (in pixels) for a given level of chrominance subsampling.  <a href="#ga9e61e7cd47a15a173283ba94e781308c"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf">tjMCUHeight</a> [TJ_NUMSAMP]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block height (in pixels) for a given level of chrominance subsampling.  <a href="#gabd247bb9fecb393eca57366feb8327bf"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8">tjRedOffset</a> [TJ_NUMPF]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Red offset (in bytes) for a given pixel format.  <a href="#gadd9b446742ac8a3923f7992c7988fea8"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f">tjGreenOffset</a> [TJ_NUMPF]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Green offset (in bytes) for a given pixel format.  <a href="#ga82d6e35da441112a411da41923c0ba2f"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea">tjBlueOffset</a> [TJ_NUMPF]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Blue offset (in bytes) for a given pixel format.  <a href="#ga84e2e35d3f08025f976ec1ec53693dea"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c">tjPixelSize</a> [TJ_NUMPF]</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel size (in bytes) for a given pixel format.  <a href="#gad77cf8fe5b2bfd3cb3f53098146abb4c"></a><br/></td></tr>
+<tr class="memitem:ga9e61e7cd47a15a173283ba94e781308c"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c">tjMCUWidth</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td></tr>
+<tr class="memdesc:ga9e61e7cd47a15a173283ba94e781308c"><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block width (in pixels) for a given level of chrominance subsampling.  <a href="#ga9e61e7cd47a15a173283ba94e781308c">More...</a><br/></td></tr>
+<tr class="separator:ga9e61e7cd47a15a173283ba94e781308c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gabd247bb9fecb393eca57366feb8327bf"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf">tjMCUHeight</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td></tr>
+<tr class="memdesc:gabd247bb9fecb393eca57366feb8327bf"><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block height (in pixels) for a given level of chrominance subsampling.  <a href="#gabd247bb9fecb393eca57366feb8327bf">More...</a><br/></td></tr>
+<tr class="separator:gabd247bb9fecb393eca57366feb8327bf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gadd9b446742ac8a3923f7992c7988fea8"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8">tjRedOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:gadd9b446742ac8a3923f7992c7988fea8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Red offset (in bytes) for a given pixel format.  <a href="#gadd9b446742ac8a3923f7992c7988fea8">More...</a><br/></td></tr>
+<tr class="separator:gadd9b446742ac8a3923f7992c7988fea8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga82d6e35da441112a411da41923c0ba2f"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f">tjGreenOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:ga82d6e35da441112a411da41923c0ba2f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Green offset (in bytes) for a given pixel format.  <a href="#ga82d6e35da441112a411da41923c0ba2f">More...</a><br/></td></tr>
+<tr class="separator:ga82d6e35da441112a411da41923c0ba2f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga84e2e35d3f08025f976ec1ec53693dea"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea">tjBlueOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:ga84e2e35d3f08025f976ec1ec53693dea"><td class="mdescLeft">&#160;</td><td class="mdescRight">Blue offset (in bytes) for a given pixel format.  <a href="#ga84e2e35d3f08025f976ec1ec53693dea">More...</a><br/></td></tr>
+<tr class="separator:ga84e2e35d3f08025f976ec1ec53693dea"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c">tjPixelSize</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel size (in bytes) for a given pixel format.  <a href="#gad77cf8fe5b2bfd3cb3f53098146abb4c">More...</a><br/></td></tr>
+<tr class="separator:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
 <p>TurboJPEG API. </p>
 <p>This API provides an interface for generating, decoding, and transforming planar YUV and JPEG images in memory. </p>
-<hr/><h2>Define Documentation</h2>
-<a class="anchor" id="ga7010a4402f54a45ba822ad8675a4655e"></a><!-- doxytag: member="turbojpeg.h::TJ_NUMPF" ref="ga7010a4402f54a45ba822ad8675a4655e" args="" -->
+<h2 class="groupheader">Macro Definition Documentation</h2>
+<a class="anchor" id="ga7010a4402f54a45ba822ad8675a4655e"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -222,14 +301,13 @@
           <td class="memname">#define TJ_NUMPF</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The number of pixel formats. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga5ef3d169162ce77ce348e292a0b7477c"></a><!-- doxytag: member="turbojpeg.h::TJ_NUMSAMP" ref="ga5ef3d169162ce77ce348e292a0b7477c" args="" -->
+<a class="anchor" id="ga5ef3d169162ce77ce348e292a0b7477c"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -237,14 +315,13 @@
           <td class="memname">#define TJ_NUMSAMP</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The number of chrominance subsampling options. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga0f6dbd18adf38b7d46ac547f0f4d562c"></a><!-- doxytag: member="turbojpeg.h::TJ_NUMXOP" ref="ga0f6dbd18adf38b7d46ac547f0f4d562c" args="" -->
+<a class="anchor" id="ga0f6dbd18adf38b7d46ac547f0f4d562c"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -252,14 +329,13 @@
           <td class="memname">#define TJ_NUMXOP</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
-<p>Number of transform operations. </p>
+<p>The number of transform operations. </p>
 
 </div>
 </div>
-<a class="anchor" id="gacb233cfd722d66d1ccbf48a7de81f0e0"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_ACCURATEDCT" ref="gacb233cfd722d66d1ccbf48a7de81f0e0" args="" -->
+<a class="anchor" id="gacb233cfd722d66d1ccbf48a7de81f0e0"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -267,15 +343,14 @@
           <td class="memname">#define TJFLAG_ACCURATEDCT</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Use the most accurate DCT/IDCT algorithm available in the underlying codec. </p>
-<p>The default if this flag is not specified is implementation-specific. The libjpeg implementation, for example, uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
+<p>The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga72ecf4ebe6eb702d3c6f5ca27455e1ec"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_BOTTOMUP" ref="ga72ecf4ebe6eb702d3c6f5ca27455e1ec" args="" -->
+<a class="anchor" id="ga72ecf4ebe6eb702d3c6f5ca27455e1ec"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -283,14 +358,13 @@
           <td class="memname">#define TJFLAG_BOTTOMUP</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order. </p>
 
 </div>
 </div>
-<a class="anchor" id="gaabce235db80d3f698b27f36cbd453da2"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FASTDCT" ref="gaabce235db80d3f698b27f36cbd453da2" args="" -->
+<a class="anchor" id="gaabce235db80d3f698b27f36cbd453da2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -298,15 +372,14 @@
           <td class="memname">#define TJFLAG_FASTDCT</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Use the fastest DCT/IDCT algorithm available in the underlying codec. </p>
-<p>The default if this flag is not specified is implementation-specific. The libjpeg implementation, for example, uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
+<p>The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga4ee4506c81177a06f77e2504a22efd2d"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FASTUPSAMPLE" ref="ga4ee4506c81177a06f77e2504a22efd2d" args="" -->
+<a class="anchor" id="ga4ee4506c81177a06f77e2504a22efd2d"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -314,15 +387,14 @@
           <td class="memname">#define TJFLAG_FASTUPSAMPLE</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
-<p>When decompressing, use the fastest chrominance upsampling algorithm available in the underlying codec. </p>
+<p>When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec. </p>
 <p>The default is to use smooth upsampling, which creates a smooth transition between neighboring chrominance components in order to reduce upsampling artifacts in the decompressed image. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga4e872f11c82f241736fa8297920f24e5"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FORCEMMX" ref="ga4e872f11c82f241736fa8297920f24e5" args="" -->
+<a class="anchor" id="ga4e872f11c82f241736fa8297920f24e5"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -330,14 +402,13 @@
           <td class="memname">#define TJFLAG_FORCEMMX</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the underlying codec supports it.) </p>
 
 </div>
 </div>
-<a class="anchor" id="gae17e63189e8cd730feed3efbd2454f38"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FORCESSE" ref="gae17e63189e8cd730feed3efbd2454f38" args="" -->
+<a class="anchor" id="gae17e63189e8cd730feed3efbd2454f38"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -345,14 +416,13 @@
           <td class="memname">#define TJFLAG_FORCESSE</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the underlying codec supports it.) </p>
 
 </div>
 </div>
-<a class="anchor" id="ga8cf0bca96ea4d472563f4b0ebf8c48e7"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FORCESSE2" ref="ga8cf0bca96ea4d472563f4b0ebf8c48e7" args="" -->
+<a class="anchor" id="ga8cf0bca96ea4d472563f4b0ebf8c48e7"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -360,14 +430,13 @@
           <td class="memname">#define TJFLAG_FORCESSE2</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the underlying codec supports it.) </p>
 
 </div>
 </div>
-<a class="anchor" id="gaf9d49066633404da4386d70820295dd2"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_FORCESSE3" ref="gaf9d49066633404da4386d70820295dd2" args="" -->
+<a class="anchor" id="gaf9d49066633404da4386d70820295dd2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -375,14 +444,13 @@
           <td class="memname">#define TJFLAG_FORCESSE3</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the underlying codec supports it.) </p>
 
 </div>
 </div>
-<a class="anchor" id="ga8808d403c68b62aaa58a4c1e58e98963"></a><!-- doxytag: member="turbojpeg.h::TJFLAG_NOREALLOC" ref="ga8808d403c68b62aaa58a4c1e58e98963" args="" -->
+<a class="anchor" id="ga8808d403c68b62aaa58a4c1e58e98963"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -390,15 +458,14 @@
           <td class="memname">#define TJFLAG_NOREALLOC</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Disable buffer (re)allocation. </p>
 <p>If passed to <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>, this flag will cause those functions to generate an error if the JPEG image buffer is invalid or too small rather than attempting to allocate or reallocate that buffer. This reproduces the behavior of earlier versions of TurboJPEG. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga0aba955473315e405295d978f0c16511"></a><!-- doxytag: member="turbojpeg.h::TJPAD" ref="ga0aba955473315e405295d978f0c16511" args="(width)" -->
+<a class="anchor" id="ga0aba955473315e405295d978f0c16511"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -410,14 +477,13 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Pad the given width to the nearest 32-bit boundary. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga84878bb65404204743aa18cac02781df"></a><!-- doxytag: member="turbojpeg.h::TJSCALED" ref="ga84878bb65404204743aa18cac02781df" args="(dimension, scalingFactor)" -->
+<a class="anchor" id="ga84878bb65404204743aa18cac02781df"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -439,15 +505,14 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Compute the scaled value of <code>dimension</code> using the given scaling factor. </p>
 <p>This macro performs the integer equivalent of <code>ceil(dimension * scalingFactor)</code>. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga9c771a757fc1294add611906b89ab2d2"></a><!-- doxytag: member="turbojpeg.h::TJXOPT_CROP" ref="ga9c771a757fc1294add611906b89ab2d2" args="" -->
+<a class="anchor" id="ga9c771a757fc1294add611906b89ab2d2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -455,15 +520,14 @@
           <td class="memname">#define TJXOPT_CROP</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>This option will enable lossless cropping. </p>
 <p>See <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> for more information. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga3acee7b48ade1b99e5588736007c2589"></a><!-- doxytag: member="turbojpeg.h::TJXOPT_GRAY" ref="ga3acee7b48ade1b99e5588736007c2589" args="" -->
+<a class="anchor" id="ga3acee7b48ade1b99e5588736007c2589"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -471,14 +535,13 @@
           <td class="memname">#define TJXOPT_GRAY</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>This option will discard the color data in the input image and produce a grayscale output image. </p>
 
 </div>
 </div>
-<a class="anchor" id="gafbf992bbf6e006705886333703ffab31"></a><!-- doxytag: member="turbojpeg.h::TJXOPT_NOOUTPUT" ref="gafbf992bbf6e006705886333703ffab31" args="" -->
+<a class="anchor" id="gafbf992bbf6e006705886333703ffab31"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -486,14 +549,13 @@
           <td class="memname">#define TJXOPT_NOOUTPUT</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>This option will prevent <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.) </p>
 
 </div>
 </div>
-<a class="anchor" id="ga50e03cb5ed115330e212417429600b00"></a><!-- doxytag: member="turbojpeg.h::TJXOPT_PERFECT" ref="ga50e03cb5ed115330e212417429600b00" args="" -->
+<a class="anchor" id="ga50e03cb5ed115330e212417429600b00"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -501,15 +563,14 @@
           <td class="memname">#define TJXOPT_PERFECT</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to return an error if the transform is not perfect. </p>
 <p>Lossless transforms operate on MCU blocks, whose size depends on the level of chrominance subsampling used (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>.) If the image's width or height is not evenly divisible by the MCU block size, then there will be partial MCU blocks on the right and/or bottom edges. It is not possible to move these partial MCU blocks to the top or left of the image, so any transform that would require that is "imperfect." If this option is not specified, then any partial MCU blocks that cannot be transformed will be left in place, which will create odd-looking strips on the right or bottom edge of the image. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga319826b7eb1583c0595bbe7b95428709"></a><!-- doxytag: member="turbojpeg.h::TJXOPT_TRIM" ref="ga319826b7eb1583c0595bbe7b95428709" args="" -->
+<a class="anchor" id="ga319826b7eb1583c0595bbe7b95428709"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -517,15 +578,14 @@
           <td class="memname">#define TJXOPT_TRIM</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to discard any partial MCU blocks that cannot be transformed. </p>
 
 </div>
 </div>
-<hr/><h2>Typedef Documentation</h2>
-<a class="anchor" id="ga758d2634ecb4949de7815cba621f5763"></a><!-- doxytag: member="turbojpeg.h::tjhandle" ref="ga758d2634ecb4949de7815cba621f5763" args="" -->
+<h2 class="groupheader">Typedef Documentation</h2>
+<a class="anchor" id="ga758d2634ecb4949de7815cba621f5763"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -533,14 +593,13 @@
           <td class="memname">typedef void* <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>TurboJPEG instance handle. </p>
 
 </div>
 </div>
-<a class="anchor" id="gaa29f3189c41be12ec5dee7caec318a31"></a><!-- doxytag: member="turbojpeg.h::tjtransform" ref="gaa29f3189c41be12ec5dee7caec318a31" args="" -->
+<a class="anchor" id="gaa29f3189c41be12ec5dee7caec318a31"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -548,15 +607,14 @@
           <td class="memname">typedef struct <a class="el" href="structtjtransform.html">tjtransform</a>  <a class="el" href="structtjtransform.html">tjtransform</a></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Lossless transform. </p>
 
 </div>
 </div>
-<hr/><h2>Enumeration Type Documentation</h2>
-<a class="anchor" id="gac916144e26c3817ac514e64ae5d12e2a"></a><!-- doxytag: member="turbojpeg.h::TJPF" ref="gac916144e26c3817ac514e64ae5d12e2a" args="" -->
+<h2 class="groupheader">Enumeration Type Documentation</h2>
+<a class="anchor" id="gac916144e26c3817ac514e64ae5d12e2a"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -564,62 +622,59 @@
           <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">TJPF</a></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Pixel formats. </p>
-<dl><dt><b>Enumerator: </b></dt><dd><table border="0" cellspacing="2" cellpadding="0">
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c"></a><!-- doxytag: member="TJPF_RGB" ref="ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c" args="" -->TJPF_RGB</em>&nbsp;</td><td>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c"></a>TJPF_RGB</em>&nbsp;</td><td class="fielddoc">
 <p>RGB pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 3-byte pixels in the order R, G, B from lowest to highest byte address within each pixel. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839"></a><!-- doxytag: member="TJPF_BGR" ref="ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839" args="" -->TJPF_BGR</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839"></a>TJPF_BGR</em>&nbsp;</td><td class="fielddoc">
 <p>BGR pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 3-byte pixels in the order B, G, R from lowest to highest byte address within each pixel. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01"></a><!-- doxytag: member="TJPF_RGBX" ref="ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01" args="" -->TJPF_RGBX</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01"></a>TJPF_RGBX</em>&nbsp;</td><td class="fielddoc">
 <p>RGBX pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 4-byte pixels in the order R, G, B from lowest to highest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8"></a><!-- doxytag: member="TJPF_BGRX" ref="ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8" args="" -->TJPF_BGRX</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8"></a>TJPF_BGRX</em>&nbsp;</td><td class="fielddoc">
 <p>BGRX pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 4-byte pixels in the order B, G, R from lowest to highest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af"></a><!-- doxytag: member="TJPF_XBGR" ref="ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af" args="" -->TJPF_XBGR</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af"></a>TJPF_XBGR</em>&nbsp;</td><td class="fielddoc">
 <p>XBGR pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 4-byte pixels in the order R, G, B from highest to lowest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84"></a><!-- doxytag: member="TJPF_XRGB" ref="ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84" args="" -->TJPF_XRGB</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84"></a>TJPF_XRGB</em>&nbsp;</td><td class="fielddoc">
 <p>XRGB pixel format. </p>
 <p>The red, green, and blue components in the image are stored in 4-byte pixels in the order B, G, R from highest to lowest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a"></a><!-- doxytag: member="TJPF_GRAY" ref="ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a" args="" -->TJPF_GRAY</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a"></a>TJPF_GRAY</em>&nbsp;</td><td class="fielddoc">
 <p>Grayscale pixel format. </p>
 <p>Each 1-byte pixel represents a luminance (brightness) level from 0 to 255. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12"></a><!-- doxytag: member="TJPF_RGBA" ref="ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12" args="" -->TJPF_RGBA</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12"></a>TJPF_RGBA</em>&nbsp;</td><td class="fielddoc">
 <p>RGBA pixel format. </p>
 <p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01">TJPF_RGBX</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4"></a><!-- doxytag: member="TJPF_BGRA" ref="ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4" args="" -->TJPF_BGRA</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4"></a>TJPF_BGRA</em>&nbsp;</td><td class="fielddoc">
 <p>BGRA pixel format. </p>
 <p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8">TJPF_BGRX</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081"></a><!-- doxytag: member="TJPF_ABGR" ref="ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081" args="" -->TJPF_ABGR</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081"></a>TJPF_ABGR</em>&nbsp;</td><td class="fielddoc">
 <p>ABGR pixel format. </p>
 <p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af">TJPF_XBGR</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c"></a><!-- doxytag: member="TJPF_ARGB" ref="ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c" args="" -->TJPF_ARGB</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c"></a>TJPF_ARGB</em>&nbsp;</td><td class="fielddoc">
 <p>ARGB pixel format. </p>
 <p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84">TJPF_XRGB</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
 </td></tr>
 </table>
-</dd>
-</dl>
 
 </div>
 </div>
-<a class="anchor" id="ga1d047060ea80bb9820d540bb928e9074"></a><!-- doxytag: member="turbojpeg.h::TJSAMP" ref="ga1d047060ea80bb9820d540bb928e9074" args="" -->
+<a class="anchor" id="ga1d047060ea80bb9820d540bb928e9074"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -627,39 +682,38 @@
           <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">TJSAMP</a></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Chrominance subsampling options. </p>
 <p>When an image is converted from the RGB to the YCbCr colorspace as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity (the human eye is more sensitive to small changes in brightness than small changes in color.) This is called "chrominance subsampling". </p>
-<dl><dt><b>Enumerator: </b></dt><dd><table border="0" cellspacing="2" cellpadding="0">
-<tr><td valign="top"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3"></a><!-- doxytag: member="TJSAMP_444" ref="gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3" args="" -->TJSAMP_444</em>&nbsp;</td><td>
+<dl class="section note"><dt>Note</dt><dd>Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes. </dd></dl>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3"></a>TJSAMP_444</em>&nbsp;</td><td class="fielddoc">
 <p>4:4:4 chrominance subsampling (no chrominance subsampling). </p>
 <p>The JPEG or YUV image will contain one chrominance component for every pixel in the source image. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404"></a><!-- doxytag: member="TJSAMP_422" ref="gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404" args="" -->TJSAMP_422</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404"></a>TJSAMP_422</em>&nbsp;</td><td class="fielddoc">
 <p>4:2:2 chrominance subsampling. </p>
 <p>The JPEG or YUV image will contain one chrominance component for every 2x1 block of pixels in the source image. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737"></a><!-- doxytag: member="TJSAMP_420" ref="gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737" args="" -->TJSAMP_420</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737"></a>TJSAMP_420</em>&nbsp;</td><td class="fielddoc">
 <p>4:2:0 chrominance subsampling. </p>
 <p>The JPEG or YUV image will contain one chrominance component for every 2x2 block of pixels in the source image. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248"></a><!-- doxytag: member="TJSAMP_GRAY" ref="gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248" args="" -->TJSAMP_GRAY</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248"></a>TJSAMP_GRAY</em>&nbsp;</td><td class="fielddoc">
 <p>Grayscale. </p>
 <p>The JPEG or YUV image will contain no chrominance components. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974"></a><!-- doxytag: member="TJSAMP_440" ref="gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974" args="" -->TJSAMP_440</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974"></a>TJSAMP_440</em>&nbsp;</td><td class="fielddoc">
 <p>4:4:0 chrominance subsampling. </p>
 <p>The JPEG or YUV image will contain one chrominance component for every 1x2 block of pixels in the source image. </p>
+<dl class="section note"><dt>Note</dt><dd>4:4:0 subsampling is not fully accelerated in libjpeg-turbo. </dd></dl>
 </td></tr>
 </table>
-</dd>
-</dl>
 
 </div>
 </div>
-<a class="anchor" id="ga2de531af4e7e6c4f124908376b354866"></a><!-- doxytag: member="turbojpeg.h::TJXOP" ref="ga2de531af4e7e6c4f124908376b354866" args="" -->
+<a class="anchor" id="ga2de531af4e7e6c4f124908376b354866"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -667,48 +721,45 @@
           <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Transform operations for <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> </p>
-<dl><dt><b>Enumerator: </b></dt><dd><table border="0" cellspacing="2" cellpadding="0">
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27"></a><!-- doxytag: member="TJXOP_NONE" ref="gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27" args="" -->TJXOP_NONE</em>&nbsp;</td><td>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27"></a>TJXOP_NONE</em>&nbsp;</td><td class="fielddoc">
 <p>Do not transform the position of the image pixels. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce"></a><!-- doxytag: member="TJXOP_HFLIP" ref="gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce" args="" -->TJXOP_HFLIP</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce"></a>TJXOP_HFLIP</em>&nbsp;</td><td class="fielddoc">
 <p>Flip (mirror) image horizontally. </p>
 <p>This transform is imperfect if there are any partial MCU blocks on the right edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d"></a><!-- doxytag: member="TJXOP_VFLIP" ref="gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d" args="" -->TJXOP_VFLIP</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d"></a>TJXOP_VFLIP</em>&nbsp;</td><td class="fielddoc">
 <p>Flip (mirror) image vertically. </p>
 <p>This transform is imperfect if there are any partial MCU blocks on the bottom edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d"></a><!-- doxytag: member="TJXOP_TRANSPOSE" ref="gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d" args="" -->TJXOP_TRANSPOSE</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d"></a>TJXOP_TRANSPOSE</em>&nbsp;</td><td class="fielddoc">
 <p>Transpose image (flip/mirror along upper left to lower right axis.) This transform is always perfect. </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4"></a><!-- doxytag: member="TJXOP_TRANSVERSE" ref="gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4" args="" -->TJXOP_TRANSVERSE</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4"></a>TJXOP_TRANSVERSE</em>&nbsp;</td><td class="fielddoc">
 <p>Transverse transpose image (flip/mirror along upper right to lower left axis.) This transform is imperfect if there are any partial MCU blocks in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128"></a><!-- doxytag: member="TJXOP_ROT90" ref="gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128" args="" -->TJXOP_ROT90</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128"></a>TJXOP_ROT90</em>&nbsp;</td><td class="fielddoc">
 <p>Rotate image clockwise by 90 degrees. </p>
 <p>This transform is imperfect if there are any partial MCU blocks on the bottom edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692"></a><!-- doxytag: member="TJXOP_ROT180" ref="gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692" args="" -->TJXOP_ROT180</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692"></a>TJXOP_ROT180</em>&nbsp;</td><td class="fielddoc">
 <p>Rotate image 180 degrees. </p>
 <p>This transform is imperfect if there are any partial MCU blocks in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
-<tr><td valign="top"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08"></a><!-- doxytag: member="TJXOP_ROT270" ref="gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08" args="" -->TJXOP_ROT270</em>&nbsp;</td><td>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08"></a>TJXOP_ROT270</em>&nbsp;</td><td class="fielddoc">
 <p>Rotate image counter-clockwise by 90 degrees. </p>
 <p>This transform is imperfect if there are any partial MCU blocks on the right edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
 </td></tr>
 </table>
-</dd>
-</dl>
 
 </div>
 </div>
-<hr/><h2>Function Documentation</h2>
-<a class="anchor" id="ga5c9234bda6d993cdaffdd89bf81a00ff"></a><!-- doxytag: member="turbojpeg.h::tjAlloc" ref="ga5c9234bda6d993cdaffdd89bf81a00ff" args="(int bytes)" -->
+<h2 class="groupheader">Function Documentation</h2>
+<a class="anchor" id="ga5c9234bda6d993cdaffdd89bf81a00ff"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -720,23 +771,22 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Allocate an image buffer for use with TurboJPEG. </p>
 <p>You should always use this function to allocate the JPEG destination buffer(s) for <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> and <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> unless you are disabling automatic buffer (re)allocation (by setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>.)</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">bytes</td><td>the number of bytes to allocate</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a pointer to a newly-allocated buffer with the specified number of bytes</dd></dl>
-<dl class="see"><dt><b>See also:</b></dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137" title="Free an image buffer previously allocated by TurboJPEG.">tjFree()</a> </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a pointer to a newly-allocated buffer with the specified number of bytes</dd></dl>
+<dl class="section see"><dt>See Also</dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137" title="Free an image buffer previously allocated by TurboJPEG.">tjFree()</a> </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gaccc5bca7f12fcdcc302e6e1c6d4b311b"></a><!-- doxytag: member="turbojpeg.h::tjBufSize" ref="gaccc5bca7f12fcdcc302e6e1c6d4b311b" args="(int width, int height, int jpegSubsamp)" -->
+<a class="anchor" id="gaccc5bca7f12fcdcc302e6e1c6d4b311b"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -764,12 +814,11 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. </p>
-<p>The number of bytes returned by this function is larger than the size of the uncompressed source image. The reason for this is that the JPEG format uses 16-bit coefficients, and it is thus possible for a very high-quality JPEG image with very high frequency content to expand rather than compress when converted to the JPEG format. Such images represent a very rare corner case, but since there is no way to predict the size of a JPEG image prior to compression, the corner case has to be handled.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<p>The number of bytes returned by this function is larger than the size of the uncompressed source image. The reason for this is that the JPEG format uses 16-bit coefficients, and it is thus possible for a very high-quality JPEG image with very high-frequency content to expand rather than compress when converted to the JPEG format. Such images represent a very rare corner case, but since there is no way to predict the size of a JPEG image prior to compression, the corner case has to be handled.</p>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">width</td><td>width of the image (in pixels) </td></tr>
     <tr><td class="paramname">height</td><td>height of the image (in pixels) </td></tr>
@@ -777,11 +826,11 @@
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>the maximum size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>the maximum size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga9d0cb06fd5052d21b6f2b382db8b219c"></a><!-- doxytag: member="turbojpeg.h::tjBufSizeYUV" ref="ga9d0cb06fd5052d21b6f2b382db8b219c" args="(int width, int height, int subsamp)" -->
+<a class="anchor" id="ga9d0cb06fd5052d21b6f2b382db8b219c"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -809,11 +858,10 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">width</td><td>width of the image (in pixels) </td></tr>
     <tr><td class="paramname">height</td><td>height of the image (in pixels) </td></tr>
@@ -821,11 +869,11 @@
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>the size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>the size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gaba62b7a98f960839b588579898495cf2"></a><!-- doxytag: member="turbojpeg.h::tjCompress2" ref="gaba62b7a98f960839b588579898495cf2" args="(tjhandle handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)" -->
+<a class="anchor" id="gaba62b7a98f960839b588579898495cf2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -901,11 +949,10 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Compress an RGB or grayscale image into a JPEG image. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
     <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB or grayscale pixels to be compressed </td></tr>
@@ -926,11 +973,11 @@
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gada69cc6443d1bb493b40f1626259e5e9"></a><!-- doxytag: member="turbojpeg.h::tjDecompress2" ref="gada69cc6443d1bb493b40f1626259e5e9" args="(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)" -->
+<a class="anchor" id="gada69cc6443d1bb493b40f1626259e5e9"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -994,29 +1041,28 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Decompress a JPEG image to an RGB or grayscale image. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
     <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to decompress </td></tr>
     <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
-    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decompressed image. This buffer should normally be <code>pitch * scaledHeight</code> bytes in size, where <code>scaledHeight</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image height and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>. The dstBuf pointer may also be used to decompress into a specific region of a larger buffer. </td></tr>
-    <tr><td class="paramname">width</td><td>desired width (in pixels) of the destination image. If this is smaller than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decompressed image. This buffer should normally be <code>pitch * scaledHeight</code> bytes in size, where <code>scaledHeight</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image height and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>. The <code>dstBuf</code> pointer may also be used to decompress into a specific region of a larger buffer. </td></tr>
+    <tr><td class="paramname">width</td><td>desired width (in pixels) of the destination image. If this is different from the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If <code>width</code> is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
     <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this is <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the decompressed image is unpadded, else <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: <code>scaledWidth</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image width and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>.) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
-    <tr><td class="paramname">height</td><td>desired height (in pixels) of the destination image. If this is smaller than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">height</td><td>desired height (in pixels) of the destination image. If this is different from the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
     <tr><td class="paramname">pixelFormat</td><td>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
     <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gac5675fceb7997b385516cdffdb34e6aa"></a><!-- doxytag: member="turbojpeg.h::tjDecompressHeader2" ref="gac5675fceb7997b385516cdffdb34e6aa" args="(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp)" -->
+<a class="anchor" id="gac5675fceb7997b385516cdffdb34e6aa"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1062,11 +1108,10 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Retrieve information about a JPEG image without decompressing it. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
     <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing a JPEG image </td></tr>
@@ -1077,11 +1122,11 @@
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gad7810af095624a4016e72957a50f77d8"></a><!-- doxytag: member="turbojpeg.h::tjDecompressToYUV" ref="gad7810af095624a4016e72957a50f77d8" args="(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int flags)" -->
+<a class="anchor" id="gad7810af095624a4016e72957a50f77d8"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1121,26 +1166,26 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Decompress a JPEG image to a YUV planar image. </p>
-<p>This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image. The padding of the planes in this image is the same as the images generated by <a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35" title="Encode an RGB or grayscale image into a YUV planar image.">tjEncodeYUV2()</a>. Note that, if the width or height of the image is not an even multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<p>This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image. The padding of the planes in this image is the same as in the images generated by <a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35" title="Encode an RGB or grayscale image into a YUV planar image.">tjEncodeYUV2()</a>. If the width or height of the image is not an even multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG. </p>
+<dl class="section note"><dt>Note</dt><dd>Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</dd></dl>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
     <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to decompress </td></tr>
     <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
-    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV</a> to determine the appropriate size for this buffer based on the image width, height, and level of subsampling. </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV()</a> to determine the appropriate size for this buffer based on the image width, height, and level of subsampling. </td></tr>
     <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga674adee917b95ad4a896f1ba39e12540"></a><!-- doxytag: member="turbojpeg.h::tjDestroy" ref="ga674adee917b95ad4a896f1ba39e12540" args="(tjhandle handle)" -->
+<a class="anchor" id="ga674adee917b95ad4a896f1ba39e12540"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1152,21 +1197,20 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Destroy a TurboJPEG compressor, decompressor, or transformer instance. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor, decompressor or transformer instance</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga0fa4e7b1943687c6a0c0304529c55d35"></a><!-- doxytag: member="turbojpeg.h::tjEncodeYUV2" ref="ga0fa4e7b1943687c6a0c0304529c55d35" args="(tjhandle handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int subsamp, int flags)" -->
+<a class="anchor" id="ga0fa4e7b1943687c6a0c0304529c55d35"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1230,12 +1274,12 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Encode an RGB or grayscale image into a YUV planar image. </p>
-<p>This function uses the accelerated color conversion routines in TurboJPEG's underlying codec to produce a planar YUV image that is suitable for X Video. Specifically, if the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane is padded to 2 in the output image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) Also, each line of each plane in the output image is padded to 4 bytes. Although this will work with any subsampling option, it is really only useful in combination with TJ_420, which produces an image compatible with the I420 (AKA "YUV420P") format.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<p>This function uses the accelerated color conversion routines in TurboJPEG's underlying codec to produce a planar YUV image that is suitable for X Video. Specifically, if the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane is padded to the nearest multiple of 2 in the output image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) Also, each line of each plane in the output image is padded to the nearest multiple of 4 bytes. Although this will work with any subsampling option, it is really only useful in combination with TJSAMP_420, which produces an image compatible with the I420 (AKA "YUV420P") format. </p>
+<dl class="section note"><dt>Note</dt><dd>Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</dd></dl>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
     <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB or grayscale pixels to be encoded </td></tr>
@@ -1249,11 +1293,11 @@
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga8c4a1231dc06a450514c835f6471f137"></a><!-- doxytag: member="turbojpeg.h::tjFree" ref="ga8c4a1231dc06a450514c835f6471f137" args="(unsigned char *buffer)" -->
+<a class="anchor" id="ga8c4a1231dc06a450514c835f6471f137"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1265,22 +1309,21 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Free an image buffer previously allocated by TurboJPEG. </p>
 <p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">buffer</td><td>address of the buffer to free</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="see"><dt><b>See also:</b></dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> </dd></dl>
+<dl class="section see"><dt>See Also</dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga9af79c908ec131b1ae8d52fe40375abf"></a><!-- doxytag: member="turbojpeg.h::tjGetErrorStr" ref="ga9af79c908ec131b1ae8d52fe40375abf" args="(void)" -->
+<a class="anchor" id="ga9af79c908ec131b1ae8d52fe40375abf"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1292,15 +1335,14 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Returns a descriptive error message explaining why the last command failed. </p>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a descriptive error message explaining why the last command failed. </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a descriptive error message explaining why the last command failed. </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga6449044b9af402999ccf52f401333be8"></a><!-- doxytag: member="turbojpeg.h::tjGetScalingFactors" ref="ga6449044b9af402999ccf52f401333be8" args="(int *numscalingfactors)" -->
+<a class="anchor" id="ga6449044b9af402999ccf52f401333be8"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1312,21 +1354,20 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. </p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">numscalingfactors</td><td>pointer to an integer variable that will receive the number of elements in the list</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a pointer to a list of fractional scaling factors, or NULL if an error is encountered (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a pointer to a list of fractional scaling factors, or NULL if an error is encountered (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga3d10c47fbe4a2489a2b30c931551d01a"></a><!-- doxytag: member="turbojpeg.h::tjInitCompress" ref="ga3d10c47fbe4a2489a2b30c931551d01a" args="(void)" -->
+<a class="anchor" id="ga3d10c47fbe4a2489a2b30c931551d01a"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1338,15 +1379,14 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Create a TurboJPEG compressor instance. </p>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gae5408179d041e2a2f7199c8283cf649e"></a><!-- doxytag: member="turbojpeg.h::tjInitDecompress" ref="gae5408179d041e2a2f7199c8283cf649e" args="(void)" -->
+<a class="anchor" id="gae5408179d041e2a2f7199c8283cf649e"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1358,15 +1398,14 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Create a TurboJPEG decompressor instance. </p>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="ga3155b775bfbac9dbba869b95a0367902"></a><!-- doxytag: member="turbojpeg.h::tjInitTransform" ref="ga3155b775bfbac9dbba869b95a0367902" args="(void)" -->
+<a class="anchor" id="ga3155b775bfbac9dbba869b95a0367902"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1378,15 +1417,14 @@
           <td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Create a new TurboJPEG transformer instance. </p>
-<dl class="return"><dt><b>Returns:</b></dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<a class="anchor" id="gae403193ceb4aafb7e0f56ab587b48616"></a><!-- doxytag: member="turbojpeg.h::tjTransform" ref="gae403193ceb4aafb7e0f56ab587b48616" args="(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags)" -->
+<a class="anchor" id="gae403193ceb4aafb7e0f56ab587b48616"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
@@ -1444,12 +1482,11 @@
           <td></td><td></td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>Losslessly transform a JPEG image into another JPEG image. </p>
-<p>Lossless transforms work by moving the raw coefficients from one JPEG image structure to another without altering the values of the coefficients. While this is typically faster than decompressing the image, transforming it, and re-compressing it, lossless transforms are not free. Each lossless transform requires reading and Huffman decoding all of the coefficients in the source image, regardless of the size of the destination image. Thus, this function provides a means of generating multiple transformed images from the same source or of applying multiple transformations simultaneously, in order to eliminate the need to read the source coefficients multiple times.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
+<p>Lossless transforms work by moving the raw coefficients from one JPEG image structure to another without altering the values of the coefficients. While this is typically faster than decompressing the image, transforming it, and re-compressing it, lossless transforms are not free. Each lossless transform requires reading and performing Huffman decoding on all of the coefficients in the source image, regardless of the size of the destination image. Thus, this function provides a means of generating multiple transformed images from the same source or applying multiple transformations simultaneously, in order to eliminate the need to read the source coefficients multiple times.</p>
+<dl class="params"><dt>Parameters</dt><dd>
   <table class="params">
     <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG transformer instance </td></tr>
     <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to transform </td></tr>
@@ -1458,62 +1495,83 @@
     <tr><td class="paramname">dstBufs</td><td>pointer to an array of n image buffers. <code>dstBufs[i]</code> will receive a JPEG image that has been transformed using the parameters in <code>transforms[i]</code>. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:<ol type="1">
 <li>pre-allocate the JPEG buffer with an arbitrary size using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> and let TurboJPEG grow the buffer as needed,</li>
 <li>set <code>dstBufs[i]</code> to NULL to tell TurboJPEG to allocate the buffer for you, or</li>
-<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a> with the cropped width and height. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
+<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a> with the transformed or cropped width and height. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
 </ol>
 If you choose option 1, <code>dstSizes[i]</code> should be set to the size of your pre-allocated buffer. In any case, unless you have set <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>, you should always check <code>dstBufs[i]</code> upon return from this function, as it may have changed. </td></tr>
     <tr><td class="paramname">dstSizes</td><td>pointer to an array of n unsigned long variables that will receive the actual sizes (in bytes) of each transformed JPEG image. If <code>dstBufs[i]</code> points to a pre-allocated buffer, then <code>dstSizes[i]</code> should be set to the size of the buffer. Upon return, <code>dstSizes[i]</code> will contain the size of the JPEG image (in bytes.) </td></tr>
-    <tr><td class="paramname">transforms</td><td>pointer to an array of n tjtransform structures, each of which specifies the transform parameters and/or cropping region for the corresponding transformed output image. </td></tr>
+    <tr><td class="paramname">transforms</td><td>pointer to an array of n <a class="el" href="structtjtransform.html" title="Lossless transform.">tjtransform</a> structures, each of which specifies the transform parameters and/or cropping region for the corresponding transformed output image. </td></tr>
     <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
   </table>
   </dd>
 </dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
 
 </div>
 </div>
-<hr/><h2>Variable Documentation</h2>
-<a class="anchor" id="ga84e2e35d3f08025f976ec1ec53693dea"></a><!-- doxytag: member="turbojpeg.h::tjBlueOffset" ref="ga84e2e35d3f08025f976ec1ec53693dea" args="[TJ_NUMPF]" -->
+<h2 class="groupheader">Variable Documentation</h2>
+<a class="anchor" id="ga84e2e35d3f08025f976ec1ec53693dea"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea">tjBlueOffset</a>[TJ_NUMPF]<code> [static]</code></td>
+          <td class="memname">const int tjBlueOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>Blue offset (in bytes) for a given pixel format. </p>
 <p>This specifies the number of bytes that the Blue component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the blue component will be <code>pixel[tjBlueOffset[TJ_BGRX]]</code>. </p>
 
 </div>
 </div>
-<a class="anchor" id="ga82d6e35da441112a411da41923c0ba2f"></a><!-- doxytag: member="turbojpeg.h::tjGreenOffset" ref="ga82d6e35da441112a411da41923c0ba2f" args="[TJ_NUMPF]" -->
+<a class="anchor" id="ga82d6e35da441112a411da41923c0ba2f"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f">tjGreenOffset</a>[TJ_NUMPF]<code> [static]</code></td>
+          <td class="memname">const int tjGreenOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>Green offset (in bytes) for a given pixel format. </p>
 <p>This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the green component will be <code>pixel[tjGreenOffset[TJ_BGRX]]</code>. </p>
 
 </div>
 </div>
-<a class="anchor" id="gabd247bb9fecb393eca57366feb8327bf"></a><!-- doxytag: member="turbojpeg.h::tjMCUHeight" ref="gabd247bb9fecb393eca57366feb8327bf" args="[TJ_NUMSAMP]" -->
+<a class="anchor" id="gabd247bb9fecb393eca57366feb8327bf"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf">tjMCUHeight</a>[TJ_NUMSAMP]<code> [static]</code></td>
+          <td class="memname">const int tjMCUHeight[<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>MCU block height (in pixels) for a given level of chrominance subsampling. </p>
 <p>MCU block sizes:</p>
@@ -1526,16 +1584,23 @@
 
 </div>
 </div>
-<a class="anchor" id="ga9e61e7cd47a15a173283ba94e781308c"></a><!-- doxytag: member="turbojpeg.h::tjMCUWidth" ref="ga9e61e7cd47a15a173283ba94e781308c" args="[TJ_NUMSAMP]" -->
+<a class="anchor" id="ga9e61e7cd47a15a173283ba94e781308c"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c">tjMCUWidth</a>[TJ_NUMSAMP]<code> [static]</code></td>
+          <td class="memname">const int tjMCUWidth[<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>MCU block width (in pixels) for a given level of chrominance subsampling. </p>
 <p>MCU block sizes:</p>
@@ -1548,54 +1613,57 @@
 
 </div>
 </div>
-<a class="anchor" id="gad77cf8fe5b2bfd3cb3f53098146abb4c"></a><!-- doxytag: member="turbojpeg.h::tjPixelSize" ref="gad77cf8fe5b2bfd3cb3f53098146abb4c" args="[TJ_NUMPF]" -->
+<a class="anchor" id="gad77cf8fe5b2bfd3cb3f53098146abb4c"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c">tjPixelSize</a>[TJ_NUMPF]<code> [static]</code></td>
+          <td class="memname">const int tjPixelSize[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>Pixel size (in bytes) for a given pixel format. </p>
 
 </div>
 </div>
-<a class="anchor" id="gadd9b446742ac8a3923f7992c7988fea8"></a><!-- doxytag: member="turbojpeg.h::tjRedOffset" ref="gadd9b446742ac8a3923f7992c7988fea8" args="[TJ_NUMPF]" -->
+<a class="anchor" id="gadd9b446742ac8a3923f7992c7988fea8"></a>
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">const int <a class="el" href="group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8">tjRedOffset</a>[TJ_NUMPF]<code> [static]</code></td>
+          <td class="memname">const int tjRedOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
 
 <p>Red offset (in bytes) for a given pixel format. </p>
 <p>This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the red component will be <code>pixel[tjRedOffset[TJ_BGRX]]</code>. </p>
 
 </div>
 </div>
-</div>
-<!-- window showing the filter options -->
-<div id="MSearchSelectWindow"
-     onmouseover="return searchBox.OnSearchSelectShow()"
-     onmouseout="return searchBox.OnSearchSelectHide()"
-     onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
-
-<!-- iframe showing the search results (closed by default) -->
-<div id="MSearchResultsWindow">
-<iframe src="javascript:void(0)" frameborder="0" 
-        name="MSearchResults" id="MSearchResults">
-</iframe>
-</div>
-
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/index.html b/doc/html/index.html
index 4c7b84c..45a5136 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Main Page</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li class="current"><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -48,19 +59,13 @@
       </li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="headertitle">
-<div class="title">TurboJPEG Documentation</div>  </div>
-</div>
-<div class="contents">
-</div>
+</div><!-- top -->
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -69,8 +74,17 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+<div class="header">
+  <div class="headertitle">
+<div class="title">TurboJPEG Documentation</div>  </div>
+</div><!--header-->
+<div class="contents">
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/installdox b/doc/html/installdox
deleted file mode 100755
index edf5bbf..0000000
--- a/doc/html/installdox
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/perl
-
-%subst = ( );
-$quiet   = 0;
-
-while ( @ARGV ) {
-  $_ = shift @ARGV;
-  if ( s/^-// ) {
-    if ( /^l(.*)/ ) {
-      $v = ($1 eq "") ? shift @ARGV : $1;
-      ($v =~ /\/$/) || ($v .= "/");
-      $_ = $v;
-      if ( /(.+)\@(.+)/ ) {
-        if ( exists $subst{$1} ) {
-          $subst{$1} = $2;
-        } else {
-          print STDERR "Unknown tag file $1 given with option -l\n";
-          &usage();
-        }
-      } else {
-        print STDERR "Argument $_ is invalid for option -l\n";
-        &usage();
-      }
-    }
-    elsif ( /^q/ ) {
-      $quiet = 1;
-    }
-    elsif ( /^\?|^h/ ) {
-      &usage();
-    }
-    else {
-      print STDERR "Illegal option -$_\n";
-      &usage();
-    }
-  }
-  else {
-    push (@files, $_ );
-  }
-}
-
-foreach $sub (keys %subst)
-{
-  if ( $subst{$sub} eq "" ) 
-  {
-    print STDERR "No substitute given for tag file `$sub'\n";
-    &usage();
-  }
-  elsif ( ! $quiet && $sub ne "_doc" && $sub ne "_cgi" )
-  {
-    print "Substituting $subst{$sub} for each occurrence of tag file $sub\n"; 
-  }
-}
-
-if ( ! @files ) {
-  if (opendir(D,".")) {
-    foreach $file ( readdir(D) ) {
-      $match = ".html";
-      next if ( $file =~ /^\.\.?$/ );
-      ($file =~ /$match/) && (push @files, $file);
-      ($file =~ /\.svg/) && (push @files, $file);
-      ($file =~ "navtree.js") && (push @files, $file);
-    }
-    closedir(D);
-  }
-}
-
-if ( ! @files ) {
-  print STDERR "Warning: No input files given and none found!\n";
-}
-
-foreach $f (@files)
-{
-  if ( ! $quiet ) {
-    print "Editing: $f...\n";
-  }
-  $oldf = $f;
-  $f   .= ".bak";
-  unless (rename $oldf,$f) {
-    print STDERR "Error: cannot rename file $oldf\n";
-    exit 1;
-  }
-  if (open(F,"<$f")) {
-    unless (open(G,">$oldf")) {
-      print STDERR "Error: opening file $oldf for writing\n";
-      exit 1;
-    }
-    if ($oldf ne "tree.js") {
-      while (<F>) {
-        s/doxygen\=\"([^ \"\:\t\>\<]*)\:([^ \"\t\>\<]*)\" (xlink:href|href|src)=\"\2/doxygen\=\"$1:$subst{$1}\" \3=\"$subst{$1}/g;
-        print G "$_";
-      }
-    }
-    else {
-      while (<F>) {
-        s/\"([^ \"\:\t\>\<]*)\:([^ \"\t\>\<]*)\", \"\2/\"$1:$subst{$1}\" ,\"$subst{$1}/g;
-        print G "$_";
-      }
-    }
-  } 
-  else {
-    print STDERR "Warning file $f does not exist\n";
-  }
-  unlink $f;
-}
-
-sub usage {
-  print STDERR "Usage: installdox [options] [html-file [html-file ...]]\n";
-  print STDERR "Options:\n";
-  print STDERR "     -l tagfile\@linkName   tag file + URL or directory \n";
-  print STDERR "     -q                    Quiet mode\n\n";
-  exit 1;
-}
diff --git a/doc/html/jquery.js b/doc/html/jquery.js
index c052173..63939e7 100644
--- a/doc/html/jquery.js
+++ b/doc/html/jquery.js
@@ -1,54 +1,8 @@
-/*
- * jQuery JavaScript Library v1.3.2
- * http://jquery.com/
- *
- * Copyright (c) 2009 John Resig
- * Dual licensed under the MIT and GPL licenses.
- * http://docs.jquery.com/License
- *
- * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009)
- * Revision: 6246
- */
-(function(){var l=this,g,y=l.jQuery,p=l.$,o=l.jQuery=l.$=function(E,F){return new o.fn.init(E,F)},D=/^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/,f=/^.[^:#\[\.,]*$/;o.fn=o.prototype={init:function(E,H){E=E||document;if(E.nodeType){this[0]=E;this.length=1;this.context=E;return this}if(typeof E==="string"){var G=D.exec(E);if(G&&(G[1]||!H)){if(G[1]){E=o.clean([G[1]],H)}else{var I=document.getElementById(G[3]);if(I&&I.id!=G[3]){return o().find(E)}var F=o(I||[]);F.context=document;F.selector=E;return F}}else{return o(H).find(E)}}else{if(o.isFunction(E)){return o(document).ready(E)}}if(E.selector&&E.context){this.selector=E.selector;this.context=E.context}return this.setArray(o.isArray(E)?E:o.makeArray(E))},selector:"",jquery:"1.3.2",size:function(){return this.length},get:function(E){return E===g?Array.prototype.slice.call(this):this[E]},pushStack:function(F,H,E){var G=o(F);G.prevObject=this;G.context=this.context;if(H==="find"){G.selector=this.selector+(this.selector?" ":"")+E}else{if(H){G.selector=this.selector+"."+H+"("+E+")"}}return G},setArray:function(E){this.length=0;Array.prototype.push.apply(this,E);return this},each:function(F,E){return o.each(this,F,E)},index:function(E){return o.inArray(E&&E.jquery?E[0]:E,this)},attr:function(F,H,G){var E=F;if(typeof F==="string"){if(H===g){return this[0]&&o[G||"attr"](this[0],F)}else{E={};E[F]=H}}return this.each(function(I){for(F in E){o.attr(G?this.style:this,F,o.prop(this,E[F],G,I,F))}})},css:function(E,F){if((E=="width"||E=="height")&&parseFloat(F)<0){F=g}return this.attr(E,F,"curCSS")},text:function(F){if(typeof F!=="object"&&F!=null){return this.empty().append((this[0]&&this[0].ownerDocument||document).createTextNode(F))}var E="";o.each(F||this,function(){o.each(this.childNodes,function(){if(this.nodeType!=8){E+=this.nodeType!=1?this.nodeValue:o.fn.text([this])}})});return E},wrapAll:function(E){if(this[0]){var F=o(E,this[0].ownerDocument).clone();if(this[0].parentNode){F.insertBefore(this[0])}F.map(function(){var 
G=this;while(G.firstChild){G=G.firstChild}return G}).append(this)}return this},wrapInner:function(E){return this.each(function(){o(this).contents().wrapAll(E)})},wrap:function(E){return this.each(function(){o(this).wrapAll(E)})},append:function(){return this.domManip(arguments,true,function(E){if(this.nodeType==1){this.appendChild(E)}})},prepend:function(){return this.domManip(arguments,true,function(E){if(this.nodeType==1){this.insertBefore(E,this.firstChild)}})},before:function(){return this.domManip(arguments,false,function(E){this.parentNode.insertBefore(E,this)})},after:function(){return this.domManip(arguments,false,function(E){this.parentNode.insertBefore(E,this.nextSibling)})},end:function(){return this.prevObject||o([])},push:[].push,sort:[].sort,splice:[].splice,find:function(E){if(this.length===1){var F=this.pushStack([],"find",E);F.length=0;o.find(E,this[0],F);return F}else{return this.pushStack(o.unique(o.map(this,function(G){return o.find(E,G)})),"find",E)}},clone:function(G){var E=this.map(function(){if(!o.support.noCloneEvent&&!o.isXMLDoc(this)){var I=this.outerHTML;if(!I){var J=this.ownerDocument.createElement("div");J.appendChild(this.cloneNode(true));I=J.innerHTML}return o.clean([I.replace(/ jQuery\d+="(?:\d+|null)"/g,"").replace(/^\s*/,"")])[0]}else{return this.cloneNode(true)}});if(G===true){var H=this.find("*").andSelf(),F=0;E.find("*").andSelf().each(function(){if(this.nodeName!==H[F].nodeName){return}var I=o.data(H[F],"events");for(var K in I){for(var J in I[K]){o.event.add(this,K,I[K][J],I[K][J].data)}}F++})}return E},filter:function(E){return this.pushStack(o.isFunction(E)&&o.grep(this,function(G,F){return E.call(G,F)})||o.multiFilter(E,o.grep(this,function(F){return F.nodeType===1})),"filter",E)},closest:function(E){var G=o.expr.match.POS.test(E)?o(E):null,F=0;return this.map(function(){var H=this;while(H&&H.ownerDocument){if(G?G.index(H)>-1:o(H).is(E)){o.data(H,"closest",F);return H}H=H.parentNode;F++}})},not:function(E){if(typeof 
E==="string"){if(f.test(E)){return this.pushStack(o.multiFilter(E,this,true),"not",E)}else{E=o.multiFilter(E,this)}}var F=E.length&&E[E.length-1]!==g&&!E.nodeType;return this.filter(function(){return F?o.inArray(this,E)<0:this!=E})},add:function(E){return this.pushStack(o.unique(o.merge(this.get(),typeof E==="string"?o(E):o.makeArray(E))))},is:function(E){return !!E&&o.multiFilter(E,this).length>0},hasClass:function(E){return !!E&&this.is("."+E)},val:function(K){if(K===g){var E=this[0];if(E){if(o.nodeName(E,"option")){return(E.attributes.value||{}).specified?E.value:E.text}if(o.nodeName(E,"select")){var I=E.selectedIndex,L=[],M=E.options,H=E.type=="select-one";if(I<0){return null}for(var F=H?I:0,J=H?I+1:M.length;F<J;F++){var G=M[F];if(G.selected){K=o(G).val();if(H){return K}L.push(K)}}return L}return(E.value||"").replace(/\r/g,"")}return g}if(typeof K==="number"){K+=""}return this.each(function(){if(this.nodeType!=1){return}if(o.isArray(K)&&/radio|checkbox/.test(this.type)){this.checked=(o.inArray(this.value,K)>=0||o.inArray(this.name,K)>=0)}else{if(o.nodeName(this,"select")){var N=o.makeArray(K);o("option",this).each(function(){this.selected=(o.inArray(this.value,N)>=0||o.inArray(this.text,N)>=0)});if(!N.length){this.selectedIndex=-1}}else{this.value=K}}})},html:function(E){return E===g?(this[0]?this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g,""):null):this.empty().append(E)},replaceWith:function(E){return this.after(E).remove()},eq:function(E){return this.slice(E,+E+1)},slice:function(){return this.pushStack(Array.prototype.slice.apply(this,arguments),"slice",Array.prototype.slice.call(arguments).join(","))},map:function(E){return this.pushStack(o.map(this,function(G,F){return E.call(G,F,G)}))},andSelf:function(){return this.add(this.prevObject)},domManip:function(J,M,L){if(this[0]){var I=(this[0].ownerDocument||this[0]).createDocumentFragment(),F=o.clean(J,(this[0].ownerDocument||this[0]),I),H=I.firstChild;if(H){for(var 
G=0,E=this.length;G<E;G++){L.call(K(this[G],H),this.length>1||G>0?I.cloneNode(true):I)}}if(F){o.each(F,z)}}return this;function K(N,O){return M&&o.nodeName(N,"table")&&o.nodeName(O,"tr")?(N.getElementsByTagName("tbody")[0]||N.appendChild(N.ownerDocument.createElement("tbody"))):N}}};o.fn.init.prototype=o.fn;function z(E,F){if(F.src){o.ajax({url:F.src,async:false,dataType:"script"})}else{o.globalEval(F.text||F.textContent||F.innerHTML||"")}if(F.parentNode){F.parentNode.removeChild(F)}}function e(){return +new Date}o.extend=o.fn.extend=function(){var J=arguments[0]||{},H=1,I=arguments.length,E=false,G;if(typeof J==="boolean"){E=J;J=arguments[1]||{};H=2}if(typeof J!=="object"&&!o.isFunction(J)){J={}}if(I==H){J=this;--H}for(;H<I;H++){if((G=arguments[H])!=null){for(var F in G){var K=J[F],L=G[F];if(J===L){continue}if(E&&L&&typeof L==="object"&&!L.nodeType){J[F]=o.extend(E,K||(L.length!=null?[]:{}),L)}else{if(L!==g){J[F]=L}}}}}return J};var b=/z-?index|font-?weight|opacity|zoom|line-?height/i,q=document.defaultView||{},s=Object.prototype.toString;o.extend({noConflict:function(E){l.$=p;if(E){l.jQuery=y}return o},isFunction:function(E){return s.call(E)==="[object Function]"},isArray:function(E){return s.call(E)==="[object Array]"},isXMLDoc:function(E){return E.nodeType===9&&E.documentElement.nodeName!=="HTML"||!!E.ownerDocument&&o.isXMLDoc(E.ownerDocument)},globalEval:function(G){if(G&&/\S/.test(G)){var F=document.getElementsByTagName("head")[0]||document.documentElement,E=document.createElement("script");E.type="text/javascript";if(o.support.scriptEval){E.appendChild(document.createTextNode(G))}else{E.text=G}F.insertBefore(E,F.firstChild);F.removeChild(E)}},nodeName:function(F,E){return F.nodeName&&F.nodeName.toUpperCase()==E.toUpperCase()},each:function(G,K,F){var E,H=0,I=G.length;if(F){if(I===g){for(E in G){if(K.apply(G[E],F)===false){break}}}else{for(;H<I;){if(K.apply(G[H++],F)===false){break}}}}else{if(I===g){for(E in 
G){if(K.call(G[E],E,G[E])===false){break}}}else{for(var J=G[0];H<I&&K.call(J,H,J)!==false;J=G[++H]){}}}return G},prop:function(H,I,G,F,E){if(o.isFunction(I)){I=I.call(H,F)}return typeof I==="number"&&G=="curCSS"&&!b.test(E)?I+"px":I},className:{add:function(E,F){o.each((F||"").split(/\s+/),function(G,H){if(E.nodeType==1&&!o.className.has(E.className,H)){E.className+=(E.className?" ":"")+H}})},remove:function(E,F){if(E.nodeType==1){E.className=F!==g?o.grep(E.className.split(/\s+/),function(G){return !o.className.has(F,G)}).join(" "):""}},has:function(F,E){return F&&o.inArray(E,(F.className||F).toString().split(/\s+/))>-1}},swap:function(H,G,I){var E={};for(var F in G){E[F]=H.style[F];H.style[F]=G[F]}I.call(H);for(var F in G){H.style[F]=E[F]}},css:function(H,F,J,E){if(F=="width"||F=="height"){var L,G={position:"absolute",visibility:"hidden",display:"block"},K=F=="width"?["Left","Right"]:["Top","Bottom"];function I(){L=F=="width"?H.offsetWidth:H.offsetHeight;if(E==="border"){return}o.each(K,function(){if(!E){L-=parseFloat(o.curCSS(H,"padding"+this,true))||0}if(E==="margin"){L+=parseFloat(o.curCSS(H,"margin"+this,true))||0}else{L-=parseFloat(o.curCSS(H,"border"+this+"Width",true))||0}})}if(H.offsetWidth!==0){I()}else{o.swap(H,G,I)}return Math.max(0,Math.round(L))}return o.curCSS(H,F,J)},curCSS:function(I,F,G){var L,E=I.style;if(F=="opacity"&&!o.support.opacity){L=o.attr(E,"opacity");return L==""?"1":L}if(F.match(/float/i)){F=w}if(!G&&E&&E[F]){L=E[F]}else{if(q.getComputedStyle){if(F.match(/float/i)){F="float"}F=F.replace(/([A-Z])/g,"-$1").toLowerCase();var M=q.getComputedStyle(I,null);if(M){L=M.getPropertyValue(F)}if(F=="opacity"&&L==""){L="1"}}else{if(I.currentStyle){var J=F.replace(/\-(\w)/g,function(N,O){return O.toUpperCase()});L=I.currentStyle[F]||I.currentStyle[J];if(!/^\d+(px)?$/i.test(L)&&/^\d/.test(L)){var H=E.left,K=I.runtimeStyle.left;I.runtimeStyle.left=I.currentStyle.left;E.left=L||0;L=E.pixelLeft+"px";E.left=H;I.runtimeStyle.left=K}}}}return 
L},clean:function(F,K,I){K=K||document;if(typeof K.createElement==="undefined"){K=K.ownerDocument||K[0]&&K[0].ownerDocument||document}if(!I&&F.length===1&&typeof F[0]==="string"){var H=/^<(\w+)\s*\/?>$/.exec(F[0]);if(H){return[K.createElement(H[1])]}}var G=[],E=[],L=K.createElement("div");o.each(F,function(P,S){if(typeof S==="number"){S+=""}if(!S){return}if(typeof S==="string"){S=S.replace(/(<(\w+)[^>]*?)\/>/g,function(U,V,T){return T.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i)?U:V+"></"+T+">"});var O=S.replace(/^\s+/,"").substring(0,10).toLowerCase();var Q=!O.indexOf("<opt")&&[1,"<select multiple='multiple'>","</select>"]||!O.indexOf("<leg")&&[1,"<fieldset>","</fieldset>"]||O.match(/^<(thead|tbody|tfoot|colg|cap)/)&&[1,"<table>","</table>"]||!O.indexOf("<tr")&&[2,"<table><tbody>","</tbody></table>"]||(!O.indexOf("<td")||!O.indexOf("<th"))&&[3,"<table><tbody><tr>","</tr></tbody></table>"]||!O.indexOf("<col")&&[2,"<table><tbody></tbody><colgroup>","</colgroup></table>"]||!o.support.htmlSerialize&&[1,"div<div>","</div>"]||[0,"",""];L.innerHTML=Q[1]+S+Q[2];while(Q[0]--){L=L.lastChild}if(!o.support.tbody){var R=/<tbody/i.test(S),N=!O.indexOf("<table")&&!R?L.firstChild&&L.firstChild.childNodes:Q[1]=="<table>"&&!R?L.childNodes:[];for(var M=N.length-1;M>=0;--M){if(o.nodeName(N[M],"tbody")&&!N[M].childNodes.length){N[M].parentNode.removeChild(N[M])}}}if(!o.support.leadingWhitespace&&/^\s/.test(S)){L.insertBefore(K.createTextNode(S.match(/^\s*/)[0]),L.firstChild)}S=o.makeArray(L.childNodes)}if(S.nodeType){G.push(S)}else{G=o.merge(G,S)}});if(I){for(var J=0;G[J];J++){if(o.nodeName(G[J],"script")&&(!G[J].type||G[J].type.toLowerCase()==="text/javascript")){E.push(G[J].parentNode?G[J].parentNode.removeChild(G[J]):G[J])}else{if(G[J].nodeType===1){G.splice.apply(G,[J+1,0].concat(o.makeArray(G[J].getElementsByTagName("script"))))}I.appendChild(G[J])}}return E}return G},attr:function(J,G,K){if(!J||J.nodeType==3||J.nodeType==8){return g}var 
H=!o.isXMLDoc(J),L=K!==g;G=H&&o.props[G]||G;if(J.tagName){var F=/href|src|style/.test(G);if(G=="selected"&&J.parentNode){J.parentNode.selectedIndex}if(G in J&&H&&!F){if(L){if(G=="type"&&o.nodeName(J,"input")&&J.parentNode){throw"type property can't be changed"}J[G]=K}if(o.nodeName(J,"form")&&J.getAttributeNode(G)){return J.getAttributeNode(G).nodeValue}if(G=="tabIndex"){var I=J.getAttributeNode("tabIndex");return I&&I.specified?I.value:J.nodeName.match(/(button|input|object|select|textarea)/i)?0:J.nodeName.match(/^(a|area)$/i)&&J.href?0:g}return J[G]}if(!o.support.style&&H&&G=="style"){return o.attr(J.style,"cssText",K)}if(L){J.setAttribute(G,""+K)}var E=!o.support.hrefNormalized&&H&&F?J.getAttribute(G,2):J.getAttribute(G);return E===null?g:E}if(!o.support.opacity&&G=="opacity"){if(L){J.zoom=1;J.filter=(J.filter||"").replace(/alpha\([^)]*\)/,"")+(parseInt(K)+""=="NaN"?"":"alpha(opacity="+K*100+")")}return J.filter&&J.filter.indexOf("opacity=")>=0?(parseFloat(J.filter.match(/opacity=([^)]*)/)[1])/100)+"":""}G=G.replace(/-([a-z])/ig,function(M,N){return N.toUpperCase()});if(L){J[G]=K}return J[G]},trim:function(E){return(E||"").replace(/^\s+|\s+$/g,"")},makeArray:function(G){var E=[];if(G!=null){var F=G.length;if(F==null||typeof G==="string"||o.isFunction(G)||G.setInterval){E[0]=G}else{while(F){E[--F]=G[F]}}}return E},inArray:function(G,H){for(var E=0,F=H.length;E<F;E++){if(H[E]===G){return E}}return -1},merge:function(H,E){var F=0,G,I=H.length;if(!o.support.getAll){while((G=E[F++])!=null){if(G.nodeType!=8){H[I++]=G}}}else{while((G=E[F++])!=null){H[I++]=G}}return H},unique:function(K){var F=[],E={};try{for(var G=0,H=K.length;G<H;G++){var J=o.data(K[G]);if(!E[J]){E[J]=true;F.push(K[G])}}}catch(I){F=K}return F},grep:function(F,J,E){var G=[];for(var H=0,I=F.length;H<I;H++){if(!E!=!J(F[H],H)){G.push(F[H])}}return G},map:function(E,J){var F=[];for(var G=0,H=E.length;G<H;G++){var I=J(E[G],G);if(I!=null){F[F.length]=I}}return F.concat.apply([],F)}});var 
C=navigator.userAgent.toLowerCase();o.browser={version:(C.match(/.+(?:rv|it|ra|ie)[\/: ]([\d.]+)/)||[0,"0"])[1],safari:/webkit/.test(C),opera:/opera/.test(C),msie:/msie/.test(C)&&!/opera/.test(C),mozilla:/mozilla/.test(C)&&!/(compatible|webkit)/.test(C)};o.each({parent:function(E){return E.parentNode},parents:function(E){return o.dir(E,"parentNode")},next:function(E){return o.nth(E,2,"nextSibling")},prev:function(E){return o.nth(E,2,"previousSibling")},nextAll:function(E){return o.dir(E,"nextSibling")},prevAll:function(E){return o.dir(E,"previousSibling")},siblings:function(E){return o.sibling(E.parentNode.firstChild,E)},children:function(E){return o.sibling(E.firstChild)},contents:function(E){return o.nodeName(E,"iframe")?E.contentDocument||E.contentWindow.document:o.makeArray(E.childNodes)}},function(E,F){o.fn[E]=function(G){var H=o.map(this,F);if(G&&typeof G=="string"){H=o.multiFilter(G,H)}return this.pushStack(o.unique(H),E,G)}});o.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(E,F){o.fn[E]=function(G){var J=[],L=o(G);for(var K=0,H=L.length;K<H;K++){
-var I=(K>0?this.clone(true):this).get();o.fn[F].apply(o(L[K]),I);J=J.concat(I)}return this.pushStack(J,E,G)}});o.each({removeAttr:function(E){o.attr(this,E,"");if(this.nodeType==1){this.removeAttribute(E)}},addClass:function(E){o.className.add(this,E)},removeClass:function(E){o.className.remove(this,E)},toggleClass:function(F,E){if(typeof E!=="boolean"){E=!o.className.has(this,F)}o.className[E?"add":"remove"](this,F)},remove:function(E){if(!E||o.filter(E,[this]).length){o("*",this).add([this]).each(function(){o.event.remove(this);o.removeData(this)});if(this.parentNode){this.parentNode.removeChild(this)}}},empty:function(){o(this).children().remove();while(this.firstChild){this.removeChild(this.firstChild)}}},function(E,F){o.fn[E]=function(){return this.each(F,arguments)}});function j(E,F){return E[0]&&parseInt(o.curCSS(E[0],F,true),10)||0}var h="jQuery"+e(),v=0,A={};o.extend({cache:{},data:function(F,E,G){F=F==l?A:F;var H=F[h];if(!H){H=F[h]=++v}if(E&&!o.cache[H]){o.cache[H]={}}if(G!==g){o.cache[H][E]=G}return E?o.cache[H][E]:H},removeData:function(F,E){F=F==l?A:F;var H=F[h];if(E){if(o.cache[H]){delete o.cache[H][E];E="";for(E in o.cache[H]){break}if(!E){o.removeData(F)}}}else{try{delete F[h]}catch(G){if(F.removeAttribute){F.removeAttribute(h)}}delete o.cache[H]}},queue:function(F,E,H){if(F){E=(E||"fx")+"queue";var G=o.data(F,E);if(!G||o.isArray(H)){G=o.data(F,E,o.makeArray(H))}else{if(H){G.push(H)}}}return G},dequeue:function(H,G){var E=o.queue(H,G),F=E.shift();if(!G||G==="fx"){F=E[0]}if(F!==g){F.call(H)}}});o.fn.extend({data:function(E,G){var H=E.split(".");H[1]=H[1]?"."+H[1]:"";if(G===g){var F=this.triggerHandler("getData"+H[1]+"!",[H[0]]);if(F===g&&this.length){F=o.data(this[0],E)}return F===g&&H[1]?this.data(H[0]):F}else{return this.trigger("setData"+H[1]+"!",[H[0],G]).each(function(){o.data(this,E,G)})}},removeData:function(E){return this.each(function(){o.removeData(this,E)})},queue:function(E,F){if(typeof E!=="string"){F=E;E="fx"}if(F===g){return 
o.queue(this[0],E)}return this.each(function(){var G=o.queue(this,E,F);if(E=="fx"&&G.length==1){G[0].call(this)}})},dequeue:function(E){return this.each(function(){o.dequeue(this,E)})}});
-/*
- * Sizzle CSS Selector Engine - v0.9.3
- *  Copyright 2009, The Dojo Foundation
- *  Released under the MIT, BSD, and GPL Licenses.
- *  More information: http://sizzlejs.com/
- */
-(function(){var R=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^[\]]*\]|['"][^'"]*['"]|[^[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?/g,L=0,H=Object.prototype.toString;var F=function(Y,U,ab,ac){ab=ab||[];U=U||document;if(U.nodeType!==1&&U.nodeType!==9){return[]}if(!Y||typeof Y!=="string"){return ab}var Z=[],W,af,ai,T,ad,V,X=true;R.lastIndex=0;while((W=R.exec(Y))!==null){Z.push(W[1]);if(W[2]){V=RegExp.rightContext;break}}if(Z.length>1&&M.exec(Y)){if(Z.length===2&&I.relative[Z[0]]){af=J(Z[0]+Z[1],U)}else{af=I.relative[Z[0]]?[U]:F(Z.shift(),U);while(Z.length){Y=Z.shift();if(I.relative[Y]){Y+=Z.shift()}af=J(Y,af)}}}else{var ae=ac?{expr:Z.pop(),set:E(ac)}:F.find(Z.pop(),Z.length===1&&U.parentNode?U.parentNode:U,Q(U));af=F.filter(ae.expr,ae.set);if(Z.length>0){ai=E(af)}else{X=false}while(Z.length){var ah=Z.pop(),ag=ah;if(!I.relative[ah]){ah=""}else{ag=Z.pop()}if(ag==null){ag=U}I.relative[ah](ai,ag,Q(U))}}if(!ai){ai=af}if(!ai){throw"Syntax error, unrecognized expression: "+(ah||Y)}if(H.call(ai)==="[object Array]"){if(!X){ab.push.apply(ab,ai)}else{if(U.nodeType===1){for(var aa=0;ai[aa]!=null;aa++){if(ai[aa]&&(ai[aa]===true||ai[aa].nodeType===1&&K(U,ai[aa]))){ab.push(af[aa])}}}else{for(var aa=0;ai[aa]!=null;aa++){if(ai[aa]&&ai[aa].nodeType===1){ab.push(af[aa])}}}}}else{E(ai,ab)}if(V){F(V,U,ab,ac);if(G){hasDuplicate=false;ab.sort(G);if(hasDuplicate){for(var aa=1;aa<ab.length;aa++){if(ab[aa]===ab[aa-1]){ab.splice(aa--,1)}}}}}return ab};F.matches=function(T,U){return F(T,null,null,U)};F.find=function(aa,T,ab){var Z,X;if(!aa){return[]}for(var W=0,V=I.order.length;W<V;W++){var Y=I.order[W],X;if((X=I.match[Y].exec(aa))){var U=RegExp.leftContext;if(U.substr(U.length-1)!=="\\"){X[1]=(X[1]||"").replace(/\\/g,"");Z=I.find[Y](X,T,ab);if(Z!=null){aa=aa.replace(I.match[Y],"");break}}}}if(!Z){Z=T.getElementsByTagName("*")}return{set:Z,expr:aa}};F.filter=function(ad,ac,ag,W){var V=ad,ai=[],aa=ac,Y,T,Z=ac&&ac[0]&&Q(ac[0]);while(ad&&ac.length){for(var ab in 
I.filter){if((Y=I.match[ab].exec(ad))!=null){var U=I.filter[ab],ah,af;T=false;if(aa==ai){ai=[]}if(I.preFilter[ab]){Y=I.preFilter[ab](Y,aa,ag,ai,W,Z);if(!Y){T=ah=true}else{if(Y===true){continue}}}if(Y){for(var X=0;(af=aa[X])!=null;X++){if(af){ah=U(af,Y,X,aa);var ae=W^!!ah;if(ag&&ah!=null){if(ae){T=true}else{aa[X]=false}}else{if(ae){ai.push(af);T=true}}}}}if(ah!==g){if(!ag){aa=ai}ad=ad.replace(I.match[ab],"");if(!T){return[]}break}}}if(ad==V){if(T==null){throw"Syntax error, unrecognized expression: "+ad}else{break}}V=ad}return aa};var I=F.selectors={order:["ID","NAME","TAG"],match:{ID:/#((?:[\w\u00c0-\uFFFF_-]|\\.)+)/,CLASS:/\.((?:[\w\u00c0-\uFFFF_-]|\\.)+)/,NAME:/\[name=['"]*((?:[\w\u00c0-\uFFFF_-]|\\.)+)['"]*\]/,ATTR:/\[\s*((?:[\w\u00c0-\uFFFF_-]|\\.)+)\s*(?:(\S?=)\s*(['"]*)(.*?)\3|)\s*\]/,TAG:/^((?:[\w\u00c0-\uFFFF\*_-]|\\.)+)/,CHILD:/:(only|nth|last|first)-child(?:\((even|odd|[\dn+-]*)\))?/,POS:/:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^-]|$)/,PSEUDO:/:((?:[\w\u00c0-\uFFFF_-]|\\.)+)(?:\((['"]*)((?:\([^\)]+\)|[^\2\(\)]*)+)\2\))?/},attrMap:{"class":"className","for":"htmlFor"},attrHandle:{href:function(T){return T.getAttribute("href")}},relative:{"+":function(aa,T,Z){var X=typeof T==="string",ab=X&&!/\W/.test(T),Y=X&&!ab;if(ab&&!Z){T=T.toUpperCase()}for(var W=0,V=aa.length,U;W<V;W++){if((U=aa[W])){while((U=U.previousSibling)&&U.nodeType!==1){}aa[W]=Y||U&&U.nodeName===T?U||false:U===T}}if(Y){F.filter(T,aa,true)}},">":function(Z,U,aa){var X=typeof U==="string";if(X&&!/\W/.test(U)){U=aa?U:U.toUpperCase();for(var V=0,T=Z.length;V<T;V++){var Y=Z[V];if(Y){var W=Y.parentNode;Z[V]=W.nodeName===U?W:false}}}else{for(var V=0,T=Z.length;V<T;V++){var Y=Z[V];if(Y){Z[V]=X?Y.parentNode:Y.parentNode===U}}if(X){F.filter(U,Z,true)}}},"":function(W,U,Y){var V=L++,T=S;if(!U.match(/\W/)){var X=U=Y?U:U.toUpperCase();T=P}T("parentNode",U,V,W,X,Y)},"~":function(W,U,Y){var V=L++,T=S;if(typeof U==="string"&&!U.match(/\W/)){var 
X=U=Y?U:U.toUpperCase();T=P}T("previousSibling",U,V,W,X,Y)}},find:{ID:function(U,V,W){if(typeof V.getElementById!=="undefined"&&!W){var T=V.getElementById(U[1]);return T?[T]:[]}},NAME:function(V,Y,Z){if(typeof Y.getElementsByName!=="undefined"){var U=[],X=Y.getElementsByName(V[1]);for(var W=0,T=X.length;W<T;W++){if(X[W].getAttribute("name")===V[1]){U.push(X[W])}}return U.length===0?null:U}},TAG:function(T,U){return U.getElementsByTagName(T[1])}},preFilter:{CLASS:function(W,U,V,T,Z,aa){W=" "+W[1].replace(/\\/g,"")+" ";if(aa){return W}for(var X=0,Y;(Y=U[X])!=null;X++){if(Y){if(Z^(Y.className&&(" "+Y.className+" ").indexOf(W)>=0)){if(!V){T.push(Y)}}else{if(V){U[X]=false}}}}return false},ID:function(T){return T[1].replace(/\\/g,"")},TAG:function(U,T){for(var V=0;T[V]===false;V++){}return T[V]&&Q(T[V])?U[1]:U[1].toUpperCase()},CHILD:function(T){if(T[1]=="nth"){var U=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(T[2]=="even"&&"2n"||T[2]=="odd"&&"2n+1"||!/\D/.test(T[2])&&"0n+"+T[2]||T[2]);T[2]=(U[1]+(U[2]||1))-0;T[3]=U[3]-0}T[0]=L++;return T},ATTR:function(X,U,V,T,Y,Z){var W=X[1].replace(/\\/g,"");if(!Z&&I.attrMap[W]){X[1]=I.attrMap[W]}if(X[2]==="~="){X[4]=" "+X[4]+" "}return X},PSEUDO:function(X,U,V,T,Y){if(X[1]==="not"){if(X[3].match(R).length>1||/^\w/.test(X[3])){X[3]=F(X[3],null,null,U)}else{var W=F.filter(X[3],U,V,true^Y);if(!V){T.push.apply(T,W)}return false}}else{if(I.match.POS.test(X[0])||I.match.CHILD.test(X[0])){return true}}return X},POS:function(T){T.unshift(true);return T}},filters:{enabled:function(T){return T.disabled===false&&T.type!=="hidden"},disabled:function(T){return T.disabled===true},checked:function(T){return T.checked===true},selected:function(T){T.parentNode.selectedIndex;return T.selected===true},parent:function(T){return !!T.firstChild},empty:function(T){return !T.firstChild},has:function(V,U,T){return 
!!F(T[3],V).length},header:function(T){return/h\d/i.test(T.nodeName)},text:function(T){return"text"===T.type},radio:function(T){return"radio"===T.type},checkbox:function(T){return"checkbox"===T.type},file:function(T){return"file"===T.type},password:function(T){return"password"===T.type},submit:function(T){return"submit"===T.type},image:function(T){return"image"===T.type},reset:function(T){return"reset"===T.type},button:function(T){return"button"===T.type||T.nodeName.toUpperCase()==="BUTTON"},input:function(T){return/input|select|textarea|button/i.test(T.nodeName)}},setFilters:{first:function(U,T){return T===0},last:function(V,U,T,W){return U===W.length-1},even:function(U,T){return T%2===0},odd:function(U,T){return T%2===1},lt:function(V,U,T){return U<T[3]-0},gt:function(V,U,T){return U>T[3]-0},nth:function(V,U,T){return T[3]-0==U},eq:function(V,U,T){return T[3]-0==U}},filter:{PSEUDO:function(Z,V,W,aa){var U=V[1],X=I.filters[U];if(X){return X(Z,W,V,aa)}else{if(U==="contains"){return(Z.textContent||Z.innerText||"").indexOf(V[3])>=0}else{if(U==="not"){var Y=V[3];for(var W=0,T=Y.length;W<T;W++){if(Y[W]===Z){return false}}return true}}}},CHILD:function(T,W){var Z=W[1],U=T;switch(Z){case"only":case"first":while(U=U.previousSibling){if(U.nodeType===1){return false}}if(Z=="first"){return true}U=T;case"last":while(U=U.nextSibling){if(U.nodeType===1){return false}}return true;case"nth":var V=W[2],ac=W[3];if(V==1&&ac==0){return true}var Y=W[0],ab=T.parentNode;if(ab&&(ab.sizcache!==Y||!T.nodeIndex)){var X=0;for(U=ab.firstChild;U;U=U.nextSibling){if(U.nodeType===1){U.nodeIndex=++X}}ab.sizcache=Y}var aa=T.nodeIndex-ac;if(V==0){return aa==0}else{return(aa%V==0&&aa/V>=0)}}},ID:function(U,T){return U.nodeType===1&&U.getAttribute("id")===T},TAG:function(U,T){return(T==="*"&&U.nodeType===1)||U.nodeName===T},CLASS:function(U,T){return(" "+(U.className||U.getAttribute("class"))+" ").indexOf(T)>-1},ATTR:function(Y,W){var 
V=W[1],T=I.attrHandle[V]?I.attrHandle[V](Y):Y[V]!=null?Y[V]:Y.getAttribute(V),Z=T+"",X=W[2],U=W[4];return T==null?X==="!=":X==="="?Z===U:X==="*="?Z.indexOf(U)>=0:X==="~="?(" "+Z+" ").indexOf(U)>=0:!U?Z&&T!==false:X==="!="?Z!=U:X==="^="?Z.indexOf(U)===0:X==="$="?Z.substr(Z.length-U.length)===U:X==="|="?Z===U||Z.substr(0,U.length+1)===U+"-":false},POS:function(X,U,V,Y){var T=U[2],W=I.setFilters[T];if(W){return W(X,V,U,Y)}}}};var M=I.match.POS;for(var O in I.match){I.match[O]=RegExp(I.match[O].source+/(?![^\[]*\])(?![^\(]*\))/.source)}var E=function(U,T){U=Array.prototype.slice.call(U);if(T){T.push.apply(T,U);return T}return U};try{Array.prototype.slice.call(document.documentElement.childNodes)}catch(N){E=function(X,W){var U=W||[];if(H.call(X)==="[object Array]"){Array.prototype.push.apply(U,X)}else{if(typeof X.length==="number"){for(var V=0,T=X.length;V<T;V++){U.push(X[V])}}else{for(var V=0;X[V];V++){U.push(X[V])}}}return U}}var G;if(document.documentElement.compareDocumentPosition){G=function(U,T){var V=U.compareDocumentPosition(T)&4?-1:U===T?0:1;if(V===0){hasDuplicate=true}return V}}else{if("sourceIndex" in document.documentElement){G=function(U,T){var V=U.sourceIndex-T.sourceIndex;if(V===0){hasDuplicate=true}return V}}else{if(document.createRange){G=function(W,U){var V=W.ownerDocument.createRange(),T=U.ownerDocument.createRange();V.selectNode(W);V.collapse(true);T.selectNode(U);T.collapse(true);var X=V.compareBoundaryPoints(Range.START_TO_END,T);if(X===0){hasDuplicate=true}return X}}}}(function(){var U=document.createElement("form"),V="script"+(new Date).getTime();U.innerHTML="<input name='"+V+"'/>";var T=document.documentElement;T.insertBefore(U,T.firstChild);if(!!document.getElementById(V)){I.find.ID=function(X,Y,Z){if(typeof Y.getElementById!=="undefined"&&!Z){var W=Y.getElementById(X[1]);return W?W.id===X[1]||typeof W.getAttributeNode!=="undefined"&&W.getAttributeNode("id").nodeValue===X[1]?[W]:g:[]}};I.filter.ID=function(Y,W){var X=typeof 
Y.getAttributeNode!=="undefined"&&Y.getAttributeNode("id");return Y.nodeType===1&&X&&X.nodeValue===W}}T.removeChild(U)})();(function(){var T=document.createElement("div");T.appendChild(document.createComment(""));if(T.getElementsByTagName("*").length>0){I.find.TAG=function(U,Y){var X=Y.getElementsByTagName(U[1]);if(U[1]==="*"){var W=[];for(var V=0;X[V];V++){if(X[V].nodeType===1){W.push(X[V])}}X=W}return X}}T.innerHTML="<a href='#'></a>";if(T.firstChild&&typeof T.firstChild.getAttribute!=="undefined"&&T.firstChild.getAttribute("href")!=="#"){I.attrHandle.href=function(U){return U.getAttribute("href",2)}}})();if(document.querySelectorAll){(function(){var T=F,U=document.createElement("div");U.innerHTML="<p class='TEST'></p>";if(U.querySelectorAll&&U.querySelectorAll(".TEST").length===0){return}F=function(Y,X,V,W){X=X||document;if(!W&&X.nodeType===9&&!Q(X)){try{return E(X.querySelectorAll(Y),V)}catch(Z){}}return T(Y,X,V,W)};F.find=T.find;F.filter=T.filter;F.selectors=T.selectors;F.matches=T.matches})()}if(document.getElementsByClassName&&document.documentElement.getElementsByClassName){(function(){var T=document.createElement("div");T.innerHTML="<div class='test e'></div><div class='test'></div>";if(T.getElementsByClassName("e").length===0){return}T.lastChild.className="e";if(T.getElementsByClassName("e").length===1){return}I.order.splice(1,0,"CLASS");I.find.CLASS=function(U,V,W){if(typeof V.getElementsByClassName!=="undefined"&&!W){return V.getElementsByClassName(U[1])}}})()}function P(U,Z,Y,ad,aa,ac){var ab=U=="previousSibling"&&!ac;for(var W=0,V=ad.length;W<V;W++){var T=ad[W];if(T){if(ab&&T.nodeType===1){T.sizcache=Y;T.sizset=W}T=T[U];var X=false;while(T){if(T.sizcache===Y){X=ad[T.sizset];break}if(T.nodeType===1&&!ac){T.sizcache=Y;T.sizset=W}if(T.nodeName===Z){X=T;break}T=T[U]}ad[W]=X}}}function S(U,Z,Y,ad,aa,ac){var ab=U=="previousSibling"&&!ac;for(var W=0,V=ad.length;W<V;W++){var T=ad[W];if(T){if(ab&&T.nodeType===1){T.sizcache=Y;T.sizset=W}T=T[U];var 
X=false;while(T){if(T.sizcache===Y){X=ad[T.sizset];break}if(T.nodeType===1){if(!ac){T.sizcache=Y;T.sizset=W}if(typeof Z!=="string"){if(T===Z){X=true;break}}else{if(F.filter(Z,[T]).length>0){X=T;break}}}T=T[U]}ad[W]=X}}}var K=document.compareDocumentPosition?function(U,T){return U.compareDocumentPosition(T)&16}:function(U,T){return U!==T&&(U.contains?U.contains(T):true)};var Q=function(T){return T.nodeType===9&&T.documentElement.nodeName!=="HTML"||!!T.ownerDocument&&Q(T.ownerDocument)};var J=function(T,aa){var W=[],X="",Y,V=aa.nodeType?[aa]:aa;while((Y=I.match.PSEUDO.exec(T))){X+=Y[0];T=T.replace(I.match.PSEUDO,"")}T=I.relative[T]?T+"*":T;for(var Z=0,U=V.length;Z<U;Z++){F(T,V[Z],W)}return F.filter(X,W)};o.find=F;o.filter=F.filter;o.expr=F.selectors;o.expr[":"]=o.expr.filters;F.selectors.filters.hidden=function(T){return T.offsetWidth===0||T.offsetHeight===0};F.selectors.filters.visible=function(T){return T.offsetWidth>0||T.offsetHeight>0};F.selectors.filters.animated=function(T){return o.grep(o.timers,function(U){return T===U.elem}).length};o.multiFilter=function(V,T,U){if(U){V=":not("+V+")"}return F.matches(V,T)};o.dir=function(V,U){var T=[],W=V[U];while(W&&W!=document){if(W.nodeType==1){T.push(W)}W=W[U]}return T};o.nth=function(X,T,V,W){T=T||1;var U=0;for(;X;X=X[V]){if(X.nodeType==1&&++U==T){break}}return X};o.sibling=function(V,U){var T=[];for(;V;V=V.nextSibling){if(V.nodeType==1&&V!=U){T.push(V)}}return T};return;l.Sizzle=F})();o.event={add:function(I,F,H,K){if(I.nodeType==3||I.nodeType==8){return}if(I.setInterval&&I!=l){I=l}if(!H.guid){H.guid=this.guid++}if(K!==g){var G=H;H=this.proxy(G);H.data=K}var E=o.data(I,"events")||o.data(I,"events",{}),J=o.data(I,"handle")||o.data(I,"handle",function(){return typeof o!=="undefined"&&!o.event.triggered?o.event.handle.apply(arguments.callee.elem,arguments):g});J.elem=I;o.each(F.split(/\s+/),function(M,N){var O=N.split(".");N=O.shift();H.type=O.slice().sort().join(".");var 
L=E[N];if(o.event.specialAll[N]){o.event.specialAll[N].setup.call(I,K,O)}if(!L){L=E[N]={};if(!o.event.special[N]||o.event.special[N].setup.call(I,K,O)===false){if(I.addEventListener){I.addEventListener(N,J,false)}else{if(I.attachEvent){I.attachEvent("on"+N,J)}}}}L[H.guid]=H;o.event.global[N]=true});I=null},guid:1,global:{},remove:function(K,H,J){if(K.nodeType==3||K.nodeType==8){return}var G=o.data(K,"events"),F,E;if(G){if(H===g||(typeof H==="string"&&H.charAt(0)==".")){for(var I in G){this.remove(K,I+(H||""))}}else{if(H.type){J=H.handler;H=H.type}o.each(H.split(/\s+/),function(M,O){var Q=O.split(".");O=Q.shift();var N=RegExp("(^|\\.)"+Q.slice().sort().join(".*\\.")+"(\\.|$)");if(G[O]){if(J){delete G[O][J.guid]}else{for(var P in G[O]){if(N.test(G[O][P].type)){delete G[O][P]}}}if(o.event.specialAll[O]){o.event.specialAll[O].teardown.call(K,Q)}for(F in G[O]){break}if(!F){if(!o.event.special[O]||o.event.special[O].teardown.call(K,Q)===false){if(K.removeEventListener){K.removeEventListener(O,o.data(K,"handle"),false)}else{if(K.detachEvent){K.detachEvent("on"+O,o.data(K,"handle"))}}}F=null;delete G[O]}}})}for(F in G){break}if(!F){var L=o.data(K,"handle");if(L){L.elem=null}o.removeData(K,"events");o.removeData(K,"handle")}}},trigger:function(I,K,H,E){var G=I.type||I;if(!E){I=typeof I==="object"?I[h]?I:o.extend(o.Event(G),I):o.Event(G);if(G.indexOf("!")>=0)
-{I.type=G=G.slice(0,-1);I.exclusive=true}if(!H){I.stopPropagation();if(this.global[G]){o.each(o.cache,function(){if(this.events&&this.events[G]){o.event.trigger(I,K,this.handle.elem)}})}}if(!H||H.nodeType==3||H.nodeType==8){return g}I.result=g;I.target=H;K=o.makeArray(K);K.unshift(I)}I.currentTarget=H;var J=o.data(H,"handle");if(J){J.apply(H,K)}if((!H[G]||(o.nodeName(H,"a")&&G=="click"))&&H["on"+G]&&H["on"+G].apply(H,K)===false){I.result=false}if(!E&&H[G]&&!I.isDefaultPrevented()&&!(o.nodeName(H,"a")&&G=="click")){this.triggered=true;try{H[G]()}catch(L){}}this.triggered=false;if(!I.isPropagationStopped()){var F=H.parentNode||H.ownerDocument;if(F){o.event.trigger(I,K,F,true)}}},handle:function(K){var J,E;K=arguments[0]=o.event.fix(K||l.event);K.currentTarget=this;var L=K.type.split(".");K.type=L.shift();J=!L.length&&!K.exclusive;var I=RegExp("(^|\\.)"+L.slice().sort().join(".*\\.")+"(\\.|$)");E=(o.data(this,"events")||{})[K.type];for(var G in E){var H=E[G];if(J||I.test(H.type)){K.handler=H;K.data=H.data;var F=H.apply(this,arguments);if(F!==g){K.result=F;if(F===false){K.preventDefault();K.stopPropagation()}}if(K.isImmediatePropagationStopped()){break}}}},props:"altKey attrChange attrName bubbles button cancelable charCode clientX clientY ctrlKey currentTarget data detail eventPhase fromElement handler keyCode metaKey newValue originalTarget pageX pageY prevValue relatedNode relatedTarget screenX screenY shiftKey srcElement target toElement view wheelDelta which".split(" "),fix:function(H){if(H[h]){return H}var F=H;H=o.Event(F);for(var G=this.props.length,J;G;){J=this.props[--G];H[J]=F[J]}if(!H.target){H.target=H.srcElement||document}if(H.target.nodeType==3){H.target=H.target.parentNode}if(!H.relatedTarget&&H.fromElement){H.relatedTarget=H.fromElement==H.target?H.toElement:H.fromElement}if(H.pageX==null&&H.clientX!=null){var 
I=document.documentElement,E=document.body;H.pageX=H.clientX+(I&&I.scrollLeft||E&&E.scrollLeft||0)-(I.clientLeft||0);H.pageY=H.clientY+(I&&I.scrollTop||E&&E.scrollTop||0)-(I.clientTop||0)}if(!H.which&&((H.charCode||H.charCode===0)?H.charCode:H.keyCode)){H.which=H.charCode||H.keyCode}if(!H.metaKey&&H.ctrlKey){H.metaKey=H.ctrlKey}if(!H.which&&H.button){H.which=(H.button&1?1:(H.button&2?3:(H.button&4?2:0)))}return H},proxy:function(F,E){E=E||function(){return F.apply(this,arguments)};E.guid=F.guid=F.guid||E.guid||this.guid++;return E},special:{ready:{setup:B,teardown:function(){}}},specialAll:{live:{setup:function(E,F){o.event.add(this,F[0],c)},teardown:function(G){if(G.length){var E=0,F=RegExp("(^|\\.)"+G[0]+"(\\.|$)");o.each((o.data(this,"events").live||{}),function(){if(F.test(this.type)){E++}});if(E<1){o.event.remove(this,G[0],c)}}}}}};o.Event=function(E){if(!this.preventDefault){return new o.Event(E)}if(E&&E.type){this.originalEvent=E;this.type=E.type}else{this.type=E}this.timeStamp=e();this[h]=true};function k(){return false}function u(){return true}o.Event.prototype={preventDefault:function(){this.isDefaultPrevented=u;var E=this.originalEvent;if(!E){return}if(E.preventDefault){E.preventDefault()}E.returnValue=false},stopPropagation:function(){this.isPropagationStopped=u;var E=this.originalEvent;if(!E){return}if(E.stopPropagation){E.stopPropagation()}E.cancelBubble=true},stopImmediatePropagation:function(){this.isImmediatePropagationStopped=u;this.stopPropagation()},isDefaultPrevented:k,isPropagationStopped:k,isImmediatePropagationStopped:k};var a=function(F){var E=F.relatedTarget;while(E&&E!=this){try{E=E.parentNode}catch(G){E=this}}if(E!=this){F.type=F.data;o.event.handle.apply(this,arguments)}};o.each({mouseover:"mouseenter",mouseout:"mouseleave"},function(F,E){o.event.special[E]={setup:function(){o.event.add(this,F,a,E)},teardown:function(){o.event.remove(this,F,a)}}});o.fn.extend({bind:function(F,G,E){return 
F=="unload"?this.one(F,G,E):this.each(function(){o.event.add(this,F,E||G,E&&G)})},one:function(G,H,F){var E=o.event.proxy(F||H,function(I){o(this).unbind(I,E);return(F||H).apply(this,arguments)});return this.each(function(){o.event.add(this,G,E,F&&H)})},unbind:function(F,E){return this.each(function(){o.event.remove(this,F,E)})},trigger:function(E,F){return this.each(function(){o.event.trigger(E,F,this)})},triggerHandler:function(E,G){if(this[0]){var F=o.Event(E);F.preventDefault();F.stopPropagation();o.event.trigger(F,G,this[0]);return F.result}},toggle:function(G){var E=arguments,F=1;while(F<E.length){o.event.proxy(G,E[F++])}return this.click(o.event.proxy(G,function(H){this.lastToggle=(this.lastToggle||0)%F;H.preventDefault();return E[this.lastToggle++].apply(this,arguments)||false}))},hover:function(E,F){return this.mouseenter(E).mouseleave(F)},ready:function(E){B();if(o.isReady){E.call(document,o)}else{o.readyList.push(E)}return this},live:function(G,F){var E=o.event.proxy(F);E.guid+=this.selector+G;o(document).bind(i(G,this.selector),this.selector,E);return this},die:function(F,E){o(document).unbind(i(F,this.selector),E?{guid:E.guid+this.selector+F}:null);return this}});function c(H){var E=RegExp("(^|\\.)"+H.type+"(\\.|$)"),G=true,F=[];o.each(o.data(this,"events").live||[],function(I,J){if(E.test(J.type)){var K=o(H.target).closest(J.data)[0];if(K){F.push({elem:K,fn:J})}}});F.sort(function(J,I){return o.data(J.elem,"closest")-o.data(I.elem,"closest")});o.each(F,function(){if(this.fn.call(this.elem,H,this.fn.data)===false){return(G=false)}});return G}function i(F,E){return["live",F,E.replace(/\./g,"`").replace(/ /g,"|")].join(".")}o.extend({isReady:false,readyList:[],ready:function(){if(!o.isReady){o.isReady=true;if(o.readyList){o.each(o.readyList,function(){this.call(document,o)});o.readyList=null}o(document).triggerHandler("ready")}}});var x=false;function 
B(){if(x){return}x=true;if(document.addEventListener){document.addEventListener("DOMContentLoaded",function(){document.removeEventListener("DOMContentLoaded",arguments.callee,false);o.ready()},false)}else{if(document.attachEvent){document.attachEvent("onreadystatechange",function(){if(document.readyState==="complete"){document.detachEvent("onreadystatechange",arguments.callee);o.ready()}});if(document.documentElement.doScroll&&l==l.top){(function(){if(o.isReady){return}try{document.documentElement.doScroll("left")}catch(E){setTimeout(arguments.callee,0);return}o.ready()})()}}}o.event.add(l,"load",o.ready)}o.each(("blur,focus,load,resize,scroll,unload,click,dblclick,mousedown,mouseup,mousemove,mouseover,mouseout,mouseenter,mouseleave,change,select,submit,keydown,keypress,keyup,error").split(","),function(F,E){o.fn[E]=function(G){return G?this.bind(E,G):this.trigger(E)}});o(l).bind("unload",function(){for(var E in o.cache){if(E!=1&&o.cache[E].handle){o.event.remove(o.cache[E].handle.elem)}}});(function(){o.support={};var F=document.documentElement,G=document.createElement("script"),K=document.createElement("div"),J="script"+(new Date).getTime();K.style.display="none";K.innerHTML='   <link/><table></table><a href="/a" style="color:red;float:left;opacity:.5;">a</a><select><option>text</option></select><object><param/></object>';var 
H=K.getElementsByTagName("*"),E=K.getElementsByTagName("a")[0];if(!H||!H.length||!E){return}o.support={leadingWhitespace:K.firstChild.nodeType==3,tbody:!K.getElementsByTagName("tbody").length,objectAll:!!K.getElementsByTagName("object")[0].getElementsByTagName("*").length,htmlSerialize:!!K.getElementsByTagName("link").length,style:/red/.test(E.getAttribute("style")),hrefNormalized:E.getAttribute("href")==="/a",opacity:E.style.opacity==="0.5",cssFloat:!!E.style.cssFloat,scriptEval:false,noCloneEvent:true,boxModel:null};G.type="text/javascript";try{G.appendChild(document.createTextNode("window."+J+"=1;"))}catch(I){}F.insertBefore(G,F.firstChild);if(l[J]){o.support.scriptEval=true;delete l[J]}F.removeChild(G);if(K.attachEvent&&K.fireEvent){K.attachEvent("onclick",function(){o.support.noCloneEvent=false;K.detachEvent("onclick",arguments.callee)});K.cloneNode(true).fireEvent("onclick")}o(function(){var L=document.createElement("div");L.style.width=L.style.paddingLeft="1px";document.body.appendChild(L);o.boxModel=o.support.boxModel=L.offsetWidth===2;document.body.removeChild(L).style.display="none"})})();var w=o.support.cssFloat?"cssFloat":"styleFloat";o.props={"for":"htmlFor","class":"className","float":w,cssFloat:w,styleFloat:w,readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing",rowspan:"rowSpan",tabindex:"tabIndex"};o.fn.extend({_load:o.fn.load,load:function(G,J,K){if(typeof G!=="string"){return this._load(G)}var I=G.indexOf(" ");if(I>=0){var E=G.slice(I,G.length);G=G.slice(0,I)}var H="GET";if(J){if(o.isFunction(J)){K=J;J=null}else{if(typeof J==="object"){J=o.param(J);H="POST"}}}var F=this;o.ajax({url:G,type:H,dataType:"html",data:J,complete:function(M,L){if(L=="success"||L=="notmodified"){F.html(E?o("<div/>").append(M.responseText.replace(/<script(.|\s)*?\/script>/g,"")).find(E):M.responseText)}if(K){F.each(K,[M.responseText,L,M])}}});return this},serialize:function(){return o.param(this.serializeArray())},serializeArray:function(){return 
this.map(function(){return this.elements?o.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||/select|textarea/i.test(this.nodeName)||/text|hidden|password|search/i.test(this.type))}).map(function(E,F){var G=o(this).val();return G==null?null:o.isArray(G)?o.map(G,function(I,H){return{name:F.name,value:I}}):{name:F.name,value:G}}).get()}});o.each("ajaxStart,ajaxStop,ajaxComplete,ajaxError,ajaxSuccess,ajaxSend".split(","),function(E,F){o.fn[F]=function(G){return this.bind(F,G)}});var r=e();o.extend({get:function(E,G,H,F){if(o.isFunction(G)){H=G;G=null}return o.ajax({type:"GET",url:E,data:G,success:H,dataType:F})},getScript:function(E,F){return o.get(E,null,F,"script")},getJSON:function(E,F,G){return o.get(E,F,G,"json")},post:function(E,G,H,F){if(o.isFunction(G)){H=G;G={}}return o.ajax({type:"POST",url:E,data:G,success:H,dataType:F})},ajaxSetup:function(E){o.extend(o.ajaxSettings,E)},ajaxSettings:{url:location.href,global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,xhr:function(){return l.ActiveXObject?new ActiveXObject("Microsoft.XMLHTTP"):new XMLHttpRequest()},accepts:{xml:"application/xml, text/xml",html:"text/html",script:"text/javascript, application/javascript",json:"application/json, text/javascript",text:"text/plain",_default:"*/*"}},lastModified:{},ajax:function(M){M=o.extend(true,M,o.extend(true,{},o.ajaxSettings,M));var W,F=/=\?(&|$)/g,R,V,G=M.type.toUpperCase();if(M.data&&M.processData&&typeof 
M.data!=="string"){M.data=o.param(M.data)}if(M.dataType=="jsonp"){if(G=="GET"){if(!M.url.match(F)){M.url+=(M.url.match(/\?/)?"&":"?")+(M.jsonp||"callback")+"=?"}}else{if(!M.data||!M.data.match(F)){M.data=(M.data?M.data+"&":"")+(M.jsonp||"callback")+"=?"}}M.dataType="json"}if(M.dataType=="json"&&(M.data&&M.data.match(F)||M.url.match(F))){W="jsonp"+r++;if(M.data){M.data=(M.data+"").replace(F,"="+W+"$1")}M.url=M.url.replace(F,"="+W+"$1");M.dataType="script";l[W]=function(X){V=X;I();L();l[W]=g;try{delete l[W]}catch(Y){}if(H){H.removeChild(T)}}}if(M.dataType=="script"&&M.cache==null){M.cache=false}if(M.cache===false&&G=="GET"){var E=e();var U=M.url.replace(/(\?|&)_=.*?(&|$)/,"$1_="+E+"$2");M.url=U+((U==M.url)?(M.url.match(/\?/)?"&":"?")+"_="+E:"")}if(M.data&&G=="GET"){M.url+=(M.url.match(/\?/)?"&":"?")+M.data;M.data=null}if(M.global&&!o.active++){o.event.trigger("ajaxStart")}var Q=/^(\w+:)?\/\/([^\/?#]+)/.exec(M.url);if(M.dataType=="script"&&G=="GET"&&Q&&(Q[1]&&Q[1]!=location.protocol||Q[2]!=location.host)){var H=document.getElementsByTagName("head")[0];var T=document.createElement("script");T.src=M.url;if(M.scriptCharset){T.charset=M.scriptCharset}if(!W){var O=false;T.onload=T.onreadystatechange=function(){if(!O&&(!this.readyState||this.readyState=="loaded"||this.readyState=="complete")){O=true;I();L();T.onload=T.onreadystatechange=null;H.removeChild(T)}}}H.appendChild(T);return g}var K=false;var J=M.xhr();if(M.username){J.open(G,M.url,M.async,M.username,M.password)}else{J.open(G,M.url,M.async)}try{if(M.data){J.setRequestHeader("Content-Type",M.contentType)}if(M.ifModified){J.setRequestHeader("If-Modified-Since",o.lastModified[M.url]||"Thu, 01 Jan 1970 00:00:00 GMT")}J.setRequestHeader("X-Requested-With","XMLHttpRequest");J.setRequestHeader("Accept",M.dataType&&M.accepts[M.dataType]?M.accepts[M.dataType]+", */*":M.accepts._default)}catch(S){}if(M.beforeSend&&M.beforeSend(J,M)===false){if(M.global&&!--o.active){o.event.trigger("ajaxStop")}J.abort();return 
false}if(M.global){o.event.trigger("ajaxSend",[J,M])}var N=function(X){if(J.readyState==0){if(P){clearInterval(P);P=null;if(M.global&&!--o.active){o.event.trigger("ajaxStop")}}}else{if(!K&&J&&(J.readyState==4||X=="timeout")){K=true;if(P){clearInterval(P);P=null}R=X=="timeout"?"timeout":!o.httpSuccess(J)?"error":M.ifModified&&o.httpNotModified(J,M.url)?"notmodified":"success";if(R=="success"){try{V=o.httpData(J,M.dataType,M)}catch(Z){R="parsererror"}}if(R=="success"){var Y;try{Y=J.getResponseHeader("Last-Modified")}catch(Z){}if(M.ifModified&&Y){o.lastModified[M.url]=Y}if(!W){I()}}else{o.handleError(M,J,R)}L();if(X){J.abort()}if(M.async){J=null}}}};if(M.async){var P=setInterval(N,13);if(M.timeout>0){setTimeout(function(){if(J&&!K){N("timeout")}},M.timeout)}}try{J.send(M.data)}catch(S){o.handleError(M,J,null,S)}if(!M.async){N()}function I(){if(M.success){M.success(V,R)}if(M.global){o.event.trigger("ajaxSuccess",[J,M])}}function L(){if(M.complete){M.complete(J,R)}if(M.global){o.event.trigger("ajaxComplete",[J,M])}if(M.global&&!--o.active){o.event.trigger("ajaxStop")}}return J},handleError:function(F,H,E,G){if(F.error){F.error(H,E,G)}if(F.global){o.event.trigger("ajaxError",[H,F,G])}},active:0,httpSuccess:function(F){try{return !F.status&&location.protocol=="file:"||(F.status>=200&&F.status<300)||F.status==304||F.status==1223}catch(E){}return false},httpNotModified:function(G,E){try{var H=G.getResponseHeader("Last-Modified");return G.status==304||H==o.lastModified[E]}catch(F){}return false},httpData:function(J,H,G){var F=J.getResponseHeader("content-type"),E=H=="xml"||!H&&F&&F.indexOf("xml")>=0,I=E?J.responseXML:J.responseText;if(E&&I.documentElement.tagName=="parsererror"){throw"parsererror"}if(G&&G.dataFilter){I=G.dataFilter(I,H)}if(typeof I==="string"){if(H=="script"){o.globalEval(I)}if(H=="json"){I=l["eval"]("("+I+")")}}return I},param:function(E){var G=[];function 
H(I,J){G[G.length]=encodeURIComponent(I)+"="+encodeURIComponent(J)}if(o.isArray(E)||E.jquery){o.each(E,function(){H(this.name,this.value)})}else{for(var F in E){if(o.isArray(E[F])){o.each(E[F],function(){H(F,this)})}else{H(F,o.isFunction(E[F])?E[F]():E[F])}}}return G.join("&").replace(/%20/g,"+")}});var m={},n,d=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]];function t(F,E){var G={};o.each(d.concat.apply([],d.slice(0,E)),function()
-{G[this]=F});return G}o.fn.extend({show:function(J,L){if(J){return this.animate(t("show",3),J,L)}else{for(var H=0,F=this.length;H<F;H++){var E=o.data(this[H],"olddisplay");this[H].style.display=E||"";if(o.css(this[H],"display")==="none"){var G=this[H].tagName,K;if(m[G]){K=m[G]}else{var I=o("<"+G+" />").appendTo("body");K=I.css("display");if(K==="none"){K="block"}I.remove();m[G]=K}o.data(this[H],"olddisplay",K)}}for(var H=0,F=this.length;H<F;H++){this[H].style.display=o.data(this[H],"olddisplay")||""}return this}},hide:function(H,I){if(H){return this.animate(t("hide",3),H,I)}else{for(var G=0,F=this.length;G<F;G++){var E=o.data(this[G],"olddisplay");if(!E&&E!=="none"){o.data(this[G],"olddisplay",o.css(this[G],"display"))}}for(var G=0,F=this.length;G<F;G++){this[G].style.display="none"}return this}},_toggle:o.fn.toggle,toggle:function(G,F){var E=typeof G==="boolean";return o.isFunction(G)&&o.isFunction(F)?this._toggle.apply(this,arguments):G==null||E?this.each(function(){var H=E?G:o(this).is(":hidden");o(this)[H?"show":"hide"]()}):this.animate(t("toggle",3),G,F)},fadeTo:function(E,G,F){return this.animate({opacity:G},E,F)},animate:function(I,F,H,G){var E=o.speed(F,H,G);return this[E.queue===false?"each":"queue"](function(){var K=o.extend({},E),M,L=this.nodeType==1&&o(this).is(":hidden"),J=this;for(M in I){if(I[M]=="hide"&&L||I[M]=="show"&&!L){return K.complete.call(this)}if((M=="height"||M=="width")&&this.style){K.display=o.css(this,"display");K.overflow=this.style.overflow}}if(K.overflow!=null){this.style.overflow="hidden"}K.curAnim=o.extend({},I);o.each(I,function(O,S){var R=new o.fx(J,K,O);if(/toggle|show|hide/.test(S)){R[S=="toggle"?L?"show":"hide":S](I)}else{var Q=S.toString().match(/^([+-]=)?([\d+-.]+)(.*)$/),T=R.cur(true)||0;if(Q){var N=parseFloat(Q[2]),P=Q[3]||"px";if(P!="px"){J.style[O]=(N||1)+P;T=((N||1)/R.cur(true))*T;J.style[O]=T+P}if(Q[1]){N=((Q[1]=="-="?-1:1)*N)+T}R.custom(T,N,P)}else{R.custom(T,S,"")}}});return true})},stop:function(F,E){var 
G=o.timers;if(F){this.queue([])}this.each(function(){for(var H=G.length-1;H>=0;H--){if(G[H].elem==this){if(E){G[H](true)}G.splice(H,1)}}});if(!E){this.dequeue()}return this}});o.each({slideDown:t("show",1),slideUp:t("hide",1),slideToggle:t("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"}},function(E,F){o.fn[E]=function(G,H){return this.animate(F,G,H)}});o.extend({speed:function(G,H,F){var E=typeof G==="object"?G:{complete:F||!F&&H||o.isFunction(G)&&G,duration:G,easing:F&&H||H&&!o.isFunction(H)&&H};E.duration=o.fx.off?0:typeof E.duration==="number"?E.duration:o.fx.speeds[E.duration]||o.fx.speeds._default;E.old=E.complete;E.complete=function(){if(E.queue!==false){o(this).dequeue()}if(o.isFunction(E.old)){E.old.call(this)}};return E},easing:{linear:function(G,H,E,F){return E+F*G},swing:function(G,H,E,F){return((-Math.cos(G*Math.PI)/2)+0.5)*F+E}},timers:[],fx:function(F,E,G){this.options=E;this.elem=F;this.prop=G;if(!E.orig){E.orig={}}}});o.fx.prototype={update:function(){if(this.options.step){this.options.step.call(this.elem,this.now,this)}(o.fx.step[this.prop]||o.fx.step._default)(this);if((this.prop=="height"||this.prop=="width")&&this.elem.style){this.elem.style.display="block"}},cur:function(F){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null)){return this.elem[this.prop]}var E=parseFloat(o.css(this.elem,this.prop,F));return E&&E>-10000?E:parseFloat(o.curCSS(this.elem,this.prop))||0},custom:function(I,H,G){this.startTime=e();this.start=I;this.end=H;this.unit=G||this.unit||"px";this.now=this.start;this.pos=this.state=0;var E=this;function F(J){return E.step(J)}F.elem=this.elem;if(F()&&o.timers.push(F)&&!n){n=setInterval(function(){var K=o.timers;for(var 
J=0;J<K.length;J++){if(!K[J]()){K.splice(J--,1)}}if(!K.length){clearInterval(n);n=g}},13)}},show:function(){this.options.orig[this.prop]=o.attr(this.elem.style,this.prop);this.options.show=true;this.custom(this.prop=="width"||this.prop=="height"?1:0,this.cur());o(this.elem).show()},hide:function(){this.options.orig[this.prop]=o.attr(this.elem.style,this.prop);this.options.hide=true;this.custom(this.cur(),0)},step:function(H){var G=e();if(H||G>=this.options.duration+this.startTime){this.now=this.end;this.pos=this.state=1;this.update();this.options.curAnim[this.prop]=true;var E=true;for(var F in this.options.curAnim){if(this.options.curAnim[F]!==true){E=false}}if(E){if(this.options.display!=null){this.elem.style.overflow=this.options.overflow;this.elem.style.display=this.options.display;if(o.css(this.elem,"display")=="none"){this.elem.style.display="block"}}if(this.options.hide){o(this.elem).hide()}if(this.options.hide||this.options.show){for(var I in this.options.curAnim){o.attr(this.elem.style,I,this.options.orig[I])}}this.options.complete.call(this.elem)}return false}else{var J=G-this.startTime;this.state=J/this.options.duration;this.pos=o.easing[this.options.easing||(o.easing.swing?"swing":"linear")](this.state,J,0,1,this.options.duration);this.now=this.start+((this.end-this.start)*this.pos);this.update()}return true}};o.extend(o.fx,{speeds:{slow:600,fast:200,_default:400},step:{opacity:function(E){o.attr(E.elem.style,"opacity",E.now)},_default:function(E){if(E.elem.style&&E.elem.style[E.prop]!=null){E.elem.style[E.prop]=E.now+E.unit}else{E.elem[E.prop]=E.now}}}});if(document.documentElement.getBoundingClientRect){o.fn.offset=function(){if(!this[0]){return{top:0,left:0}}if(this[0]===this[0].ownerDocument.body){return o.offset.bodyOffset(this[0])}var 
G=this[0].getBoundingClientRect(),J=this[0].ownerDocument,F=J.body,E=J.documentElement,L=E.clientTop||F.clientTop||0,K=E.clientLeft||F.clientLeft||0,I=G.top+(self.pageYOffset||o.boxModel&&E.scrollTop||F.scrollTop)-L,H=G.left+(self.pageXOffset||o.boxModel&&E.scrollLeft||F.scrollLeft)-K;return{top:I,left:H}}}else{o.fn.offset=function(){if(!this[0]){return{top:0,left:0}}if(this[0]===this[0].ownerDocument.body){return o.offset.bodyOffset(this[0])}o.offset.initialized||o.offset.initialize();var J=this[0],G=J.offsetParent,F=J,O=J.ownerDocument,M,H=O.documentElement,K=O.body,L=O.defaultView,E=L.getComputedStyle(J,null),N=J.offsetTop,I=J.offsetLeft;while((J=J.parentNode)&&J!==K&&J!==H){M=L.getComputedStyle(J,null);N-=J.scrollTop,I-=J.scrollLeft;if(J===G){N+=J.offsetTop,I+=J.offsetLeft;if(o.offset.doesNotAddBorder&&!(o.offset.doesAddBorderForTableAndCells&&/^t(able|d|h)$/i.test(J.tagName))){N+=parseInt(M.borderTopWidth,10)||0,I+=parseInt(M.borderLeftWidth,10)||0}F=G,G=J.offsetParent}if(o.offset.subtractsBorderForOverflowNotVisible&&M.overflow!=="visible"){N+=parseInt(M.borderTopWidth,10)||0,I+=parseInt(M.borderLeftWidth,10)||0}E=M}if(E.position==="relative"||E.position==="static"){N+=K.offsetTop,I+=K.offsetLeft}if(E.position==="fixed"){N+=Math.max(H.scrollTop,K.scrollTop),I+=Math.max(H.scrollLeft,K.scrollLeft)}return{top:N,left:I}}}o.offset={initialize:function(){if(this.initialized){return}var L=document.body,F=document.createElement("div"),H,G,N,I,M,E,J=L.style.marginTop,K='<div style="position:absolute;top:0;left:0;margin:0;border:5px solid #000;padding:0;width:1px;height:1px;"><div></div></div><table style="position:absolute;top:0;left:0;margin:0;border:5px solid #000;padding:0;width:1px;height:1px;" cellpadding="0" cellspacing="0"><tr><td></td></tr></table>';M={position:"absolute",top:0,left:0,margin:0,border:0,width:"1px",height:"1px",visibility:"hidden"};for(E in 
M){F.style[E]=M[E]}F.innerHTML=K;L.insertBefore(F,L.firstChild);H=F.firstChild,G=H.firstChild,I=H.nextSibling.firstChild.firstChild;this.doesNotAddBorder=(G.offsetTop!==5);this.doesAddBorderForTableAndCells=(I.offsetTop===5);H.style.overflow="hidden",H.style.position="relative";this.subtractsBorderForOverflowNotVisible=(G.offsetTop===-5);L.style.marginTop="1px";this.doesNotIncludeMarginInBodyOffset=(L.offsetTop===0);L.style.marginTop=J;L.removeChild(F);this.initialized=true},bodyOffset:function(E){o.offset.initialized||o.offset.initialize();var G=E.offsetTop,F=E.offsetLeft;if(o.offset.doesNotIncludeMarginInBodyOffset){G+=parseInt(o.curCSS(E,"marginTop",true),10)||0,F+=parseInt(o.curCSS(E,"marginLeft",true),10)||0}return{top:G,left:F}}};o.fn.extend({position:function(){var I=0,H=0,F;if(this[0]){var G=this.offsetParent(),J=this.offset(),E=/^body|html$/i.test(G[0].tagName)?{top:0,left:0}:G.offset();J.top-=j(this,"marginTop");J.left-=j(this,"marginLeft");E.top+=j(G,"borderTopWidth");E.left+=j(G,"borderLeftWidth");F={top:J.top-E.top,left:J.left-E.left}}return F},offsetParent:function(){var E=this[0].offsetParent||document.body;while(E&&(!/^body|html$/i.test(E.tagName)&&o.css(E,"position")=="static")){E=E.offsetParent}return o(E)}});o.each(["Left","Top"],function(F,E){var G="scroll"+E;o.fn[G]=function(H){if(!this[0]){return null}return H!==g?this.each(function(){this==l||this==document?l.scrollTo(!F?H:o(l).scrollLeft(),F?H:o(l).scrollTop()):this[G]=H}):this[0]==l||this[0]==document?self[F?"pageYOffset":"pageXOffset"]||o.boxModel&&document.documentElement[G]||document.body[G]:this[0][G]}});o.each(["Height","Width"],function(I,G){var E=I?"Left":"Top",H=I?"Right":"Bottom",F=G.toLowerCase();o.fn["inner"+G]=function(){return this[0]?o.css(this[0],F,false,"padding"):null};o.fn["outer"+G]=function(K){return this[0]?o.css(this[0],F,false,K?"margin":"border"):null};var J=G.toLowerCase();o.fn[J]=function(K){return 
this[0]==l?document.compatMode=="CSS1Compat"&&document.documentElement["client"+G]||document.body["client"+G]:this[0]==document?Math.max(document.documentElement["client"+G],document.body["scroll"+G],document.documentElement["scroll"+G],document.body["offset"+G],document.documentElement["offset"+G]):K===g?(this.length?o.css(this[0],J):null):this.css(J,typeof K==="string"?K:K+"px")}})})();
-/*
- * jQuery UI 1.7.2
- *
- * Copyright (c) 2009 AUTHORS.txt (http://jqueryui.com/about)
- * Dual licensed under the MIT (MIT-LICENSE.txt)
- * and GPL (GPL-LICENSE.txt) licenses.
- *
- * http://docs.jquery.com/UI
- */
-jQuery.ui||(function(c){var i=c.fn.remove,d=c.browser.mozilla&&(parseFloat(c.browser.version)<1.9);c.ui={version:"1.7.2",plugin:{add:function(k,l,n){var m=c.ui[k].prototype;for(var j in n){m.plugins[j]=m.plugins[j]||[];m.plugins[j].push([l,n[j]])}},call:function(j,l,k){var n=j.plugins[l];if(!n||!j.element[0].parentNode){return}for(var m=0;m<n.length;m++){if(j.options[n[m][0]]){n[m][1].apply(j.element,k)}}}},contains:function(k,j){return document.compareDocumentPosition?k.compareDocumentPosition(j)&16:k!==j&&k.contains(j)},hasScroll:function(m,k){if(c(m).css("overflow")=="hidden"){return false}var j=(k&&k=="left")?"scrollLeft":"scrollTop",l=false;if(m[j]>0){return true}m[j]=1;l=(m[j]>0);m[j]=0;return l},isOverAxis:function(k,j,l){return(k>j)&&(k<(j+l))},isOver:function(o,k,n,m,j,l){return c.ui.isOverAxis(o,n,j)&&c.ui.isOverAxis(k,m,l)},keyCode:{BACKSPACE:8,CAPS_LOCK:20,COMMA:188,CONTROL:17,DELETE:46,DOWN:40,END:35,ENTER:13,ESCAPE:27,HOME:36,INSERT:45,LEFT:37,NUMPAD_ADD:107,NUMPAD_DECIMAL:110,NUMPAD_DIVIDE:111,NUMPAD_ENTER:108,NUMPAD_MULTIPLY:106,NUMPAD_SUBTRACT:109,PAGE_DOWN:34,PAGE_UP:33,PERIOD:190,RIGHT:39,SHIFT:16,SPACE:32,TAB:9,UP:38}};if(d){var f=c.attr,e=c.fn.removeAttr,h="http://www.w3.org/2005/07/aaa",a=/^aria-/,b=/^wairole:/;c.attr=function(k,j,l){var m=l!==undefined;return(j=="role"?(m?f.call(this,k,j,"wairole:"+l):(f.apply(this,arguments)||"").replace(b,"")):(a.test(j)?(m?k.setAttributeNS(h,j.replace(a,"aaa:"),l):f.call(this,k,j.replace(a,"aaa:"))):f.apply(this,arguments)))};c.fn.removeAttr=function(j){return(a.test(j)?this.each(function(){this.removeAttributeNS(h,j.replace(a,""))}):e.call(this,j))}}c.fn.extend({remove:function(){c("*",this).add(this).each(function(){c(this).triggerHandler("remove")});return i.apply(this,arguments)},enableSelection:function(){return this.attr("unselectable","off").css("MozUserSelect","").unbind("selectstart.ui")},disableSelection:function(){return 
this.attr("unselectable","on").css("MozUserSelect","none").bind("selectstart.ui",function(){return false})},scrollParent:function(){var j;if((c.browser.msie&&(/(static|relative)/).test(this.css("position")))||(/absolute/).test(this.css("position"))){j=this.parents().filter(function(){return(/(relative|absolute|fixed)/).test(c.curCSS(this,"position",1))&&(/(auto|scroll)/).test(c.curCSS(this,"overflow",1)+c.curCSS(this,"overflow-y",1)+c.curCSS(this,"overflow-x",1))}).eq(0)}else{j=this.parents().filter(function(){return(/(auto|scroll)/).test(c.curCSS(this,"overflow",1)+c.curCSS(this,"overflow-y",1)+c.curCSS(this,"overflow-x",1))}).eq(0)}return(/fixed/).test(this.css("position"))||!j.length?c(document):j}});c.extend(c.expr[":"],{data:function(l,k,j){return !!c.data(l,j[3])},focusable:function(k){var l=k.nodeName.toLowerCase(),j=c.attr(k,"tabindex");return(/input|select|textarea|button|object/.test(l)?!k.disabled:"a"==l||"area"==l?k.href||!isNaN(j):!isNaN(j))&&!c(k)["area"==l?"parents":"closest"](":hidden").length},tabbable:function(k){var j=c.attr(k,"tabindex");return(isNaN(j)||j>=0)&&c(k).is(":focusable")}});function g(m,n,o,l){function k(q){var p=c[m][n][q]||[];return(typeof p=="string"?p.split(/,?\s+/):p)}var j=k("getter");if(l.length==1&&typeof l[0]=="string"){j=j.concat(k("getterSetter"))}return(c.inArray(o,j)!=-1)}c.widget=function(k,j){var l=k.split(".")[0];k=k.split(".")[1];c.fn[k]=function(p){var n=(typeof p=="string"),o=Array.prototype.slice.call(arguments,1);if(n&&p.substring(0,1)=="_"){return this}if(n&&g(l,k,p,o)){var m=c.data(this[0],k);return(m?m[p].apply(m,o):undefined)}return this.each(function(){var q=c.data(this,k);(!q&&!n&&c.data(this,k,new c[l][k](this,p))._init());(q&&n&&c.isFunction(q[p])&&q[p].apply(q,o))})};c[l]=c[l]||{};c[l][k]=function(o,n){var 
m=this;this.namespace=l;this.widgetName=k;this.widgetEventPrefix=c[l][k].eventPrefix||k;this.widgetBaseClass=l+"-"+k;this.options=c.extend({},c.widget.defaults,c[l][k].defaults,c.metadata&&c.metadata.get(o)[k],n);this.element=c(o).bind("setData."+k,function(q,p,r){if(q.target==o){return m._setData(p,r)}}).bind("getData."+k,function(q,p){if(q.target==o){return m._getData(p)}}).bind("remove",function(){return m.destroy()})};c[l][k].prototype=c.extend({},c.widget.prototype,j);c[l][k].getterSetter="option"};c.widget.prototype={_init:function(){},destroy:function(){this.element.removeData(this.widgetName).removeClass(this.widgetBaseClass+"-disabled "+this.namespace+"-state-disabled").removeAttr("aria-disabled")},option:function(l,m){var k=l,j=this;if(typeof l=="string"){if(m===undefined){return this._getData(l)}k={};k[l]=m}c.each(k,function(n,o){j._setData(n,o)})},_getData:function(j){return this.options[j]},_setData:function(j,k){this.options[j]=k;if(j=="disabled"){this.element[k?"addClass":"removeClass"](this.widgetBaseClass+"-disabled "+this.namespace+"-state-disabled").attr("aria-disabled",k)}},enable:function(){this._setData("disabled",false)},disable:function(){this._setData("disabled",true)},_trigger:function(l,m,n){var p=this.options[l],j=(l==this.widgetEventPrefix?l:this.widgetEventPrefix+l);m=c.Event(m);m.type=j;if(m.originalEvent){for(var k=c.event.props.length,o;k;){o=c.event.props[--k];m[o]=m.originalEvent[o]}}this.element.trigger(m,n);return !(c.isFunction(p)&&p.call(this.element[0],m,n)===false||m.isDefaultPrevented())}};c.widget.defaults={disabled:false};c.ui.mouse={_mouseInit:function(){var j=this;this.element.bind("mousedown."+this.widgetName,function(k){return j._mouseDown(k)}).bind("click."+this.widgetName,function(k){if(j._preventClickEvent){j._preventClickEvent=false;k.stopImmediatePropagation();return 
false}});if(c.browser.msie){this._mouseUnselectable=this.element.attr("unselectable");this.element.attr("unselectable","on")}this.started=false},_mouseDestroy:function(){this.element.unbind("."+this.widgetName);(c.browser.msie&&this.element.attr("unselectable",this._mouseUnselectable))},_mouseDown:function(l){l.originalEvent=l.originalEvent||{};if(l.originalEvent.mouseHandled){return}(this._mouseStarted&&this._mouseUp(l));this._mouseDownEvent=l;var k=this,m=(l.which==1),j=(typeof this.options.cancel=="string"?c(l.target).parents().add(l.target).filter(this.options.cancel).length:false);if(!m||j||!this._mouseCapture(l)){return true}this.mouseDelayMet=!this.options.delay;if(!this.mouseDelayMet){this._mouseDelayTimer=setTimeout(function(){k.mouseDelayMet=true},this.options.delay)}if(this._mouseDistanceMet(l)&&this._mouseDelayMet(l)){this._mouseStarted=(this._mouseStart(l)!==false);if(!this._mouseStarted){l.preventDefault();return true}}this._mouseMoveDelegate=function(n){return k._mouseMove(n)};this._mouseUpDelegate=function(n){return k._mouseUp(n)};c(document).bind("mousemove."+this.widgetName,this._mouseMoveDelegate).bind("mouseup."+this.widgetName,this._mouseUpDelegate);(c.browser.safari||l.preventDefault());l.originalEvent.mouseHandled=true;return true},_mouseMove:function(j){if(c.browser.msie&&!j.button){return this._mouseUp(j)}if(this._mouseStarted){this._mouseDrag(j);return j.preventDefault()}if(this._mouseDistanceMet(j)&&this._mouseDelayMet(j)){this._mouseStarted=(this._mouseStart(this._mouseDownEvent,j)!==false);(this._mouseStarted?this._mouseDrag(j):this._mouseUp(j))}return !this._mouseStarted},_mouseUp:function(j){c(document).unbind("mousemove."+this.widgetName,this._mouseMoveDelegate).unbind("mouseup."+this.widgetName,this._mouseUpDelegate);if(this._mouseStarted){this._mouseStarted=false;this._preventClickEvent=(j.target==this._mouseDownEvent.target);this._mouseStop(j)}return 
false},_mouseDistanceMet:function(j){return(Math.max(Math.abs(this._mouseDownEvent.pageX-j.pageX),Math.abs(this._mouseDownEvent.pageY-j.pageY))>=this.options.distance)},_mouseDelayMet:function(j){return this.mouseDelayMet},_mouseStart:function(j){},_mouseDrag:function(j){},_mouseStop:function(j){},_mouseCapture:function(j){return true}};c.ui.mouse.defaults={cancel:null,distance:1,delay:0}})(jQuery);;/* * jQuery UI Resizable 1.7.2
- *
- * Copyright (c) 2009 AUTHORS.txt (http://jqueryui.com/about)
- * Dual licensed under the MIT (MIT-LICENSE.txt)
- * and GPL (GPL-LICENSE.txt) licenses.
- *
- * http://docs.jquery.com/UI/Resizables
- *
- * Depends:
- *	ui.core.js
- */
-(function(c){c.widget("ui.resizable",c.extend({},c.ui.mouse,{_init:function(){var e=this,j=this.options;this.element.addClass("ui-resizable");c.extend(this,{_aspectRatio:!!(j.aspectRatio),aspectRatio:j.aspectRatio,originalElement:this.element,_proportionallyResizeElements:[],_helper:j.helper||j.ghost||j.animate?j.helper||"ui-resizable-helper":null});if(this.element[0].nodeName.match(/canvas|textarea|input|select|button|img/i)){if(/relative/.test(this.element.css("position"))&&c.browser.opera){this.element.css({position:"relative",top:"auto",left:"auto"})}this.element.wrap(c('<div class="ui-wrapper" style="overflow: hidden;"></div>').css({position:this.element.css("position"),width:this.element.outerWidth(),height:this.element.outerHeight(),top:this.element.css("top"),left:this.element.css("left")}));this.element=this.element.parent().data("resizable",this.element.data("resizable"));this.elementIsWrapper=true;this.element.css({marginLeft:this.originalElement.css("marginLeft"),marginTop:this.originalElement.css("marginTop"),marginRight:this.originalElement.css("marginRight"),marginBottom:this.originalElement.css("marginBottom")});this.originalElement.css({marginLeft:0,marginTop:0,marginRight:0,marginBottom:0});this.originalResizeStyle=this.originalElement.css("resize");this.originalElement.css("resize","none");this._proportionallyResizeElements.push(this.originalElement.css({position:"static",zoom:1,display:"block"}));this.originalElement.css({margin:this.originalElement.css("margin")});this._proportionallyResize()}this.handles=j.handles||(!c(".ui-resizable-handle",this.element).length?"e,s,se":{n:".ui-resizable-n",e:".ui-resizable-e",s:".ui-resizable-s",w:".ui-resizable-w",se:".ui-resizable-se",sw:".ui-resizable-sw",ne:".ui-resizable-ne",nw:".ui-resizable-nw"});if(this.handles.constructor==String){if(this.handles=="all"){this.handles="n,e,s,w,se,sw,ne,nw"}var k=this.handles.split(",");this.handles={};for(var f=0;f<k.length;f++){var 
h=c.trim(k[f]),d="ui-resizable-"+h;var g=c('<div class="ui-resizable-handle '+d+'"></div>');if(/sw|se|ne|nw/.test(h)){g.css({zIndex:++j.zIndex})}if("se"==h){g.addClass("ui-icon ui-icon-gripsmall-diagonal-se")}this.handles[h]=".ui-resizable-"+h;this.element.append(g)}}this._renderAxis=function(p){p=p||this.element;for(var m in this.handles){if(this.handles[m].constructor==String){this.handles[m]=c(this.handles[m],this.element).show()}if(this.elementIsWrapper&&this.originalElement[0].nodeName.match(/textarea|input|select|button/i)){var n=c(this.handles[m],this.element),o=0;o=/sw|ne|nw|se|n|s/.test(m)?n.outerHeight():n.outerWidth();var l=["padding",/ne|nw|n/.test(m)?"Top":/se|sw|s/.test(m)?"Bottom":/^e$/.test(m)?"Right":"Left"].join("");p.css(l,o);this._proportionallyResize()}if(!c(this.handles[m]).length){continue}}};this._renderAxis(this.element);this._handles=c(".ui-resizable-handle",this.element).disableSelection();this._handles.mouseover(function(){if(!e.resizing){if(this.className){var i=this.className.match(/ui-resizable-(se|sw|ne|nw|n|e|s|w)/i)}e.axis=i&&i[1]?i[1]:"se"}});if(j.autoHide){this._handles.hide();c(this.element).addClass("ui-resizable-autohide").hover(function(){c(this).removeClass("ui-resizable-autohide");e._handles.show()},function(){if(!e.resizing){c(this).addClass("ui-resizable-autohide");e._handles.hide()}})}this._mouseInit()},destroy:function(){this._mouseDestroy();var d=function(f){c(f).removeClass("ui-resizable ui-resizable-disabled ui-resizable-resizing").removeData("resizable").unbind(".resizable").find(".ui-resizable-handle").remove()};if(this.elementIsWrapper){d(this.element);var e=this.element;e.parent().append(this.originalElement.css({position:e.css("position"),width:e.outerWidth(),height:e.outerHeight(),top:e.css("top"),left:e.css("left")})).end().remove()}this.originalElement.css("resize",this.originalResizeStyle);d(this.originalElement)},_mouseCapture:function(e){var f=false;for(var d in 
this.handles){if(c(this.handles[d])[0]==e.target){f=true}}return this.options.disabled||!!f},_mouseStart:function(f){var i=this.options,e=this.element.position(),d=this.element;this.resizing=true;this.documentScroll={top:c(document).scrollTop(),left:c(document).scrollLeft()};if(d.is(".ui-draggable")||(/absolute/).test(d.css("position"))){d.css({position:"absolute",top:e.top,left:e.left})}if(c.browser.opera&&(/relative/).test(d.css("position"))){d.css({position:"relative",top:"auto",left:"auto"})}this._renderProxy();var j=b(this.helper.css("left")),g=b(this.helper.css("top"));if(i.containment){j+=c(i.containment).scrollLeft()||0;g+=c(i.containment).scrollTop()||0}this.offset=this.helper.offset();this.position={left:j,top:g};this.size=this._helper?{width:d.outerWidth(),height:d.outerHeight()}:{width:d.width(),height:d.height()};this.originalSize=this._helper?{width:d.outerWidth(),height:d.outerHeight()}:{width:d.width(),height:d.height()};this.originalPosition={left:j,top:g};this.sizeDiff={width:d.outerWidth()-d.width(),height:d.outerHeight()-d.height()};this.originalMousePosition={left:f.pageX,top:f.pageY};this.aspectRatio=(typeof i.aspectRatio=="number")?i.aspectRatio:((this.originalSize.width/this.originalSize.height)||1);var h=c(".ui-resizable-"+this.axis).css("cursor");c("body").css("cursor",h=="auto"?this.axis+"-resize":h);d.addClass("ui-resizable-resizing");this._propagate("start",f);return true},_mouseDrag:function(d){var g=this.helper,f=this.options,l={},p=this,i=this.originalMousePosition,m=this.axis;var q=(d.pageX-i.left)||0,n=(d.pageY-i.top)||0;var h=this._change[m];if(!h){return false}var 
k=h.apply(this,[d,q,n]),j=c.browser.msie&&c.browser.version<7,e=this.sizeDiff;if(this._aspectRatio||d.shiftKey){k=this._updateRatio(k,d)}k=this._respectSize(k,d);this._propagate("resize",d);g.css({top:this.position.top+"px",left:this.position.left+"px",width:this.size.width+"px",height:this.size.height+"px"});if(!this._helper&&this._proportionallyResizeElements.length){this._proportionallyResize()}this._updateCache(k);this._trigger("resize",d,this.ui());return false},_mouseStop:function(g){this.resizing=false;var h=this.options,l=this;if(this._helper){var f=this._proportionallyResizeElements,d=f.length&&(/textarea/i).test(f[0].nodeName),e=d&&c.ui.hasScroll(f[0],"left")?0:l.sizeDiff.height,j=d?0:l.sizeDiff.width;var m={width:(l.size.width-j),height:(l.size.height-e)},i=(parseInt(l.element.css("left"),10)+(l.position.left-l.originalPosition.left))||null,k=(parseInt(l.element.css("top"),10)+(l.position.top-l.originalPosition.top))||null;if(!h.animate){this.element.css(c.extend(m,{top:k,left:i}))}l.helper.height(l.size.height);l.helper.width(l.size.width);if(this._helper&&!h.animate){this._proportionallyResize()}}c("body").css("cursor","auto");this.element.removeClass("ui-resizable-resizing");this._propagate("stop",g);if(this._helper){this.helper.remove()}return false},_updateCache:function(d){var e=this.options;this.offset=this.helper.offset();if(a(d.left)){this.position.left=d.left}if(a(d.top)){this.position.top=d.top}if(a(d.height)){this.size.height=d.height}if(a(d.width)){this.size.width=d.width}},_updateRatio:function(g,f){var h=this.options,i=this.position,e=this.size,d=this.axis;if(g.height){g.width=(e.height*this.aspectRatio)}else{if(g.width){g.height=(e.width/this.aspectRatio)}}if(d=="sw"){g.left=i.left+(e.width-g.width);g.top=null}if(d=="nw"){g.top=i.top+(e.height-g.height);g.left=i.left+(e.width-g.width)}return g},_respectSize:function(k,f){var 
i=this.helper,h=this.options,q=this._aspectRatio||f.shiftKey,p=this.axis,s=a(k.width)&&h.maxWidth&&(h.maxWidth<k.width),l=a(k.height)&&h.maxHeight&&(h.maxHeight<k.height),g=a(k.width)&&h.minWidth&&(h.minWidth>k.width),r=a(k.height)&&h.minHeight&&(h.minHeight>k.height);if(g){k.width=h.minWidth}if(r){k.height=h.minHeight}if(s){k.width=h.maxWidth}if(l){k.height=h.maxHeight}var e=this.originalPosition.left+this.originalSize.width,n=this.position.top+this.size.height;var j=/sw|nw|w/.test(p),d=/nw|ne|n/.test(p);if(g&&j){k.left=e-h.minWidth}if(s&&j){k.left=e-h.maxWidth}if(r&&d){k.top=n-h.minHeight}if(l&&d){k.top=n-h.maxHeight}var m=!k.width&&!k.height;if(m&&!k.left&&k.top){k.top=null}else{if(m&&!k.top&&k.left){k.left=null}}return k},_proportionallyResize:function(){var j=this.options;if(!this._proportionallyResizeElements.length){return}var f=this.helper||this.element;for(var e=0;e<this._proportionallyResizeElements.length;e++){var g=this._proportionallyResizeElements[e];if(!this.borderDif){var d=[g.css("borderTopWidth"),g.css("borderRightWidth"),g.css("borderBottomWidth"),g.css("borderLeftWidth")],h=[g.css("paddingTop"),g.css("paddingRight"),g.css("paddingBottom"),g.css("paddingLeft")];this.borderDif=c.map(d,function(k,m){var l=parseInt(k,10)||0,n=parseInt(h[m],10)||0;return l+n})}if(c.browser.msie&&!(!(c(f).is(":hidden")||c(f).parents(":hidden").length))){continue}g.css({height:(f.height()-this.borderDif[0]-this.borderDif[2])||0,width:(f.width()-this.borderDif[1]-this.borderDif[3])||0})}},_renderProxy:function(){var e=this.element,h=this.options;this.elementOffset=e.offset();if(this._helper){this.helper=this.helper||c('<div style="overflow:hidden;"></div>');var 
d=c.browser.msie&&c.browser.version<7,f=(d?1:0),g=(d?2:-1);this.helper.addClass(this._helper).css({width:this.element.outerWidth()+g,height:this.element.outerHeight()+g,position:"absolute",left:this.elementOffset.left-f+"px",top:this.elementOffset.top-f+"px",zIndex:++h.zIndex});this.helper.appendTo("body").disableSelection()}else{this.helper=this.element}},_change:{e:function(f,e,d){return{width:this.originalSize.width+e}},w:function(g,e,d){var i=this.options,f=this.originalSize,h=this.originalPosition;return{left:h.left+e,width:f.width-e}},n:function(g,e,d){var i=this.options,f=this.originalSize,h=this.originalPosition;return{top:h.top+d,height:f.height-d}},s:function(f,e,d){return{height:this.originalSize.height+d}},se:function(f,e,d){return c.extend(this._change.s.apply(this,arguments),this._change.e.apply(this,[f,e,d]))},sw:function(f,e,d){return c.extend(this._change.s.apply(this,arguments),this._change.w.apply(this,[f,e,d]))},ne:function(f,e,d){return c.extend(this._change.n.apply(this,arguments),this._change.e.apply(this,[f,e,d]))},nw:function(f,e,d){return c.extend(this._change.n.apply(this,arguments),this._change.w.apply(this,[f,e,d]))}},_propagate:function(e,d){c.ui.plugin.call(this,e,[d,this.ui()]);(e!="resize"&&this._trigger(e,d,this.ui()))},plugins:{},ui:function(){return{originalElement:this.originalElement,element:this.element,helper:this.helper,position:this.position,size:this.size,originalSize:this.originalSize,originalPosition:this.originalPosition}}}));c.extend(c.ui.resizable,{version:"1.7.2",eventPrefix:"resize",defaults:{alsoResize:false,animate:false,animateDuration:"slow",animateEasing:"swing",aspectRatio:false,autoHide:false,cancel:":input,option",containment:false,delay:0,distance:1,ghost:false,grid:false,handles:"e,s,se",helper:false,maxHeight:null,maxWidth:null,minHeight:10,minWidth:10,zIndex:1000}});c.ui.plugin.add("resizable","alsoResize",{start:function(e,f){var 
d=c(this).data("resizable"),g=d.options;_store=function(h){c(h).each(function(){c(this).data("resizable-alsoresize",{width:parseInt(c(this).width(),10),height:parseInt(c(this).height(),10),left:parseInt(c(this).css("left"),10),top:parseInt(c(this).css("top"),10)})})};if(typeof(g.alsoResize)=="object"&&!g.alsoResize.parentNode){if(g.alsoResize.length){g.alsoResize=g.alsoResize[0];_store(g.alsoResize)}else{c.each(g.alsoResize,function(h,i){_store(h)})}}else{_store(g.alsoResize)}},resize:function(f,h){var e=c(this).data("resizable"),i=e.options,g=e.originalSize,k=e.originalPosition;var j={height:(e.size.height-g.height)||0,width:(e.size.width-g.width)||0,top:(e.position.top-k.top)||0,left:(e.position.left-k.left)||0},d=function(l,m){c(l).each(function(){var p=c(this),q=c(this).data("resizable-alsoresize"),o={},n=m&&m.length?m:["width","height","top","left"];c.each(n||["width","height","top","left"],function(r,t){var s=(q[t]||0)+(j[t]||0);if(s&&s>=0){o[t]=s||null}});if(/relative/.test(p.css("position"))&&c.browser.opera){e._revertToRelativePosition=true;p.css({position:"absolute",top:"auto",left:"auto"})}p.css(o)})};if(typeof(i.alsoResize)=="object"&&!i.alsoResize.nodeType){c.each(i.alsoResize,function(l,m){d(l,m)})}else{d(i.alsoResize)}},stop:function(e,f){var d=c(this).data("resizable");if(d._revertToRelativePosition&&c.browser.opera){d._revertToRelativePosition=false;el.css({position:"relative"})}c(this).removeData("resizable-alsoresize-start")}});c.ui.plugin.add("resizable","animate",{stop:function(h,m){var n=c(this).data("resizable"),i=n.options;var g=n._proportionallyResizeElements,d=g.length&&(/textarea/i).test(g[0].nodeName),e=d&&c.ui.hasScroll(g[0],"left")?0:n.sizeDiff.height,k=d?0:n.sizeDiff.width;var 
f={width:(n.size.width-k),height:(n.size.height-e)},j=(parseInt(n.element.css("left"),10)+(n.position.left-n.originalPosition.left))||null,l=(parseInt(n.element.css("top"),10)+(n.position.top-n.originalPosition.top))||null;n.element.animate(c.extend(f,l&&j?{top:l,left:j}:{}),{duration:i.animateDuration,easing:i.animateEasing,step:function(){var o={width:parseInt(n.element.css("width"),10),height:parseInt(n.element.css("height"),10),top:parseInt(n.element.css("top"),10),left:parseInt(n.element.css("left"),10)};if(g&&g.length){c(g[0]).css({width:o.width,height:o.height})}n._updateCache(o);n._propagate("resize",h)}})}});c.ui.plugin.add("resizable","containment",{start:function(e,q){var s=c(this).data("resizable"),i=s.options,k=s.element;var f=i.containment,j=(f instanceof c)?f.get(0):(/parent/.test(f))?k.parent().get(0):f;if(!j){return}s.containerElement=c(j);if(/document/.test(f)||f==document){s.containerOffset={left:0,top:0};s.containerPosition={left:0,top:0};s.parentData={element:c(document),left:0,top:0,width:c(document).width(),height:c(document).height()||document.body.parentNode.scrollHeight}}else{var m=c(j),h=[];c(["Top","Right","Left","Bottom"]).each(function(p,o){h[p]=b(m.css("padding"+o))});s.containerOffset=m.offset();s.containerPosition=m.position();s.containerSize={height:(m.innerHeight()-h[3]),width:(m.innerWidth()-h[1])};var n=s.containerOffset,d=s.containerSize.height,l=s.containerSize.width,g=(c.ui.hasScroll(j,"left")?j.scrollWidth:l),r=(c.ui.hasScroll(j)?j.scrollHeight:d);s.parentData={element:j,left:n.left,top:n.top,width:g,height:r}}},resize:function(f,p){var 
s=c(this).data("resizable"),h=s.options,e=s.containerSize,n=s.containerOffset,l=s.size,m=s.position,q=s._aspectRatio||f.shiftKey,d={top:0,left:0},g=s.containerElement;if(g[0]!=document&&(/static/).test(g.css("position"))){d=n}if(m.left<(s._helper?n.left:0)){s.size.width=s.size.width+(s._helper?(s.position.left-n.left):(s.position.left-d.left));if(q){s.size.height=s.size.width/h.aspectRatio}s.position.left=h.helper?n.left:0}if(m.top<(s._helper?n.top:0))
-{s.size.height=s.size.height+(s._helper?(s.position.top-n.top):s.position.top);if(q){s.size.width=s.size.height*h.aspectRatio}s.position.top=s._helper?n.top:0}s.offset.left=s.parentData.left+s.position.left;s.offset.top=s.parentData.top+s.position.top;var k=Math.abs((s._helper?s.offset.left-d.left:(s.offset.left-d.left))+s.sizeDiff.width),r=Math.abs((s._helper?s.offset.top-d.top:(s.offset.top-n.top))+s.sizeDiff.height);var j=s.containerElement.get(0)==s.element.parent().get(0),i=/relative|absolute/.test(s.containerElement.css("position"));if(j&&i){k-=s.parentData.left}if(k+s.size.width>=s.parentData.width){s.size.width=s.parentData.width-k;if(q){s.size.height=s.size.width/s.aspectRatio}}if(r+s.size.height>=s.parentData.height){s.size.height=s.parentData.height-r;if(q){s.size.width=s.size.height*s.aspectRatio}}},stop:function(e,m){var p=c(this).data("resizable"),f=p.options,k=p.position,l=p.containerOffset,d=p.containerPosition,g=p.containerElement;var i=c(p.helper),q=i.offset(),n=i.outerWidth()-p.sizeDiff.width,j=i.outerHeight()-p.sizeDiff.height;if(p._helper&&!f.animate&&(/relative/).test(g.css("position"))){c(this).css({left:q.left-d.left-l.left,width:n,height:j})}if(p._helper&&!f.animate&&(/static/).test(g.css("position"))){c(this).css({left:q.left-d.left-l.left,width:n,height:j})}}});c.ui.plugin.add("resizable","ghost",{start:function(f,g){var d=c(this).data("resizable"),h=d.options,e=d.size;d.ghost=d.originalElement.clone();d.ghost.css({opacity:0.25,display:"block",position:"relative",height:e.height,width:e.width,margin:0,left:0,top:0}).addClass("ui-resizable-ghost").addClass(typeof h.ghost=="string"?h.ghost:"");d.ghost.appendTo(d.helper)},resize:function(e,f){var d=c(this).data("resizable"),g=d.options;if(d.ghost){d.ghost.css({position:"relative",height:d.size.height,width:d.size.width})}},stop:function(e,f){var 
d=c(this).data("resizable"),g=d.options;if(d.ghost&&d.helper){d.helper.get(0).removeChild(d.ghost.get(0))}}});c.ui.plugin.add("resizable","grid",{resize:function(d,l){var n=c(this).data("resizable"),g=n.options,j=n.size,h=n.originalSize,i=n.originalPosition,m=n.axis,k=g._aspectRatio||d.shiftKey;g.grid=typeof g.grid=="number"?[g.grid,g.grid]:g.grid;var f=Math.round((j.width-h.width)/(g.grid[0]||1))*(g.grid[0]||1),e=Math.round((j.height-h.height)/(g.grid[1]||1))*(g.grid[1]||1);if(/^(se|s|e)$/.test(m)){n.size.width=h.width+f;n.size.height=h.height+e}else{if(/^(ne)$/.test(m)){n.size.width=h.width+f;n.size.height=h.height+e;n.position.top=i.top-e}else{if(/^(sw)$/.test(m)){n.size.width=h.width+f;n.size.height=h.height+e;n.position.left=i.left-f}else{n.size.width=h.width+f;n.size.height=h.height+e;n.position.top=i.top-e;n.position.left=i.left-f}}}}});var b=function(d){return parseInt(d,10)||0};var a=function(d){return !isNaN(parseInt(d,10))}})(jQuery);;
-/**
- * jQuery.ScrollTo - Easy element scrolling using jQuery.
- * Copyright (c) 2008 Ariel Flesler - aflesler(at)gmail(dot)com
- * Licensed under GPL license (http://www.opensource.org/licenses/gpl-license.php).
- * Date: 2/8/2008
- * @author Ariel Flesler
- * @version 1.3.2
- */
-;(function($){var o=$.scrollTo=function(a,b,c){o.window().scrollTo(a,b,c)};o.defaults={axis:'y',duration:1};o.window=function(){return $($.browser.safari?'body':'html')};$.fn.scrollTo=function(l,m,n){if(typeof m=='object'){n=m;m=0}n=$.extend({},o.defaults,n);m=m||n.speed||n.duration;n.queue=n.queue&&n.axis.length>1;if(n.queue)m/=2;n.offset=j(n.offset);n.over=j(n.over);return this.each(function(){var a=this,b=$(a),t=l,c,d={},w=b.is('html,body');switch(typeof t){case'number':case'string':if(/^([+-]=)?\d+(px)?$/.test(t)){t=j(t);break}t=$(t,this);case'object':if(t.is||t.style)c=(t=$(t)).offset()}$.each(n.axis.split(''),function(i,f){var P=f=='x'?'Left':'Top',p=P.toLowerCase(),k='scroll'+P,e=a[k],D=f=='x'?'Width':'Height';if(c){d[k]=c[p]+(w?0:e-b.offset()[p]);if(n.margin){d[k]-=parseInt(t.css('margin'+P))||0;d[k]-=parseInt(t.css('border'+P+'Width'))||0}d[k]+=n.offset[p]||0;if(n.over[p])d[k]+=t[D.toLowerCase()]()*n.over[p]}else d[k]=t[p];if(/^\d+$/.test(d[k]))d[k]=d[k]<=0?0:Math.min(d[k],h(D));if(!i&&n.queue){if(e!=d[k])g(n.onAfterFirst);delete d[k]}});g(n.onAfter);function g(a){b.animate(d,m,n.easing,a&&function(){a.call(this,l)})};function h(D){var b=w?$.browser.opera?document.body:document.documentElement:a;return b['scroll'+D]-b['client'+D]}})};function j(a){return typeof a=='object'?a:{top:a,left:a}}})(jQuery);
-
+/*! jQuery v1.7.1 jquery.com | jquery.org/license */
+(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f("<"+a+">").appendTo(b),e=d.css("display");d.remove();if(e==="none"||e===""){cl||(cl=c.createElement("iframe"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode==="CSS1Compat"?"<!doctype html>":"")+"<html><body>"),cm.close();d=cm.createElement(a),cm.body.appendChild(d),e=f.css(d,"display"),b.removeChild(cl)}ck[a]=e}return ck[a]}function cu(a,b){var c={};f.each(cq.concat.apply([],cq.slice(0,b)),function(){c[this]=a});return c}function ct(){cr=b}function cs(){setTimeout(ct,0);return cr=f.now()}function cj(){try{return new a.ActiveXObject("Microsoft.XMLHTTP")}catch(b){}}function ci(){try{return new a.XMLHttpRequest}catch(b){}}function cc(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d.length,j,k=d[0],l,m,n,o,p;for(g=1;g<i;g++){if(g===1)for(h in a.converters)typeof h=="string"&&(e[h.toLowerCase()]=a.converters[h]);l=k,k=d[g];if(k==="*")k=l;else if(l!=="*"&&l!==k){m=l+" "+k,n=e[m]||e["* "+k];if(!n){p=b;for(o in e){j=o.split(" ");if(j[0]===l||j[0]==="*"){p=e[j[1]+" "+k];if(p){o=e[o],o===!0?n=p:p===!0&&(n=o);break}}}}!n&&!p&&f.error("No conversion from "+m.replace(" "," to ")),n!==!0&&(c=n?n(c):p(o(c)))}}return c}function cb(a,c,d){var e=a.contents,f=a.dataTypes,g=a.responseFields,h,i,j,k;for(i in g)i in d&&(c[g[i]]=d[i]);while(f[0]==="*")f.shift(),h===b&&(h=a.mimeType||c.getResponseHeader("content-type"));if(h)for(i in e)if(e[i]&&e[i].test(h)){f.unshift(i);break}if(f[0]in d)j=f[0];else{for(i in d){if(!f[0]||a.converters[i+" "+f[0]]){j=i;break}k||(k=i)}j=j||k}if(j){j!==f[0]&&f.unshift(j);return d[j]}}function ca(a,b,c,d){if(f.isArray(b))f.each(b,function(b,e){c||bE.test(a)?d(a,e):ca(a+"["+(typeof e=="object"||f.isArray(e)?b:"")+"]",e,c,d)});else if(!c&&b!=null&&typeof b=="object")for(var e in 
b)ca(a+"["+e+"]",b[e],c,d);else d(a,b)}function b_(a,c){var d,e,g=f.ajaxSettings.flatOptions||{};for(d in c)c[d]!==b&&((g[d]?a:e||(e={}))[d]=c[d]);e&&f.extend(!0,a,e)}function b$(a,c,d,e,f,g){f=f||c.dataTypes[0],g=g||{},g[f]=!0;var h=a[f],i=0,j=h?h.length:0,k=a===bT,l;for(;i<j&&(k||!l);i++)l=h[i](c,d,e),typeof l=="string"&&(!k||g[l]?l=b:(c.dataTypes.unshift(l),l=b$(a,c,d,e,l,g)));(k||!l)&&!g["*"]&&(l=b$(a,c,d,e,"*",g));return l}function bZ(a){return function(b,c){typeof b!="string"&&(c=b,b="*");if(f.isFunction(c)){var d=b.toLowerCase().split(bP),e=0,g=d.length,h,i,j;for(;e<g;e++)h=d[e],j=/^\+/.test(h),j&&(h=h.substr(1)||"*"),i=a[h]=a[h]||[],i[j?"unshift":"push"](c)}}}function bC(a,b,c){var d=b==="width"?a.offsetWidth:a.offsetHeight,e=b==="width"?bx:by,g=0,h=e.length;if(d>0){if(c!=="border")for(;g<h;g++)c||(d-=parseFloat(f.css(a,"padding"+e[g]))||0),c==="margin"?d+=parseFloat(f.css(a,c+e[g]))||0:d-=parseFloat(f.css(a,"border"+e[g]+"Width"))||0;return d+"px"}d=bz(a,b,b);if(d<0||d==null)d=a.style[b]||0;d=parseFloat(d)||0;if(c)for(;g<h;g++)d+=parseFloat(f.css(a,"padding"+e[g]))||0,c!=="padding"&&(d+=parseFloat(f.css(a,"border"+e[g]+"Width"))||0),c==="margin"&&(d+=parseFloat(f.css(a,c+e[g]))||0);return d+"px"}function bp(a,b){b.src?f.ajax({url:b.src,async:!1,dataType:"script"}):f.globalEval((b.text||b.textContent||b.innerHTML||"").replace(bf,"/*$0*/")),b.parentNode&&b.parentNode.removeChild(b)}function bo(a){var b=c.createElement("div");bh.appendChild(b),b.innerHTML=a.outerHTML;return b.firstChild}function bn(a){var b=(a.nodeName||"").toLowerCase();b==="input"?bm(a):b!=="script"&&typeof a.getElementsByTagName!="undefined"&&f.grep(a.getElementsByTagName("input"),bm)}function bm(a){if(a.type==="checkbox"||a.type==="radio")a.defaultChecked=a.checked}function bl(a){return typeof a.getElementsByTagName!="undefined"?a.getElementsByTagName("*"):typeof a.querySelectorAll!="undefined"?a.querySelectorAll("*"):[]}function bk(a,b){var 
c;if(b.nodeType===1){b.clearAttributes&&b.clearAttributes(),b.mergeAttributes&&b.mergeAttributes(a),c=b.nodeName.toLowerCase();if(c==="object")b.outerHTML=a.outerHTML;else if(c!=="input"||a.type!=="checkbox"&&a.type!=="radio"){if(c==="option")b.selected=a.defaultSelected;else if(c==="input"||c==="textarea")b.defaultValue=a.defaultValue}else a.checked&&(b.defaultChecked=b.checked=a.checked),b.value!==a.value&&(b.value=a.value);b.removeAttribute(f.expando)}}function bj(a,b){if(b.nodeType===1&&!!f.hasData(a)){var c,d,e,g=f._data(a),h=f._data(b,g),i=g.events;if(i){delete h.handle,h.events={};for(c in i)for(d=0,e=i[c].length;d<e;d++)f.event.add(b,c+(i[c][d].namespace?".":"")+i[c][d].namespace,i[c][d],i[c][d].data)}h.data&&(h.data=f.extend({},h.data))}}function bi(a,b){return f.nodeName(a,"table")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function U(a){var b=V.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}function T(a,b,c){b=b||0;if(f.isFunction(b))return f.grep(a,function(a,d){var e=!!b.call(a,d,a);return e===c});if(b.nodeType)return f.grep(a,function(a,d){return a===b===c});if(typeof b=="string"){var d=f.grep(a,function(a){return a.nodeType===1});if(O.test(b))return f.filter(b,d,!c);b=f.filter(b,d)}return f.grep(a,function(a,d){return f.inArray(a,b)>=0===c})}function S(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function K(){return!0}function J(){return!1}function n(a,b,c){var d=b+"defer",e=b+"queue",g=b+"mark",h=f._data(a,d);h&&(c==="queue"||!f._data(a,e))&&(c==="mark"||!f._data(a,g))&&setTimeout(function(){!f._data(a,e)&&!f._data(a,g)&&(f.removeData(a,d,!0),h.fire())},0)}function m(a){for(var b in a){if(b==="data"&&f.isEmptyObject(a[b]))continue;if(b!=="toJSON")return!1}return!0}function l(a,c,d){if(d===b&&a.nodeType===1){var e="data-"+c.replace(k,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof 
d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:f.isNumeric(d)?parseFloat(d):j.test(d)?f.parseJSON(d):d}catch(g){}f.data(a,c,d)}else d=b}return d}function h(a){var b=g[a]={},c,d;a=a.split(/\s+/);for(c=0,d=a.length;c<d;c++)b[a[c]]=!0;return b}var c=a.document,d=a.navigator,e=a.location,f=function(){function J(){if(!e.isReady){try{c.documentElement.doScroll("left")}catch(a){setTimeout(J,1);return}e.ready()}}var e=function(a,b){return new e.fn.init(a,b,h)},f=a.jQuery,g=a.$,h,i=/^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,j=/\S/,k=/^\s+/,l=/\s+$/,m=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,n=/^[\],:{}\s]*$/,o=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,p=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,q=/(?:^|:|,)(?:\s*\[)+/g,r=/(webkit)[ \/]([\w.]+)/,s=/(opera)(?:.*version)?[ \/]([\w.]+)/,t=/(msie) ([\w.]+)/,u=/(mozilla)(?:.*? rv:([\w.]+))?/,v=/-([a-z]|[0-9])/ig,w=/^-ms-/,x=function(a,b){return(b+"").toUpperCase()},y=d.userAgent,z,A,B,C=Object.prototype.toString,D=Object.prototype.hasOwnProperty,E=Array.prototype.push,F=Array.prototype.slice,G=String.prototype.trim,H=Array.prototype.indexOf,I={};e.fn=e.prototype={constructor:e,init:function(a,d,f){var g,h,j,k;if(!a)return this;if(a.nodeType){this.context=this[0]=a,this.length=1;return this}if(a==="body"&&!d&&c.body){this.context=c,this[0]=c.body,this.selector=a,this.length=1;return this}if(typeof a=="string"){a.charAt(0)!=="<"||a.charAt(a.length-1)!==">"||a.length<3?g=i.exec(a):g=[null,a,null];if(g&&(g[1]||!d)){if(g[1]){d=d instanceof e?d[0]:d,k=d?d.ownerDocument||d:c,j=m.exec(a),j?e.isPlainObject(d)?(a=[c.createElement(j[1])],e.fn.attr.call(a,d,!0)):a=[k.createElement(j[1])]:(j=e.buildFragment([g[1]],[k]),a=(j.cacheable?e.clone(j.fragment):j.fragment).childNodes);return e.merge(this,a)}h=c.getElementById(g[2]);if(h&&h.parentNode){if(h.id!==g[2])return f.find(a);this.length=1,this[0]=h}this.context=c,this.selector=a;return 
this}return!d||d.jquery?(d||f).find(a):this.constructor(d).find(a)}if(e.isFunction(a))return f.ready(a);a.selector!==b&&(this.selector=a.selector,this.context=a.context);return e.makeArray(a,this)},selector:"",jquery:"1.7.1",length:0,size:function(){return this.length},toArray:function(){return F.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var d=this.constructor();e.isArray(a)?E.apply(d,a):e.merge(d,a),d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")");return d},each:function(a,b){return e.each(this,a,b)},ready:function(a){e.bindReady(),A.add(a);return this},eq:function(a){a=+a;return a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(F.apply(this,arguments),"slice",F.call(arguments).join(","))},map:function(a){return this.pushStack(e.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:E,sort:[].sort,splice:[].splice},e.fn.init.prototype=e.fn,e.extend=e.fn.extend=function(){var a,c,d,f,g,h,i=arguments[0]||{},j=1,k=arguments.length,l=!1;typeof i=="boolean"&&(l=i,i=arguments[1]||{},j=2),typeof i!="object"&&!e.isFunction(i)&&(i={}),k===j&&(i=this,--j);for(;j<k;j++)if((a=arguments[j])!=null)for(c in a){d=i[c],f=a[c];if(i===f)continue;l&&f&&(e.isPlainObject(f)||(g=e.isArray(f)))?(g?(g=!1,h=d&&e.isArray(d)?d:[]):h=d&&e.isPlainObject(d)?d:{},i[c]=e.extend(l,h,f)):f!==b&&(i[c]=f)}return i},e.extend({noConflict:function(b){a.$===e&&(a.$=g),b&&a.jQuery===e&&(a.jQuery=f);return e},isReady:!1,readyWait:1,holdReady:function(a){a?e.readyWait++:e.ready(!0)},ready:function(a){if(a===!0&&!--e.readyWait||a!==!0&&!e.isReady){if(!c.body)return 
setTimeout(e.ready,1);e.isReady=!0;if(a!==!0&&--e.readyWait>0)return;A.fireWith(c,[e]),e.fn.trigger&&e(c).trigger("ready").off("ready")}},bindReady:function(){if(!A){A=e.Callbacks("once memory");if(c.readyState==="complete")return setTimeout(e.ready,1);if(c.addEventListener)c.addEventListener("DOMContentLoaded",B,!1),a.addEventListener("load",e.ready,!1);else if(c.attachEvent){c.attachEvent("onreadystatechange",B),a.attachEvent("onload",e.ready);var b=!1;try{b=a.frameElement==null}catch(d){}c.documentElement.doScroll&&b&&J()}}},isFunction:function(a){return e.type(a)==="function"},isArray:Array.isArray||function(a){return e.type(a)==="array"},isWindow:function(a){return a&&typeof a=="object"&&"setInterval"in a},isNumeric:function(a){return!isNaN(parseFloat(a))&&isFinite(a)},type:function(a){return a==null?String(a):I[C.call(a)]||"object"},isPlainObject:function(a){if(!a||e.type(a)!=="object"||a.nodeType||e.isWindow(a))return!1;try{if(a.constructor&&!D.call(a,"constructor")&&!D.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}var d;for(d in a);return d===b||D.call(a,d)},isEmptyObject:function(a){for(var b in a)return!1;return!0},error:function(a){throw new Error(a)},parseJSON:function(b){if(typeof b!="string"||!b)return null;b=e.trim(b);if(a.JSON&&a.JSON.parse)return a.JSON.parse(b);if(n.test(b.replace(o,"@").replace(p,"]").replace(q,"")))return(new Function("return "+b))();e.error("Invalid JSON: "+b)},parseXML:function(c){var d,f;try{a.DOMParser?(f=new DOMParser,d=f.parseFromString(c,"text/xml")):(d=new ActiveXObject("Microsoft.XMLDOM"),d.async="false",d.loadXML(c))}catch(g){d=b}(!d||!d.documentElement||d.getElementsByTagName("parsererror").length)&&e.error("Invalid XML: "+c);return d},noop:function(){},globalEval:function(b){b&&j.test(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(w,"ms-").replace(v,x)},nodeName:function(a,b){return 
a.nodeName&&a.nodeName.toUpperCase()===b.toUpperCase()},each:function(a,c,d){var f,g=0,h=a.length,i=h===b||e.isFunction(a);if(d){if(i){for(f in a)if(c.apply(a[f],d)===!1)break}else for(;g<h;)if(c.apply(a[g++],d)===!1)break}else if(i){for(f in a)if(c.call(a[f],f,a[f])===!1)break}else for(;g<h;)if(c.call(a[g],g,a[g++])===!1)break;return a},trim:G?function(a){return a==null?"":G.call(a)}:function(a){return a==null?"":(a+"").replace(k,"").replace(l,"")},makeArray:function(a,b){var c=b||[];if(a!=null){var d=e.type(a);a.length==null||d==="string"||d==="function"||d==="regexp"||e.isWindow(a)?E.call(c,a):e.merge(c,a)}return c},inArray:function(a,b,c){var d;if(b){if(H)return H.call(b,a,c);d=b.length,c=c?c<0?Math.max(0,d+c):c:0;for(;c<d;c++)if(c in b&&b[c]===a)return c}return-1},merge:function(a,c){var d=a.length,e=0;if(typeof c.length=="number")for(var f=c.length;e<f;e++)a[d++]=c[e];else while(c[e]!==b)a[d++]=c[e++];a.length=d;return a},grep:function(a,b,c){var d=[],e;c=!!c;for(var f=0,g=a.length;f<g;f++)e=!!b(a[f],f),c!==e&&d.push(a[f]);return d},map:function(a,c,d){var f,g,h=[],i=0,j=a.length,k=a instanceof e||j!==b&&typeof j=="number"&&(j>0&&a[0]&&a[j-1]||j===0||e.isArray(a));if(k)for(;i<j;i++)f=c(a[i],i,d),f!=null&&(h[h.length]=f);else for(g in a)f=c(a[g],g,d),f!=null&&(h[h.length]=f);return h.concat.apply([],h)},guid:1,proxy:function(a,c){if(typeof c=="string"){var d=a[c];c=a,a=d}if(!e.isFunction(a))return b;var f=F.call(arguments,2),g=function(){return a.apply(c,f.concat(F.call(arguments)))};g.guid=a.guid=a.guid||g.guid||e.guid++;return g},access:function(a,c,d,f,g,h){var i=a.length;if(typeof c=="object"){for(var j in c)e.access(a,j,c[j],f,g,d);return a}if(d!==b){f=!h&&f&&e.isFunction(d);for(var k=0;k<i;k++)g(a[k],c,f?d.call(a[k],k,g(a[k],c)):d,h);return a}return i?g(a[0],c):b},now:function(){return(new Date).getTime()},uaMatch:function(a){a=a.toLowerCase();var 
b=r.exec(a)||s.exec(a)||t.exec(a)||a.indexOf("compatible")<0&&u.exec(a)||[];return{browser:b[1]||"",version:b[2]||"0"}},sub:function(){function a(b,c){return new a.fn.init(b,c)}e.extend(!0,a,this),a.superclass=this,a.fn=a.prototype=this(),a.fn.constructor=a,a.sub=this.sub,a.fn.init=function(d,f){f&&f instanceof e&&!(f instanceof a)&&(f=a(f));return e.fn.init.call(this,d,f,b)},a.fn.init.prototype=a.fn;var b=a(c);return a},browser:{}}),e.each("Boolean Number String Function Array Date RegExp Object".split(" "),function(a,b){I["[object "+b+"]"]=b.toLowerCase()}),z=e.uaMatch(y),z.browser&&(e.browser[z.browser]=!0,e.browser.version=z.version),e.browser.webkit&&(e.browser.safari=!0),j.test(" ")&&(k=/^[\s\xA0]+/,l=/[\s\xA0]+$/),h=e(c),c.addEventListener?B=function(){c.removeEventListener("DOMContentLoaded",B,!1),e.ready()}:c.attachEvent&&(B=function(){c.readyState==="complete"&&(c.detachEvent("onreadystatechange",B),e.ready())});return e}(),g={};f.Callbacks=function(a){a=a?g[a]||h(a):{};var c=[],d=[],e,i,j,k,l,m=function(b){var d,e,g,h,i;for(d=0,e=b.length;d<e;d++)g=b[d],h=f.type(g),h==="array"?m(g):h==="function"&&(!a.unique||!o.has(g))&&c.push(g)},n=function(b,f){f=f||[],e=!a.memory||[b,f],i=!0,l=j||0,j=0,k=c.length;for(;c&&l<k;l++)if(c[l].apply(b,f)===!1&&a.stopOnFalse){e=!0;break}i=!1,c&&(a.once?e===!0?o.disable():c=[]:d&&d.length&&(e=d.shift(),o.fireWith(e[0],e[1])))},o={add:function(){if(c){var a=c.length;m(arguments),i?k=c.length:e&&e!==!0&&(j=a,n(e[0],e[1]))}return this},remove:function(){if(c){var b=arguments,d=0,e=b.length;for(;d<e;d++)for(var f=0;f<c.length;f++)if(b[d]===c[f]){i&&f<=k&&(k--,f<=l&&l--),c.splice(f--,1);if(a.unique)break}}return this},has:function(a){if(c){var b=0,d=c.length;for(;b<d;b++)if(a===c[b])return!0}return!1},empty:function(){c=[];return this},disable:function(){c=d=e=b;return this},disabled:function(){return!c},lock:function(){d=b,(!e||e===!0)&&o.disable();return 
this},locked:function(){return!d},fireWith:function(b,c){d&&(i?a.once||d.push([b,c]):(!a.once||!e)&&n(b,c));return this},fire:function(){o.fireWith(this,arguments);return this},fired:function(){return!!e}};return o};var i=[].slice;f.extend({Deferred:function(a){var b=f.Callbacks("once memory"),c=f.Callbacks("once memory"),d=f.Callbacks("memory"),e="pending",g={resolve:b,reject:c,notify:d},h={done:b.add,fail:c.add,progress:d.add,state:function(){return e},isResolved:b.fired,isRejected:c.fired,then:function(a,b,c){i.done(a).fail(b).progress(c);return this},always:function(){i.done.apply(i,arguments).fail.apply(i,arguments);return this},pipe:function(a,b,c){return f.Deferred(function(d){f.each({done:[a,"resolve"],fail:[b,"reject"],progress:[c,"notify"]},function(a,b){var c=b[0],e=b[1],g;f.isFunction(c)?i[a](function()
+{g=c.apply(this,arguments),g&&f.isFunction(g.promise)?g.promise().then(d.resolve,d.reject,d.notify):d[e+"With"](this===i?d:this,[g])}):i[a](d[e])})}).promise()},promise:function(a){if(a==null)a=h;else for(var b in h)a[b]=h[b];return a}},i=h.promise({}),j;for(j in g)i[j]=g[j].fire,i[j+"With"]=g[j].fireWith;i.done(function(){e="resolved"},c.disable,d.lock).fail(function(){e="rejected"},b.disable,d.lock),a&&a.call(i,i);return i},when:function(a){function m(a){return function(b){e[a]=arguments.length>1?i.call(arguments,0):b,j.notifyWith(k,e)}}function l(a){return function(c){b[a]=arguments.length>1?i.call(arguments,0):c,--g||j.resolveWith(j,b)}}var b=i.call(arguments,0),c=0,d=b.length,e=Array(d),g=d,h=d,j=d<=1&&a&&f.isFunction(a.promise)?a:f.Deferred(),k=j.promise();if(d>1){for(;c<d;c++)b[c]&&b[c].promise&&f.isFunction(b[c].promise)?b[c].promise().then(l(c),j.reject,m(c)):--g;g||j.resolveWith(j,b)}else j!==a&&j.resolveWith(j,d?[a]:[]);return k}}),f.support=function(){var b,d,e,g,h,i,j,k,l,m,n,o,p,q=c.createElement("div"),r=c.documentElement;q.setAttribute("className","t"),q.innerHTML="   <link/><table></table><a href='/a' style='top:1px;float:left;opacity:.55;'>a</a><input 
type='checkbox'/>",d=q.getElementsByTagName("*"),e=q.getElementsByTagName("a")[0];if(!d||!d.length||!e)return{};g=c.createElement("select"),h=g.appendChild(c.createElement("option")),i=q.getElementsByTagName("input")[0],b={leadingWhitespace:q.firstChild.nodeType===3,tbody:!q.getElementsByTagName("tbody").length,htmlSerialize:!!q.getElementsByTagName("link").length,style:/top/.test(e.getAttribute("style")),hrefNormalized:e.getAttribute("href")==="/a",opacity:/^0.55/.test(e.style.opacity),cssFloat:!!e.style.cssFloat,checkOn:i.value==="on",optSelected:h.selected,getSetAttribute:q.className!=="t",enctype:!!c.createElement("form").enctype,html5Clone:c.createElement("nav").cloneNode(!0).outerHTML!=="<:nav></:nav>",submitBubbles:!0,changeBubbles:!0,focusinBubbles:!1,deleteExpando:!0,noCloneEvent:!0,inlineBlockNeedsLayout:!1,shrinkWrapBlocks:!1,reliableMarginRight:!0},i.checked=!0,b.noCloneChecked=i.cloneNode(!0).checked,g.disabled=!0,b.optDisabled=!h.disabled;try{delete q.test}catch(s){b.deleteExpando=!1}!q.addEventListener&&q.attachEvent&&q.fireEvent&&(q.attachEvent("onclick",function(){b.noCloneEvent=!1}),q.cloneNode(!0).fireEvent("onclick")),i=c.createElement("input"),i.value="t",i.setAttribute("type","radio"),b.radioValue=i.value==="t",i.setAttribute("checked","checked"),q.appendChild(i),k=c.createDocumentFragment(),k.appendChild(q.lastChild),b.checkClone=k.cloneNode(!0).cloneNode(!0).lastChild.checked,b.appendChecked=i.checked,k.removeChild(i),k.appendChild(q),q.innerHTML="",a.getComputedStyle&&(j=c.createElement("div"),j.style.width="0",j.style.marginRight="0",q.style.width="2px",q.appendChild(j),b.reliableMarginRight=(parseInt((a.getComputedStyle(j,null)||{marginRight:0}).marginRight,10)||0)===0);if(q.attachEvent)for(o in{submit:1,change:1,focusin:1})n="on"+o,p=n in q,p||(q.setAttribute(n,"return;"),p=typeof q[n]=="function"),b[o+"Bubbles"]=p;k.removeChild(q),k=g=h=j=q=i=null,f(function(){var 
a,d,e,g,h,i,j,k,m,n,o,r=c.getElementsByTagName("body")[0];!r||(j=1,k="position:absolute;top:0;left:0;width:1px;height:1px;margin:0;",m="visibility:hidden;border:0;",n="style='"+k+"border:5px solid #000;padding:0;'",o="<div "+n+"><div></div></div>"+"<table "+n+" cellpadding='0' cellspacing='0'>"+"<tr><td></td></tr></table>",a=c.createElement("div"),a.style.cssText=m+"width:0;height:0;position:static;top:0;margin-top:"+j+"px",r.insertBefore(a,r.firstChild),q=c.createElement("div"),a.appendChild(q),q.innerHTML="<table><tr><td style='padding:0;border:0;display:none'></td><td>t</td></tr></table>",l=q.getElementsByTagName("td"),p=l[0].offsetHeight===0,l[0].style.display="",l[1].style.display="none",b.reliableHiddenOffsets=p&&l[0].offsetHeight===0,q.innerHTML="",q.style.width=q.style.paddingLeft="1px",f.boxModel=b.boxModel=q.offsetWidth===2,typeof q.style.zoom!="undefined"&&(q.style.display="inline",q.style.zoom=1,b.inlineBlockNeedsLayout=q.offsetWidth===2,q.style.display="",q.innerHTML="<div style='width:4px;'></div>",b.shrinkWrapBlocks=q.offsetWidth!==2),q.style.cssText=k+m,q.innerHTML=o,d=q.firstChild,e=d.firstChild,h=d.nextSibling.firstChild.firstChild,i={doesNotAddBorder:e.offsetTop!==5,doesAddBorderForTableAndCells:h.offsetTop===5},e.style.position="fixed",e.style.top="20px",i.fixedPosition=e.offsetTop===20||e.offsetTop===15,e.style.position=e.style.top="",d.style.overflow="hidden",d.style.position="relative",i.subtractsBorderForOverflowNotVisible=e.offsetTop===-5,i.doesNotIncludeMarginInBodyOffset=r.offsetTop!==j,r.removeChild(a),q=a=null,f.extend(b,i))});return b}();var j=/^(?:\{.*\}|\[.*\])$/,k=/([A-Z])/g;f.extend({cache:{},uuid:0,expando:"jQuery"+(f.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:!0,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:!0},hasData:function(a){a=a.nodeType?f.cache[a[f.expando]]:a[f.expando];return!!a&&!m(a)},data:function(a,c,d,e){if(!!f.acceptData(a)){var g,h,i,j=f.expando,k=typeof 
c=="string",l=a.nodeType,m=l?f.cache:a,n=l?a[j]:a[j]&&j,o=c==="events";if((!n||!m[n]||!o&&!e&&!m[n].data)&&k&&d===b)return;n||(l?a[j]=n=++f.uuid:n=j),m[n]||(m[n]={},l||(m[n].toJSON=f.noop));if(typeof c=="object"||typeof c=="function")e?m[n]=f.extend(m[n],c):m[n].data=f.extend(m[n].data,c);g=h=m[n],e||(h.data||(h.data={}),h=h.data),d!==b&&(h[f.camelCase(c)]=d);if(o&&!h[c])return g.events;k?(i=h[c],i==null&&(i=h[f.camelCase(c)])):i=h;return i}},removeData:function(a,b,c){if(!!f.acceptData(a)){var d,e,g,h=f.expando,i=a.nodeType,j=i?f.cache:a,k=i?a[h]:h;if(!j[k])return;if(b){d=c?j[k]:j[k].data;if(d){f.isArray(b)||(b in d?b=[b]:(b=f.camelCase(b),b in d?b=[b]:b=b.split(" ")));for(e=0,g=b.length;e<g;e++)delete d[b[e]];if(!(c?m:f.isEmptyObject)(d))return}}if(!c){delete j[k].data;if(!m(j[k]))return}f.support.deleteExpando||!j.setInterval?delete j[k]:j[k]=null,i&&(f.support.deleteExpando?delete a[h]:a.removeAttribute?a.removeAttribute(h):a[h]=null)}},_data:function(a,b,c){return f.data(a,b,c,!0)},acceptData:function(a){if(a.nodeName){var b=f.noData[a.nodeName.toLowerCase()];if(b)return b!==!0&&a.getAttribute("classid")===b}return!0}}),f.fn.extend({data:function(a,c){var d,e,g,h=null;if(typeof a=="undefined"){if(this.length){h=f.data(this[0]);if(this[0].nodeType===1&&!f._data(this[0],"parsedAttrs")){e=this[0].attributes;for(var i=0,j=e.length;i<j;i++)g=e[i].name,g.indexOf("data-")===0&&(g=f.camelCase(g.substring(5)),l(this[0],g,h[g]));f._data(this[0],"parsedAttrs",!0)}}return h}if(typeof a=="object")return this.each(function(){f.data(this,a)});d=a.split("."),d[1]=d[1]?"."+d[1]:"";if(c===b){h=this.triggerHandler("getData"+d[1]+"!",[d[0]]),h===b&&this.length&&(h=f.data(this[0],a),h=l(this[0],a,h));return h===b&&d[1]?this.data(d[0]):h}return this.each(function(){var b=f(this),e=[d[0],c];b.triggerHandler("setData"+d[1]+"!",e),f.data(this,a,c),b.triggerHandler("changeData"+d[1]+"!",e)})},removeData:function(a){return 
this.each(function(){f.removeData(this,a)})}}),f.extend({_mark:function(a,b){a&&(b=(b||"fx")+"mark",f._data(a,b,(f._data(a,b)||0)+1))},_unmark:function(a,b,c){a!==!0&&(c=b,b=a,a=!1);if(b){c=c||"fx";var d=c+"mark",e=a?0:(f._data(b,d)||1)-1;e?f._data(b,d,e):(f.removeData(b,d,!0),n(b,c,"mark"))}},queue:function(a,b,c){var d;if(a){b=(b||"fx")+"queue",d=f._data(a,b),c&&(!d||f.isArray(c)?d=f._data(a,b,f.makeArray(c)):d.push(c));return d||[]}},dequeue:function(a,b){b=b||"fx";var c=f.queue(a,b),d=c.shift(),e={};d==="inprogress"&&(d=c.shift()),d&&(b==="fx"&&c.unshift("inprogress"),f._data(a,b+".run",e),d.call(a,function(){f.dequeue(a,b)},e)),c.length||(f.removeData(a,b+"queue "+b+".run",!0),n(a,b,"queue"))}}),f.fn.extend({queue:function(a,c){typeof a!="string"&&(c=a,a="fx");if(c===b)return f.queue(this[0],a);return this.each(function(){var b=f.queue(this,a,c);a==="fx"&&b[0]!=="inprogress"&&f.dequeue(this,a)})},dequeue:function(a){return this.each(function(){f.dequeue(this,a)})},delay:function(a,b){a=f.fx?f.fx.speeds[a]||a:a,b=b||"fx";return this.queue(b,function(b,c){var d=setTimeout(b,a);c.stop=function(){clearTimeout(d)}})},clearQueue:function(a){return this.queue(a||"fx",[])},promise:function(a,c){function m(){--h||d.resolveWith(e,[e])}typeof a!="string"&&(c=a,a=b),a=a||"fx";var d=f.Deferred(),e=this,g=e.length,h=1,i=a+"defer",j=a+"queue",k=a+"mark",l;while(g--)if(l=f.data(e[g],i,b,!0)||(f.data(e[g],j,b,!0)||f.data(e[g],k,b,!0))&&f.data(e[g],i,f.Callbacks("once memory"),!0))h++,l.add(m);m();return d.promise()}});var o=/[\n\t\r]/g,p=/\s+/,q=/\r/g,r=/^(?:button|input)$/i,s=/^(?:button|input|object|select|textarea)$/i,t=/^a(?:rea)?$/i,u=/^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,v=f.support.getSetAttribute,w,x,y;f.fn.extend({attr:function(a,b){return f.access(this,a,b,!0,f.attr)},removeAttr:function(a){return this.each(function(){f.removeAttr(this,a)})},prop:function(a,b){return 
f.access(this,a,b,!0,f.prop)},removeProp:function(a){a=f.propFix[a]||a;return this.each(function(){try{this[a]=b,delete this[a]}catch(c){}})},addClass:function(a){var b,c,d,e,g,h,i;if(f.isFunction(a))return this.each(function(b){f(this).addClass(a.call(this,b,this.className))});if(a&&typeof a=="string"){b=a.split(p);for(c=0,d=this.length;c<d;c++){e=this[c];if(e.nodeType===1)if(!e.className&&b.length===1)e.className=a;else{g=" "+e.className+" ";for(h=0,i=b.length;h<i;h++)~g.indexOf(" "+b[h]+" ")||(g+=b[h]+" ");e.className=f.trim(g)}}}return this},removeClass:function(a){var c,d,e,g,h,i,j;if(f.isFunction(a))return this.each(function(b){f(this).removeClass(a.call(this,b,this.className))});if(a&&typeof a=="string"||a===b){c=(a||"").split(p);for(d=0,e=this.length;d<e;d++){g=this[d];if(g.nodeType===1&&g.className)if(a){h=(" "+g.className+" ").replace(o," ");for(i=0,j=c.length;i<j;i++)h=h.replace(" "+c[i]+" "," ");g.className=f.trim(h)}else g.className=""}}return this},toggleClass:function(a,b){var c=typeof a,d=typeof b=="boolean";if(f.isFunction(a))return this.each(function(c){f(this).toggleClass(a.call(this,c,this.className,b),b)});return this.each(function(){if(c==="string"){var e,g=0,h=f(this),i=b,j=a.split(p);while(e=j[g++])i=d?i:!h.hasClass(e),h[i?"addClass":"removeClass"](e)}else if(c==="undefined"||c==="boolean")this.className&&f._data(this,"__className__",this.className),this.className=this.className||a===!1?"":f._data(this,"__className__")||""})},hasClass:function(a){var b=" "+a+" ",c=0,d=this.length;for(;c<d;c++)if(this[c].nodeType===1&&(" "+this[c].className+" ").replace(o," ").indexOf(b)>-1)return!0;return!1},val:function(a){var c,d,e,g=this[0];{if(!!arguments.length){e=f.isFunction(a);return this.each(function(d){var g=f(this),h;if(this.nodeType===1){e?h=a.call(this,d,g.val()):h=a,h==null?h="":typeof h=="number"?h+="":f.isArray(h)&&(h=f.map(h,function(a){return 
a==null?"":a+""})),c=f.valHooks[this.nodeName.toLowerCase()]||f.valHooks[this.type];if(!c||!("set"in c)||c.set(this,h,"value")===b)this.value=h}})}if(g){c=f.valHooks[g.nodeName.toLowerCase()]||f.valHooks[g.type];if(c&&"get"in c&&(d=c.get(g,"value"))!==b)return d;d=g.value;return typeof d=="string"?d.replace(q,""):d==null?"":d}}}}),f.extend({valHooks:{option:{get:function(a){var b=a.attributes.value;return!b||b.specified?a.value:a.text}},select:{get:function(a){var b,c,d,e,g=a.selectedIndex,h=[],i=a.options,j=a.type==="select-one";if(g<0)return null;c=j?g:0,d=j?g+1:i.length;for(;c<d;c++){e=i[c];if(e.selected&&(f.support.optDisabled?!e.disabled:e.getAttribute("disabled")===null)&&(!e.parentNode.disabled||!f.nodeName(e.parentNode,"optgroup"))){b=f(e).val();if(j)return b;h.push(b)}}if(j&&!h.length&&i.length)return f(i[g]).val();return h},set:function(a,b){var c=f.makeArray(b);f(a).find("option").each(function(){this.selected=f.inArray(f(this).val(),c)>=0}),c.length||(a.selectedIndex=-1);return c}}},attrFn:{val:!0,css:!0,html:!0,text:!0,data:!0,width:!0,height:!0,offset:!0},attr:function(a,c,d,e){var g,h,i,j=a.nodeType;if(!!a&&j!==3&&j!==8&&j!==2){if(e&&c in f.attrFn)return f(a)[c](d);if(typeof a.getAttribute=="undefined")return f.prop(a,c,d);i=j!==1||!f.isXMLDoc(a),i&&(c=c.toLowerCase(),h=f.attrHooks[c]||(u.test(c)?x:w));if(d!==b){if(d===null){f.removeAttr(a,c);return}if(h&&"set"in h&&i&&(g=h.set(a,d,c))!==b)return g;a.setAttribute(c,""+d);return d}if(h&&"get"in h&&i&&(g=h.get(a,c))!==null)return g;g=a.getAttribute(c);return g===null?b:g}},removeAttr:function(a,b){var c,d,e,g,h=0;if(b&&a.nodeType===1){d=b.toLowerCase().split(p),g=d.length;for(;h<g;h++)e=d[h],e&&(c=f.propFix[e]||e,f.attr(a,e,""),a.removeAttribute(v?e:c),u.test(e)&&c in a&&(a[c]=!1))}},attrHooks:{type:{set:function(a,b){if(r.test(a.nodeName)&&a.parentNode)f.error("type property can't be changed");else if(!f.support.radioValue&&b==="radio"&&f.nodeName(a,"input")){var 
c=a.value;a.setAttribute("type",b),c&&(a.value=c);return b}}},value:{get:function(a,b){if(w&&f.nodeName(a,"button"))return w.get(a,b);return b in a?a.value:null},set:function(a,b,c){if(w&&f.nodeName(a,"button"))return w.set(a,b,c);a.value=b}}},propFix:{tabindex:"tabIndex",readonly:"readOnly","for":"htmlFor","class":"className",maxlength:"maxLength",cellspacing:"cellSpacing",cellpadding:"cellPadding",rowspan:"rowSpan",colspan:"colSpan",usemap:"useMap",frameborder:"frameBorder",contenteditable:"contentEditable"},prop:function(a,c,d){var e,g,h,i=a.nodeType;if(!!a&&i!==3&&i!==8&&i!==2){h=i!==1||!f.isXMLDoc(a),h&&(c=f.propFix[c]||c,g=f.propHooks[c]);return d!==b?g&&"set"in g&&(e=g.set(a,d,c))!==b?e:a[c]=d:g&&"get"in g&&(e=g.get(a,c))!==null?e:a[c]}},propHooks:{tabIndex:{get:function(a){var c=a.getAttributeNode("tabindex");return c&&c.specified?parseInt(c.value,10):s.test(a.nodeName)||t.test(a.nodeName)&&a.href?0:b}}}}),f.attrHooks.tabindex=f.propHooks.tabIndex,x={get:function(a,c){var d,e=f.prop(a,c);return e===!0||typeof e!="boolean"&&(d=a.getAttributeNode(c))&&d.nodeValue!==!1?c.toLowerCase():b},set:function(a,b,c){var d;b===!1?f.removeAttr(a,c):(d=f.propFix[c]||c,d in a&&(a[d]=!0),a.setAttribute(c,c.toLowerCase()));return c}},v||(y={name:!0,id:!0},w=f.valHooks.button={get:function(a,c){var d;d=a.getAttributeNode(c);return d&&(y[c]?d.nodeValue!=="":d.specified)?d.nodeValue:b},set:function(a,b,d){var e=a.getAttributeNode(d);e||(e=c.createAttribute(d),a.setAttributeNode(e));return e.nodeValue=b+""}},f.attrHooks.tabindex.set=w.set,f.each(["width","height"],function(a,b){f.attrHooks[b]=f.extend(f.attrHooks[b],{set:function(a,c){if(c===""){a.setAttribute(b,"auto");return c}}})}),f.attrHooks.contenteditable={get:w.get,set:function(a,b,c){b===""&&(b="false"),w.set(a,b,c)}}),f.support.hrefNormalized||f.each(["href","src","width","height"],function(a,c){f.attrHooks[c]=f.extend(f.attrHooks[c],{get:function(a){var d=a.getAttribute(c,2);return 
d===null?b:d}})}),f.support.style||(f.attrHooks.style={get:function(a){return a.style.cssText.toLowerCase()||b},set:function(a,b){return a.style.cssText=""+b}}),f.support.optSelected||(f.propHooks.selected=f.extend(f.propHooks.selected,{get:function(a){var b=a.parentNode;b&&(b.selectedIndex,b.parentNode&&b.parentNode.selectedIndex);return null}})),f.support.enctype||(f.propFix.enctype="encoding"),f.support.checkOn||f.each(["radio","checkbox"],function(){f.valHooks[this]={get:function(a){return a.getAttribute("value")===null?"on":a.value}}}),f.each(["radio","checkbox"],function(){f.valHooks[this]=f.extend(f.valHooks[this],{set:function(a,b){if(f.isArray(b))return a.checked=f.inArray(f(a).val(),b)>=0}})});var z=/^(?:textarea|input|select)$/i,A=/^([^\.]*)?(?:\.(.+))?$/,B=/\bhover(\.\S+)?\b/,C=/^key/,D=/^(?:mouse|contextmenu)|click/,E=/^(?:focusinfocus|focusoutblur)$/,F=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,G=function(a){var b=F.exec(a);b&&(b[1]=(b[1]||"").toLowerCase(),b[3]=b[3]&&new RegExp("(?:^|\\s)"+b[3]+"(?:\\s|$)"));return b},H=function(a,b){var c=a.attributes||{};return(!b[1]||a.nodeName.toLowerCase()===b[1])&&(!b[2]||(c.id||{}).value===b[2])&&(!b[3]||b[3].test((c["class"]||{}).value))},I=function(a){return f.event.special.hover?a:a.replace(B,"mouseenter$1 mouseleave$1")};
+f.event={add:function(a,c,d,e,g){var h,i,j,k,l,m,n,o,p,q,r,s;if(!(a.nodeType===3||a.nodeType===8||!c||!d||!(h=f._data(a)))){d.handler&&(p=d,d=p.handler),d.guid||(d.guid=f.guid++),j=h.events,j||(h.events=j={}),i=h.handle,i||(h.handle=i=function(a){return typeof f!="undefined"&&(!a||f.event.triggered!==a.type)?f.event.dispatch.apply(i.elem,arguments):b},i.elem=a),c=f.trim(I(c)).split(" ");for(k=0;k<c.length;k++){l=A.exec(c[k])||[],m=l[1],n=(l[2]||"").split(".").sort(),s=f.event.special[m]||{},m=(g?s.delegateType:s.bindType)||m,s=f.event.special[m]||{},o=f.extend({type:m,origType:l[1],data:e,handler:d,guid:d.guid,selector:g,quick:G(g),namespace:n.join(".")},p),r=j[m];if(!r){r=j[m]=[],r.delegateCount=0;if(!s.setup||s.setup.call(a,e,n,i)===!1)a.addEventListener?a.addEventListener(m,i,!1):a.attachEvent&&a.attachEvent("on"+m,i)}s.add&&(s.add.call(a,o),o.handler.guid||(o.handler.guid=d.guid)),g?r.splice(r.delegateCount++,0,o):r.push(o),f.event.global[m]=!0}a=null}},global:{},remove:function(a,b,c,d,e){var g=f.hasData(a)&&f._data(a),h,i,j,k,l,m,n,o,p,q,r,s;if(!!g&&!!(o=g.events)){b=f.trim(I(b||"")).split(" ");for(h=0;h<b.length;h++){i=A.exec(b[h])||[],j=k=i[1],l=i[2];if(!j){for(j in o)f.event.remove(a,j+b[h],c,d,!0);continue}p=f.event.special[j]||{},j=(d?p.delegateType:p.bindType)||j,r=o[j]||[],m=r.length,l=l?new RegExp("(^|\\.)"+l.split(".").sort().join("\\.(?:.*\\.)?")+"(\\.|$)"):null;for(n=0;n<r.length;n++)s=r[n],(e||k===s.origType)&&(!c||c.guid===s.guid)&&(!l||l.test(s.namespace))&&(!d||d===s.selector||d==="**"&&s.selector)&&(r.splice(n--,1),s.selector&&r.delegateCount--,p.remove&&p.remove.call(a,s));r.length===0&&m!==r.length&&((!p.teardown||p.teardown.call(a,l)===!1)&&f.removeEvent(a,j,g.handle),delete o[j])}f.isEmptyObject(o)&&(q=g.handle,q&&(q.elem=null),f.removeData(a,["events","handle"],!0))}},customEvent:{getData:!0,setData:!0,changeData:!0},trigger:function(c,d,e,g){if(!e||e.nodeType!==3&&e.nodeType!==8){var 
h=c.type||c,i=[],j,k,l,m,n,o,p,q,r,s;if(E.test(h+f.event.triggered))return;h.indexOf("!")>=0&&(h=h.slice(0,-1),k=!0),h.indexOf(".")>=0&&(i=h.split("."),h=i.shift(),i.sort());if((!e||f.event.customEvent[h])&&!f.event.global[h])return;c=typeof c=="object"?c[f.expando]?c:new f.Event(h,c):new f.Event(h),c.type=h,c.isTrigger=!0,c.exclusive=k,c.namespace=i.join("."),c.namespace_re=c.namespace?new RegExp("(^|\\.)"+i.join("\\.(?:.*\\.)?")+"(\\.|$)"):null,o=h.indexOf(":")<0?"on"+h:"";if(!e){j=f.cache;for(l in j)j[l].events&&j[l].events[h]&&f.event.trigger(c,d,j[l].handle.elem,!0);return}c.result=b,c.target||(c.target=e),d=d!=null?f.makeArray(d):[],d.unshift(c),p=f.event.special[h]||{};if(p.trigger&&p.trigger.apply(e,d)===!1)return;r=[[e,p.bindType||h]];if(!g&&!p.noBubble&&!f.isWindow(e)){s=p.delegateType||h,m=E.test(s+h)?e:e.parentNode,n=null;for(;m;m=m.parentNode)r.push([m,s]),n=m;n&&n===e.ownerDocument&&r.push([n.defaultView||n.parentWindow||a,s])}for(l=0;l<r.length&&!c.isPropagationStopped();l++)m=r[l][0],c.type=r[l][1],q=(f._data(m,"events")||{})[c.type]&&f._data(m,"handle"),q&&q.apply(m,d),q=o&&m[o],q&&f.acceptData(m)&&q.apply(m,d)===!1&&c.preventDefault();c.type=h,!g&&!c.isDefaultPrevented()&&(!p._default||p._default.apply(e.ownerDocument,d)===!1)&&(h!=="click"||!f.nodeName(e,"a"))&&f.acceptData(e)&&o&&e[h]&&(h!=="focus"&&h!=="blur"||c.target.offsetWidth!==0)&&!f.isWindow(e)&&(n=e[o],n&&(e[o]=null),f.event.triggered=h,e[h](),f.event.triggered=b,n&&(e[o]=n));return c.result}},dispatch:function(c){c=f.event.fix(c||a.event);var 
d=(f._data(this,"events")||{})[c.type]||[],e=d.delegateCount,g=[].slice.call(arguments,0),h=!c.exclusive&&!c.namespace,i=[],j,k,l,m,n,o,p,q,r,s,t;g[0]=c,c.delegateTarget=this;if(e&&!c.target.disabled&&(!c.button||c.type!=="click")){m=f(this),m.context=this.ownerDocument||this;for(l=c.target;l!=this;l=l.parentNode||this){o={},q=[],m[0]=l;for(j=0;j<e;j++)r=d[j],s=r.selector,o[s]===b&&(o[s]=r.quick?H(l,r.quick):m.is(s)),o[s]&&q.push(r);q.length&&i.push({elem:l,matches:q})}}d.length>e&&i.push({elem:this,matches:d.slice(e)});for(j=0;j<i.length&&!c.isPropagationStopped();j++){p=i[j],c.currentTarget=p.elem;for(k=0;k<p.matches.length&&!c.isImmediatePropagationStopped();k++){r=p.matches[k];if(h||!c.namespace&&!r.namespace||c.namespace_re&&c.namespace_re.test(r.namespace))c.data=r.data,c.handleObj=r,n=((f.event.special[r.origType]||{}).handle||r.handler).apply(p.elem,g),n!==b&&(c.result=n,n===!1&&(c.preventDefault(),c.stopPropagation()))}}return c.result},props:"attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "),fixHooks:{},keyHooks:{props:"char charCode key keyCode".split(" "),filter:function(a,b){a.which==null&&(a.which=b.charCode!=null?b.charCode:b.keyCode);return a}},mouseHooks:{props:"button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "),filter:function(a,d){var e,f,g,h=d.button,i=d.fromElement;a.pageX==null&&d.clientX!=null&&(e=a.target.ownerDocument||c,f=e.documentElement,g=e.body,a.pageX=d.clientX+(f&&f.scrollLeft||g&&g.scrollLeft||0)-(f&&f.clientLeft||g&&g.clientLeft||0),a.pageY=d.clientY+(f&&f.scrollTop||g&&g.scrollTop||0)-(f&&f.clientTop||g&&g.clientTop||0)),!a.relatedTarget&&i&&(a.relatedTarget=i===a.target?d.toElement:i),!a.which&&h!==b&&(a.which=h&1?1:h&2?3:h&4?2:0);return a}},fix:function(a){if(a[f.expando])return a;var 
d,e,g=a,h=f.event.fixHooks[a.type]||{},i=h.props?this.props.concat(h.props):this.props;a=f.Event(g);for(d=i.length;d;)e=i[--d],a[e]=g[e];a.target||(a.target=g.srcElement||c),a.target.nodeType===3&&(a.target=a.target.parentNode),a.metaKey===b&&(a.metaKey=a.ctrlKey);return h.filter?h.filter(a,g):a},special:{ready:{setup:f.bindReady},load:{noBubble:!0},focus:{delegateType:"focusin"},blur:{delegateType:"focusout"},beforeunload:{setup:function(a,b,c){f.isWindow(this)&&(this.onbeforeunload=c)},teardown:function(a,b){this.onbeforeunload===b&&(this.onbeforeunload=null)}}},simulate:function(a,b,c,d){var e=f.extend(new f.Event,c,{type:a,isSimulated:!0,originalEvent:{}});d?f.event.trigger(e,null,b):f.event.dispatch.call(b,e),e.isDefaultPrevented()&&c.preventDefault()}},f.event.handle=f.event.dispatch,f.removeEvent=c.removeEventListener?function(a,b,c){a.removeEventListener&&a.removeEventListener(b,c,!1)}:function(a,b,c){a.detachEvent&&a.detachEvent("on"+b,c)},f.Event=function(a,b){if(!(this instanceof f.Event))return new f.Event(a,b);a&&a.type?(this.originalEvent=a,this.type=a.type,this.isDefaultPrevented=a.defaultPrevented||a.returnValue===!1||a.getPreventDefault&&a.getPreventDefault()?K:J):this.type=a,b&&f.extend(this,b),this.timeStamp=a&&a.timeStamp||f.now(),this[f.expando]=!0},f.Event.prototype={preventDefault:function(){this.isDefaultPrevented=K;var a=this.originalEvent;!a||(a.preventDefault?a.preventDefault():a.returnValue=!1)},stopPropagation:function(){this.isPropagationStopped=K;var a=this.originalEvent;!a||(a.stopPropagation&&a.stopPropagation(),a.cancelBubble=!0)},stopImmediatePropagation:function(){this.isImmediatePropagationStopped=K,this.stopPropagation()},isDefaultPrevented:J,isPropagationStopped:J,isImmediatePropagationStopped:J},f.each({mouseenter:"mouseover",mouseleave:"mouseout"},function(a,b){f.event.special[a]={delegateType:b,bindType:b,handle:function(a){var 
c=this,d=a.relatedTarget,e=a.handleObj,g=e.selector,h;if(!d||d!==c&&!f.contains(c,d))a.type=e.origType,h=e.handler.apply(this,arguments),a.type=b;return h}}}),f.support.submitBubbles||(f.event.special.submit={setup:function(){if(f.nodeName(this,"form"))return!1;f.event.add(this,"click._submit keypress._submit",function(a){var c=a.target,d=f.nodeName(c,"input")||f.nodeName(c,"button")?c.form:b;d&&!d._submit_attached&&(f.event.add(d,"submit._submit",function(a){this.parentNode&&!a.isTrigger&&f.event.simulate("submit",this.parentNode,a,!0)}),d._submit_attached=!0)})},teardown:function(){if(f.nodeName(this,"form"))return!1;f.event.remove(this,"._submit")}}),f.support.changeBubbles||(f.event.special.change={setup:function(){if(z.test(this.nodeName)){if(this.type==="checkbox"||this.type==="radio")f.event.add(this,"propertychange._change",function(a){a.originalEvent.propertyName==="checked"&&(this._just_changed=!0)}),f.event.add(this,"click._change",function(a){this._just_changed&&!a.isTrigger&&(this._just_changed=!1,f.event.simulate("change",this,a,!0))});return!1}f.event.add(this,"beforeactivate._change",function(a){var b=a.target;z.test(b.nodeName)&&!b._change_attached&&(f.event.add(b,"change._change",function(a){this.parentNode&&!a.isSimulated&&!a.isTrigger&&f.event.simulate("change",this.parentNode,a,!0)}),b._change_attached=!0)})},handle:function(a){var b=a.target;if(this!==b||a.isSimulated||a.isTrigger||b.type!=="radio"&&b.type!=="checkbox")return a.handleObj.handler.apply(this,arguments)},teardown:function(){f.event.remove(this,"._change");return z.test(this.nodeName)}}),f.support.focusinBubbles||f.each({focus:"focusin",blur:"focusout"},function(a,b){var d=0,e=function(a){f.event.simulate(b,a.target,f.event.fix(a),!0)};f.event.special[b]={setup:function(){d++===0&&c.addEventListener(a,e,!0)},teardown:function(){--d===0&&c.removeEventListener(a,e,!0)}}}),f.fn.extend({on:function(a,c,d,e,g){var h,i;if(typeof a=="object"){typeof c!="string"&&(d=c,c=b);for(i in 
a)this.on(i,c,d,a[i],g);return this}d==null&&e==null?(e=c,d=c=b):e==null&&(typeof c=="string"?(e=d,d=b):(e=d,d=c,c=b));if(e===!1)e=J;else if(!e)return this;g===1&&(h=e,e=function(a){f().off(a);return h.apply(this,arguments)},e.guid=h.guid||(h.guid=f.guid++));return this.each(function(){f.event.add(this,a,e,d,c)})},one:function(a,b,c,d){return this.on.call(this,a,b,c,d,1)},off:function(a,c,d){if(a&&a.preventDefault&&a.handleObj){var e=a.handleObj;f(a.delegateTarget).off(e.namespace?e.type+"."+e.namespace:e.type,e.selector,e.handler);return this}if(typeof a=="object"){for(var g in a)this.off(g,c,a[g]);return this}if(c===!1||typeof c=="function")d=c,c=b;d===!1&&(d=J);return this.each(function(){f.event.remove(this,a,d,c)})},bind:function(a,b,c){return this.on(a,null,b,c)},unbind:function(a,b){return this.off(a,null,b)},live:function(a,b,c){f(this.context).on(a,this.selector,b,c);return this},die:function(a,b){f(this.context).off(a,this.selector||"**",b);return this},delegate:function(a,b,c,d){return this.on(b,a,c,d)},undelegate:function(a,b,c){return arguments.length==1?this.off(a,"**"):this.off(b,a,c)},trigger:function(a,b){return this.each(function(){f.event.trigger(a,b,this)})},triggerHandler:function(a,b){if(this[0])return f.event.trigger(a,b,this[0],!0)},toggle:function(a){var b=arguments,c=a.guid||f.guid++,d=0,e=function(c){var e=(f._data(this,"lastToggle"+a.guid)||0)%d;f._data(this,"lastToggle"+a.guid,e+1),c.preventDefault();return b[e].apply(this,arguments)||!1};e.guid=c;while(d<b.length)b[d++].guid=c;return this.click(e)},hover:function(a,b){return this.mouseenter(a).mouseleave(b||a)}}),f.each("blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(" "),function(a,b){f.fn[b]=function(a,c){c==null&&(c=a,a=null);return 
arguments.length>0?this.on(b,null,a,c):this.trigger(b)},f.attrFn&&(f.attrFn[b]=!0),C.test(b)&&(f.event.fixHooks[b]=f.event.keyHooks),D.test(b)&&(f.event.fixHooks[b]=f.event.mouseHooks)}),function(){function x(a,b,c,e,f,g){for(var h=0,i=e.length;h<i;h++){var j=e[h];if(j){var k=!1;j=j[a];while(j){if(j[d]===c){k=e[j.sizset];break}if(j.nodeType===1){g||(j[d]=c,j.sizset=h);if(typeof b!="string"){if(j===b){k=!0;break}}else if(m.filter(b,[j]).length>0){k=j;break}}j=j[a]}e[h]=k}}}function w(a,b,c,e,f,g){for(var h=0,i=e.length;h<i;h++){var j=e[h];if(j){var k=!1;j=j[a];while(j){if(j[d]===c){k=e[j.sizset];break}j.nodeType===1&&!g&&(j[d]=c,j.sizset=h);if(j.nodeName.toLowerCase()===b){k=j;break}j=j[a]}e[h]=k}}}var a=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,d="sizcache"+(Math.random()+"").replace(".",""),e=0,g=Object.prototype.toString,h=!1,i=!0,j=/\\/g,k=/\r\n/g,l=/\W/;[0,0].sort(function(){i=!1;return 0});var m=function(b,d,e,f){e=e||[],d=d||c;var h=d;if(d.nodeType!==1&&d.nodeType!==9)return[];if(!b||typeof b!="string")return e;var i,j,k,l,n,q,r,t,u=!0,v=m.isXML(d),w=[],x=b;do{a.exec(""),i=a.exec(x);if(i){x=i[3],w.push(i[1]);if(i[2]){l=i[3];break}}}while(i);if(w.length>1&&p.exec(b))if(w.length===2&&o.relative[w[0]])j=y(w[0]+w[1],d,f);else{j=o.relative[w[0]]?[d]:m(w.shift(),d);while(w.length)b=w.shift(),o.relative[b]&&(b+=w.shift()),j=y(b,j,f)}else{!f&&w.length>1&&d.nodeType===9&&!v&&o.match.ID.test(w[0])&&!o.match.ID.test(w[w.length-1])&&(n=m.find(w.shift(),d,v),d=n.expr?m.filter(n.expr,n.set)[0]:n.set[0]);if(d){n=f?{expr:w.pop(),set:s(f)}:m.find(w.pop(),w.length===1&&(w[0]==="~"||w[0]==="+")&&d.parentNode?d.parentNode:d,v),j=n.expr?m.filter(n.expr,n.set):n.set,w.length>0?k=s(j):u=!1;while(w.length)q=w.pop(),r=q,o.relative[q]?r=w.pop():q="",r==null&&(r=d),o.relative[q](k,r,v)}else k=w=[]}k||(k=j),k||m.error(q||b);if(g.call(k)==="[object Array]")if(!u)e.push.apply(e,k);else 
if(d&&d.nodeType===1)for(t=0;k[t]!=null;t++)k[t]&&(k[t]===!0||k[t].nodeType===1&&m.contains(d,k[t]))&&e.push(j[t]);else for(t=0;k[t]!=null;t++)k[t]&&k[t].nodeType===1&&e.push(j[t]);else s(k,e);l&&(m(l,h,e,f),m.uniqueSort(e));return e};m.uniqueSort=function(a){if(u){h=i,a.sort(u);if(h)for(var b=1;b<a.length;b++)a[b]===a[b-1]&&a.splice(b--,1)}return a},m.matches=function(a,b){return m(a,null,null,b)},m.matchesSelector=function(a,b){return m(b,null,null,[a]).length>0},m.find=function(a,b,c){var d,e,f,g,h,i;if(!a)return[];for(e=0,f=o.order.length;e<f;e++){h=o.order[e];if(g=o.leftMatch[h].exec(a)){i=g[1],g.splice(1,1);if(i.substr(i.length-1)!=="\\"){g[1]=(g[1]||"").replace(j,""),d=o.find[h](g,b,c);if(d!=null){a=a.replace(o.match[h],"");break}}}}d||(d=typeof b.getElementsByTagName!="undefined"?b.getElementsByTagName("*"):[]);return{set:d,expr:a}},m.filter=function(a,c,d,e){var f,g,h,i,j,k,l,n,p,q=a,r=[],s=c,t=c&&c[0]&&m.isXML(c[0]);while(a&&c.length){for(h in o.filter)if((f=o.leftMatch[h].exec(a))!=null&&f[2]){k=o.filter[h],l=f[1],g=!1,f.splice(1,1);if(l.substr(l.length-1)==="\\")continue;s===r&&(r=[]);if(o.preFilter[h]){f=o.preFilter[h](f,s,d,r,e,t);if(!f)g=i=!0;else if(f===!0)continue}if(f)for(n=0;(j=s[n])!=null;n++)j&&(i=k(j,f,n,s),p=e^i,d&&i!=null?p?g=!0:s[n]=!1:p&&(r.push(j),g=!0));if(i!==b){d||(s=r),a=a.replace(o.match[h],"");if(!g)return[];break}}if(a===q)if(g==null)m.error(a);else break;q=a}return s},m.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)};var n=m.getText=function(a){var b,c,d=a.nodeType,e="";if(d){if(d===1||d===9){if(typeof a.textContent=="string")return a.textContent;if(typeof a.innerText=="string")return a.innerText.replace(k,"");for(a=a.firstChild;a;a=a.nextSibling)e+=n(a)}else if(d===3||d===4)return a.nodeValue}else for(b=0;c=a[b];b++)c.nodeType!==8&&(e+=n(c));return 
e},o=m.selectors={order:["ID","NAME","TAG"],match:{ID:/#((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,CLASS:/\.((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,NAME:/\[name=['"]*((?:[\w\u00c0-\uFFFF\-]|\\.)+)['"]*\]/,ATTR:/\[\s*((?:[\w\u00c0-\uFFFF\-]|\\.)+)\s*(?:(\S?=)\s*(?:(['"])(.*?)\3|(#?(?:[\w\u00c0-\uFFFF\-]|\\.)*)|)|)\s*\]/,TAG:/^((?:[\w\u00c0-\uFFFF\*\-]|\\.)+)/,CHILD:/:(only|nth|last|first)-child(?:\(\s*(even|odd|(?:[+\-]?\d+|(?:[+\-]?\d*)?n\s*(?:[+\-]\s*\d+)?))\s*\))?/,POS:/:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^\-]|$)/,PSEUDO:/:((?:[\w\u00c0-\uFFFF\-]|\\.)+)(?:\((['"]?)((?:\([^\)]+\)|[^\(\)]*)+)\2\))?/},leftMatch:{},attrMap:{"class":"className","for":"htmlFor"},attrHandle:{href:function(a){return a.getAttribute("href")},type:function(a){return a.getAttribute("type")}},relative:{"+":function(a,b){var c=typeof b=="string",d=c&&!l.test(b),e=c&&!d;d&&(b=b.toLowerCase());for(var f=0,g=a.length,h;f<g;f++)if(h=a[f]){while((h=h.previousSibling)&&h.nodeType!==1);a[f]=e||h&&h.nodeName.toLowerCase()===b?h||!1:h===b}e&&m.filter(b,a,!0)},">":function(a,b){var c,d=typeof b=="string",e=0,f=a.length;if(d&&!l.test(b)){b=b.toLowerCase();for(;e<f;e++){c=a[e];if(c){var g=c.parentNode;a[e]=g.nodeName.toLowerCase()===b?g:!1}}}else{for(;e<f;e++)c=a[e],c&&(a[e]=d?c.parentNode:c.parentNode===b);d
+&&m.filter(b,a,!0)}},"":function(a,b,c){var d,f=e++,g=x;typeof b=="string"&&!l.test(b)&&(b=b.toLowerCase(),d=b,g=w),g("parentNode",b,f,a,d,c)},"~":function(a,b,c){var d,f=e++,g=x;typeof b=="string"&&!l.test(b)&&(b=b.toLowerCase(),d=b,g=w),g("previousSibling",b,f,a,d,c)}},find:{ID:function(a,b,c){if(typeof b.getElementById!="undefined"&&!c){var d=b.getElementById(a[1]);return d&&d.parentNode?[d]:[]}},NAME:function(a,b){if(typeof b.getElementsByName!="undefined"){var c=[],d=b.getElementsByName(a[1]);for(var e=0,f=d.length;e<f;e++)d[e].getAttribute("name")===a[1]&&c.push(d[e]);return c.length===0?null:c}},TAG:function(a,b){if(typeof b.getElementsByTagName!="undefined")return b.getElementsByTagName(a[1])}},preFilter:{CLASS:function(a,b,c,d,e,f){a=" "+a[1].replace(j,"")+" ";if(f)return a;for(var g=0,h;(h=b[g])!=null;g++)h&&(e^(h.className&&(" "+h.className+" ").replace(/[\t\n\r]/g," ").indexOf(a)>=0)?c||d.push(h):c&&(b[g]=!1));return!1},ID:function(a){return a[1].replace(j,"")},TAG:function(a,b){return a[1].replace(j,"").toLowerCase()},CHILD:function(a){if(a[1]==="nth"){a[2]||m.error(a[0]),a[2]=a[2].replace(/^\+|\s*/g,"");var b=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(a[2]==="even"&&"2n"||a[2]==="odd"&&"2n+1"||!/\D/.test(a[2])&&"0n+"+a[2]||a[2]);a[2]=b[1]+(b[2]||1)-0,a[3]=b[3]-0}else a[2]&&m.error(a[0]);a[0]=e++;return a},ATTR:function(a,b,c,d,e,f){var g=a[1]=a[1].replace(j,"");!f&&o.attrMap[g]&&(a[1]=o.attrMap[g]),a[4]=(a[4]||a[5]||"").replace(j,""),a[2]==="~="&&(a[4]=" "+a[4]+" ");return a},PSEUDO:function(b,c,d,e,f){if(b[1]==="not")if((a.exec(b[3])||"").length>1||/^\w/.test(b[3]))b[3]=m(b[3],null,null,c);else{var g=m.filter(b[3],c,d,!0^f);d||e.push.apply(e,g);return!1}else if(o.match.POS.test(b[0])||o.match.CHILD.test(b[0]))return!0;return b},POS:function(a){a.unshift(!0);return a}},filters:{enabled:function(a){return a.disabled===!1&&a.type!=="hidden"},disabled:function(a){return a.disabled===!0},checked:function(a){return 
a.checked===!0},selected:function(a){a.parentNode&&a.parentNode.selectedIndex;return a.selected===!0},parent:function(a){return!!a.firstChild},empty:function(a){return!a.firstChild},has:function(a,b,c){return!!m(c[3],a).length},header:function(a){return/h\d/i.test(a.nodeName)},text:function(a){var b=a.getAttribute("type"),c=a.type;return a.nodeName.toLowerCase()==="input"&&"text"===c&&(b===c||b===null)},radio:function(a){return a.nodeName.toLowerCase()==="input"&&"radio"===a.type},checkbox:function(a){return a.nodeName.toLowerCase()==="input"&&"checkbox"===a.type},file:function(a){return a.nodeName.toLowerCase()==="input"&&"file"===a.type},password:function(a){return a.nodeName.toLowerCase()==="input"&&"password"===a.type},submit:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"submit"===a.type},image:function(a){return a.nodeName.toLowerCase()==="input"&&"image"===a.type},reset:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"reset"===a.type},button:function(a){var b=a.nodeName.toLowerCase();return b==="input"&&"button"===a.type||b==="button"},input:function(a){return/input|select|textarea|button/i.test(a.nodeName)},focus:function(a){return a===a.ownerDocument.activeElement}},setFilters:{first:function(a,b){return b===0},last:function(a,b,c,d){return b===d.length-1},even:function(a,b){return b%2===0},odd:function(a,b){return b%2===1},lt:function(a,b,c){return b<c[3]-0},gt:function(a,b,c){return b>c[3]-0},nth:function(a,b,c){return c[3]-0===b},eq:function(a,b,c){return c[3]-0===b}},filter:{PSEUDO:function(a,b,c,d){var e=b[1],f=o.filters[e];if(f)return f(a,c,b,d);if(e==="contains")return(a.textContent||a.innerText||n([a])||"").indexOf(b[3])>=0;if(e==="not"){var g=b[3];for(var h=0,i=g.length;h<i;h++)if(g[h]===a)return!1;return!0}m.error(e)},CHILD:function(a,b){var 
c,e,f,g,h,i,j,k=b[1],l=a;switch(k){case"only":case"first":while(l=l.previousSibling)if(l.nodeType===1)return!1;if(k==="first")return!0;l=a;case"last":while(l=l.nextSibling)if(l.nodeType===1)return!1;return!0;case"nth":c=b[2],e=b[3];if(c===1&&e===0)return!0;f=b[0],g=a.parentNode;if(g&&(g[d]!==f||!a.nodeIndex)){i=0;for(l=g.firstChild;l;l=l.nextSibling)l.nodeType===1&&(l.nodeIndex=++i);g[d]=f}j=a.nodeIndex-e;return c===0?j===0:j%c===0&&j/c>=0}},ID:function(a,b){return a.nodeType===1&&a.getAttribute("id")===b},TAG:function(a,b){return b==="*"&&a.nodeType===1||!!a.nodeName&&a.nodeName.toLowerCase()===b},CLASS:function(a,b){return(" "+(a.className||a.getAttribute("class"))+" ").indexOf(b)>-1},ATTR:function(a,b){var c=b[1],d=m.attr?m.attr(a,c):o.attrHandle[c]?o.attrHandle[c](a):a[c]!=null?a[c]:a.getAttribute(c),e=d+"",f=b[2],g=b[4];return d==null?f==="!=":!f&&m.attr?d!=null:f==="="?e===g:f==="*="?e.indexOf(g)>=0:f==="~="?(" "+e+" ").indexOf(g)>=0:g?f==="!="?e!==g:f==="^="?e.indexOf(g)===0:f==="$="?e.substr(e.length-g.length)===g:f==="|="?e===g||e.substr(0,g.length+1)===g+"-":!1:e&&d!==!1},POS:function(a,b,c,d){var e=b[2],f=o.setFilters[e];if(f)return f(a,c,b,d)}}},p=o.match.POS,q=function(a,b){return"\\"+(b-0+1)};for(var r in o.match)o.match[r]=new RegExp(o.match[r].source+/(?![^\[]*\])(?![^\(]*\))/.source),o.leftMatch[r]=new RegExp(/(^(?:.|\r|\n)*?)/.source+o.match[r].source.replace(/\\(\d+)/g,q));var s=function(a,b){a=Array.prototype.slice.call(a,0);if(b){b.push.apply(b,a);return b}return a};try{Array.prototype.slice.call(c.documentElement.childNodes,0)[0].nodeType}catch(t){s=function(a,b){var c=0,d=b||[];if(g.call(a)==="[object Array]")Array.prototype.push.apply(d,a);else if(typeof a.length=="number")for(var e=a.length;c<e;c++)d.push(a[c]);else for(;a[c];c++)d.push(a[c]);return d}}var u,v;c.documentElement.compareDocumentPosition?u=function(a,b){if(a===b){h=!0;return 0}if(!a.compareDocumentPosition||!b.compareDocumentPosition)return 
a.compareDocumentPosition?-1:1;return a.compareDocumentPosition(b)&4?-1:1}:(u=function(a,b){if(a===b){h=!0;return 0}if(a.sourceIndex&&b.sourceIndex)return a.sourceIndex-b.sourceIndex;var c,d,e=[],f=[],g=a.parentNode,i=b.parentNode,j=g;if(g===i)return v(a,b);if(!g)return-1;if(!i)return 1;while(j)e.unshift(j),j=j.parentNode;j=i;while(j)f.unshift(j),j=j.parentNode;c=e.length,d=f.length;for(var k=0;k<c&&k<d;k++)if(e[k]!==f[k])return v(e[k],f[k]);return k===c?v(a,f[k],-1):v(e[k],b,1)},v=function(a,b,c){if(a===b)return c;var d=a.nextSibling;while(d){if(d===b)return-1;d=d.nextSibling}return 1}),function(){var a=c.createElement("div"),d="script"+(new Date).getTime(),e=c.documentElement;a.innerHTML="<a name='"+d+"'/>",e.insertBefore(a,e.firstChild),c.getElementById(d)&&(o.find.ID=function(a,c,d){if(typeof c.getElementById!="undefined"&&!d){var e=c.getElementById(a[1]);return e?e.id===a[1]||typeof e.getAttributeNode!="undefined"&&e.getAttributeNode("id").nodeValue===a[1]?[e]:b:[]}},o.filter.ID=function(a,b){var c=typeof a.getAttributeNode!="undefined"&&a.getAttributeNode("id");return a.nodeType===1&&c&&c.nodeValue===b}),e.removeChild(a),e=a=null}(),function(){var a=c.createElement("div");a.appendChild(c.createComment("")),a.getElementsByTagName("*").length>0&&(o.find.TAG=function(a,b){var c=b.getElementsByTagName(a[1]);if(a[1]==="*"){var d=[];for(var e=0;c[e];e++)c[e].nodeType===1&&d.push(c[e]);c=d}return c}),a.innerHTML="<a href='#'></a>",a.firstChild&&typeof a.firstChild.getAttribute!="undefined"&&a.firstChild.getAttribute("href")!=="#"&&(o.attrHandle.href=function(a){return a.getAttribute("href",2)}),a=null}(),c.querySelectorAll&&function(){var a=m,b=c.createElement("div"),d="__sizzle__";b.innerHTML="<p class='TEST'></p>";if(!b.querySelectorAll||b.querySelectorAll(".TEST").length!==0){m=function(b,e,f,g){e=e||c;if(!g&&!m.isXML(e)){var h=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b);if(h&&(e.nodeType===1||e.nodeType===9)){if(h[1])return 
s(e.getElementsByTagName(b),f);if(h[2]&&o.find.CLASS&&e.getElementsByClassName)return s(e.getElementsByClassName(h[2]),f)}if(e.nodeType===9){if(b==="body"&&e.body)return s([e.body],f);if(h&&h[3]){var i=e.getElementById(h[3]);if(!i||!i.parentNode)return s([],f);if(i.id===h[3])return s([i],f)}try{return s(e.querySelectorAll(b),f)}catch(j){}}else if(e.nodeType===1&&e.nodeName.toLowerCase()!=="object"){var k=e,l=e.getAttribute("id"),n=l||d,p=e.parentNode,q=/^\s*[+~]/.test(b);l?n=n.replace(/'/g,"\\$&"):e.setAttribute("id",n),q&&p&&(e=e.parentNode);try{if(!q||p)return s(e.querySelectorAll("[id='"+n+"'] "+b),f)}catch(r){}finally{l||k.removeAttribute("id")}}}return a(b,e,f,g)};for(var e in a)m[e]=a[e];b=null}}(),function(){var a=c.documentElement,b=a.matchesSelector||a.mozMatchesSelector||a.webkitMatchesSelector||a.msMatchesSelector;if(b){var d=!b.call(c.createElement("div"),"div"),e=!1;try{b.call(c.documentElement,"[test!='']:sizzle")}catch(f){e=!0}m.matchesSelector=function(a,c){c=c.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!m.isXML(a))try{if(e||!o.match.PSEUDO.test(c)&&!/!=/.test(c)){var f=b.call(a,c);if(f||!d||a.document&&a.document.nodeType!==11)return f}}catch(g){}return m(c,null,null,[a]).length>0}}}(),function(){var a=c.createElement("div");a.innerHTML="<div class='test e'></div><div class='test'></div>";if(!!a.getElementsByClassName&&a.getElementsByClassName("e").length!==0){a.lastChild.className="e";if(a.getElementsByClassName("e").length===1)return;o.order.splice(1,0,"CLASS"),o.find.CLASS=function(a,b,c){if(typeof b.getElementsByClassName!="undefined"&&!c)return b.getElementsByClassName(a[1])},a=null}}(),c.documentElement.contains?m.contains=function(a,b){return a!==b&&(a.contains?a.contains(b):!0)}:c.documentElement.compareDocumentPosition?m.contains=function(a,b){return!!(a.compareDocumentPosition(b)&16)}:m.contains=function(){return!1},m.isXML=function(a){var b=(a?a.ownerDocument||a:0).documentElement;return b?b.nodeName!=="HTML":!1};var 
y=function(a,b,c){var d,e=[],f="",g=b.nodeType?[b]:b;while(d=o.match.PSEUDO.exec(a))f+=d[0],a=a.replace(o.match.PSEUDO,"");a=o.relative[a]?a+"*":a;for(var h=0,i=g.length;h<i;h++)m(a,g[h],e,c);return m.filter(f,e)};m.attr=f.attr,m.selectors.attrMap={},f.find=m,f.expr=m.selectors,f.expr[":"]=f.expr.filters,f.unique=m.uniqueSort,f.text=m.getText,f.isXMLDoc=m.isXML,f.contains=m.contains}();var L=/Until$/,M=/^(?:parents|prevUntil|prevAll)/,N=/,/,O=/^.[^:#\[\.,]*$/,P=Array.prototype.slice,Q=f.expr.match.POS,R={children:!0,contents:!0,next:!0,prev:!0};f.fn.extend({find:function(a){var b=this,c,d;if(typeof a!="string")return f(a).filter(function(){for(c=0,d=b.length;c<d;c++)if(f.contains(b[c],this))return!0});var e=this.pushStack("","find",a),g,h,i;for(c=0,d=this.length;c<d;c++){g=e.length,f.find(a,this[c],e);if(c>0)for(h=g;h<e.length;h++)for(i=0;i<g;i++)if(e[i]===e[h]){e.splice(h--,1);break}}return e},has:function(a){var b=f(a);return this.filter(function(){for(var a=0,c=b.length;a<c;a++)if(f.contains(this,b[a]))return!0})},not:function(a){return this.pushStack(T(this,a,!1),"not",a)},filter:function(a){return this.pushStack(T(this,a,!0),"filter",a)},is:function(a){return!!a&&(typeof a=="string"?Q.test(a)?f(a,this.context).index(this[0])>=0:f.filter(a,this).length>0:this.filter(a).length>0)},closest:function(a,b){var c=[],d,e,g=this[0];if(f.isArray(a)){var h=1;while(g&&g.ownerDocument&&g!==b){for(d=0;d<a.length;d++)f(g).is(a[d])&&c.push({selector:a[d],elem:g,level:h});g=g.parentNode,h++}return c}var i=Q.test(a)||typeof a!="string"?f(a,b||this.context):0;for(d=0,e=this.length;d<e;d++){g=this[d];while(g){if(i?i.index(g)>-1:f.find.matchesSelector(g,a)){c.push(g);break}g=g.parentNode;if(!g||!g.ownerDocument||g===b||g.nodeType===11)break}}c=c.length>1?f.unique(c):c;return this.pushStack(c,"closest",a)},index:function(a){if(!a)return this[0]&&this[0].parentNode?this.prevAll().length:-1;if(typeof a=="string")return f.inArray(this[0],f(a));return 
f.inArray(a.jquery?a[0]:a,this)},add:function(a,b){var c=typeof a=="string"?f(a,b):f.makeArray(a&&a.nodeType?[a]:a),d=f.merge(this.get(),c);return this.pushStack(S(c[0])||S(d[0])?d:f.unique(d))},andSelf:function(){return this.add(this.prevObject)}}),f.each({parent:function(a){var b=a.parentNode;return b&&b.nodeType!==11?b:null},parents:function(a){return f.dir(a,"parentNode")},parentsUntil:function(a,b,c){return f.dir(a,"parentNode",c)},next:function(a){return f.nth(a,2,"nextSibling")},prev:function(a){return f.nth(a,2,"previousSibling")},nextAll:function(a){return f.dir(a,"nextSibling")},prevAll:function(a){return f.dir(a,"previousSibling")},nextUntil:function(a,b,c){return f.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return f.dir(a,"previousSibling",c)},siblings:function(a){return f.sibling(a.parentNode.firstChild,a)},children:function(a){return f.sibling(a.firstChild)},contents:function(a){return f.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:f.makeArray(a.childNodes)}},function(a,b){f.fn[a]=function(c,d){var e=f.map(this,b,c);L.test(a)||(d=c),d&&typeof d=="string"&&(e=f.filter(d,e)),e=this.length>1&&!R[a]?f.unique(e):e,(this.length>1||N.test(d))&&M.test(a)&&(e=e.reverse());return this.pushStack(e,a,P.call(arguments).join(","))}}),f.extend({filter:function(a,b,c){c&&(a=":not("+a+")");return b.length===1?f.find.matchesSelector(b[0],a)?[b[0]]:[]:f.find.matches(a,b)},dir:function(a,c,d){var e=[],g=a[c];while(g&&g.nodeType!==9&&(d===b||g.nodeType!==1||!f(g).is(d)))g.nodeType===1&&e.push(g),g=g[c];return e},nth:function(a,b,c,d){b=b||1;var e=0;for(;a;a=a[c])if(a.nodeType===1&&++e===b)break;return a},sibling:function(a,b){var c=[];for(;a;a=a.nextSibling)a.nodeType===1&&a!==b&&c.push(a);return c}});var V="abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",W=/ 
jQuery\d+="(?:\d+|null)"/g,X=/^\s+/,Y=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,Z=/<([\w:]+)/,$=/<tbody/i,_=/<|&#?\w+;/,ba=/<(?:script|style)/i,bb=/<(?:script|object|embed|option|style)/i,bc=new RegExp("<(?:"+V+")","i"),bd=/checked\s*(?:[^=]|=\s*.checked.)/i,be=/\/(java|ecma)script/i,bf=/^\s*<!(?:\[CDATA\[|\-\-)/,bg={option:[1,"<select multiple='multiple'>","</select>"],legend:[1,"<fieldset>","</fieldset>"],thead:[1,"<table>","</table>"],tr:[2,"<table><tbody>","</tbody></table>"],td:[3,"<table><tbody><tr>","</tr></tbody></table>"],col:[2,"<table><tbody></tbody><colgroup>","</colgroup></table>"],area:[1,"<map>","</map>"],_default:[0,"",""]},bh=U(c);bg.optgroup=bg.option,bg.tbody=bg.tfoot=bg.colgroup=bg.caption=bg.thead,bg.th=bg.td,f.support.htmlSerialize||(bg._default=[1,"div<div>","</div>"]),f.fn.extend({text:function(a){if(f.isFunction(a))return this.each(function(b){var c=f(this);c.text(a.call(this,b,c.text()))});if(typeof a!="object"&&a!==b)return this.empty().append((this[0]&&this[0].ownerDocument||c).createTextNode(a));return f.text(this)},wrapAll:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapAll(a.call(this,b))});if(this[0]){var b=f(a,this[0].ownerDocument).eq(0).clone(!0);this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstChild&&a.firstChild.nodeType===1)a=a.firstChild;return a}).append(this)}return this},wrapInner:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapInner(a.call(this,b))});return this.each(function(){var b=f(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=f.isFunction(a);return this.each(function(c){f(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(){return this.parent().each(function(){f.nodeName(this,"body")||f(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.appendChild(a)})},prepend:function(){return 
this.domManip(arguments,!0,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this)});if(arguments.length){var a=f.clean(arguments);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this.nextSibling)});if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,f.clean(arguments));return a}},remove:function(a,b){for(var c=0,d;(d=this[c])!=null;c++)if(!a||f.filter(a,[d]).length)!b&&d.nodeType===1&&(f.cleanData(d.getElementsByTagName("*")),
+f.cleanData([d])),d.parentNode&&d.parentNode.removeChild(d);return this},empty:function()
+{for(var a=0,b;(b=this[a])!=null;a++){b.nodeType===1&&f.cleanData(b.getElementsByTagName("*"));while(b.firstChild)b.removeChild(b.firstChild)}return this},clone:function(a,b){a=a==null?!1:a,b=b==null?a:b;return this.map(function(){return f.clone(this,a,b)})},html:function(a){if(a===b)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(W,""):null;if(typeof a=="string"&&!ba.test(a)&&(f.support.leadingWhitespace||!X.test(a))&&!bg[(Z.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Y,"<$1></$2>");try{for(var c=0,d=this.length;c<d;c++)this[c].nodeType===1&&(f.cleanData(this[c].getElementsByTagName("*")),this[c].innerHTML=a)}catch(e){this.empty().append(a)}}else f.isFunction(a)?this.each(function(b){var c=f(this);c.html(a.call(this,b,c.html()))}):this.empty().append(a);return this},replaceWith:function(a){if(this[0]&&this[0].parentNode){if(f.isFunction(a))return this.each(function(b){var c=f(this),d=c.html();c.replaceWith(a.call(this,b,d))});typeof a!="string"&&(a=f(a).detach());return this.each(function(){var b=this.nextSibling,c=this.parentNode;f(this).remove(),b?f(b).before(a):f(c).append(a)})}return this.length?this.pushStack(f(f.isFunction(a)?a():a),"replaceWith",a):this},detach:function(a){return this.remove(a,!0)},domManip:function(a,c,d){var e,g,h,i,j=a[0],k=[];if(!f.support.checkClone&&arguments.length===3&&typeof j=="string"&&bd.test(j))return this.each(function(){f(this).domManip(a,c,d,!0)});if(f.isFunction(j))return this.each(function(e){var g=f(this);a[0]=j.call(this,e,c?g.html():b),g.domManip(a,c,d)});if(this[0]){i=j&&j.parentNode,f.support.parentNode&&i&&i.nodeType===11&&i.childNodes.length===this.length?e={fragment:i}:e=f.buildFragment(a,this,k),h=e.fragment,h.childNodes.length===1?g=h=h.firstChild:g=h.firstChild;if(g){c=c&&f.nodeName(g,"tr");for(var l=0,m=this.length,n=m-1;l<m;l++)d.call(c?bi(this[l],g):this[l],e.cacheable||m>1&&l<n?f.clone(h,!0,!0):h)}k.length&&f.each(k,bp)}return this}}),f.buildFragment=function(a,b,d){var 
e,g,h,i,j=a[0];b&&b[0]&&(i=b[0].ownerDocument||b[0]),i.createDocumentFragment||(i=c),a.length===1&&typeof j=="string"&&j.length<512&&i===c&&j.charAt(0)==="<"&&!bb.test(j)&&(f.support.checkClone||!bd.test(j))&&(f.support.html5Clone||!bc.test(j))&&(g=!0,h=f.fragments[j],h&&h!==1&&(e=h)),e||(e=i.createDocumentFragment(),f.clean(a,i,e,d)),g&&(f.fragments[j]=h?e:1);return{fragment:e,cacheable:g}},f.fragments={},f.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){f.fn[a]=function(c){var d=[],e=f(c),g=this.length===1&&this[0].parentNode;if(g&&g.nodeType===11&&g.childNodes.length===1&&e.length===1){e[b](this[0]);return this}for(var h=0,i=e.length;h<i;h++){var j=(h>0?this.clone(!0):this).get();f(e[h])[b](j),d=d.concat(j)}return this.pushStack(d,a,e.selector)}}),f.extend({clone:function(a,b,c){var d,e,g,h=f.support.html5Clone||!bc.test("<"+a.nodeName)?a.cloneNode(!0):bo(a);if((!f.support.noCloneEvent||!f.support.noCloneChecked)&&(a.nodeType===1||a.nodeType===11)&&!f.isXMLDoc(a)){bk(a,h),d=bl(a),e=bl(h);for(g=0;d[g];++g)e[g]&&bk(d[g],e[g])}if(b){bj(a,h);if(c){d=bl(a),e=bl(h);for(g=0;d[g];++g)bj(d[g],e[g])}}d=e=null;return h},clean:function(a,b,d,e){var g;b=b||c,typeof b.createElement=="undefined"&&(b=b.ownerDocument||b[0]&&b[0].ownerDocument||c);var h=[],i;for(var j=0,k;(k=a[j])!=null;j++){typeof k=="number"&&(k+="");if(!k)continue;if(typeof k=="string")if(!_.test(k))k=b.createTextNode(k);else{k=k.replace(Y,"<$1></$2>");var l=(Z.exec(k)||["",""])[1].toLowerCase(),m=bg[l]||bg._default,n=m[0],o=b.createElement("div");b===c?bh.appendChild(o):U(b).appendChild(o),o.innerHTML=m[1]+k+m[2];while(n--)o=o.lastChild;if(!f.support.tbody){var 
p=$.test(k),q=l==="table"&&!p?o.firstChild&&o.firstChild.childNodes:m[1]==="<table>"&&!p?o.childNodes:[];for(i=q.length-1;i>=0;--i)f.nodeName(q[i],"tbody")&&!q[i].childNodes.length&&q[i].parentNode.removeChild(q[i])}!f.support.leadingWhitespace&&X.test(k)&&o.insertBefore(b.createTextNode(X.exec(k)[0]),o.firstChild),k=o.childNodes}var r;if(!f.support.appendChecked)if(k[0]&&typeof (r=k.length)=="number")for(i=0;i<r;i++)bn(k[i]);else bn(k);k.nodeType?h.push(k):h=f.merge(h,k)}if(d){g=function(a){return!a.type||be.test(a.type)};for(j=0;h[j];j++)if(e&&f.nodeName(h[j],"script")&&(!h[j].type||h[j].type.toLowerCase()==="text/javascript"))e.push(h[j].parentNode?h[j].parentNode.removeChild(h[j]):h[j]);else{if(h[j].nodeType===1){var s=f.grep(h[j].getElementsByTagName("script"),g);h.splice.apply(h,[j+1,0].concat(s))}d.appendChild(h[j])}}return h},cleanData:function(a){var b,c,d=f.cache,e=f.event.special,g=f.support.deleteExpando;for(var h=0,i;(i=a[h])!=null;h++){if(i.nodeName&&f.noData[i.nodeName.toLowerCase()])continue;c=i[f.expando];if(c){b=d[c];if(b&&b.events){for(var j in b.events)e[j]?f.event.remove(i,j):f.removeEvent(i,j,b.handle);b.handle&&(b.handle.elem=null)}g?delete i[f.expando]:i.removeAttribute&&i.removeAttribute(f.expando),delete d[c]}}}});var bq=/alpha\([^)]*\)/i,br=/opacity=([^)]*)/,bs=/([A-Z]|^ms)/g,bt=/^-?\d+(?:px)?$/i,bu=/^-?\d/,bv=/^([\-+])=([\-+.\de]+)/,bw={position:"absolute",visibility:"hidden",display:"block"},bx=["Left","Right"],by=["Top","Bottom"],bz,bA,bB;f.fn.css=function(a,c){if(arguments.length===2&&c===b)return this;return f.access(this,a,c,!0,function(a,c,d){return d!==b?f.style(a,c,d):f.css(a,c)})},f.extend({cssHooks:{opacity:{get:function(a,b){if(b){var c=bz(a,"opacity","opacity");return c===""?"1":c}return 
a.style.opacity}}},cssNumber:{fillOpacity:!0,fontWeight:!0,lineHeight:!0,opacity:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{"float":f.support.cssFloat?"cssFloat":"styleFloat"},style:function(a,c,d,e){if(!!a&&a.nodeType!==3&&a.nodeType!==8&&!!a.style){var g,h,i=f.camelCase(c),j=a.style,k=f.cssHooks[i];c=f.cssProps[i]||i;if(d===b){if(k&&"get"in k&&(g=k.get(a,!1,e))!==b)return g;return j[c]}h=typeof d,h==="string"&&(g=bv.exec(d))&&(d=+(g[1]+1)*+g[2]+parseFloat(f.css(a,c)),h="number");if(d==null||h==="number"&&isNaN(d))return;h==="number"&&!f.cssNumber[i]&&(d+="px");if(!k||!("set"in k)||(d=k.set(a,d))!==b)try{j[c]=d}catch(l){}}},css:function(a,c,d){var e,g;c=f.camelCase(c),g=f.cssHooks[c],c=f.cssProps[c]||c,c==="cssFloat"&&(c="float");if(g&&"get"in g&&(e=g.get(a,!0,d))!==b)return e;if(bz)return bz(a,c)},swap:function(a,b,c){var d={};for(var e in b)d[e]=a.style[e],a.style[e]=b[e];c.call(a);for(e in b)a.style[e]=d[e]}}),f.curCSS=f.css,f.each(["height","width"],function(a,b){f.cssHooks[b]={get:function(a,c,d){var e;if(c){if(a.offsetWidth!==0)return bC(a,b,d);f.swap(a,bw,function(){e=bC(a,b,d)});return e}},set:function(a,b){if(!bt.test(b))return b;b=parseFloat(b);if(b>=0)return b+"px"}}}),f.support.opacity||(f.cssHooks.opacity={get:function(a,b){return br.test((b&&a.currentStyle?a.currentStyle.filter:a.style.filter)||"")?parseFloat(RegExp.$1)/100+"":b?"1":""},set:function(a,b){var c=a.style,d=a.currentStyle,e=f.isNumeric(b)?"alpha(opacity="+b*100+")":"",g=d&&d.filter||c.filter||"";c.zoom=1;if(b>=1&&f.trim(g.replace(bq,""))===""){c.removeAttribute("filter");if(d&&!d.filter)return}c.filter=bq.test(g)?g.replace(bq,e):g+" "+e}}),f(function(){f.support.reliableMarginRight||(f.cssHooks.marginRight={get:function(a,b){var c;f.swap(a,{display:"inline-block"},function(){b?c=bz(a,"margin-right","marginRight"):c=a.style.marginRight});return c}})}),c.defaultView&&c.defaultView.getComputedStyle&&(bA=function(a,b){var 
c,d,e;b=b.replace(bs,"-$1").toLowerCase(),(d=a.ownerDocument.defaultView)&&(e=d.getComputedStyle(a,null))&&(c=e.getPropertyValue(b),c===""&&!f.contains(a.ownerDocument.documentElement,a)&&(c=f.style(a,b)));return c}),c.documentElement.currentStyle&&(bB=function(a,b){var c,d,e,f=a.currentStyle&&a.currentStyle[b],g=a.style;f===null&&g&&(e=g[b])&&(f=e),!bt.test(f)&&bu.test(f)&&(c=g.left,d=a.runtimeStyle&&a.runtimeStyle.left,d&&(a.runtimeStyle.left=a.currentStyle.left),g.left=b==="fontSize"?"1em":f||0,f=g.pixelLeft+"px",g.left=c,d&&(a.runtimeStyle.left=d));return f===""?"auto":f}),bz=bA||bB,f.expr&&f.expr.filters&&(f.expr.filters.hidden=function(a){var b=a.offsetWidth,c=a.offsetHeight;return b===0&&c===0||!f.support.reliableHiddenOffsets&&(a.style&&a.style.display||f.css(a,"display"))==="none"},f.expr.filters.visible=function(a){return!f.expr.filters.hidden(a)});var bD=/%20/g,bE=/\[\]$/,bF=/\r?\n/g,bG=/#.*$/,bH=/^(.*?):[ \t]*([^\r\n]*)\r?$/mg,bI=/^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,bJ=/^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,bK=/^(?:GET|HEAD)$/,bL=/^\/\//,bM=/\?/,bN=/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,bO=/^(?:select|textarea)/i,bP=/\s+/,bQ=/([?&])_=[^&]*/,bR=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,bS=f.fn.load,bT={},bU={},bV,bW,bX=["*/"]+["*"];try{bV=e.href}catch(bY){bV=c.createElement("a"),bV.href="",bV=bV.href}bW=bR.exec(bV.toLowerCase())||[],f.fn.extend({load:function(a,c,d){if(typeof a!="string"&&bS)return bS.apply(this,arguments);if(!this.length)return this;var e=a.indexOf(" ");if(e>=0){var g=a.slice(e,a.length);a=a.slice(0,e)}var h="GET";c&&(f.isFunction(c)?(d=c,c=b):typeof c=="object"&&(c=f.param(c,f.ajaxSettings.traditional),h="POST"));var 
i=this;f.ajax({url:a,type:h,dataType:"html",data:c,complete:function(a,b,c){c=a.responseText,a.isResolved()&&(a.done(function(a){c=a}),i.html(g?f("<div>").append(c.replace(bN,"")).find(g):c)),d&&i.each(d,[c,b,a])}});return this},serialize:function(){return f.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?f.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||bO.test(this.nodeName)||bI.test(this.type))}).map(function(a,b){var c=f(this).val();return c==null?null:f.isArray(c)?f.map(c,function(a,c){return{name:b.name,value:a.replace(bF,"\r\n")}}):{name:b.name,value:c.replace(bF,"\r\n")}}).get()}}),f.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(a,b){f.fn[b]=function(a){return this.on(b,a)}}),f.each(["get","post"],function(a,c){f[c]=function(a,d,e,g){f.isFunction(d)&&(g=g||e,e=d,d=b);return f.ajax({type:c,url:a,data:d,success:e,dataType:g})}}),f.extend({getScript:function(a,c){return f.get(a,b,c,"script")},getJSON:function(a,b,c){return f.get(a,b,c,"json")},ajaxSetup:function(a,b){b?b_(a,f.ajaxSettings):(b=a,a=f.ajaxSettings),b_(a,b);return a},ajaxSettings:{url:bV,isLocal:bJ.test(bW[1]),global:!0,type:"GET",contentType:"application/x-www-form-urlencoded",processData:!0,async:!0,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":bX},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":a.String,"text html":!0,"text json":f.parseJSON,"text xml":f.parseXML},flatOptions:{context:!0,url:!0}},ajaxPrefilter:bZ(bT),ajaxTransport:bZ(bU),ajax:function(a,c){function w(a,c,l,m){if(s!==2){s=2,q&&clearTimeout(q),p=b,n=m||"",v.readyState=a>0?4:0;var 
o,r,u,w=c,x=l?cb(d,v,l):b,y,z;if(a>=200&&a<300||a===304){if(d.ifModified){if(y=v.getResponseHeader("Last-Modified"))f.lastModified[k]=y;if(z=v.getResponseHeader("Etag"))f.etag[k]=z}if(a===304)w="notmodified",o=!0;else try{r=cc(d,x),w="success",o=!0}catch(A){w="parsererror",u=A}}else{u=w;if(!w||a)w="error",a<0&&(a=0)}v.status=a,v.statusText=""+(c||w),o?h.resolveWith(e,[r,w,v]):h.rejectWith(e,[v,w,u]),v.statusCode(j),j=b,t&&g.trigger("ajax"+(o?"Success":"Error"),[v,d,o?r:u]),i.fireWith(e,[v,w]),t&&(g.trigger("ajaxComplete",[v,d]),--f.active||f.event.trigger("ajaxStop"))}}typeof a=="object"&&(c=a,a=b),c=c||{};var d=f.ajaxSetup({},c),e=d.context||d,g=e!==d&&(e.nodeType||e instanceof f)?f(e):f.event,h=f.Deferred(),i=f.Callbacks("once memory"),j=d.statusCode||{},k,l={},m={},n,o,p,q,r,s=0,t,u,v={readyState:0,setRequestHeader:function(a,b){if(!s){var c=a.toLowerCase();a=m[c]=m[c]||a,l[a]=b}return this},getAllResponseHeaders:function(){return s===2?n:null},getResponseHeader:function(a){var c;if(s===2){if(!o){o={};while(c=bH.exec(n))o[c[1].toLowerCase()]=c[2]}c=o[a.toLowerCase()]}return c===b?null:c},overrideMimeType:function(a){s||(d.mimeType=a);return this},abort:function(a){a=a||"abort",p&&p.abort(a),w(0,a);return this}};h.promise(v),v.success=v.done,v.error=v.fail,v.complete=i.add,v.statusCode=function(a){if(a){var b;if(s<2)for(b in a)j[b]=[j[b],a[b]];else b=a[v.status],v.then(b,b)}return this},d.url=((a||d.url)+"").replace(bG,"").replace(bL,bW[1]+"//"),d.dataTypes=f.trim(d.dataType||"*").toLowerCase().split(bP),d.crossDomain==null&&(r=bR.exec(d.url.toLowerCase()),d.crossDomain=!(!r||r[1]==bW[1]&&r[2]==bW[2]&&(r[3]||(r[1]==="http:"?80:443))==(bW[3]||(bW[1]==="http:"?80:443)))),d.data&&d.processData&&typeof 
d.data!="string"&&(d.data=f.param(d.data,d.traditional)),b$(bT,d,c,v);if(s===2)return!1;t=d.global,d.type=d.type.toUpperCase(),d.hasContent=!bK.test(d.type),t&&f.active++===0&&f.event.trigger("ajaxStart");if(!d.hasContent){d.data&&(d.url+=(bM.test(d.url)?"&":"?")+d.data,delete d.data),k=d.url;if(d.cache===!1){var x=f.now(),y=d.url.replace(bQ,"$1_="+x);d.url=y+(y===d.url?(bM.test(d.url)?"&":"?")+"_="+x:"")}}(d.data&&d.hasContent&&d.contentType!==!1||c.contentType)&&v.setRequestHeader("Content-Type",d.contentType),d.ifModified&&(k=k||d.url,f.lastModified[k]&&v.setRequestHeader("If-Modified-Since",f.lastModified[k]),f.etag[k]&&v.setRequestHeader("If-None-Match",f.etag[k])),v.setRequestHeader("Accept",d.dataTypes[0]&&d.accepts[d.dataTypes[0]]?d.accepts[d.dataTypes[0]]+(d.dataTypes[0]!=="*"?", "+bX+"; q=0.01":""):d.accepts["*"]);for(u in d.headers)v.setRequestHeader(u,d.headers[u]);if(d.beforeSend&&(d.beforeSend.call(e,v,d)===!1||s===2)){v.abort();return!1}for(u in{success:1,error:1,complete:1})v[u](d[u]);p=b$(bU,d,c,v);if(!p)w(-1,"No Transport");else{v.readyState=1,t&&g.trigger("ajaxSend",[v,d]),d.async&&d.timeout>0&&(q=setTimeout(function(){v.abort("timeout")},d.timeout));try{s=1,p.send(l,w)}catch(z){if(s<2)w(-1,z);else throw z}}return v},param:function(a,c){var d=[],e=function(a,b){b=f.isFunction(b)?b():b,d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(b)};c===b&&(c=f.ajaxSettings.traditional);if(f.isArray(a)||a.jquery&&!f.isPlainObject(a))f.each(a,function(){e(this.name,this.value)});else for(var g in a)ca(g,a[g],c,e);return d.join("&").replace(bD,"+")}}),f.extend({active:0,lastModified:{},etag:{}});var cd=f.now(),ce=/(\=)\?(&|$)|\?\?/i;f.ajaxSetup({jsonp:"callback",jsonpCallback:function(){return f.expando+"_"+cd++}}),f.ajaxPrefilter("json jsonp",function(b,c,d){var e=b.contentType==="application/x-www-form-urlencoded"&&typeof b.data=="string";if(b.dataTypes[0]==="jsonp"||b.jsonp!==!1&&(ce.test(b.url)||e&&ce.test(b.data))){var 
g,h=b.jsonpCallback=f.isFunction(b.jsonpCallback)?b.jsonpCallback():b.jsonpCallback,i=a[h],j=b.url,k=b.data,l="$1"+h+"$2";b.jsonp!==!1&&(j=j.replace(ce,l),b.url===j&&(e&&(k=k.replace(ce,l)),b.data===k&&(j+=(/\?/.test(j)?"&":"?")+b.jsonp+"="+h))),b.url=j,b.data=k,a[h]=function(a){g=[a]},d.always(function(){a[h]=i,g&&f.isFunction(i)&&a[h](g[0])}),b.converters["script json"]=function(){g||f.error(h+" was not called");return g[0]},b.dataTypes[0]="json";return"script"}}),f.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/javascript|ecmascript/},converters:{"text script":function(a){f.globalEval(a);return a}}}),f.ajaxPrefilter("script",function(a){a.cache===b&&(a.cache=!1),a.crossDomain&&(a.type="GET",a.global=!1)}),f.ajaxTransport("script",function(a){if(a.crossDomain){var d,e=c.head||c.getElementsByTagName("head")[0]||c.documentElement;return{send:function(f,g){d=c.createElement("script"),d.async="async",a.scriptCharset&&(d.charset=a.scriptCharset),d.src=a.url,d.onload=d.onreadystatechange=function(a,c){if(c||!d.readyState||/loaded|complete/.test(d.readyState))d.onload=d.onreadystatechange=null,e&&d.parentNode&&e.removeChild(d),d=b,c||g(200,"success")},e.insertBefore(d,e.firstChild)},abort:function(){d&&d.onload(0,1)}}}});var cf=a.ActiveXObject?function(){for(var a in ch)ch[a](0,1)}:!1,cg=0,ch;f.ajaxSettings.xhr=a.ActiveXObject?function(){return!this.isLocal&&ci()||cj()}:ci,function(a){f.extend(f.support,{ajax:!!a,cors:!!a&&"withCredentials"in a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c)
+{if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,cf&&delete ch[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n),m.text=h.responseText;try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cg,cf&&(ch||(ch={},f(a).unload(cf)),ch[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var ck={},cl,cm,cn=/^(?:toggle|show|hide)$/,co=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,cp,cq=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cr;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(cu("show",3),a,b,c);for(var g=0,h=this.length;g<h;g++)d=this[g],d.style&&(e=d.style.display,!f._data(d,"olddisplay")&&e==="none"&&(e=d.style.display=""),e===""&&f.css(d,"display")==="none"&&f._data(d,"olddisplay",cv(d.nodeName)));for(g=0;g<h;g++){d=this[g];if(d.style){e=d.style.display;if(e===""||e==="none")d.style.display=f._data(d,"olddisplay")||""}}return this},hide:function(a,b,c){if(a||a===0)return this.animate(cu("hide",3),a,b,c);var d,e,g=0,h=this.length;for(;g<h;g++)d=this[g],d.style&&(e=f.css(d,"display"),e!=="none"&&!f._data(d,"olddisplay")&&f._data(d,"olddisplay",e));for(g=0;g<h;g++)this[g].style&&(this[g].style.display="none");return 
this},_toggle:f.fn.toggle,toggle:function(a,b,c){var d=typeof a=="boolean";f.isFunction(a)&&f.isFunction(b)?this._toggle.apply(this,arguments):a==null||d?this.each(function(){var b=d?a:f(this).is(":hidden");f(this)[b?"show":"hide"]()}):this.animate(cu("toggle",3),a,b,c);return this},fadeTo:function(a,b,c,d){return this.filter(":hidden").css("opacity",0).show().end().animate({opacity:b},a,c,d)},animate:function(a,b,c,d){function g(){e.queue===!1&&f._mark(this);var b=f.extend({},e),c=this.nodeType===1,d=c&&f(this).is(":hidden"),g,h,i,j,k,l,m,n,o;b.animatedProperties={};for(i in a){g=f.camelCase(i),i!==g&&(a[g]=a[i],delete a[i]),h=a[g],f.isArray(h)?(b.animatedProperties[g]=h[1],h=a[g]=h[0]):b.animatedProperties[g]=b.specialEasing&&b.specialEasing[g]||b.easing||"swing";if(h==="hide"&&d||h==="show"&&!d)return b.complete.call(this);c&&(g==="height"||g==="width")&&(b.overflow=[this.style.overflow,this.style.overflowX,this.style.overflowY],f.css(this,"display")==="inline"&&f.css(this,"float")==="none"&&(!f.support.inlineBlockNeedsLayout||cv(this.nodeName)==="inline"?this.style.display="inline-block":this.style.zoom=1))}b.overflow!=null&&(this.style.overflow="hidden");for(i in a)j=new f.fx(this,b,i),h=a[i],cn.test(h)?(o=f._data(this,"toggle"+i)||(h==="toggle"?d?"show":"hide":0),o?(f._data(this,"toggle"+i,o==="show"?"hide":"show"),j[o]()):j[h]()):(k=co.exec(h),l=j.cur(),k?(m=parseFloat(k[2]),n=k[3]||(f.cssNumber[i]?"":"px"),n!=="px"&&(f.style(this,i,(m||1)+n),l=(m||1)/j.cur()*l,f.style(this,i,l+n)),k[1]&&(m=(k[1]==="-="?-1:1)*m+l),j.custom(l,m,n)):j.custom(l,h,""));return!0}var e=f.speed(b,c,d);if(f.isEmptyObject(a))return this.each(e.complete,[!1]);a=f.extend({},a);return e.queue===!1?this.each(g):this.queue(e.queue,g)},stop:function(a,c,d){typeof a!="string"&&(d=c,c=a,a=b),c&&a!==!1&&this.queue(a||"fx",[]);return this.each(function(){function h(a,b,c){var e=b[c];f.removeData(a,c,!0),e.stop(d)}var b,c=!1,e=f.timers,g=f._data(this);d||f._unmark(!0,this);if(a==null)for(b in 
g)g[b]&&g[b].stop&&b.indexOf(".run")===b.length-4&&h(this,g,b);else g[b=a+".run"]&&g[b].stop&&h(this,g,b);for(b=e.length;b--;)e[b].elem===this&&(a==null||e[b].queue===a)&&(d?e[b](!0):e[b].saveState(),c=!0,e.splice(b,1));(!d||!c)&&f.dequeue(this,a)})}}),f.each({slideDown:cu("show",1),slideUp:cu("hide",1),slideToggle:cu("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"},fadeToggle:{opacity:"toggle"}},function(a,b){f.fn[a]=function(a,c,d){return this.animate(b,a,c,d)}}),f.extend({speed:function(a,b,c){var d=a&&typeof a=="object"?f.extend({},a):{complete:c||!c&&b||f.isFunction(a)&&a,duration:a,easing:c&&b||b&&!f.isFunction(b)&&b};d.duration=f.fx.off?0:typeof d.duration=="number"?d.duration:d.duration in f.fx.speeds?f.fx.speeds[d.duration]:f.fx.speeds._default;if(d.queue==null||d.queue===!0)d.queue="fx";d.old=d.complete,d.complete=function(a){f.isFunction(d.old)&&d.old.call(this),d.queue?f.dequeue(this,d.queue):a!==!1&&f._unmark(this)};return d},easing:{linear:function(a,b,c,d){return c+d*a},swing:function(a,b,c,d){return(-Math.cos(a*Math.PI)/2+.5)*d+c}},timers:[],fx:function(a,b,c){this.options=b,this.elem=a,this.prop=c,b.orig=b.orig||{}}}),f.fx.prototype={update:function(){this.options.step&&this.options.step.call(this.elem,this.now,this),(f.fx.step[this.prop]||f.fx.step._default)(this)},cur:function(){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null))return this.elem[this.prop];var a,b=f.css(this.elem,this.prop);return isNaN(a=parseFloat(b))?!b||b==="auto"?0:b:a},custom:function(a,c,d){function h(a){return e.step(a)}var e=this,g=f.fx;this.startTime=cr||cs(),this.end=c,this.now=this.start=a,this.pos=this.state=0,this.unit=d||this.unit||(f.cssNumber[this.prop]?"":"px"),h.queue=this.options.queue,h.elem=this.elem,h.saveState=function(){e.options.hide&&f._data(e.elem,"fxshow"+e.prop)===b&&f._data(e.elem,"fxshow"+e.prop,e.start)},h()&&f.timers.push(h)&&!cp&&(cp=setInterval(g.tick,g.interval))},show:function(){var 
a=f._data(this.elem,"fxshow"+this.prop);this.options.orig[this.prop]=a||f.style(this.elem,this.prop),this.options.show=!0,a!==b?this.custom(this.cur(),a):this.custom(this.prop==="width"||this.prop==="height"?1:0,this.cur()),f(this.elem).show()},hide:function(){this.options.orig[this.prop]=f._data(this.elem,"fxshow"+this.prop)||f.style(this.elem,this.prop),this.options.hide=!0,this.custom(this.cur(),0)},step:function(a){var b,c,d,e=cr||cs(),g=!0,h=this.elem,i=this.options;if(a||e>=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var a,b=f.timers,c=0;for(;c<b.length;c++)a=b[c],!a()&&b[c]===a&&b.splice(c--,1);b.length||f.fx.stop()},interval:13,stop:function(){clearInterval(cp),cp=null},speeds:{slow:600,fast:200,_default:400},step:{opacity:function(a){f.style(a.elem,"opacity",a.now)},_default:function(a){a.elem.style&&a.elem.style[a.prop]!=null?a.elem.style[a.prop]=a.now+a.unit:a.elem[a.prop]=a.now}}}),f.each(["width","height"],function(a,b){f.fx.step[b]=function(a){f.style(a.elem,b,Math.max(0,a.now)+a.unit)}}),f.expr&&f.expr.filters&&(f.expr.filters.animated=function(a){return f.grep(f.timers,function(b){return a===b.elem}).length});var cw=/^t(?:able|d|h)$/i,cx=/^(?:body|html)$/i;"getBoundingClientRect"in c.documentElement?f.fn.offset=function(a){var 
b=this[0],c;if(a)return this.each(function(b){f.offset.setOffset(this,a,b)});if(!b||!b.ownerDocument)return null;if(b===b.ownerDocument.body)return f.offset.bodyOffset(b);try{c=b.getBoundingClientRect()}catch(d){}var e=b.ownerDocument,g=e.documentElement;if(!c||!f.contains(g,b))return c?{top:c.top,left:c.left}:{top:0,left:0};var h=e.body,i=cy(e),j=g.clientTop||h.clientTop||0,k=g.clientLeft||h.clientLeft||0,l=i.pageYOffset||f.support.boxModel&&g.scrollTop||h.scrollTop,m=i.pageXOffset||f.support.boxModel&&g.scrollLeft||h.scrollLeft,n=c.top+l-j,o=c.left+m-k;return{top:n,left:o}}:f.fn.offset=function(a){var b=this[0];if(a)return this.each(function(b){f.offset.setOffset(this,a,b)});if(!b||!b.ownerDocument)return null;if(b===b.ownerDocument.body)return f.offset.bodyOffset(b);var c,d=b.offsetParent,e=b,g=b.ownerDocument,h=g.documentElement,i=g.body,j=g.defaultView,k=j?j.getComputedStyle(b,null):b.currentStyle,l=b.offsetTop,m=b.offsetLeft;while((b=b.parentNode)&&b!==i&&b!==h){if(f.support.fixedPosition&&k.position==="fixed")break;c=j?j.getComputedStyle(b,null):b.currentStyle,l-=b.scrollTop,m-=b.scrollLeft,b===d&&(l+=b.offsetTop,m+=b.offsetLeft,f.support.doesNotAddBorder&&(!f.support.doesAddBorderForTableAndCells||!cw.test(b.nodeName))&&(l+=parseFloat(c.borderTopWidth)||0,m+=parseFloat(c.borderLeftWidth)||0),e=d,d=b.offsetParent),f.support.subtractsBorderForOverflowNotVisible&&c.overflow!=="visible"&&(l+=parseFloat(c.borderTopWidth)||0,m+=parseFloat(c.borderLeftWidth)||0),k=c}if(k.position==="relative"||k.position==="static")l+=i.offsetTop,m+=i.offsetLeft;f.support.fixedPosition&&k.position==="fixed"&&(l+=Math.max(h.scrollTop,i.scrollTop),m+=Math.max(h.scrollLeft,i.scrollLeft));return{top:l,left:m}},f.offset={bodyOffset:function(a){var b=a.offsetTop,c=a.offsetLeft;f.support.doesNotIncludeMarginInBodyOffset&&(b+=parseFloat(f.css(a,"marginTop"))||0,c+=parseFloat(f.css(a,"marginLeft"))||0);return{top:b,left:c}},setOffset:function(a,b,c){var 
d=f.css(a,"position");d==="static"&&(a.style.position="relative");var e=f(a),g=e.offset(),h=f.css(a,"top"),i=f.css(a,"left"),j=(d==="absolute"||d==="fixed")&&f.inArray("auto",[h,i])>-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each(["Left","Top"],function(a,c){var d="scroll"+c;f.fn[d]=function(c){var e,g;if(c===b){e=this[0];if(!e)return null;g=cy(e);return g?"pageXOffset"in g?g[a?"pageYOffset":"pageXOffset"]:f.support.boxModel&&g.document.documentElement[d]||g.document.body[d]:e[d]}return this.each(function(){g=cy(this),g?g.scrollTo(a?f(g).scrollLeft():c,a?c:f(g).scrollTop()):this[d]=c})}}),f.each(["Height","Width"],function(a,c){var d=c.toLowerCase();f.fn["inner"+c]=function(){var a=this[0];return a?a.style?parseFloat(f.css(a,d,"padding")):this[d]():null},f.fn["outer"+c]=function(a){var b=this[0];return b?b.style?parseFloat(f.css(b,d,a?"margin":"border")):this[d]():null},f.fn[d]=function(a){var e=this[0];if(!e)return a==null?null:this;if(f.isFunction(a))return this.each(function(b){var c=f(this);c[d](a.call(this,b,c[d]()))});if(f.isWindow(e)){var g=e.document.documentElement["client"+c],h=e.document.body;return e.document.compatMode==="CSS1Compat"&&g||h&&h["client"+c]||g}if(e.nodeType===9)return 
Math.max(e.documentElement["client"+c],e.body["scroll"+c],e.documentElement["scroll"+c],e.body["offset"+c],e.documentElement["offset"+c]);if(a===b){var i=f.css(e,d),j=parseFloat(i);return f.isNumeric(j)?j:i}return this.css(d,typeof a=="string"?a:a+"px")}}),a.jQuery=a.$=f,typeof define=="function"&&define.amd&&define.amd.jQuery&&define("jquery",[],function(){return f})})(window);
diff --git a/doc/html/modules.html b/doc/html/modules.html
index e95f920..2691422 100644
--- a/doc/html/modules.html
+++ b/doc/html/modules.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: Modules</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li class="current"><a href="modules.html"><span>Modules</span></a></li>
       <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -48,22 +59,13 @@
       </li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="headertitle">
-<div class="title">Modules</div>  </div>
-</div>
-<div class="contents">
-<div class="textblock">Here is a list of all modules:</div><ul>
-<li><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></li>
-</ul>
-</div>
+</div><!-- top -->
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -72,8 +74,22 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+<div class="header">
+  <div class="headertitle">
+<div class="title">Modules</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="textblock">Here is a list of all modules:</div><div class="directory">
+<table class="directory">
+<tr id="row_0_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><a class="el" href="group___turbo_j_p_e_g.html" target="_self">TurboJPEG</a></td><td class="desc">TurboJPEG API</td></tr>
+</table>
+</div><!-- directory -->
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/nav_f.png b/doc/html/nav_f.png
index 1b07a16..72a58a5 100644
--- a/doc/html/nav_f.png
+++ b/doc/html/nav_f.png
Binary files differ
diff --git a/doc/html/nav_g.png b/doc/html/nav_g.png
new file mode 100644
index 0000000..2093a23
--- /dev/null
+++ b/doc/html/nav_g.png
Binary files differ
diff --git a/doc/html/nav_h.png b/doc/html/nav_h.png
index 01f5fa6..33389b1 100644
--- a/doc/html/nav_h.png
+++ b/doc/html/nav_h.png
Binary files differ
diff --git a/doc/html/open.png b/doc/html/open.png
index 7b35d2c..30f75c7 100644
--- a/doc/html/open.png
+++ b/doc/html/open.png
Binary files differ
diff --git a/doc/html/search/all_63.html b/doc/html/search/all_63.html
index 8cfc38f..e7f34db 100644
--- a/doc/html/search/all_63.html
+++ b/doc/html/search/all_63.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_63.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_customfilter">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1" target="_parent">customFilter</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_63.js b/doc/html/search/all_63.js
new file mode 100644
index 0000000..7b058da
--- /dev/null
+++ b/doc/html/search/all_63.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['customfilter',['customFilter',['../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_64.html b/doc/html/search/all_64.html
index 2e53b02..360601f 100644
--- a/doc/html/search/all_64.html
+++ b/doc/html/search/all_64.html
@@ -1,24 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_64.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_data">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3" target="_parent">data</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
-<div class="SRResult" id="SR_denom">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3" target="_parent">denom</a>
-  <span class="SRScope">tjscalingfactor</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_64.js b/doc/html/search/all_64.js
new file mode 100644
index 0000000..e19a050
--- /dev/null
+++ b/doc/html/search/all_64.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['data',['data',['../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3',1,'tjtransform']]],
+  ['denom',['denom',['../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/all_68.html b/doc/html/search/all_68.html
index ccb671d..dec41d6 100644
--- a/doc/html/search/all_68.html
+++ b/doc/html/search/all_68.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_68.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_h">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115" target="_parent">h</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_68.js b/doc/html/search/all_68.js
new file mode 100644
index 0000000..7b17e97
--- /dev/null
+++ b/doc/html/search/all_68.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['h',['h',['../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_6e.html b/doc/html/search/all_6e.html
index b9f5b05..e0fd765 100644
--- a/doc/html/search/all_6e.html
+++ b/doc/html/search/all_6e.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_6e.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_num">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec" target="_parent">num</a>
-  <span class="SRScope">tjscalingfactor</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_6e.js b/doc/html/search/all_6e.js
new file mode 100644
index 0000000..83faa13
--- /dev/null
+++ b/doc/html/search/all_6e.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['num',['num',['../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/all_6f.html b/doc/html/search/all_6f.html
index d95bbef..5e86b03 100644
--- a/doc/html/search/all_6f.html
+++ b/doc/html/search/all_6f.html
@@ -1,24 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_6f.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_op">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498" target="_parent">op</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
-<div class="SRResult" id="SR_options">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6" target="_parent">options</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_6f.js b/doc/html/search/all_6f.js
new file mode 100644
index 0000000..1cca832
--- /dev/null
+++ b/doc/html/search/all_6f.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['op',['op',['../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498',1,'tjtransform']]],
+  ['options',['options',['../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_72.html b/doc/html/search/all_72.html
index 465fe88..347b9f6 100644
--- a/doc/html/search/all_72.html
+++ b/doc/html/search/all_72.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_72.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_r">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf" target="_parent">r</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_72.js b/doc/html/search/all_72.js
new file mode 100644
index 0000000..01cde35
--- /dev/null
+++ b/doc/html/search/all_72.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['r',['r',['../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_74.html b/doc/html/search/all_74.html
index fd77663..c646aef 100644
--- a/doc/html/search/all_74.html
+++ b/doc/html/search/all_74.html
@@ -1,27 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_74.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_tjregion">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html" target="_parent">tjregion</a>
- </div>
-</div>
-<div class="SRResult" id="SR_tjscalingfactor">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjscalingfactor.html" target="_parent">tjscalingfactor</a>
- </div>
-</div>
-<div class="SRResult" id="SR_tjtransform">
- <div class="SREntry">
-  <a id="Item2" onkeydown="return searchResults.Nav(event,2)" onkeypress="return searchResults.Nav(event,2)" onkeyup="return searchResults.Nav(event,2)" class="SRSymbol" href="../structtjtransform.html" target="_parent">tjtransform</a>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_74.js b/doc/html/search/all_74.js
new file mode 100644
index 0000000..a1927ba
--- /dev/null
+++ b/doc/html/search/all_74.js
@@ -0,0 +1,75 @@
+var searchData=
+[
+  ['tj_5fnumpf',['TJ_NUMPF',['../group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e',1,'turbojpeg.h']]],
+  ['tj_5fnumsamp',['TJ_NUMSAMP',['../group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c',1,'turbojpeg.h']]],
+  ['tj_5fnumxop',['TJ_NUMXOP',['../group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c',1,'turbojpeg.h']]],
+  ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
+  ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]],
+  ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
+  ['tjbufsizeyuv',['tjBufSizeYUV',['../group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c',1,'turbojpeg.h']]],
+  ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+  ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
+  ['tjdecompressheader2',['tjDecompressHeader2',['../group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa',1,'turbojpeg.h']]],
+  ['tjdecompresstoyuv',['tjDecompressToYUV',['../group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8',1,'turbojpeg.h']]],
+  ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
+  ['tjencodeyuv2',['tjEncodeYUV2',['../group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35',1,'turbojpeg.h']]],
+  ['tjflag_5faccuratedct',['TJFLAG_ACCURATEDCT',['../group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0',1,'turbojpeg.h']]],
+  ['tjflag_5fbottomup',['TJFLAG_BOTTOMUP',['../group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec',1,'turbojpeg.h']]],
+  ['tjflag_5ffastdct',['TJFLAG_FASTDCT',['../group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2',1,'turbojpeg.h']]],
+  ['tjflag_5ffastupsample',['TJFLAG_FASTUPSAMPLE',['../group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d',1,'turbojpeg.h']]],
+  ['tjflag_5fforcemmx',['TJFLAG_FORCEMMX',['../group___turbo_j_p_e_g.html#ga4e872f11c82f241736fa8297920f24e5',1,'turbojpeg.h']]],
+  ['tjflag_5fforcesse',['TJFLAG_FORCESSE',['../group___turbo_j_p_e_g.html#gae17e63189e8cd730feed3efbd2454f38',1,'turbojpeg.h']]],
+  ['tjflag_5fforcesse2',['TJFLAG_FORCESSE2',['../group___turbo_j_p_e_g.html#ga8cf0bca96ea4d472563f4b0ebf8c48e7',1,'turbojpeg.h']]],
+  ['tjflag_5fforcesse3',['TJFLAG_FORCESSE3',['../group___turbo_j_p_e_g.html#gaf9d49066633404da4386d70820295dd2',1,'turbojpeg.h']]],
+  ['tjflag_5fnorealloc',['TJFLAG_NOREALLOC',['../group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963',1,'turbojpeg.h']]],
+  ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
+  ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
+  ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]],
+  ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]],
+  ['tjhandle',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]],
+  ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]],
+  ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]],
+  ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]],
+  ['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]],
+  ['tjmcuwidth',['tjMCUWidth',['../group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c',1,'turbojpeg.h']]],
+  ['tjpad',['TJPAD',['../group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511',1,'turbojpeg.h']]],
+  ['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]],
+  ['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]],
+  ['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]],
+  ['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
+  ['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
+  ['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+  ['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
+  ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
+  ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
+  ['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]],
+  ['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]],
+  ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]],
+  ['tjpixelsize',['tjPixelSize',['../group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c',1,'turbojpeg.h']]],
+  ['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]],
+  ['tjregion',['tjregion',['../structtjregion.html',1,'']]],
+  ['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
+  ['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
+  ['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
+  ['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
+  ['tjsamp_5f444',['TJSAMP_444',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3',1,'turbojpeg.h']]],
+  ['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]],
+  ['tjscaled',['TJSCALED',['../group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df',1,'turbojpeg.h']]],
+  ['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]],
+  ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform():&#160;turbojpeg.h'],['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'tjTransform(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags):&#160;turbojpeg.h']]],
+  ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]],
+  ['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]],
+  ['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]],
+  ['tjxop_5frot180',['TJXOP_ROT180',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692',1,'turbojpeg.h']]],
+  ['tjxop_5frot270',['TJXOP_ROT270',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08',1,'turbojpeg.h']]],
+  ['tjxop_5frot90',['TJXOP_ROT90',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128',1,'turbojpeg.h']]],
+  ['tjxop_5ftranspose',['TJXOP_TRANSPOSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d',1,'turbojpeg.h']]],
+  ['tjxop_5ftransverse',['TJXOP_TRANSVERSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4',1,'turbojpeg.h']]],
+  ['tjxop_5fvflip',['TJXOP_VFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d',1,'turbojpeg.h']]],
+  ['tjxopt_5fcrop',['TJXOPT_CROP',['../group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2',1,'turbojpeg.h']]],
+  ['tjxopt_5fgray',['TJXOPT_GRAY',['../group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589',1,'turbojpeg.h']]],
+  ['tjxopt_5fnooutput',['TJXOPT_NOOUTPUT',['../group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31',1,'turbojpeg.h']]],
+  ['tjxopt_5fperfect',['TJXOPT_PERFECT',['../group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00',1,'turbojpeg.h']]],
+  ['tjxopt_5ftrim',['TJXOPT_TRIM',['../group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709',1,'turbojpeg.h']]],
+  ['turbojpeg',['TurboJPEG',['../group___turbo_j_p_e_g.html',1,'']]]
+];
diff --git a/doc/html/search/all_77.html b/doc/html/search/all_77.html
index b4c8d88..55d7142 100644
--- a/doc/html/search/all_77.html
+++ b/doc/html/search/all_77.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_77.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_w">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42" target="_parent">w</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_77.js b/doc/html/search/all_77.js
new file mode 100644
index 0000000..4267002
--- /dev/null
+++ b/doc/html/search/all_77.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['w',['w',['../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_78.html b/doc/html/search/all_78.html
index a357691..39075d4 100644
--- a/doc/html/search/all_78.html
+++ b/doc/html/search/all_78.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_78.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_x">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9" target="_parent">x</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_78.js b/doc/html/search/all_78.js
new file mode 100644
index 0000000..41a27f2
--- /dev/null
+++ b/doc/html/search/all_78.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['x',['x',['../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_79.html b/doc/html/search/all_79.html
index a883bd1..033719a 100644
--- a/doc/html/search/all_79.html
+++ b/doc/html/search/all_79.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_79.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_y">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2" target="_parent">y</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/all_79.js b/doc/html/search/all_79.js
new file mode 100644
index 0000000..86890a6
--- /dev/null
+++ b/doc/html/search/all_79.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['y',['y',['../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2',1,'tjregion']]]
+];
diff --git a/doc/html/search/classes_74.html b/doc/html/search/classes_74.html
index fd77663..4b0fdaa 100644
--- a/doc/html/search/classes_74.html
+++ b/doc/html/search/classes_74.html
@@ -1,27 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="classes_74.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_tjregion">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html" target="_parent">tjregion</a>
- </div>
-</div>
-<div class="SRResult" id="SR_tjscalingfactor">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjscalingfactor.html" target="_parent">tjscalingfactor</a>
- </div>
-</div>
-<div class="SRResult" id="SR_tjtransform">
- <div class="SREntry">
-  <a id="Item2" onkeydown="return searchResults.Nav(event,2)" onkeypress="return searchResults.Nav(event,2)" onkeyup="return searchResults.Nav(event,2)" class="SRSymbol" href="../structtjtransform.html" target="_parent">tjtransform</a>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/classes_74.js b/doc/html/search/classes_74.js
new file mode 100644
index 0000000..cd623d2
--- /dev/null
+++ b/doc/html/search/classes_74.js
@@ -0,0 +1,6 @@
+var searchData=
+[
+  ['tjregion',['tjregion',['../structtjregion.html',1,'']]],
+  ['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]],
+  ['tjtransform',['tjtransform',['../structtjtransform.html',1,'']]]
+];
diff --git a/doc/html/search/enums_74.html b/doc/html/search/enums_74.html
new file mode 100644
index 0000000..9b754ee
--- /dev/null
+++ b/doc/html/search/enums_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="enums_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/enums_74.js b/doc/html/search/enums_74.js
new file mode 100644
index 0000000..20bd4db
--- /dev/null
+++ b/doc/html/search/enums_74.js
@@ -0,0 +1,6 @@
+var searchData=
+[
+  ['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]],
+  ['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
+  ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/enumvalues_74.html b/doc/html/search/enumvalues_74.html
new file mode 100644
index 0000000..0d69a0a
--- /dev/null
+++ b/doc/html/search/enumvalues_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="enumvalues_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/enumvalues_74.js b/doc/html/search/enumvalues_74.js
new file mode 100644
index 0000000..55664f1
--- /dev/null
+++ b/doc/html/search/enumvalues_74.js
@@ -0,0 +1,27 @@
+var searchData=
+[
+  ['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]],
+  ['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]],
+  ['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
+  ['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
+  ['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+  ['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
+  ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
+  ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
+  ['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]],
+  ['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]],
+  ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]],
+  ['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
+  ['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
+  ['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
+  ['tjsamp_5f444',['TJSAMP_444',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3',1,'turbojpeg.h']]],
+  ['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]],
+  ['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]],
+  ['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]],
+  ['tjxop_5frot180',['TJXOP_ROT180',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692',1,'turbojpeg.h']]],
+  ['tjxop_5frot270',['TJXOP_ROT270',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08',1,'turbojpeg.h']]],
+  ['tjxop_5frot90',['TJXOP_ROT90',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128',1,'turbojpeg.h']]],
+  ['tjxop_5ftranspose',['TJXOP_TRANSPOSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d',1,'turbojpeg.h']]],
+  ['tjxop_5ftransverse',['TJXOP_TRANSVERSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4',1,'turbojpeg.h']]],
+  ['tjxop_5fvflip',['TJXOP_VFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/functions_74.html b/doc/html/search/functions_74.html
new file mode 100644
index 0000000..1605901
--- /dev/null
+++ b/doc/html/search/functions_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="functions_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/functions_74.js b/doc/html/search/functions_74.js
new file mode 100644
index 0000000..c746a91
--- /dev/null
+++ b/doc/html/search/functions_74.js
@@ -0,0 +1,19 @@
+var searchData=
+[
+  ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
+  ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
+  ['tjbufsizeyuv',['tjBufSizeYUV',['../group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c',1,'turbojpeg.h']]],
+  ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+  ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
+  ['tjdecompressheader2',['tjDecompressHeader2',['../group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa',1,'turbojpeg.h']]],
+  ['tjdecompresstoyuv',['tjDecompressToYUV',['../group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8',1,'turbojpeg.h']]],
+  ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
+  ['tjencodeyuv2',['tjEncodeYUV2',['../group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35',1,'turbojpeg.h']]],
+  ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
+  ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
+  ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]],
+  ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]],
+  ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]],
+  ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]],
+  ['tjtransform',['tjTransform',['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/groups_74.html b/doc/html/search/groups_74.html
new file mode 100644
index 0000000..a169560
--- /dev/null
+++ b/doc/html/search/groups_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="groups_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/groups_74.js b/doc/html/search/groups_74.js
new file mode 100644
index 0000000..27d4ffb
--- /dev/null
+++ b/doc/html/search/groups_74.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['turbojpeg',['TurboJPEG',['../group___turbo_j_p_e_g.html',1,'']]]
+];
diff --git a/doc/html/search/search.css b/doc/html/search/search.css
index 50249e5..5b208ed 100644
--- a/doc/html/search/search.css
+++ b/doc/html/search/search.css
@@ -4,13 +4,6 @@
     float: left;
 }
 
-#searchli {
-    float: right;
-    display: block;
-    width: 170px;
-    height: 36px;
-}
-
 #MSearchBox {
     white-space : nowrap;
     position: absolute;
@@ -20,6 +13,7 @@
     right: 0px;
     width: 170px;
     z-index: 102;
+    background-color: white;
 }
 
 #MSearchBox .left
@@ -224,6 +218,10 @@
     text-decoration: underline;
 }
 
+span.SRScope {
+    padding-left: 4px;
+}
+
 .SRPage .SRStatus {
     padding: 2px 5px;
     font-size: 8pt;
@@ -238,3 +236,36 @@
     margin-left: 10px;
     margin-right: 10px;
 }
+
+/*---------------- External search page results */
+
+.searchresult {
+    background-color: #F0F3F8;
+}
+
+.pages b {
+   color: white;
+   padding: 5px 5px 3px 5px;
+   background-image: url("../tab_a.png");
+   background-repeat: repeat-x;
+   text-shadow: 0 1px 1px #000000;
+}
+
+.pages {
+    line-height: 17px;
+    margin-left: 4px;
+    text-decoration: none;
+}
+
+.hl {
+    font-weight: bold;
+}
+
+#searchresults {
+    margin-bottom: 20px;
+}
+
+.searchpages {
+    margin-top: 10px;
+}
+
diff --git a/doc/html/search/search.js b/doc/html/search/search.js
index 0a9c356..409672c 100644
--- a/doc/html/search/search.js
+++ b/doc/html/search/search.js
@@ -9,14 +9,24 @@
 {
   0: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100010000011001010011100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
   1: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
-  2: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100010000011001000011100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+  2: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  3: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100010000011001010011100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  4: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  5: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  6: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  7: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
 };
 
 var indexSectionNames =
 {
   0: "all",
   1: "classes",
-  2: "variables"
+  2: "functions",
+  3: "variables",
+  4: "typedefs",
+  5: "enums",
+  6: "enumvalues",
+  7: "groups"
 };
 
 function convertToId(search)
@@ -261,7 +271,7 @@
         var node = child.firstChild;
         if (j==id)
         {
-          node.innerHTML='&bull;';
+          node.innerHTML='&#8226;';
         }
         else
         {
@@ -358,7 +368,7 @@
        hasResultsPage = false;
     }
 
-    window.frames.MSearchResults.location.href = resultsPageWithSearch;  
+    window.frames.MSearchResults.location = resultsPageWithSearch;  
     var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow();
 
     if (domPopupSearchResultsWindow.style.display!='block')
@@ -728,3 +738,72 @@
       return false;
     }
 }
+
+function setKeyActions(elem,action)
+{
+  elem.setAttribute('onkeydown',action);
+  elem.setAttribute('onkeypress',action);
+  elem.setAttribute('onkeyup',action);
+}
+
+function setClassAttr(elem,attr)
+{
+  elem.setAttribute('class',attr);
+  elem.setAttribute('className',attr);
+}
+
+function createResults()
+{
+  var results = document.getElementById("SRResults");
+  for (var e=0; e<searchData.length; e++)
+  {
+    var id = searchData[e][0];
+    var srResult = document.createElement('div');
+    srResult.setAttribute('id','SR_'+id);
+    setClassAttr(srResult,'SRResult');
+    var srEntry = document.createElement('div');
+    setClassAttr(srEntry,'SREntry');
+    var srLink = document.createElement('a');
+    srLink.setAttribute('id','Item'+e);
+    setKeyActions(srLink,'return searchResults.Nav(event,'+e+')');
+    setClassAttr(srLink,'SRSymbol');
+    srLink.innerHTML = searchData[e][1][0];
+    srEntry.appendChild(srLink);
+    if (searchData[e][1].length==2) // single result
+    {
+      srLink.setAttribute('href',searchData[e][1][1][0]);
+      if (searchData[e][1][1][1])
+      {
+       srLink.setAttribute('target','_parent');
+      }
+      var srScope = document.createElement('span');
+      setClassAttr(srScope,'SRScope');
+      srScope.innerHTML = searchData[e][1][1][2];
+      srEntry.appendChild(srScope);
+    }
+    else // multiple results
+    {
+      srLink.setAttribute('href','javascript:searchResults.Toggle("SR_'+id+'")');
+      var srChildren = document.createElement('div');
+      setClassAttr(srChildren,'SRChildren');
+      for (var c=0; c<searchData[e][1].length-1; c++)
+      {
+        var srChild = document.createElement('a');
+        srChild.setAttribute('id','Item'+e+'_c'+c);
+        setKeyActions(srChild,'return searchResults.NavChild(event,'+e+','+c+')');
+        setClassAttr(srChild,'SRScope');
+        srChild.setAttribute('href',searchData[e][1][c+1][0]);
+        if (searchData[e][1][c+1][1])
+        {
+         srChild.setAttribute('target','_parent');
+        }
+        srChild.innerHTML = searchData[e][1][c+1][2];
+        srChildren.appendChild(srChild);
+      }
+      srEntry.appendChild(srChildren);
+    }
+    srResult.appendChild(srEntry);
+    results.appendChild(srResult);
+  }
+}
+
diff --git a/doc/html/search/typedefs_74.html b/doc/html/search/typedefs_74.html
new file mode 100644
index 0000000..b2f6d2a
--- /dev/null
+++ b/doc/html/search/typedefs_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="typedefs_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/typedefs_74.js b/doc/html/search/typedefs_74.js
new file mode 100644
index 0000000..85b00f5
--- /dev/null
+++ b/doc/html/search/typedefs_74.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['tjhandle',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]],
+  ['tjtransform',['tjtransform',['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/variables_63.html b/doc/html/search/variables_63.html
index 8cfc38f..422085c 100644
--- a/doc/html/search/variables_63.html
+++ b/doc/html/search/variables_63.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_63.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_customfilter">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1" target="_parent">customFilter</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_63.js b/doc/html/search/variables_63.js
new file mode 100644
index 0000000..7b058da
--- /dev/null
+++ b/doc/html/search/variables_63.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['customfilter',['customFilter',['../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_64.html b/doc/html/search/variables_64.html
index 2e53b02..df4414b 100644
--- a/doc/html/search/variables_64.html
+++ b/doc/html/search/variables_64.html
@@ -1,24 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_64.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_data">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3" target="_parent">data</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
-<div class="SRResult" id="SR_denom">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3" target="_parent">denom</a>
-  <span class="SRScope">tjscalingfactor</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_64.js b/doc/html/search/variables_64.js
new file mode 100644
index 0000000..e19a050
--- /dev/null
+++ b/doc/html/search/variables_64.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['data',['data',['../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3',1,'tjtransform']]],
+  ['denom',['denom',['../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/variables_68.html b/doc/html/search/variables_68.html
index ccb671d..2f0a862 100644
--- a/doc/html/search/variables_68.html
+++ b/doc/html/search/variables_68.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_68.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_h">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115" target="_parent">h</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_68.js b/doc/html/search/variables_68.js
new file mode 100644
index 0000000..7b17e97
--- /dev/null
+++ b/doc/html/search/variables_68.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['h',['h',['../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_6e.html b/doc/html/search/variables_6e.html
index b9f5b05..2eb4def 100644
--- a/doc/html/search/variables_6e.html
+++ b/doc/html/search/variables_6e.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_6e.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_num">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec" target="_parent">num</a>
-  <span class="SRScope">tjscalingfactor</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_6e.js b/doc/html/search/variables_6e.js
new file mode 100644
index 0000000..83faa13
--- /dev/null
+++ b/doc/html/search/variables_6e.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['num',['num',['../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/variables_6f.html b/doc/html/search/variables_6f.html
index d95bbef..f06e2e0 100644
--- a/doc/html/search/variables_6f.html
+++ b/doc/html/search/variables_6f.html
@@ -1,24 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_6f.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_op">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498" target="_parent">op</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
-<div class="SRResult" id="SR_options">
- <div class="SREntry">
-  <a id="Item1" onkeydown="return searchResults.Nav(event,1)" onkeypress="return searchResults.Nav(event,1)" onkeyup="return searchResults.Nav(event,1)" class="SRSymbol" href="../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6" target="_parent">options</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_6f.js b/doc/html/search/variables_6f.js
new file mode 100644
index 0000000..1cca832
--- /dev/null
+++ b/doc/html/search/variables_6f.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['op',['op',['../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498',1,'tjtransform']]],
+  ['options',['options',['../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_72.html b/doc/html/search/variables_72.html
index 465fe88..8a4ee7b 100644
--- a/doc/html/search/variables_72.html
+++ b/doc/html/search/variables_72.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_72.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_r">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf" target="_parent">r</a>
-  <span class="SRScope">tjtransform</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_72.js b/doc/html/search/variables_72.js
new file mode 100644
index 0000000..01cde35
--- /dev/null
+++ b/doc/html/search/variables_72.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['r',['r',['../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_74.html b/doc/html/search/variables_74.html
new file mode 100644
index 0000000..1665fb8
--- /dev/null
+++ b/doc/html/search/variables_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_74.js b/doc/html/search/variables_74.js
new file mode 100644
index 0000000..13a056e
--- /dev/null
+++ b/doc/html/search/variables_74.js
@@ -0,0 +1,9 @@
+var searchData=
+[
+  ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]],
+  ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]],
+  ['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]],
+  ['tjmcuwidth',['tjMCUWidth',['../group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c',1,'turbojpeg.h']]],
+  ['tjpixelsize',['tjPixelSize',['../group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c',1,'turbojpeg.h']]],
+  ['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/variables_77.html b/doc/html/search/variables_77.html
index b4c8d88..434c6df 100644
--- a/doc/html/search/variables_77.html
+++ b/doc/html/search/variables_77.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_77.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_w">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42" target="_parent">w</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_77.js b/doc/html/search/variables_77.js
new file mode 100644
index 0000000..4267002
--- /dev/null
+++ b/doc/html/search/variables_77.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['w',['w',['../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_78.html b/doc/html/search/variables_78.html
index a357691..602e879 100644
--- a/doc/html/search/variables_78.html
+++ b/doc/html/search/variables_78.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_78.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_x">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9" target="_parent">x</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_78.js b/doc/html/search/variables_78.js
new file mode 100644
index 0000000..41a27f2
--- /dev/null
+++ b/doc/html/search/variables_78.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['x',['x',['../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_79.html b/doc/html/search/variables_79.html
index a883bd1..17faef9 100644
--- a/doc/html/search/variables_79.html
+++ b/doc/html/search/variables_79.html
@@ -1,18 +1,18 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html><head><title></title>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
 <link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_79.js"></script>
 <script type="text/javascript" src="search.js"></script>
 </head>
 <body class="SRPage">
 <div id="SRIndex">
 <div class="SRStatus" id="Loading">Loading...</div>
-<div class="SRResult" id="SR_y">
- <div class="SREntry">
-  <a id="Item0" onkeydown="return searchResults.Nav(event,0)" onkeypress="return searchResults.Nav(event,0)" onkeyup="return searchResults.Nav(event,0)" class="SRSymbol" href="../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2" target="_parent">y</a>
-  <span class="SRScope">tjregion</span>
- </div>
-</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
 <div class="SRStatus" id="Searching">Searching...</div>
 <div class="SRStatus" id="NoMatches">No Matches</div>
 <script type="text/javascript"><!--
diff --git a/doc/html/search/variables_79.js b/doc/html/search/variables_79.js
new file mode 100644
index 0000000..86890a6
--- /dev/null
+++ b/doc/html/search/variables_79.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['y',['y',['../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2',1,'tjregion']]]
+];
diff --git a/doc/html/structtjregion.html b/doc/html/structtjregion.html
index 78e9311..33ee944 100644
--- a/doc/html/structtjregion.html
+++ b/doc/html/structtjregion.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: tjregion Struct Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -55,92 +66,106 @@
       <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
     </ul>
   </div>
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
 </div>
+
+</div><!-- top -->
 <div class="header">
   <div class="summary">
 <a href="#pub-attribs">Data Fields</a>  </div>
   <div class="headertitle">
 <div class="title">tjregion Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
-</div>
+</div><!--header-->
 <div class="contents">
-<!-- doxytag: class="tjregion" -->
+
 <p>Cropping region.  
  <a href="structtjregion.html#details">More...</a></p>
 
 <p><code>#include &lt;turbojpeg.h&gt;</code></p>
 <table class="memberdecls">
-<tr><td colspan="2"><h2><a name="pub-attribs"></a>
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
 Data Fields</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">x</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The left boundary of the cropping region.  <a href="#a4b6a37a93997091b26a75831fa291ad9"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">y</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The upper boundary of the cropping region.  <a href="#a7b3e0c24cfe87acc80e334cafdcf22c2"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">w</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The width of the cropping region.  <a href="#ab6eb73ceef584fc23c8c8097926dce42"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">h</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The height of the cropping region.  <a href="#aecefc45a26f4d8b60dd4d825c1710115"></a><br/></td></tr>
+<tr class="memitem:a4b6a37a93997091b26a75831fa291ad9"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">x</a></td></tr>
+<tr class="memdesc:a4b6a37a93997091b26a75831fa291ad9"><td class="mdescLeft">&#160;</td><td class="mdescRight">The left boundary of the cropping region.  <a href="#a4b6a37a93997091b26a75831fa291ad9">More...</a><br/></td></tr>
+<tr class="separator:a4b6a37a93997091b26a75831fa291ad9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">y</a></td></tr>
+<tr class="memdesc:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="mdescLeft">&#160;</td><td class="mdescRight">The upper boundary of the cropping region.  <a href="#a7b3e0c24cfe87acc80e334cafdcf22c2">More...</a><br/></td></tr>
+<tr class="separator:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab6eb73ceef584fc23c8c8097926dce42"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">w</a></td></tr>
+<tr class="memdesc:ab6eb73ceef584fc23c8c8097926dce42"><td class="mdescLeft">&#160;</td><td class="mdescRight">The width of the cropping region.  <a href="#ab6eb73ceef584fc23c8c8097926dce42">More...</a><br/></td></tr>
+<tr class="separator:ab6eb73ceef584fc23c8c8097926dce42"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aecefc45a26f4d8b60dd4d825c1710115"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">h</a></td></tr>
+<tr class="memdesc:aecefc45a26f4d8b60dd4d825c1710115"><td class="mdescLeft">&#160;</td><td class="mdescRight">The height of the cropping region.  <a href="#aecefc45a26f4d8b60dd4d825c1710115">More...</a><br/></td></tr>
+<tr class="separator:aecefc45a26f4d8b60dd4d825c1710115"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
 <div class="textblock"><p>Cropping region. </p>
-</div><hr/><h2>Field Documentation</h2>
-<a class="anchor" id="aecefc45a26f4d8b60dd4d825c1710115"></a><!-- doxytag: member="tjregion::h" ref="aecefc45a26f4d8b60dd4d825c1710115" args="" -->
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="aecefc45a26f4d8b60dd4d825c1710115"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">tjregion::h</a></td>
+          <td class="memname">int tjregion::h</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The height of the cropping region. </p>
 <p>Setting this to 0 is the equivalent of setting it to the height of the source JPEG image - y. </p>
 
 </div>
 </div>
-<a class="anchor" id="ab6eb73ceef584fc23c8c8097926dce42"></a><!-- doxytag: member="tjregion::w" ref="ab6eb73ceef584fc23c8c8097926dce42" args="" -->
+<a class="anchor" id="ab6eb73ceef584fc23c8c8097926dce42"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">tjregion::w</a></td>
+          <td class="memname">int tjregion::w</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The width of the cropping region. </p>
 <p>Setting this to 0 is the equivalent of setting it to the width of the source JPEG image - x. </p>
 
 </div>
 </div>
-<a class="anchor" id="a4b6a37a93997091b26a75831fa291ad9"></a><!-- doxytag: member="tjregion::x" ref="a4b6a37a93997091b26a75831fa291ad9" args="" -->
+<a class="anchor" id="a4b6a37a93997091b26a75831fa291ad9"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">tjregion::x</a></td>
+          <td class="memname">int tjregion::x</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The left boundary of the cropping region. </p>
 <p>This must be evenly divisible by the MCU block width (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a>.) </p>
 
 </div>
 </div>
-<a class="anchor" id="a7b3e0c24cfe87acc80e334cafdcf22c2"></a><!-- doxytag: member="tjregion::y" ref="a7b3e0c24cfe87acc80e334cafdcf22c2" args="" -->
+<a class="anchor" id="a7b3e0c24cfe87acc80e334cafdcf22c2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">int <a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">tjregion::y</a></td>
+          <td class="memname">int tjregion::y</td>
         </tr>
       </table>
-</div>
-<div class="memdoc">
+</div><div class="memdoc">
 
 <p>The upper boundary of the cropping region. </p>
 <p>This must be evenly divisible by the MCU block height (see <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>.) </p>
@@ -150,23 +175,12 @@
 <hr/>The documentation for this struct was generated from the following file:<ul>
 <li>turbojpeg.h</li>
 </ul>
-</div>
-<!-- window showing the filter options -->
-<div id="MSearchSelectWindow"
-     onmouseover="return searchBox.OnSearchSelectShow()"
-     onmouseout="return searchBox.OnSearchSelectHide()"
-     onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
-
-<!-- iframe showing the search results (closed by default) -->
-<div id="MSearchResultsWindow">
-<iframe src="javascript:void(0)" frameborder="0" 
-        name="MSearchResults" id="MSearchResults">
-</iframe>
-</div>
-
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/structtjscalingfactor.html b/doc/html/structtjscalingfactor.html
index 153522a..749b88e 100644
--- a/doc/html/structtjscalingfactor.html
+++ b/doc/html/structtjscalingfactor.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: tjscalingfactor Struct Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -55,70 +66,12 @@
       <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="summary">
-<a href="#pub-attribs">Data Fields</a>  </div>
-  <div class="headertitle">
-<div class="title">tjscalingfactor Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
-</div>
-<div class="contents">
-<!-- doxytag: class="tjscalingfactor" -->
-<p>Scaling factor.  
- <a href="structtjscalingfactor.html#details">More...</a></p>
-
-<p><code>#include &lt;turbojpeg.h&gt;</code></p>
-<table class="memberdecls">
-<tr><td colspan="2"><h2><a name="pub-attribs"></a>
-Data Fields</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">num</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Numerator.  <a href="#a9b011e57f981ee23083e2c1aa5e640ec"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">denom</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Denominator.  <a href="#aefbcdf3e9e62274b2d312c695f133ce3"></a><br/></td></tr>
-</table>
-<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
-<div class="textblock"><p>Scaling factor. </p>
-</div><hr/><h2>Field Documentation</h2>
-<a class="anchor" id="aefbcdf3e9e62274b2d312c695f133ce3"></a><!-- doxytag: member="tjscalingfactor::denom" ref="aefbcdf3e9e62274b2d312c695f133ce3" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">tjscalingfactor::denom</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Denominator. </p>
-
-</div>
-</div>
-<a class="anchor" id="a9b011e57f981ee23083e2c1aa5e640ec"></a><!-- doxytag: member="tjscalingfactor::num" ref="a9b011e57f981ee23083e2c1aa5e640ec" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">tjscalingfactor::num</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Numerator. </p>
-
-</div>
-</div>
-<hr/>The documentation for this struct was generated from the following file:<ul>
-<li>turbojpeg.h</li>
-</ul>
-</div>
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -127,8 +80,69 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- top -->
+<div class="header">
+  <div class="summary">
+<a href="#pub-attribs">Data Fields</a>  </div>
+  <div class="headertitle">
+<div class="title">tjscalingfactor Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>Scaling factor.  
+ <a href="structtjscalingfactor.html#details">More...</a></p>
+
+<p><code>#include &lt;turbojpeg.h&gt;</code></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
+Data Fields</h2></td></tr>
+<tr class="memitem:a9b011e57f981ee23083e2c1aa5e640ec"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">num</a></td></tr>
+<tr class="memdesc:a9b011e57f981ee23083e2c1aa5e640ec"><td class="mdescLeft">&#160;</td><td class="mdescRight">Numerator.  <a href="#a9b011e57f981ee23083e2c1aa5e640ec">More...</a><br/></td></tr>
+<tr class="separator:a9b011e57f981ee23083e2c1aa5e640ec"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aefbcdf3e9e62274b2d312c695f133ce3"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">denom</a></td></tr>
+<tr class="memdesc:aefbcdf3e9e62274b2d312c695f133ce3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Denominator.  <a href="#aefbcdf3e9e62274b2d312c695f133ce3">More...</a><br/></td></tr>
+<tr class="separator:aefbcdf3e9e62274b2d312c695f133ce3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Scaling factor. </p>
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="aefbcdf3e9e62274b2d312c695f133ce3"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjscalingfactor::denom</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Denominator. </p>
+
+</div>
+</div>
+<a class="anchor" id="a9b011e57f981ee23083e2c1aa5e640ec"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjscalingfactor::num</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Numerator. </p>
+
+</div>
+</div>
+<hr/>The documentation for this struct was generated from the following file:<ul>
+<li>turbojpeg.h</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/structtjtransform.html b/doc/html/structtjtransform.html
index 0a44092..87a5409 100644
--- a/doc/html/structtjtransform.html
+++ b/doc/html/structtjtransform.html
@@ -2,35 +2,46 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
 <title>TurboJPEG: tjtransform Struct Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
 <link href="search/search.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="search/search.js"></script>
-<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="doxygen-extra.css" rel="stylesheet" type="text/css"/>
 </head>
-<body onload='searchBox.OnSelectItem(0);'>
-<!-- Generated by Doxygen 1.7.4 -->
-<script type="text/javascript"><!--
-var searchBox = new SearchBox("searchBox", "search",false,'Search');
---></script>
-<div id="top">
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
 <div id="titlearea">
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
-   <div id="projectname">TurboJPEG&#160;<span id="projectnumber">1.2.1</span></div>
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.2.1</span>
+   </div>
   </td>
  </tr>
  </tbody>
 </table>
 </div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
   <div id="navrow1" class="tabs">
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="modules.html"><span>Modules</span></a></li>
       <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
-      <li id="searchli">
+      <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
         <span class="left">
           <img id="MSearchSelect" src="search/mag_sel.png"
@@ -55,134 +66,12 @@
       <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
     </ul>
   </div>
-</div>
-<div class="header">
-  <div class="summary">
-<a href="#pub-attribs">Data Fields</a>  </div>
-  <div class="headertitle">
-<div class="title">tjtransform Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
-</div>
-<div class="contents">
-<!-- doxytag: class="tjtransform" -->
-<p>Lossless transform.  
- <a href="structtjtransform.html#details">More...</a></p>
-
-<p><code>#include &lt;turbojpeg.h&gt;</code></p>
-<table class="memberdecls">
-<tr><td colspan="2"><h2><a name="pub-attribs"></a>
-Data Fields</h2></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top"><a class="el" href="structtjregion.html">tjregion</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">r</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="#ac324e5e442abec8a961e5bf219db12cf"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">op</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>.  <a href="#a2525aab4ba6978a1c273f74fef50e498"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">options</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The bitwise OR of one of more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>.  <a href="#ac0e74655baa4402209a21e1ae481c8f6"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">data</a></td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Arbitrary data that can be accessed within the body of the callback function.  <a href="#a688fe8f1a8ecc12a538d9e561cf338e3"></a><br/></td></tr>
-<tr><td class="memItemLeft" align="right" valign="top">int(*&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">customFilter</a> )(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td></tr>
-<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG file.  <a href="#a43ee1bcdd2a8d7249a756774f78793c1"></a><br/></td></tr>
-</table>
-<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
-<div class="textblock"><p>Lossless transform. </p>
-</div><hr/><h2>Field Documentation</h2>
-<a class="anchor" id="a43ee1bcdd2a8d7249a756774f78793c1"></a><!-- doxytag: member="tjtransform::customFilter" ref="a43ee1bcdd2a8d7249a756774f78793c1" args=")(short *coeffs, tjregion arrayRegion, tjregion planeRegion, int componentIndex, int transformIndex, struct tjtransform *transform)" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int(* <a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">tjtransform::customFilter</a>)(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG file. </p>
-<p>This allows for custom filters or other transformations to be applied in the frequency domain.</p>
-<dl><dt><b>Parameters:</b></dt><dd>
-  <table class="params">
-    <tr><td class="paramname">coeffs</td><td>pointer to an array of transformed DCT coefficients. (NOTE: this pointer is not guaranteed to be valid once the callback returns, so applications wishing to hand off the DCT coefficients to another function or library should make a copy of them within the body of the callback.) </td></tr>
-    <tr><td class="paramname">arrayRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the array pointed to by <code>coeffs</code> as well as its offset relative to the component plane. TurboJPEG implementations may choose to split each component plane into multiple DCT coefficient arrays and call the callback function once for each array. </td></tr>
-    <tr><td class="paramname">planeRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the component plane to which <code>coeffs</code> belongs </td></tr>
-    <tr><td class="paramname">componentID</td><td>ID number of the component plane to which <code>coeffs</code> belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in typical JPEG images.) </td></tr>
-    <tr><td class="paramname">transformID</td><td>ID number of the transformed image to which <code>coeffs</code> belongs. This is the same as the index of the transform in the transforms array that was passed to <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>. </td></tr>
-    <tr><td class="paramname">transform</td><td>a pointer to a <a class="el" href="structtjtransform.html" title="Lossless transform.">tjtransform</a> structure that specifies the parameters and/or cropping region for this transform</td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="return"><dt><b>Returns:</b></dt><dd>0 if the callback was successful, or -1 if an error occurred. </dd></dl>
-
-</div>
-</div>
-<a class="anchor" id="a688fe8f1a8ecc12a538d9e561cf338e3"></a><!-- doxytag: member="tjtransform::data" ref="a688fe8f1a8ecc12a538d9e561cf338e3" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">void* <a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">tjtransform::data</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Arbitrary data that can be accessed within the body of the callback function. </p>
-
-</div>
-</div>
-<a class="anchor" id="a2525aab4ba6978a1c273f74fef50e498"></a><!-- doxytag: member="tjtransform::op" ref="a2525aab4ba6978a1c273f74fef50e498" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">tjtransform::op</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>. </p>
-
-</div>
-</div>
-<a class="anchor" id="ac0e74655baa4402209a21e1ae481c8f6"></a><!-- doxytag: member="tjtransform::options" ref="ac0e74655baa4402209a21e1ae481c8f6" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int <a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">tjtransform::options</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>The bitwise OR of one of more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>. </p>
-
-</div>
-</div>
-<a class="anchor" id="ac324e5e442abec8a961e5bf219db12cf"></a><!-- doxytag: member="tjtransform::r" ref="ac324e5e442abec8a961e5bf219db12cf" args="" -->
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname"><a class="el" href="structtjregion.html">tjregion</a> <a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">tjtransform::r</a></td>
-        </tr>
-      </table>
-</div>
-<div class="memdoc">
-
-<p>Cropping region. </p>
-
-</div>
-</div>
-<hr/>The documentation for this struct was generated from the following file:<ul>
-<li>turbojpeg.h</li>
-</ul>
-</div>
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
      onmouseout="return searchBox.OnSearchSelectHide()"
      onkeydown="return searchBox.OnSearchSelectKey(event)">
-<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Variables</a></div>
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
 
 <!-- iframe showing the search results (closed by default) -->
 <div id="MSearchResultsWindow">
@@ -191,8 +80,133 @@
 </iframe>
 </div>
 
-<hr class="footer"/><address class="footer"><small>Generated on Fri Jun 29 2012 18:14:55 for TurboJPEG by&#160;
-<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </small></address>
+</div><!-- top -->
+<div class="header">
+  <div class="summary">
+<a href="#pub-attribs">Data Fields</a>  </div>
+  <div class="headertitle">
+<div class="title">tjtransform Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>Lossless transform.  
+ <a href="structtjtransform.html#details">More...</a></p>
+
+<p><code>#include &lt;turbojpeg.h&gt;</code></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
+Data Fields</h2></td></tr>
+<tr class="memitem:ac324e5e442abec8a961e5bf219db12cf"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structtjregion.html">tjregion</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">r</a></td></tr>
+<tr class="memdesc:ac324e5e442abec8a961e5bf219db12cf"><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="#ac324e5e442abec8a961e5bf219db12cf">More...</a><br/></td></tr>
+<tr class="separator:ac324e5e442abec8a961e5bf219db12cf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2525aab4ba6978a1c273f74fef50e498"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">op</a></td></tr>
+<tr class="memdesc:a2525aab4ba6978a1c273f74fef50e498"><td class="mdescLeft">&#160;</td><td class="mdescRight">One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>.  <a href="#a2525aab4ba6978a1c273f74fef50e498">More...</a><br/></td></tr>
+<tr class="separator:a2525aab4ba6978a1c273f74fef50e498"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac0e74655baa4402209a21e1ae481c8f6"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">options</a></td></tr>
+<tr class="memdesc:ac0e74655baa4402209a21e1ae481c8f6"><td class="mdescLeft">&#160;</td><td class="mdescRight">The bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>.  <a href="#ac0e74655baa4402209a21e1ae481c8f6">More...</a><br/></td></tr>
+<tr class="separator:ac0e74655baa4402209a21e1ae481c8f6"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="memItemLeft" align="right" valign="top">void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">data</a></td></tr>
+<tr class="memdesc:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Arbitrary data that can be accessed within the body of the callback function.  <a href="#a688fe8f1a8ecc12a538d9e561cf338e3">More...</a><br/></td></tr>
+<tr class="separator:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memItemLeft" align="right" valign="top">int(*&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">customFilter</a> )(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td></tr>
+<tr class="memdesc:a43ee1bcdd2a8d7249a756774f78793c1"><td class="mdescLeft">&#160;</td><td class="mdescRight">A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image.  <a href="#a43ee1bcdd2a8d7249a756774f78793c1">More...</a><br/></td></tr>
+<tr class="separator:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Lossless transform. </p>
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="a43ee1bcdd2a8d7249a756774f78793c1"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int(* tjtransform::customFilter)(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image. </p>
+<p>This allows for custom filters or other transformations to be applied in the frequency domain.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">coeffs</td><td>pointer to an array of transformed DCT coefficients. (NOTE: this pointer is not guaranteed to be valid once the callback returns, so applications wishing to hand off the DCT coefficients to another function or library should make a copy of them within the body of the callback.) </td></tr>
+    <tr><td class="paramname">arrayRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the array pointed to by <code>coeffs</code> as well as its offset relative to the component plane. TurboJPEG implementations may choose to split each component plane into multiple DCT coefficient arrays and call the callback function once for each array. </td></tr>
+    <tr><td class="paramname">planeRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the component plane to which <code>coeffs</code> belongs </td></tr>
+    <tr><td class="paramname">componentIndex</td><td>ID number of the component plane to which <code>coeffs</code> belongs (Y, Cb, and Cr have, respectively, IDs of 0, 1, and 2 in typical JPEG images.) </td></tr>
+    <tr><td class="paramname">transformIndex</td><td>ID number of the transformed image to which <code>coeffs</code> belongs. This is the same as the index of the transform in the <code>transforms</code> array that was passed to <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>. </td></tr>
+    <tr><td class="paramname">transform</td><td>a pointer to a <a class="el" href="structtjtransform.html" title="Lossless transform.">tjtransform</a> structure that specifies the parameters and/or cropping region for this transform</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if the callback was successful, or -1 if an error occurred. </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="a688fe8f1a8ecc12a538d9e561cf338e3"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">void* tjtransform::data</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Arbitrary data that can be accessed within the body of the callback function. </p>
+
+</div>
+</div>
+<a class="anchor" id="a2525aab4ba6978a1c273f74fef50e498"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjtransform::op</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ac0e74655baa4402209a21e1ae481c8f6"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjtransform::options</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ac324e5e442abec8a961e5bf219db12cf"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="structtjregion.html">tjregion</a> tjtransform::r</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Cropping region. </p>
+
+</div>
+</div>
+<hr/>The documentation for this struct was generated from the following file:<ul>
+<li>turbojpeg.h</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
 </body>
 </html>
diff --git a/doc/html/sync_off.png b/doc/html/sync_off.png
new file mode 100644
index 0000000..3b443fc
--- /dev/null
+++ b/doc/html/sync_off.png
Binary files differ
diff --git a/doc/html/sync_on.png b/doc/html/sync_on.png
new file mode 100644
index 0000000..e08320f
--- /dev/null
+++ b/doc/html/sync_on.png
Binary files differ
diff --git a/doc/html/tab_a.png b/doc/html/tab_a.png
index 2d99ef2..3b725c4 100644
--- a/doc/html/tab_a.png
+++ b/doc/html/tab_a.png
Binary files differ
diff --git a/doc/html/tab_b.png b/doc/html/tab_b.png
index b2c3d2b..e2b4a86 100644
--- a/doc/html/tab_b.png
+++ b/doc/html/tab_b.png
Binary files differ
diff --git a/doc/html/tab_h.png b/doc/html/tab_h.png
index c11f48f..fd5cb70 100644
--- a/doc/html/tab_h.png
+++ b/doc/html/tab_h.png
Binary files differ
diff --git a/doc/html/tab_s.png b/doc/html/tab_s.png
index 978943a..ab478c9 100644
--- a/doc/html/tab_s.png
+++ b/doc/html/tab_s.png
Binary files differ
diff --git a/doc/html/tabs.css b/doc/html/tabs.css
index 2192056..9cf578f 100644
--- a/doc/html/tabs.css
+++ b/doc/html/tabs.css
@@ -3,6 +3,7 @@
     width: 100%;
     z-index: 101;
     font-size: 13px;
+    font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif;
 }
 
 .tabs2 {
diff --git a/doxygen-extra.css b/doxygen-extra.css
new file mode 100644
index 0000000..5abbcc2
--- /dev/null
+++ b/doxygen-extra.css
@@ -0,0 +1,3 @@
+code {
+	color: #4665A2; 
+}
diff --git a/doxygen.config b/doxygen.config
index 5a0b5a7..f9e4d44 100644
--- a/doxygen.config
+++ b/doxygen.config
@@ -12,3 +12,5 @@
 JAVADOC_AUTOBRIEF = YES
 MAX_INITIALIZER_LINES = 0
 ALWAYS_DETAILED_SEC = YES
+HTML_TIMESTAMP = NO
+HTML_EXTRA_STYLESHEET = doxygen-extra.css
diff --git a/example.c b/example.c
index 1d6f6cc..0a65a6c 100644
--- a/example.c
+++ b/example.c
@@ -6,7 +6,7 @@
  * conjunction with the documentation file libjpeg.txt.
  *
  * This code will not do anything useful as-is, but it may be helpful as a
- * skeleton for constructing routines that call the JPEG library.  
+ * skeleton for constructing routines that call the JPEG library.
  *
  * We present these routines in the same coding style used in the JPEG code
  * (ANSI function definitions, etc); but you are of course free to code your
@@ -58,9 +58,9 @@
  * RGB color and is described by:
  */
 
-extern JSAMPLE * image_buffer;	/* Points to large array of R,G,B-order data */
-extern int image_height;	/* Number of rows in image */
-extern int image_width;		/* Number of columns in image */
+extern JSAMPLE * image_buffer;  /* Points to large array of R,G,B-order data */
+extern int image_height;        /* Number of rows in image */
+extern int image_width;         /* Number of columns in image */
 
 
 /*
@@ -88,9 +88,9 @@
    */
   struct jpeg_error_mgr jerr;
   /* More stuff */
-  FILE * outfile;		/* target file */
-  JSAMPROW row_pointer[1];	/* pointer to JSAMPLE row[s] */
-  int row_stride;		/* physical row width in image buffer */
+  FILE * outfile;               /* target file */
+  JSAMPROW row_pointer[1];      /* pointer to JSAMPLE row[s] */
+  int row_stride;               /* physical row width in image buffer */
 
   /* Step 1: allocate and initialize JPEG compression object */
 
@@ -122,10 +122,10 @@
   /* First we supply a description of the input image.
    * Four fields of the cinfo struct must be filled in:
    */
-  cinfo.image_width = image_width; 	/* image width and height, in pixels */
+  cinfo.image_width = image_width;      /* image width and height, in pixels */
   cinfo.image_height = image_height;
-  cinfo.input_components = 3;		/* # of color components per pixel */
-  cinfo.in_color_space = JCS_RGB; 	/* colorspace of input image */
+  cinfo.input_components = 3;           /* # of color components per pixel */
+  cinfo.in_color_space = JCS_RGB;       /* colorspace of input image */
   /* Now use the library's routine to set default compression parameters.
    * (You must set at least cinfo.in_color_space before calling this,
    * since the defaults depend on the source color space.)
@@ -151,7 +151,7 @@
    * To keep things simple, we pass one scanline per call; you can pass
    * more if you wish, though.
    */
-  row_stride = image_width * 3;	/* JSAMPLEs per row in image_buffer */
+  row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */
 
   while (cinfo.next_scanline < cinfo.image_height) {
     /* jpeg_write_scanlines expects an array of pointers to scanlines.
@@ -248,9 +248,9 @@
  */
 
 struct my_error_mgr {
-  struct jpeg_error_mgr pub;	/* "public" fields */
+  struct jpeg_error_mgr pub;    /* "public" fields */
 
-  jmp_buf setjmp_buffer;	/* for return to caller */
+  jmp_buf setjmp_buffer;        /* for return to caller */
 };
 
 typedef struct my_error_mgr * my_error_ptr;
@@ -293,9 +293,9 @@
    */
   struct my_error_mgr jerr;
   /* More stuff */
-  FILE * infile;		/* source file */
-  JSAMPARRAY buffer;		/* Output row buffer */
-  int row_stride;		/* physical row width in output buffer */
+  FILE * infile;                /* source file */
+  JSAMPARRAY buffer;            /* Output row buffer */
+  int row_stride;               /* physical row width in output buffer */
 
   /* In this example we want to open the input file before doing anything else,
    * so that the setjmp() error recovery below can assume the file is open.
@@ -356,12 +356,12 @@
    * output image dimensions available, as well as the output colormap
    * if we asked for color quantization.
    * In this example, we need to make an output work buffer of the right size.
-   */ 
+   */
   /* JSAMPLEs per row in output buffer */
   row_stride = cinfo.output_width * cinfo.output_components;
   /* Make a one-row-high sample array that will go away when done with image */
   buffer = (*cinfo.mem->alloc_sarray)
-		((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
+                ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
 
   /* Step 6: while (scan lines remain to be read) */
   /*           jpeg_read_scanlines(...); */
diff --git a/filelist.txt b/filelist.txt
deleted file mode 100644
index 3a115aa..0000000
--- a/filelist.txt
+++ /dev/null
@@ -1,216 +0,0 @@
-IJG JPEG LIBRARY:  FILE LIST
-
-This file was part of the Independent JPEG Group's software:
-Copyright (C) 1994-2009, Thomas G. Lane, Guido Vollbeding.
-Modifications:
-Copyright (C) 2010, D. R. Commander.
-For conditions of distribution and use, see the accompanying README file.
-
-
-Here is a road map to the files in the IJG JPEG distribution.  The
-distribution includes the JPEG library proper, plus two application
-programs ("cjpeg" and "djpeg") which use the library to convert JPEG
-files to and from some other popular image formats.  A third application
-"jpegtran" uses the library to do lossless conversion between different
-variants of JPEG.  There are also two stand-alone applications,
-"rdjpgcom" and "wrjpgcom".
-
-
-THE JPEG LIBRARY
-================
-
-Include files:
-
-jpeglib.h	JPEG library's exported data and function declarations.
-jconfig.h	Configuration declarations.  Note: this file is not present
-		in the distribution; it is generated during installation.
-jmorecfg.h	Additional configuration declarations; need not be changed
-		for a standard installation.
-jerror.h	Declares JPEG library's error and trace message codes.
-jinclude.h	Central include file used by all IJG .c files to reference
-		system include files.
-jpegint.h	JPEG library's internal data structures.
-jchuff.h	Private declarations for Huffman encoder modules.
-jdhuff.h	Private declarations for Huffman decoder modules.
-jdct.h		Private declarations for forward & reverse DCT subsystems.
-jmemsys.h	Private declarations for memory management subsystem.
-jversion.h	Version information.
-
-Applications using the library should include jpeglib.h (which in turn
-includes jconfig.h and jmorecfg.h).  Optionally, jerror.h may be included
-if the application needs to reference individual JPEG error codes.  The
-other include files are intended for internal use and would not normally
-be included by an application program.  (cjpeg/djpeg/etc do use jinclude.h,
-since its function is to improve portability of the whole IJG distribution.
-Most other applications will directly include the system include files they
-want, and hence won't need jinclude.h.)
-
-
-C source code files:
-
-These files contain most of the functions intended to be called directly by
-an application program:
-
-jcapimin.c	Application program interface: core routines for compression.
-jcapistd.c	Application program interface: standard compression.
-jdapimin.c	Application program interface: core routines for decompression.
-jdapistd.c	Application program interface: standard decompression.
-jcomapi.c	Application program interface routines common to compression
-		and decompression.
-jcparam.c	Compression parameter setting helper routines.
-jctrans.c	API and library routines for transcoding compression.
-jdtrans.c	API and library routines for transcoding decompression.
-
-Compression side of the library:
-
-jcinit.c	Initialization: determines which other modules to use.
-jcmaster.c	Master control: setup and inter-pass sequencing logic.
-jcmainct.c	Main buffer controller (preprocessor => JPEG compressor).
-jcprepct.c	Preprocessor buffer controller.
-jccoefct.c	Buffer controller for DCT coefficient buffer.
-jccolor.c	Color space conversion.
-jcsample.c	Downsampling.
-jcdctmgr.c	DCT manager (DCT implementation selection & control).
-jfdctint.c	Forward DCT using slow-but-accurate integer method.
-jfdctfst.c	Forward DCT using faster, less accurate integer method.
-jfdctflt.c	Forward DCT using floating-point arithmetic.
-jchuff.c	Huffman entropy coding for sequential JPEG.
-jcphuff.c	Huffman entropy coding for progressive JPEG.
-jcarith.c	Arithmetic entropy coding.
-jcmarker.c	JPEG marker writing.
-jdatadst.c	Data destination managers for memory and stdio output.
-
-Decompression side of the library:
-
-jdmaster.c	Master control: determines which other modules to use.
-jdinput.c	Input controller: controls input processing modules.
-jdmainct.c	Main buffer controller (JPEG decompressor => postprocessor).
-jdcoefct.c	Buffer controller for DCT coefficient buffer.
-jdpostct.c	Postprocessor buffer controller.
-jdmarker.c	JPEG marker reading.
-jdhuff.c	Huffman entropy decoding for sequential JPEG.
-jdphuff.c	Huffman entropy decoding for progressive JPEG.
-jdarith.c	Arithmetic entropy decoding.
-jddctmgr.c	IDCT manager (IDCT implementation selection & control).
-jidctint.c	Inverse DCT using slow-but-accurate integer method.
-jidctfst.c	Inverse DCT using faster, less accurate integer method.
-jidctflt.c	Inverse DCT using floating-point arithmetic.
-jidctred.c	Inverse DCTs with reduced-size outputs.
-jdsample.c	Upsampling.
-jdcolor.c	Color space conversion.
-jdmerge.c	Merged upsampling/color conversion (faster, lower quality).
-jquant1.c	One-pass color quantization using a fixed-spacing colormap.
-jquant2.c	Two-pass color quantization using a custom-generated colormap.
-		Also handles one-pass quantization to an externally given map.
-jdatasrc.c	Data source managers for memory and stdio input.
-
-Support files for both compression and decompression:
-
-jaricom.c	Tables for common use in arithmetic entropy encoding and
-		decoding routines.
-jerror.c	Standard error handling routines (application replaceable).
-jmemmgr.c	System-independent (more or less) memory management code.
-jutils.c	Miscellaneous utility routines.
-
-jmemmgr.c relies on a system-dependent memory management module.  The IJG
-distribution includes the following implementations of the system-dependent
-module:
-
-jmemnobs.c	"No backing store": assumes adequate virtual memory exists.
-jmemansi.c	Makes temporary files with ANSI-standard routine tmpfile().
-jmemname.c	Makes temporary files with program-generated file names.
-jmemdos.c	Custom implementation for MS-DOS (16-bit environment only):
-		can use extended and expanded memory as well as temp files.
-jmemmac.c	Custom implementation for Apple Macintosh.
-
-Exactly one of the system-dependent modules should be configured into an
-installed JPEG library (see install.txt for hints about which one to use).
-On unusual systems you may find it worthwhile to make a special
-system-dependent memory manager.
-
-
-Non-C source code files:
-
-jmemdosa.asm	80x86 assembly code support for jmemdos.c; used only in
-		MS-DOS-specific configurations of the JPEG library.
-
-
-CJPEG/DJPEG/JPEGTRAN
-====================
-
-Include files:
-
-cdjpeg.h	Declarations shared by cjpeg/djpeg/jpegtran modules.
-cderror.h	Additional error and trace message codes for cjpeg et al.
-transupp.h	Declarations for jpegtran support routines in transupp.c.
-
-C source code files:
-
-cjpeg.c		Main program for cjpeg.
-djpeg.c		Main program for djpeg.
-jpegtran.c	Main program for jpegtran.
-cdjpeg.c	Utility routines used by all three programs.
-rdcolmap.c	Code to read a colormap file for djpeg's "-map" switch.
-rdswitch.c	Code to process some of cjpeg's more complex switches.
-		Also used by jpegtran.
-transupp.c	Support code for jpegtran: lossless image manipulations.
-
-Image file reader modules for cjpeg:
-
-rdbmp.c		BMP file input.
-rdgif.c		GIF file input (now just a stub).
-rdppm.c		PPM/PGM file input.
-rdrle.c		Utah RLE file input.
-rdtarga.c	Targa file input.
-
-Image file writer modules for djpeg:
-
-wrbmp.c		BMP file output.
-wrgif.c		GIF file output (a mere shadow of its former self).
-wrppm.c		PPM/PGM file output.
-wrrle.c		Utah RLE file output.
-wrtarga.c	Targa file output.
-
-
-RDJPGCOM/WRJPGCOM
-=================
-
-C source code files:
-
-rdjpgcom.c	Stand-alone rdjpgcom application.
-wrjpgcom.c	Stand-alone wrjpgcom application.
-
-These programs do not depend on the IJG library.  They do use
-jconfig.h and jinclude.h, only to improve portability.
-
-
-ADDITIONAL FILES
-================
-
-Documentation (see README for a guide to the documentation files):
-
-README		Master documentation file.
-*.txt		Other documentation files.
-*.1		Documentation in Unix man page format.
-change.log	Version-to-version change highlights.
-example.c	Sample code for calling JPEG library.
-
-Configuration/installation files and programs (see install.txt for more info):
-
-configure	Unix shell script to perform automatic configuration.
-configure.ac	Source file for use with Autoconf to generate configure.
-ltmain.sh	Support scripts for configure (from GNU libtool).
-config.guess
-config.sub
-depcomp
-missing
-install-sh	Install shell script for those Unix systems lacking one.
-Makefile.in	Makefile input for configure.
-Makefile.am	Source file for use with Automake to generate Makefile.in.
-jconfig.txt	Template for making jconfig.h by hand.
-aclocal.m4	M4 macro definitions for use with Autoconf.
-
-Test files (see install.txt for test procedure):
-
-test*.*		Source and comparison files for confidence test.
-		These are binary image files, NOT text files.
diff --git a/install.txt b/install.txt
deleted file mode 100644
index 1327dc4..0000000
--- a/install.txt
+++ /dev/null
@@ -1,1096 +0,0 @@
-INSTALLATION INSTRUCTIONS for the Independent JPEG Group's JPEG software
-
-Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
-This file is part of the Independent JPEG Group's software.
-For conditions of distribution and use, see the accompanying README file.
-
-
-This file explains how to configure and install the IJG software.  We have
-tried to make this software extremely portable and flexible, so that it can be
-adapted to almost any environment.  The downside of this decision is that the
-installation process is complicated.  We have provided shortcuts to simplify
-the task on common systems.  But in any case, you will need at least a little
-familiarity with C programming and program build procedures for your system.
-
-If you are only using this software as part of a larger program, the larger
-program's installation procedure may take care of configuring the IJG code.
-For example, Ghostscript's installation script will configure the IJG code.
-You don't need to read this file if you just want to compile Ghostscript.
-
-If you are on a Unix machine, you may not need to read this file at all.
-Try doing
-	./configure
-	make
-	make test
-If that doesn't complain, do
-	make install
-(better do "make -n install" first to see if the makefile will put the files
-where you want them).  Read further if you run into snags or want to customize
-the code for your system.
-
-
-TABLE OF CONTENTS
------------------
-
-Before you start
-Configuring the software:
-	using the automatic "configure" script
-	using one of the supplied jconfig and makefile files
-	by hand
-Building the software
-Testing the software
-Installing the software
-Optional stuff
-Optimization
-Hints for specific systems
-
-
-BEFORE YOU START
-================
-
-Before installing the software you must unpack the distributed source code.
-Since you are reading this file, you have probably already succeeded in this
-task.  However, there is a potential for error if you needed to convert the
-files to the local standard text file format (for example, if you are on
-MS-DOS you may have converted LF end-of-line to CR/LF).  You must apply
-such conversion to all the files EXCEPT those whose names begin with "test".
-The test files contain binary data; if you change them in any way then the
-self-test will give bad results.
-
-Please check the last section of this file to see if there are hints for the
-specific machine or compiler you are using.
-
-
-CONFIGURING THE SOFTWARE
-========================
-
-To configure the IJG code for your system, you need to create two files:
-  * jconfig.h: contains values for system-dependent #define symbols.
-  * Makefile: controls the compilation process.
-(On a non-Unix machine, you may create "project files" or some other
-substitute for a Makefile.  jconfig.h is needed in any environment.)
-
-We provide three different ways to generate these files:
-  * On a Unix system, you can just run the "configure" script.
-  * We provide sample jconfig files and makefiles for popular machines;
-    if your machine matches one of the samples, just copy the right sample
-    files to jconfig.h and Makefile.
-  * If all else fails, read the instructions below and make your own files.
-
-
-Configuring the software using the automatic "configure" script
----------------------------------------------------------------
-
-If you are on a Unix machine, you can just type
-	./configure
-and let the configure script construct appropriate configuration files.
-If you're using "csh" on an old version of System V, you might need to type
-	sh configure
-instead to prevent csh from trying to execute configure itself.
-Expect configure to run for a few minutes, particularly on slower machines;
-it works by compiling a series of test programs.
-
-Configure was created with GNU Autoconf and it follows the usual conventions
-for GNU configure scripts.  It makes a few assumptions that you may want to
-override.  You can do this by providing optional switches to configure:
-
-* Configure will build both static and shared libraries, if possible.
-If you want to build libjpeg only as a static library, say
-	./configure --disable-shared
-If you want to build libjpeg only as a shared library, say
-	./configure --disable-static
-Configure uses GNU libtool to take care of system-dependent shared library
-building methods.
-
-* Configure will use gcc (GNU C compiler) if it's available, otherwise cc.
-To force a particular compiler to be selected, use the CC option, for example
-	./configure CC='cc'
-The same method can be used to include any unusual compiler switches.
-For example, on HP-UX you probably want to say
-	./configure CC='cc -Aa'
-to get HP's compiler to run in ANSI mode.
-
-* The default CFLAGS setting is "-g" for non-gcc compilers, "-g -O2" for gcc.
-You can override this by saying, for example,
-	./configure CFLAGS='-O2'
-if you want to compile without debugging support.
-
-* Configure will set up the makefile so that "make install" will install files
-into /usr/local/bin, /usr/local/man, etc.  You can specify an installation
-prefix other than "/usr/local" by giving configure the option "--prefix=PATH".
-
-* If you don't have a lot of swap space, you may need to enable the IJG
-software's internal virtual memory mechanism.  To do this, give the option
-"--enable-maxmem=N" where N is the default maxmemory limit in megabytes.
-This is discussed in more detail under "Selecting a memory manager", below.
-You probably don't need to worry about this on reasonably-sized Unix machines,
-unless you plan to process very large images.
-
-Configure has some other features that are useful if you are cross-compiling
-or working in a network of multiple machine types; but if you need those
-features, you probably already know how to use them.
-
-
-Configuring the software using one of the supplied jconfig and makefile files
------------------------------------------------------------------------------
-
-If you have one of these systems, you can just use the provided configuration
-files:
-
-Makefile	jconfig file	System and/or compiler
-
-makefile.manx	jconfig.manx	Amiga, Manx Aztec C
-makefile.sas	jconfig.sas	Amiga, SAS C
-makeproj.mac	jconfig.mac	Apple Macintosh, Metrowerks CodeWarrior
-mak*jpeg.st	jconfig.st	Atari ST/STE/TT, Pure C or Turbo C
-makefile.bcc	jconfig.bcc	MS-DOS or OS/2, Borland C
-makefile.dj	jconfig.dj	MS-DOS, DJGPP (Delorie's port of GNU C)
-makefile.mc6	jconfig.mc6	MS-DOS, Microsoft C (16-bit only)
-makefile.wat	jconfig.wat	MS-DOS, OS/2, or Windows NT, Watcom C
-makefile.vc	jconfig.vc	Windows NT/95, MS Visual C++
-make*.vc6	jconfig.vc	Windows NT/95, MS Visual C++ 6
-make*.v10	jconfig.vc	Windows NT/95, MS Visual C++ 2010 (v10)
-makefile.mms	jconfig.vms	Digital VMS, with MMS software
-makefile.vms	jconfig.vms	Digital VMS, without MMS software
-
-Copy the proper jconfig file to jconfig.h and the makefile to Makefile (or
-whatever your system uses as the standard makefile name).  For more info see
-the appropriate system-specific hints section near the end of this file.
-
-
-Configuring the software by hand
---------------------------------
-
-First, generate a jconfig.h file.  If you are moderately familiar with C,
-the comments in jconfig.txt should be enough information to do this; just
-copy jconfig.txt to jconfig.h and edit it appropriately.  Otherwise, you may
-prefer to use the ckconfig.c program.  You will need to compile and execute
-ckconfig.c by hand --- we hope you know at least enough to do that.
-ckconfig.c may not compile the first try (in fact, the whole idea is for it
-to fail if anything is going to).  If you get compile errors, fix them by
-editing ckconfig.c according to the directions given in ckconfig.c.  Once
-you get it to run, it will write a suitable jconfig.h file, and will also
-print out some advice about which makefile to use.
-
-You may also want to look at the canned jconfig files, if there is one for a
-system similar to yours.
-
-Second, select a makefile and copy it to Makefile (or whatever your system
-uses as the standard makefile name).  The most generic makefiles we provide
-are
-	makefile.ansi:	if your C compiler supports function prototypes
-	makefile.unix:	if not.
-(You have function prototypes if ckconfig.c put "#define HAVE_PROTOTYPES"
-in jconfig.h.)  You may want to start from one of the other makefiles if
-there is one for a system similar to yours.
-
-Look over the selected Makefile and adjust options as needed.  In particular
-you may want to change the CC and CFLAGS definitions.  For instance, if you
-are using GCC, set CC=gcc.  If you had to use any compiler switches to get
-ckconfig.c to work, make sure the same switches are in CFLAGS.
-
-If you are on a system that doesn't use makefiles, you'll need to set up
-project files (or whatever you do use) to compile all the source files and
-link them into executable files cjpeg, djpeg, jpegtran, rdjpgcom, and wrjpgcom.
-See the file lists in any of the makefiles to find out which files go into
-each program.  Note that the provided makefiles all make a "library" file
-libjpeg first, but you don't have to do that if you don't want to; the file
-lists identify which source files are actually needed for compression,
-decompression, or both.  As a last resort, you can make a batch script that
-just compiles everything and links it all together; makefile.vms is an example
-of this (it's for VMS systems that have no make-like utility).
-
-Here are comments about some specific configuration decisions you'll
-need to make:
-
-Command line style
-------------------
-
-These programs can use a Unix-like command line style which supports
-redirection and piping, like this:
-	cjpeg inputfile >outputfile
-	cjpeg <inputfile >outputfile
-	source program | cjpeg >outputfile
-The simpler "two file" command line style is just
-	cjpeg inputfile outputfile
-You may prefer the two-file style, particularly if you don't have pipes.
-
-You MUST use two-file style on any system that doesn't cope well with binary
-data fed through stdin/stdout; this is true for some MS-DOS compilers, for
-example.  If you're not on a Unix system, it's safest to assume you need
-two-file style.  (But if your compiler provides either the POSIX-standard
-fdopen() library routine or a Microsoft-compatible setmode() routine, you
-can safely use the Unix command line style, by defining USE_FDOPEN or
-USE_SETMODE respectively.)
-
-To use the two-file style, make jconfig.h say "#define TWO_FILE_COMMANDLINE".
-
-Selecting a memory manager
---------------------------
-
-The IJG code is capable of working on images that are too big to fit in main
-memory; data is swapped out to temporary files as necessary.  However, the
-code to do this is rather system-dependent.  We provide five different
-memory managers:
-
-* jmemansi.c	This version uses the ANSI-standard library routine tmpfile(),
-		which some non-ANSI systems lack.  On some systems
-		tmpfile() may put the temporary file in a non-optimal
-		location; if you don't like what it does, use jmemname.c.
-
-* jmemname.c	This version creates named temporary files.  For anything
-		except a Unix machine, you'll need to configure the
-		select_file_name() routine appropriately; see the comments
-		near the head of jmemname.c.  If you use this version, define
-		NEED_SIGNAL_CATCHER in jconfig.h to make sure the temp files
-		are removed if the program is aborted.
-
-* jmemnobs.c	(That stands for No Backing Store :-).)  This will compile on
-		almost any system, but it assumes you have enough main memory
-		or virtual memory to hold the biggest images you work with.
-
-* jmemdos.c	This should be used with most 16-bit MS-DOS compilers.
-		See the system-specific notes about MS-DOS for more info.
-		IMPORTANT: if you use this, define USE_MSDOS_MEMMGR in
-		jconfig.h, and include the assembly file jmemdosa.asm in the
-		programs.  The supplied makefiles and jconfig files for
-		16-bit MS-DOS compilers already do both.
-
-* jmemmac.c	Custom version for Apple Macintosh; see the system-specific
-		notes for Macintosh for more info.
-
-To use a particular memory manager, change the SYSDEPMEM variable in your
-makefile to equal the corresponding object file name (for example, jmemansi.o
-or jmemansi.obj for jmemansi.c).
-
-If you have plenty of (real or virtual) main memory, just use jmemnobs.c.
-"Plenty" means about ten bytes for every pixel in the largest images
-you plan to process, so a lot of systems don't meet this criterion.
-If yours doesn't, try jmemansi.c first.  If that doesn't compile, you'll have
-to use jmemname.c; be sure to adjust select_file_name() for local conditions.
-You may also need to change unlink() to remove() in close_backing_store().
-
-Except with jmemnobs.c or jmemmac.c, you need to adjust the DEFAULT_MAX_MEM
-setting to a reasonable value for your system (either by adding a #define for
-DEFAULT_MAX_MEM to jconfig.h, or by adding a -D switch to the Makefile).
-This value limits the amount of data space the program will attempt to
-allocate.  Code and static data space isn't counted, so the actual memory
-needs for cjpeg or djpeg are typically 100 to 150KB more than the max-memory
-setting.  Larger max-memory settings reduce the amount of I/O needed to
-process a large image, but too large a value can result in "insufficient
-memory" failures.  On most Unix machines (and other systems with virtual
-memory), just set DEFAULT_MAX_MEM to several million and forget it.  At the
-other end of the spectrum, for MS-DOS machines you probably can't go much
-above 300K to 400K.  (On MS-DOS the value refers to conventional memory only.
-Extended/expanded memory is handled separately by jmemdos.c.)
-
-
-BUILDING THE SOFTWARE
-=====================
-
-Now you should be able to compile the software.  Just say "make" (or
-whatever's necessary to start the compilation).  Have a cup of coffee.
-
-Here are some things that could go wrong:
-
-If your compiler complains about undefined structures, you should be able to
-shut it up by putting "#define INCOMPLETE_TYPES_BROKEN" in jconfig.h.
-
-If you have trouble with missing system include files or inclusion of the
-wrong ones, read jinclude.h.  This shouldn't happen if you used configure
-or ckconfig.c to set up jconfig.h.
-
-There are a fair number of routines that do not use all of their parameters;
-some compilers will issue warnings about this, which you can ignore.  There
-are also a few configuration checks that may give "unreachable code" warnings.
-Any other warning deserves investigation.
-
-If you don't have a getenv() library routine, define NO_GETENV.
-
-Also see the system-specific hints, below.
-
-
-TESTING THE SOFTWARE
-====================
-
-As a quick test of functionality we've included a small sample image in
-several forms:
-	testorig.jpg	Starting point for the djpeg tests.
-	testimg.ppm	The output of djpeg testorig.jpg
-	testimg.bmp	The output of djpeg -bmp -colors 256 testorig.jpg
-	testimg.jpg	The output of cjpeg testimg.ppm
-	testprog.jpg	Progressive-mode equivalent of testorig.jpg.
-	testimgp.jpg	The output of cjpeg -progressive -optimize testimg.ppm
-(The first- and second-generation .jpg files aren't identical since JPEG is
-lossy.)  If you can generate duplicates of the testimg* files then you
-probably have working programs.
-
-With most of the makefiles, "make test" will perform the necessary
-comparisons.
-
-If you're using a makefile that doesn't provide the test option, run djpeg
-and cjpeg by hand and compare the output files to testimg* with whatever
-binary file comparison tool you have.  The files should be bit-for-bit
-identical.
-
-If the programs complain "MAX_ALLOC_CHUNK is wrong, please fix", then you
-need to reduce MAX_ALLOC_CHUNK to a value that fits in type size_t.
-Try adding "#define MAX_ALLOC_CHUNK 65520L" to jconfig.h.  A less likely
-configuration error is "ALIGN_TYPE is wrong, please fix": defining ALIGN_TYPE
-as long should take care of that one.
-
-If the cjpeg test run fails with "Missing Huffman code table entry", it's a
-good bet that you needed to define RIGHT_SHIFT_IS_UNSIGNED.  Go back to the
-configuration step and run ckconfig.c.  (This is a good plan for any other
-test failure, too.)
-
-If you are using Unix (one-file) command line style on a non-Unix system,
-it's a good idea to check that binary I/O through stdin/stdout actually
-works.  You should get the same results from "djpeg <testorig.jpg >out.ppm"
-as from "djpeg -outfile out.ppm testorig.jpg".  Note that the makefiles all
-use the latter style and therefore do not exercise stdin/stdout!  If this
-check fails, try recompiling with USE_SETMODE or USE_FDOPEN defined.
-If it still doesn't work, better use two-file style.
-
-If you chose a memory manager other than jmemnobs.c, you should test that
-temporary-file usage works.  Try "djpeg -bmp -colors 256 -max 0 testorig.jpg"
-and make sure its output matches testimg.bmp.  If you have any really large
-images handy, try compressing them with -optimize and/or decompressing with
--colors 256 to make sure your DEFAULT_MAX_MEM setting is not too large.
-
-NOTE: this is far from an exhaustive test of the JPEG software; some modules,
-such as 1-pass color quantization, are not exercised at all.  It's just a
-quick test to give you some confidence that you haven't missed something
-major.
-
-
-INSTALLING THE SOFTWARE
-=======================
-
-Once you're done with the above steps, you can install the software by
-copying the executable files (cjpeg, djpeg, jpegtran, rdjpgcom, and wrjpgcom)
-to wherever you normally install programs.  On Unix systems, you'll also want
-to put the man pages (cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1)
-in the man-page directory.  The pre-fab makefiles don't support this step
-since there's such a wide variety of installation procedures on different
-systems.
-
-If you generated a Makefile with the "configure" script, you can just say
-	make install
-to install the programs and their man pages into the standard places.
-(You'll probably need to be root to do this.)  We recommend first saying
-	make -n install
-to see where configure thought the files should go.  You may need to edit
-the Makefile, particularly if your system's conventions for man page
-filenames don't match what configure expects.
-
-If you want to install the IJG library itself, for use in compiling other
-programs besides ours, then you need to put the four include files
-	jpeglib.h jerror.h jconfig.h jmorecfg.h
-into your include-file directory, and put the library file libjpeg.a
-(extension may vary depending on system) wherever library files go.
-If you generated a Makefile with "configure", it will do what it thinks
-is the right thing if you say
-	make install-lib
-
-
-OPTIONAL STUFF
-==============
-
-Progress monitor:
-
-If you like, you can #define PROGRESS_REPORT (in jconfig.h) to enable display
-of percent-done progress reports.  The routine provided in cdjpeg.c merely
-prints percentages to stderr, but you can customize it to do something
-fancier.
-
-Utah RLE file format support:
-
-We distribute the software with support for RLE image files (Utah Raster
-Toolkit format) disabled, because the RLE support won't compile without the
-Utah library.  If you have URT version 3.1 or later, you can enable RLE
-support as follows:
-	1.  #define RLE_SUPPORTED in jconfig.h.
-	2.  Add a -I option to CFLAGS in the Makefile for the directory
-	    containing the URT .h files (typically the "include"
-	    subdirectory of the URT distribution).
-	3.  Add -L... -lrle to LDLIBS in the Makefile, where ... specifies
-	    the directory containing the URT "librle.a" file (typically the
-	    "lib" subdirectory of the URT distribution).
-
-Support for 12-bit-deep pixel data:
-
-The JPEG standard allows either 8-bit or 12-bit data precision.  (For color,
-this means 8 or 12 bits per channel, of course.)  If you need to work with
-deeper than 8-bit data, you can compile the IJG code for 12-bit operation.
-To do so:
-  1. In jmorecfg.h, define BITS_IN_JSAMPLE as 12 rather than 8.
-  2. In jconfig.h, undefine BMP_SUPPORTED, RLE_SUPPORTED, and TARGA_SUPPORTED,
-     because the code for those formats doesn't handle 12-bit data and won't
-     even compile.  (The PPM code does work, as explained below.  The GIF
-     code works too; it scales 8-bit GIF data to and from 12-bit depth
-     automatically.)
-  3. Compile.  Don't expect "make test" to pass, since the supplied test
-     files are for 8-bit data.
-
-Currently, 12-bit support does not work on 16-bit-int machines.
-
-Note that a 12-bit version will not read 8-bit JPEG files, nor vice versa;
-so you'll want to keep around a regular 8-bit compilation as well.
-(Run-time selection of data depth, to allow a single copy that does both,
-is possible but would probably slow things down considerably; it's very low
-on our to-do list.)
-
-The PPM reader (rdppm.c) can read 12-bit data from either text-format or
-binary-format PPM and PGM files.  Binary-format PPM/PGM files which have a
-maxval greater than 255 are assumed to use 2 bytes per sample, MSB first
-(big-endian order).  As of early 1995, 2-byte binary format is not
-officially supported by the PBMPLUS library, but it is expected that a
-future release of PBMPLUS will support it.  Note that the PPM reader will
-read files of any maxval regardless of the BITS_IN_JSAMPLE setting; incoming
-data is automatically rescaled to either maxval=255 or maxval=4095 as
-appropriate for the cjpeg bit depth.
-
-The PPM writer (wrppm.c) will normally write 2-byte binary PPM or PGM
-format, maxval 4095, when compiled with BITS_IN_JSAMPLE=12.  Since this
-format is not yet widely supported, you can disable it by compiling wrppm.c
-with PPM_NORAWWORD defined; then the data is scaled down to 8 bits to make a
-standard 1-byte/sample PPM or PGM file.  (Yes, this means still another copy
-of djpeg to keep around.  But hopefully you won't need it for very long.
-Poskanzer's supposed to get that new PBMPLUS release out Real Soon Now.)
-
-Of course, if you are working with 12-bit data, you probably have it stored
-in some other, nonstandard format.  In that case you'll probably want to
-write your own I/O modules to read and write your format.
-
-Note that a 12-bit version of cjpeg always runs in "-optimize" mode, in
-order to generate valid Huffman tables.  This is necessary because our
-default Huffman tables only cover 8-bit data.
-
-Removing code:
-
-If you need to make a smaller version of the JPEG software, some optional
-functions can be removed at compile time.  See the xxx_SUPPORTED #defines in
-jconfig.h and jmorecfg.h.  If at all possible, we recommend that you leave in
-decoder support for all valid JPEG files, to ensure that you can read anyone's
-output.  Taking out support for image file formats that you don't use is the
-most painless way to make the programs smaller.  Another possibility is to
-remove some of the DCT methods: in particular, the "IFAST" method may not be
-enough faster than the others to be worth keeping on your machine.  (If you
-do remove ISLOW or IFAST, be sure to redefine JDCT_DEFAULT or JDCT_FASTEST
-to a supported method, by adding a #define in jconfig.h.)
-
-
-OPTIMIZATION
-============
-
-Unless you own a Cray, you'll probably be interested in making the JPEG
-software go as fast as possible.  This section covers some machine-dependent
-optimizations you may want to try.  We suggest that before trying any of
-this, you first get the basic installation to pass the self-test step.
-Repeat the self-test after any optimization to make sure that you haven't
-broken anything.
-
-The integer DCT routines perform a lot of multiplications.  These
-multiplications must yield 32-bit results, but none of their input values
-are more than 16 bits wide.  On many machines, notably the 680x0 and 80x86
-CPUs, a 16x16=>32 bit multiply instruction is faster than a full 32x32=>32
-bit multiply.  Unfortunately there is no portable way to specify such a
-multiplication in C, but some compilers can generate one when you use the
-right combination of casts.  See the MULTIPLYxxx macro definitions in
-jdct.h.  If your compiler makes "int" be 32 bits and "short" be 16 bits,
-defining SHORTxSHORT_32 is fairly likely to work.  When experimenting with
-alternate definitions, be sure to test not only whether the code still works
-(use the self-test), but also whether it is actually faster --- on some
-compilers, alternate definitions may compute the right answer, yet be slower
-than the default.  Timing cjpeg on a large PGM (grayscale) input file is the
-best way to check this, as the DCT will be the largest fraction of the runtime
-in that mode.  (Note: some of the distributed compiler-specific jconfig files
-already contain #define switches to select appropriate MULTIPLYxxx
-definitions.)
-
-If your machine has sufficiently fast floating point hardware, you may find
-that the float DCT method is faster than the integer DCT methods, even
-after tweaking the integer multiply macros.  In that case you may want to
-make the float DCT be the default method.  (The only objection to this is
-that float DCT results may vary slightly across machines.)  To do that, add
-"#define JDCT_DEFAULT JDCT_FLOAT" to jconfig.h.  Even if you don't change
-the default, you should redefine JDCT_FASTEST, which is the method selected
-by djpeg's -fast switch.  Don't forget to update the documentation files
-(usage.txt and/or cjpeg.1, djpeg.1) to agree with what you've done.
-
-If access to "short" arrays is slow on your machine, it may be a win to
-define type JCOEF as int rather than short.  This will cost a good deal of
-memory though, particularly in some multi-pass modes, so don't do it unless
-you have memory to burn and short is REALLY slow.
-
-If your compiler can compile function calls in-line, make sure the INLINE
-macro in jmorecfg.h is defined as the keyword that marks a function
-inline-able.  Some compilers have a switch that tells the compiler to inline
-any function it thinks is profitable (e.g., -finline-functions for gcc).
-Enabling such a switch is likely to make the compiled code bigger but faster.
-
-In general, it's worth trying the maximum optimization level of your compiler,
-and experimenting with any optional optimizations such as loop unrolling.
-(Unfortunately, far too many compilers have optimizer bugs ... be prepared to
-back off if the code fails self-test.)  If you do any experimentation along
-these lines, please report the optimal settings to jpeg-info@jpegclub.org so
-we can mention them in future releases.  Be sure to specify your machine and
-compiler version.
-
-
-HINTS FOR SPECIFIC SYSTEMS
-==========================
-
-We welcome reports on changes needed for systems not mentioned here.  Submit
-'em to jpeg-info@jpegclub.org.  Also, if configure or ckconfig.c is wrong
-about how to configure the JPEG software for your system, please let us know.
-
-
-Acorn RISC OS:
-
-(Thanks to Simon Middleton for these hints on compiling with Desktop C.)
-After renaming the files according to Acorn conventions, take a copy of
-makefile.ansi, change all occurrences of 'libjpeg.a' to 'libjpeg.o' and
-change these definitions as indicated:
-
-CFLAGS= -throwback -IC: -Wn
-LDLIBS=C:o.Stubs
-SYSDEPMEM=jmemansi.o
-LN=Link
-AR=LibFile -c -o
-
-Also add a new line '.c.o:; $(cc) $< $(cflags) -c -o $@'.  Remove the
-lines '$(RM) libjpeg.o' and '$(AR2) libjpeg.o' and the 'jconfig.h'
-dependency section.
-
-Copy jconfig.txt to jconfig.h.  Edit jconfig.h to define TWO_FILE_COMMANDLINE
-and CHAR_IS_UNSIGNED.
-
-Run the makefile using !AMU not !Make.  If you want to use the 'clean' and
-'test' makefile entries then you will have to fiddle with the syntax a bit
-and rename the test files.
-
-
-Amiga:
-
-SAS C 6.50 reportedly is too buggy to compile the IJG code properly.
-A patch to update to 6.51 is available from SAS or AmiNet FTP sites.
-
-The supplied config files are set up to use jmemname.c as the memory
-manager, with temporary files being created on the device named by
-"JPEGTMP:".
-
-
-Atari ST/STE/TT:
-
-Copy the project files makcjpeg.st, makdjpeg.st, maktjpeg.st, and makljpeg.st
-to cjpeg.prj, djpeg.prj, jpegtran.prj, and libjpeg.prj respectively.  The
-project files should work as-is with Pure C.  For Turbo C, change library
-filenames "pc..." to "tc..." in each project file.  Note that libjpeg.prj
-selects jmemansi.c as the recommended memory manager.  You'll probably want to
-adjust the DEFAULT_MAX_MEM setting --- you want it to be a couple hundred K
-less than your normal free memory.  Put "#define DEFAULT_MAX_MEM nnnn" into
-jconfig.h to do this.
-
-To use the 68881/68882 coprocessor for the floating point DCT, add the
-compiler option "-8" to the project files and replace pcfltlib.lib with
-pc881lib.lib in cjpeg.prj and djpeg.prj.  Or if you don't have a
-coprocessor, you may prefer to remove the float DCT code by undefining
-DCT_FLOAT_SUPPORTED in jmorecfg.h (since without a coprocessor, the float
-code will be too slow to be useful).  In that case, you can delete
-pcfltlib.lib from the project files.
-
-Note that you must make libjpeg.lib before making cjpeg.ttp, djpeg.ttp,
-or jpegtran.ttp.  You'll have to perform the self-test by hand.
-
-We haven't bothered to include project files for rdjpgcom and wrjpgcom.
-Those source files should just be compiled by themselves; they don't
-depend on the JPEG library.  You can use the default.prj project file
-of the Pure C distribution to make the programs.
-
-There is a bug in some older versions of the Turbo C library which causes the
-space used by temporary files created with "tmpfile()" not to be freed after
-an abnormal program exit.  If you check your disk afterwards, you will find
-cluster chains that are allocated but not used by a file.  This should not
-happen in cjpeg/djpeg/jpegtran, since we enable a signal catcher to explicitly
-close temp files before exiting.  But if you use the JPEG library with your
-own code, be sure to supply a signal catcher, or else use a different
-system-dependent memory manager.
-
-
-Cray:
-
-Should you be so fortunate as to be running JPEG on a Cray YMP, there is a
-compiler bug in old versions of Cray's Standard C (prior to 3.1).  If you
-still have an old compiler, you'll need to insert a line reading
-"#pragma novector" just before the loop
-    for (i = 1; i <= (int) htbl->bits[l]; i++)
-      huffsize[p++] = (char) l;
-in fix_huff_tbl (in V5beta1, line 204 of jchuff.c and line 176 of jdhuff.c).
-[This bug may or may not still occur with the current IJG code, but it's
-probably a dead issue anyway...]
-
-
-HP-UX:
-
-If you have HP-UX 7.05 or later with the "software development" C compiler,
-you should run the compiler in ANSI mode.  If using the configure script,
-say
-	./configure CC='cc -Aa'
-(or -Ae if you prefer).  If configuring by hand, use makefile.ansi and add
-"-Aa" to the CFLAGS line in the makefile.
-
-If you have a pre-7.05 system, or if you are using the non-ANSI C compiler
-delivered with a minimum HP-UX system, then you must use makefile.unix
-(and do NOT add -Aa); or just run configure without the CC option.
-
-On HP 9000 series 800 machines, the HP C compiler is buggy in revisions prior
-to A.08.07.  If you get complaints about "not a typedef name", you'll have to
-use makefile.unix, or run configure without the CC option.
-
-
-Macintosh, generic comments:
-
-The supplied user-interface files (cjpeg.c, djpeg.c, etc) are set up to
-provide a Unix-style command line interface.  You can use this interface on
-the Mac by means of the ccommand() library routine provided by Metrowerks
-CodeWarrior or Think C.  This is only appropriate for testing the library,
-however; to make a user-friendly equivalent of cjpeg/djpeg you'd really want
-to develop a Mac-style user interface.  There isn't a complete example
-available at the moment, but there are some helpful starting points:
-1. Sam Bushell's free "To JPEG" applet provides drag-and-drop conversion to
-JPEG under System 7 and later.  This only illustrates how to use the
-compression half of the library, but it does a very nice job of that part.
-The CodeWarrior source code is available from http://www.pobox.com/~jsam.
-2. Jim Brunner prepared a Mac-style user interface for both compression and
-decompression.  Unfortunately, it hasn't been updated since IJG v4, and
-the library's API has changed considerably since then.  Still it may be of
-some help, particularly as a guide to compiling the IJG code under Think C.
-Jim's code is available from the Info-Mac archives, at sumex-aim.stanford.edu
-or mirrors thereof; see file /info-mac/dev/src/jpeg-convert-c.hqx.
-
-jmemmac.c is the recommended memory manager back end for Macintosh.  It uses
-NewPtr/DisposePtr instead of malloc/free, and has a Mac-specific
-implementation of jpeg_mem_available().  It also creates temporary files that
-follow Mac conventions.  (That part of the code relies on System-7-or-later OS
-functions.  See the comments in jmemmac.c if you need to run it on System 6.)
-NOTE that USE_MAC_MEMMGR must be defined in jconfig.h to use jmemmac.c.
-
-You can also use jmemnobs.c, if you don't care about handling images larger
-than available memory.  If you use any memory manager back end other than
-jmemmac.c, we recommend replacing "malloc" and "free" by "NewPtr" and
-"DisposePtr", because Mac C libraries often have peculiar implementations of
-malloc/free.  (For instance, free() may not return the freed space to the
-Mac Memory Manager.  This is undesirable for the IJG code because jmemmgr.c
-already clumps space requests.)
-
-
-Macintosh, Metrowerks CodeWarrior:
-
-The Unix-command-line-style interface can be used by defining USE_CCOMMAND.
-You'll also need to define TWO_FILE_COMMANDLINE to avoid stdin/stdout.
-This means that when using the cjpeg/djpeg programs, you'll have to type the
-input and output file names in the "Arguments" text-edit box, rather than
-using the file radio buttons.  (Perhaps USE_FDOPEN or USE_SETMODE would
-eliminate the problem, but I haven't heard from anyone who's tried it.)
-
-On 680x0 Macs, Metrowerks defines type "double" as a 10-byte IEEE extended
-float.  jmemmgr.c won't like this: it wants sizeof(ALIGN_TYPE) to be a power
-of 2.  Add "#define ALIGN_TYPE long" to jconfig.h to eliminate the complaint.
-
-The supplied configuration file jconfig.mac can be used for your jconfig.h;
-it includes all the recommended symbol definitions.  If you have AppleScript
-installed, you can run the supplied script makeproj.mac to create CodeWarrior
-project files for the library and the testbed applications, then build the
-library and applications.  (Thanks to Dan Sears and Don Agro for this nifty
-hack, which saves us from trying to maintain CodeWarrior project files as part
-of the IJG distribution...)
-
-
-Macintosh, Think C:
-
-The documentation in Jim Brunner's "JPEG Convert" source code (see above)
-includes detailed build instructions for Think C; it's probably somewhat
-out of date for the current release, but may be helpful.
-
-If you want to build the minimal command line version, proceed as follows.
-You'll have to prepare project files for the programs; we don't include any
-in the distribution since they are not text files.  Use the file lists in
-any of the supplied makefiles as a guide.  Also add the ANSI and Unix C
-libraries in a separate segment.  You may need to divide the JPEG files into
-more than one segment; we recommend dividing compression and decompression
-modules.  Define USE_CCOMMAND in jconfig.h so that the ccommand() routine is
-called.  You must also define TWO_FILE_COMMANDLINE because stdin/stdout
-don't handle binary data correctly.
-
-On 680x0 Macs, Think C defines type "double" as a 12-byte IEEE extended float.
-jmemmgr.c won't like this: it wants sizeof(ALIGN_TYPE) to be a power of 2.
-Add "#define ALIGN_TYPE long" to jconfig.h to eliminate the complaint.
-
-jconfig.mac should work as a jconfig.h configuration file for Think C,
-but the makeproj.mac AppleScript script is specific to CodeWarrior.  Sorry.
-
-
-MIPS R3000:
-
-MIPS's cc version 1.31 has a rather nasty optimization bug.  Don't use -O
-if you have that compiler version.  (Use "cc -V" to check the version.)
-Note that the R3000 chip is found in workstations from DEC and others.
-
-
-MS-DOS, generic comments for 16-bit compilers:
-
-The IJG code is designed to work well in 80x86 "small" or "medium" memory
-models (i.e., data pointers are 16 bits unless explicitly declared "far";
-code pointers can be either size).  You may be able to use small model to
-compile cjpeg or djpeg by itself, but you will probably have to use medium
-model for any larger application.  This won't make much difference in
-performance.  You *will* take a noticeable performance hit if you use a
-large-data memory model, and you should avoid "huge" model if at all
-possible.  Be sure that NEED_FAR_POINTERS is defined in jconfig.h if you use
-a small-data memory model; be sure it is NOT defined if you use a large-data
-model.  (The supplied makefiles and jconfig files for Borland and Microsoft C
-compile in medium model and define NEED_FAR_POINTERS.)
-
-The DOS-specific memory manager, jmemdos.c, should be used if possible.
-It needs some assembly-code routines which are in jmemdosa.asm; make sure
-your makefile assembles that file and includes it in the library.  If you
-don't have a suitable assembler, you can get pre-assembled object files for
-jmemdosa by FTP from ftp.uu.net:/graphics/jpeg/jdosaobj.zip.  (DOS-oriented
-distributions of the IJG source code often include these object files.)
-
-When using jmemdos.c, jconfig.h must define USE_MSDOS_MEMMGR and must set
-MAX_ALLOC_CHUNK to less than 64K (65520L is a typical value).  If your
-C library's far-heap malloc() can't allocate blocks that large, reduce
-MAX_ALLOC_CHUNK to whatever it can handle.
-
-If you can't use jmemdos.c for some reason --- for example, because you
-don't have an assembler to assemble jmemdosa.asm --- you'll have to fall
-back to jmemansi.c or jmemname.c.  You'll probably still need to set
-MAX_ALLOC_CHUNK in jconfig.h, because most DOS C libraries won't malloc()
-more than 64K at a time.  IMPORTANT: if you use jmemansi.c or jmemname.c,
-you will have to compile in a large-data memory model in order to get the
-right stdio library.  Too bad.
-
-wrjpgcom needs to be compiled in large model, because it malloc()s a 64KB
-work area to hold the comment text.  If your C library's malloc can't
-handle that, reduce MAX_COM_LENGTH as necessary in wrjpgcom.c.
-
-Most MS-DOS compilers treat stdin/stdout as text files, so you must use
-two-file command line style.  But if your compiler has either fdopen() or
-setmode(), you can use one-file style if you like.  To do this, define
-USE_SETMODE or USE_FDOPEN so that stdin/stdout will be set to binary mode.
-(USE_SETMODE seems to work with more DOS compilers than USE_FDOPEN.)  You
-should test that I/O through stdin/stdout produces the same results as I/O
-to explicitly named files... the "make test" procedures in the supplied
-makefiles do NOT use stdin/stdout.
-
-
-MS-DOS, generic comments for 32-bit compilers:
-
-None of the above comments about memory models apply if you are using a
-32-bit flat-memory-space environment, such as DJGPP or Watcom C.  (And you
-should use one if you have it, as performance will be much better than
-8086-compatible code!)  For flat-memory-space compilers, do NOT define
-NEED_FAR_POINTERS, and do NOT use jmemdos.c.  Use jmemnobs.c if the
-environment supplies adequate virtual memory, otherwise use jmemansi.c or
-jmemname.c.
-
-You'll still need to be careful about binary I/O through stdin/stdout.
-See the last paragraph of the previous section.
-
-
-MS-DOS, Borland C:
-
-Be sure to convert all the source files to DOS text format (CR/LF newlines).
-Although Borland C will often work OK with unmodified Unix (LF newlines)
-source files, sometimes it will give bogus compile errors.
-"Illegal character '#'" is the most common such error.  (This is true with
-Borland C 3.1, but perhaps is fixed in newer releases.)
-
-If you want one-file command line style, just undefine TWO_FILE_COMMANDLINE.
-jconfig.bcc already includes #define USE_SETMODE to make this work.
-(fdopen does not work correctly.)
-
-
-MS-DOS, Microsoft C:
-
-makefile.mc6 works with Microsoft C, DOS Visual C++, etc.  It should only
-be used if you want to build a 16-bit (small or medium memory model) program.
-
-If you want one-file command line style, just undefine TWO_FILE_COMMANDLINE.
-jconfig.mc6 already includes #define USE_SETMODE to make this work.
-(fdopen does not work correctly.)
-
-Note that this makefile assumes that the working copy of itself is called
-"makefile".  If you want to call it something else, say "makefile.mak",
-be sure to adjust the dependency line that reads "$(RFILE) : makefile".
-Otherwise the make will fail because it doesn't know how to create "makefile".
-Worse, some releases of Microsoft's make utilities give an incorrect error
-message in this situation.
-
-Old versions of MS C fail with an "out of macro expansion space" error
-because they can't cope with the macro TRACEMS8 (defined in jerror.h).
-If this happens to you, the easiest solution is to change TRACEMS8 to
-expand to nothing.  You'll lose the ability to dump out JPEG coefficient
-tables with djpeg -debug -debug, but at least you can compile.
-
-Original MS C 6.0 is very buggy; it compiles incorrect code unless you turn
-off optimization entirely (remove -O from CFLAGS).  6.00A is better, but it
-still generates bad code if you enable loop optimizations (-Ol or -Ox).
-
-MS C 8.0 crashes when compiling jquant1.c with optimization switch /Oo ...
-which is on by default.  To work around this bug, compile that one file
-with /Oo-.
-
-
-Microsoft Windows (all versions), generic comments:
-
-Some Windows system include files define typedef boolean as "unsigned char".
-The IJG code also defines typedef boolean, but we make it "int" by default.
-This doesn't affect the IJG programs because we don't import those Windows
-include files.  But if you use the JPEG library in your own program, and some
-of your program's files import one definition of boolean while some import the
-other, you can get all sorts of mysterious problems.  A good preventive step
-is to make the IJG library use "unsigned char" for boolean.  To do that,
-add something like this to your jconfig.h file:
-	/* Define "boolean" as unsigned char, not int, per Windows custom */
-	#ifndef __RPCNDR_H__	/* don't conflict if rpcndr.h already read */
-	typedef unsigned char boolean;
-	#endif
-	#define HAVE_BOOLEAN	/* prevent jmorecfg.h from redefining it */
-(This is already in jconfig.vc, by the way.)
-
-windef.h contains the declarations
-	#define far
-	#define FAR far
-Since jmorecfg.h tries to define FAR as empty, you may get a compiler
-warning if you include both jpeglib.h and windef.h (which windows.h
-includes).  To suppress the warning, you can put "#ifndef FAR"/"#endif"
-around the line "#define FAR" in jmorecfg.h.
-(Something like this is already in jmorecfg.h, by the way.)
-
-When using the library in a Windows application, you will almost certainly
-want to modify or replace the error handler module jerror.c, since our
-default error handler does a couple of inappropriate things:
-  1. it tries to write error and warning messages on stderr;
-  2. in event of a fatal error, it exits by calling exit().
-
-A simple stopgap solution for problem 1 is to replace the line
-	fprintf(stderr, "%s\n", buffer);
-(in output_message in jerror.c) with
-	MessageBox(GetActiveWindow(),buffer,"JPEG Error",MB_OK|MB_ICONERROR);
-It's highly recommended that you at least do that much, since otherwise
-error messages will disappear into nowhere.  (Beginning with IJG v6b, this
-code is already present in jerror.c; just define USE_WINDOWS_MESSAGEBOX in
-jconfig.h to enable it.)
-
-The proper solution for problem 2 is to return control to your calling
-application after a library error.  This can be done with the setjmp/longjmp
-technique discussed in libjpeg.txt and illustrated in example.c.  (NOTE:
-some older Windows C compilers provide versions of setjmp/longjmp that
-don't actually work under Windows.  You may need to use the Windows system
-functions Catch and Throw instead.)
-
-The recommended memory manager under Windows is jmemnobs.c; in other words,
-let Windows do any virtual memory management needed.  You should NOT use
-jmemdos.c nor jmemdosa.asm under Windows.
-
-For Windows 3.1, we recommend compiling in medium or large memory model;
-for newer Windows versions, use a 32-bit flat memory model.  (See the MS-DOS
-sections above for more info about memory models.)  In the 16-bit memory
-models only, you'll need to put
-	#define MAX_ALLOC_CHUNK 65520L	/* Maximum request to malloc() */
-into jconfig.h to limit allocation chunks to 64Kb.  (Without that, you'd
-have to use huge memory model, which slows things down unnecessarily.)
-jmemnobs.c works without modification in large or flat memory models, but to
-use medium model, you need to modify its jpeg_get_large and jpeg_free_large
-routines to allocate far memory.  In any case, you might like to replace
-its calls to malloc and free with direct calls on Windows memory allocation
-functions.
-
-You may also want to modify jdatasrc.c and jdatadst.c to use Windows file
-operations rather than fread/fwrite.  This is only necessary if your C
-compiler doesn't provide a competent implementation of C stdio functions.
-
-You might want to tweak the RGB_xxx macros in jmorecfg.h so that the library
-will accept or deliver color pixels in BGR sample order, not RGB; BGR order
-is usually more convenient under Windows.  Note that this change will break
-the sample applications cjpeg/djpeg, but the library itself works fine.
-
-
-Many people want to convert the IJG library into a DLL.  This is reasonably
-straightforward, but watch out for the following:
-
-  1. Don't try to compile as a DLL in small or medium memory model; use
-large model, or even better, 32-bit flat model.  Many places in the IJG code
-assume the address of a local variable is an ordinary (not FAR) pointer;
-that isn't true in a medium-model DLL.
-
-  2. Microsoft C cannot pass file pointers between applications and DLLs.
-(See Microsoft Knowledge Base, PSS ID Number Q50336.)  So jdatasrc.c and
-jdatadst.c don't work if you open a file in your application and then pass
-the pointer to the DLL.  One workaround is to make jdatasrc.c/jdatadst.c
-part of your main application rather than part of the DLL.
-
-  3. You'll probably need to modify the macros GLOBAL() and EXTERN() to
-attach suitable linkage keywords to the exported routine names.  Similarly,
-you'll want to modify METHODDEF() and JMETHOD() to ensure function pointers
-are declared in a way that lets application routines be called back through
-the function pointers.  These macros are in jmorecfg.h.  Typical definitions
-for a 16-bit DLL are:
-	#define GLOBAL(type)		type _far _pascal _loadds _export
-	#define EXTERN(type)		extern type _far _pascal _loadds
-	#define METHODDEF(type)		static type _far _pascal
-	#define JMETHOD(type,methodname,arglist)  \
-		type (_far _pascal *methodname) arglist
-For a 32-bit DLL you may want something like
-	#define GLOBAL(type)		__declspec(dllexport) type
-	#define EXTERN(type)		extern __declspec(dllexport) type
-Although not all the GLOBAL routines are actually intended to be called by
-the application, the performance cost of making them all DLL entry points is
-negligible.
-
-The unmodified IJG library presents a very C-specific application interface,
-so the resulting DLL is only usable from C or C++ applications.  There has
-been some talk of writing wrapper code that would present a simpler interface
-usable from other languages, such as Visual Basic.  This is on our to-do list
-but hasn't been very high priority --- any volunteers out there?
-
-
-Microsoft Windows, Borland C:
-
-The provided jconfig.bcc should work OK in a 32-bit Windows environment,
-but you'll need to tweak it in a 16-bit environment (you'd need to define
-NEED_FAR_POINTERS and MAX_ALLOC_CHUNK).  Beware that makefile.bcc will need
-alteration if you want to use it for Windows --- in particular, you should
-use jmemnobs.c not jmemdos.c under Windows.
-
-Borland C++ 4.5 fails with an internal compiler error when trying to compile
-jdmerge.c in 32-bit mode.  If enough people complain, perhaps Borland will fix
-it.  In the meantime, the simplest known workaround is to add a redundant
-definition of the variable range_limit in h2v1_merged_upsample(), at the head
-of the block that handles odd image width (about line 268 in v6 jdmerge.c):
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    register JSAMPLE * range_limit = cinfo->sample_range_limit; /* ADD THIS */
-    cb = GETJSAMPLE(*inptr1);
-Pretty bizarre, especially since the very similar routine h2v2_merged_upsample
-doesn't trigger the bug.
-Recent reports suggest that this bug does not occur with "bcc32a" (the
-Pentium-optimized version of the compiler).
-
-Another report from a user of Borland C 4.5 was that incorrect code (leading
-to a color shift in processed images) was produced if any of the following
-optimization switch combinations were used: 
-	-Ot -Og
-	-Ot -Op
-	-Ot -Om
-So try backing off on optimization if you see such a problem.  (Are there
-several different releases all numbered "4.5"??)
-
-
-Microsoft Windows, Microsoft Visual C++:
-
-jconfig.vc should work OK with any Microsoft compiler for a 32-bit memory
-model.  makefile.vc is intended for command-line use.  (If you are using
-the Developer Studio environment, you may prefer the DevStudio project
-files; see below.)
-
-IJG JPEG 7 adds extern "C" to jpeglib.h.  This avoids the need to put
-extern "C" { ... } around #include "jpeglib.h" in your C++ application.
-You can also force VC++ to treat the library as C++ code by renaming
-all the *.c files to *.cpp (and adjusting the makefile to match).
-In this case you also need to define the symbol DONT_USE_EXTERN_C in
-the configuration to prevent jpeglib.h from using extern "C".
-
-
-Microsoft Windows, Microsoft Visual C++ 6 Developer Studio:
-
-We include makefiles that should work as project files in DevStudio 6.0 or
-later.  There is a library makefile that builds the IJG library as a static
-Win32 library, and application makefiles that build the sample applications
-as Win32 console applications.  (Even if you only want the library, we
-recommend building the applications so that you can run the self-test.)
-
-To use:
-1. Open the command prompt, change to the main directory and execute the
-   command line
-	NMAKE /f makefile.vc  setup-vc6
-   This will move jconfig.vc to jconfig.h and makefiles to project files.
-   (Note that the renaming is critical!)
-2. Open the workspace file jpeg.dsw, build the library project.
-   (If you are using DevStudio more recent than 6.0, you'll probably
-   get a message saying that the project files are being updated.)
-3. Open the workspace file apps.dsw, build the application projects.
-4. To perform the self-test, execute the command line
-	NMAKE /f makefile.vc  test-build
-5. Move the application .exe files from `app`\Release to an
-   appropriate location on your path.
-
-
-Microsoft Windows, Microsoft Visual C++ 2010 Developer Studio (v10):
-
-We include makefiles that should work as project files in Visual Studio
-2010 or later.  There is a library makefile that builds the IJG library
-as a static Win32 library, and application makefiles that build the sample
-applications as Win32 console applications.  (Even if you only want the
-library, we recommend building the applications so that you can run the
-self-test.)
-
-To use:
-1. Open the command prompt, change to the main directory and execute the
-   command line
-	NMAKE /f makefile.vc  setup-v10
-   This will move jconfig.vc to jconfig.h and makefiles to project files.
-   (Note that the renaming is critical!)
-2. Open the solution file jpeg.sln, build the library project.
-   (If you are using Visual Studio more recent than 2010 (v10), you'll
-   probably get a message saying that the project files are being updated.)
-3. Open the solution file apps.sln, build the application projects.
-4. To perform the self-test, execute the command line
-	NMAKE /f makefile.vc  test-build
-5. Move the application .exe files from `app`\Release to an
-   appropriate location on your path.
-
-Note:
-There seems to be an optimization bug in the compiler which causes the
-self-test to fail with the color quantization option.
-We have disabled optimization for the file jquant2.c in the library
-project file which causes the self-test to pass properly.
-
-
-OS/2, Borland C++:
-
-Watch out for optimization bugs in older Borland compilers; you may need
-to back off the optimization switch settings.  See the comments in
-makefile.bcc.
-
-
-SGI:
-
-On some SGI systems, you may need to set "AR2= ar -ts" in the Makefile.
-If you are using configure, you can do this by saying
-	./configure RANLIB='ar -ts'
-This change is not needed on all SGIs.  Use it only if the make fails at the
-stage of linking the completed programs.
-
-On the MIPS R4000 architecture (Indy, etc.), the compiler option "-mips2"
-reportedly speeds up the float DCT method substantially, enough to make it
-faster than the default int method (but still slower than the fast int
-method).  If you use -mips2, you may want to alter the default DCT method to
-be float.  To do this, put "#define JDCT_DEFAULT JDCT_FLOAT" in jconfig.h.
-
-
-VMS:
-
-On an Alpha/VMS system with MMS, be sure to use the "/Marco=Alpha=1"
-qualifier with MMS when building the JPEG package.
-
-VAX/VMS v5.5-1 may have problems with the test step of the build procedure
-reporting differences when it compares the original and test images.  If the
-error points to the last block of the files, it is most likely bogus and may
-be safely ignored.  It seems to be because the files are Stream_LF and
-Backup/Compare has difficulty with the (presumably) null padded files.
-This problem was not observed on VAX/VMS v6.1 or AXP/VMS v6.1.
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
index 87f8ec6..19127e3 100644
--- a/java/CMakeLists.txt
+++ b/java/CMakeLists.txt
@@ -9,10 +9,11 @@
   org/libjpegturbo/turbojpeg/TJTransform
   org/libjpegturbo/turbojpeg/TJTransformer
   TJUnitTest
-  TJExample)
+  TJExample
+  TJBench)
 
 if(MSVC_IDE)
-  set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/$(OutDir)")
+  set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
 else()
   set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR})
 endif()
diff --git a/java/Makefile.am b/java/Makefile.am
index 4a1b34e..1307d69 100644
--- a/java/Makefile.am
+++ b/java/Makefile.am
@@ -1,15 +1,21 @@
 JAVAROOT = .
 
+# Generate TJLoader.java from its template, substituting the configured libdir
+org/libjpegturbo/turbojpeg/TJLoader.java: $(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
+	mkdir -p org/libjpegturbo/turbojpeg
+	sed 's@%{__libdir}@$(libdir)@g' \
+		$(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl > $@
+
 JAVASOURCES = org/libjpegturbo/turbojpeg/TJ.java \
 	org/libjpegturbo/turbojpeg/TJCompressor.java \
 	org/libjpegturbo/turbojpeg/TJCustomFilter.java \
 	org/libjpegturbo/turbojpeg/TJDecompressor.java \
-	org/libjpegturbo/turbojpeg/TJLoader.java \
 	org/libjpegturbo/turbojpeg/TJScalingFactor.java \
 	org/libjpegturbo/turbojpeg/TJTransform.java \
 	org/libjpegturbo/turbojpeg/TJTransformer.java \
 	TJExample.java \
-	TJUnitTest.java
+	TJUnitTest.java \
+	TJBench.java
 
 JNIHEADERS = org_libjpegturbo_turbojpeg_TJ.h \
 	org_libjpegturbo_turbojpeg_TJCompressor.h \
@@ -18,7 +24,7 @@
 
 if WITH_JAVA
 
-dist_noinst_JAVA = ${JAVASOURCES}
+nodist_noinst_JAVA = ${JAVASOURCES} org/libjpegturbo/turbojpeg/TJLoader.java
 
 JAVA_CLASSES = org/libjpegturbo/turbojpeg/TJ.class \
 	org/libjpegturbo/turbojpeg/TJCompressor.class \
@@ -29,7 +35,8 @@
 	org/libjpegturbo/turbojpeg/TJTransform.class \
 	org/libjpegturbo/turbojpeg/TJTransformer.class \
 	TJExample.class \
-	TJUnitTest.class
+	TJUnitTest.class \
+	TJBench.class
 
 all: all-am turbojpeg.jar
 
@@ -40,12 +47,12 @@
 	rm -f turbojpeg.jar
 
 install-exec-local: turbojpeg.jar
-	mkdir -p $(DESTDIR)/$(prefix)/classes
-	$(INSTALL) -m 644 turbojpeg.jar $(DESTDIR)/$(prefix)/classes/
+	mkdir -p $(DESTDIR)/$(datadir)/classes
+	$(INSTALL) -m 644 turbojpeg.jar $(DESTDIR)/$(datadir)/classes/
 
 uninstall-local:
-	rm -f $(DESTDIR)/$(prefix)/classes/turbojpeg.jar
-	if [ -d $(DESTDIR)/$(prefix)/classes ]; then rmdir $(DESTDIR)/$(prefix)/classes; fi
+	rm -f $(DESTDIR)/$(datadir)/classes/turbojpeg.jar
+	if [ -d $(DESTDIR)/$(datadir)/classes ]; then rmdir $(DESTDIR)/$(datadir)/classes; fi
 
 headers: all
 	javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJ; \
@@ -55,9 +62,10 @@
 
 docs: all
 	mkdir -p ${srcdir}/doc; \
-	javadoc -d ${srcdir}/doc -sourcepath ${srcdir} org.libjpegturbo.turbojpeg
+	javadoc -notimestamp -d ${srcdir}/doc -sourcepath ${srcdir} org.libjpegturbo.turbojpeg
 
 endif
 
 EXTRA_DIST = MANIFEST.MF ${JAVASOURCES} ${JNIHEADERS} doc CMakeLists.txt \
+	org/libjpegturbo/turbojpeg/TJLoader.java.tmpl \
 	org/libjpegturbo/turbojpeg/TJLoader.java.in
diff --git a/java/README b/java/README
index 22e0f73..88ddc3b 100644
--- a/java/README
+++ b/java/README
@@ -1,25 +1,25 @@
-TurboJPEG/OSS Java Wrapper
-==========================
+TurboJPEG Java Wrapper
+======================
 
-TurboJPEG/OSS can optionally be built with a Java Native Interface wrapper,
-which allows the TurboJPEG/OSS dynamic library to be loaded and used directly
-from Java applications.  The Java front end for this is defined in several
-classes located under org/libjpegturbo/turbojpeg.  The source code for these
-Java classes is licensed under a BSD-style license, so the files can be
-incorporated directly into both open source and proprietary projects without
-restriction.  A Java archive (JAR) file containing these classes is also
-shipped with the "official" distribution packages of libjpeg-turbo.
+The TurboJPEG shared library can optionally be built with a Java Native
+Interface wrapper, which allows the library to be loaded and used directly from
+Java applications.  The Java front end for this is defined in several classes
+located under org/libjpegturbo/turbojpeg.  The source code for these Java
+classes is licensed under a BSD-style license, so the files can be incorporated
+directly into both open source and proprietary projects without restriction.  A
+Java archive (JAR) file containing these classes is also shipped with the
+"official" distribution packages of libjpeg-turbo.
 
 TJExample.java, which should also be located in the same directory as this
-README file, demonstrates how to use the TurboJPEG/OSS Java front end to
-compress and decompress JPEG images in memory.
+README file, demonstrates how to use the TurboJPEG Java API to compress and
+decompress JPEG images in memory.
 
 
 Performance Pitfalls
 --------------------
 
-The TurboJPEG Java front end defines several convenience methods that can
-allocate image buffers or instantiate classes to hold the result of compress,
+The TurboJPEG Java API defines several convenience methods that can allocate
+image buffers or instantiate classes to hold the result of compress,
 decompress, or transform operations.  However, if you use these methods, then
 be mindful of the amount of new data you are creating on the heap.  It may be
 necessary to manually invoke the garbage collector to prevent heap exhaustion
@@ -27,29 +27,26 @@
 performance, particularly in a multi-threaded environment (Java pauses all
 threads when the GC runs.)
 
-The Java front end always gives you the option of pre-allocating your own
-source and destination buffers, which allows you to re-use these buffers for
+The TurboJPEG Java API always gives you the option of pre-allocating your own
+source and destination buffers, which allows you to re-use those buffers for
 compressing/decompressing multiple images.  If the image sequence you are
 compressing or decompressing consists of images of the same size, then
 pre-allocating the buffers is recommended.
 
 
-Note for OS X users
--------------------
-
-/usr/lib, the directory under which libturbojpeg.dylib is installed on Mac
-systems, is not part of the normal Java library path.  Thus, when running a
-Java application that uses TurboJPEG/OSS on Mac systems, you will need to pass
-an argument of -Djava.library.path=/usr/lib to java.
-
-
-Note for Solaris users
+Installation Directory
 ----------------------
 
-/opt/libjpeg-turbo/lib, the directory under which libturbojpeg.so is installed
-on Solaris systems, is not part of the normal Java library path.  Thus, when
-running a Java application that uses TurboJPEG/OSS on Solaris systems, you will
-need to pass an argument of -Djava.library.path=/opt/libjpeg-turbo/lib to java.
-If using a 64-bit data model, then instead pass an argument of
--Djava.library.path=/opt/libjpeg-turbo/lib/amd64 to use the 64-bit version of
-libturbojpeg.so.
+The TurboJPEG Java Wrapper will look for the TurboJPEG JNI library
+(libturbojpeg.so, libturbojpeg.jnilib, or turbojpeg.dll) in the system library
+paths or in any paths specified in LD_LIBRARY_PATH (Un*x), DYLD_LIBRARY_PATH
+(Mac), or PATH (Windows.)  Failing this, on Un*x and Mac systems, the wrapper
+will look for the JNI library under the library directory configured when
+libjpeg-turbo was built.  If that library directory is
+/opt/libjpeg-turbo/lib32, then /opt/libjpeg-turbo/lib64 is also searched, and
+vice versa.
+
+If you installed the JNI library into another directory, then you will need
+to pass an argument of -Djava.library.path={path_to_JNI_library} to java, or
+manipulate LD_LIBRARY_PATH, DYLD_LIBRARY_PATH, or PATH to include the directory
+containing the JNI library.
diff --git a/java/TJBench.java b/java/TJBench.java
new file mode 100644
index 0000000..36df1ab
--- /dev/null
+++ b/java/TJBench.java
@@ -0,0 +1,874 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import java.io.*;
+import java.awt.image.*;
+import javax.imageio.*;
+import java.util.*;
+import org.libjpegturbo.turbojpeg.*;
+
+class TJBench {
+
+  static final int YUVENCODE = 1;  /* values for the yuv mode selector below */
+  static final int YUVDECODE = 2;
+  /* TurboJPEG flags, YUV mode, quiet level (0 = verbose), and pixel format */
+  static int flags = 0, yuv = 0, quiet = 0, pf = TJ.PF_BGR;
+  static boolean decompOnly, doTile;
+
+  static final String[] pixFormatStr = {  /* display names, indexed by pf */
+    "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY"
+  };
+
+  static final String[] subNameLong = {  /* display names, by subsampling */
+    "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
+  };
+
+  static final String[] subName = {  /* file name forms, by subsampling */
+    "444", "422", "420", "GRAY", "440"
+  };
+
+  static TJScalingFactor sf;  /* scaling factor applied when decompressing */
+  static int xformOp = TJTransform.OP_NONE, xformOpt = 0;
+  static double benchTime = 5.0;  /* length of each benchmark loop (seconds) */
+
+
+  static final double getTime() {
+    return System.nanoTime() / 1.0e9;  /* nanoseconds -> seconds */
+  }
+
+
+  /* Format val with (approximately) figs significant figures */
+  static String sigFig(double val, int figs) {
+    /* log10(0) is -Infinity, so zero must not reach the digit computation */
+    if (val == 0.0)
+      return String.format("%.0f", val);
+    int magnitude = (int)Math.ceil(Math.log10(Math.abs(val)));
+    int digitsAfterDecimal = Math.max(figs - magnitude, 0);
+    return String.format("%." + digitsAfterDecimal + "f", val);
+  }
+
+
+  /* Reads fileName via ImageIO and returns its pixels packed in pixelFormat
+     order; the image dimensions are stored in w[0] and h[0]. */
+  static byte[] loadImage(String fileName, int[] w, int[] h, int pixelFormat)
+                          throws Exception {
+    BufferedImage image = ImageIO.read(new File(fileName));
+    if (image == null)
+      throw new Exception("Could not read " + fileName);
+    final int width = w[0] = image.getWidth();
+    final int height = h[0] = image.getHeight();
+    int[] argb = image.getRGB(0, 0, width, height, null, 0, width);
+    final int pixelSize = TJ.getPixelSize(pixelFormat);
+    final int redOffset = TJ.getRedOffset(pixelFormat);
+    final int greenOffset = TJ.getGreenOffset(pixelFormat);
+    final int blueOffset = TJ.getBlueOffset(pixelFormat);
+    byte[] dstBuf = new byte[width * height * pixelSize];
+    /* Each getRGB() sample is 0xAARRGGBB; the alpha byte is discarded. */
+    for (int i = 0, dst = 0; i < argb.length; i++, dst += pixelSize) {
+      dstBuf[dst + redOffset] = (byte)(argb[i] >> 16);
+      dstBuf[dst + greenOffset] = (byte)(argb[i] >> 8);
+      dstBuf[dst + blueOffset] = (byte)argb[i];
+    }
+    return dstBuf;
+  }
+
+
+  /* Writes the w x h image packed in srcBuf (pixelFormat order) as a BMP file */
+  static void saveImage(String fileName, byte[] srcBuf, int w, int h,
+                        int pixelFormat) throws Exception {
+    BufferedImage image = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
+    final int pixelSize = TJ.getPixelSize(pixelFormat);
+    final int redOffset = TJ.getRedOffset(pixelFormat);
+    final int greenOffset = TJ.getGreenOffset(pixelFormat);
+    final int blueOffset = TJ.getBlueOffset(pixelFormat);
+    for (int row = 0, src = 0; row < h; row++) {
+      for (int col = 0; col < w; col++, src += pixelSize) {
+        int red = srcBuf[src + redOffset] & 0xff;
+        int green = srcBuf[src + greenOffset] & 0xff;
+        int blue = srcBuf[src + blueOffset] & 0xff;
+        image.setRGB(col, row, (red << 16) | (green << 8) | blue);
+      }
+    }
+    ImageIO.write(image, "bmp", new File(fileName));
+  }
+
+
+  /* Decompression test:  decompresses the JPEG tile(s) in jpegBuf repeatedly
+     for benchTime seconds, reports the throughput, and saves the decompressed
+     image (and, if srcBuf is supplied, an image of the compression error.) */
+  static void decompTest(byte[] srcBuf, byte[][] jpegBuf, int[] jpegSize,
+                         byte[] dstBuf, int w, int h, int subsamp,
+                         int jpegQual, String fileName, int tilew, int tileh)
+                         throws Exception {
+    String qualStr = "", sizeStr, tempStr;
+    double start, elapsed;
+    int ps = TJ.getPixelSize(pf), iter;
+    int yuvSize = TJ.bufSizeYUV(w, h, subsamp);
+    int scaledw = (yuv == YUVDECODE) ? w : sf.getScaled(w);
+    int scaledh = (yuv == YUVDECODE) ? h : sf.getScaled(h);
+    int pitch = scaledw * ps;
+
+    if (jpegQual > 0)
+      qualStr = "_Q" + jpegQual;
+
+    TJDecompressor tjd = new TJDecompressor();
+
+    int bufSize = (yuv == YUVDECODE ? yuvSize : pitch * scaledh);
+    if (dstBuf == null)
+      dstBuf = new byte[bufSize];
+
+    /* Set the destination buffer to gray so we know whether the decompressor
+       attempted to write to it */
+    Arrays.fill(dstBuf, (byte)127);
+
+    /* Execute once to preload cache */
+    tjd.setJPEGImage(jpegBuf[0], jpegSize[0]);
+    if (yuv == YUVDECODE)
+      tjd.decompressToYUV(dstBuf, flags);
+    else
+      tjd.decompress(dstBuf, 0, 0, scaledw, pitch, scaledh, pf, flags);
+
+    /* Benchmark */
+    for (iter = 0, start = getTime();
+         (elapsed = getTime() - start) < benchTime; iter++) {
+      int tile = 0;
+      if (yuv == YUVDECODE)
+        tjd.decompressToYUV(dstBuf, flags);
+      else {
+        for (int y = 0; y < h; y += tileh) {
+          for (int x = 0; x < w; x += tilew, tile++) {
+            int width = doTile ? Math.min(tilew, w - x) : scaledw;
+            int height = doTile ? Math.min(tileh, h - y) : scaledh;
+            tjd.setJPEGImage(jpegBuf[tile], jpegSize[tile]);
+            tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags);
+          }
+        }
+      }
+    }
+
+    /* Release the JPEG buffers (iter must remain the benchmark loop count) */
+    tjd = null;
+    Arrays.fill(jpegBuf, null);
+    jpegBuf = null;  jpegSize = null;
+    System.gc();
+
+    if (quiet != 0)
+      System.out.println(
+        sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4));
+    else {
+      System.out.format("D--> Frame rate:           %f fps\n",
+                        (double)iter / elapsed);
+      System.out.format("     Dest. throughput:     %f Megapixels/sec\n",
+                        (double)(w * h) / 1000000. * (double)iter / elapsed);
+    }
+
+    if (yuv == YUVDECODE) {
+      tempStr = fileName + "_" + subName[subsamp] + qualStr + ".yuv";
+      FileOutputStream fos = new FileOutputStream(tempStr);
+      try { fos.write(dstBuf, 0, yuvSize); }
+      finally { fos.close(); }
+    } else {
+      if (sf.getNum() != 1 || sf.getDenom() != 1)
+        sizeStr = sf.getNum() + "_" + sf.getDenom();
+      else if (tilew != w || tileh != h)
+        sizeStr = tilew + "x" + tileh;
+      else
+        sizeStr = "full";
+      if (decompOnly)
+        tempStr = fileName + "_" + sizeStr + ".bmp";
+      else
+        tempStr = fileName + "_" + subName[subsamp] + qualStr + "_" +
+                  sizeStr + ".bmp";
+      saveImage(tempStr, dstBuf, scaledw, scaledh, pf);
+      int ndx = tempStr.lastIndexOf('.');
+      tempStr = tempStr.substring(0, ndx) + "-err.bmp";
+      if (srcBuf != null && sf.getNum() == 1 && sf.getDenom() == 1) {
+        if (quiet == 0)
+          System.out.println("Compression error written to " + tempStr + ".");
+        if (subsamp == TJ.SAMP_GRAY) {
+          for (int y = 0, index = 0; y < h; y++, index += pitch) {
+            for (int x = 0, index2 = index; x < w; x++, index2 += ps) {
+              int rindex = index2 + TJ.getRedOffset(pf);
+              int gindex = index2 + TJ.getGreenOffset(pf);
+              int bindex = index2 + TJ.getBlueOffset(pf);
+              int lum = (int)((double)(srcBuf[rindex] & 0xff) * 0.299 +
+                              (double)(srcBuf[gindex] & 0xff) * 0.587 +
+                              (double)(srcBuf[bindex] & 0xff) * 0.114 + 0.5);
+              lum = Math.max(0, Math.min(255, lum));
+              dstBuf[rindex] = (byte)Math.abs((dstBuf[rindex] & 0xff) - lum);
+              dstBuf[gindex] = (byte)Math.abs((dstBuf[gindex] & 0xff) - lum);
+              dstBuf[bindex] = (byte)Math.abs((dstBuf[bindex] & 0xff) - lum);
+            }
+          }
+        } else {
+          for (int y = 0; y < h; y++)
+            for (int x = 0; x < w * ps; x++)
+              dstBuf[pitch * y + x] =
+                (byte)Math.abs((dstBuf[pitch * y + x] & 0xff) -
+                               (srcBuf[pitch * y + x] & 0xff));
+        }
+        saveImage(tempStr, dstBuf, w, h, pf);
+      }
+    }
+  }
+
+
+  /* Benchmarks planar YUV encoding of srcBuf (w x h pixels in format pf) and
+     writes the encoded image to <fileName>_<subsampling>.yuv. */
+  static void doTestYUV(byte[] srcBuf, int w, int h, int subsamp,
+                        String fileName) throws Exception {
+    double start, elapsed;
+    int ps = TJ.getPixelSize(pf), i;
+    int yuvSize = TJ.bufSizeYUV(w, h, subsamp);
+    byte[] dstBuf = new byte[yuvSize];
+
+    if (quiet == 0)
+      System.out.format(">>>>>  %s (%s) <--> YUV %s  <<<<<\n",
+        pixFormatStr[pf],
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down",
+        subNameLong[subsamp]);
+    if (quiet == 1)
+      System.out.format("%s\t%s\t%s\tN/A\t", pixFormatStr[pf],
+                        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
+                        subNameLong[subsamp]);
+
+    TJCompressor tjc = new TJCompressor(srcBuf, 0, 0, w, 0, h, pf);
+    tjc.setSubsamp(subsamp);
+
+    /* Execute once to preload cache */
+    tjc.encodeYUV(dstBuf, flags);
+
+    /* Benchmark: keep encoding until benchTime has elapsed */
+    for (i = 0, start = getTime();
+         (elapsed = getTime() - start) < benchTime; i++)
+      tjc.encodeYUV(dstBuf, flags);
+
+    if (quiet == 1)
+      System.out.format("%-4d  %-4d\t", w, h);
+    if (quiet != 0) {
+      System.out.format("%s%c%s%c",
+        sigFig((double)(w * h) / 1000000. * (double) i / elapsed, 4),
+        quiet == 2 ? '\n' : '\t',
+        sigFig((double)(w * h * ps) / (double)yuvSize, 4),
+        quiet == 2 ? '\n' : '\t');
+    } else {
+      System.out.format("\n%s size: %d x %d\n", "Image", w, h);
+      System.out.format("C--> Frame rate:           %f fps\n",
+                        (double)i / elapsed);
+      System.out.format("     Output image size:    %d bytes\n", yuvSize);
+      System.out.format("     Compression ratio:    %f:1\n",
+                        (double)(w * h * ps) / (double)yuvSize);
+      System.out.format("     Source throughput:    %f Megapixels/sec\n",
+                        (double)(w * h) / 1000000. * (double)i / elapsed);
+      System.out.format("     Output bit stream:    %f Megabits/sec\n",
+                        (double)yuvSize * 8. / 1000000. * (double)i / elapsed);
+    }
+    String tempStr = fileName + "_" + subName[subsamp] + ".yuv";
+    FileOutputStream fos = new FileOutputStream(tempStr);
+    try {
+      fos.write(dstBuf, 0, yuvSize);
+    } finally {  /* release the file handle even if the write fails */
+      fos.close();
+    }
+    if (quiet == 0)
+      System.out.println("Reference image written to " + tempStr);
+  }
+
+
+  /* Benchmarks JPEG compression of srcBuf (w x h pixels in format pf), then
+     passes the compressed tiles to decompTest().  With tiling enabled, this is
+     repeated for tile sizes 8, 16, 32, ... until one tile covers the image. */
+  static void doTest(byte[] srcBuf, int w, int h, int subsamp, int jpegQual,
+                     String fileName) throws Exception {
+    double start, elapsed;
+    int totalJpegSize = 0, tilew, tileh, i;
+    int ps = TJ.getPixelSize(pf), ntilesw = 1, ntilesh = 1, pitch = w * ps;
+
+    if (yuv == YUVENCODE) {
+      doTestYUV(srcBuf, w, h, subsamp, fileName);
+      return;
+    }
+
+    byte[] tmpBuf = new byte[pitch * h];
+
+    if (quiet == 0)
+      System.out.format(">>>>>  %s (%s) <--> JPEG %s Q%d  <<<<<\n",
+        pixFormatStr[pf],
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down",
+        subNameLong[subsamp], jpegQual);
+
+    TJCompressor tjc = new TJCompressor();
+
+    for (tilew = doTile ? 8 : w, tileh = doTile ? 8 : h; ;
+         tilew *= 2, tileh *= 2) {
+      tilew = Math.min(tilew, w);
+      tileh = Math.min(tileh, h);
+      ntilesw = (w + tilew - 1) / tilew;
+      ntilesh = (h + tileh - 1) / tileh;
+
+      byte[][] jpegBuf =
+        new byte[ntilesw * ntilesh][TJ.bufSize(tilew, tileh, subsamp)];
+      int[] jpegSize = new int[ntilesw * ntilesh];
+
+      /* Compression test */
+      if (quiet == 1)
+        System.out.format("%s\t%s\t%s\t%d\t", pixFormatStr[pf],
+                          (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
+                          subNameLong[subsamp], jpegQual);
+      for (i = 0; i < h; i++)
+        System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
+      tjc.setSourceImage(srcBuf, 0, 0, tilew, pitch, tileh, pf);
+      tjc.setJPEGQuality(jpegQual);
+      tjc.setSubsamp(subsamp);
+
+      /* Execute once to preload cache */
+      tjc.compress(jpegBuf[0], flags);
+
+      /* Benchmark: compress every tile, repeating until benchTime elapses */
+      for (i = 0, start = getTime();
+           (elapsed = getTime() - start) < benchTime; i++) {
+        int tile = 0;
+        totalJpegSize = 0;
+        for (int y = 0; y < h; y += tileh) {
+          for (int x = 0; x < w; x += tilew, tile++) {
+            int width = Math.min(tilew, w - x);
+            int height = Math.min(tileh, h - y);
+            tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
+            tjc.compress(jpegBuf[tile], flags);
+            jpegSize[tile] = tjc.getCompressedSize();
+            totalJpegSize += jpegSize[tile];
+          }
+        }
+      }
+
+      if (quiet == 1)
+        System.out.format("%-4d  %-4d\t", tilew, tileh);
+      if (quiet != 0) {
+        System.out.format("%s%c%s%c",
+          sigFig((double)(w * h) / 1000000. * (double) i / elapsed, 4),
+          quiet == 2 ? '\n' : '\t',
+          sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
+          quiet == 2 ? '\n' : '\t');
+      } else {
+        System.out.format("\n%s size: %d x %d\n", doTile ? "Tile" : "Image",
+                          tilew, tileh);
+        System.out.format("C--> Frame rate:           %f fps\n",
+                          (double)i / elapsed);
+        System.out.format("     Output image size:    %d bytes\n",
+                          totalJpegSize);
+        System.out.format("     Compression ratio:    %f:1\n",
+                          (double)(w * h * ps) / (double)totalJpegSize);
+        System.out.format("     Source throughput:    %f Megapixels/sec\n",
+                          (double)(w * h) / 1000000. * (double)i / elapsed);
+        System.out.format("     Output bit stream:    %f Megabits/sec\n",
+          (double)totalJpegSize * 8. / 1000000. * (double)i / elapsed);
+      }
+      if (tilew == w && tileh == h) {
+        String tempStr = fileName + "_" + subName[subsamp] + "_" + "Q" +
+                         jpegQual + ".jpg";
+        FileOutputStream fos = new FileOutputStream(tempStr);
+        try {
+          fos.write(jpegBuf[0], 0, jpegSize[0]);
+        } finally {  /* release the file handle even if the write fails */
+          fos.close();
+        }
+        if (quiet == 0)
+          System.out.println("Reference image written to " + tempStr);
+      }
+
+      /* Decompression test */
+      decompTest(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
+                 fileName, tilew, tileh);
+      if (tilew == w && tileh == h) break;
+    }
+  }
+
+
+  /* Benchmarks decompression of the JPEG file fileName, optionally applying a
+     lossless transform first.  With tiling enabled, the test is repeated for
+     tile sizes 16, 32, 64, ... until a single tile covers the image. */
+  static void doDecompTest(String fileName) throws Exception {
+    byte[][] jpegBuf;
+    byte[] srcBuf;
+    int[] jpegSize;
+    int w = 0, h = 0, subsamp = -1, _w, _h, _tilew, _tileh,
+      _ntilesw, _ntilesh, _subsamp, x, y;
+    double start, elapsed;
+    int ps = TJ.getPixelSize(pf), tile, totalJpegSize, srcSize;
+
+    FileInputStream fis = new FileInputStream(fileName);
+    try {
+      srcSize = (int)fis.getChannel().size();
+      srcBuf = new byte[srcSize];
+      /* read() may return fewer bytes than requested, so read until full */
+      for (int off = 0, n; off < srcSize; off += n)
+        if ((n = fis.read(srcBuf, off, srcSize - off)) < 0)
+          throw new Exception("Unexpected end of file in " + fileName);
+    } finally {
+      fis.close();
+    }
+
+    int index = fileName.lastIndexOf('.');
+    if (index >= 0)
+      fileName = new String(fileName.substring(0, index));
+
+    TJTransformer tjt = new TJTransformer();
+    tjt.setJPEGImage(srcBuf, srcSize);
+    w = tjt.getWidth();
+    h = tjt.getHeight();
+    subsamp = tjt.getSubsamp();
+
+    if (quiet == 1) {
+      System.out.println("All performance values in Mpixels/sec\n");
+      System.out.format("Bitmap\tBitmap\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
+                        (doTile ? "Tile " : "Image"),
+                        (doTile ? "Tile " : "Image"));
+      System.out.println("Format\tOrder\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n");
+    } else if (quiet == 0) {
+      System.out.format(">>>>>  JPEG %s --> %s (%s)  <<<<<\n",
+        subNameLong[subsamp], pixFormatStr[pf],
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
+    }
+
+    for (int tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ;
+         tilew *= 2, tileh *= 2) {
+      tilew = Math.min(tilew, w);
+      tileh = Math.min(tileh, h);
+
+      _w = w;  _h = h;  _tilew = tilew;  _tileh = tileh;
+      if (quiet == 0) {
+        System.out.format("\n%s size: %d x %d", (doTile ? "Tile" : "Image"),
+                          _tilew, _tileh);
+        if (sf.getNum() != 1 || sf.getDenom() != 1)
+          System.out.format(" --> %d x %d", sf.getScaled(_w),
+                            sf.getScaled(_h));
+        System.out.println("");
+      } else if (quiet == 1) {
+        System.out.format("%s\t%s\t%s\t", pixFormatStr[pf],
+                          (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
+                          subNameLong[subsamp]);
+        System.out.format("%-4d  %-4d\t", tilew, tileh);
+      }
+
+      _subsamp = subsamp;
+      if (doTile || xformOp != TJTransform.OP_NONE || xformOpt != 0) {
+        if (xformOp == TJTransform.OP_TRANSPOSE ||
+            xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90 ||
+            xformOp == TJTransform.OP_ROT270) {
+          _w = h;  _h = w;  _tilew = tileh;  _tileh = tilew;
+        }
+
+        if ((xformOpt & TJTransform.OPT_GRAY) != 0)
+          _subsamp = TJ.SAMP_GRAY;
+        if (xformOp == TJTransform.OP_HFLIP ||
+            xformOp == TJTransform.OP_ROT180)
+          _w = _w - (_w % TJ.getMCUWidth(_subsamp));
+        if (xformOp == TJTransform.OP_VFLIP ||
+            xformOp == TJTransform.OP_ROT180)
+          _h = _h - (_h % TJ.getMCUHeight(_subsamp));
+        if (xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90)
+          _w = _w - (_w % TJ.getMCUHeight(_subsamp));
+        if (xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT270)
+          _h = _h - (_h % TJ.getMCUWidth(_subsamp));
+        _ntilesw = (_w + _tilew - 1) / _tilew;
+        _ntilesh = (_h + _tileh - 1) / _tileh;
+
+        TJTransform[] t = new TJTransform[_ntilesw * _ntilesh];
+        jpegBuf = new byte[_ntilesw * _ntilesh][TJ.bufSize(_tilew, _tileh, subsamp)];
+
+        for (y = 0, tile = 0; y < _h; y += _tileh) {
+          for (x = 0; x < _w; x += _tilew, tile++) {
+            t[tile] = new TJTransform();
+            t[tile].width = Math.min(_tilew, _w - x);
+            t[tile].height = Math.min(_tileh, _h - y);
+            t[tile].x = x;
+            t[tile].y = y;
+            t[tile].op = xformOp;
+            t[tile].options = xformOpt | TJTransform.OPT_TRIM;
+            if ((t[tile].options & TJTransform.OPT_NOOUTPUT) != 0 &&
+                jpegBuf[tile] != null)
+              jpegBuf[tile] = null;
+          }
+        }
+
+        start = getTime();
+        tjt.transform(jpegBuf, t, flags);
+        jpegSize = tjt.getTransformedSizes();
+        elapsed = getTime() - start;
+
+        for (tile = 0, totalJpegSize = 0; tile < _ntilesw * _ntilesh; tile++)
+          totalJpegSize += jpegSize[tile];
+
+        if (quiet != 0) {
+          System.out.format("%s%c%s%c",
+            sigFig((double)(w * h) / 1000000. / elapsed, 4),
+            quiet == 2 ? '\n' : '\t',
+            sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
+            quiet == 2 ? '\n' : '\t');
+        } else {
+          System.out.format("X--> Frame rate:           %f fps\n",
+                            1.0 / elapsed);
+          System.out.format("     Output image size:    %d bytes\n",
+                            totalJpegSize);
+          System.out.format("     Compression ratio:    %f:1\n",
+                            (double)(w * h * ps) / (double)totalJpegSize);
+          System.out.format("     Source throughput:    %f Megapixels/sec\n",
+                            (double)(w * h) / 1000000. / elapsed);
+          System.out.format("     Output bit stream:    %f Megabits/sec\n",
+                            (double)totalJpegSize * 8. / 1000000. / elapsed);
+        }
+      } else {
+        if (quiet == 1)
+          System.out.print("N/A\tN/A\t");
+        /* Metadata can make the file larger than TJ.bufSize() predicts */
+        jpegBuf = new byte[1][Math.max(srcSize,
+                                       TJ.bufSize(_tilew, _tileh, subsamp))];
+        jpegSize = new int[] { srcSize };
+        System.arraycopy(srcBuf, 0, jpegBuf[0], 0, srcSize);
+      }
+
+      if (w == tilew)
+        _tilew = _w;
+      if (h == tileh)
+        _tileh = _h;
+      if ((xformOpt & TJTransform.OPT_NOOUTPUT) == 0)
+        decompTest(null, jpegBuf, jpegSize, null, _w, _h, _subsamp, 0,
+                   fileName, _tilew, _tileh);
+      else if (quiet == 1)
+        System.out.println("N/A");
+
+      jpegBuf = null;
+      jpegSize = null;
+
+      if (tilew == w && tileh == h) break;
+    }
+  }
+
+
+  /* Prints the command-line usage summary, including the scaling factors
+     reported by TJ.getScalingFactors(), then terminates the JVM with exit
+     status 1. */
+  static void usage() throws Exception {
+    TJScalingFactor[] scalingFactors = TJ.getScalingFactors();
+    int nsf = scalingFactors.length;
+    String className = TJBench.class.getName();
+
+    System.out.println("\nUSAGE: java " + className);
+    System.out.println("       <Inputfile (BMP)> <Quality> [options]\n");
+    System.out.println("       java " + className);
+    System.out.println("       <Inputfile (JPG)> [options]\n");
+    System.out.println("Options:\n");
+    System.out.println("-alloc = Dynamically allocate JPEG image buffers");
+    System.out.println("-bottomup = Test bottom-up compression/decompression");
+    System.out.println("-tile = Test performance of the codec when the image is encoded as separate");
+    System.out.println("     tiles of varying sizes.");
+    System.out.println("-forcemmx, -forcesse, -forcesse2, -forcesse3 =");
+    System.out.println("     Force MMX, SSE, SSE2, or SSE3 code paths in the underlying codec");
+    System.out.println("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =");
+    System.out.println("     Test the specified color conversion path in the codec (default: BGR)");
+    System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in");
+    System.out.println("     the underlying codec");
+    System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying");
+    System.out.println("     codec");
+    System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the");
+    System.out.println("     underlying codec");
+    System.out.println("-subsamp <s> = When testing JPEG compression, this option specifies the level");
+    System.out.println("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, or GRAY).");
+    System.out.println("     The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence.");
+    System.out.println("-quiet = Output results in tabular rather than verbose format");
+    System.out.println("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG");
+    System.out.println("-yuvdecode = Decode JPEG image to planar YUV rather than RGB");
+    System.out.println("-scale M/N = scale down the width/height of the decompressed JPEG image by a");
+    System.out.print  ("     factor of M/N (M/N = ");
+    /* Print the factors as "a, b, ..., or z" (just "a or b" for two), starting
+       a new output line after every factor whose index is a multiple of 8 */
+    for (int i = 0; i < nsf; i++) {
+      System.out.format("%d/%d", scalingFactors[i].getNum(),
+                        scalingFactors[i].getDenom());
+      if (nsf == 2 && i != nsf - 1)
+        System.out.print(" or ");
+      else if (nsf > 2 && i != nsf - 1)
+        System.out.print(i == nsf - 2 ? ", or " : ", ");
+      if (i % 8 == 0 && i != 0)
+        System.out.print("\n     ");
+    }
+    System.out.println(")");
+    System.out.println("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =");
+    System.out.println("     Perform the corresponding lossless transform prior to");
+    System.out.println("     decompression (these options are mutually exclusive)");
+    System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression");
+    System.out.println("     test (can be combined with the other transforms above)");
+    System.out.println("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n");
+    System.out.println("NOTE:  If the quality is specified as a range (e.g. 90-100), a separate");
+    System.out.println("test will be performed for all quality values in the range.\n");
+    System.exit(1);
+  }
+
+
+  /* Entry point.  Parses the command line, loads the source image unless the
+     input file is a JPEG, and runs the selected benchmarks.  Any exception is
+     reported on stdout and turns the exit status into -1. */
+  public static void main(String[] argv) {
+    byte[] srcBuf = null;  int w = 0, h = 0;
+    int minQual = -1, maxQual = -1, subsamp = -1;
+    int minArg = 1;  int retval = 0;
+
+    try {
+      if (argv.length < minArg)
+        usage();
+
+      String tempStr = argv[0].toLowerCase();
+      if (tempStr.endsWith(".jpg") || tempStr.endsWith(".jpeg"))
+        decompOnly = true;
+
+      System.out.println("");
+
+      for (int i = minArg; i < argv.length; i++) {
+        if (argv[i].equalsIgnoreCase("-yuvencode")) {
+          System.out.println("Testing YUV planar encoding\n");
+          yuv = YUVENCODE;  maxQual = minQual = 100;
+        }
+        if (argv[i].equalsIgnoreCase("-yuvdecode")) {
+          System.out.println("Testing YUV planar decoding\n");
+          yuv = YUVDECODE;
+        }
+      }
+
+      if (!decompOnly && yuv != YUVENCODE) {
+        minArg = 2;
+        if (argv.length < minArg)
+          usage();
+        /* Quality is either "N" or a range "N-M"; take N from the text before
+           the dash so that a range does not fail to parse */
+        int dashIndex = argv[1].indexOf('-');
+        try {
+          minQual = Integer.parseInt(
+            dashIndex > 0 ? argv[1].substring(0, dashIndex) : argv[1]);
+        } catch (NumberFormatException e) {}
+        if (minQual < 1 || minQual > 100)
+          throw new Exception("Quality must be between 1 and 100.");
+        if (dashIndex > 0 && argv[1].length() > dashIndex + 1) {
+          try {
+            maxQual = Integer.parseInt(argv[1].substring(dashIndex + 1));
+          } catch (NumberFormatException e) {}
+        }
+        if (maxQual < 1 || maxQual > 100)
+          maxQual = minQual;
+      }
+
+      if (argv.length > minArg) {
+        for (int i = minArg; i < argv.length; i++) {
+          if (argv[i].equalsIgnoreCase("-tile")) {
+            doTile = true;  xformOpt |= TJTransform.OPT_CROP;
+          }
+          if (argv[i].equalsIgnoreCase("-forcesse3")) {
+            System.out.println("Forcing SSE3 code\n");
+            flags |= TJ.FLAG_FORCESSE3;
+          }
+          if (argv[i].equalsIgnoreCase("-forcesse2")) {
+            System.out.println("Forcing SSE2 code\n");
+            flags |= TJ.FLAG_FORCESSE2;
+          }
+          if (argv[i].equalsIgnoreCase("-forcesse")) {
+            System.out.println("Forcing SSE code\n");
+            flags |= TJ.FLAG_FORCESSE;
+          }
+          if (argv[i].equalsIgnoreCase("-forcemmx")) {
+            System.out.println("Forcing MMX code\n");
+            flags |= TJ.FLAG_FORCEMMX;
+          }
+          if (argv[i].equalsIgnoreCase("-fastupsample")) {
+            System.out.println("Using fast upsampling code\n");
+            flags |= TJ.FLAG_FASTUPSAMPLE;
+          }
+          if (argv[i].equalsIgnoreCase("-fastdct")) {
+            System.out.println("Using fastest DCT/IDCT algorithm\n");
+            flags |= TJ.FLAG_FASTDCT;
+          }
+          if (argv[i].equalsIgnoreCase("-accuratedct")) {
+            System.out.println("Using most accurate DCT/IDCT algorithm\n");
+            flags |= TJ.FLAG_ACCURATEDCT;
+          }
+          if (argv[i].equalsIgnoreCase("-rgb"))
+            pf = TJ.PF_RGB;
+          if (argv[i].equalsIgnoreCase("-rgbx"))
+            pf = TJ.PF_RGBX;
+          if (argv[i].equalsIgnoreCase("-bgr"))
+            pf = TJ.PF_BGR;
+          if (argv[i].equalsIgnoreCase("-bgrx"))
+            pf = TJ.PF_BGRX;
+          if (argv[i].equalsIgnoreCase("-xbgr"))
+            pf = TJ.PF_XBGR;
+          if (argv[i].equalsIgnoreCase("-xrgb"))
+            pf = TJ.PF_XRGB;
+          if (argv[i].equalsIgnoreCase("-bottomup"))
+            flags |= TJ.FLAG_BOTTOMUP;
+          if (argv[i].equalsIgnoreCase("-quiet"))
+            quiet = 1;
+          if (argv[i].equalsIgnoreCase("-qq"))
+            quiet = 2;
+          if (argv[i].equalsIgnoreCase("-scale") && i < argv.length - 1) {
+            int temp1 = 0, temp2 = 0;
+            boolean match = false;
+            Scanner scanner = new Scanner(argv[++i]).useDelimiter("/");
+            try {
+              temp1 = scanner.nextInt();
+              temp2 = scanner.nextInt();
+            } catch(Exception e) {}
+            if (temp2 <= 0) temp2 = 1;
+            if (temp1 > 0) {
+              TJScalingFactor[] scalingFactors = TJ.getScalingFactors();
+              for (int j = 0; j < scalingFactors.length; j++) {
+                if ((double)temp1 / (double)temp2 ==
+                    (double)scalingFactors[j].getNum() /
+                    (double)scalingFactors[j].getDenom()) {
+                  sf = scalingFactors[j];
+                  match = true;   break;
+                }
+              }
+              if (!match) usage();
+            } else
+              usage();
+          }
+          if (argv[i].equalsIgnoreCase("-hflip"))
+            xformOp = TJTransform.OP_HFLIP;
+          if (argv[i].equalsIgnoreCase("-vflip"))
+            xformOp = TJTransform.OP_VFLIP;
+          if (argv[i].equalsIgnoreCase("-transpose"))
+            xformOp = TJTransform.OP_TRANSPOSE;
+          if (argv[i].equalsIgnoreCase("-transverse"))
+            xformOp = TJTransform.OP_TRANSVERSE;
+          if (argv[i].equalsIgnoreCase("-rot90"))
+            xformOp = TJTransform.OP_ROT90;
+          if (argv[i].equalsIgnoreCase("-rot180"))
+            xformOp = TJTransform.OP_ROT180;
+          if (argv[i].equalsIgnoreCase("-rot270"))
+            xformOp = TJTransform.OP_ROT270;
+          if (argv[i].equalsIgnoreCase("-grayscale"))
+            xformOpt |= TJTransform.OPT_GRAY;
+          if (argv[i].equalsIgnoreCase("-nooutput"))
+            xformOpt |= TJTransform.OPT_NOOUTPUT;
+          if (argv[i].equalsIgnoreCase("-benchtime") && i < argv.length - 1) {
+            double temp = -1;
+            try {
+              temp = Double.parseDouble(argv[++i]);
+            } catch (NumberFormatException e) {}
+            if (temp > 0.0)
+              benchTime = temp;
+            else
+              usage();
+          }
+          if (argv[i].equalsIgnoreCase("-subsamp") && i < argv.length - 1) {
+            i++;
+            if (argv[i].toUpperCase().startsWith("G"))
+              subsamp = TJ.SAMP_GRAY;
+            else if (argv[i].equals("444"))
+              subsamp = TJ.SAMP_444;
+            else if (argv[i].equals("422"))
+              subsamp = TJ.SAMP_422;
+            else if (argv[i].equals("440"))
+              subsamp = TJ.SAMP_440;
+            else if (argv[i].equals("420"))
+              subsamp = TJ.SAMP_420;
+          }
+          if (argv[i].equalsIgnoreCase("-?"))
+            usage();
+        }
+      }
+
+      if (sf == null)
+        sf = new TJScalingFactor(1, 1);
+
+      if ((sf.getNum() != 1 || sf.getDenom() != 1) && doTile) {
+        System.out.println("Disabling tiled compression/decompression tests, because those tests do not");
+        System.out.println("work when scaled decompression is enabled.");
+        doTile = false;
+      }
+
+      if (yuv != 0 && doTile) {
+        System.out.println("Disabling tiled compression/decompression tests, because those tests do not");
+        System.out.println("work when YUV encoding or decoding is enabled.\n");
+        doTile = false;
+      }
+
+      if (!decompOnly) {
+        int[] width = new int[1], height = new int[1];
+        srcBuf = loadImage(argv[0], width, height, pf);
+        w = width[0];  h = height[0];
+        int index = -1;
+        if ((index = argv[0].lastIndexOf('.')) >= 0)
+          argv[0] = argv[0].substring(0, index);
+      }
+
+      if (quiet == 1 && !decompOnly) {
+        System.out.println("All performance values in Mpixels/sec\n");
+        System.out.format("Bitmap\tBitmap\tJPEG\tJPEG\t%s %s \tComp\tComp\tDecomp\n",
+          (doTile ? "Tile " : "Image"), (doTile ? "Tile " : "Image"));
+        System.out.println("Format\tOrder\tSubsamp\tQual\tWidth Height\tPerf \tRatio\tPerf\n");
+      }
+
+      if (decompOnly) {
+        doDecompTest(argv[0]);
+        System.out.println("");
+        System.exit(retval);
+      }
+
+      System.gc();
+      if (subsamp >= 0 && subsamp < TJ.NUMSAMP) {
+        for (int i = maxQual; i >= minQual; i--)
+          doTest(srcBuf, w, h, subsamp, i, argv[0]);
+        System.out.println("");
+      } else {
+        for (int i = maxQual; i >= minQual; i--)
+          doTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          doTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          doTest(srcBuf, w, h, TJ.SAMP_422, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          doTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
+        System.out.println("");
+      }
+    } catch (Exception e) {
+      System.out.println("ERROR: " + e.getMessage());
+      e.printStackTrace();
+      retval = -1;
+    }
+    System.exit(retval);
+  }
+
+}
diff --git a/java/TJExample.java b/java/TJExample.java
index e726892..1ae350a 100644
--- a/java/TJExample.java
+++ b/java/TJExample.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011-2012 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2012, 2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -51,12 +51,15 @@
     System.out.println("Options:\n");
     System.out.println("-scale M/N = if the input image is a JPEG file, scale the width/height of the");
     System.out.print("             output image by a factor of M/N (M/N = ");
-    for(int i = 0; i < sf.length; i++) {
+    for (int i = 0; i < sf.length; i++) {
       System.out.print(sf[i].getNum() + "/" + sf[i].getDenom());
-      if(sf.length == 2 && i != sf.length - 1) System.out.print(" or ");
-      else if(sf.length > 2) {
-        if(i != sf.length - 1) System.out.print(", ");
-        if(i == sf.length - 2) System.out.print("or ");
+      if (sf.length == 2 && i != sf.length - 1)
+        System.out.print(" or ");
+      else if (sf.length > 2) {
+        if (i != sf.length - 1)
+          System.out.print(", ");
+        if (i == sf.length - 2)
+          System.out.print("or ");
       }
     }
     System.out.println(")\n");
@@ -90,13 +93,14 @@
     System.exit(1);
   }
 
-  private final static String sampName[] = {
+  private static final String[] sampName = {
     "4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0"
   };
 
-  public static void main(String argv[]) {
+  public static void main(String[] argv) {
 
-    BufferedImage img = null;  byte[] bmpBuf = null;
+    BufferedImage img = null;
+    byte[] bmpBuf = null;
     TJTransform xform = new TJTransform();
     int flags = 0;
 
@@ -104,7 +108,7 @@
 
       sf = TJ.getScalingFactors();
 
-      if(argv.length < 2) {
+      if (argv.length < 2) {
         usage();
       }
 
@@ -113,119 +117,134 @@
       int outSubsamp = -1, outQual = 95;
       boolean display = false;
 
-      if(argv.length > 1) {
-        for(int i = 1; i < argv.length; i++) {
-          if(argv[i].length() < 2) continue;
-          if(argv[i].length() > 2
-            && argv[i].substring(0, 3).equalsIgnoreCase("-sc")) {
+      if (argv.length > 1) {
+        for (int i = 1; i < argv.length; i++) {
+          if (argv[i].length() < 2)
+            continue;
+          if (argv[i].length() > 2 &&
+              argv[i].substring(0, 3).equalsIgnoreCase("-sc")) {
             int match = 0;
-            if(i < argv.length - 1) {
+            if (i < argv.length - 1) {
               String[] scaleArg = argv[++i].split("/");
-              if(scaleArg.length == 2) {
+              if (scaleArg.length == 2) {
                 TJScalingFactor tempsf =
                   new TJScalingFactor(Integer.parseInt(scaleArg[0]),
-                    Integer.parseInt(scaleArg[1]));
-                for(int j = 0; j < sf.length; j++) {
-                  if(tempsf.equals(sf[j])) {
+                                      Integer.parseInt(scaleArg[1]));
+                for (int j = 0; j < sf.length; j++) {
+                  if (tempsf.equals(sf[j])) {
                     scaleFactor = sf[j];
-                    match = 1;  break;
+                    match = 1;
+                    break;
                   }
                 }
               }
             }
-            if(match != 1) usage();
+            if (match != 1) usage();
           }
-          if(argv[i].equalsIgnoreCase("-h") || argv[i].equalsIgnoreCase("-?"))
+          if (argv[i].equalsIgnoreCase("-h") || argv[i].equalsIgnoreCase("-?"))
             usage();
-          if(argv[i].length() > 2
-            && argv[i].substring(0, 3).equalsIgnoreCase("-sa")) {
-            if(i < argv.length - 1) {
+          if (argv[i].length() > 2 &&
+              argv[i].substring(0, 3).equalsIgnoreCase("-sa")) {
+            if (i < argv.length - 1) {
               i++;
-              if(argv[i].substring(0, 1).equalsIgnoreCase("g"))
+              if (argv[i].substring(0, 1).equalsIgnoreCase("g"))
                 outSubsamp = TJ.SAMP_GRAY;
-              else if(argv[i].equals("444")) outSubsamp = TJ.SAMP_444;
-              else if(argv[i].equals("422")) outSubsamp = TJ.SAMP_422;
-              else if(argv[i].equals("420")) outSubsamp = TJ.SAMP_420;
-              else usage();
-            }
-            else usage();
+              else if (argv[i].equals("444"))
+                outSubsamp = TJ.SAMP_444;
+              else if (argv[i].equals("422"))
+                outSubsamp = TJ.SAMP_422;
+              else if (argv[i].equals("420"))
+                outSubsamp = TJ.SAMP_420;
+              else
+                usage();
+            } else
+              usage();
           }
-          if(argv[i].substring(0, 2).equalsIgnoreCase("-q")) {
-            if(i < argv.length - 1) {
+          if (argv[i].substring(0, 2).equalsIgnoreCase("-q")) {
+            if (i < argv.length - 1) {
               int qual = Integer.parseInt(argv[++i]);
-              if(qual >= 1 && qual <= 100) outQual = qual;
-              else usage();
-            }
-            else usage();
+              if (qual >= 1 && qual <= 100)
+                outQual = qual;
+              else
+                usage();
+            } else
+              usage();
           }
-          if(argv[i].substring(0, 2).equalsIgnoreCase("-g"))
+          if (argv[i].substring(0, 2).equalsIgnoreCase("-g"))
             xform.options |= TJTransform.OPT_GRAY;
-          if(argv[i].equalsIgnoreCase("-hflip"))
+          if (argv[i].equalsIgnoreCase("-hflip"))
             xform.op = TJTransform.OP_HFLIP;
-          if(argv[i].equalsIgnoreCase("-vflip"))
+          if (argv[i].equalsIgnoreCase("-vflip"))
             xform.op = TJTransform.OP_VFLIP;
-          if(argv[i].equalsIgnoreCase("-transpose"))
+          if (argv[i].equalsIgnoreCase("-transpose"))
             xform.op = TJTransform.OP_TRANSPOSE;
-          if(argv[i].equalsIgnoreCase("-transverse"))
+          if (argv[i].equalsIgnoreCase("-transverse"))
             xform.op = TJTransform.OP_TRANSVERSE;
-          if(argv[i].equalsIgnoreCase("-rot90"))
+          if (argv[i].equalsIgnoreCase("-rot90"))
             xform.op = TJTransform.OP_ROT90;
-          if(argv[i].equalsIgnoreCase("-rot180"))
+          if (argv[i].equalsIgnoreCase("-rot180"))
             xform.op = TJTransform.OP_ROT180;
-          if(argv[i].equalsIgnoreCase("-rot270"))
+          if (argv[i].equalsIgnoreCase("-rot270"))
             xform.op = TJTransform.OP_ROT270;
-          if(argv[i].equalsIgnoreCase("-custom"))
+          if (argv[i].equalsIgnoreCase("-custom"))
             xform.cf = new TJExample();
-          else if(argv[i].length() > 2
-            && argv[i].substring(0, 2).equalsIgnoreCase("-c")) {
-            if(i >= argv.length - 1) usage();
+          else if (argv[i].length() > 2 &&
+                   argv[i].substring(0, 2).equalsIgnoreCase("-c")) {
+            if (i >= argv.length - 1)
+              usage();
             String[] cropArg = argv[++i].split(",");
-            if(cropArg.length != 3) usage();
+            if (cropArg.length != 3)
+              usage();
             String[] dimArg = cropArg[2].split("[xX]");
-            if(dimArg.length != 2) usage();
+            if (dimArg.length != 2)
+              usage();
             int tempx = Integer.parseInt(cropArg[0]);
             int tempy = Integer.parseInt(cropArg[1]);
             int tempw = Integer.parseInt(dimArg[0]);
             int temph = Integer.parseInt(dimArg[1]);
-            if(tempx < 0 || tempy < 0 || tempw < 0 || temph < 0) usage();
-            xform.x = tempx;  xform.y = tempy;
-            xform.width = tempw;  xform.height = temph;
+            if (tempx < 0 || tempy < 0 || tempw < 0 || temph < 0)
+              usage();
+            xform.x = tempx;
+            xform.y = tempy;
+            xform.width = tempw;
+            xform.height = temph;
             xform.options |= TJTransform.OPT_CROP;
           }
-          if(argv[i].substring(0, 2).equalsIgnoreCase("-d"))
+          if (argv[i].substring(0, 2).equalsIgnoreCase("-d"))
             display = true;
-          if(argv[i].equalsIgnoreCase("-fastupsample")) {
+          if (argv[i].equalsIgnoreCase("-fastupsample")) {
             System.out.println("Using fast upsampling code");
             flags |= TJ.FLAG_FASTUPSAMPLE;
           }
-          if(argv[i].equalsIgnoreCase("-fastdct")) {
+          if (argv[i].equalsIgnoreCase("-fastdct")) {
             System.out.println("Using fastest DCT/IDCT algorithm");
             flags |= TJ.FLAG_FASTDCT;
           }
-          if(argv[i].equalsIgnoreCase("-accuratedct")) {
+          if (argv[i].equalsIgnoreCase("-accuratedct")) {
             System.out.println("Using most accurate DCT/IDCT algorithm");
             flags |= TJ.FLAG_ACCURATEDCT;
           }
         }
       }
       String[] inFileTokens = argv[0].split("\\.");
-      if(inFileTokens.length > 1)
+      if (inFileTokens.length > 1)
         inFormat = inFileTokens[inFileTokens.length - 1];
       String[] outFileTokens;
-      if(display) outFormat = "bmp";
+      if (display)
+        outFormat = "bmp";
       else {
         outFileTokens = argv[1].split("\\.");
-        if(outFileTokens.length > 1)
+        if (outFileTokens.length > 1)
           outFormat = outFileTokens[outFileTokens.length - 1];
       }
 
       File file = new File(argv[0]);
       int width, height;
 
-      if(inFormat.equalsIgnoreCase("jpg")) {
+      if (inFormat.equalsIgnoreCase("jpg")) {
         FileInputStream fis = new FileInputStream(file);
         int inputSize = fis.available();
-        if(inputSize < 1) {
+        if (inputSize < 1) {
           System.out.println("Input file contains no data");
           System.exit(1);
         }
@@ -234,27 +253,28 @@
         fis.close();
 
         TJDecompressor tjd;
-        if(xform.op != TJTransform.OP_NONE || xform.options != 0
-          || xform.cf != null) {
+        if (xform.op != TJTransform.OP_NONE || xform.options != 0 ||
+            xform.cf != null) {
           TJTransformer tjt = new TJTransformer(inputBuf);
-          TJTransform t[] = new TJTransform[1];
+          TJTransform[] t = new TJTransform[1];
           t[0] = xform;
           t[0].options |= TJTransform.OPT_TRIM;
           TJDecompressor[] tjdx = tjt.transform(t, 0);
           tjd = tjdx[0];
-        }
-        else tjd = new TJDecompressor(inputBuf);
+        } else
+          tjd = new TJDecompressor(inputBuf);
 
         width = tjd.getWidth();
         height = tjd.getHeight();
         int inSubsamp = tjd.getSubsamp();
-        System.out.println("Source Image: " + width + " x " + height
-          + " pixels, " + sampName[inSubsamp] + " subsampling");
-        if(outSubsamp < 0) outSubsamp = inSubsamp;
+        System.out.println("Source Image: " + width + " x " + height +
+                           " pixels, " + sampName[inSubsamp] + " subsampling");
+        if (outSubsamp < 0)
+          outSubsamp = inSubsamp;
 
-        if(outFormat.equalsIgnoreCase("jpg")
-          && (xform.op != TJTransform.OP_NONE || xform.options != 0)
-          && scaleFactor.isOne()) {
+        if (outFormat.equalsIgnoreCase("jpg") &&
+            (xform.op != TJTransform.OP_NONE || xform.options != 0) &&
+            scaleFactor.isOne()) {
           file = new File(argv[1]);
           FileOutputStream fos = new FileOutputStream(file);
           fos.write(tjd.getJPEGBuf(), 0, tjd.getJPEGSize());
@@ -265,46 +285,48 @@
         width = scaleFactor.getScaled(width);
         height = scaleFactor.getScaled(height);
 
-        if(!outFormat.equalsIgnoreCase("jpg"))
+        if (!outFormat.equalsIgnoreCase("jpg"))
           img = tjd.decompress(width, height, BufferedImage.TYPE_INT_RGB,
                                flags);
-        else bmpBuf = tjd.decompress(width, 0, height, TJ.PF_BGRX, flags);
+        else
+          bmpBuf = tjd.decompress(width, 0, height, TJ.PF_BGRX, flags);
         tjd.close();
-      }
-      else {
+      } else {
         img = ImageIO.read(file);
+        if (img == null)
+          throw new Exception("Input image type not supported.");
         width = img.getWidth();
         height = img.getHeight();
-        if(outSubsamp < 0) {
-          if(img.getType() == BufferedImage.TYPE_BYTE_GRAY)
+        if (outSubsamp < 0) {
+          if (img.getType() == BufferedImage.TYPE_BYTE_GRAY)
             outSubsamp = TJ.SAMP_GRAY;
-          else outSubsamp = TJ.SAMP_444;
+          else
+            outSubsamp = TJ.SAMP_444;
         }
       }
       System.gc();
-      if(!display)
-        System.out.print("Dest. Image (" + outFormat + "):  " + width + " x "
-          + height + " pixels");
+      if (!display)
+        System.out.print("Dest. Image (" + outFormat + "):  " + width + " x " +
+                         height + " pixels");
 
-      if(display) {
+      if (display) {
         ImageIcon icon = new ImageIcon(img);
         JLabel label = new JLabel(icon, JLabel.CENTER);
         JOptionPane.showMessageDialog(null, label, "Output Image",
-          JOptionPane.PLAIN_MESSAGE);
-      }
-      else if(outFormat.equalsIgnoreCase("jpg")) {
-        System.out.println(", " + sampName[outSubsamp]
-          + " subsampling, quality = " + outQual);
+                                      JOptionPane.PLAIN_MESSAGE);
+      } else if (outFormat.equalsIgnoreCase("jpg")) {
+        System.out.println(", " + sampName[outSubsamp] +
+                           " subsampling, quality = " + outQual);
         TJCompressor tjc = new TJCompressor();
         int jpegSize;
         byte[] jpegBuf;
 
         tjc.setSubsamp(outSubsamp);
         tjc.setJPEGQuality(outQual);
-        if(img != null)
+        if (img != null)
           jpegBuf = tjc.compress(img, flags);
         else {
-          tjc.setSourceImage(bmpBuf, width, 0, height, TJ.PF_BGRX);
+          tjc.setSourceImage(bmpBuf, 0, 0, width, 0, height, TJ.PF_BGRX);
           jpegBuf = tjc.compress(flags);
         }
         jpegSize = tjc.getCompressedSize();
@@ -314,27 +336,26 @@
         FileOutputStream fos = new FileOutputStream(file);
         fos.write(jpegBuf, 0, jpegSize);
         fos.close();
-      }
-      else {
+      } else {
         System.out.print("\n");
         file = new File(argv[1]);
         ImageIO.write(img, outFormat, file);
       }
 
-    }
-    catch(Exception e) {
+    } catch(Exception e) {
       e.printStackTrace();
       System.exit(-1);
     }
   }
 
   public void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion,
-    Rectangle planeRegion, int componentIndex, int transformIndex,
-    TJTransform transform) throws Exception {
-    for(int i=0; i<bufferRegion.width*bufferRegion.height; i++) {
-	    coeffBuffer.put(i, (short)(-coeffBuffer.get(i)));
+                           Rectangle planeRegion, int componentIndex,
+                           int transformIndex, TJTransform transform)
+                           throws Exception {
+    for (int i = 0; i < bufferRegion.width * bufferRegion.height; i++) {
+      coeffBuffer.put(i, (short)(-coeffBuffer.get(i)));
     }
   }
 
-  static TJScalingFactor sf [] = null;
+  static TJScalingFactor[] sf = null;
 };
diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java
index eaebf20..ac91a3d 100644
--- a/java/TJUnitTest.java
+++ b/java/TJUnitTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011-2012, 2014 D. R. Commander.  
+ * Copyright (C)2011-2012, 2014-2015 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -50,48 +50,48 @@
     System.exit(1);
   }
 
-  private final static String subNameLong[] = {
+  private static final String[] subNameLong = {
     "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
   };
-  private final static String subName[] = {
+  private static final String[] subName = {
     "444", "422", "420", "GRAY", "440"
   };
 
-  private final static String pixFormatStr[] = {
+  private static final String[] pixFormatStr = {
     "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale",
     "RGBA", "BGRA", "ABGR", "ARGB"
   };
 
-  private final static int alphaOffset[] = {
+  private static final int[] alphaOffset = {
     -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0
   };
 
-  private final static int _3byteFormats[] = {
+  private static final int[] _3byteFormats = {
     TJ.PF_RGB, TJ.PF_BGR
   };
-  private final static int _3byteFormatsBI[] = {
+  private static final int[] _3byteFormatsBI = {
     BufferedImage.TYPE_3BYTE_BGR
   };
-  private final static int _4byteFormats[] = {
+  private static final int[] _4byteFormats = {
     TJ.PF_RGBX, TJ.PF_BGRX, TJ.PF_XBGR, TJ.PF_XRGB
   };
-  private final static int _4byteFormatsBI[] = {
+  private static final int[] _4byteFormatsBI = {
     BufferedImage.TYPE_INT_BGR, BufferedImage.TYPE_INT_RGB,
     BufferedImage.TYPE_4BYTE_ABGR, BufferedImage.TYPE_4BYTE_ABGR_PRE,
     BufferedImage.TYPE_INT_ARGB, BufferedImage.TYPE_INT_ARGB_PRE
   };
-  private final static int onlyGray[] = {
+  private static final int[] onlyGray = {
     TJ.PF_GRAY
   };
-  private final static int onlyGrayBI[] = {
+  private static final int[] onlyGrayBI = {
     BufferedImage.TYPE_BYTE_GRAY
   };
-  private final static int onlyRGB[] = {
+  private static final int[] onlyRGB = {
     TJ.PF_RGB
   };
 
-  private final static int YUVENCODE = 1;
-  private final static int YUVDECODE = 2;
+  private static final int YUVENCODE = 1;
+  private static final int YUVDECODE = 2;
   private static int yuv = 0;
   private static boolean bi = false;
 
@@ -104,22 +104,22 @@
         return TJ.PF_BGR;
       case BufferedImage.TYPE_4BYTE_ABGR:
       case BufferedImage.TYPE_4BYTE_ABGR_PRE:
-        return TJ.PF_XBGR;
+        return TJ.PF_ABGR;
       case BufferedImage.TYPE_BYTE_GRAY:
         return TJ.PF_GRAY;
       case BufferedImage.TYPE_INT_BGR:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           return TJ.PF_XBGR;
         else
           return TJ.PF_RGBX;
       case BufferedImage.TYPE_INT_RGB:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           return TJ.PF_XRGB;
         else
           return TJ.PF_BGRX;
       case BufferedImage.TYPE_INT_ARGB:
       case BufferedImage.TYPE_INT_ARGB_PRE:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           return TJ.PF_ARGB;
         else
           return TJ.PF_BGRA;
@@ -148,14 +148,13 @@
     }
     return "Unknown";
   }
-  
 
   private static double getTime() {
     return (double)System.nanoTime() / 1.0e9;
   }
 
   private static void initBuf(byte[] buf, int w, int pitch, int h, int pf,
-    int flags) throws Exception {
+                              int flags) throws Exception {
     int roffset = TJ.getRedOffset(pf);
     int goffset = TJ.getGreenOffset(pf);
     int boffset = TJ.getBlueOffset(pf);
@@ -164,42 +163,46 @@
     int index, row, col, halfway = 16;
 
     Arrays.fill(buf, (byte)0);
-    if(pf == TJ.PF_GRAY) {
-      for(row = 0; row < h; row++) {
-        for(col = 0; col < w; col++) {
-          if((flags & TJ.FLAG_BOTTOMUP) != 0)
+    if (pf == TJ.PF_GRAY) {
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
             index = pitch * (h - row - 1) + col;
-          else index = pitch * row + col;
-          if(((row / 8) + (col / 8)) % 2 == 0)
+          else
+            index = pitch * row + col;
+          if (((row / 8) + (col / 8)) % 2 == 0)
             buf[index] = (row < halfway) ? (byte)255 : 0;
-          else buf[index] = (row < halfway) ? 76 : (byte)226;
+          else
+            buf[index] = (row < halfway) ? 76 : (byte)226;
         }
       }
       return;
     }
-    for(row = 0; row < h; row++) {
-      for(col = 0; col < w; col++) {
-        if((flags & TJ.FLAG_BOTTOMUP) != 0)
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        if ((flags & TJ.FLAG_BOTTOMUP) != 0)
           index = pitch * (h - row - 1) + col * ps;
-        else index = pitch * row + col * ps;
-        if(((row / 8) + (col / 8)) % 2 == 0) {
-          if(row < halfway) {
+        else
+          index = pitch * row + col * ps;
+        if (((row / 8) + (col / 8)) % 2 == 0) {
+          if (row < halfway) {
             buf[index + roffset] = (byte)255;
             buf[index + goffset] = (byte)255;
             buf[index + boffset] = (byte)255;
           }
-        }
-        else {
+        } else {
           buf[index + roffset] = (byte)255;
-          if(row >= halfway) buf[index + goffset] = (byte)255;
+          if (row >= halfway)
+            buf[index + goffset] = (byte)255;
         }
-        if (aoffset >= 0) buf[index + aoffset] = (byte)255;
+        if (aoffset >= 0)
+          buf[index + aoffset] = (byte)255;
       }
     }
   }
 
   private static void initIntBuf(int[] buf, int w, int pitch, int h, int pf,
-    int flags) throws Exception {
+                                 int flags) throws Exception {
     int rshift = TJ.getRedOffset(pf) * 8;
     int gshift = TJ.getGreenOffset(pf) * 8;
     int bshift = TJ.getBlueOffset(pf) * 8;
@@ -207,43 +210,44 @@
     int index, row, col, halfway = 16;
 
     Arrays.fill(buf, 0);
-    for(row = 0; row < h; row++) {
-      for(col = 0; col < w; col++) {
-        if((flags & TJ.FLAG_BOTTOMUP) != 0)
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        if ((flags & TJ.FLAG_BOTTOMUP) != 0)
           index = pitch * (h - row - 1) + col;
-        else index = pitch * row + col;
-        if(((row / 8) + (col / 8)) % 2 == 0) {
-          if(row < halfway) {
+        else
+          index = pitch * row + col;
+        if (((row / 8) + (col / 8)) % 2 == 0) {
+          if (row < halfway) {
             buf[index] |= (255 << rshift);
             buf[index] |= (255 << gshift);
             buf[index] |= (255 << bshift);
           }
-        }
-        else {
+        } else {
           buf[index] |= (255 << rshift);
-          if(row >= halfway) buf[index] |= (255 << gshift);
+          if (row >= halfway)
+            buf[index] |= (255 << gshift);
         }
-        if (ashift >= 0) buf[index] |= (255 << ashift);
+        if (ashift >= 0)
+          buf[index] |= (255 << ashift);
       }
     }
   }
 
   private static void initImg(BufferedImage img, int pf, int flags)
-    throws Exception {
+                              throws Exception {
     WritableRaster wr = img.getRaster();
     int imgType = img.getType();
-    if(imgType == BufferedImage.TYPE_INT_RGB
-      || imgType == BufferedImage.TYPE_INT_BGR
-      || imgType == BufferedImage.TYPE_INT_ARGB
-      || imgType == BufferedImage.TYPE_INT_ARGB_PRE) {
+    if (imgType == BufferedImage.TYPE_INT_RGB ||
+        imgType == BufferedImage.TYPE_INT_BGR ||
+        imgType == BufferedImage.TYPE_INT_ARGB ||
+        imgType == BufferedImage.TYPE_INT_ARGB_PRE) {
       SinglePixelPackedSampleModel sm =
         (SinglePixelPackedSampleModel)img.getSampleModel();
       int pitch = sm.getScanlineStride();
       DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
       int[] buf = db.getData();
       initIntBuf(buf, img.getWidth(), pitch, img.getHeight(), pf, flags);
-    }
-    else {
+    } else {
       ComponentSampleModel sm = (ComponentSampleModel)img.getSampleModel();
       int pitch = sm.getScanlineStride();
       DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
@@ -253,34 +257,35 @@
   }
 
   private static void checkVal(int row, int col, int v, String vname, int cv)
-    throws Exception {
+                               throws Exception {
     v = (v < 0) ? v + 256 : v;
-    if(v < cv - 1 || v > cv + 1) {
-      throw new Exception("\nComp. " + vname + " at " + row + "," + col
-        + " should be " + cv + ", not " + v + "\n");
+    if (v < cv - 1 || v > cv + 1) {
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be " + cv + ", not " + v);
     }
   }
 
   private static void checkVal0(int row, int col, int v, String vname)
-    throws Exception {
+                                throws Exception {
     v = (v < 0) ? v + 256 : v;
-    if(v > 1) {
-      throw new Exception("\nComp. " + vname + " at " + row + "," + col
-        + " should be 0, not " + v + "\n");
+    if (v > 1) {
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be 0, not " + v);
     }
   }
 
   private static void checkVal255(int row, int col, int v, String vname)
-    throws Exception {
+                                  throws Exception {
     v = (v < 0) ? v + 256 : v;
-    if(v < 254) {
-      throw new Exception("\nComp. " + vname + " at " + row + "," + col
-        + " should be 255, not " + v + "\n");
+    if (v < 254) {
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be 255, not " + v);
     }
   }
 
   private static int checkBuf(byte[] buf, int w, int pitch, int h, int pf,
-    int subsamp, TJScalingFactor sf, int flags) throws Exception {
+                              int subsamp, TJScalingFactor sf, int flags)
+                              throws Exception {
     int roffset = TJ.getRedOffset(pf);
     int goffset = TJ.getGreenOffset(pf);
     int boffset = TJ.getBlueOffset(pf);
@@ -291,68 +296,64 @@
     int blockSize = 8 * sf.getNum() / sf.getDenom();
 
     try {
-      for(row = 0; row < halfway; row++) {
-        for(col = 0; col < w; col++) {
-          if((flags & TJ.FLAG_BOTTOMUP) != 0)
+      for (row = 0; row < halfway; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
             index = pitch * (h - row - 1) + col * ps;
-          else index = pitch * row + col * ps;
+          else
+            index = pitch * row + col * ps;
           byte r = buf[index + roffset];
           byte g = buf[index + goffset];
           byte b = buf[index + boffset];
           byte a = aoffset >= 0 ? buf[index + aoffset] : (byte)255;
-          if(((row / blockSize) + (col / blockSize)) % 2 == 0) {
-            if(row < halfway) {
+          if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+            if (row < halfway) {
               checkVal255(row, col, r, "R");
               checkVal255(row, col, g, "G");
               checkVal255(row, col, b, "B");
-            }
-            else {
+            } else {
               checkVal0(row, col, r, "R");
               checkVal0(row, col, g, "G");
               checkVal0(row, col, b, "B");
             }
-          }
-          else {
-            if(subsamp == TJ.SAMP_GRAY) {
-              if(row < halfway) {
+          } else {
+            if (subsamp == TJ.SAMP_GRAY) {
+              if (row < halfway) {
                 checkVal(row, col, r, "R", 76);
                 checkVal(row, col, g, "G", 76);
                 checkVal(row, col, b, "B", 76);
-              }
-              else {
+              } else {
                 checkVal(row, col, r, "R", 226);
                 checkVal(row, col, g, "G", 226);
                 checkVal(row, col, b, "B", 226);
               }
-            }
-            else {
+            } else {
               checkVal255(row, col, r, "R");
-              if(row < halfway) {
+              if (row < halfway) {
                 checkVal0(row, col, g, "G");
-              }
-              else {
+              } else {
                 checkVal255(row, col, g, "G");
               }
-              checkVal0(row, col, b, "B");							
+              checkVal0(row, col, b, "B");
             }
           }
           checkVal255(row, col, a, "A");
         }
       }
-    }
-    catch(Exception e) {
-      System.out.println(e);
+    } catch(Exception e) {
+      System.out.println("\n" + e.getMessage());
       retval = 0;
     }
 
-    if(retval == 0) {
-      System.out.print("\n");
-      for(row = 0; row < h; row++) {
-        for(col = 0; col < w; col++) {
+    if (retval == 0) {
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
           int r = buf[pitch * row + col * ps + roffset];
           int g = buf[pitch * row + col * ps + goffset];
           int b = buf[pitch * row + col * ps + boffset];
-          if(r < 0) r += 256;  if(g < 0) g += 256;  if(b < 0) b += 256;
+          if (r < 0) r += 256;
+          if (g < 0) g += 256;
+          if (b < 0) b += 256;
           System.out.format("%3d/%3d/%3d ", r, g, b);
         }
         System.out.print("\n");
@@ -362,7 +363,8 @@
   }
 
   private static int checkIntBuf(int[] buf, int w, int pitch, int h, int pf,
-    int subsamp, TJScalingFactor sf, int flags) throws Exception {
+                                 int subsamp, TJScalingFactor sf, int flags)
+                                 throws Exception {
     int rshift = TJ.getRedOffset(pf) * 8;
     int gshift = TJ.getGreenOffset(pf) * 8;
     int bshift = TJ.getBlueOffset(pf) * 8;
@@ -372,46 +374,42 @@
     int blockSize = 8 * sf.getNum() / sf.getDenom();
 
     try {
-      for(row = 0; row < halfway; row++) {
-        for(col = 0; col < w; col++) {
-          if((flags & TJ.FLAG_BOTTOMUP) != 0)
+      for (row = 0; row < halfway; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
             index = pitch * (h - row - 1) + col;
-          else index = pitch * row + col;
+          else
+            index = pitch * row + col;
           int r = (buf[index] >> rshift) & 0xFF;
           int g = (buf[index] >> gshift) & 0xFF;
           int b = (buf[index] >> bshift) & 0xFF;
           int a = ashift >= 0 ? (buf[index] >> ashift) & 0xFF : 255;
-          if(((row / blockSize) + (col / blockSize)) % 2 == 0) {
-            if(row < halfway) {
+          if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+            if (row < halfway) {
               checkVal255(row, col, r, "R");
               checkVal255(row, col, g, "G");
               checkVal255(row, col, b, "B");
-            }
-            else {
+            } else {
               checkVal0(row, col, r, "R");
               checkVal0(row, col, g, "G");
               checkVal0(row, col, b, "B");
             }
-          }
-          else {
-            if(subsamp == TJ.SAMP_GRAY) {
-              if(row < halfway) {
+          } else {
+            if (subsamp == TJ.SAMP_GRAY) {
+              if (row < halfway) {
                 checkVal(row, col, r, "R", 76);
                 checkVal(row, col, g, "G", 76);
                 checkVal(row, col, b, "B", 76);
-              }
-              else {
+              } else {
                 checkVal(row, col, r, "R", 226);
                 checkVal(row, col, g, "G", 226);
                 checkVal(row, col, b, "B", 226);
               }
-            }
-            else {
+            } else {
               checkVal255(row, col, r, "R");
-              if(row < halfway) {
+              if (row < halfway) {
                 checkVal0(row, col, g, "G");
-              }
-              else {
+              } else {
                 checkVal255(row, col, g, "G");
               }
               checkVal0(row, col, b, "B");
@@ -420,20 +418,20 @@
           checkVal255(row, col, a, "A");
         }
       }
-    }
-    catch(Exception e) {
-      System.out.println(e);
+    } catch(Exception e) {
+      System.out.println("\n" + e.getMessage());
       retval = 0;
     }
 
-    if(retval == 0) {
-      System.out.print("\n");
-      for(row = 0; row < h; row++) {
-        for(col = 0; col < w; col++) {
+    if (retval == 0) {
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
           int r = (buf[pitch * row + col] >> rshift) & 0xFF;
           int g = (buf[pitch * row + col] >> gshift) & 0xFF;
           int b = (buf[pitch * row + col] >> bshift) & 0xFF;
-          if(r < 0) r += 256;  if(g < 0) g += 256;  if(b < 0) b += 256;
+          if (r < 0) r += 256;
+          if (g < 0) g += 256;
+          if (b < 0) b += 256;
           System.out.format("%3d/%3d/%3d ", r, g, b);
         }
         System.out.print("\n");
@@ -442,29 +440,28 @@
     return retval;
   }
 
-  private static int checkImg(BufferedImage img, int pf,
-    int subsamp, TJScalingFactor sf, int flags) throws Exception {
+  private static int checkImg(BufferedImage img, int pf, int subsamp,
+                              TJScalingFactor sf, int flags) throws Exception {
     WritableRaster wr = img.getRaster();
     int imgType = img.getType();
-    if(imgType == BufferedImage.TYPE_INT_RGB
-      || imgType == BufferedImage.TYPE_INT_BGR
-      || imgType == BufferedImage.TYPE_INT_ARGB
-      || imgType == BufferedImage.TYPE_INT_ARGB_PRE) {
-       SinglePixelPackedSampleModel sm =
+    if (imgType == BufferedImage.TYPE_INT_RGB ||
+        imgType == BufferedImage.TYPE_INT_BGR ||
+        imgType == BufferedImage.TYPE_INT_ARGB ||
+        imgType == BufferedImage.TYPE_INT_ARGB_PRE) {
+      SinglePixelPackedSampleModel sm =
         (SinglePixelPackedSampleModel)img.getSampleModel();
       int pitch = sm.getScanlineStride();
       DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
       int[] buf = db.getData();
       return checkIntBuf(buf, img.getWidth(), pitch, img.getHeight(), pf,
-        subsamp, sf, flags);
-    }
-    else {
+                         subsamp, sf, flags);
+    } else {
       ComponentSampleModel sm = (ComponentSampleModel)img.getSampleModel();
       int pitch = sm.getScanlineStride();
       DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
       byte[] buf = db.getData();
       return checkBuf(buf, img.getWidth(), pitch, img.getHeight(), pf, subsamp,
-        sf, flags);
+                      sf, flags);
     }
   }
 
@@ -473,51 +470,52 @@
   }
 
   private static int checkBufYUV(byte[] buf, int size, int w, int h,
-    int subsamp) throws Exception {
+                                 int subsamp) throws Exception {
     int row, col;
-    int hsf = TJ.getMCUWidth(subsamp)/8, vsf = TJ.getMCUHeight(subsamp)/8;
+    int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8;
     int pw = PAD(w, hsf), ph = PAD(h, vsf);
     int cw = pw / hsf, ch = ph / vsf;
     int ypitch = PAD(pw, 4), uvpitch = PAD(cw, 4);
     int retval = 1;
-    int correctsize = ypitch * ph
-      + (subsamp == TJ.SAMP_GRAY ? 0 : uvpitch * ch * 2);
+    int correctsize = ypitch * ph +
+                      (subsamp == TJ.SAMP_GRAY ? 0 : uvpitch * ch * 2);
     int halfway = 16;
 
     try {
-      if(size != correctsize)
-        throw new Exception("\nIncorrect size " + size + ".  Should be "
-          + correctsize);
+      if (size != correctsize)
+        throw new Exception("Incorrect size " + size + ".  Should be " +
+                            correctsize);
 
-      for(row = 0; row < ph; row++) {
-        for(col = 0; col < pw; col++) {
+      for (row = 0; row < ph; row++) {
+        for (col = 0; col < pw; col++) {
           byte y = buf[ypitch * row + col];
-          if(((row / 8) + (col / 8)) % 2 == 0) {
-            if(row < halfway) checkVal255(row, col, y, "Y");
-            else checkVal0(row, col, y, "Y");
-          }
-          else {
-            if(row < halfway) checkVal(row, col, y, "Y", 76);
-            else checkVal(row, col, y, "Y", 226);
+          if (((row / 8) + (col / 8)) % 2 == 0) {
+            if (row < halfway)
+              checkVal255(row, col, y, "Y");
+            else
+              checkVal0(row, col, y, "Y");
+          } else {
+            if (row < halfway)
+              checkVal(row, col, y, "Y", 76);
+            else
+              checkVal(row, col, y, "Y", 226);
           }
         }
       }
-      if(subsamp != TJ.SAMP_GRAY) {
+      if (subsamp != TJ.SAMP_GRAY) {
         halfway = 16 / vsf;
-        for(row = 0; row < ch; row++) {
-          for(col = 0; col < cw; col++) {
+        for (row = 0; row < ch; row++) {
+          for (col = 0; col < cw; col++) {
             byte u = buf[ypitch * ph + (uvpitch * row + col)],
-              v = buf[ypitch * ph + uvpitch * ch + (uvpitch * row + col)];
-            if(((row * vsf / 8) + (col * hsf / 8)) % 2 == 0) {
+                 v = buf[ypitch * ph + uvpitch * ch + (uvpitch * row + col)];
+            if (((row * vsf / 8) + (col * hsf / 8)) % 2 == 0) {
               checkVal(row, col, u, "U", 128);
               checkVal(row, col, v, "V", 128);
-            }
-            else {
-              if(row < halfway) {
+            } else {
+              if (row < halfway) {
                 checkVal(row, col, u, "U", 85);
                 checkVal255(row, col, v, "V");
-              }
-              else {
+              } else {
                 checkVal0(row, col, u, "U");
                 checkVal(row, col, v, "V", 149);
               }
@@ -525,47 +523,45 @@
           }
         }
       }
-    }
-    catch(Exception e) {
-      System.out.println(e);
+    } catch(Exception e) {
+      System.out.println("\n" + e.getMessage());
       retval = 0;
     }
 
-    if(retval == 0) {
-      for(row = 0; row < ph; row++) {
-        for(col = 0; col < pw; col++) {
+    if (retval == 0) {
+      for (row = 0; row < ph; row++) {
+        for (col = 0; col < pw; col++) {
           int y = buf[ypitch * row + col];
-          if(y < 0) y += 256;
+          if (y < 0) y += 256;
           System.out.format("%3d ", y);
         }
         System.out.print("\n");
       }
       System.out.print("\n");
-      for(row = 0; row < ch; row++) {
-        for(col = 0; col < cw; col++) {
+      for (row = 0; row < ch; row++) {
+        for (col = 0; col < cw; col++) {
           int u = buf[ypitch * ph + (uvpitch * row + col)];
-          if(u < 0) u += 256;
+          if (u < 0) u += 256;
           System.out.format("%3d ", u);
         }
         System.out.print("\n");
       }
       System.out.print("\n");
-      for(row = 0; row < ch; row++) {
-        for(col = 0; col < cw; col++) {
+      for (row = 0; row < ch; row++) {
+        for (col = 0; col < cw; col++) {
           int v = buf[ypitch * ph + uvpitch * ch + (uvpitch * row + col)];
-          if(v < 0) v += 256;
+          if (v < 0) v += 256;
           System.out.format("%3d ", v);
         }
         System.out.print("\n");
       }
-      System.out.print("\n");
     }
 
     return retval;
   }
 
   private static void writeJPEG(byte[] jpegBuf, int jpegBufSize,
-    String filename) throws Exception {
+                                String filename) throws Exception {
     File file = new File(filename);
     FileOutputStream fos = new FileOutputStream(file);
     fos.write(jpegBuf, 0, jpegBufSize);
@@ -573,8 +569,8 @@
   }
 
   private static int compTest(TJCompressor tjc, byte[] dstBuf, int w,
-    int h, int pf, String baseName, int subsamp, int jpegQual,
-    int flags) throws Exception {
+                              int h, int pf, String baseName, int subsamp,
+                              int jpegQual, int flags) throws Exception {
     String tempstr;
     byte[] srcBuf = null;
     BufferedImage img = null;
@@ -585,28 +581,32 @@
     if (bi) {
       pf = biTypePF(imgType);
       pfStr = biTypeStr(imgType);
-    }
-    else pfStr = pixFormatStr[pf];
+    } else
+      pfStr = pixFormatStr[pf];
     ps =  TJ.getPixelSize(pf);
 
     System.out.print(pfStr + " ");
-    if(bi) System.out.print("(" + pixFormatStr[pf] + ") ");
-    if((flags & TJ.FLAG_BOTTOMUP) != 0) System.out.print("Bottom-Up");
-    else System.out.print("Top-Down ");
+    if (bi)
+      System.out.print("(" + pixFormatStr[pf] + ") ");
+    if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+      System.out.print("Bottom-Up");
+    else
+      System.out.print("Top-Down ");
     System.out.print(" -> " + subNameLong[subsamp] + " ");
-    if(yuv == YUVENCODE) System.out.print("YUV ... ");
-    else System.out.print("Q" + jpegQual + " ... ");
+    if (yuv == YUVENCODE)
+      System.out.print("YUV ... ");
+    else
+      System.out.print("Q" + jpegQual + " ... ");
 
-    if(bi) {
+    if (bi) {
       img = new BufferedImage(w, h, imgType);
       initImg(img, pf, flags);
-      tempstr = baseName + "_enc_" + pfStr + "_"
-        + (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_"
-        + subName[subsamp] + "_Q" + jpegQual + ".png";
+      tempstr = baseName + "_enc_" + pfStr + "_" +
+                (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+                subName[subsamp] + "_Q" + jpegQual + ".png";
       File file = new File(tempstr);
       ImageIO.write(img, "png", file);
-    }
-    else {
+    } else {
       srcBuf = new byte[w * h * ps + 1];
       initBuf(srcBuf, w, w * ps, h, pf, flags);
     }
@@ -615,36 +615,40 @@
     t = getTime();
     tjc.setSubsamp(subsamp);
     tjc.setJPEGQuality(jpegQual);
-    if(bi) {
-      if(yuv == YUVENCODE) tjc.encodeYUV(img, dstBuf, flags);
-      else tjc.compress(img, dstBuf, flags);
-    }
-    else {
-      tjc.setSourceImage(srcBuf, w, 0, h, pf);
-      if(yuv == YUVENCODE) tjc.encodeYUV(dstBuf, flags);
-      else tjc.compress(dstBuf, flags);
+    if (bi) {
+      if (yuv == YUVENCODE)
+        tjc.encodeYUV(img, dstBuf, flags);
+      else
+        tjc.compress(img, dstBuf, flags);
+    } else {
+      tjc.setSourceImage(srcBuf, 0, 0, w, 0, h, pf);
+      if (yuv == YUVENCODE)
+        tjc.encodeYUV(dstBuf, flags);
+      else
+        tjc.compress(dstBuf, flags);
     }
     size = tjc.getCompressedSize();
     t = getTime() - t;
 
-    if(yuv == YUVENCODE)
-      tempstr = baseName + "_enc_" + pfStr + "_"
-        + (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_"
-        + subName[subsamp] + ".yuv";
+    if (yuv == YUVENCODE)
+      tempstr = baseName + "_enc_" + pfStr + "_" +
+                (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+                subName[subsamp] + ".yuv";
     else
-      tempstr = baseName + "_enc_" + pfStr + "_"
-        + (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_"
-        + subName[subsamp] + "_Q" + jpegQual + ".jpg";
+      tempstr = baseName + "_enc_" + pfStr + "_" +
+                (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+                subName[subsamp] + "_Q" + jpegQual + ".jpg";
     writeJPEG(dstBuf, size, tempstr);
 
-    if(yuv == YUVENCODE) {
-      if(checkBufYUV(dstBuf, size, w, h, subsamp) == 1)
+    if (yuv == YUVENCODE) {
+      if (checkBufYUV(dstBuf, size, w, h, subsamp) == 1)
         System.out.print("Passed.");
       else {
-        System.out.print("FAILED!");  exitStatus = -1;
+        System.out.print("FAILED!");
+        exitStatus = -1;
       }
-    }
-    else System.out.print("Done.");
+    } else
+      System.out.print("Done.");
     System.out.format("  %.6f ms\n", t * 1000.);
     System.out.println("  Result in " + tempstr);
 
@@ -652,8 +656,9 @@
   }
 
   private static void decompTest(TJDecompressor tjd, byte[] jpegBuf,
-    int jpegSize, int w, int h, int pf, String baseName, int subsamp,
-    int flags, TJScalingFactor sf) throws Exception {
+                                 int jpegSize, int w, int h, int pf,
+                                 String baseName, int subsamp, int flags,
+                                 TJScalingFactor sf) throws Exception {
     String pfStr, tempstr;
     double t;
     int scaledWidth = sf.getScaled(w);
@@ -662,134 +667,145 @@
     BufferedImage img = null;
     byte[] dstBuf = null;
 
-    if(yuv == YUVENCODE) return;
+    if (yuv == YUVENCODE) return;
 
     if (bi) {
       pf = biTypePF(imgType);
       pfStr = biTypeStr(imgType);
-    }
-    else pfStr = pixFormatStr[pf];
+    } else
+      pfStr = pixFormatStr[pf];
 
     System.out.print("JPEG -> ");
-    if(yuv == YUVDECODE)
-      System.out.print("YUV " + subName[subsamp] + " ... ");
+    if (yuv == YUVDECODE)
+      System.out.print("YUV " + subNameLong[subsamp] + " ... ");
     else {
       System.out.print(pfStr + " ");
-      if(bi) System.out.print("(" + pixFormatStr[pf] + ") ");
-      if((flags & TJ.FLAG_BOTTOMUP) != 0) System.out.print("Bottom-Up ");
-      else System.out.print("Top-Down  ");
-      if(!sf.isOne())
+      if (bi)
+        System.out.print("(" + pixFormatStr[pf] + ") ");
+      if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+        System.out.print("Bottom-Up ");
+      else
+        System.out.print("Top-Down  ");
+      if (!sf.isOne())
         System.out.print(sf.getNum() + "/" + sf.getDenom() + " ... ");
-      else System.out.print("... ");
+      else
+        System.out.print("... ");
     }
 
     t = getTime();
     tjd.setJPEGImage(jpegBuf, jpegSize);
-    if(tjd.getWidth() != w || tjd.getHeight() != h
-      || tjd.getSubsamp() != subsamp)
+    if (tjd.getWidth() != w || tjd.getHeight() != h ||
+        tjd.getSubsamp() != subsamp)
       throw new Exception("Incorrect JPEG header");
 
     temp1 = scaledWidth;
     temp2 = scaledHeight;
     temp1 = tjd.getScaledWidth(temp1, temp2);
     temp2 = tjd.getScaledHeight(temp1, temp2);
-    if(temp1 != scaledWidth || temp2 != scaledHeight)
+    if (temp1 != scaledWidth || temp2 != scaledHeight)
       throw new Exception("Scaled size mismatch");
 
-    if(yuv == YUVDECODE) dstBuf = tjd.decompressToYUV(flags);
+    if (yuv == YUVDECODE)
+      dstBuf = tjd.decompressToYUV(flags);
     else {
-      if(bi)
+      if (bi)
         img = tjd.decompress(scaledWidth, scaledHeight, imgType, flags);
-      else dstBuf = tjd.decompress(scaledWidth, 0, scaledHeight, pf, flags);
+      else
+        dstBuf = tjd.decompress(scaledWidth, 0, scaledHeight, pf, flags);
     }
     t = getTime() - t;
 
-    if(bi) {
-      tempstr = baseName + "_dec_" + pfStr + "_"
-        + (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_"
-        + subName[subsamp] + "_" + (double)sf.getNum() / (double)sf.getDenom()
-        + "x" + ".png";
+    if (bi) {
+      tempstr = baseName + "_dec_" + pfStr + "_" +
+                (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+                subName[subsamp] + "_" +
+                (double)sf.getNum() / (double)sf.getDenom() + "x" + ".png";
       File file = new File(tempstr);
       ImageIO.write(img, "png", file);
     }
 
-    if(yuv == YUVDECODE) {
-      if(checkBufYUV(dstBuf, dstBuf.length, w, h, subsamp) == 1)
+    if (yuv == YUVDECODE) {
+      if (checkBufYUV(dstBuf, dstBuf.length, w, h, subsamp) == 1)
         System.out.print("Passed.");
       else {
         System.out.print("FAILED!");  exitStatus = -1;
       }
-    }
-    else {
-      if((bi && checkImg(img, pf, subsamp, sf, flags) == 1)
-        || (!bi && checkBuf(dstBuf, scaledWidth, scaledWidth
-          * TJ.getPixelSize(pf), scaledHeight, pf, subsamp, sf, flags) == 1))
+    } else {
+      if ((bi && checkImg(img, pf, subsamp, sf, flags) == 1) ||
+          (!bi && checkBuf(dstBuf, scaledWidth,
+                           scaledWidth * TJ.getPixelSize(pf), scaledHeight, pf,
+                           subsamp, sf, flags) == 1))
         System.out.print("Passed.");
       else {
-        System.out.print("FAILED!");  exitStatus = -1;
+        System.out.print("FAILED!");
+        exitStatus = -1;
       }
     }
     System.out.format("  %.6f ms\n", t * 1000.);
   }
 
   private static void decompTest(TJDecompressor tjd, byte[] jpegBuf,
-    int jpegSize, int w, int h, int pf, String baseName, int subsamp,
-    int flags) throws Exception {
+                                 int jpegSize, int w, int h, int pf,
+                                 String baseName, int subsamp,
+                                 int flags) throws Exception {
     int i;
-    if((subsamp == TJ.SAMP_444 || subsamp == TJ.SAMP_GRAY) && yuv == 0) {
-      TJScalingFactor sf[] = TJ.getScalingFactors();
-      for(i = 0; i < sf.length; i++)
+    if ((subsamp == TJ.SAMP_444 || subsamp == TJ.SAMP_GRAY) && yuv == 0) {
+      TJScalingFactor[] sf = TJ.getScalingFactors();
+      for (i = 0; i < sf.length; i++)
         decompTest(tjd, jpegBuf, jpegSize, w, h, pf, baseName, subsamp,
-          flags, sf[i]);
-    }
-    else
+                   flags, sf[i]);
+    } else
       decompTest(tjd, jpegBuf, jpegSize, w, h, pf, baseName, subsamp,
-        flags, new TJScalingFactor(1, 1));
-    System.out.print("\n");
+                 flags, new TJScalingFactor(1, 1));
   }
 
   private static void doTest(int w, int h, int[] formats, int subsamp,
-    String baseName) throws Exception {
+                             String baseName) throws Exception {
     TJCompressor tjc = null;
     TJDecompressor tjd = null;
     int size;
     byte[] dstBuf;
 
-    if(yuv == YUVENCODE) dstBuf = new byte[TJ.bufSizeYUV(w, h, subsamp)];
-    else dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
+    if (yuv == YUVENCODE)
+      dstBuf = new byte[TJ.bufSizeYUV(w, h, subsamp)];
+    else
+      dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
 
     try {
       tjc = new TJCompressor();
-      tjd = new TJDecompressor();  
+      tjd = new TJDecompressor();
 
-      for(int pf : formats) {
-        for(int i = 0; i < 2; i++) {
+      for (int pf : formats) {
+        for (int i = 0; i < 2; i++) {
           int flags = 0;
-          if (subsamp == TJ.SAMP_422 || subsamp == TJ.SAMP_420
-            || subsamp == TJ.SAMP_440)
+          if (subsamp == TJ.SAMP_422 || subsamp == TJ.SAMP_420 ||
+              subsamp == TJ.SAMP_440)
             flags |= TJ.FLAG_FASTUPSAMPLE;
-          if(i == 1) {
-            if(yuv == YUVDECODE) {
-              tjc.close();  tjd.close();  return;
-            }
-            else flags |= TJ.FLAG_BOTTOMUP;
+          if (i == 1) {
+            if (yuv == YUVDECODE) {
+              tjc.close();
+              tjd.close();
+              return;
+            } else
+              flags |= TJ.FLAG_BOTTOMUP;
           }
           size = compTest(tjc, dstBuf, w, h, pf, baseName, subsamp, 100,
-            flags);
+                          flags);
           decompTest(tjd, dstBuf, size, w, h, pf, baseName, subsamp, flags);
-          if(pf >= TJ.PF_RGBX && pf <= TJ.PF_XRGB && !bi)
+          if (pf >= TJ.PF_RGBX && pf <= TJ.PF_XRGB && !bi)
             decompTest(tjd, dstBuf, size, w, h, pf + (TJ.PF_RGBA - TJ.PF_RGBX),
-              baseName, subsamp, flags);
+                       baseName, subsamp, flags);
+          System.out.print("\n");
         }
       }
-    }
-    catch(Exception e) {
-      if(tjc != null) tjc.close();
-      if(tjd != null) tjd.close();
+      System.out.print("--------------------\n\n");
+    } catch(Exception e) {
+      if (tjc != null) tjc.close();
+      if (tjd != null) tjd.close();
       throw e;
     }
-    if(tjc != null) tjc.close();
-    if(tjd != null) tjd.close();
+    if (tjc != null) tjc.close();
+    if (tjd != null) tjd.close();
   }
 
   private static void bufSizeTest() throws Exception {
@@ -801,21 +817,21 @@
     try {
       tjc = new TJCompressor();
       System.out.println("Buffer size regression test");
-      for(subsamp = 0; subsamp < TJ.NUMSAMP; subsamp++) {
-        for(w = 1; w < 48; w++) {
+      for (subsamp = 0; subsamp < TJ.NUMSAMP; subsamp++) {
+        for (w = 1; w < 48; w++) {
           int maxh = (w == 1) ? 2048 : 48;
-          for(h = 1; h < maxh; h++) {
-            if(h % 100 == 0)
+          for (h = 1; h < maxh; h++) {
+            if (h % 100 == 0)
               System.out.format("%04d x %04d\b\b\b\b\b\b\b\b\b\b\b", w, h);
             srcBuf = new byte[w * h * 4];
             if (yuv == YUVENCODE)
               dstBuf = new byte[TJ.bufSizeYUV(w, h, subsamp)];
             else
               dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
-            for(i = 0; i < w * h * 4; i++) {
+            for (i = 0; i < w * h * 4; i++) {
               srcBuf[i] = (byte)(r.nextInt(2) * 255);
             }
-            tjc.setSourceImage(srcBuf, w, 0, h, TJ.PF_BGRX);
+            tjc.setSourceImage(srcBuf, 0, 0, w, 0, h, TJ.PF_BGRX);
             tjc.setSubsamp(subsamp);
             tjc.setJPEGQuality(100);
             if (yuv == YUVENCODE)
@@ -828,10 +844,10 @@
               dstBuf = new byte[TJ.bufSizeYUV(h, w, subsamp)];
             else
               dstBuf = new byte[TJ.bufSize(h, w, subsamp)];
-            for(i = 0; i < h * w * 4; i++) {
+            for (i = 0; i < h * w * 4; i++) {
               srcBuf[i] = (byte)(r.nextInt(2) * 255);
             }
-            tjc.setSourceImage(srcBuf, h, 0, w, TJ.PF_BGRX);
+            tjc.setSourceImage(srcBuf, 0, 0, h, 0, w, TJ.PF_BGRX);
             if (yuv == YUVENCODE)
               tjc.encodeYUV(dstBuf, 0);
             else
@@ -840,52 +856,54 @@
         }
       }
       System.out.println("Done.      ");
-    }
-    catch(Exception e) {
-      if(tjc != null) tjc.close();
+    } catch(Exception e) {
+      if (tjc != null) tjc.close();
       throw e;
     }
-    if(tjc != null) tjc.close();
+    if (tjc != null) tjc.close();
   }
 
-  public static void main(String argv[]) {
+  public static void main(String[] argv) {
     try {
       String testName = "javatest";
       boolean doyuv = false;
-      for(int i = 0; i < argv.length; i++) {
-        if(argv[i].equalsIgnoreCase("-yuv")) doyuv = true;
-        if(argv[i].substring(0, 1).equalsIgnoreCase("-h")
-          || argv[i].equalsIgnoreCase("-?"))
+      for (int i = 0; i < argv.length; i++) {
+        if (argv[i].equalsIgnoreCase("-yuv"))
+          doyuv = true;
+        if (argv[i].substring(0, 1).equalsIgnoreCase("-h") ||
+            argv[i].equalsIgnoreCase("-?"))
           usage();
-        if(argv[i].equalsIgnoreCase("-bi")) {
+        if (argv[i].equalsIgnoreCase("-bi")) {
           bi = true;
           testName = "javabitest";
         }
       }
-      if(doyuv) yuv = YUVENCODE;
-      doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444, testName);
-      doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_444, testName);
+      if (doyuv) yuv = YUVENCODE;
+      doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444,
+             testName);
+      doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_444,
+             testName);
       doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_422,
-        testName);
+             testName);
       doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_422,
-        testName);
+             testName);
       doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_420,
-        testName);
+             testName);
       doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_420,
-        testName);
+             testName);
       doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_440,
-        testName);
+             testName);
       doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440,
-        testName);
+             testName);
       doTest(35, 39, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName);
       doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY,
-        testName);
+             testName);
       doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_GRAY,
-        testName);
-      if(!bi)
+             testName);
+      if (!bi)
         bufSizeTest();
       if (doyuv && !bi) {
-        System.out.print("\n\n");
+        System.out.print("\n--------------------\n\n");
         yuv = YUVDECODE;
         doTest(48, 48, onlyRGB, TJ.SAMP_444, "javatest_yuv0");
         doTest(35, 39, onlyRGB, TJ.SAMP_444, "javatest_yuv1");
@@ -900,8 +918,7 @@
         doTest(48, 48, onlyGray, TJ.SAMP_GRAY, "javatest_yuv0");
         doTest(39, 41, onlyGray, TJ.SAMP_GRAY, "javatest_yuv1");
       }
-    }
-    catch(Exception e) {
+    } catch(Exception e) {
       e.printStackTrace();
       exitStatus = -1;
     }
diff --git a/java/doc/allclasses-frame.html b/java/doc/allclasses-frame.html
index 4860d29..b2810b5 100644
--- a/java/doc/allclasses-frame.html
+++ b/java/doc/allclasses-frame.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 All Classes
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/allclasses-noframe.html b/java/doc/allclasses-noframe.html
index accfa3f..ddc3d63 100644
--- a/java/doc/allclasses-noframe.html
+++ b/java/doc/allclasses-noframe.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 All Classes
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/constant-values.html b/java/doc/constant-values.html
index 26d6d95..e4adb67 100644
--- a/java/doc/constant-values.html
+++ b/java/doc/constant-values.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 Constant Field Values
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/deprecated-list.html b/java/doc/deprecated-list.html
index 27f6caf..65951d2 100644
--- a/java/doc/deprecated-list.html
+++ b/java/doc/deprecated-list.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 Deprecated List
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
@@ -83,8 +81,46 @@
 </CENTER>
 <HR SIZE="4" NOSHADE>
 <B>Contents</B><UL>
+<LI><A HREF="#method">Deprecated Methods</A>
+<LI><A HREF="#constructor">Deprecated Constructors</A>
 </UL>
 
+<A NAME="method"><!-- --></A>
+<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
+<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
+<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
+<B>Deprecated Methods</B></FONT></TH>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompress(byte[], int, int, int, int, int)</A>
+<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<I>Use
+ <A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>TJDecompressor.decompress(byte[], int, int, int, int, int, int, int)</CODE></A> instead.</I>&nbsp;</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)">org.libjpegturbo.turbojpeg.TJCompressor.setSourceImage(byte[], int, int, int, int)</A>
+<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<I>Use
+ <A HREF="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>TJCompressor.setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>&nbsp;</TD>
+</TR>
+</TABLE>
+&nbsp;
+<P>
+<A NAME="constructor"><!-- --></A>
+<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
+<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
+<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
+<B>Deprecated Constructors</B></FONT></TH>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)">org.libjpegturbo.turbojpeg.TJCompressor(byte[], int, int, int, int)</A>
+<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<I>Use
+ <A HREF="org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><CODE>TJCompressor.TJCompressor(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>&nbsp;</TD>
+</TR>
+</TABLE>
+&nbsp;
+<P>
 <HR>
 
 
diff --git a/java/doc/help-doc.html b/java/doc/help-doc.html
index 8724af8..b919d0a 100644
--- a/java/doc/help-doc.html
+++ b/java/doc/help-doc.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 API Help
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/index-all.html b/java/doc/index-all.html
index 5213541..6642769 100644
--- a/java/doc/index-all.html
+++ b/java/doc/index-all.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 Index
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="./stylesheet.css" TITLE="Style">
 
@@ -83,8 +81,7 @@
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><B>bufSize(int, int, int)</B></A> - 
 Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
 <DD>Returns the maximum size of the buffer (in bytes) required to hold a JPEG
- image with the given width and height, and level of chrominance
- subsampling.
+ image with the given width, height, and level of chrominance subsampling.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><B>bufSizeYUV(int, int, int)</B></A> - 
 Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
 <DD>Returns the size of the buffer (in bytes) required to hold a YUV planar
@@ -123,20 +120,28 @@
 Method in interface org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</A>
 <DD>A callback function that can be used to modify the DCT coefficients after
  they are losslessly transformed but before they are transcoded to a new
- JPEG file.
+ JPEG image.
 </DL>
 <HR>
 <A NAME="_D_"><!-- --></A><H2>
 <B>D</B></H2>
 <DL>
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><B>decompress(byte[], int, int, int, int, int)</B></A> - 
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><B>decompress(byte[], int, int, int, int, int, int, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Decompress the JPEG source image associated with this decompressor
  instance and output a decompressed image to the given destination buffer.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><B>decompress(byte[], int, int, int, int, int)</B></A> - 
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>TJDecompressor.decompress(byte[], int, int, int, int, int, int, int)</CODE></A> instead.</I>
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)"><B>decompress(int, int, int, int, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Decompress the JPEG source image associated with this decompressor
  instance and return a buffer containing the decompressed image.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)"><B>decompress(int[], int, int, int, int, int, int, int)</B></A> - 
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD>Decompress the JPEG source image associated with this decompressor
+ instance and output a decompressed image to the given destination buffer.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)"><B>decompress(BufferedImage, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Decompress the JPEG source image associated with this decompressor
@@ -204,8 +209,9 @@
 <DD>Use the fastest DCT/IDCT algorithm available in the underlying codec.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTUPSAMPLE"><B>FLAG_FASTUPSAMPLE</B></A> - 
 Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>When decompressing, use the fastest chrominance upsampling algorithm
- available in the underlying codec.
+<DD>When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX"><B>FLAG_FORCEMMX</B></A> - 
 Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
 <DD>Turn off CPU auto-detection and force TurboJPEG to use MMX code
@@ -266,7 +272,7 @@
 <DD>Returns numerator
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#getPixelSize(int)"><B>getPixelSize(int)</B></A> - 
 Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>Returns the pixel size (in bytes) of the given pixel format.
+<DD>Returns the pixel size (in bytes) for the given pixel format.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#getRedOffset(int)"><B>getRedOffset(int)</B></A> - 
 Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
 <DD>For the given pixel format, returns the number of bytes that the red
@@ -276,12 +282,12 @@
 <DD>Returns the scaled value of <code>dimension</code>.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><B>getScaledHeight(int, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
-<DD>Returns the height of the largest scaled down image that the TurboJPEG
+<DD>Returns the height of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)"><B>getScaledWidth(int, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
-<DD>Returns the width of the largest scaled down image that the TurboJPEG
+<DD>Returns the width of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><B>getScalingFactors()</B></A> - 
@@ -294,8 +300,8 @@
  associated with this decompressor instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html#getTransformedSizes()"><B>getTransformedSizes()</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</A>
-<DD>Returns an array containing the sizes of the transformed JPEG images from
- the most recent call to <A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>transform()</CODE></A>.
+<DD>Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()"><B>getWidth()</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Returns the width of the JPEG image associated with this decompressor
@@ -471,9 +477,13 @@
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setJPEGQuality(int)"><B>setJPEGQuality(int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
 <DD>Set the JPEG image quality level for subsequent compress operations.
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><B>setSourceImage(byte[], int, int, int, int)</B></A> - 
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><B>setSourceImage(byte[], int, int, int, int, int, int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
 <DD>Associate an uncompressed source image with this compressor instance.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><B>setSourceImage(byte[], int, int, int, int)</B></A> - 
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>TJCompressor.setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)"><B>setSubsamp(int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
 <DD>Set the level of chrominance subsampling for subsequent compress/encode
@@ -489,23 +499,27 @@
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><B>TJCompressor</B></A> - Class in <A HREF="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</A><DD>TurboJPEG compressor<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor()"><B>TJCompressor()</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
 <DD>Create a TurboJPEG compressor instance.
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)"><B>TJCompressor(byte[], int, int, int, int)</B></A> - 
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><B>TJCompressor(byte[], int, int, int, int, int, int)</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
 <DD>Create a TurboJPEG compressor instance and associate the uncompressed
- source image stored in <code>srcImage</code> with the newly-created
+ source image stored in <code>srcImage</code> with the newly created
  instance.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)"><B>TJCompressor(byte[], int, int, int, int)</B></A> - 
+Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><CODE>TJCompressor.TJCompressor(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><B>TJCustomFilter</B></A> - Interface in <A HREF="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</A><DD>Custom filter callback interface<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><B>TJDecompressor</B></A> - Class in <A HREF="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</A><DD>TurboJPEG decompressor<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor()"><B>TJDecompressor()</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Create a TurboJPEG decompresssor instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[])"><B>TJDecompressor(byte[])</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Create a TurboJPEG decompressor instance and associate the JPEG image
- stored in <code>jpegImage</code> with the newly-created instance.
+ stored in <code>jpegImage</code> with the newly created instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[], int)"><B>TJDecompressor(byte[], int)</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
 <DD>Create a TurboJPEG decompressor instance and associate the JPEG image
  of length <code>imageSize</code> bytes stored in <code>jpegImage</code>
- with the newly-created instance.
+ with the newly created instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><B>TJScalingFactor</B></A> - Class in <A HREF="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</A><DD>Fractional scaling factor<DT><A HREF="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#TJScalingFactor(int, int)"><B>TJScalingFactor(int, int)</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</A>
 <DD>&nbsp;
@@ -524,12 +538,12 @@
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[])"><B>TJTransformer(byte[])</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</A>
 <DD>Create a TurboJPEG lossless transformer instance and associate the JPEG
- image stored in <code>jpegImage</code> with the newly-created instance.
+ image stored in <code>jpegImage</code> with the newly created instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[], int)"><B>TJTransformer(byte[], int)</B></A> - 
 Constructor for class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</A>
 <DD>Create a TurboJPEG lossless transformer instance and associate the JPEG
  image of length <code>imageSize</code> bytes stored in
- <code>jpegImage</code> with the newly-created instance.
+ <code>jpegImage</code> with the newly created instance.
 <DT><A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><B>transform(byte[][], TJTransform[], int)</B></A> - 
 Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</A>
 <DD>Losslessly transform the JPEG image associated with this transformer
diff --git a/java/doc/index.html b/java/doc/index.html
index 93c50af..356cd1d 100644
--- a/java/doc/index.html
+++ b/java/doc/index.html
@@ -2,7 +2,6 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc on Fri Jun 29 14:29:14 CDT 2012-->
 <TITLE>
 Generated Documentation (Untitled)
 </TITLE>
@@ -10,8 +9,42 @@
     targetPage = "" + window.location.search;
     if (targetPage != "" && targetPage != "undefined")
         targetPage = targetPage.substring(1);
-    if (targetPage.indexOf(":") != -1)
+    if (targetPage.indexOf(":") != -1 || (targetPage != "" && !validURL(targetPage)))
         targetPage = "undefined";
+    function validURL(url) { // Returns true only for a relative path that ends in ".html" and uses the restricted character grammar checked below.
+        var pos = url.indexOf(".html"); // index of the FIRST ".html" occurrence
+        if (pos == -1 || pos != url.length - 5) // the first ".html" must be exactly the trailing five characters
+            return false;
+        var allowNumber = false; // true once the current segment has seen a letter, '$' or '_'
+        var allowSep = false; // true when a '/' or '.' separator may appear next
+        var seenDot = false; // true once any '.' has been seen before the suffix
+        for (var i = 0; i < url.length - 5; i++) { // scan only the characters that precede the ".html" suffix
+            var ch = url.charAt(i);
+            if ('a' <= ch && ch <= 'z' || // letters, '$' and '_' are always accepted...
+                    'A' <= ch && ch <= 'Z' ||
+                    ch == '$' ||
+                    ch == '_') {
+                allowNumber = true; // ...and they enable digits/'-' and separators after them
+                allowSep = true;
+            } else if ('0' <= ch && ch <= '9'
+                    || ch == '-') {
+                if (!allowNumber) // a digit or '-' may not start a segment
+                     return false;
+            } else if (ch == '/' || ch == '.') {
+                if (!allowSep) // rejects a leading separator and two separators in a row
+                    return false;
+                allowNumber = false; // a new segment starts: it must begin with a letter, '$' or '_'
+                allowSep = false;
+                if (ch == '.')
+                     seenDot = true;
+                if (ch == '/' && seenDot) // no '/' is allowed once a '.' has appeared
+                     return false;
+            } else {
+                return false; // any other character (e.g. ':', '?', '#', '%', whitespace) is rejected
+            }
+        }
+        return true;
+    }
     function loadFrames() {
         if (targetPage != "" && targetPage != "undefined")
              top.classFrame.location = top.targetPage;
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJ.html b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
index 326b22f..f905406 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJ.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJ
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -146,8 +144,9 @@
 <TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTUPSAMPLE">FLAG_FASTUPSAMPLE</A></B></CODE>
 
 <BR>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;When decompressing, use the fastest chrominance upsampling algorithm
- available in the underlying codec.</TD>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -364,8 +363,7 @@
 
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the maximum size of the buffer (in bytes) required to hold a JPEG
- image with the given width and height, and level of chrominance
- subsampling.</TD>
+ image with the given width, height, and level of chrominance subsampling.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -420,7 +418,7 @@
 <TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getPixelSize(int)">getPixelSize</A></B>(int&nbsp;pixelFormat)</CODE>
 
 <BR>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the pixel size (in bytes) of the given pixel format.</TD>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the pixel size (in bytes) for the given pixel format.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -534,6 +532,7 @@
 <DL>
 <DD>4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
  chrominance component for every 1x2 block of pixels in the source image.
+ Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
 <P>
 <DL>
 <DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_440">Constant Field Values</A></DL>
@@ -779,11 +778,11 @@
 <PRE>
 public static final int <B>FLAG_FASTUPSAMPLE</B></PRE>
 <DL>
-<DD>When decompressing, use the fastest chrominance upsampling algorithm
- available in the underlying codec.  The default is to use smooth
- upsampling, which creates a smooth transition between neighboring
- chrominance components in order to reduce upsampling artifacts in the
- decompressed image.
+<DD>When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.  The default is to use smooth upsampling, which
+ creates a smooth transition between neighboring chrominance components in
+ order to reduce upsampling artifacts in the decompressed image.
 <P>
 <DL>
 <DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FASTUPSAMPLE">Constant Field Values</A></DL>
@@ -796,11 +795,11 @@
 public static final int <B>FLAG_FASTDCT</B></PRE>
 <DL>
 <DD>Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
- default if this flag is not specified is implementation-specific.  The
- libjpeg implementation, for example, uses the fast algorithm by default
- when compressing, because this has been shown to have only a very slight
- effect on accuracy, but it uses the accurate algorithm when decompressing,
- because this has been shown to have a larger effect.
+ default if this flag is not specified is implementation-specific.  For
+ example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ algorithm by default when compressing, because this has been shown to have
+ only a very slight effect on accuracy, but it uses the accurate algorithm
+ when decompressing, because this has been shown to have a larger effect.
 <P>
 <DL>
 <DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FASTDCT">Constant Field Values</A></DL>
@@ -814,11 +813,11 @@
 <DL>
 <DD>Use the most accurate DCT/IDCT algorithm available in the underlying
  codec.  The default if this flag is not specified is
- implementation-specific.  The libjpeg implementation, for example, uses
- the fast algorithm by default when compressing, because this has been
- shown to have only a very slight effect on accuracy, but it uses the
- accurate algorithm when decompressing, because this has been shown to have
- a larger effect.
+ implementation-specific.  For example, the implementation of TurboJPEG for
+ libjpeg[-turbo] uses the fast algorithm by default when compressing,
+ because this has been shown to have only a very slight effect on accuracy,
+ but it uses the accurate algorithm when decompressing, because this has
+ been shown to have a larger effect.
 <P>
 <DL>
 <DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_ACCURATEDCT">Constant Field Values</A></DL>
@@ -896,11 +895,11 @@
 public static int <B>getPixelSize</B>(int&nbsp;pixelFormat)
                         throws java.lang.Exception</PRE>
 <DL>
-<DD>Returns the pixel size (in bytes) of the given pixel format.
+<DD>Returns the pixel size (in bytes) for the given pixel format.
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>pixelFormat</CODE> - the pixel format (one of <code>PF_*</code>)
-<DT><B>Returns:</B><DD>the pixel size (in bytes) of the given pixel format
+<DT><B>Returns:</B><DD>the pixel size (in bytes) for the given pixel format
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -979,15 +978,13 @@
                    throws java.lang.Exception</PRE>
 <DL>
 <DD>Returns the maximum size of the buffer (in bytes) required to hold a JPEG
- image with the given width and height, and level of chrominance
- subsampling.
+ image with the given width, height, and level of chrominance subsampling.
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>width</CODE> - the width (in pixels) of the JPEG image<DD><CODE>height</CODE> - the height (in pixels) of the JPEG image<DD><CODE>jpegSubsamp</CODE> - the level of chrominance subsampling to be used when
  generating the JPEG image (one of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.SAMP_*</CODE></A>)
 <DT><B>Returns:</B><DD>the maximum size of the buffer (in bytes) required to hold a JPEG
- image with the given width and height, and level of chrominance
- subsampling
+ image with the given width, height, and level of chrominance subsampling
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
index 1c7088a..935db0e 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJCompressor
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -129,8 +127,21 @@
              int&nbsp;pixelFormat)</CODE>
 
 <BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><CODE>TJCompressor(byte[], int, int, int, int, int, int)</CODE></A> instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)">TJCompressor</A></B>(byte[]&nbsp;srcImage,
+             int&nbsp;x,
+             int&nbsp;y,
+             int&nbsp;width,
+             int&nbsp;pitch,
+             int&nbsp;height,
+             int&nbsp;pixelFormat)</CODE>
+
+<BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a TurboJPEG compressor instance and associate the uncompressed
- source image stored in <code>srcImage</code> with the newly-created
+ source image stored in <code>srcImage</code> with the newly created
  instance.</TD>
 </TR>
 </TABLE>
@@ -266,6 +277,21 @@
                int&nbsp;pixelFormat)</CODE>
 
 <BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>&nbsp;void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)">setSourceImage</A></B>(byte[]&nbsp;srcImage,
+               int&nbsp;x,
+               int&nbsp;y,
+               int&nbsp;width,
+               int&nbsp;pitch,
+               int&nbsp;height,
+               int&nbsp;pixelFormat)</CODE>
+
+<BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Associate an uncompressed source image with this compressor instance.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
@@ -315,10 +341,12 @@
 </DL>
 <HR>
 
-<A NAME="TJCompressor(byte[], int, int, int, int)"><!-- --></A><H3>
+<A NAME="TJCompressor(byte[], int, int, int, int, int, int)"><!-- --></A><H3>
 TJCompressor</H3>
 <PRE>
 public <B>TJCompressor</B>(byte[]&nbsp;srcImage,
+                    int&nbsp;x,
+                    int&nbsp;y,
                     int&nbsp;width,
                     int&nbsp;pitch,
                     int&nbsp;height,
@@ -326,11 +354,33 @@
              throws java.lang.Exception</PRE>
 <DL>
 <DD>Create a TurboJPEG compressor instance and associate the uncompressed
- source image stored in <code>srcImage</code> with the newly-created
+ source image stored in <code>srcImage</code> with the newly created
  instance.
 <P>
 <DL>
-<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>width</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>pitch</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>height</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>pixelFormat</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int)</CODE></A> for description
+<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>x</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>y</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>width</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>pitch</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>height</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> for description<DD><CODE>pixelFormat</CODE> - pixel format of the source image (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><CODE>TJ.PF_*</CODE></A>)
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DL>
+<HR>
+
+<A NAME="TJCompressor(byte[], int, int, int, int)"><!-- --></A><H3>
+TJCompressor</H3>
+<PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public <B>TJCompressor</B>(byte[]&nbsp;srcImage,
+                               int&nbsp;width,
+                               int&nbsp;pitch,
+                               int&nbsp;height,
+                               int&nbsp;pixelFormat)
+             throws java.lang.Exception</PRE>
+<DL>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><CODE>TJCompressor(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>
+<P>
+<DL>
+
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DL>
@@ -345,10 +395,12 @@
 </TR>
 </TABLE>
 
-<A NAME="setSourceImage(byte[], int, int, int, int)"><!-- --></A><H3>
+<A NAME="setSourceImage(byte[], int, int, int, int, int, int)"><!-- --></A><H3>
 setSourceImage</H3>
 <PRE>
 public void <B>setSourceImage</B>(byte[]&nbsp;srcImage,
+                           int&nbsp;x,
+                           int&nbsp;y,
                            int&nbsp;width,
                            int&nbsp;pitch,
                            int&nbsp;height,
@@ -359,15 +411,41 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - image buffer containing RGB or grayscale pixels to be
- compressed<DD><CODE>width</CODE> - width (in pixels) of the source image<DD><CODE>pitch</CODE> - bytes per line of the source image.  Normally, this should be
+ compressed or encoded<DD><CODE>x</CODE> - x offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded<DD><CODE>y</CODE> - y offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded<DD><CODE>width</CODE> - width (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded<DD><CODE>pitch</CODE> - bytes per line of the source image.  Normally, this should be
  <code>width * TJ.pixelSize(pixelFormat)</code> if the source image is
  unpadded, but you can use this parameter to, for instance, specify that
- the scanlines in the source image are padded to 4-byte boundaries, as is
- the case for Windows bitmaps.  You can also be clever and use this
- parameter to skip lines, etc.  Setting this parameter to 0 is the
- equivalent of setting it to <code>width *
- TJ.pixelSize(pixelFormat)</code>.<DD><CODE>height</CODE> - height (in pixels) of the source image<DD><CODE>pixelFormat</CODE> - pixel format of the source image (one of
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.PF_*</CODE></A>)
+ the scanlines in the source image are padded to a 4-byte boundary or to
+ compress/encode a JPEG or YUV image from a region of a larger source
+ image.  You can also be clever and use this parameter to skip lines, etc.
+ Setting this parameter to 0 is the equivalent of setting it to
+ <code>width * TJ.getPixelSize(pixelFormat)</code>.<DD><CODE>height</CODE> - height (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded<DD><CODE>pixelFormat</CODE> - pixel format of the source image (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><CODE>TJ.PF_*</CODE></A>)
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
+<A NAME="setSourceImage(byte[], int, int, int, int)"><!-- --></A><H3>
+setSourceImage</H3>
+<PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public void <B>setSourceImage</B>(byte[]&nbsp;srcImage,
+                                      int&nbsp;width,
+                                      int&nbsp;pitch,
+                                      int&nbsp;height,
+                                      int&nbsp;pixelFormat)
+                    throws java.lang.Exception</PRE>
+<DL>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>
+<P>
+<DD><DL>
+
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -384,8 +462,9 @@
  operations.
 <P>
 <DD><DL>
-<DT><B>Parameters:</B><DD><CODE>newSubsamp</CODE> - the new level of chrominance subsampling (one of
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.SAMP_*</CODE></A>)
+<DT><B>Parameters:</B><DD><CODE>newSubsamp</CODE> - the level of chrominance subsampling to use in
+ subsequent compress/encode operations (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><CODE>TJ.SAMP_*</CODE></A>)
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -422,7 +501,9 @@
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the JPEG image.  Use
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><CODE>TJ.bufSize(int, int, int)</CODE></A> to determine the maximum size for this buffer based on
- the image width and height.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ the source image's width and height and the desired level of chrominance
+ subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -439,7 +520,8 @@
  instance and return a buffer containing a JPEG image.
 <P>
 <DD><DL>
-<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing a JPEG image.  The length of this buffer will
  not be equal to the size of the JPEG image.  Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()"><CODE>getCompressedSize()</CODE></A> to obtain the size of the JPEG image.
 <DT><B>Throws:</B>
@@ -463,7 +545,8 @@
 <DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - a <code>BufferedImage</code> instance containing RGB or
  grayscale pixels to be compressed<DD><CODE>dstBuf</CODE> - buffer that will receive the JPEG image.  Use
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><CODE>TJ.bufSize(int, int, int)</CODE></A> to determine the maximum size for this buffer based on
- the image width and height.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -482,7 +565,8 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - a <code>BufferedImage</code> instance containing RGB or
- grayscale pixels to be compressed<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ grayscale pixels to be compressed<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing a JPEG image.  The length of this buffer will
  not be equal to the size of the JPEG image.  Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()"><CODE>getCompressedSize()</CODE></A> to obtain the size of the JPEG image.
 <DT><B>Throws:</B>
@@ -504,18 +588,23 @@
  TurboJPEG's underlying codec to produce a planar YUV image that is
  suitable for direct video display.  Specifically, if the chrominance
  components are subsampled along the horizontal dimension, then the width
- of the luminance plane is padded to 2 in the output image (same goes for
- the height of the luminance plane, if the chrominance components are
- subsampled along the vertical dimension.)  Also, each line of each plane
- in the output image is padded to 4 bytes.  Although this will work with
- any subsampling option, it is really only useful in combination with
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_420"><CODE>TJ.SAMP_420</CODE></A>, which produces an image compatible with the I420 (AKA
- "YUV420P") format.
+ of the luminance plane is padded to the nearest multiple of 2 in the
+ output image (same goes for the height of the luminance plane, if the
+ chrominance components are subsampled along the vertical dimension.)
+ Also, each line of each plane in the output image is padded to the nearest multiple of 4 bytes.
+ Although this will work with any subsampling option, it is really only
+ useful in combination with <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_420"><CODE>TJ.SAMP_420</CODE></A>, which produces an image
+ compatible with the I420 (AKA "YUV420P") format.
+ <p>
+ NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ convention of the digital video community, the TurboJPEG API uses "YUV" to
+ refer to an image format consisting of Y, Cb, and Cr image planes.
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image.  Use
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
- based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -533,7 +622,8 @@
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><CODE>encodeYUV(byte[], int)</CODE></A> for more detail.
 <P>
 <DD><DL>
-<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing a YUV planar image
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
@@ -557,7 +647,8 @@
 <DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - a <code>BufferedImage</code> instance containing RGB or
  grayscale pixels to be encoded<DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image.  Use
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
- based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -577,7 +668,8 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - a <code>BufferedImage</code> instance containing RGB or
- grayscale pixels to be encoded<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ grayscale pixels to be encoded<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing a YUV planar image
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
index 9a3b56e..3291c71 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJCustomFilter
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -124,7 +122,7 @@
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;A callback function that can be used to modify the DCT coefficients after
  they are losslessly transformed but before they are transcoded to a new
- JPEG file.</TD>
+ JPEG image.</TD>
 </TR>
 </TABLE>
 &nbsp;
@@ -153,7 +151,7 @@
 <DL>
 <DD>A callback function that can be used to modify the DCT coefficients after
  they are losslessly transformed but before they are transcoded to a new
- JPEG file.  This allows for custom filters or other transformations to be
+ JPEG image.  This allows for custom filters or other transformations to be
  applied in the frequency domain.
 <P>
 <DD><DL>
@@ -167,10 +165,10 @@
  into multiple DCT coefficient buffers and call the callback function once
  for each buffer.<DD><CODE>planeRegion</CODE> - rectangle containing the width and height of the
  component plane to which <code>coeffBuffer</code> belongs<DD><CODE>componentID</CODE> - ID number of the component plane to which
- <code>coeffBuffer</code>belongs (Y, Cb, and Cr have, respectively, ID's of
- 0, 1, and 2 in typical JPEG images.)<DD><CODE>transformID</CODE> - ID number of the transformed image to which
+ <code>coeffBuffer</code> belongs (Y, Cb, and Cr have, respectively, IDs
+ of 0, 1, and 2 in typical JPEG images.)<DD><CODE>transformID</CODE> - ID number of the transformed image to which
  <code>coeffBuffer</code> belongs.  This is the same as the index of the
- transform in the transforms array that was passed to <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>TJTransformer.transform()</CODE></A>.<DD><CODE>transform</CODE> - a <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJTransform</CODE></A> instance that specifies the
+ transform in the <code>transforms</code> array that was passed to <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>TJTransformer.transform()</CODE></A>.<DD><CODE>transform</CODE> - a <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJTransform</CODE></A> instance that specifies the
  parameters and/or cropping region for this transform
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
index 2505470..d69f943 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJDecompressor
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -186,7 +184,7 @@
 
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a TurboJPEG decompressor instance and associate the JPEG image
- stored in <code>jpegImage</code> with the newly-created instance.</TD>
+ stored in <code>jpegImage</code> with the newly created instance.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[], int)">TJDecompressor</A></B>(byte[]&nbsp;jpegImage,
@@ -195,7 +193,7 @@
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a TurboJPEG decompressor instance and associate the JPEG image
  of length <code>imageSize</code> bytes stored in <code>jpegImage</code>
- with the newly-created instance.</TD>
+ with the newly created instance.</TD>
 </TR>
 </TABLE>
 &nbsp;
@@ -237,6 +235,38 @@
            int&nbsp;flags)</CODE>
 
 <BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A> instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>&nbsp;void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</A></B>(byte[]&nbsp;dstBuf,
+           int&nbsp;x,
+           int&nbsp;y,
+           int&nbsp;desiredWidth,
+           int&nbsp;pitch,
+           int&nbsp;desiredHeight,
+           int&nbsp;pixelFormat,
+           int&nbsp;flags)</CODE>
+
+<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Decompress the JPEG source image associated with this decompressor
+ instance and output a decompressed image to the given destination buffer.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>&nbsp;void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</A></B>(int[]&nbsp;dstBuf,
+           int&nbsp;x,
+           int&nbsp;y,
+           int&nbsp;desiredWidth,
+           int&nbsp;stride,
+           int&nbsp;desiredHeight,
+           int&nbsp;pixelFormat,
+           int&nbsp;flags)</CODE>
+
+<BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Decompress the JPEG source image associated with this decompressor
  instance and output a decompressed image to the given destination buffer.</TD>
 </TR>
@@ -326,7 +356,7 @@
                 int&nbsp;desiredHeight)</CODE>
 
 <BR>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the height of the largest scaled down image that the TurboJPEG
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the height of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.</TD>
 </TR>
@@ -337,7 +367,7 @@
                int&nbsp;desiredHeight)</CODE>
 
 <BR>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the width of the largest scaled down image that the TurboJPEG
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns the width of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.</TD>
 </TR>
@@ -483,7 +513,7 @@
                throws java.lang.Exception</PRE>
 <DL>
 <DD>Create a TurboJPEG decompressor instance and associate the JPEG image
- stored in <code>jpegImage</code> with the newly-created instance.
+ stored in <code>jpegImage</code> with the newly created instance.
 <P>
 <DL>
 <DT><B>Parameters:</B><DD><CODE>jpegImage</CODE> - JPEG image buffer (size of the JPEG image is assumed to
@@ -502,7 +532,7 @@
 <DL>
 <DD>Create a TurboJPEG decompressor instance and associate the JPEG image
  of length <code>imageSize</code> bytes stored in <code>jpegImage</code>
- with the newly-created instance.
+ with the newly created instance.
 <P>
 <DL>
 <DT><B>Parameters:</B><DD><CODE>jpegImage</CODE> - JPEG image buffer<DD><CODE>imageSize</CODE> - size of the JPEG image (in bytes)
@@ -584,7 +614,7 @@
                throws java.lang.Exception</PRE>
 <DL>
 <DD>Returns the level of chrominance subsampling used in the JPEG image
- associated with this decompressor instance.
+ associated with this decompressor instance.  See <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><CODE>TJ.SAMP_*</CODE></A>.
 <P>
 <DD><DL>
 
@@ -639,7 +669,7 @@
                           int&nbsp;desiredHeight)
                    throws java.lang.Exception</PRE>
 <DL>
-<DD>Returns the width of the largest scaled down image that the TurboJPEG
+<DD>Returns the width of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.
 <P>
@@ -651,7 +681,7 @@
  Setting this to 0 is the same as setting it to the height of the JPEG
  image (in other words, the height will not be considered when determining
  the scaled image size.)
-<DT><B>Returns:</B><DD>the width of the largest scaled down image that the TurboJPEG
+<DT><B>Returns:</B><DD>the width of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height
 <DT><B>Throws:</B>
@@ -667,7 +697,7 @@
                            int&nbsp;desiredHeight)
                     throws java.lang.Exception</PRE>
 <DL>
-<DD>Returns the height of the largest scaled down image that the TurboJPEG
+<DD>Returns the height of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height.
 <P>
@@ -679,7 +709,7 @@
  Setting this to 0 is the same as setting it to the height of the JPEG
  image (in other words, the height will not be considered when determining
  the scaled image size.)
-<DT><B>Returns:</B><DD>the height of the largest scaled down image that the TurboJPEG
+<DT><B>Returns:</B><DD>the height of the largest scaled-down image that the TurboJPEG
  decompressor can generate without exceeding the desired image width and
  height
 <DT><B>Throws:</B>
@@ -688,10 +718,12 @@
 </DL>
 <HR>
 
-<A NAME="decompress(byte[], int, int, int, int, int)"><!-- --></A><H3>
+<A NAME="decompress(byte[], int, int, int, int, int, int, int)"><!-- --></A><H3>
 decompress</H3>
 <PRE>
 public void <B>decompress</B>(byte[]&nbsp;dstBuf,
+                       int&nbsp;x,
+                       int&nbsp;y,
                        int&nbsp;desiredWidth,
                        int&nbsp;pitch,
                        int&nbsp;desiredHeight,
@@ -707,28 +739,59 @@
  buffer should normally be <code>pitch * scaledHeight</code> bytes in size,
  where <code>scaledHeight</code> can be determined by calling <code>
  scalingFactor.<A HREF="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><CODE>getScaled</CODE></A>(jpegHeight)
- </code> with one of the scaling factors returned from <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><CODE>TJ.getScalingFactors()</CODE></A> or by calling <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><CODE>getScaledHeight(int, int)</CODE></A>.<DD><CODE>desiredWidth</CODE> - desired width (in pixels) of the decompressed image.
- If the desired image dimensions are smaller than the dimensions of the
- JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG
- decompressor to generate the largest possible image that will fit within
- the desired dimensions.  Setting this to 0 is the same as setting it to
- the width of the JPEG image (in other words, the width will not be
- considered when determining the scaled image size.)<DD><CODE>pitch</CODE> - bytes per line of the destination image.  Normally, this
+ </code> with one of the scaling factors returned from <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><CODE>TJ.getScalingFactors()</CODE></A> or by calling <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><CODE>getScaledHeight(int, int)</CODE></A>.  However,
+ the buffer may also be larger than the dimensions of the JPEG image, in
+ which case the <code>x</code>, <code>y</code>, and <code>pitch</code>
+ parameters can be used to specify the region into which the JPEG image
+ should be decompressed.<DD><CODE>x</CODE> - x offset (in pixels) of the region into which the JPEG image
+ should be decompressed, relative to the start of <code>dstBuf</code>.<DD><CODE>y</CODE> - y offset (in pixels) of the region into which the JPEG image
+ should be decompressed, relative to the start of <code>dstBuf</code>.<DD><CODE>desiredWidth</CODE> - desired width (in pixels) of the decompressed image
+ (or image region.)  If the desired image dimensions are different than the
+ dimensions of the JPEG image being decompressed, then TurboJPEG will use
+ scaling in the JPEG decompressor to generate the largest possible image
+ that will fit within the desired dimensions.  Setting this to 0 is the
+ same as setting it to the width of the JPEG image (in other words, the
+ width will not be considered when determining the scaled image size.)<DD><CODE>pitch</CODE> - bytes per line of the destination image.  Normally, this
  should be set to <code>scaledWidth * TJ.pixelSize(pixelFormat)</code> if
  the decompressed image is unpadded, but you can use this to, for instance,
- pad each line of the decompressed image to a 4-byte boundary.  NOTE:
+ pad each line of the decompressed image to a 4-byte boundary or to
+ decompress the JPEG image into a region of a larger image.  NOTE:
  <code>scaledWidth</code> can be determined by calling <code>
  scalingFactor.<A HREF="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><CODE>getScaled</CODE></A>(jpegWidth)
  </code> or by calling <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)"><CODE>getScaledWidth(int, int)</CODE></A>.  Setting this parameter to
  0 is the equivalent of setting it to <code>scaledWidth *
- TJ.pixelSize(pixelFormat)</code>.<DD><CODE>desiredHeight</CODE> - desired height (in pixels) of the decompressed image.
- If the desired image dimensions are smaller than the dimensions of the
- JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG
- decompressor to generate the largest possible image that will fit within
- the desired dimensions.  Setting this to 0 is the same as setting it to
- the height of the JPEG image (in other words, the height will not be
- considered when determining the scaled image size.)<DD><CODE>pixelFormat</CODE> - pixel format of the decompressed image (one of
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.PF_*</CODE></A>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ TJ.pixelSize(pixelFormat)</code>.<DD><CODE>desiredHeight</CODE> - desired height (in pixels) of the decompressed image
+ (or image region.)  If the desired image dimensions are different than the
+ dimensions of the JPEG image being decompressed, then TurboJPEG will use
+ scaling in the JPEG decompressor to generate the largest possible image
+ that will fit within the desired dimensions.  Setting this to 0 is the
+ same as setting it to the height of the JPEG image (in other words, the
+ height will not be considered when determining the scaled image size.)<DD><CODE>pixelFormat</CODE> - pixel format of the decompressed/decoded image (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><CODE>TJ.PF_*</CODE></A>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
+<A NAME="decompress(byte[], int, int, int, int, int)"><!-- --></A><H3>
+decompress</H3>
+<PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public void <B>decompress</B>(byte[]&nbsp;dstBuf,
+                                  int&nbsp;desiredWidth,
+                                  int&nbsp;pitch,
+                                  int&nbsp;desiredHeight,
+                                  int&nbsp;pixelFormat,
+                                  int&nbsp;flags)
+                throws java.lang.Exception</PRE>
+<DL>
+<DD><B>Deprecated.</B>&nbsp;<I>Use
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A> instead.</I>
+<P>
+<DD><DL>
+
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -750,10 +813,14 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>desiredWidth</CODE> - see
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int)</CODE></A> for description<DD><CODE>pitch</CODE> - see
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int)</CODE></A> for description<DD><CODE>desiredHeight</CODE> - see
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int)</CODE></A> for description<DD><CODE>pixelFormat</CODE> - pixel format of the decompressed image (one of
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.PF_*</CODE></A>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A>
+ for description<DD><CODE>pitch</CODE> - see
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A>
+ for description<DD><CODE>desiredHeight</CODE> - see
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A>
+ for description<DD><CODE>pixelFormat</CODE> - pixel format of the decompressed image (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><CODE>TJ.PF_*</CODE></A>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing the decompressed image
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
@@ -772,16 +839,18 @@
  instance and output a YUV planar image to the given destination buffer.
  This method performs JPEG decompression but leaves out the color
  conversion step, so a planar YUV image is generated instead of an RGB
- image.  The padding of the planes in this image is the same as the images
- generated by <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><CODE>TJCompressor.encodeYUV(byte[], int)</CODE></A>.  Note that, if
- the width or height of the image is not an even multiple of the MCU block
- size (see <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)"><CODE>TJ.getMCUWidth(int)</CODE></A> and <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)"><CODE>TJ.getMCUHeight(int)</CODE></A>), then an
- intermediate buffer copy will be performed within TurboJPEG.
+ image.  The padding of the planes in this image is the same as in the
+ images generated by <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><CODE>TJCompressor.encodeYUV(byte[], int)</CODE></A>.
+ <p>
+ NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ convention of the digital video community, the TurboJPEG API uses "YUV" to
+ refer to an image format consisting of Y, Cb, and Cr image planes.
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image.  Use
  <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
- based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -798,7 +867,8 @@
  instance and return a buffer containing a YUV planar image.  See <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)"><CODE>decompressToYUV(byte[], int)</CODE></A> for more detail.
 <P>
 <DD><DL>
-<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a buffer containing a YUV planar image
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
@@ -806,6 +876,60 @@
 </DL>
 <HR>
 
+<A NAME="decompress(int[], int, int, int, int, int, int, int)"><!-- --></A><H3>
+decompress</H3>
+<PRE>
+public void <B>decompress</B>(int[]&nbsp;dstBuf,
+                       int&nbsp;x,
+                       int&nbsp;y,
+                       int&nbsp;desiredWidth,
+                       int&nbsp;stride,
+                       int&nbsp;desiredHeight,
+                       int&nbsp;pixelFormat,
+                       int&nbsp;flags)
+                throws java.lang.Exception</PRE>
+<DL>
+<DD>Decompress the JPEG source image associated with this decompressor
+ instance and output a decompressed image to the given destination buffer.
+<P>
+<DD><DL>
+<DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the decompressed image.  This
+ buffer should normally be <code>stride * scaledHeight</code> pixels in
+ size, where <code>scaledHeight</code> can be determined by calling <code>
+ scalingFactor.<A HREF="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><CODE>getScaled</CODE></A>(jpegHeight)
+ </code> with one of the scaling factors returned from <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><CODE>TJ.getScalingFactors()</CODE></A> or by calling <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><CODE>getScaledHeight(int, int)</CODE></A>.  However,
+ the buffer may also be larger than the dimensions of the JPEG image, in
+ which case the <code>x</code>, <code>y</code>, and <code>stride</code>
+ parameters can be used to specify the region into which the JPEG image
+ should be decompressed.<DD><CODE>x</CODE> - x offset (in pixels) of the region into which the JPEG image
+ should be decompressed, relative to the start of <code>dstBuf</code>.<DD><CODE>y</CODE> - y offset (in pixels) of the region into which the JPEG image
+ should be decompressed, relative to the start of <code>dstBuf</code>.<DD><CODE>desiredWidth</CODE> - desired width (in pixels) of the decompressed image
+ (or image region.)  If the desired image dimensions are different than the
+ dimensions of the JPEG image being decompressed, then TurboJPEG will use
+ scaling in the JPEG decompressor to generate the largest possible image
+ that will fit within the desired dimensions.  Setting this to 0 is the
+ same as setting it to the width of the JPEG image (in other words, the
+ width will not be considered when determining the scaled image size.)<DD><CODE>stride</CODE> - pixels per line of the destination image.  Normally, this
+ should be set to <code>scaledWidth</code>, but you can use this to, for
+ instance, decompress the JPEG image into a region of a larger image.
+ NOTE: <code>scaledWidth</code> can be determined by calling <code>
+ scalingFactor.<A HREF="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><CODE>getScaled</CODE></A>(jpegWidth)
+ </code> or by calling <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)"><CODE>getScaledWidth(int, int)</CODE></A>.  Setting this parameter to
+ 0 is the equivalent of setting it to <code>scaledWidth</code>.<DD><CODE>desiredHeight</CODE> - desired height (in pixels) of the decompressed image
+ (or image region.)  If the desired image dimensions are different than the
+ dimensions of the JPEG image being decompressed, then TurboJPEG will use
+ scaling in the JPEG decompressor to generate the largest possible image
+ that will fit within the desired dimensions.  Setting this to 0 is the
+ same as setting it to the height of the JPEG image (in other words, the
+ height will not be considered when determining the scaled image size.)<DD><CODE>pixelFormat</CODE> - pixel format of the decompressed image (one of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><CODE>TJ.PF_*</CODE></A>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
 <A NAME="decompress(java.awt.image.BufferedImage, int)"><!-- --></A><H3>
 decompress</H3>
 <PRE>
@@ -819,7 +943,10 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>dstImage</CODE> - a <code>BufferedImage</code> instance that will receive
- the decompressed image<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ the decompressed image.  The width and height of the
+ <code>BufferedImage</code> instance must match one of the scaled image
+ sizes that TurboJPEG is capable of generating from the JPEG image.<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -841,10 +968,13 @@
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>desiredWidth</CODE> - see
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int)</CODE></A> for description<DD><CODE>desiredHeight</CODE> - see
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int)</CODE></A> for description<DD><CODE>bufferedImageType</CODE> - the image type of the newly-created
- <code>BufferedImage</code> instance (for instance,
- <code>BufferedImage.TYPE_INT_RGB</code>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A> for
+ description<DD><CODE>desiredHeight</CODE> - see
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>decompress(byte[], int, int, int, int, int, int, int)</CODE></A> for
+ description<DD><CODE>bufferedImageType</CODE> - the image type of the <code>BufferedImage</code>
+ instance that will be created (for instance,
+ <code>BufferedImage.TYPE_INT_RGB</code>)<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>a <code>BufferedImage</code> instance containing the
  decompressed image
 <DT><B>Throws:</B>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html
index 4964a29..af29fc5 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJScalingFactor
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html
index ebb9644..3a68c9f 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJTransform
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -676,8 +674,10 @@
 <DT><B>Parameters:</B><DD><CODE>x</CODE> - the left boundary of the cropping region.  This must be evenly
  divisible by the MCU block width (see <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)"><CODE>TJ.getMCUWidth(int)</CODE></A>)<DD><CODE>y</CODE> - the upper boundary of the cropping region.  This must be evenly
  divisible by the MCU block height (see <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)"><CODE>TJ.getMCUHeight(int)</CODE></A>)<DD><CODE>w</CODE> - the width of the cropping region.  Setting this to 0 is the
- equivalent of setting it to the width of the source JPEG image - x.<DD><CODE>h</CODE> - the height of the cropping region.  Setting this to 0 is the
- equivalent of setting it to the height of the source JPEG image - y.<DD><CODE>op</CODE> - one of the transform operations (<code>OP_*</code>)<DD><CODE>options</CODE> - the bitwise OR of one or more of the transform options
+ equivalent of setting it to (width of the source JPEG image -
+ <code>x</code>).<DD><CODE>h</CODE> - the height of the cropping region.  Setting this to 0 is the
+ equivalent of setting it to (height of the source JPEG image -
+ <code>y</code>).<DD><CODE>op</CODE> - one of the transform operations (<code>OP_*</code>)<DD><CODE>options</CODE> - the bitwise OR of one or more of the transform options
  (<code>OPT_*</code>)<DD><CODE>cf</CODE> - an instance of an object that implements the <A HREF="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><CODE>TJCustomFilter</CODE></A> interface, or null if no custom filter is needed
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
index df81c9c..c08f358 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 TJTransformer
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
@@ -145,7 +143,7 @@
 
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a TurboJPEG lossless transformer instance and associate the JPEG
- image stored in <code>jpegImage</code> with the newly-created instance.</TD>
+ image stored in <code>jpegImage</code> with the newly created instance.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[], int)">TJTransformer</A></B>(byte[]&nbsp;jpegImage,
@@ -154,7 +152,7 @@
 <BR>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Create a TurboJPEG lossless transformer instance and associate the JPEG
  image of length <code>imageSize</code> bytes stored in
- <code>jpegImage</code> with the newly-created instance.</TD>
+ <code>jpegImage</code> with the newly created instance.</TD>
 </TR>
 </TABLE>
 &nbsp;
@@ -172,8 +170,8 @@
 <TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#getTransformedSizes()">getTransformedSizes</A></B>()</CODE>
 
 <BR>
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns an array containing the sizes of the transformed JPEG images from
- the most recent call to <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>transform()</CODE></A>.</TD>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.</TD>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
 <TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -205,7 +203,7 @@
 <TH ALIGN="left"><B>Methods inherited from class org.libjpegturbo.turbojpeg.<A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A></B></TH>
 </TR>
 <TR BGCOLOR="white" CLASS="TableRowColor">
-<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</A></CODE></TD>
+<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</A></CODE></TD>
 </TR>
 </TABLE>
 &nbsp;<A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A>
@@ -252,7 +250,7 @@
               throws java.lang.Exception</PRE>
 <DL>
 <DD>Create a TurboJPEG lossless transformer instance and associate the JPEG
- image stored in <code>jpegImage</code> with the newly-created instance.
+ image stored in <code>jpegImage</code> with the newly created instance.
 <P>
 <DL>
 <DT><B>Parameters:</B><DD><CODE>jpegImage</CODE> - JPEG image buffer (size of the JPEG image is assumed to
@@ -271,7 +269,7 @@
 <DL>
 <DD>Create a TurboJPEG lossless transformer instance and associate the JPEG
  image of length <code>imageSize</code> bytes stored in
- <code>jpegImage</code> with the newly-created instance.
+ <code>jpegImage</code> with the newly created instance.
 <P>
 <DL>
 <DT><B>Parameters:</B><DD><CODE>jpegImage</CODE> - JPEG image buffer<DD><CODE>imageSize</CODE> - size of the JPEG image (in bytes)
@@ -303,20 +301,22 @@
  JPEG image structure to another without altering the values of the
  coefficients.  While this is typically faster than decompressing the
  image, transforming it, and re-compressing it, lossless transforms are not
- free.  Each lossless transform requires reading and Huffman decoding all
- of the coefficients in the source image, regardless of the size of the
- destination image.  Thus, this method provides a means of generating
- multiple transformed images from the same source or of applying multiple
- transformations simultaneously, in order to eliminate the need to read the
- source coefficients multiple times.
+ free.  Each lossless transform requires reading and performing Huffman
+ decoding on all of the coefficients in the source image, regardless of the
+ size of the destination image.  Thus, this method provides a means of
+ generating multiple transformed images from the same source or of applying
+ multiple transformations simultaneously, in order to eliminate the need to
+ read the source coefficients multiple times.
 <P>
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>dstBufs</CODE> - an array of image buffers.  <code>dstbufs[i]</code> will
  receive a JPEG image that has been transformed using the parameters in
  <code>transforms[i]</code>.  Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><CODE>TJ.bufSize(int, int, int)</CODE></A> to determine the
- maximum size for each buffer based on the cropped width and height.<DD><CODE>transforms</CODE> - an array of <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJTransform</CODE></A> instances, each of
+ maximum size for each buffer based on the transformed or cropped width and
+ height and the level of subsampling used in the source image.<DD><CODE>transforms</CODE> - an array of <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJTransform</CODE></A> instances, each of
  which specifies the transform parameters and/or cropping region for the
- corresponding transformed output image<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ corresponding transformed output image<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
@@ -337,7 +337,8 @@
 <DD><DL>
 <DT><B>Parameters:</B><DD><CODE>transforms</CODE> - an array of <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJTransform</CODE></A> instances, each of
  which specifies the transform parameters and/or cropping region for the
- corresponding transformed output image<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ corresponding transformed output image<DD><CODE>flags</CODE> - the bitwise OR of one or more of
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><CODE>TJ.FLAG_*</CODE></A>
 <DT><B>Returns:</B><DD>an array of <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJDecompressor</CODE></A> instances, each of
  which has a transformed JPEG image associated with it
 <DT><B>Throws:</B>
@@ -352,13 +353,13 @@
 public int[] <B>getTransformedSizes</B>()
                           throws java.lang.Exception</PRE>
 <DL>
-<DD>Returns an array containing the sizes of the transformed JPEG images from
- the most recent call to <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>transform()</CODE></A>.
+<DD>Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.
 <P>
 <DD><DL>
 
-<DT><B>Returns:</B><DD>an array containing the sizes of the transformed JPEG images from
- the most recent call to <A HREF="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><CODE>transform()</CODE></A>
+<DT><B>Returns:</B><DD>an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation
 <DT><B>Throws:</B>
 <DD><CODE>java.lang.Exception</CODE></DL>
 </DD>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-frame.html b/java/doc/org/libjpegturbo/turbojpeg/package-frame.html
index 562e570..f160418 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/package-frame.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-frame.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 org.libjpegturbo.turbojpeg
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html
index 600546c..505512c 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 org.libjpegturbo.turbojpeg
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-tree.html b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html
index e33355b..e13143d 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/package-tree.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:13 CDT 2012 -->
 <TITLE>
 org.libjpegturbo.turbojpeg Class Hierarchy
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/overview-tree.html b/java/doc/overview-tree.html
index fc5467a..1c12b10 100644
--- a/java/doc/overview-tree.html
+++ b/java/doc/overview-tree.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 Class Hierarchy
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/doc/serialized-form.html b/java/doc/serialized-form.html
index 8c7e089..8ba0661 100644
--- a/java/doc/serialized-form.html
+++ b/java/doc/serialized-form.html
@@ -2,12 +2,10 @@
 <!--NewPage-->
 <HTML>
 <HEAD>
-<!-- Generated by javadoc (build 1.6.0_33) on Fri Jun 29 14:29:14 CDT 2012 -->
 <TITLE>
 Serialized Form
 </TITLE>
 
-<META NAME="date" CONTENT="2012-06-29">
 
 <LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">
 
diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java
index 78a72f6..6c6a95d 100644
--- a/java/org/libjpegturbo/turbojpeg/TJ.java
+++ b/java/org/libjpegturbo/turbojpeg/TJ.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011-2012 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,38 +31,39 @@
 /**
  * TurboJPEG utility class (cannot be instantiated)
  */
-final public class TJ {
+public final class TJ {
 
 
   /**
    * The number of chrominance subsampling options
    */
-  final public static int NUMSAMP   = 5;
+  public static final int NUMSAMP   = 5;
   /**
    * 4:4:4 chrominance subsampling (no chrominance subsampling).  The JPEG
    * or YUV image will contain one chrominance component for every pixel in the
    * source image.
    */
-  final public static int SAMP_444  = 0;
+  public static final int SAMP_444  = 0;
   /**
    * 4:2:2 chrominance subsampling.  The JPEG or YUV image will contain one
    * chrominance component for every 2x1 block of pixels in the source image.
    */
-  final public static int SAMP_422  = 1;
+  public static final int SAMP_422  = 1;
   /**
    * 4:2:0 chrominance subsampling.  The JPEG or YUV image will contain one
    * chrominance component for every 2x2 block of pixels in the source image.
    */
-  final public static int SAMP_420  = 2;
+  public static final int SAMP_420  = 2;
   /**
    * Grayscale.  The JPEG or YUV image will contain no chrominance components.
    */
-  final public static int SAMP_GRAY = 3;
+  public static final int SAMP_GRAY = 3;
   /**
    * 4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
    * chrominance component for every 1x2 block of pixels in the source image.
+   * Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
    */
-  final public static int SAMP_440  = 4;
+  public static final int SAMP_440  = 4;
 
 
   /**
@@ -75,12 +76,12 @@
    * @return the MCU block width for the given level of chrominance subsampling
    */
   public static int getMCUWidth(int subsamp) throws Exception {
-    if(subsamp < 0 || subsamp >= NUMSAMP)
+    if (subsamp < 0 || subsamp >= NUMSAMP)
       throw new Exception("Invalid subsampling type");
     return mcuWidth[subsamp];
   }
 
-  final private static int mcuWidth[] = {
+  private static final int[] mcuWidth = {
     8, 16, 16, 8, 8
   };
 
@@ -96,12 +97,12 @@
    * subsampling
    */
   public static int getMCUHeight(int subsamp) throws Exception {
-    if(subsamp < 0 || subsamp >= NUMSAMP)
+    if (subsamp < 0 || subsamp >= NUMSAMP)
       throw new Exception("Invalid subsampling type");
     return mcuHeight[subsamp];
   }
 
-  final private static int mcuHeight[] = {
+  private static final int[] mcuHeight = {
     8, 8, 16, 8, 16
   };
 
@@ -109,92 +110,92 @@
   /**
    * The number of pixel formats
    */
-  final public static int NUMPF   = 11;
+  public static final int NUMPF   = 11;
   /**
    * RGB pixel format.  The red, green, and blue components in the image are
    * stored in 3-byte pixels in the order R, G, B from lowest to highest byte
    * address within each pixel.
    */
-  final public static int PF_RGB  = 0;
+  public static final int PF_RGB  = 0;
   /**
    * BGR pixel format.  The red, green, and blue components in the image are
    * stored in 3-byte pixels in the order B, G, R from lowest to highest byte
    * address within each pixel.
    */
-  final public static int PF_BGR  = 1;
+  public static final int PF_BGR  = 1;
   /**
    * RGBX pixel format.  The red, green, and blue components in the image are
    * stored in 4-byte pixels in the order R, G, B from lowest to highest byte
    * address within each pixel.  The X component is ignored when compressing
    * and undefined when decompressing.
    */
-  final public static int PF_RGBX = 2;
+  public static final int PF_RGBX = 2;
   /**
    * BGRX pixel format.  The red, green, and blue components in the image are
    * stored in 4-byte pixels in the order B, G, R from lowest to highest byte
    * address within each pixel.  The X component is ignored when compressing
    * and undefined when decompressing.
    */
-  final public static int PF_BGRX = 3;
+  public static final int PF_BGRX = 3;
   /**
    * XBGR pixel format.  The red, green, and blue components in the image are
    * stored in 4-byte pixels in the order R, G, B from highest to lowest byte
    * address within each pixel.  The X component is ignored when compressing
    * and undefined when decompressing.
    */
-  final public static int PF_XBGR = 4;
+  public static final int PF_XBGR = 4;
   /**
    * XRGB pixel format.  The red, green, and blue components in the image are
    * stored in 4-byte pixels in the order B, G, R from highest to lowest byte
    * address within each pixel.  The X component is ignored when compressing
    * and undefined when decompressing.
    */
-  final public static int PF_XRGB = 5;
+  public static final int PF_XRGB = 5;
   /**
    * Grayscale pixel format.  Each 1-byte pixel represents a luminance
    * (brightness) level from 0 to 255.
    */
-  final public static int PF_GRAY = 6;
+  public static final int PF_GRAY = 6;
   /**
    * RGBA pixel format.  This is the same as {@link #PF_RGBX}, except that when
    * decompressing, the X byte is guaranteed to be 0xFF, which can be
    * interpreted as an opaque alpha channel.
    */
-  final public static int PF_RGBA = 7;
+  public static final int PF_RGBA = 7;
   /**
    * BGRA pixel format.  This is the same as {@link #PF_BGRX}, except that when
    * decompressing, the X byte is guaranteed to be 0xFF, which can be
    * interpreted as an opaque alpha channel.
    */
-  final public static int PF_BGRA = 8;
+  public static final int PF_BGRA = 8;
   /**
    * ABGR pixel format.  This is the same as {@link #PF_XBGR}, except that when
    * decompressing, the X byte is guaranteed to be 0xFF, which can be
    * interpreted as an opaque alpha channel.
    */
-  final public static int PF_ABGR = 9;
+  public static final int PF_ABGR = 9;
   /**
    * ARGB pixel format.  This is the same as {@link #PF_XRGB}, except that when
    * decompressing, the X byte is guaranteed to be 0xFF, which can be
    * interpreted as an opaque alpha channel.
    */
-  final public static int PF_ARGB = 10;
+  public static final int PF_ARGB = 10;
 
 
   /**
-   * Returns the pixel size (in bytes) of the given pixel format.
+   * Returns the pixel size (in bytes) for the given pixel format.
    *
    * @param pixelFormat the pixel format (one of <code>PF_*</code>)
    *
-   * @return the pixel size (in bytes) of the given pixel format
+   * @return the pixel size (in bytes) for the given pixel format
    */
   public static int getPixelSize(int pixelFormat) throws Exception {
-    if(pixelFormat < 0 || pixelFormat >= NUMPF)
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
       throw new Exception("Invalid pixel format");
     return pixelSize[pixelFormat];
   }
 
-  final private static int pixelSize[] = {
+  private static final int[] pixelSize = {
     3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4
   };
 
@@ -211,12 +212,12 @@
    * @return the red offset for the given pixel format
    */
   public static int getRedOffset(int pixelFormat) throws Exception {
-    if(pixelFormat < 0 || pixelFormat >= NUMPF)
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
       throw new Exception("Invalid pixel format");
     return redOffset[pixelFormat];
   }
 
-  final private static int redOffset[] = {
+  private static final int[] redOffset = {
     0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1
   };
 
@@ -233,12 +234,12 @@
    * @return the green offset for the given pixel format
    */
   public static int getGreenOffset(int pixelFormat) throws Exception {
-    if(pixelFormat < 0 || pixelFormat >= NUMPF)
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
       throw new Exception("Invalid pixel format");
     return greenOffset[pixelFormat];
   }
 
-  final private static int greenOffset[] = {
+  private static final int[] greenOffset = {
     1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2
   };
 
@@ -255,12 +256,12 @@
    * @return the blue offset for the given pixel format
    */
   public static int getBlueOffset(int pixelFormat) throws Exception {
-    if(pixelFormat < 0 || pixelFormat >= NUMPF)
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
       throw new Exception("Invalid pixel format");
     return blueOffset[pixelFormat];
   }
 
-  final private static int blueOffset[] = {
+  private static final int[] blueOffset = {
     2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3
   };
 
@@ -269,60 +270,59 @@
    * The uncompressed source/destination image is stored in bottom-up (Windows,
    * OpenGL) order, not top-down (X11) order.
    */
-  final public static int FLAG_BOTTOMUP     = 2;
+  public static final int FLAG_BOTTOMUP     = 2;
   /**
    * Turn off CPU auto-detection and force TurboJPEG to use MMX code
    * (if the underlying codec supports it.)
    */
-  final public static int FLAG_FORCEMMX     = 8;
+  public static final int FLAG_FORCEMMX     = 8;
   /**
    * Turn off CPU auto-detection and force TurboJPEG to use SSE code
    * (if the underlying codec supports it.)
    */
-  final public static int FLAG_FORCESSE     = 16;
+  public static final int FLAG_FORCESSE     = 16;
   /**
    * Turn off CPU auto-detection and force TurboJPEG to use SSE2 code
    * (if the underlying codec supports it.)
    */
-  final public static int FLAG_FORCESSE2    = 32;
+  public static final int FLAG_FORCESSE2    = 32;
   /**
    * Turn off CPU auto-detection and force TurboJPEG to use SSE3 code
    * (if the underlying codec supports it.)
    */
-  final public static int FLAG_FORCESSE3    = 128;
+  public static final int FLAG_FORCESSE3    = 128;
   /**
-   * When decompressing, use the fastest chrominance upsampling algorithm
-   * available in the underlying codec.  The default is to use smooth
-   * upsampling, which creates a smooth transition between neighboring
-   * chrominance components in order to reduce upsampling artifacts in the
-   * decompressed image.
+   * When decompressing an image that was compressed using chrominance
+   * subsampling, use the fastest chrominance upsampling algorithm available in
+   * the underlying codec.  The default is to use smooth upsampling, which
+   * creates a smooth transition between neighboring chrominance components in
+   * order to reduce upsampling artifacts in the decompressed image.
    */
-  final public static int FLAG_FASTUPSAMPLE = 256;
+  public static final int FLAG_FASTUPSAMPLE = 256;
   /**
    * Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
-   * default if this flag is not specified is implementation-specific.  The
-   * libjpeg implementation, for example, uses the fast algorithm by default
-   * when compressing, because this has been shown to have only a very slight
-   * effect on accuracy, but it uses the accurate algorithm when decompressing,
-   * because this has been shown to have a larger effect.
+   * default if this flag is not specified is implementation-specific.  For
+   * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+   * algorithm by default when compressing, because this has been shown to have
+   * only a very slight effect on accuracy, but it uses the accurate algorithm
+   * when decompressing, because this has been shown to have a larger effect.
    */
-  final public static int FLAG_FASTDCT      =  2048;
+  public static final int FLAG_FASTDCT      =  2048;
   /**
    * Use the most accurate DCT/IDCT algorithm available in the underlying
    * codec.  The default if this flag is not specified is
-   * implementation-specific.  The libjpeg implementation, for example, uses
-   * the fast algorithm by default when compressing, because this has been
-   * shown to have only a very slight effect on accuracy, but it uses the
-   * accurate algorithm when decompressing, because this has been shown to have
-   * a larger effect.
+   * implementation-specific.  For example, the implementation of TurboJPEG for
+   * libjpeg[-turbo] uses the fast algorithm by default when compressing,
+   * because this has been shown to have only a very slight effect on accuracy,
+   * but it uses the accurate algorithm when decompressing, because this has
+   * been shown to have a larger effect.
    */
-  final public static int FLAG_ACCURATEDCT  =  4096;
+  public static final int FLAG_ACCURATEDCT  =  4096;
 
 
   /**
    * Returns the maximum size of the buffer (in bytes) required to hold a JPEG
-   * image with the given width and height, and level of chrominance
-   * subsampling.
+   * image with the given width, height, and level of chrominance subsampling.
    *
    * @param width the width (in pixels) of the JPEG image
    *
@@ -332,10 +332,9 @@
    * generating the JPEG image (one of {@link TJ TJ.SAMP_*})
    *
    * @return the maximum size of the buffer (in bytes) required to hold a JPEG
-   * image with the given width and height, and level of chrominance
-   * subsampling
+   * image with the given width, height, and level of chrominance subsampling
    */
-  public native static int bufSize(int width, int height, int jpegSubsamp)
+  public static native int bufSize(int width, int height, int jpegSubsamp)
     throws Exception;
 
   /**
@@ -352,8 +351,7 @@
    * @return the size of the buffer (in bytes) required to hold a YUV planar
    * image with the given width, height, and level of chrominance subsampling
    */
-  public native static int bufSizeYUV(int width, int height,
-    int subsamp)
+  public static native int bufSizeYUV(int width, int height, int subsamp)
     throws Exception;
 
   /**
@@ -363,7 +361,7 @@
    * @return a list of fractional scaling factors that the JPEG decompressor in
    * this implementation of TurboJPEG supports
    */
-  public native static TJScalingFactor[] getScalingFactors()
+  public static native TJScalingFactor[] getScalingFactors()
     throws Exception;
 
   static {
diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
index b3c9b95..29c8b2a 100644
--- a/java/org/libjpegturbo/turbojpeg/TJCompressor.java
+++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,7 @@
  */
 public class TJCompressor {
 
-  private final static String NO_ASSOC_ERROR =
+  private static final String NO_ASSOC_ERROR =
     "No source image is associated with this instance";
 
   /**
@@ -48,21 +48,36 @@
 
   /**
    * Create a TurboJPEG compressor instance and associate the uncompressed
-   * source image stored in <code>srcImage</code> with the newly-created
+   * source image stored in <code>srcImage</code> with the newly created
    * instance.
    *
    * @param srcImage see {@link #setSourceImage} for description
    *
+   * @param x see {@link #setSourceImage} for description
+   *
+   * @param y see {@link #setSourceImage} for description
+   *
    * @param width see {@link #setSourceImage} for description
    *
    * @param pitch see {@link #setSourceImage} for description
    *
    * @param height see {@link #setSourceImage} for description
    *
-   * @param pixelFormat see {@link #setSourceImage} for description
+   * @param pixelFormat pixel format of the source image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
    */
+  public TJCompressor(byte[] srcImage, int x, int y, int width, int pitch,
+                      int height, int pixelFormat) throws Exception {
+    setSourceImage(srcImage, x, y, width, pitch, height, pixelFormat);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #TJCompressor(byte[], int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
   public TJCompressor(byte[] srcImage, int width, int pitch, int height,
-    int pixelFormat) throws Exception {
+                      int pixelFormat) throws Exception {
     setSourceImage(srcImage, width, pitch, height, pixelFormat);
   }
 
@@ -70,47 +85,73 @@
    * Associate an uncompressed source image with this compressor instance.
    *
    * @param srcImage image buffer containing RGB or grayscale pixels to be
-   * compressed
+   * compressed or encoded
    *
-   * @param width width (in pixels) of the source image
+   * @param x x offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param y y offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param width width (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded
    *
    * @param pitch bytes per line of the source image.  Normally, this should be
    * <code>width * TJ.pixelSize(pixelFormat)</code> if the source image is
    * unpadded, but you can use this parameter to, for instance, specify that
-   * the scanlines in the source image are padded to 4-byte boundaries, as is
-   * the case for Windows bitmaps.  You can also be clever and use this
-   * parameter to skip lines, etc.  Setting this parameter to 0 is the
-   * equivalent of setting it to <code>width *
-   * TJ.pixelSize(pixelFormat)</code>.
+   * the scanlines in the source image are padded to a 4-byte boundary or to
+   * compress/encode a JPEG or YUV image from a region of a larger source
+   * image.  You can also be clever and use this parameter to skip lines, etc.
+   * Setting this parameter to 0 is the equivalent of setting it to
+   * <code>width * TJ.pixelSize(pixelFormat)</code>.
    *
-   * @param height height (in pixels) of the source image
+   * @param height height (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded
    *
    * @param pixelFormat pixel format of the source image (one of
-   * {@link TJ TJ.PF_*})
+   * {@link TJ#PF_RGB TJ.PF_*})
    */
-  public void setSourceImage(byte[] srcImage, int width, int pitch,
-    int height, int pixelFormat) throws Exception {
-    if(handle == 0) init();
-    if(srcImage == null || width < 1 || height < 1 || pitch < 0
-      || pixelFormat < 0 || pixelFormat >= TJ.NUMPF)
+  public void setSourceImage(byte[] srcImage, int x, int y, int width,
+                             int pitch, int height, int pixelFormat)
+                             throws Exception {
+    if (handle == 0) init();
+    if (srcImage == null || x < 0 || y < 0 || width < 1 || height < 1 ||
+        pitch < 0 || pixelFormat < 0 || pixelFormat >= TJ.NUMPF)
       throw new Exception("Invalid argument in setSourceImage()");
     srcBuf = srcImage;
     srcWidth = width;
-    if(pitch == 0) srcPitch = width * TJ.getPixelSize(pixelFormat);
-    else srcPitch = pitch;
+    if (pitch == 0)
+      srcPitch = width * TJ.getPixelSize(pixelFormat);
+    else
+      srcPitch = pitch;
     srcHeight = height;
     srcPixelFormat = pixelFormat;
+    srcX = x;
+    srcY = y;
   }
 
   /**
+   * @deprecated Use
+   * {@link #setSourceImage(byte[], int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
+  public void setSourceImage(byte[] srcImage, int width, int pitch,
+                             int height, int pixelFormat) throws Exception {
+    setSourceImage(srcImage, 0, 0, width, pitch, height, pixelFormat);
+    srcX = srcY = -1;
+  }
+
+
+  /**
    * Set the level of chrominance subsampling for subsequent compress/encode
    * operations.
    *
-   * @param newSubsamp the new level of chrominance subsampling (one of
-   * {@link TJ TJ.SAMP_*})
+   * @param newSubsamp the level of chrominance subsampling to use in
+   * subsequent compress/encode operations (one of
+   * {@link TJ#SAMP_444 TJ.SAMP_*})
    */
   public void setSubsamp(int newSubsamp) throws Exception {
-    if(newSubsamp < 0 || newSubsamp >= TJ.NUMSAMP)
+    if (newSubsamp < 0 || newSubsamp >= TJ.NUMSAMP)
       throw new Exception("Invalid argument in setSubsamp()");
     subsamp = newSubsamp;
   }
@@ -122,7 +163,7 @@
    * 100 = best)
    */
   public void setJPEGQuality(int quality) throws Exception {
-    if(quality < 1 || quality > 100)
+    if (quality < 1 || quality > 100)
       throw new Exception("Invalid argument in setJPEGQuality()");
     jpegQuality = quality;
   }
@@ -133,32 +174,44 @@
    *
    * @param dstBuf buffer that will receive the JPEG image.  Use
    * {@link TJ#bufSize} to determine the maximum size for this buffer based on
-   * the image width and height.
+   * the source image's width and height and the desired level of chrominance
+   * subsampling.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void compress(byte[] dstBuf, int flags) throws Exception {
-    if(dstBuf == null || flags < 0)
+    if (dstBuf == null || flags < 0)
       throw new Exception("Invalid argument in compress()");
-    if(srcBuf == null) throw new Exception(NO_ASSOC_ERROR);
-    if(jpegQuality < 0) throw new Exception("JPEG Quality not set");
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
-    compressedSize = compress(srcBuf, srcWidth, srcPitch,
-      srcHeight, srcPixelFormat, dstBuf, subsamp, jpegQuality, flags);
+    if (srcBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (jpegQuality < 0)
+      throw new Exception("JPEG Quality not set");
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
+    if (srcX >= 0 && srcY >= 0)
+      compressedSize = compress(srcBuf, srcX, srcY, srcWidth, srcPitch,
+                                srcHeight, srcPixelFormat, dstBuf, subsamp,
+                                jpegQuality, flags);
+    else
+      compressedSize = compress(srcBuf, srcWidth, srcPitch, srcHeight,
+                                srcPixelFormat, dstBuf, subsamp, jpegQuality,
+                                flags);
   }
 
   /**
    * Compress the uncompressed source image associated with this compressor
    * instance and return a buffer containing a JPEG image.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing a JPEG image.  The length of this buffer will
    * not be equal to the size of the JPEG image.  Use {@link
    * #getCompressedSize} to obtain the size of the JPEG image.
    */
   public byte[] compress(int flags) throws Exception {
-    if(srcWidth < 1 || srcHeight < 1)
+    if (srcWidth < 1 || srcHeight < 1)
       throw new Exception(NO_ASSOC_ERROR);
     byte[] buf = new byte[TJ.bufSize(srcWidth, srcHeight, subsamp)];
     compress(buf, flags);
@@ -174,18 +227,20 @@
    *
    * @param dstBuf buffer that will receive the JPEG image.  Use
    * {@link TJ#bufSize} to determine the maximum size for this buffer based on
-   * the image width and height.
+   * the image width, height, and level of chrominance subsampling.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void compress(BufferedImage srcImage, byte[] dstBuf, int flags)
-    throws Exception {
-    if(srcImage == null || dstBuf == null || flags < 0)
+                       throws Exception {
+    if (srcImage == null || dstBuf == null || flags < 0)
       throw new Exception("Invalid argument in compress()");
     int width = srcImage.getWidth();
     int height = srcImage.getHeight();
-    int pixelFormat;  boolean intPixels = false;
-    if(byteOrder == null)
+    int pixelFormat;
+    boolean intPixels = false;
+    if (byteOrder == null)
       byteOrder = ByteOrder.nativeOrder();
     switch(srcImage.getType()) {
       case BufferedImage.TYPE_3BYTE_BGR:
@@ -196,7 +251,7 @@
       case BufferedImage.TYPE_BYTE_GRAY:
         pixelFormat = TJ.PF_GRAY;  break;
       case BufferedImage.TYPE_INT_BGR:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XBGR;
         else
           pixelFormat = TJ.PF_RGBX;
@@ -204,7 +259,7 @@
       case BufferedImage.TYPE_INT_RGB:
       case BufferedImage.TYPE_INT_ARGB:
       case BufferedImage.TYPE_INT_ARGB_PRE:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XRGB;
         else
           pixelFormat = TJ.PF_BGRX;
@@ -213,28 +268,39 @@
         throw new Exception("Unsupported BufferedImage format");
     }
     WritableRaster wr = srcImage.getRaster();
-    if(jpegQuality < 0) throw new Exception("JPEG Quality not set");
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
-    if(intPixels) {
+    if (jpegQuality < 0)
+      throw new Exception("JPEG Quality not set");
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
+    if (intPixels) {
       SinglePixelPackedSampleModel sm =
         (SinglePixelPackedSampleModel)srcImage.getSampleModel();
-      int pitch = sm.getScanlineStride();
+      int stride = sm.getScanlineStride();
       DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
       int[] buf = db.getData();
-      compressedSize = compress(buf, width, pitch, height, pixelFormat, dstBuf,
-        subsamp, jpegQuality, flags);
-    }
-    else {
+      if (srcX >= 0 && srcY >= 0)
+        compressedSize = compress(buf, srcX, srcY, width, stride, height,
+                                  pixelFormat, dstBuf, subsamp, jpegQuality,
+                                  flags);
+      else
+        compressedSize = compress(buf, width, stride, height, pixelFormat,
+                                  dstBuf, subsamp, jpegQuality, flags);
+    } else {
       ComponentSampleModel sm =
         (ComponentSampleModel)srcImage.getSampleModel();
       int pixelSize = sm.getPixelStride();
-      if(pixelSize != TJ.getPixelSize(pixelFormat))
+      if (pixelSize != TJ.getPixelSize(pixelFormat))
         throw new Exception("Inconsistency between pixel format and pixel size in BufferedImage");
       int pitch = sm.getScanlineStride();
       DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
       byte[] buf = db.getData();
-      compressedSize = compress(buf, width, pitch, height, pixelFormat, dstBuf,
-        subsamp, jpegQuality, flags);
+      if (srcX >= 0 && srcY >= 0)
+        compressedSize = compress(buf, srcX, srcY, width, pitch, height,
+                                  pixelFormat, dstBuf, subsamp, jpegQuality,
+                                  flags);
+      else
+        compressedSize = compress(buf, width, pitch, height, pixelFormat,
+                                  dstBuf, subsamp, jpegQuality, flags);
     }
   }
 
@@ -245,7 +311,8 @@
    * @param srcImage a <code>BufferedImage</code> instance containing RGB or
    * grayscale pixels to be compressed
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing a JPEG image.  The length of this buffer will
    * not be equal to the size of the JPEG image.  Use {@link
@@ -266,27 +333,34 @@
    * TurboJPEG's underlying codec to produce a planar YUV image that is
    * suitable for direct video display.  Specifically, if the chrominance
    * components are subsampled along the horizontal dimension, then the width
-   * of the luminance plane is padded to 2 in the output image (same goes for
-   * the height of the luminance plane, if the chrominance components are
-   * subsampled along the vertical dimension.)  Also, each line of each plane
-   * in the output image is padded to 4 bytes.  Although this will work with
-   * any subsampling option, it is really only useful in combination with
-   * {@link TJ#SAMP_420}, which produces an image compatible with the I420 (AKA
-   * "YUV420P") format.
+   * of the luminance plane is padded to the nearest multiple of 2 in the
+   * output image (same goes for the height of the luminance plane, if the
+   * chrominance components are subsampled along the vertical dimension.)
+   * Also, each line of each plane in the output image is padded to 4 bytes.
+   * Although this will work with any subsampling option, it is really only
+   * useful in combination with {@link TJ#SAMP_420}, which produces an image
+   * compatible with the I420 (AKA "YUV420P") format.
+   * <p>
+   * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+   * convention of the digital video community, the TurboJPEG API uses "YUV" to
+   * refer to an image format consisting of Y, Cb, and Cr image planes.
    *
    * @param dstBuf buffer that will receive the YUV planar image.  Use
    * {@link TJ#bufSizeYUV} to determine the appropriate size for this buffer
    * based on the image width, height, and level of chrominance subsampling.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void encodeYUV(byte[] dstBuf, int flags) throws Exception {
-    if(dstBuf == null || flags < 0)
+    if (dstBuf == null || flags < 0)
       throw new Exception("Invalid argument in compress()");
-    if(srcBuf == null) throw new Exception(NO_ASSOC_ERROR);
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
+    if (srcBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
     encodeYUV(srcBuf, srcWidth, srcPitch, srcHeight,
-      srcPixelFormat, dstBuf, subsamp, flags);
+              srcPixelFormat, dstBuf, subsamp, flags);
     compressedSize = TJ.bufSizeYUV(srcWidth, srcHeight, subsamp);
   }
 
@@ -295,14 +369,16 @@
    * instance and return a buffer containing a YUV planar image.  See
    * {@link #encodeYUV(byte[], int)} for more detail.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing a YUV planar image
    */
   public byte[] encodeYUV(int flags) throws Exception {
-    if(srcWidth < 1 || srcHeight < 1)
+    if (srcWidth < 1 || srcHeight < 1)
       throw new Exception(NO_ASSOC_ERROR);
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
     byte[] buf = new byte[TJ.bufSizeYUV(srcWidth, srcHeight, subsamp)];
     encodeYUV(buf, flags);
     return buf;
@@ -320,16 +396,17 @@
    * {@link TJ#bufSizeYUV} to determine the appropriate size for this buffer
    * based on the image width, height, and level of chrominance subsampling.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void encodeYUV(BufferedImage srcImage, byte[] dstBuf, int flags)
     throws Exception {
-    if(srcImage == null || dstBuf == null || flags < 0)
+    if (srcImage == null || dstBuf == null || flags < 0)
       throw new Exception("Invalid argument in encodeYUV()");
     int width = srcImage.getWidth();
     int height = srcImage.getHeight();
     int pixelFormat;  boolean intPixels = false;
-    if(byteOrder == null)
+    if (byteOrder == null)
       byteOrder = ByteOrder.nativeOrder();
     switch(srcImage.getType()) {
       case BufferedImage.TYPE_3BYTE_BGR:
@@ -340,7 +417,7 @@
       case BufferedImage.TYPE_BYTE_GRAY:
         pixelFormat = TJ.PF_GRAY;  break;
       case BufferedImage.TYPE_INT_BGR:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XBGR;
         else
           pixelFormat = TJ.PF_RGBX;
@@ -348,7 +425,7 @@
       case BufferedImage.TYPE_INT_RGB:
       case BufferedImage.TYPE_INT_ARGB:
       case BufferedImage.TYPE_INT_ARGB_PRE:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XRGB;
         else
           pixelFormat = TJ.PF_BGRX;
@@ -357,27 +434,26 @@
         throw new Exception("Unsupported BufferedImage format");
     }
     WritableRaster wr = srcImage.getRaster();
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
-    if(intPixels) {
+    if (subsamp < 0) throw new Exception("Subsampling level not set");
+    if (intPixels) {
       SinglePixelPackedSampleModel sm =
         (SinglePixelPackedSampleModel)srcImage.getSampleModel();
-      int pitch = sm.getScanlineStride();
+      int stride = sm.getScanlineStride();
       DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
       int[] buf = db.getData();
-      encodeYUV(buf, width, pitch, height, pixelFormat, dstBuf, subsamp,
-        flags);
-    }
-    else {
+      encodeYUV(buf, width, stride, height, pixelFormat, dstBuf, subsamp,
+                flags);
+    } else {
       ComponentSampleModel sm =
         (ComponentSampleModel)srcImage.getSampleModel();
       int pixelSize = sm.getPixelStride();
-      if(pixelSize != TJ.getPixelSize(pixelFormat))
+      if (pixelSize != TJ.getPixelSize(pixelFormat))
         throw new Exception("Inconsistency between pixel format and pixel size in BufferedImage");
       int pitch = sm.getScanlineStride();
       DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
       byte[] buf = db.getData();
       encodeYUV(buf, width, pitch, height, pixelFormat, dstBuf, subsamp,
-        flags);
+                flags);
     }
     compressedSize = TJ.bufSizeYUV(width, height, subsamp);
   }
@@ -390,13 +466,14 @@
    * @param srcImage a <code>BufferedImage</code> instance containing RGB or
    * grayscale pixels to be encoded
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing a YUV planar image
    */
-  public byte[] encodeYUV(BufferedImage srcImage, int flags)
-    throws Exception {
-    if(subsamp < 0) throw new Exception("Subsampling level not set");
+  public byte[] encodeYUV(BufferedImage srcImage, int flags) throws Exception {
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
     int width = srcImage.getWidth();
     int height = srcImage.getHeight();
     byte[] buf = new byte[TJ.bufSizeYUV(width, height, subsamp)];
@@ -419,15 +496,15 @@
    * Free the native structures associated with this compressor instance.
    */
   public void close() throws Exception {
-    destroy();
+    if (handle != 0)
+      destroy();
   }
 
   protected void finalize() throws Throwable {
     try {
       close();
-    }
-    catch(Exception e) {}
-    finally {
+    } catch(Exception e) {
+    } finally {
       super.finalize();
     }
   };
@@ -438,19 +515,27 @@
 
   // JPEG size in bytes is returned
   private native int compress(byte[] srcBuf, int width, int pitch,
-    int height, int pixelFormat, byte[] dstbuf, int jpegSubsamp, int jpegQual,
-    int flags) throws Exception;
+    int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception; // deprecated
 
-  private native int compress(int[] srcBuf, int width, int pitch,
-    int height, int pixelFormat, byte[] dstbuf, int jpegSubsamp, int jpegQual,
-    int flags) throws Exception;
+  private native int compress(byte[] srcBuf, int x, int y, int width,
+    int pitch, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp,
+    int jpegQual, int flags) throws Exception;
+
+  private native int compress(int[] srcBuf, int width, int stride,
+    int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception; // deprecated
+
+  private native int compress(int[] srcBuf, int x, int y, int width,
+    int stride, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp,
+    int jpegQual, int flags) throws Exception;
 
   private native void encodeYUV(byte[] srcBuf, int width, int pitch,
-    int height, int pixelFormat, byte[] dstbuf, int subsamp, int flags)
+    int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
     throws Exception;
 
-  private native void encodeYUV(int[] srcBuf, int width, int pitch,
-    int height, int pixelFormat, byte[] dstbuf, int subsamp, int flags)
+  private native void encodeYUV(int[] srcBuf, int width, int stride,
+    int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
     throws Exception;
 
   static {
@@ -461,6 +546,8 @@
   private byte[] srcBuf = null;
   private int srcWidth = 0;
   private int srcHeight = 0;
+  private int srcX = -1;
+  private int srcY = -1;
   private int srcPitch = 0;
   private int srcPixelFormat = -1;
   private int subsamp = -1;
diff --git a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
index 711225b..bf78f2e 100644
--- a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
+++ b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011, 2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,7 +39,7 @@
   /**
    * A callback function that can be used to modify the DCT coefficients after
    * they are losslessly transformed but before they are transcoded to a new
-   * JPEG file.  This allows for custom filters or other transformations to be
+   * JPEG image.  This allows for custom filters or other transformations to be
    * applied in the frequency domain.
    *
    * @param coeffBuffer a buffer containing transformed DCT coefficients.
@@ -58,19 +58,19 @@
    * component plane to which <code>coeffBuffer</code> belongs
    *
    * @param componentID ID number of the component plane to which
-   * <code>coeffBuffer</code>belongs (Y, Cb, and Cr have, respectively, ID's of
-   * 0, 1, and 2 in typical JPEG images.)
+   * <code>coeffBuffer</code> belongs (Y, Cb, and Cr have, respectively, IDs
+   * of 0, 1, and 2 in typical JPEG images.)
    *
    * @param transformID ID number of the transformed image to which
    * <code>coeffBuffer</code> belongs.  This is the same as the index of the
-   * transform in the transforms array that was passed to {@link
+   * transform in the <code>transforms</code> array that was passed to {@link
    * TJTransformer#transform TJTransformer.transform()}.
    *
    * @param transform a {@link TJTransform} instance that specifies the
    * parameters and/or cropping region for this transform
    */
-  public void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion,
-    Rectangle planeRegion, int componentID, int transformID,
-    TJTransform transform)
+  void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion,
+                    Rectangle planeRegion, int componentID, int transformID,
+                    TJTransform transform)
     throws Exception;
 }
diff --git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
index de6cacc..8b98dcf 100644
--- a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
+++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,7 @@
  */
 public class TJDecompressor {
 
-  private final static String NO_ASSOC_ERROR =
+  private static final String NO_ASSOC_ERROR =
     "No JPEG image is associated with this instance";
 
   /**
@@ -48,7 +48,7 @@
 
   /**
    * Create a TurboJPEG decompressor instance and associate the JPEG image
-   * stored in <code>jpegImage</code> with the newly-created instance.
+   * stored in <code>jpegImage</code> with the newly created instance.
    *
    * @param jpegImage JPEG image buffer (size of the JPEG image is assumed to
    * be the length of the array)
@@ -61,7 +61,7 @@
   /**
    * Create a TurboJPEG decompressor instance and associate the JPEG image
    * of length <code>imageSize</code> bytes stored in <code>jpegImage</code>
-   * with the newly-created instance.
+   * with the newly created instance.
    *
    * @param jpegImage JPEG image buffer
    *
@@ -82,7 +82,7 @@
    * @param imageSize size of the JPEG image (in bytes)
    */
   public void setJPEGImage(byte[] jpegImage, int imageSize) throws Exception {
-    if(jpegImage == null || imageSize < 1)
+    if (jpegImage == null || imageSize < 1)
       throw new Exception("Invalid argument in setJPEGImage()");
     jpegBuf = jpegImage;
     jpegBufSize = imageSize;
@@ -97,7 +97,8 @@
    * instance
    */
   public int getWidth() throws Exception {
-    if(jpegWidth < 1) throw new Exception(NO_ASSOC_ERROR);
+    if (jpegWidth < 1)
+      throw new Exception(NO_ASSOC_ERROR);
     return jpegWidth;
   }
 
@@ -109,20 +110,22 @@
    * instance
    */
   public int getHeight() throws Exception {
-    if(jpegHeight < 1) throw new Exception(NO_ASSOC_ERROR);
+    if (jpegHeight < 1)
+      throw new Exception(NO_ASSOC_ERROR);
     return jpegHeight;
   }
 
   /**
    * Returns the level of chrominance subsampling used in the JPEG image
-   * associated with this decompressor instance.
+   * associated with this decompressor instance.  See {@link TJ TJ.SAMP_*}.
    *
    * @return the level of chrominance subsampling used in the JPEG image
    * associated with this decompressor instance
    */
   public int getSubsamp() throws Exception {
-    if(jpegSubsamp < 0) throw new Exception(NO_ASSOC_ERROR);
-    if(jpegSubsamp >= TJ.NUMSAMP)
+    if (jpegSubsamp < 0)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (jpegSubsamp >= TJ.NUMSAMP)
       throw new Exception("JPEG header information is invalid");
     return jpegSubsamp;
   }
@@ -133,7 +136,8 @@
    * @return the JPEG image buffer associated with this decompressor instance
    */
   public byte[] getJPEGBuf() throws Exception {
-    if(jpegBuf == null) throw new Exception(NO_ASSOC_ERROR);
+    if (jpegBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
     return jpegBuf;
   }
 
@@ -145,13 +149,13 @@
    * decompressor instance
    */
   public int getJPEGSize() throws Exception {
-    if(jpegBufSize < 1) throw new Exception(NO_ASSOC_ERROR);
+    if (jpegBufSize < 1)
+      throw new Exception(NO_ASSOC_ERROR);
     return jpegBufSize;
   }
 
-
   /**
-   * Returns the width of the largest scaled down image that the TurboJPEG
+   * Returns the width of the largest scaled-down image that the TurboJPEG
    * decompressor can generate without exceeding the desired image width and
    * height.
    *
@@ -165,33 +169,35 @@
    * image (in other words, the height will not be considered when determining
    * the scaled image size.)
    *
-   * @return the width of the largest scaled down image that the TurboJPEG
+   * @return the width of the largest scaled-down image that the TurboJPEG
    * decompressor can generate without exceeding the desired image width and
    * height
    */
   public int getScaledWidth(int desiredWidth, int desiredHeight)
-    throws Exception {
-    if(jpegWidth < 1 || jpegHeight < 1)
+                            throws Exception {
+    if (jpegWidth < 1 || jpegHeight < 1)
       throw new Exception(NO_ASSOC_ERROR);
-    if(desiredWidth < 0 || desiredHeight < 0)
+    if (desiredWidth < 0 || desiredHeight < 0)
       throw new Exception("Invalid argument in getScaledWidth()");
-    TJScalingFactor sf[] = TJ.getScalingFactors();
-    if(desiredWidth == 0) desiredWidth = jpegWidth;
-    if(desiredHeight == 0) desiredHeight = jpegHeight;
+    TJScalingFactor[] sf = TJ.getScalingFactors();
+    if (desiredWidth == 0)
+      desiredWidth = jpegWidth;
+    if (desiredHeight == 0)
+      desiredHeight = jpegHeight;
     int scaledWidth = jpegWidth, scaledHeight = jpegHeight;
-    for(int i = 0; i < sf.length; i++) {
+    for (int i = 0; i < sf.length; i++) {
       scaledWidth = sf[i].getScaled(jpegWidth);
       scaledHeight = sf[i].getScaled(jpegHeight);
-      if(scaledWidth <= desiredWidth && scaledHeight <= desiredHeight)
+      if (scaledWidth <= desiredWidth && scaledHeight <= desiredHeight)
         break;
     }
-    if(scaledWidth > desiredWidth || scaledHeight > desiredHeight)
+    if (scaledWidth > desiredWidth || scaledHeight > desiredHeight)
       throw new Exception("Could not scale down to desired image dimensions");
     return scaledWidth;
   }
 
   /**
-   * Returns the height of the largest scaled down image that the TurboJPEG
+   * Returns the height of the largest scaled-down image that the TurboJPEG
    * decompressor can generate without exceeding the desired image width and
    * height.
    *
@@ -205,27 +211,29 @@
    * image (in other words, the height will not be considered when determining
    * the scaled image size.)
    *
-   * @return the height of the largest scaled down image that the TurboJPEG
+   * @return the height of the largest scaled-down image that the TurboJPEG
    * decompressor can generate without exceeding the desired image width and
    * height
    */
   public int getScaledHeight(int desiredWidth, int desiredHeight)
-    throws Exception {
-    if(jpegWidth < 1 || jpegHeight < 1)
+                             throws Exception {
+    if (jpegWidth < 1 || jpegHeight < 1)
       throw new Exception(NO_ASSOC_ERROR);
-    if(desiredWidth < 0 || desiredHeight < 0)
+    if (desiredWidth < 0 || desiredHeight < 0)
       throw new Exception("Invalid argument in getScaledHeight()");
-    TJScalingFactor sf[] = TJ.getScalingFactors();
-    if(desiredWidth == 0) desiredWidth = jpegWidth;
-    if(desiredHeight == 0) desiredHeight = jpegHeight;
+    TJScalingFactor[] sf = TJ.getScalingFactors();
+    if (desiredWidth == 0)
+      desiredWidth = jpegWidth;
+    if (desiredHeight == 0)
+      desiredHeight = jpegHeight;
     int scaledWidth = jpegWidth, scaledHeight = jpegHeight;
-    for(int i = 0; i < sf.length; i++) {
+    for (int i = 0; i < sf.length; i++) {
       scaledWidth = sf[i].getScaled(jpegWidth);
       scaledHeight = sf[i].getScaled(jpegHeight);
-      if(scaledWidth <= desiredWidth && scaledHeight <= desiredHeight)
+      if (scaledWidth <= desiredWidth && scaledHeight <= desiredHeight)
         break;
     }
-    if(scaledWidth > desiredWidth || scaledHeight > desiredHeight)
+    if (scaledWidth > desiredWidth || scaledHeight > desiredHeight)
       throw new Exception("Could not scale down to desired image dimensions");
     return scaledHeight;
   }
@@ -239,47 +247,78 @@
    * where <code>scaledHeight</code> can be determined by calling <code>
    * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight)
    * </code> with one of the scaling factors returned from {@link
-   * TJ#getScalingFactors} or by calling {@link #getScaledHeight}.
+   * TJ#getScalingFactors} or by calling {@link #getScaledHeight}.  However,
+   * the buffer may also be larger than the dimensions of the JPEG image, in
+   * which case the <code>x</code>, <code>y</code>, and <code>pitch</code>
+   * parameters can be used to specify the region into which the JPEG image
+   * should be decompressed.
    *
-   * @param desiredWidth desired width (in pixels) of the decompressed image.
-   * If the desired image dimensions are smaller than the dimensions of the
-   * JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG
-   * decompressor to generate the largest possible image that will fit within
-   * the desired dimensions.  Setting this to 0 is the same as setting it to
-   * the width of the JPEG image (in other words, the width will not be
-   * considered when determining the scaled image size.)
+   * @param x x offset (in pixels) of the region into which the JPEG image
+   * should be decompressed, relative to the start of <code>dstBuf</code>.
+   *
+   * @param y y offset (in pixels) of the region into which the JPEG image
+   * should be decompressed, relative to the start of <code>dstBuf</code>.
+   *
+   * @param desiredWidth desired width (in pixels) of the decompressed image
+   * (or image region.)  If the desired image dimensions are different than the
+   * dimensions of the JPEG image being decompressed, then TurboJPEG will use
+   * scaling in the JPEG decompressor to generate the largest possible image
+   * that will fit within the desired dimensions.  Setting this to 0 is the
+   * same as setting it to the width of the JPEG image (in other words, the
+   * width will not be considered when determining the scaled image size.)
    *
    * @param pitch bytes per line of the destination image.  Normally, this
    * should be set to <code>scaledWidth * TJ.pixelSize(pixelFormat)</code> if
    * the decompressed image is unpadded, but you can use this to, for instance,
-   * pad each line of the decompressed image to a 4-byte boundary.  NOTE:
+   * pad each line of the decompressed image to a 4-byte boundary or to
+   * decompress the JPEG image into a region of a larger image.  NOTE:
    * <code>scaledWidth</code> can be determined by calling <code>
    * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth)
    * </code> or by calling {@link #getScaledWidth}.  Setting this parameter to
    * 0 is the equivalent of setting it to <code>scaledWidth *
    * TJ.pixelSize(pixelFormat)</code>.
    *
-   * @param desiredHeight desired height (in pixels) of the decompressed image.
-   * If the desired image dimensions are smaller than the dimensions of the
-   * JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG
-   * decompressor to generate the largest possible image that will fit within
-   * the desired dimensions.  Setting this to 0 is the same as setting it to
-   * the height of the JPEG image (in other words, the height will not be
-   * considered when determining the scaled image size.)
+   * @param desiredHeight desired height (in pixels) of the decompressed image
+   * (or image region.)  If the desired image dimensions are different than the
+   * dimensions of the JPEG image being decompressed, then TurboJPEG will use
+   * scaling in the JPEG decompressor to generate the largest possible image
+   * that will fit within the desired dimensions.  Setting this to 0 is the
+   * same as setting it to the height of the JPEG image (in other words, the
+   * height will not be considered when determining the scaled image size.)
    *
-   * @param pixelFormat pixel format of the decompressed image (one of
-   * {@link TJ TJ.PF_*})
+   * @param pixelFormat pixel format of the decompressed/decoded image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
-  public void decompress(byte[] dstBuf, int desiredWidth, int pitch,
-    int desiredHeight, int pixelFormat, int flags) throws Exception {
-    if(jpegBuf == null) throw new Exception(NO_ASSOC_ERROR);
-    if(dstBuf == null || desiredWidth < 0 || pitch < 0 || desiredHeight < 0
-      || pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0)
+  public void decompress(byte[] dstBuf, int x, int y, int desiredWidth,
+                         int pitch, int desiredHeight, int pixelFormat,
+                         int flags) throws Exception {
+    if (jpegBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (dstBuf == null || x < 0 || y < 0 || desiredWidth < 0 || pitch < 0 ||
+        desiredHeight < 0 || pixelFormat < 0 || pixelFormat >= TJ.NUMPF ||
+        flags < 0)
       throw new Exception("Invalid argument in decompress()");
-    decompress(jpegBuf, jpegBufSize, dstBuf, desiredWidth, pitch,
-      desiredHeight, pixelFormat, flags);
+    if (x > 0 || y > 0)
+      decompress(jpegBuf, jpegBufSize, dstBuf, x, y, desiredWidth, pitch,
+                 desiredHeight, pixelFormat, flags);
+    else
+      decompress(jpegBuf, jpegBufSize, dstBuf, desiredWidth, pitch,
+                 desiredHeight, pixelFormat, flags);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
+  public void decompress(byte[] dstBuf, int desiredWidth, int pitch,
+                         int desiredHeight, int pixelFormat, int flags)
+                         throws Exception {
+    decompress(dstBuf, 0, 0, desiredWidth, pitch, desiredHeight, pixelFormat,
+               flags);
   }
 
   /**
@@ -287,30 +326,35 @@
    * instance and return a buffer containing the decompressed image.
    *
    * @param desiredWidth see
-   * {@link #decompress(byte[], int, int, int, int, int)} for description
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
    *
    * @param pitch see
-   * {@link #decompress(byte[], int, int, int, int, int)} for description
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
    *
    * @param desiredHeight see
-   * {@link #decompress(byte[], int, int, int, int, int)} for description
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
    *
    * @param pixelFormat pixel format of the decompressed image (one of
-   * {@link TJ TJ.PF_*})
+   * {@link TJ#PF_RGB TJ.PF_*})
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing the decompressed image
    */
   public byte[] decompress(int desiredWidth, int pitch, int desiredHeight,
-    int pixelFormat, int flags) throws Exception {
-    if(desiredWidth < 0 || pitch < 0 || desiredHeight < 0
-      || pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0)
+                           int pixelFormat, int flags) throws Exception {
+    if (desiredWidth < 0 || pitch < 0 || desiredHeight < 0 ||
+        pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0)
       throw new Exception("Invalid argument in decompress()");
     int pixelSize = TJ.getPixelSize(pixelFormat);
     int scaledWidth = getScaledWidth(desiredWidth, desiredHeight);
     int scaledHeight = getScaledHeight(desiredWidth, desiredHeight);
-    if(pitch == 0) pitch = scaledWidth * pixelSize;
+    if (pitch == 0)
+      pitch = scaledWidth * pixelSize;
     byte[] buf = new byte[pitch * scaledHeight];
     decompress(buf, desiredWidth, pitch, desiredHeight, pixelFormat, flags);
     return buf;
@@ -321,41 +365,45 @@
    * instance and output a YUV planar image to the given destination buffer.
    * This method performs JPEG decompression but leaves out the color
    * conversion step, so a planar YUV image is generated instead of an RGB
-   * image.  The padding of the planes in this image is the same as the images
-   * generated by {@link TJCompressor#encodeYUV(byte[], int)}.  Note that, if
-   * the width or height of the image is not an even multiple of the MCU block
-   * size (see {@link TJ#getMCUWidth} and {@link TJ#getMCUHeight}), then an
-   * intermediate buffer copy will be performed within TurboJPEG.
+   * image.  The padding of the planes in this image is the same as in the
+   * images generated by {@link TJCompressor#encodeYUV(byte[], int)}.
+   * <p>
+   * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+   * convention of the digital video community, the TurboJPEG API uses "YUV" to
+   * refer to an image format consisting of Y, Cb, and Cr image planes.
    *
    * @param dstBuf buffer that will receive the YUV planar image.  Use
    * {@link TJ#bufSizeYUV} to determine the appropriate size for this buffer
    * based on the image width, height, and level of chrominance subsampling.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void decompressToYUV(byte[] dstBuf, int flags) throws Exception {
-    if(jpegBuf == null) throw new Exception(NO_ASSOC_ERROR);
-    if(dstBuf == null || flags < 0)
+    if (jpegBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (dstBuf == null || flags < 0)
       throw new Exception("Invalid argument in decompressToYUV()");
     decompressToYUV(jpegBuf, jpegBufSize, dstBuf, flags);
   }
 
-  
+
   /**
    * Decompress the JPEG source image associated with this decompressor
    * instance and return a buffer containing a YUV planar image.  See {@link
    * #decompressToYUV(byte[], int)} for more detail.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a buffer containing a YUV planar image
    */
   public byte[] decompressToYUV(int flags) throws Exception {
-    if(flags < 0)
+    if (flags < 0)
       throw new Exception("Invalid argument in decompressToYUV()");
-    if(jpegWidth < 1 || jpegHeight < 1 || jpegSubsamp < 0)
+    if (jpegWidth < 1 || jpegHeight < 1 || jpegSubsamp < 0)
       throw new Exception(NO_ASSOC_ERROR);
-    if(jpegSubsamp >= TJ.NUMSAMP)
+    if (jpegSubsamp >= TJ.NUMSAMP)
       throw new Exception("JPEG header information is invalid");
     byte[] buf = new byte[TJ.bufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)];
     decompressToYUV(buf, flags);
@@ -364,25 +412,93 @@
 
   /**
    * Decompress the JPEG source image associated with this decompressor
+   * instance and output a decompressed image to the given destination buffer.
+   *
+   * @param dstBuf buffer that will receive the decompressed image.  This
+   * buffer should normally be <code>stride * scaledHeight</code> pixels in
+   * size, where <code>scaledHeight</code> can be determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight)
+   * </code> with one of the scaling factors returned from {@link
+   * TJ#getScalingFactors} or by calling {@link #getScaledHeight}.  However,
+   * the buffer may also be larger than the dimensions of the JPEG image, in
+   * which case the <code>x</code>, <code>y</code>, and <code>stride</code>
+   * parameters can be used to specify the region into which the JPEG image
+   * should be decompressed.
+   *
+   * @param x x offset (in pixels) of the region into which the JPEG image
+   * should be decompressed, relative to the start of <code>dstBuf</code>.
+   *
+   * @param y y offset (in pixels) of the region into which the JPEG image
+   * should be decompressed, relative to the start of <code>dstBuf</code>.
+   *
+   * @param desiredWidth desired width (in pixels) of the decompressed image
+   * (or image region.)  If the desired image dimensions are different than the
+   * dimensions of the JPEG image being decompressed, then TurboJPEG will use
+   * scaling in the JPEG decompressor to generate the largest possible image
+   * that will fit within the desired dimensions.  Setting this to 0 is the
+   * same as setting it to the width of the JPEG image (in other words, the
+   * width will not be considered when determining the scaled image size.)
+   *
+   * @param stride pixels per line of the destination image.  Normally, this
+   * should be set to <code>scaledWidth</code>, but you can use this to, for
+   * instance, decompress the JPEG image into a region of a larger image.
+   * NOTE: <code>scaledWidth</code> can be determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth)
+   * </code> or by calling {@link #getScaledWidth}.  Setting this parameter to
+   * 0 is the equivalent of setting it to <code>scaledWidth</code>.
+   *
+   * @param desiredHeight desired height (in pixels) of the decompressed image
+   * (or image region.)  If the desired image dimensions are different than the
+   * dimensions of the JPEG image being decompressed, then TurboJPEG will use
+   * scaling in the JPEG decompressor to generate the largest possible image
+   * that will fit within the desired dimensions.  Setting this to 0 is the
+   * same as setting it to the height of the JPEG image (in other words, the
+   * height will not be considered when determining the scaled image size.)
+   *
+   * @param pixelFormat pixel format of the decompressed image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void decompress(int[] dstBuf, int x, int y, int desiredWidth,
+                         int stride, int desiredHeight, int pixelFormat,
+                         int flags) throws Exception {
+    if (jpegBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (dstBuf == null || x < 0 || y < 0 || desiredWidth < 0 || stride < 0 ||
+        desiredHeight < 0 || pixelFormat < 0 || pixelFormat >= TJ.NUMPF ||
+        flags < 0)
+      throw new Exception("Invalid argument in decompress()");
+    decompress(jpegBuf, jpegBufSize, dstBuf, x, y, desiredWidth, stride,
+               desiredHeight, pixelFormat, flags);
+  }
+
+  /**
+   * Decompress the JPEG source image associated with this decompressor
    * instance and output a decompressed image to the given
    * <code>BufferedImage</code> instance.
    *
    * @param dstImage a <code>BufferedImage</code> instance that will receive
-   * the decompressed image
+   * the decompressed image.  The width and height of the
+   * <code>BufferedImage</code> instance must match one of the scaled image
+   * sizes that TurboJPEG is capable of generating from the JPEG image.
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void decompress(BufferedImage dstImage, int flags) throws Exception {
-    if(dstImage == null || flags < 0)
+    if (dstImage == null || flags < 0)
       throw new Exception("Invalid argument in decompress()");
     int desiredWidth = dstImage.getWidth();
     int desiredHeight = dstImage.getHeight();
     int scaledWidth = getScaledWidth(desiredWidth, desiredHeight);
     int scaledHeight = getScaledHeight(desiredWidth, desiredHeight);
-    if(scaledWidth != desiredWidth || scaledHeight != desiredHeight)
-      throw new Exception("BufferedImage dimensions do not match a scaled image size that TurboJPEG is capable of generating.");
+    if (scaledWidth != desiredWidth || scaledHeight != desiredHeight)
+      throw new Exception("BufferedImage dimensions do not match one of the scaled image sizes that TurboJPEG is capable of generating.");
     int pixelFormat;  boolean intPixels = false;
-    if(byteOrder == null)
+    if (byteOrder == null)
       byteOrder = ByteOrder.nativeOrder();
     switch(dstImage.getType()) {
       case BufferedImage.TYPE_3BYTE_BGR:
@@ -393,20 +509,20 @@
       case BufferedImage.TYPE_BYTE_GRAY:
         pixelFormat = TJ.PF_GRAY;  break;
       case BufferedImage.TYPE_INT_BGR:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XBGR;
         else
           pixelFormat = TJ.PF_RGBX;
         intPixels = true;  break;
       case BufferedImage.TYPE_INT_RGB:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_XRGB;
         else
           pixelFormat = TJ.PF_BGRX;
         intPixels = true;  break;
       case BufferedImage.TYPE_INT_ARGB:
       case BufferedImage.TYPE_INT_ARGB_PRE:
-        if(byteOrder == ByteOrder.BIG_ENDIAN)
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
           pixelFormat = TJ.PF_ARGB;
         else
           pixelFormat = TJ.PF_BGRA;
@@ -415,21 +531,21 @@
         throw new Exception("Unsupported BufferedImage format");
     }
     WritableRaster wr = dstImage.getRaster();
-    if(intPixels) {
+    if (intPixels) {
       SinglePixelPackedSampleModel sm =
         (SinglePixelPackedSampleModel)dstImage.getSampleModel();
-      int pitch = sm.getScanlineStride();
+      int stride = sm.getScanlineStride();
       DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
       int[] buf = db.getData();
-      if(jpegBuf == null) throw new Exception(NO_ASSOC_ERROR);
-      decompress(jpegBuf, jpegBufSize, buf, scaledWidth, pitch, scaledHeight,
-        pixelFormat, flags);
-    }
-    else {
+      if (jpegBuf == null)
+        throw new Exception(NO_ASSOC_ERROR);
+      decompress(jpegBuf, jpegBufSize, buf, scaledWidth, stride, scaledHeight,
+                 pixelFormat, flags);
+    } else {
       ComponentSampleModel sm =
         (ComponentSampleModel)dstImage.getSampleModel();
       int pixelSize = sm.getPixelStride();
-      if(pixelSize != TJ.getPixelSize(pixelFormat))
+      if (pixelSize != TJ.getPixelSize(pixelFormat))
         throw new Exception("Inconsistency between pixel format and pixel size in BufferedImage");
       int pitch = sm.getScanlineStride();
       DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
@@ -444,28 +560,32 @@
    * decompressed image.
    *
    * @param desiredWidth see
-   * {@link #decompress(byte[], int, int, int, int, int)} for description
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} for
+   * description
    *
    * @param desiredHeight see
-   * {@link #decompress(byte[], int, int, int, int, int)} for description
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} for
+   * description
    *
-   * @param bufferedImageType the image type of the newly-created
-   * <code>BufferedImage</code> instance (for instance,
+   * @param bufferedImageType the image type of the <code>BufferedImage</code>
+   * instance that will be created (for instance,
    * <code>BufferedImage.TYPE_INT_RGB</code>)
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    *
    * @return a <code>BufferedImage</code> instance containing the
    * decompressed image
    */
   public BufferedImage decompress(int desiredWidth, int desiredHeight,
-    int bufferedImageType, int flags) throws Exception {
-    if(desiredWidth < 0 || desiredHeight < 0 || flags < 0)
+                                  int bufferedImageType, int flags)
+                                  throws Exception {
+    if (desiredWidth < 0 || desiredHeight < 0 || flags < 0)
       throw new Exception("Invalid argument in decompress()");
     int scaledWidth = getScaledWidth(desiredWidth, desiredHeight);
     int scaledHeight = getScaledHeight(desiredWidth, desiredHeight);
     BufferedImage img = new BufferedImage(scaledWidth, scaledHeight,
-      bufferedImageType);
+                                          bufferedImageType);
     decompress(img, flags);
     return img;
   }
@@ -474,15 +594,15 @@
    * Free the native structures associated with this decompressor instance.
    */
   public void close() throws Exception {
-    destroy();
+    if (handle != 0)
+      destroy();
   }
 
   protected void finalize() throws Throwable {
     try {
       close();
-    }
-    catch(Exception e) {}
-    finally {
+    } catch(Exception e) {
+    } finally {
       super.finalize();
     }
   };
@@ -496,15 +616,22 @@
 
   private native void decompress(byte[] srcBuf, int size, byte[] dstBuf,
     int desiredWidth, int pitch, int desiredHeight, int pixelFormat, int flags)
-    throws Exception;
+    throws Exception; // deprecated
+
+  private native void decompress(byte[] srcBuf, int size, byte[] dstBuf, int x,
+    int y, int desiredWidth, int pitch, int desiredHeight, int pixelFormat,
+    int flags) throws Exception;
 
   private native void decompress(byte[] srcBuf, int size, int[] dstBuf,
-    int desiredWidth, int pitch, int desiredHeight, int pixelFormat, int flags)
-    throws Exception;
+    int desiredWidth, int stride, int desiredHeight, int pixelFormat,
+    int flags) throws Exception; // deprecated
+
+  private native void decompress(byte[] srcBuf, int size, int[] dstBuf, int x,
+    int y, int desiredWidth, int stride, int desiredHeight, int pixelFormat,
+    int flags) throws Exception;
 
   private native void decompressToYUV(byte[] srcBuf, int size, byte[] dstBuf,
-    int flags)
-    throws Exception;
+    int flags) throws Exception;
 
   static {
     TJLoader.load();
diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java b/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
similarity index 62%
copy from java/org/libjpegturbo/turbojpeg/TJLoader.java
copy to java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
index db77bba..a4f1c87 100644
--- a/java/org/libjpegturbo/turbojpeg/TJLoader.java
+++ b/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,30 @@
 
 final class TJLoader {
   static void load() {
-    System.loadLibrary("turbojpeg");
+    try {
+      System.loadLibrary("turbojpeg");
+    } catch (java.lang.UnsatisfiedLinkError e) {
+      String os = System.getProperty("os.name").toLowerCase();
+      if (os.indexOf("mac") >= 0) {
+        try {
+          System.load("%{__libdir}/libturbojpeg.jnilib");
+        } catch (java.lang.UnsatisfiedLinkError e2) {
+          System.load("/usr/lib/libturbojpeg.jnilib");
+        }
+      } else {
+        try {
+          System.load("%{__libdir}/libturbojpeg.so");
+        } catch (java.lang.UnsatisfiedLinkError e3) {
+          String libdir = "%{__libdir}";
+          if (libdir.equals("/opt/libjpeg-turbo/lib64")) {
+            System.load("/opt/libjpeg-turbo/lib32/libturbojpeg.so");
+          } else if (libdir.equals("/opt/libjpeg-turbo/lib32")) {
+            System.load("/opt/libjpeg-turbo/lib64/libturbojpeg.so");
+          } else {
+            throw e3;
+          }
+        }
+      }
+    }
   }
 };
diff --git a/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java
index d71ceee..4e7363f 100644
--- a/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java
+++ b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java
@@ -34,7 +34,7 @@
 public class TJScalingFactor {
 
   public TJScalingFactor(int num, int denom) throws Exception {
-    if(num < 1 || denom < 1)
+    if (num < 1 || denom < 1)
       throw new Exception("Numerator and denominator must be >= 1");
     this.num = num;
     this.denom = denom;
diff --git a/java/org/libjpegturbo/turbojpeg/TJTransform.java b/java/org/libjpegturbo/turbojpeg/TJTransform.java
index 399cf3a..b464ffd 100644
--- a/java/org/libjpegturbo/turbojpeg/TJTransform.java
+++ b/java/org/libjpegturbo/turbojpeg/TJTransform.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011, 2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,54 +40,54 @@
   /**
    * The number of lossless transform operations
    */
-  final public static int NUMOP         = 8;
+  public static final int NUMOP         = 8;
   /**
    * Do not transform the position of the image pixels.
    */
-  final public static int OP_NONE       = 0;
+  public static final int OP_NONE       = 0;
   /**
    * Flip (mirror) image horizontally.  This transform is imperfect if there
    * are any partial MCU blocks on the right edge.
    * @see #OPT_PERFECT
    */
-  final public static int OP_HFLIP      = 1;
+  public static final int OP_HFLIP      = 1;
   /**
    * Flip (mirror) image vertically.  This transform is imperfect if there are
    * any partial MCU blocks on the bottom edge.
    * @see #OPT_PERFECT
    */
-  final public static int OP_VFLIP      = 2;
+  public static final int OP_VFLIP      = 2;
   /**
    * Transpose image (flip/mirror along upper left to lower right axis).  This
    * transform is always perfect.
    * @see #OPT_PERFECT
    */
-  final public static int OP_TRANSPOSE  = 3;
+  public static final int OP_TRANSPOSE  = 3;
   /**
    * Transverse transpose image (flip/mirror along upper right to lower left
    * axis).  This transform is imperfect if there are any partial MCU blocks in
    * the image.
    * @see #OPT_PERFECT
    */
-  final public static int OP_TRANSVERSE = 4;
+  public static final int OP_TRANSVERSE = 4;
   /**
    * Rotate image clockwise by 90 degrees.  This transform is imperfect if
    * there are any partial MCU blocks on the bottom edge.
    * @see #OPT_PERFECT
    */
-  final public static int OP_ROT90      = 5;
+  public static final int OP_ROT90      = 5;
   /**
    * Rotate image 180 degrees.  This transform is imperfect if there are any
    * partial MCU blocks in the image.
    * @see #OPT_PERFECT
    */
-  final public static int OP_ROT180     = 6;
+  public static final int OP_ROT180     = 6;
   /**
    * Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
    * if there are any partial MCU blocks on the right edge.
    * @see #OPT_PERFECT
    */
-  final public static int OP_ROT270     = 7;
+  public static final int OP_ROT270     = 7;
 
 
   /**
@@ -103,21 +103,21 @@
    * partial MCU blocks that cannot be transformed will be left in place, which
    * will create odd-looking strips on the right or bottom edge of the image.
    */
-  final public static int OPT_PERFECT  = 1;
+  public static final int OPT_PERFECT  = 1;
   /**
    * This option will discard any partial MCU blocks that cannot be
    * transformed.
    */
-  final public static int OPT_TRIM     = 2;
+  public static final int OPT_TRIM     = 2;
   /**
    * This option will enable lossless cropping.
    */
-  final public static int OPT_CROP     = 4;
+  public static final int OPT_CROP     = 4;
   /**
    * This option will discard the color data in the input image and produce
    * a grayscale output image.
    */
-  final public static int OPT_GRAY     = 8;
+  public static final int OPT_GRAY     = 8;
   /**
    * This option will prevent {@link TJTransformer#transform
    * TJTransformer.transform()} from outputting a JPEG image for this
@@ -125,9 +125,9 @@
    * filter to capture the transformed DCT coefficients without transcoding
    * them.
    */
-  final public static int OPT_NOOUTPUT = 16;
+  public static final int OPT_NOOUTPUT = 16;
 
-  
+
   /**
    * Create a new lossless transform instance.
    */
@@ -144,10 +144,12 @@
    * divisible by the MCU block height (see {@link TJ#getMCUHeight})
    *
    * @param w the width of the cropping region.  Setting this to 0 is the
-   * equivalent of setting it to the width of the source JPEG image - x.
+   * equivalent of setting it to (width of the source JPEG image -
+   * <code>x</code>).
    *
    * @param h the height of the cropping region.  Setting this to 0 is the
-   * equivalent of setting it to the height of the source JPEG image - y.
+   * equivalent of setting it to (height of the source JPEG image -
+   * <code>y</code>).
    *
    * @param op one of the transform operations (<code>OP_*</code>)
    *
@@ -158,9 +160,11 @@
    * TJCustomFilter} interface, or null if no custom filter is needed
    */
   public TJTransform(int x, int y, int w, int h, int op, int options,
-    TJCustomFilter cf) throws Exception {
+                     TJCustomFilter cf) throws Exception {
     super(x, y, w, h);
-    this.op = op;  this.options = options;  this.cf = cf;
+    this.op = op;
+    this.options = options;
+    this.cf = cf;
   }
 
   /**
@@ -180,9 +184,11 @@
    * TJCustomFilter} interface, or null if no custom filter is needed
    */
   public TJTransform(Rectangle r, int op, int options,
-    TJCustomFilter cf) throws Exception {
+                     TJCustomFilter cf) throws Exception {
     super(r);
-    this.op = op;  this.options = options;  this.cf = cf;
+    this.op = op;
+    this.options = options;
+    this.cf = cf;
   }
 
   /**
diff --git a/java/org/libjpegturbo/turbojpeg/TJTransformer.java b/java/org/libjpegturbo/turbojpeg/TJTransformer.java
index 6c07483..3240b07 100644
--- a/java/org/libjpegturbo/turbojpeg/TJTransformer.java
+++ b/java/org/libjpegturbo/turbojpeg/TJTransformer.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011, 2013-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,7 @@
 
   /**
    * Create a TurboJPEG lossless transformer instance and associate the JPEG
-   * image stored in <code>jpegImage</code> with the newly-created instance.
+   * image stored in <code>jpegImage</code> with the newly created instance.
    *
    * @param jpegImage JPEG image buffer (size of the JPEG image is assumed to
    * be the length of the array)
@@ -55,7 +55,7 @@
   /**
    * Create a TurboJPEG lossless transformer instance and associate the JPEG
    * image of length <code>imageSize</code> bytes stored in
-   * <code>jpegImage</code> with the newly-created instance.
+   * <code>jpegImage</code> with the newly created instance.
    *
    * @param jpegImage JPEG image buffer
    *
@@ -73,31 +73,34 @@
    * JPEG image structure to another without altering the values of the
    * coefficients.  While this is typically faster than decompressing the
    * image, transforming it, and re-compressing it, lossless transforms are not
-   * free.  Each lossless transform requires reading and Huffman decoding all
-   * of the coefficients in the source image, regardless of the size of the
-   * destination image.  Thus, this method provides a means of generating
-   * multiple transformed images from the same source or of applying multiple
-   * transformations simultaneously, in order to eliminate the need to read the
-   * source coefficients multiple times.
+   * free.  Each lossless transform requires reading and performing Huffman
+   * decoding on all of the coefficients in the source image, regardless of the
+   * size of the destination image.  Thus, this method provides a means of
+   * generating multiple transformed images from the same source or of applying
+   * multiple transformations simultaneously, in order to eliminate the need to
+   * read the source coefficients multiple times.
    *
    * @param dstBufs an array of image buffers.  <code>dstbufs[i]</code> will
    * receive a JPEG image that has been transformed using the parameters in
    * <code>transforms[i]</code>.  Use {@link TJ#bufSize} to determine the
-   * maximum size for each buffer based on the cropped width and height.
+   * maximum size for each buffer based on the transformed or cropped width and
+   * height and the level of subsampling used in the source image.
    *
    * @param transforms an array of {@link TJTransform} instances, each of
    * which specifies the transform parameters and/or cropping region for the
    * corresponding transformed output image
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public void transform(byte[][] dstBufs, TJTransform[] transforms,
-    int flags) throws Exception {
-    if(jpegBuf == null) throw new Exception("JPEG buffer not initialized");
+                        int flags) throws Exception {
+    if (jpegBuf == null)
+      throw new Exception("JPEG buffer not initialized");
     transformedSizes = transform(jpegBuf, jpegBufSize, dstBufs, transforms,
-      flags);
+                                 flags);
   }
-  
+
   /**
    * Losslessly transform the JPEG image associated with this transformer
    * instance and return an array of {@link TJDecompressor} instances, each of
@@ -110,37 +113,38 @@
    * @return an array of {@link TJDecompressor} instances, each of
    * which has a transformed JPEG image associated with it
    *
-   * @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
    */
   public TJDecompressor[] transform(TJTransform[] transforms, int flags)
     throws Exception {
     byte[][] dstBufs = new byte[transforms.length][];
-    if(jpegWidth < 1 || jpegHeight < 1)
+    if (jpegWidth < 1 || jpegHeight < 1)
       throw new Exception("JPEG buffer not initialized");
-    for(int i = 0; i < transforms.length; i++) {
+    for (int i = 0; i < transforms.length; i++) {
       int w = jpegWidth, h = jpegHeight;
-      if((transforms[i].options & TJTransform.OPT_CROP) != 0) {
-        if(transforms[i].width != 0) w = transforms[i].width;
-        if(transforms[i].height != 0) h = transforms[i].height;
+      if ((transforms[i].options & TJTransform.OPT_CROP) != 0) {
+        if (transforms[i].width != 0) w = transforms[i].width;
+        if (transforms[i].height != 0) h = transforms[i].height;
       }
       dstBufs[i] = new byte[TJ.bufSize(w, h, jpegSubsamp)];
     }
     TJDecompressor[] tjd = new TJDecompressor[transforms.length];
     transform(dstBufs, transforms, flags);
-    for(int i = 0; i < transforms.length; i++)
+    for (int i = 0; i < transforms.length; i++)
       tjd[i] = new TJDecompressor(dstBufs[i], transformedSizes[i]);
     return tjd;
   }
-  
+
   /**
-   * Returns an array containing the sizes of the transformed JPEG images from
-   * the most recent call to {@link #transform transform()}.
+   * Returns an array containing the sizes of the transformed JPEG images
+   * generated by the most recent transform operation.
    *
-   * @return an array containing the sizes of the transformed JPEG images from
-   * the most recent call to {@link #transform transform()}
+   * @return an array containing the sizes of the transformed JPEG images
+   * generated by the most recent transform operation
    */
   public int[] getTransformedSizes() throws Exception {
-    if(transformedSizes == null)
+    if (transformedSizes == null)
       throw new Exception("No image has been transformed yet");
     return transformedSizes;
   }
diff --git a/java/org_libjpegturbo_turbojpeg_TJ.h b/java/org_libjpegturbo_turbojpeg_TJ.h
index c892086..d7b032a 100644
--- a/java/org_libjpegturbo_turbojpeg_TJ.h
+++ b/java/org_libjpegturbo_turbojpeg_TJ.h
@@ -55,6 +55,10 @@
 #define org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE3 128L
 #undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE
 #define org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE 256L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTDCT
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_FASTDCT 2048L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_ACCURATEDCT
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_ACCURATEDCT 4096L
 /*
  * Class:     org_libjpegturbo_turbojpeg_TJ
  * Method:    bufSize
diff --git a/java/org_libjpegturbo_turbojpeg_TJCompressor.h b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
index 59f81e3..2fc9136 100644
--- a/java/org_libjpegturbo_turbojpeg_TJCompressor.h
+++ b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
@@ -34,6 +34,14 @@
 /*
  * Class:     org_libjpegturbo_turbojpeg_TJCompressor
  * Method:    compress
+ * Signature: ([BIIIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
  * Signature: ([IIIII[BIII)I
  */
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII
@@ -41,6 +49,14 @@
 
 /*
  * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
+ * Signature: ([IIIIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII
+  (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
  * Method:    encodeYUV
  * Signature: ([BIIII[BII)V
  */
diff --git a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
index 6b67296..f798a77 100644
--- a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
+++ b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
@@ -42,6 +42,14 @@
 /*
  * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
  * Method:    decompress
+ * Signature: ([BI[BIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint, jint, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
  * Signature: ([BI[IIIIII)V
  */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII
@@ -49,6 +57,14 @@
 
 /*
  * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
+ * Signature: ([BI[IIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jintArray, jint, jint, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
  * Method:    decompressToYUV
  * Signature: ([BI[BI)V
  */
diff --git a/jcapimin.c b/jcapimin.c
index 20ba9e9..601bb71 100644
--- a/jcapimin.c
+++ b/jcapimin.c
@@ -33,12 +33,12 @@
   int i;
 
   /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
   if (version != JPEG_LIB_VERSION)
     ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
   if (structsize != SIZEOF(struct jpeg_compress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
 
   /* For debugging purposes, we zero the whole master structure.
    * But the application has already set the err pointer, and may have set
@@ -85,7 +85,7 @@
 
   cinfo->script_space = NULL;
 
-  cinfo->input_gamma = 1.0;	/* in case application forgets */
+  cinfo->input_gamma = 1.0;     /* in case application forgets */
 
   /* OK, I'm ready */
   cinfo->global_state = CSTATE_START;
@@ -173,15 +173,15 @@
     (*cinfo->master->prepare_for_pass) (cinfo);
     for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
       if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) iMCU_row;
-	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        cinfo->progress->pass_counter = (long) iMCU_row;
+        cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       }
       /* We bypass the main controller and invoke coef controller directly;
        * all work is being done from the coefficient buffer.
        */
       if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
-	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+        ERREXIT(cinfo, JERR_CANT_SUSPEND);
     }
     (*cinfo->master->finish_pass) (cinfo);
   }
@@ -202,7 +202,7 @@
 
 GLOBAL(void)
 jpeg_write_marker (j_compress_ptr cinfo, int marker,
-		   const JOCTET *dataptr, unsigned int datalen)
+                   const JOCTET *dataptr, unsigned int datalen)
 {
   JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
 
@@ -213,7 +213,7 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */
   while (datalen--) {
     (*write_marker_byte) (cinfo, *dataptr);
     dataptr++;
@@ -248,14 +248,14 @@
  * To produce a pair of files containing abbreviated tables and abbreviated
  * image data, one would proceed as follows:
  *
- *		initialize JPEG object
- *		set JPEG parameters
- *		set destination to table file
- *		jpeg_write_tables(cinfo);
- *		set destination to image file
- *		jpeg_start_compress(cinfo, FALSE);
- *		write data...
- *		jpeg_finish_compress(cinfo);
+ *              initialize JPEG object
+ *              set JPEG parameters
+ *              set destination to table file
+ *              jpeg_write_tables(cinfo);
+ *              set destination to image file
+ *              jpeg_start_compress(cinfo, FALSE);
+ *              write data...
+ *              jpeg_finish_compress(cinfo);
  *
  * jpeg_write_tables has the side effect of marking all tables written
  * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
diff --git a/jcapistd.c b/jcapistd.c
index c0320b1..167f020 100644
--- a/jcapistd.c
+++ b/jcapistd.c
@@ -41,7 +41,7 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   if (write_all_tables)
-    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+    jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */
 
   /* (Re)initialize error mgr and destination modules */
   (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
@@ -75,7 +75,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
-		      JDIMENSION num_lines)
+                      JDIMENSION num_lines)
 {
   JDIMENSION row_ctr, rows_left;
 
@@ -118,7 +118,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
-		     JDIMENSION num_lines)
+                     JDIMENSION num_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
diff --git a/jcarith.c b/jcarith.c
index a9ca1c3..e8bb281 100644
--- a/jcarith.c
+++ b/jcarith.c
@@ -34,8 +34,8 @@
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
   int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
 
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
 
   /* Pointers to statistics areas (these workspaces have image lifespan) */
   unsigned char * dc_stats[NUM_ARITH_TBLS];
@@ -101,14 +101,14 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
+#define ISHIFT_TEMPS    int ishift_temp;
 #define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
+        ((ishift_temp = (x)) < 0 ? \
+         (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+         (ishift_temp >> (shft)))
 #else
 #define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
 #endif
 
 
@@ -149,11 +149,11 @@
     /* One final overflow has to be handled */
     if (e->buffer >= 0) {
       if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
       emit_byte(e->buffer + 1, cinfo);
       if (e->buffer + 1 == 0xFF)
-	emit_byte(0x00, cinfo);
+        emit_byte(0x00, cinfo);
     }
     e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
     e->sc = 0;
@@ -162,17 +162,17 @@
       ++e->zc;
     else if (e->buffer >= 0) {
       if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
       emit_byte(e->buffer, cinfo);
     }
     if (e->sc) {
       if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
       do {
-	emit_byte(0xFF, cinfo);
-	emit_byte(0x00, cinfo);
+        emit_byte(0xFF, cinfo);
+        emit_byte(0x00, cinfo);
       } while (--e->sc);
     }
   }
@@ -187,7 +187,7 @@
     if (e->c & 0x7F800L) {
       emit_byte((e->c >> 11) & 0xFF, cinfo);
       if (((e->c >> 11) & 0xFF) == 0xFF)
-	emit_byte(0x00, cinfo);
+        emit_byte(0x00, cinfo);
     }
   }
 }
@@ -216,7 +216,7 @@
  */
 
 LOCAL(void)
-arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
+arith_encode (j_compress_ptr cinfo, unsigned char *st, int val)
 {
   register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
   register unsigned char nl, nm;
@@ -227,9 +227,9 @@
    * Qe values and probability estimation state machine
    */
   sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;     /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;     /* Next_Index_MPS */
 
   /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
   e->a -= qe;
@@ -243,7 +243,7 @@
       e->c += e->a;
       e->a = qe;
     }
-    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+    *st = (sv & 0x80) ^ nl;     /* Estimate_after_LPS */
   } else {
     /* Encode the more probable symbol */
     if (e->a >= 0x8000L)
@@ -255,7 +255,7 @@
       e->c += e->a;
       e->a = qe;
     }
-    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    *st = (sv & 0x80) ^ nm;     /* Estimate_after_MPS */
   }
 
   /* Renormalization & data output per section D.1.6 */
@@ -266,43 +266,43 @@
       /* Another byte is ready for output */
       temp = e->c >> 19;
       if (temp > 0xFF) {
-	/* Handle overflow over all stacked 0xFF bytes */
-	if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer + 1, cinfo);
-	  if (e->buffer + 1 == 0xFF)
-	    emit_byte(0x00, cinfo);
-	}
-	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-	e->sc = 0;
-	/* Note: The 3 spacer bits in the C register guarantee
-	 * that the new buffer byte can't be 0xFF here
-	 * (see page 160 in the P&M JPEG book). */
-	e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
+        /* Handle overflow over all stacked 0xFF bytes */
+        if (e->buffer >= 0) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer + 1, cinfo);
+          if (e->buffer + 1 == 0xFF)
+            emit_byte(0x00, cinfo);
+        }
+        e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+        e->sc = 0;
+        /* Note: The 3 spacer bits in the C register guarantee
+         * that the new buffer byte can't be 0xFF here
+         * (see page 160 in the P&M JPEG book). */
+        e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
       } else if (temp == 0xFF) {
-	++e->sc;  /* stack 0xFF byte (which might overflow later) */
+        ++e->sc;  /* stack 0xFF byte (which might overflow later) */
       } else {
-	/* Output all stacked 0xFF bytes, they will not overflow any more */
-	if (e->buffer == 0)
-	  ++e->zc;
-	else if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer, cinfo);
-	}
-	if (e->sc) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  do {
-	    emit_byte(0xFF, cinfo);
-	    emit_byte(0x00, cinfo);
-	  } while (--e->sc);
-	}
-	e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
+        /* Output all stacked 0xFF bytes, they will not overflow any more */
+        if (e->buffer == 0)
+          ++e->zc;
+        else if (e->buffer >= 0) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer, cinfo);
+        }
+        if (e->sc) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          do {
+            emit_byte(0xFF, cinfo);
+            emit_byte(0x00, cinfo);
+          } while (--e->sc);
+        }
+        e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
       }
       e->c &= 0x7FFFFL;
       e->ct += 8;
@@ -398,45 +398,45 @@
     /* Figure F.4: Encode_DC_DIFF */
     if ((v = m - entropy->last_dc_val[ci]) == 0) {
       arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
+      entropy->dc_context[ci] = 0;      /* zero diff category */
     } else {
       entropy->last_dc_val[ci] = m;
       arith_encode(cinfo, st, 1);
       /* Figure F.6: Encoding nonzero value v */
       /* Figure F.7: Encoding the sign of v */
       if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
       } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
       }
       /* Figure F.8: Encoding the magnitude category of v */
       m = 0;
       if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
       }
       arith_encode(cinfo, st, 0);
       /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
       if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
+        entropy->dc_context[ci] = 0;    /* zero diff category */
       else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
+        entropy->dc_context[ci] += 8;   /* large diff category */
       /* Figure F.9: Encoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
     }
   }
 
@@ -491,21 +491,21 @@
   /* Figure F.5: Encode_AC_Coefficients */
   for (k = cinfo->Ss; k <= ke; k++) {
     st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    arith_encode(cinfo, st, 0);		/* EOB decision */
+    arith_encode(cinfo, st, 0);         /* EOB decision */
     for (;;) {
       if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 0);
-	  break;
-	}
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 0);
+          break;
+        }
       } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 1);
-	  break;
-	}
+        v = -v;
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 1);
+          break;
+        }
       }
       arith_encode(cinfo, st + 1, 0); st += 3; k++;
     }
@@ -517,15 +517,15 @@
       m = 1;
       v2 = v;
       if (v2 >>= 1) {
-	arith_encode(cinfo, st, 1);
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
+        arith_encode(cinfo, st, 1);
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
       }
     }
     arith_encode(cinfo, st, 0);
@@ -566,7 +566,7 @@
     entropy->restarts_to_go--;
   }
 
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
+  st = entropy->fixed_bin;      /* use fixed probability estimation */
   Al = cinfo->Al;
 
   /* Encode the MCU data blocks */
@@ -635,29 +635,29 @@
   for (k = cinfo->Ss; k <= ke; k++) {
     st = entropy->ac_stats[tbl] + 3 * (k - 1);
     if (k > kex)
-      arith_encode(cinfo, st, 0);	/* EOB decision */
+      arith_encode(cinfo, st, 0);       /* EOB decision */
     for (;;) {
       if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 0);
-	  }
-	  break;
-	}
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 0);
+          }
+          break;
+        }
       } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 1);
-	  }
-	  break;
-	}
+        v = -v;
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 1);
+          }
+          break;
+        }
       }
       arith_encode(cinfo, st + 1, 0); st += 3; k++;
     }
@@ -713,45 +713,45 @@
     /* Figure F.4: Encode_DC_DIFF */
     if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
       arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
+      entropy->dc_context[ci] = 0;      /* zero diff category */
     } else {
       entropy->last_dc_val[ci] = (*block)[0];
       arith_encode(cinfo, st, 1);
       /* Figure F.6: Encoding nonzero value v */
       /* Figure F.7: Encoding the sign of v */
       if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
       } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
       }
       /* Figure F.8: Encoding the magnitude category of v */
       m = 0;
       if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
       }
       arith_encode(cinfo, st, 0);
       /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
       if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
+        entropy->dc_context[ci] = 0;    /* zero diff category */
       else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
+        entropy->dc_context[ci] += 8;   /* large diff category */
       /* Figure F.9: Encoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
     }
 
     /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
@@ -765,43 +765,43 @@
     /* Figure F.5: Encode_AC_Coefficients */
     for (k = 1; k <= ke; k++) {
       st = entropy->ac_stats[tbl] + 3 * (k - 1);
-      arith_encode(cinfo, st, 0);	/* EOB decision */
+      arith_encode(cinfo, st, 0);       /* EOB decision */
       while ((v = (*block)[jpeg_natural_order[k]]) == 0) {
-	arith_encode(cinfo, st + 1, 0); st += 3; k++;
+        arith_encode(cinfo, st + 1, 0); st += 3; k++;
       }
       arith_encode(cinfo, st + 1, 1);
       /* Figure F.6: Encoding nonzero value v */
       /* Figure F.7: Encoding the sign of v */
       if (v > 0) {
-	arith_encode(cinfo, entropy->fixed_bin, 0);
+        arith_encode(cinfo, entropy->fixed_bin, 0);
       } else {
-	v = -v;
-	arith_encode(cinfo, entropy->fixed_bin, 1);
+        v = -v;
+        arith_encode(cinfo, entropy->fixed_bin, 1);
       }
       st += 2;
       /* Figure F.8: Encoding the magnitude category of v */
       m = 0;
       if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	if (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (v2 >>= 1) {
-	    arith_encode(cinfo, st, 1);
-	    m <<= 1;
-	    st += 1;
-	  }
-	}
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        if (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (v2 >>= 1) {
+            arith_encode(cinfo, st, 1);
+            m <<= 1;
+            st += 1;
+          }
+        }
       }
       arith_encode(cinfo, st, 0);
       /* Figure F.9: Encoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
     }
     /* Encode EOB decision only if k <= DCTSIZE2 - 1 */
     if (k <= DCTSIZE2 - 1) {
@@ -838,14 +838,14 @@
   if (cinfo->progressive_mode) {
     if (cinfo->Ah == 0) {
       if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
+        entropy->pub.encode_mcu = encode_mcu_DC_first;
       else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
+        entropy->pub.encode_mcu = encode_mcu_AC_first;
     } else {
       if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
+        entropy->pub.encode_mcu = encode_mcu_DC_refine;
       else
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
+        entropy->pub.encode_mcu = encode_mcu_AC_refine;
     }
   } else
     entropy->pub.encode_mcu = encode_mcu;
@@ -857,10 +857,10 @@
     if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
       tbl = compptr->dc_tbl_no;
       if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
       if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+        entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
       MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
       /* Initialize DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
@@ -870,15 +870,15 @@
     if (cinfo->progressive_mode == 0 || cinfo->Se) {
       tbl = compptr->ac_tbl_no;
       if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
       if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+        entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
       MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
 #ifdef CALCULATE_SPECTRAL_CONDITIONING
       if (cinfo->progressive_mode)
-	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
-	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
+        /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
+        cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
 #endif
     }
   }
@@ -909,7 +909,7 @@
 
   entropy = (arith_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(arith_entropy_encoder));
+                                SIZEOF(arith_entropy_encoder));
   cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
   entropy->pub.start_pass = start_pass;
   entropy->pub.finish_pass = finish_pass;
diff --git a/jccoefct.c b/jccoefct.c
index 1963ddb..ffc9b7e 100644
--- a/jccoefct.c
+++ b/jccoefct.c
@@ -34,10 +34,10 @@
 typedef struct {
   struct jpeg_c_coef_controller pub; /* public fields */
 
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* For single-pass compression, it's sufficient to buffer just one MCU
    * (although this may prove a bit slow in practice).  We allocate a
@@ -143,7 +143,7 @@
 compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, bi, ci, yindex, yoffset, blockcnt;
@@ -154,7 +154,7 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Determine where data comes from in input_buf and do the DCT thing.
        * Each call on forward_DCT processes a horizontal row of DCT blocks
        * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
@@ -166,46 +166,46 @@
        */
       blkn = 0;
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	xpos = MCU_col_num * compptr->MCU_sample_width;
-	ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-					 input_buf[compptr->component_index],
-					 coef->MCU_buffer[blkn],
-					 ypos, xpos, (JDIMENSION) blockcnt);
-	    if (blockcnt < compptr->MCU_width) {
-	      /* Create some dummy blocks at the right edge of the image. */
-	      jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
-			(compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
-	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
-		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
-	      }
-	    }
-	  } else {
-	    /* Create a row of dummy blocks at the bottom of the image. */
-	    jzero_far((void FAR *) coef->MCU_buffer[blkn],
-		      compptr->MCU_width * SIZEOF(JBLOCK));
-	    for (bi = 0; bi < compptr->MCU_width; bi++) {
-	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  ypos += DCTSIZE;
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                : compptr->last_col_width;
+        xpos = MCU_col_num * compptr->MCU_sample_width;
+        ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yoffset+yindex < compptr->last_row_height) {
+            (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+                                         input_buf[compptr->component_index],
+                                         coef->MCU_buffer[blkn],
+                                         ypos, xpos, (JDIMENSION) blockcnt);
+            if (blockcnt < compptr->MCU_width) {
+              /* Create some dummy blocks at the right edge of the image. */
+              jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
+                        (compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
+              for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+                coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+              }
+            }
+          } else {
+            /* Create a row of dummy blocks at the bottom of the image. */
+            jzero_far((void FAR *) coef->MCU_buffer[blkn],
+                      compptr->MCU_width * SIZEOF(JBLOCK));
+            for (bi = 0; bi < compptr->MCU_width; bi++) {
+              coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+            }
+          }
+          blkn += compptr->MCU_width;
+          ypos += DCTSIZE;
+        }
       }
       /* Try to write the MCU.  In event of a suspension failure, we will
        * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
        */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -280,17 +280,17 @@
     for (block_row = 0; block_row < block_rows; block_row++) {
       thisblockrow = buffer[block_row];
       (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-				   input_buf[ci], thisblockrow,
-				   (JDIMENSION) (block_row * DCTSIZE),
-				   (JDIMENSION) 0, blocks_across);
+                                   input_buf[ci], thisblockrow,
+                                   (JDIMENSION) (block_row * DCTSIZE),
+                                   (JDIMENSION) 0, blocks_across);
       if (ndummy > 0) {
-	/* Create dummy blocks at the right edge of the image. */
-	thisblockrow += blocks_across; /* => first dummy block */
-	jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
-	lastDC = thisblockrow[-1][0];
-	for (bi = 0; bi < ndummy; bi++) {
-	  thisblockrow[bi][0] = lastDC;
-	}
+        /* Create dummy blocks at the right edge of the image. */
+        thisblockrow += blocks_across; /* => first dummy block */
+        jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+        lastDC = thisblockrow[-1][0];
+        for (bi = 0; bi < ndummy; bi++) {
+          thisblockrow[bi][0] = lastDC;
+        }
       }
     }
     /* If at end of image, create dummy block rows as needed.
@@ -299,22 +299,22 @@
      * This squeezes a few more bytes out of the resulting file...
      */
     if (coef->iMCU_row_num == last_iMCU_row) {
-      blocks_across += ndummy;	/* include lower right corner */
+      blocks_across += ndummy;  /* include lower right corner */
       MCUs_across = blocks_across / h_samp_factor;
       for (block_row = block_rows; block_row < compptr->v_samp_factor;
-	   block_row++) {
-	thisblockrow = buffer[block_row];
-	lastblockrow = buffer[block_row-1];
-	jzero_far((void FAR *) thisblockrow,
-		  (size_t) (blocks_across * SIZEOF(JBLOCK)));
-	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
-	  lastDC = lastblockrow[h_samp_factor-1][0];
-	  for (bi = 0; bi < h_samp_factor; bi++) {
-	    thisblockrow[bi][0] = lastDC;
-	  }
-	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
-	  lastblockrow += h_samp_factor;
-	}
+           block_row++) {
+        thisblockrow = buffer[block_row];
+        lastblockrow = buffer[block_row-1];
+        jzero_far((void FAR *) thisblockrow,
+                  (size_t) (blocks_across * SIZEOF(JBLOCK)));
+        for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+          lastDC = lastblockrow[h_samp_factor-1][0];
+          for (bi = 0; bi < h_samp_factor; bi++) {
+            thisblockrow[bi][0] = lastDC;
+          }
+          thisblockrow += h_samp_factor; /* advance to next MCU in row */
+          lastblockrow += h_samp_factor;
+        }
       }
     }
   }
@@ -341,7 +341,7 @@
 compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   int blkn, ci, xindex, yindex, yoffset;
   JDIMENSION start_col;
   JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
@@ -364,25 +364,25 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
       }
       /* Try to write the MCU. */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -408,7 +408,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                SIZEOF(my_coef_controller));
   cinfo->coef = (struct jpeg_c_coef_controller *) coef;
   coef->pub.start_pass = start_pass_coef;
 
@@ -421,14 +421,14 @@
     jpeg_component_info *compptr;
 
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+                                (long) compptr->h_samp_factor),
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor),
+         (JDIMENSION) compptr->v_samp_factor);
     }
 #else
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@@ -440,7 +440,7 @@
 
     buffer = (JBLOCKROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
     for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
diff --git a/jccolext.c b/jccolext.c
index 84da8cd..2c6b7ac 100644
--- a/jccolext.c
+++ b/jccolext.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009-2012, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -58,16 +58,16 @@
        */
       /* Y */
       outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
       /* Cb */
       outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+                 >> SCALEBITS);
       /* Cr */
       outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+                 >> SCALEBITS);
     }
   }
 }
@@ -108,8 +108,8 @@
       inptr += RGB_PIXELSIZE;
       /* Y */
       outptr[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
     }
   }
 }
diff --git a/jccolor.c b/jccolor.c
index 219c5da..fe9422a 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2009-2012, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
@@ -15,7 +15,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "config.h"
+#include "jconfigint.h"
 
 
 /* Private subobject */
@@ -24,7 +24,7 @@
   struct jpeg_color_converter pub; /* public fields */
 
   /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
+  INT32 * rgb_ycc_tab;          /* => table for RGB to YCbCr conversion */
 } my_color_converter;
 
 typedef my_color_converter * my_cconvert_ptr;
@@ -36,9 +36,9 @@
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
  * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
- *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ *      Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
  * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
@@ -60,10 +60,10 @@
  * in the tables to save adding them separately in the inner loop.
  */
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define CBCR_OFFSET     ((INT32) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
 
 /* We allocate one big table and divide it up into eight parts, instead of
  * doing eight alloc_small requests.  This lets us use a single table base
@@ -71,16 +71,16 @@
  * machines (more than can hold all eight addresses, anyway).
  */
 
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define R_CB_OFF	(3*(MAXJSAMPLE+1))
-#define G_CB_OFF	(4*(MAXJSAMPLE+1))
-#define B_CB_OFF	(5*(MAXJSAMPLE+1))
-#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF	(6*(MAXJSAMPLE+1))
-#define B_CR_OFF	(7*(MAXJSAMPLE+1))
-#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1*(MAXJSAMPLE+1))      /* offset to G => Y section */
+#define B_Y_OFF         (2*(MAXJSAMPLE+1))      /* etc. */
+#define R_CB_OFF        (3*(MAXJSAMPLE+1))
+#define G_CB_OFF        (4*(MAXJSAMPLE+1))
+#define B_CB_OFF        (5*(MAXJSAMPLE+1))
+#define R_CR_OFF        B_CB_OFF                /* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF        (6*(MAXJSAMPLE+1))
+#define B_CR_OFF        (7*(MAXJSAMPLE+1))
+#define TABLE_SIZE      (8*(MAXJSAMPLE+1))
 
 
 /* Include inline routines for colorspace extensions */
@@ -202,7 +202,7 @@
   /* Allocate and fill in the conversion tables. */
   cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(TABLE_SIZE * SIZEOF(INT32)));
+                                (TABLE_SIZE * SIZEOF(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
     rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
@@ -230,8 +230,8 @@
 
 METHODDEF(void)
 rgb_ycc_convert (j_compress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		 JDIMENSION output_row, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                 JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
     case JCS_EXT_RGB:
@@ -279,8 +279,8 @@
 
 METHODDEF(void)
 rgb_gray_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
+                  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                  JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
     case JCS_EXT_RGB:
@@ -325,8 +325,8 @@
 
 METHODDEF(void)
 rgb_rgb_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
+                  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                  JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
     case JCS_EXT_RGB:
@@ -375,8 +375,8 @@
 
 METHODDEF(void)
 cmyk_ycck_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
+                   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                   JDIMENSION output_row, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
@@ -398,7 +398,7 @@
       g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
       b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
       /* K passes through as-is */
-      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
+      outptr3[col] = inptr[3];  /* don't need GETJSAMPLE here */
       inptr += 4;
       /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
@@ -407,16 +407,16 @@
        */
       /* Y */
       outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
       /* Cb */
       outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+                 >> SCALEBITS);
       /* Cr */
       outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+                 >> SCALEBITS);
     }
   }
 }
@@ -430,8 +430,8 @@
 
 METHODDEF(void)
 grayscale_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
+                   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                   JDIMENSION output_row, int num_rows)
 {
   register JSAMPROW inptr;
   register JSAMPROW outptr;
@@ -444,7 +444,7 @@
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      outptr[col] = inptr[0];	/* don't need GETJSAMPLE() here */
+      outptr[col] = inptr[0];   /* don't need GETJSAMPLE() here */
       inptr += instride;
     }
   }
@@ -459,8 +459,8 @@
 
 METHODDEF(void)
 null_convert (j_compress_ptr cinfo,
-	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	      JDIMENSION output_row, int num_rows)
+              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+              JDIMENSION output_row, int num_rows)
 {
   register JSAMPROW inptr;
   register JSAMPROW outptr;
@@ -475,8 +475,8 @@
       inptr = *input_buf;
       outptr = output_buf[ci][output_row];
       for (col = 0; col < num_cols; col++) {
-	outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
-	inptr += nc;
+        outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+        inptr += nc;
       }
     }
     input_buf++;
@@ -507,7 +507,7 @@
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_converter));
+                                SIZEOF(my_color_converter));
   cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
   /* set start_pass to null method until we find out differently */
   cconvert->pub.start_pass = null_method;
@@ -545,7 +545,7 @@
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
 
-  default:			/* JCS_UNKNOWN can be anything */
+  default:                      /* JCS_UNKNOWN can be anything */
     if (cinfo->input_components < 1)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
@@ -652,9 +652,9 @@
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
-  default:			/* allow null conversion of JCS_UNKNOWN */
+  default:                      /* allow null conversion of JCS_UNKNOWN */
     if (cinfo->jpeg_color_space != cinfo->in_color_space ||
-	cinfo->num_components != cinfo->input_components)
+        cinfo->num_components != cinfo->input_components)
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     cconvert->pub.color_convert = null_convert;
     break;
diff --git a/jcdctmgr.c b/jcdctmgr.c
index 43db03a..bf40ff7 100644
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011 D. R. Commander
@@ -18,7 +18,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 #include "jsimddct.h"
 
 
@@ -44,7 +44,7 @@
 METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
 
 typedef struct {
-  struct jpeg_forward_dct pub;	/* public fields */
+  struct jpeg_forward_dct pub;  /* public fields */
 
   /* Pointer to the DCT routine actually in use */
   forward_DCT_method_ptr dct;
@@ -147,7 +147,7 @@
  *
  * In order to allow SIMD implementations we also tweak the values to
  * allow the same calculation to be made at all times:
- * 
+ *
  *   dctbl[0] = f rounded to nearest integer
  *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
  *   dctbl[2] = 1 << ((word size) * 2 - r)
@@ -221,7 +221,7 @@
     qtblno = compptr->quant_tbl_no;
     /* Make sure specified quantization table is present */
     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     qtbl = cinfo->quant_tbl_ptrs[qtblno];
     /* Compute divisors for this quant table */
@@ -233,91 +233,91 @@
        * coefficients multiplied by 8 (to counteract scaling).
        */
       if (fdct->divisors[qtblno] == NULL) {
-	fdct->divisors[qtblno] = (DCTELEM *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
+        fdct->divisors[qtblno] = (DCTELEM *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
       }
       dtbl = fdct->divisors[qtblno];
       for (i = 0; i < DCTSIZE2; i++) {
-	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
-	  && fdct->quantize == jsimd_quantize)
-	  fdct->quantize = quantize;
+        if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
+          && fdct->quantize == jsimd_quantize)
+          fdct->quantize = quantize;
       }
       break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
     case JDCT_IFAST:
       {
-	/* For AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 */
+        /* For AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         */
 #define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
 
-	if (fdct->divisors[qtblno] == NULL) {
-	  fdct->divisors[qtblno] = (DCTELEM *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
-	}
-	dtbl = fdct->divisors[qtblno];
-	for (i = 0; i < DCTSIZE2; i++) {
-	  if(!compute_reciprocal(
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-3), &dtbl[i])
-	    && fdct->quantize == jsimd_quantize)
-	    fdct->quantize = quantize;
-	}
+        if (fdct->divisors[qtblno] == NULL) {
+          fdct->divisors[qtblno] = (DCTELEM *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                        (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
+        }
+        dtbl = fdct->divisors[qtblno];
+        for (i = 0; i < DCTSIZE2; i++) {
+          if(!compute_reciprocal(
+            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                  (INT32) aanscales[i]),
+                    CONST_BITS-3), &dtbl[i])
+            && fdct->quantize == jsimd_quantize)
+            fdct->quantize = quantize;
+        }
       }
       break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
     case JDCT_FLOAT:
       {
-	/* For float AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 * What's actually stored is 1/divisor so that the inner loop can
-	 * use a multiplication rather than a division.
-	 */
-	FAST_FLOAT * fdtbl;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
+        /* For float AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         * What's actually stored is 1/divisor so that the inner loop can
+         * use a multiplication rather than a division.
+         */
+        FAST_FLOAT * fdtbl;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
 
-	if (fdct->float_divisors[qtblno] == NULL) {
-	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(FAST_FLOAT));
-	}
-	fdtbl = fdct->float_divisors[qtblno];
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / (((double) qtbl->quantval[i] *
-		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
-	    i++;
-	  }
-	}
+        if (fdct->float_divisors[qtblno] == NULL) {
+          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                        DCTSIZE2 * SIZEOF(FAST_FLOAT));
+        }
+        fdtbl = fdct->float_divisors[qtblno];
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fdtbl[i] = (FAST_FLOAT)
+              (1.0 / (((double) qtbl->quantval[i] *
+                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+            i++;
+          }
+        }
       }
       break;
 #endif
@@ -344,7 +344,7 @@
   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     elemptr = sample_data[elemr] + start_col;
 
-#if DCTSIZE == 8		/* unroll the inner loop */
+#if DCTSIZE == 8                /* unroll the inner loop */
     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
@@ -410,9 +410,9 @@
 
 METHODDEF(void)
 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
-	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-	     JDIMENSION start_row, JDIMENSION start_col,
-	     JDIMENSION num_blocks)
+             JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+             JDIMENSION start_row, JDIMENSION start_col,
+             JDIMENSION num_blocks)
 /* This version is used for integer DCT implementations. */
 {
   /* This routine is heavily used, so it's worth coding it tightly. */
@@ -427,7 +427,7 @@
   quantize_method_ptr do_quantize = fdct->quantize;
   workspace = fdct->workspace;
 
-  sample_data += start_row;	/* fold in the vertical offset once */
+  sample_data += start_row;     /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
@@ -455,7 +455,7 @@
   workspaceptr = workspace;
   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8		/* unroll the inner loop */
+#if DCTSIZE == 8                /* unroll the inner loop */
     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
@@ -500,9 +500,9 @@
 
 METHODDEF(void)
 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		   JDIMENSION start_row, JDIMENSION start_col,
-		   JDIMENSION num_blocks)
+                   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                   JDIMENSION start_row, JDIMENSION start_col,
+                   JDIMENSION num_blocks)
 /* This version is used for floating-point DCT implementations. */
 {
   /* This routine is heavily used, so it's worth coding it tightly. */
@@ -518,7 +518,7 @@
   float_quantize_method_ptr do_quantize = fdct->float_quantize;
   workspace = fdct->float_workspace;
 
-  sample_data += start_row;	/* fold in the vertical offset once */
+  sample_data += start_row;     /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
@@ -547,7 +547,7 @@
 
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_fdct_controller));
+                                SIZEOF(my_fdct_controller));
   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
   fdct->pub.start_pass = start_pass_fdctmgr;
 
@@ -626,12 +626,12 @@
   if (cinfo->dct_method == JDCT_FLOAT)
     fdct->float_workspace = (FAST_FLOAT *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
+                                  SIZEOF(FAST_FLOAT) * DCTSIZE2);
   else
 #endif
     fdct->workspace = (DCTELEM *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(DCTELEM) * DCTSIZE2);
+                                  SIZEOF(DCTELEM) * DCTSIZE2);
 
   /* Mark divisor tables unallocated */
   for (i = 0; i < NUM_QUANT_TBLS; i++) {
diff --git a/jchuff.c b/jchuff.c
index 887614d..1880cc2 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
- * Copyright (C) 2009-2011, D. R. Commander.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2014 D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy encoding routines.
@@ -19,7 +19,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jchuff.h"		/* Declarations shared with jcphuff.c */
+#include "jchuff.h"             /* Declarations shared with jcphuff.c */
 #include <limits.h>
 
 static unsigned char jpeg_nbits_table[65536];
@@ -37,8 +37,8 @@
  */
 
 typedef struct {
-  size_t put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
+  size_t put_buffer;            /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 } savable_state;
 
@@ -52,12 +52,12 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).put_buffer = (src).put_buffer, \
-	 (dest).put_bits = (src).put_bits, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).put_buffer = (src).put_buffer, \
+         (dest).put_bits = (src).put_bits, \
+         (dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -65,17 +65,17 @@
 typedef struct {
   struct jpeg_entropy_encoder pub; /* public fields */
 
-  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+  savable_state saved;          /* Bit buffer & DC state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
   c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
 
-#ifdef ENTROPY_OPT_SUPPORTED	/* Statistics tables for optimization */
+#ifdef ENTROPY_OPT_SUPPORTED    /* Statistics tables for optimization */
   long * dc_count_ptrs[NUM_HUFF_TBLS];
   long * ac_count_ptrs[NUM_HUFF_TBLS];
 #endif
@@ -88,20 +88,20 @@
  */
 
 typedef struct {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  savable_state cur;		/* Current bit buffer & DC state */
-  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  savable_state cur;            /* Current bit buffer & DC state */
+  j_compress_ptr cinfo;         /* dump_buffer needs access to this */
 } working_state;
 
 
 /* Forward declarations */
 METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
-					JBLOCKROW *MCU_data));
+                                        JBLOCKROW *MCU_data));
 METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
 #ifdef ENTROPY_OPT_SUPPORTED
 METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
-					  JBLOCKROW *MCU_data));
+                                          JBLOCKROW *MCU_data));
 METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
 #endif
 
@@ -140,29 +140,29 @@
       /* Check for invalid table indexes */
       /* (make_c_derived_tbl does this in the other path) */
       if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
-	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
       if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
-	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
       /* Allocate and zero the statistics tables */
       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
       if (entropy->dc_count_ptrs[dctbl] == NULL)
-	entropy->dc_count_ptrs[dctbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
+        entropy->dc_count_ptrs[dctbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * SIZEOF(long));
       MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long));
       if (entropy->ac_count_ptrs[actbl] == NULL)
-	entropy->ac_count_ptrs[actbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
+        entropy->ac_count_ptrs[actbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * SIZEOF(long));
       MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long));
 #endif
     } else {
       /* Compute derived values for Huffman tables */
       /* We may do this more than once for a table, but it's not expensive */
       jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
-			      & entropy->dc_derived_tbls[dctbl]);
+                              & entropy->dc_derived_tbls[dctbl]);
       jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
-			      & entropy->ac_derived_tbls[actbl]);
+                              & entropy->ac_derived_tbls[actbl]);
     }
     /* Initialize DC predictions to 0 */
     entropy->saved.last_dc_val[ci] = 0;
@@ -187,7 +187,7 @@
 
 GLOBAL(void)
 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
-			 c_derived_tbl ** pdtbl)
+                         c_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *htbl;
   c_derived_tbl *dtbl;
@@ -212,22 +212,22 @@
   if (*pdtbl == NULL)
     *pdtbl = (c_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(c_derived_tbl));
+                                  SIZEOF(c_derived_tbl));
   dtbl = *pdtbl;
-  
+
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
   for (l = 1; l <= 16; l++) {
     i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (i--)
       huffsize[p++] = (char) l;
   }
   huffsize[p] = 0;
   lastp = p;
-  
+
   /* Figure C.2: generate the codes themselves */
   /* We also validate that the counts represent a legal Huffman code tree. */
 
@@ -247,7 +247,7 @@
     code <<= 1;
     si++;
   }
-  
+
   /* Figure C.3: generate encoding tables */
   /* These are code and size indexed by symbol value */
 
@@ -287,10 +287,10 @@
 
 /* Emit a byte, taking 'action' if must suspend. */
 #define emit_byte(state,val,action)  \
-	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(state)->free_in_buffer == 0)  \
-	    if (! dump_buffer(state))  \
-	      { action; } }
+        { *(state)->next_output_byte++ = (JOCTET) (val);  \
+          if (--(state)->free_in_buffer == 0)  \
+            if (! dump_buffer(state))  \
+              { action; } }
 
 
 LOCAL(boolean)
@@ -391,7 +391,16 @@
 #endif
 
 
-#define BUFSIZE (DCTSIZE2 * 2)
+/* Although it is exceedingly rare, it is possible for a Huffman-encoded
+ * coefficient block to be larger than the 128-byte unencoded block.  For each
+ * of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can
+ * theoretically store 16 bits, for a maximum of 2048 bits (256 bytes) per
+ * encoded block.  If, for instance, one artificially sets the AC
+ * coefficients to alternating values of 32767 and -32768 (in JPEG zigzag
+ * scan order, i.e. natural indices 1, 8, 16, etc.), then this will produce
+ * an encoded block larger than 200 bytes.
+ */
+#define BUFSIZE (DCTSIZE2 * 4)
 
 #define LOAD_BUFFER() { \
   if (state->free_in_buffer < BUFSIZE) { \
@@ -438,7 +447,7 @@
   PUT_BITS(0x7F, 7)
   while (put_bits >= 8) EMIT_BYTE()
 
-  state->cur.put_buffer = 0;	/* and reset bit-buffer to empty */
+  state->cur.put_buffer = 0;    /* and reset bit-buffer to empty */
   state->cur.put_bits = 0;
   STORE_BUFFER()
 
@@ -450,7 +459,7 @@
 
 LOCAL(boolean)
 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
-		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+                  c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
   int temp, temp2, temp3;
   int nbits;
@@ -465,7 +474,7 @@
   LOAD_BUFFER()
 
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = temp2 = block[0] - last_dc_val;
 
  /* This is a well-known technique for obtaining the absolute value without a
@@ -499,8 +508,8 @@
   CHECKBUF15()
 
   /* Encode the AC coefficients per section F.1.2.2 */
-  
-  r = 0;			/* r = run length of zeros */
+
+  r = 0;                        /* r = run length of zeros */
 
 /* Manually unroll the k loop to eliminate the counter variable.  This
  * improves performance greatly on systems with a limited number of
@@ -606,7 +615,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! emit_restart(&state, entropy->next_restart_num))
-	return FALSE;
+        return FALSE;
   }
 
   /* Encode the MCU data blocks */
@@ -614,9 +623,9 @@
     ci = cinfo->MCU_membership[blkn];
     compptr = cinfo->cur_comp_info[ci];
     if (! encode_one_block(&state,
-			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
-			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
-			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+                           MCU_data[blkn][0], state.cur.last_dc_val[ci],
+                           entropy->dc_derived_tbls[compptr->dc_tbl_no],
+                           entropy->ac_derived_tbls[compptr->ac_tbl_no]))
       return FALSE;
     /* Update last_dc_val */
     state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
@@ -686,18 +695,18 @@
 
 LOCAL(void)
 htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
-		 long dc_counts[], long ac_counts[])
+                 long dc_counts[], long ac_counts[])
 {
   register int temp;
   register int nbits;
   register int k, r;
-  
+
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = block[0] - last_dc_val;
   if (temp < 0)
     temp = -temp;
-  
+
   /* Find the number of bits needed for the magnitude of the coefficient */
   nbits = 0;
   while (temp) {
@@ -712,36 +721,36 @@
 
   /* Count the Huffman symbol for the number of bits */
   dc_counts[nbits]++;
-  
+
   /* Encode the AC coefficients per section F.1.2.2 */
-  
-  r = 0;			/* r = run length of zeros */
-  
+
+  r = 0;                        /* r = run length of zeros */
+
   for (k = 1; k < DCTSIZE2; k++) {
     if ((temp = block[jpeg_natural_order[k]]) == 0) {
       r++;
     } else {
       /* if run length > 15, must emit special run-length-16 codes (0xF0) */
       while (r > 15) {
-	ac_counts[0xF0]++;
-	r -= 16;
+        ac_counts[0xF0]++;
+        r -= 16;
       }
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       if (temp < 0)
-	temp = -temp;
-      
+        temp = -temp;
+
       /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
+      nbits = 1;                /* there must be at least one 1 bit */
       while ((temp >>= 1))
-	nbits++;
+        nbits++;
       /* Check for out-of-range coefficient values */
       if (nbits > MAX_COEF_BITS)
-	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-      
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
       /* Count Huffman symbol for run length / number of bits */
       ac_counts[(r << 4) + nbits]++;
-      
+
       r = 0;
     }
   }
@@ -769,7 +778,7 @@
     if (entropy->restarts_to_go == 0) {
       /* Re-initialize DC predictions to 0 */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-	entropy->saved.last_dc_val[ci] = 0;
+        entropy->saved.last_dc_val[ci] = 0;
       /* Update restart state */
       entropy->restarts_to_go = cinfo->restart_interval;
     }
@@ -780,8 +789,8 @@
     ci = cinfo->MCU_membership[blkn];
     compptr = cinfo->cur_comp_info[ci];
     htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
-		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
-		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+                    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+                    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
     entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
   }
 
@@ -820,10 +829,10 @@
 GLOBAL(void)
 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
 {
-#define MAX_CLEN 32		/* assumed maximum initial code length */
-  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
-  int codesize[257];		/* codesize[k] = code length of symbol k */
-  int others[257];		/* next symbol in current branch of tree */
+#define MAX_CLEN 32             /* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];       /* bits[k] = # of symbols with code length k */
+  int codesize[257];            /* codesize[k] = code length of symbol k */
+  int others[257];              /* next symbol in current branch of tree */
   int c1, c2;
   int p, i, j;
   long v;
@@ -833,9 +842,9 @@
   MEMZERO(bits, SIZEOF(bits));
   MEMZERO(codesize, SIZEOF(codesize));
   for (i = 0; i < 257; i++)
-    others[i] = -1;		/* init links to empty */
-  
-  freq[256] = 1;		/* make sure 256 has a nonzero count */
+    others[i] = -1;             /* init links to empty */
+
+  freq[256] = 1;                /* make sure 256 has a nonzero count */
   /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
    * that no real symbol is given code-value of all ones, because 256
    * will be placed last in the largest codeword category.
@@ -850,8 +859,8 @@
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v) {
-	v = freq[i];
-	c1 = i;
+        v = freq[i];
+        c1 = i;
       }
     }
 
@@ -861,15 +870,15 @@
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v && i != c1) {
-	v = freq[i];
-	c2 = i;
+        v = freq[i];
+        c2 = i;
       }
     }
 
     /* Done if we've merged everything into one frequency */
     if (c2 < 0)
       break;
-    
+
     /* Else merge the two counts/trees */
     freq[c1] += freq[c2];
     freq[c2] = 0;
@@ -880,9 +889,9 @@
       c1 = others[c1];
       codesize[c1]++;
     }
-    
-    others[c1] = c2;		/* chain c2 onto c1's tree branch */
-    
+
+    others[c1] = c2;            /* chain c2 onto c1's tree branch */
+
     /* Increment the codesize of everything in c2's tree branch */
     codesize[c2]++;
     while (others[c2] >= 0) {
@@ -897,7 +906,7 @@
       /* The JPEG standard seems to think that this can't happen, */
       /* but I'm paranoid... */
       if (codesize[i] > MAX_CLEN)
-	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+        ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
 
       bits[codesize[i]]++;
     }
@@ -913,28 +922,28 @@
    * shortest nonzero BITS entry is converted into a prefix for two code words
    * one bit longer.
    */
-  
+
   for (i = MAX_CLEN; i > 16; i--) {
     while (bits[i] > 0) {
-      j = i - 2;		/* find length of new prefix to be used */
+      j = i - 2;                /* find length of new prefix to be used */
       while (bits[j] == 0)
-	j--;
-      
-      bits[i] -= 2;		/* remove two symbols */
-      bits[i-1]++;		/* one goes in this length */
-      bits[j+1] += 2;		/* two new symbols in this length */
-      bits[j]--;		/* symbol of this length is now a prefix */
+        j--;
+
+      bits[i] -= 2;             /* remove two symbols */
+      bits[i-1]++;              /* one goes in this length */
+      bits[j+1] += 2;           /* two new symbols in this length */
+      bits[j]--;                /* symbol of this length is now a prefix */
     }
   }
 
   /* Remove the count for the pseudo-symbol 256 from the largest codelength */
-  while (bits[i] == 0)		/* find largest codelength still in use */
+  while (bits[i] == 0)          /* find largest codelength still in use */
     i--;
   bits[i]--;
-  
+
   /* Return final symbol counts (only for lengths 0..16) */
   MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-  
+
   /* Return a list of the symbols sorted by code length */
   /* It's not real clear to me why we don't need to consider the codelength
    * changes made above, but the JPEG spec seems to think this works.
@@ -943,8 +952,8 @@
   for (i = 1; i <= MAX_CLEN; i++) {
     for (j = 0; j <= 255; j++) {
       if (codesize[j] == i) {
-	htbl->huffval[p] = (UINT8) j;
-	p++;
+        htbl->huffval[p] = (UINT8) j;
+        p++;
       }
     }
   }
@@ -981,14 +990,14 @@
     if (! did_dc[dctbl]) {
       htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl];
       if (*htblptr == NULL)
-	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
       did_dc[dctbl] = TRUE;
     }
     if (! did_ac[actbl]) {
       htblptr = & cinfo->ac_huff_tbl_ptrs[actbl];
       if (*htblptr == NULL)
-	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
       did_ac[actbl] = TRUE;
     }
@@ -1011,7 +1020,7 @@
 
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_encoder));
+                                SIZEOF(huff_entropy_encoder));
   cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
   entropy->pub.start_pass = start_pass_huff;
 
diff --git a/jchuff.h b/jchuff.h
index a9599fc..a1a5280 100644
--- a/jchuff.h
+++ b/jchuff.h
@@ -25,23 +25,23 @@
 /* Derived data constructed for each Huffman table */
 
 typedef struct {
-  unsigned int ehufco[256];	/* code for each symbol */
-  char ehufsi[256];		/* length of code for each symbol */
+  unsigned int ehufco[256];     /* code for each symbol */
+  char ehufsi[256];             /* length of code for each symbol */
   /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
 } c_derived_tbl;
 
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_make_c_derived_tbl	jMkCDerived
-#define jpeg_gen_optimal_table	jGenOptTbl
+#define jpeg_make_c_derived_tbl jMkCDerived
+#define jpeg_gen_optimal_table  jGenOptTbl
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 /* Expand a Huffman table definition into the derived format */
 EXTERN(void) jpeg_make_c_derived_tbl
-	JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
-	     c_derived_tbl ** pdtbl));
+        JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
+             c_derived_tbl ** pdtbl));
 
 /* Generate an optimal table definition given the specified counts */
 EXTERN(void) jpeg_gen_optimal_table
-	JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
+        JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
diff --git a/jcinit.c b/jcinit.c
index de0ade2..347cf6d 100644
--- a/jcinit.c
+++ b/jcinit.c
@@ -60,7 +60,7 @@
 
   /* Need a full-image coefficient buffer in any multi-pass mode. */
   jinit_c_coef_controller(cinfo,
-		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+                (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
   jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
 
   jinit_marker_writer(cinfo);
diff --git a/jcmainct.c b/jcmainct.c
index 5b7ff21..5a0549a 100644
--- a/jcmainct.c
+++ b/jcmainct.c
@@ -28,10 +28,10 @@
 typedef struct {
   struct jpeg_c_main_controller pub; /* public fields */
 
-  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
-  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
-  boolean suspended;		/* remember if we suspended output */
-  J_BUF_MODE pass_mode;		/* current operating mode */
+  JDIMENSION cur_iMCU_row;      /* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;      /* counts row groups received in iMCU row */
+  boolean suspended;            /* remember if we suspended output */
+  J_BUF_MODE pass_mode;         /* current operating mode */
 
   /* If using just a strip buffer, this points to the entire set of buffers
    * (we allocate one for each component).  In the full-image case, this
@@ -52,12 +52,12 @@
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+        JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+             JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
 METHODDEF(void) process_data_buffer_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+        JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+             JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
 #endif
 
 
@@ -74,10 +74,10 @@
   if (cinfo->raw_data_in)
     return;
 
-  main_ptr->cur_iMCU_row = 0;	/* initialize counters */
+  main_ptr->cur_iMCU_row = 0;   /* initialize counters */
   main_ptr->rowgroup_ctr = 0;
   main_ptr->suspended = FALSE;
-  main_ptr->pass_mode = pass_mode;	/* save mode for use by process_data */
+  main_ptr->pass_mode = pass_mode;      /* save mode for use by process_data */
 
   switch (pass_mode) {
   case JBUF_PASS_THRU:
@@ -111,8 +111,8 @@
 
 METHODDEF(void)
 process_data_simple_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
+                          JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                          JDIMENSION in_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
@@ -120,9 +120,9 @@
     /* Read input data if we haven't filled the main buffer yet */
     if (main_ptr->rowgroup_ctr < DCTSIZE)
       (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main_ptr->buffer, &main_ptr->rowgroup_ctr,
-					(JDIMENSION) DCTSIZE);
+                                        input_buf, in_row_ctr, in_rows_avail,
+                                        main_ptr->buffer, &main_ptr->rowgroup_ctr,
+                                        (JDIMENSION) DCTSIZE);
 
     /* If we don't have a full iMCU row buffered, return to application for
      * more data.  Note that preprocessor will always pad to fill the iMCU row
@@ -140,8 +140,8 @@
        * think we were done.
        */
       if (! main_ptr->suspended) {
-	(*in_row_ctr)--;
-	main_ptr->suspended = TRUE;
+        (*in_row_ctr)--;
+        main_ptr->suspended = TRUE;
       }
       return;
     }
@@ -167,8 +167,8 @@
 
 METHODDEF(void)
 process_data_buffer_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
+                          JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                          JDIMENSION in_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci;
@@ -179,16 +179,16 @@
     /* Realign the virtual buffers if at the start of an iMCU row. */
     if (main_ptr->rowgroup_ctr == 0) {
       for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, main_ptr->whole_image[ci],
-	   main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
-	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
+           ci++, compptr++) {
+        main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr) cinfo, main_ptr->whole_image[ci],
+           main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
+           (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
       }
       /* In a read pass, pretend we just read some source data. */
       if (! writing) {
-	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
-	main_ptr->rowgroup_ctr = DCTSIZE;
+        *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
+        main_ptr->rowgroup_ctr = DCTSIZE;
       }
     }
 
@@ -196,35 +196,35 @@
     /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
     if (writing) {
       (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main_ptr->buffer, &main_ptr->rowgroup_ctr,
-					(JDIMENSION) DCTSIZE);
+                                        input_buf, in_row_ctr, in_rows_avail,
+                                        main_ptr->buffer, &main_ptr->rowgroup_ctr,
+                                        (JDIMENSION) DCTSIZE);
       /* Return to application if we need more data to fill the iMCU row. */
       if (main_ptr->rowgroup_ctr < DCTSIZE)
-	return;
+        return;
     }
 
     /* Emit data, unless this is a sink-only pass. */
     if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) {
       if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
-	/* If compressor did not consume the whole row, then we must need to
-	 * suspend processing and return to the application.  In this situation
-	 * we pretend we didn't yet consume the last input row; otherwise, if
-	 * it happened to be the last row of the image, the application would
-	 * think we were done.
-	 */
-	if (! main_ptr->suspended) {
-	  (*in_row_ctr)--;
-	  main_ptr->suspended = TRUE;
-	}
-	return;
+        /* If compressor did not consume the whole row, then we must need to
+         * suspend processing and return to the application.  In this situation
+         * we pretend we didn't yet consume the last input row; otherwise, if
+         * it happened to be the last row of the image, the application would
+         * think we were done.
+         */
+        if (! main_ptr->suspended) {
+          (*in_row_ctr)--;
+          main_ptr->suspended = TRUE;
+        }
+        return;
       }
       /* We did finish the row.  Undo our little suspension hack if a previous
        * call suspended; then mark the main buffer empty.
        */
       if (main_ptr->suspended) {
-	(*in_row_ctr)++;
-	main_ptr->suspended = FALSE;
+        (*in_row_ctr)++;
+        main_ptr->suspended = FALSE;
       }
     }
 
@@ -250,7 +250,7 @@
 
   main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
+                                SIZEOF(my_main_controller));
   cinfo->main = (struct jpeg_c_main_controller *) main_ptr;
   main_ptr->pub.start_pass = start_pass_main;
 
@@ -266,13 +266,13 @@
     /* Allocate a full-image virtual array for each component */
     /* Note we pad the bottom to a multiple of the iMCU height */
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 compptr->width_in_blocks * DCTSIZE,
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor) * DCTSIZE,
-	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         compptr->width_in_blocks * DCTSIZE,
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor) * DCTSIZE,
+         (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
     }
 #else
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@@ -283,11 +283,11 @@
 #endif
     /* Allocate a strip buffer for each component */
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 compptr->width_in_blocks * DCTSIZE,
-	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         compptr->width_in_blocks * DCTSIZE,
+         (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
     }
   }
 }
diff --git a/jcmarker.c b/jcmarker.c
index e2037c3..62aeb59 100644
--- a/jcmarker.c
+++ b/jcmarker.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2003-2010 by Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -17,7 +17,7 @@
 #include "jpegcomp.h"
 
 
-typedef enum {			/* JPEG marker codes */
+typedef enum {                  /* JPEG marker codes */
   M_SOF0  = 0xc0,
   M_SOF1  = 0xc1,
   M_SOF2  = 0xc2,
@@ -173,7 +173,7 @@
       /* The table entries must be emitted in zigzag order. */
       unsigned int qval = qtbl->quantval[jpeg_natural_order[i]];
       if (prec)
-	emit_byte(cinfo, (int) (qval >> 8));
+        emit_byte(cinfo, (int) (qval >> 8));
       emit_byte(cinfo, (int) (qval & 0xFF));
     }
 
@@ -190,33 +190,33 @@
 {
   JHUFF_TBL * htbl;
   int length, i;
-  
+
   if (is_ac) {
     htbl = cinfo->ac_huff_tbl_ptrs[index];
-    index += 0x10;		/* output index has AC bit set */
+    index += 0x10;              /* output index has AC bit set */
   } else {
     htbl = cinfo->dc_huff_tbl_ptrs[index];
   }
 
   if (htbl == NULL)
     ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
-  
+
   if (! htbl->sent_table) {
     emit_marker(cinfo, M_DHT);
-    
+
     length = 0;
     for (i = 1; i <= 16; i++)
       length += htbl->bits[i];
-    
+
     emit_2bytes(cinfo, length + 2 + 1 + 16);
     emit_byte(cinfo, index);
-    
+
     for (i = 1; i <= 16; i++)
       emit_byte(cinfo, htbl->bits[i]);
-    
+
     for (i = 0; i < length; i++)
       emit_byte(cinfo, htbl->huffval[i]);
-    
+
     htbl->sent_table = TRUE;
   }
 }
@@ -258,12 +258,12 @@
 
     for (i = 0; i < NUM_ARITH_TBLS; i++) {
       if (dc_in_use[i]) {
-	emit_byte(cinfo, i);
-	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+        emit_byte(cinfo, i);
+        emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
       }
       if (ac_in_use[i]) {
-	emit_byte(cinfo, i + 0x10);
-	emit_byte(cinfo, cinfo->arith_ac_K[i]);
+        emit_byte(cinfo, i + 0x10);
+        emit_byte(cinfo, cinfo->arith_ac_K[i]);
       }
     }
   }
@@ -276,8 +276,8 @@
 /* Emit a DRI marker */
 {
   emit_marker(cinfo, M_DRI);
-  
-  emit_2bytes(cinfo, 4);	/* fixed length */
+
+  emit_2bytes(cinfo, 4);        /* fixed length */
 
   emit_2bytes(cinfo, (int) cinfo->restart_interval);
 }
@@ -289,9 +289,9 @@
 {
   int ci;
   jpeg_component_info *compptr;
-  
+
   emit_marker(cinfo, code);
-  
+
   emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
 
   /* Make sure image isn't bigger than SOF field can handle */
@@ -320,13 +320,13 @@
 {
   int i, td, ta;
   jpeg_component_info *compptr;
-  
+
   emit_marker(cinfo, M_SOS);
-  
+
   emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
-  
+
   emit_byte(cinfo, cinfo->comps_in_scan);
-  
+
   for (i = 0; i < cinfo->comps_in_scan; i++) {
     compptr = cinfo->cur_comp_info[i];
     emit_byte(cinfo, compptr->component_id);
@@ -354,22 +354,22 @@
 /* Emit a JFIF-compliant APP0 marker */
 {
   /*
-   * Length of APP0 block	(2 bytes)
-   * Block ID			(4 bytes - ASCII "JFIF")
-   * Zero byte			(1 byte to terminate the ID string)
-   * Version Major, Minor	(2 bytes - major first)
-   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
-   * Xdpu			(2 bytes - dots per unit horizontal)
-   * Ydpu			(2 bytes - dots per unit vertical)
-   * Thumbnail X size		(1 byte)
-   * Thumbnail Y size		(1 byte)
+   * Length of APP0 block       (2 bytes)
+   * Block ID                   (4 bytes - ASCII "JFIF")
+   * Zero byte                  (1 byte to terminate the ID string)
+   * Version Major, Minor       (2 bytes - major first)
+   * Units                      (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu                       (2 bytes - dots per unit horizontal)
+   * Ydpu                       (2 bytes - dots per unit vertical)
+   * Thumbnail X size           (1 byte)
+   * Thumbnail Y size           (1 byte)
    */
-  
+
   emit_marker(cinfo, M_APP0);
-  
+
   emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
 
-  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x4A);       /* Identifier: ASCII "JFIF" */
   emit_byte(cinfo, 0x46);
   emit_byte(cinfo, 0x49);
   emit_byte(cinfo, 0x46);
@@ -379,7 +379,7 @@
   emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
   emit_2bytes(cinfo, (int) cinfo->X_density);
   emit_2bytes(cinfo, (int) cinfo->Y_density);
-  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);          /* No thumbnail image */
   emit_byte(cinfo, 0);
 }
 
@@ -389,12 +389,12 @@
 /* Emit an Adobe APP14 marker */
 {
   /*
-   * Length of APP14 block	(2 bytes)
-   * Block ID			(5 bytes - ASCII "Adobe")
-   * Version Number		(2 bytes - currently 100)
-   * Flags0			(2 bytes - currently 0)
-   * Flags1			(2 bytes - currently 0)
-   * Color transform		(1 byte)
+   * Length of APP14 block      (2 bytes)
+   * Block ID                   (5 bytes - ASCII "Adobe")
+   * Version Number             (2 bytes - currently 100)
+   * Flags0                     (2 bytes - currently 0)
+   * Flags1                     (2 bytes - currently 0)
+   * Color transform            (1 byte)
    *
    * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
    * now in circulation seem to use Version = 100, so that's what we write.
@@ -403,28 +403,28 @@
    * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
    * whether the encoder performed a transformation, which is pretty useless.
    */
-  
+
   emit_marker(cinfo, M_APP14);
-  
+
   emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
 
-  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x41);       /* Identifier: ASCII "Adobe" */
   emit_byte(cinfo, 0x64);
   emit_byte(cinfo, 0x6F);
   emit_byte(cinfo, 0x62);
   emit_byte(cinfo, 0x65);
-  emit_2bytes(cinfo, 100);	/* Version */
-  emit_2bytes(cinfo, 0);	/* Flags0 */
-  emit_2bytes(cinfo, 0);	/* Flags1 */
+  emit_2bytes(cinfo, 100);      /* Version */
+  emit_2bytes(cinfo, 0);        /* Flags0 */
+  emit_2bytes(cinfo, 0);        /* Flags1 */
   switch (cinfo->jpeg_color_space) {
   case JCS_YCbCr:
-    emit_byte(cinfo, 1);	/* Color transform = 1 */
+    emit_byte(cinfo, 1);        /* Color transform = 1 */
     break;
   case JCS_YCCK:
-    emit_byte(cinfo, 2);	/* Color transform = 2 */
+    emit_byte(cinfo, 2);        /* Color transform = 2 */
     break;
   default:
-    emit_byte(cinfo, 0);	/* Color transform = 0 */
+    emit_byte(cinfo, 0);        /* Color transform = 0 */
     break;
   }
 }
@@ -442,12 +442,12 @@
 write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
 /* Emit an arbitrary marker header */
 {
-  if (datalen > (unsigned int) 65533)		/* safety check */
+  if (datalen > (unsigned int) 65533)           /* safety check */
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
   emit_marker(cinfo, (JPEG_MARKER) marker);
 
-  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
+  emit_2bytes(cinfo, (int) (datalen + 2));      /* total length */
 }
 
 METHODDEF(void)
@@ -474,12 +474,12 @@
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
-  emit_marker(cinfo, M_SOI);	/* first the SOI */
+  emit_marker(cinfo, M_SOI);    /* first the SOI */
 
   /* SOI is defined to reset restart interval to 0 */
   marker->last_restart_interval = 0;
 
-  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+  if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */
     emit_jfif_app0(cinfo);
   if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
     emit_adobe_app14(cinfo);
@@ -500,7 +500,7 @@
   int ci, prec;
   boolean is_baseline;
   jpeg_component_info *compptr;
-  
+
   /* Emit DQT for each quantization table.
    * Note that emit_dqt() suppresses any duplicate tables.
    */
@@ -520,9 +520,9 @@
   } else {
     is_baseline = TRUE;
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
-	is_baseline = FALSE;
+        is_baseline = FALSE;
     }
     if (prec && is_baseline) {
       is_baseline = FALSE;
@@ -539,11 +539,11 @@
       emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
   } else {
     if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+      emit_sof(cinfo, M_SOF2);  /* SOF code for progressive Huffman */
     else if (is_baseline)
-      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+      emit_sof(cinfo, M_SOF0);  /* SOF code for baseline implementation */
     else
-      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+      emit_sof(cinfo, M_SOF1);  /* SOF code for non-baseline Huffman file */
   }
 }
 
@@ -575,10 +575,10 @@
       compptr = cinfo->cur_comp_info[i];
       /* DC needs no table for refinement scan */
       if (cinfo->Ss == 0 && cinfo->Ah == 0)
-	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+        emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
       /* AC needs no table when not present */
       if (cinfo->Se)
-	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+        emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
     }
   }
 
@@ -627,9 +627,9 @@
   if (! cinfo->arith_code) {
     for (i = 0; i < NUM_HUFF_TBLS; i++) {
       if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, FALSE);
+        emit_dht(cinfo, i, FALSE);
       if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, TRUE);
+        emit_dht(cinfo, i, TRUE);
     }
   }
 
@@ -649,7 +649,7 @@
   /* Create the subobject */
   marker = (my_marker_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_marker_writer));
+                                SIZEOF(my_marker_writer));
   cinfo->marker = (struct jpeg_marker_writer *) marker;
   /* Initialize method pointers */
   marker->pub.write_file_header = write_file_header;
diff --git a/jcmaster.c b/jcmaster.c
index accfc70..cf51c1e 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -4,13 +4,13 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2003-2010 by Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains master control logic for the JPEG compressor.
  * These routines are concerned with parameter validation, initial setup,
- * and inter-pass control (determining the number of passes and the work 
+ * and inter-pass control (determining the number of passes and the work
  * to be done in each pass).
  */
 
@@ -23,20 +23,20 @@
 /* Private state */
 
 typedef enum {
-	main_pass,		/* input data, also do first output step */
-	huff_opt_pass,		/* Huffman code optimization pass */
-	output_pass		/* data output pass */
+        main_pass,              /* input data, also do first output step */
+        huff_opt_pass,          /* Huffman code optimization pass */
+        output_pass             /* data output pass */
 } c_pass_type;
 
 typedef struct {
-  struct jpeg_comp_master pub;	/* public fields */
+  struct jpeg_comp_master pub;  /* public fields */
 
-  c_pass_type pass_type;	/* the type of the current pass */
+  c_pass_type pass_type;        /* the type of the current pass */
 
-  int pass_number;		/* # of passes completed */
-  int total_passes;		/* total # of passes needed */
+  int pass_number;              /* # of passes completed */
+  int total_passes;             /* total # of passes needed */
 
-  int scan_number;		/* current index in scan_info[] */
+  int scan_number;              /* current index in scan_info[] */
 } my_comp_master;
 
 typedef my_comp_master * my_master_ptr;
@@ -105,7 +105,7 @@
   /* Check that number of components won't exceed internal array sizes */
   if (cinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
 
   /* Compute maximum sampling factors; check factor validity */
   cinfo->max_h_samp_factor = 1;
@@ -113,12 +113,12 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+        compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
       ERREXIT(cinfo, JERR_BAD_SAMPLING);
     cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
+                                   compptr->h_samp_factor);
     cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
+                                   compptr->v_samp_factor);
   }
 
   /* Compute dimensions of components */
@@ -135,17 +135,17 @@
     /* Size in DCT blocks */
     compptr->width_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->height_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
     /* Size in samples */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
+                    (long) cinfo->max_h_samp_factor);
     compptr->downsampled_height = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
+                    (long) cinfo->max_v_samp_factor);
     /* Mark component needed (this flag isn't actually used for compression) */
     compptr->component_needed = TRUE;
   }
@@ -155,7 +155,7 @@
    */
   cinfo->total_iMCU_rows = (JDIMENSION)
     jdiv_round_up((long) cinfo->_jpeg_height,
-		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+                  (long) (cinfo->max_v_samp_factor*DCTSIZE));
 }
 
 
@@ -188,15 +188,15 @@
 #ifdef C_PROGRESSIVE_SUPPORTED
     cinfo->progressive_mode = TRUE;
     last_bitpos_ptr = & last_bitpos[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (ci = 0; ci < cinfo->num_components; ci++)
       for (coefi = 0; coefi < DCTSIZE2; coefi++)
-	*last_bitpos_ptr++ = -1;
+        *last_bitpos_ptr++ = -1;
 #else
     ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
   } else {
     cinfo->progressive_mode = FALSE;
-    for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (ci = 0; ci < cinfo->num_components; ci++)
       component_sent[ci] = FALSE;
   }
 
@@ -208,10 +208,10 @@
     for (ci = 0; ci < ncomps; ci++) {
       thisi = scanptr->component_index[ci];
       if (thisi < 0 || thisi >= cinfo->num_components)
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
       /* Components must appear in SOF order within each scan */
       if (ci > 0 && thisi <= scanptr->component_index[ci-1])
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
     }
     /* Validate progression parameters */
     Ss = scanptr->Ss;
@@ -233,43 +233,43 @@
 #define MAX_AH_AL 13
 #endif
       if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       if (Ss == 0) {
-	if (Se != 0)		/* DC and AC together not OK */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        if (Se != 0)            /* DC and AC together not OK */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       } else {
-	if (ncomps != 1)	/* AC scans must be for only one component */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        if (ncomps != 1)        /* AC scans must be for only one component */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       }
       for (ci = 0; ci < ncomps; ci++) {
-	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
-	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	for (coefi = Ss; coefi <= Se; coefi++) {
-	  if (last_bitpos_ptr[coefi] < 0) {
-	    /* first scan of this coefficient */
-	    if (Ah != 0)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  } else {
-	    /* not first scan */
-	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  }
-	  last_bitpos_ptr[coefi] = Al;
-	}
+        last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+        if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        for (coefi = Ss; coefi <= Se; coefi++) {
+          if (last_bitpos_ptr[coefi] < 0) {
+            /* first scan of this coefficient */
+            if (Ah != 0)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          } else {
+            /* not first scan */
+            if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          }
+          last_bitpos_ptr[coefi] = Al;
+        }
       }
 #endif
     } else {
       /* For sequential JPEG, all progression parameters must be these: */
       if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       /* Make sure components are not sent twice */
       for (ci = 0; ci < ncomps; ci++) {
-	thisi = scanptr->component_index[ci];
-	if (component_sent[thisi])
-	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-	component_sent[thisi] = TRUE;
+        thisi = scanptr->component_index[ci];
+        if (component_sent[thisi])
+          ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        component_sent[thisi] = TRUE;
       }
     }
   }
@@ -284,13 +284,13 @@
      */
     for (ci = 0; ci < cinfo->num_components; ci++) {
       if (last_bitpos[ci][0] < 0)
-	ERREXIT(cinfo, JERR_MISSING_DATA);
+        ERREXIT(cinfo, JERR_MISSING_DATA);
     }
 #endif
   } else {
     for (ci = 0; ci < cinfo->num_components; ci++) {
       if (! component_sent[ci])
-	ERREXIT(cinfo, JERR_MISSING_DATA);
+        ERREXIT(cinfo, JERR_MISSING_DATA);
     }
   }
 }
@@ -313,7 +313,7 @@
     cinfo->comps_in_scan = scanptr->comps_in_scan;
     for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
       cinfo->cur_comp_info[ci] =
-	&cinfo->comp_info[scanptr->component_index[ci]];
+        &cinfo->comp_info[scanptr->component_index[ci]];
     }
     cinfo->Ss = scanptr->Ss;
     cinfo->Se = scanptr->Se;
@@ -326,7 +326,7 @@
     /* Prepare for single sequential-JPEG scan containing all components */
     if (cinfo->num_components > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPS_IN_SCAN);
+               MAX_COMPS_IN_SCAN);
     cinfo->comps_in_scan = cinfo->num_components;
     for (ci = 0; ci < cinfo->num_components; ci++) {
       cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
@@ -346,16 +346,16 @@
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
-  
+
   if (cinfo->comps_in_scan == 1) {
-    
+
     /* Noninterleaved (single-component) scan */
     compptr = cinfo->cur_comp_info[0];
-    
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = compptr->width_in_blocks;
     cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
+
     /* For noninterleaved scan, always one block per MCU */
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
@@ -368,28 +368,28 @@
     tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
     if (tmp == 0) tmp = compptr->v_samp_factor;
     compptr->last_row_height = tmp;
-    
+
     /* Prepare array describing MCU composition */
     cinfo->blocks_in_MCU = 1;
     cinfo->MCU_membership[0] = 0;
-    
+
   } else {
-    
+
     /* Interleaved (multi-component) scan */
     if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
+               MAX_COMPS_IN_SCAN);
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_width,
-		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor*DCTSIZE));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
       jdiv_round_up((long) cinfo->_jpeg_height,
-		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
-    
+                    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
     cinfo->blocks_in_MCU = 0;
-    
+
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       compptr = cinfo->cur_comp_info[ci];
       /* Sampling factors give # of blocks of component in each MCU */
@@ -407,12 +407,12 @@
       /* Prepare array describing MCU composition */
       mcublks = compptr->MCU_blocks;
       if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
       while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
       }
     }
-    
+
   }
 
   /* Convert restart specified in rows to actual MCU count. */
@@ -452,8 +452,8 @@
     (*cinfo->fdct->start_pass) (cinfo);
     (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
     (*cinfo->coef->start_pass) (cinfo,
-				(master->total_passes > 1 ?
-				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+                                (master->total_passes > 1 ?
+                                 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
     (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
     if (cinfo->optimize_coding) {
       /* No immediate data output; postpone writing frame/scan headers */
@@ -581,7 +581,7 @@
 
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_comp_master));
+                                  SIZEOF(my_comp_master));
   cinfo->master = (struct jpeg_comp_master *) master;
   master->pub.prepare_for_pass = prepare_for_pass;
   master->pub.pass_startup = pass_startup;
@@ -602,7 +602,7 @@
     cinfo->num_scans = 1;
   }
 
-  if (cinfo->progressive_mode && !cinfo->arith_code)	/*  TEMPORARY HACK ??? */
+  if (cinfo->progressive_mode && !cinfo->arith_code)    /*  TEMPORARY HACK ??? */
     cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
 
   /* Initialize my private state */
diff --git a/jcomapi.c b/jcomapi.c
index 9b1fa75..4ca2042 100644
--- a/jcomapi.c
+++ b/jcomapi.c
@@ -72,8 +72,8 @@
   /* NB: mem pointer is NULL if memory mgr failed to initialize. */
   if (cinfo->mem != NULL)
     (*cinfo->mem->self_destruct) (cinfo);
-  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
-  cinfo->global_state = 0;	/* mark it destroyed */
+  cinfo->mem = NULL;            /* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;      /* mark it destroyed */
 }
 
 
@@ -89,7 +89,7 @@
 
   tbl = (JQUANT_TBL *)
     (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
   return tbl;
 }
 
@@ -101,6 +101,6 @@
 
   tbl = (JHUFF_TBL *)
     (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
   return tbl;
 }
diff --git a/jconfig.h.in b/jconfig.h.in
index 27d4cc9..78023c1 100644
--- a/jconfig.h.in
+++ b/jconfig.h.in
@@ -12,7 +12,10 @@
 /* Support arithmetic decoding */
 #undef D_ARITH_CODING_SUPPORTED
 
-/* Compiler supports function prototypes. */
+/* Define to 1 if you have the <locale.h> header file. */
+#undef HAVE_LOCALE_H
+
+/* Define if your compiler supports prototypes */
 #undef HAVE_PROTOTYPES
 
 /* Define to 1 if you have the <stddef.h> header file. */
@@ -21,25 +24,31 @@
 /* Define to 1 if you have the <stdlib.h> header file. */
 #undef HAVE_STDLIB_H
 
-/* Compiler supports 'unsigned char'. */
+/* Define to 1 if the system has the type `unsigned char'. */
 #undef HAVE_UNSIGNED_CHAR
 
-/* Compiler supports 'unsigned short'. */
+/* Define to 1 if the system has the type `unsigned short'. */
 #undef HAVE_UNSIGNED_SHORT
 
-/* Compiler does not support pointers to unspecified structures. */
+/* Compiler does not support pointers to undefined structures. */
 #undef INCOMPLETE_TYPES_BROKEN
 
-/* Compiler has <strings.h> rather than standard <string.h>. */
+/* Support in-memory source/destination managers */
+#undef MEM_SRCDST_SUPPORTED
+
+/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
+   memset/memcpy in <string.h>. */
 #undef NEED_BSD_STRINGS
 
-/* Linker requires that global names be unique in first 15 characters. */
+/* Define if linker requires that the first 15 characters of global names be
+   unique. */
 #undef NEED_SHORT_EXTERNAL_NAMES
 
-/* Need to include <sys/types.h> in order to obtain size_t. */
+/* Define if you need to include <sys/types.h> to get size_t. */
 #undef NEED_SYS_TYPES_H
 
-/* Broken compiler shifts signed values as an unsigned shift. */
+/* Define if your (broken) compiler shifts signed values as if they were
+   unsigned. */
 #undef RIGHT_SHIFT_IS_UNSIGNED
 
 /* Use accelerated SIMD routines. */
diff --git a/jconfig.txt b/jconfig.txt
index b96d312..ba829ad 100644
--- a/jconfig.txt
+++ b/jconfig.txt
@@ -48,7 +48,7 @@
  * If you're not sure, leaving it undefined will work at some cost in speed.
  * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal.
  */
-#undef CHAR_IS_UNSIGNED
+#undef __CHAR_UNSIGNED__
 
 /* Define this if your system has an ANSI-conforming <stddef.h> file.
  */
@@ -94,10 +94,10 @@
 /* Define "boolean" as unsigned char, not int, on Windows systems.
  */
 #ifdef _WIN32
-#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
+#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
 typedef unsigned char boolean;
 #endif
-#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
+#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
 #endif
 
 
@@ -130,11 +130,11 @@
 
 /* These defines indicate which image (non-JPEG) file formats are allowed. */
 
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
+#define BMP_SUPPORTED           /* BMP image file format */
+#define GIF_SUPPORTED           /* GIF image file format */
+#define PPM_SUPPORTED           /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED            /* Utah RLE image file format */
+#define TARGA_SUPPORTED         /* Targa image file format */
 
 /* Define this if you want to name both input and output files on the command
  * line, rather than using stdout and optionally stdin.  You MUST do this if
diff --git a/jconfigint.h.in b/jconfigint.h.in
new file mode 100644
index 0000000..8f216eb
--- /dev/null
+++ b/jconfigint.h.in
@@ -0,0 +1,11 @@
+/* Define to the libjpeg-turbo build number. */
+#undef BUILD
+
+/* Define to whatever the compiler needs to obtain function inlining. */
+#undef INLINE
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the version number of this package. */
+#undef VERSION
diff --git a/jcparam.c b/jcparam.c
index 3207520..1832d9f 100644
--- a/jcparam.c
+++ b/jcparam.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2003-2008 by Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -24,8 +24,8 @@
 
 GLOBAL(void)
 jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
+                      const unsigned int *basic_table,
+                      int scale_factor, boolean force_baseline)
 /* Define a quantization table equal to the basic_table times
  * a scale factor (given as a percentage).
  * If force_baseline is TRUE, the computed quantization table entries
@@ -54,7 +54,7 @@
     if (temp <= 0L) temp = 1L;
     if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
     if (force_baseline && temp > 255L)
-      temp = 255L;		/* limit to baseline range if requested */
+      temp = 255L;              /* limit to baseline range if requested */
     (*qtblptr)->quantval[i] = (UINT16) temp;
   }
 
@@ -99,16 +99,16 @@
 {
   /* Set up two quantization tables using the specified scaling */
   jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       cinfo->q_scale_factor[0], force_baseline);
+                       cinfo->q_scale_factor[0], force_baseline);
   jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       cinfo->q_scale_factor[1], force_baseline);
+                       cinfo->q_scale_factor[1], force_baseline);
 }
 #endif
 
 
 GLOBAL(void)
 jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
+                         boolean force_baseline)
 /* Set or change the 'quality' (quantization) setting, using default tables
  * and a straight percentage-scaling quality scale.  In most cases it's better
  * to use jpeg_set_quality (below); this entry point is provided for
@@ -117,9 +117,9 @@
 {
   /* Set up two quantization tables using the specified scaling */
   jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       scale_factor, force_baseline);
+                       scale_factor, force_baseline);
   jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       scale_factor, force_baseline);
+                       scale_factor, force_baseline);
 }
 
 
@@ -301,12 +301,12 @@
   if (cinfo->comp_info == NULL)
     cinfo->comp_info = (jpeg_component_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+                                  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
 
   /* Initialize everything not dependent on the color space */
 
 #if JPEG_LIB_VERSION >= 70
-  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_num = 1;         /* 1:1 scaling */
   cinfo->scale_denom = 1;
 #endif
   cinfo->data_precision = BITS_IN_JSAMPLE;
@@ -371,8 +371,8 @@
    */
   cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
   cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
-  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->density_unit = 0;      /* Pixel size is unknown by default */
+  cinfo->X_density = 1;         /* Pixel aspect ratio is square by default */
   cinfo->Y_density = 1;
 
   /* Choose JPEG colorspace based on input space, set defaults accordingly */
@@ -498,7 +498,7 @@
     cinfo->num_components = cinfo->input_components;
     if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
+               MAX_COMPONENTS);
     for (ci = 0; ci < cinfo->num_components; ci++) {
       SET_COMP(ci, ci, 1,1, 0, 0,0);
     }
@@ -513,7 +513,7 @@
 
 LOCAL(jpeg_scan_info *)
 fill_a_scan (jpeg_scan_info * scanptr, int ci,
-	     int Ss, int Se, int Ah, int Al)
+             int Ss, int Se, int Ah, int Al)
 /* Support routine: generate one scan for specified component */
 {
   scanptr->comps_in_scan = 1;
@@ -528,7 +528,7 @@
 
 LOCAL(jpeg_scan_info *)
 fill_scans (jpeg_scan_info * scanptr, int ncomps,
-	    int Ss, int Se, int Ah, int Al)
+            int Ss, int Se, int Ah, int Al)
 /* Support routine: generate one scan for each component */
 {
   int ci;
@@ -591,9 +591,9 @@
   } else {
     /* All-purpose script for other color spaces. */
     if (ncomps > MAX_COMPS_IN_SCAN)
-      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+      nscans = 6 * ncomps;      /* 2 DC + 4 AC scans per component */
     else
-      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+      nscans = 2 + 4 * ncomps;  /* 2 DC scans; 4 AC scans per component */
   }
 
   /* Allocate space for script.
@@ -607,7 +607,7 @@
     cinfo->script_space_size = MAX(nscans, 10);
     cinfo->script_space = (jpeg_scan_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+                        cinfo->script_space_size * SIZEOF(jpeg_scan_info));
   }
   scanptr = cinfo->script_space;
   cinfo->scan_info = scanptr;
diff --git a/jcphuff.c b/jcphuff.c
index 3102871..b764b65 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -15,7 +15,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jchuff.h"		/* Declarations shared with jchuff.c */
+#include "jchuff.h"             /* Declarations shared with jchuff.c */
 
 #ifdef C_PROGRESSIVE_SUPPORTED
 
@@ -30,24 +30,24 @@
   /* Bit-level coding status.
    * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
    */
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
-  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  INT32 put_buffer;             /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
+  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
 
   /* Coding status for DC components */
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 
   /* Coding status for AC components */
-  int ac_tbl_no;		/* the table number of the single component */
-  unsigned int EOBRUN;		/* run length of EOBs */
-  unsigned int BE;		/* # of buffered correction bits before MCU */
-  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  int ac_tbl_no;                /* the table number of the single component */
+  unsigned int EOBRUN;          /* run length of EOBs */
+  unsigned int BE;              /* # of buffered correction bits before MCU */
+  char * bit_buffer;            /* buffer for correction bits (1 per char) */
   /* packing correction bits tightly would save some space but cost time... */
 
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
 
   /* Pointers to derived tables (these workspaces have image lifespan).
    * Since any one scan codes only DC or only AC, we only need one set
@@ -67,7 +67,7 @@
  * The minimum safe size is 64 bits.
  */
 
-#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
 
 /* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
  * We assume that int right shift is unsigned if INT32 right shift is,
@@ -75,25 +75,25 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
+#define ISHIFT_TEMPS    int ishift_temp;
 #define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
+        ((ishift_temp = (x)) < 0 ? \
+         (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+         (ishift_temp >> (shft)))
 #else
 #define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
 #endif
 
 /* Forward declarations */
 METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
+                                            JBLOCKROW *MCU_data));
 METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
+                                            JBLOCKROW *MCU_data));
 METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
+                                             JBLOCKROW *MCU_data));
 METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
+                                             JBLOCKROW *MCU_data));
 METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo));
 METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo));
 
@@ -104,7 +104,7 @@
 
 METHODDEF(void)
 start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics)
-{  
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   boolean is_DC_band;
   int ci, tbl;
@@ -130,9 +130,9 @@
       entropy->pub.encode_mcu = encode_mcu_AC_refine;
       /* AC refinement needs a correction bit buffer */
       if (entropy->bit_buffer == NULL)
-	entropy->bit_buffer = (char *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      MAX_CORR_BITS * SIZEOF(char));
+        entropy->bit_buffer = (char *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      MAX_CORR_BITS * SIZEOF(char));
     }
   }
   if (gather_statistics)
@@ -149,8 +149,8 @@
     entropy->last_dc_val[ci] = 0;
     /* Get table index */
     if (is_DC_band) {
-      if (cinfo->Ah != 0)	/* DC refinement needs no table */
-	continue;
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
       tbl = compptr->dc_tbl_no;
     } else {
       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
@@ -163,15 +163,15 @@
       /* Allocate and zero the statistics tables */
       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
       if (entropy->count_ptrs[tbl] == NULL)
-	entropy->count_ptrs[tbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
+        entropy->count_ptrs[tbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * SIZEOF(long));
       MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long));
     } else {
       /* Compute derived values for Huffman table */
       /* We may do this more than once for a table, but it's not expensive */
       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
-			      & entropy->derived_tbls[tbl]);
+                              & entropy->derived_tbls[tbl]);
     }
   }
 
@@ -196,9 +196,9 @@
 
 /* Emit a byte */
 #define emit_byte(entropy,val)  \
-	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(entropy)->free_in_buffer == 0)  \
-	    dump_buffer(entropy); }
+        { *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+          if (--(entropy)->free_in_buffer == 0)  \
+            dump_buffer(entropy); }
 
 
 LOCAL(void)
@@ -236,21 +236,21 @@
     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
 
   if (entropy->gather_statistics)
-    return;			/* do nothing if we're only getting stats */
+    return;                     /* do nothing if we're only getting stats */
 
   put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
+
+  put_bits += size;             /* new number of bits in buffer */
+
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
 
   while (put_bits >= 8) {
     int c = (int) ((put_buffer >> 16) & 0xFF);
-    
+
     emit_byte(entropy, c);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
+    if (c == 0xFF) {            /* need to stuff a zero byte? */
       emit_byte(entropy, 0);
     }
     put_buffer <<= 8;
@@ -293,10 +293,10 @@
 
 LOCAL(void)
 emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart,
-		    unsigned int nbits)
+                    unsigned int nbits)
 {
   if (entropy->gather_statistics)
-    return;			/* no real work */
+    return;                     /* no real work */
 
   while (nbits > 0) {
     emit_bits(entropy, (unsigned int) (*bufstart), 1);
@@ -315,7 +315,7 @@
 {
   register int temp, nbits;
 
-  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+  if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
     temp = entropy->EOBRUN;
     nbits = 0;
     while ((temp >>= 1))
@@ -409,12 +409,12 @@
     /* Encode the DC coefficient difference per section G.1.2.1 */
     temp2 = temp;
     if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
+      temp = -temp;             /* temp is abs value of input */
       /* For a negative input, want temp2 = bitwise complement of abs(input) */
       /* This code assumes we are on a two's complement machine */
       temp2--;
     }
-    
+
     /* Find the number of bits needed for the magnitude of the coefficient */
     nbits = 0;
     while (temp) {
@@ -426,13 +426,13 @@
      */
     if (nbits > MAX_COEF_BITS+1)
       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-    
+
     /* Count/emit the Huffman-coded symbol for the number of bits */
     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
-    
+
     /* Emit that number of bits of the value, if positive, */
     /* or the complement of its magnitude, if negative. */
-    if (nbits)			/* emit_bits rejects calls with size 0 */
+    if (nbits)                  /* emit_bits rejects calls with size 0 */
       emit_bits(entropy, (unsigned int) temp2, nbits);
   }
 
@@ -481,9 +481,9 @@
   block = MCU_data[0];
 
   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
-  
-  r = 0;			/* r = run length of zeros */
-   
+
+  r = 0;                        /* r = run length of zeros */
+
   for (k = cinfo->Ss; k <= Se; k++) {
     if ((temp = (*block)[jpeg_natural_order[k]]) == 0) {
       r++;
@@ -495,12 +495,12 @@
      * interwoven with finding the abs value (temp) and output bits (temp2).
      */
     if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      temp >>= Al;		/* apply the point transform */
+      temp = -temp;             /* temp is abs value of input */
+      temp >>= Al;              /* apply the point transform */
       /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
       temp2 = ~temp;
     } else {
-      temp >>= Al;		/* apply the point transform */
+      temp >>= Al;              /* apply the point transform */
       temp2 = temp;
     }
     /* Watch out for case that nonzero coef is zero after point transform */
@@ -519,7 +519,7 @@
     }
 
     /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 1;			/* there must be at least one 1 bit */
+    nbits = 1;                  /* there must be at least one 1 bit */
     while ((temp >>= 1))
       nbits++;
     /* Check for out-of-range coefficient values */
@@ -533,13 +533,13 @@
     /* or the complement of its magnitude, if negative. */
     emit_bits(entropy, (unsigned int) temp2, nbits);
 
-    r = 0;			/* reset zero run length */
+    r = 0;                      /* reset zero run length */
   }
 
-  if (r > 0) {			/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
+  if (r > 0) {                  /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
     if (entropy->EOBRUN == 0x7FFF)
-      emit_eobrun(entropy);	/* force it out to avoid overflow */
+      emit_eobrun(entropy);     /* force it out to avoid overflow */
   }
 
   cinfo->dest->next_output_byte = entropy->next_output_byte;
@@ -648,17 +648,17 @@
      * in C, we shift after obtaining the absolute value.
      */
     if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-    temp >>= Al;		/* apply the point transform */
-    absvalues[k] = temp;	/* save abs value for main pass */
+      temp = -temp;             /* temp is abs value of input */
+    temp >>= Al;                /* apply the point transform */
+    absvalues[k] = temp;        /* save abs value for main pass */
     if (temp == 1)
-      EOB = k;			/* EOB = index of last newly-nonzero coef */
+      EOB = k;                  /* EOB = index of last newly-nonzero coef */
   }
 
   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
-  
-  r = 0;			/* r = run length of zeros */
-  BR = 0;			/* BR = count of buffered bits added now */
+
+  r = 0;                        /* r = run length of zeros */
+  BR = 0;                       /* BR = count of buffered bits added now */
   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
 
   for (k = cinfo->Ss; k <= Se; k++) {
@@ -705,12 +705,12 @@
     emit_buffered_bits(entropy, BR_buffer, BR);
     BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
     BR = 0;
-    r = 0;			/* reset zero run length */
+    r = 0;                      /* reset zero run length */
   }
 
-  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    entropy->BE += BR;		/* concat my correction bits to older ones */
+  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
+    entropy->BE += BR;          /* concat my correction bits to older ones */
     /* We force out the EOB if we risk either:
      * 1. overflow of the EOB counter;
      * 2. overflow of the correction bit buffer during the next MCU.
@@ -742,7 +742,7 @@
 
 METHODDEF(void)
 finish_pass_phuff (j_compress_ptr cinfo)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -784,8 +784,8 @@
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
     if (is_DC_band) {
-      if (cinfo->Ah != 0)	/* DC refinement needs no table */
-	continue;
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
       tbl = compptr->dc_tbl_no;
     } else {
       tbl = compptr->ac_tbl_no;
@@ -816,7 +816,7 @@
 
   entropy = (phuff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(phuff_entropy_encoder));
+                                SIZEOF(phuff_entropy_encoder));
   cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
   entropy->pub.start_pass = start_pass_phuff;
 
@@ -825,7 +825,7 @@
     entropy->derived_tbls[i] = NULL;
     entropy->count_ptrs[i] = NULL;
   }
-  entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+  entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
 }
 
 #endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/jcprepct.c b/jcprepct.c
index fa93333..785ff88 100644
--- a/jcprepct.c
+++ b/jcprepct.c
@@ -58,12 +58,12 @@
    */
   JSAMPARRAY color_buf[MAX_COMPONENTS];
 
-  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
-  int next_buf_row;		/* index of next row to store in color_buf */
+  JDIMENSION rows_to_go;        /* counts rows remaining in source image */
+  int next_buf_row;             /* index of next row to store in color_buf */
 
-#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
-  int this_row_group;		/* starting row index of group to process */
-  int next_buf_stop;		/* downsample when we reach this index */
+#ifdef CONTEXT_ROWS_SUPPORTED   /* only needed for context case */
+  int this_row_group;           /* starting row index of group to process */
+  int next_buf_stop;            /* downsample when we reach this index */
 #endif
 } my_prep_controller;
 
@@ -104,13 +104,13 @@
 
 LOCAL(void)
 expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
-		    int input_rows, int output_rows)
+                    int input_rows, int output_rows)
 {
   register int row;
 
   for (row = input_rows; row < output_rows; row++) {
     jcopy_sample_rows(image_data, input_rows-1, image_data, row,
-		      1, num_cols);
+                      1, num_cols);
   }
 }
 
@@ -126,10 +126,10 @@
 
 METHODDEF(void)
 pre_process_data (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		  JDIMENSION in_rows_avail,
-		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		  JDIMENSION out_row_groups_avail)
+                  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                  JDIMENSION in_rows_avail,
+                  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                  JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
   int numrows, ci;
@@ -137,32 +137,32 @@
   jpeg_component_info * compptr;
 
   while (*in_row_ctr < in_rows_avail &&
-	 *out_row_group_ctr < out_row_groups_avail) {
+         *out_row_group_ctr < out_row_groups_avail) {
     /* Do color conversion to fill the conversion buffer. */
     inrows = in_rows_avail - *in_row_ctr;
     numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
     numrows = (int) MIN((JDIMENSION) numrows, inrows);
     (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-				       prep->color_buf,
-				       (JDIMENSION) prep->next_buf_row,
-				       numrows);
+                                       prep->color_buf,
+                                       (JDIMENSION) prep->next_buf_row,
+                                       numrows);
     *in_row_ctr += numrows;
     prep->next_buf_row += numrows;
     prep->rows_to_go -= numrows;
     /* If at bottom of image, pad to fill the conversion buffer. */
     if (prep->rows_to_go == 0 &&
-	prep->next_buf_row < cinfo->max_v_samp_factor) {
+        prep->next_buf_row < cinfo->max_v_samp_factor) {
       for (ci = 0; ci < cinfo->num_components; ci++) {
-	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			   prep->next_buf_row, cinfo->max_v_samp_factor);
+        expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                           prep->next_buf_row, cinfo->max_v_samp_factor);
       }
       prep->next_buf_row = cinfo->max_v_samp_factor;
     }
     /* If we've filled the conversion buffer, empty it. */
     if (prep->next_buf_row == cinfo->max_v_samp_factor) {
       (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf, (JDIMENSION) 0,
-					output_buf, *out_row_group_ctr);
+                                        prep->color_buf, (JDIMENSION) 0,
+                                        output_buf, *out_row_group_ctr);
       prep->next_buf_row = 0;
       (*out_row_group_ctr)++;
     }
@@ -170,16 +170,16 @@
      * Note we assume the caller is providing a one-iMCU-height output buffer!
      */
     if (prep->rows_to_go == 0 &&
-	*out_row_group_ctr < out_row_groups_avail) {
+        *out_row_group_ctr < out_row_groups_avail) {
       for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	expand_bottom_edge(output_buf[ci],
-			   compptr->width_in_blocks * DCTSIZE,
-			   (int) (*out_row_group_ctr * compptr->v_samp_factor),
-			   (int) (out_row_groups_avail * compptr->v_samp_factor));
+           ci++, compptr++) {
+        expand_bottom_edge(output_buf[ci],
+                           compptr->width_in_blocks * DCTSIZE,
+                           (int) (*out_row_group_ctr * compptr->v_samp_factor),
+                           (int) (out_row_groups_avail * compptr->v_samp_factor));
       }
       *out_row_group_ctr = out_row_groups_avail;
-      break;			/* can exit outer loop without test */
+      break;                    /* can exit outer loop without test */
     }
   }
 }
@@ -193,10 +193,10 @@
 
 METHODDEF(void)
 pre_process_context (j_compress_ptr cinfo,
-		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		     JDIMENSION in_rows_avail,
-		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		     JDIMENSION out_row_groups_avail)
+                     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                     JDIMENSION in_rows_avail,
+                     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                     JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
   int numrows, ci;
@@ -210,19 +210,19 @@
       numrows = prep->next_buf_stop - prep->next_buf_row;
       numrows = (int) MIN((JDIMENSION) numrows, inrows);
       (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-					 prep->color_buf,
-					 (JDIMENSION) prep->next_buf_row,
-					 numrows);
+                                         prep->color_buf,
+                                         (JDIMENSION) prep->next_buf_row,
+                                         numrows);
       /* Pad at top of image, if first time through */
       if (prep->rows_to_go == cinfo->image_height) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  int row;
-	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-	    jcopy_sample_rows(prep->color_buf[ci], 0,
-			      prep->color_buf[ci], -row,
-			      1, cinfo->image_width);
-	  }
-	}
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          int row;
+          for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+            jcopy_sample_rows(prep->color_buf[ci], 0,
+                              prep->color_buf[ci], -row,
+                              1, cinfo->image_width);
+          }
+        }
       }
       *in_row_ctr += numrows;
       prep->next_buf_row += numrows;
@@ -230,29 +230,29 @@
     } else {
       /* Return for more data, unless we are at the bottom of the image. */
       if (prep->rows_to_go != 0)
-	break;
+        break;
       /* When at bottom of image, pad to fill the conversion buffer. */
       if (prep->next_buf_row < prep->next_buf_stop) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			     prep->next_buf_row, prep->next_buf_stop);
-	}
-	prep->next_buf_row = prep->next_buf_stop;
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                             prep->next_buf_row, prep->next_buf_stop);
+        }
+        prep->next_buf_row = prep->next_buf_stop;
       }
     }
     /* If we've gotten enough data, downsample a row group. */
     if (prep->next_buf_row == prep->next_buf_stop) {
       (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf,
-					(JDIMENSION) prep->this_row_group,
-					output_buf, *out_row_group_ctr);
+                                        prep->color_buf,
+                                        (JDIMENSION) prep->this_row_group,
+                                        output_buf, *out_row_group_ctr);
       (*out_row_group_ctr)++;
       /* Advance pointers with wraparound as necessary. */
       prep->this_row_group += cinfo->max_v_samp_factor;
       if (prep->this_row_group >= buf_height)
-	prep->this_row_group = 0;
+        prep->this_row_group = 0;
       if (prep->next_buf_row >= buf_height)
-	prep->next_buf_row = 0;
+        prep->next_buf_row = 0;
       prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
     }
   }
@@ -277,8 +277,8 @@
    */
   fake_buffer = (JSAMPARRAY)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(cinfo->num_components * 5 * rgroup_height) *
-				SIZEOF(JSAMPROW));
+                                (cinfo->num_components * 5 * rgroup_height) *
+                                SIZEOF(JSAMPROW));
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -289,11 +289,11 @@
     true_buffer = (*cinfo->mem->alloc_sarray)
       ((j_common_ptr) cinfo, JPOOL_IMAGE,
        (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
-		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+                      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
        (JDIMENSION) (3 * rgroup_height));
     /* Copy true buffer row pointers into the middle of the fake row array */
     MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-	    3 * rgroup_height * SIZEOF(JSAMPROW));
+            3 * rgroup_height * SIZEOF(JSAMPROW));
     /* Fill in the above and below wraparound pointers */
     for (i = 0; i < rgroup_height; i++) {
       fake_buffer[i] = true_buffer[2 * rgroup_height + i];
@@ -318,12 +318,12 @@
   int ci;
   jpeg_component_info * compptr;
 
-  if (need_full_buffer)		/* safety check */
+  if (need_full_buffer)         /* safety check */
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 
   prep = (my_prep_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_prep_controller));
+                                SIZEOF(my_prep_controller));
   cinfo->prep = (struct jpeg_c_prep_controller *) prep;
   prep->pub.start_pass = start_pass_prep;
 
@@ -343,12 +343,12 @@
     /* No context, just make it tall enough for one row group */
     prep->pub.pre_process_data = pre_process_data;
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
-			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+                        cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+         (JDIMENSION) cinfo->max_v_samp_factor);
     }
   }
 }
diff --git a/jcsample.c b/jcsample.c
index eea376f..f7b8419 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -1,9 +1,10 @@
 /*
  * jcsample.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains downsampling routines.
@@ -54,13 +55,13 @@
 
 /* Pointer to routine to downsample a single component */
 typedef JMETHOD(void, downsample1_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+                (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                 JSAMPARRAY input_data, JSAMPARRAY output_data));
 
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_downsampler pub;	/* public fields */
+  struct jpeg_downsampler pub;  /* public fields */
 
   /* Downsampling method pointers, one per component */
   downsample1_ptr methods[MAX_COMPONENTS];
@@ -87,7 +88,7 @@
 
 LOCAL(void)
 expand_right_edge (JSAMPARRAY image_data, int num_rows,
-		   JDIMENSION input_cols, JDIMENSION output_cols)
+                   JDIMENSION input_cols, JDIMENSION output_cols)
 {
   register JSAMPROW ptr;
   register JSAMPLE pixval;
@@ -98,9 +99,9 @@
   if (numcols > 0) {
     for (row = 0; row < num_rows; row++) {
       ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
+      pixval = ptr[-1];         /* don't need GETJSAMPLE() here */
       for (count = numcols; count > 0; count--)
-	*ptr++ = pixval;
+        *ptr++ = pixval;
     }
   }
 }
@@ -114,8 +115,8 @@
 
 METHODDEF(void)
 sep_downsample (j_compress_ptr cinfo,
-		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+                JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+                JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
 {
   my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
   int ci;
@@ -140,10 +141,10 @@
 
 METHODDEF(void)
 int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		JSAMPARRAY input_data, JSAMPARRAY output_data)
+                JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
-  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
   JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
   JSAMPROW inptr, outptr;
   INT32 outvalue;
@@ -158,19 +159,19 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * h_expand);
+                    cinfo->image_width, output_cols * h_expand);
 
   inrow = 0;
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     for (outcol = 0, outcol_h = 0; outcol < output_cols;
-	 outcol++, outcol_h += h_expand) {
+         outcol++, outcol_h += h_expand) {
       outvalue = 0;
       for (v = 0; v < v_expand; v++) {
-	inptr = input_data[inrow+v] + outcol_h;
-	for (h = 0; h < h_expand; h++) {
-	  outvalue += (INT32) GETJSAMPLE(*inptr++);
-	}
+        inptr = input_data[inrow+v] + outcol_h;
+        for (h = 0; h < h_expand; h++) {
+          outvalue += (INT32) GETJSAMPLE(*inptr++);
+        }
       }
       *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
     }
@@ -187,14 +188,14 @@
 
 METHODDEF(void)
 fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+                     JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   /* Copy the data */
   jcopy_sample_rows(input_data, 0, output_data, 0,
-		    cinfo->max_v_samp_factor, cinfo->image_width);
+                    cinfo->max_v_samp_factor, cinfo->image_width);
   /* Edge-expand */
   expand_right_edge(output_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
+                    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
 }
 
 
@@ -212,7 +213,7 @@
 
 METHODDEF(void)
 h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+                 JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION outcol;
@@ -225,16 +226,16 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     inptr = input_data[outrow];
-    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
       *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
-			      + bias) >> 1);
-      bias ^= 1;		/* 0=>1, 1=>0 */
+                              + bias) >> 1);
+      bias ^= 1;                /* 0=>1, 1=>0 */
       inptr += 2;
     }
   }
@@ -249,7 +250,7 @@
 
 METHODDEF(void)
 h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+                 JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION outcol;
@@ -262,19 +263,19 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   inrow = 0;
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     inptr0 = input_data[inrow];
     inptr1 = input_data[inrow+1];
-    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
       *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
-			      + bias) >> 2);
-      bias ^= 3;		/* 1=>2, 2=>1 */
+                              GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+                              + bias) >> 2);
+      bias ^= 3;                /* 1=>2, 2=>1 */
       inptr0 += 2; inptr1 += 2;
     }
     inrow += 2;
@@ -292,7 +293,7 @@
 
 METHODDEF(void)
 h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-			JSAMPARRAY input_data, JSAMPARRAY output_data)
+                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION colctr;
@@ -305,7 +306,7 @@
    * efficient.
    */
   expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   /* We don't bother to form the individual "smoothed" input pixel values;
    * we can directly compute the output which is the average of the four
@@ -333,14 +334,14 @@
 
     /* Special case for first column: pretend column -1 is same as column 0 */
     membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
     neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+               GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+               GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
     neighsum += neighsum;
     neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+                GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
     inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
@@ -348,17 +349,17 @@
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
       /* sum of pixels directly mapped to this output element */
       membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
       /* sum of edge-neighbor pixels */
       neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+                 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+                 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+                 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
       /* The edge-neighbors count twice as much as corner-neighbors */
       neighsum += neighsum;
       /* Add in the corner-neighbors */
       neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+                  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
       /* form final output scaled up by 2^16 */
       membersum = membersum * memberscale + neighsum * neighscale;
       /* round, descale and output it */
@@ -368,14 +369,14 @@
 
     /* Special case for last column */
     membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
     neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+               GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+               GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
     neighsum += neighsum;
     neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+                GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
 
@@ -392,7 +393,7 @@
 
 METHODDEF(void)
 fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
-			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+                            JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION colctr;
@@ -406,7 +407,7 @@
    * efficient.
    */
   expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols);
+                    cinfo->image_width, output_cols);
 
   /* Each of the eight neighbor pixels contributes a fraction SF to the
    * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
@@ -425,10 +426,10 @@
 
     /* Special case for first column */
     colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-	     GETJSAMPLE(*inptr);
+             GETJSAMPLE(*inptr);
     membersum = GETJSAMPLE(*inptr++);
     nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		 GETJSAMPLE(*inptr);
+                 GETJSAMPLE(*inptr);
     neighsum = colsum + (colsum - membersum) + nextcolsum;
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
@@ -438,7 +439,7 @@
       membersum = GETJSAMPLE(*inptr++);
       above_ptr++; below_ptr++;
       nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		   GETJSAMPLE(*inptr);
+                   GETJSAMPLE(*inptr);
       neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
       membersum = membersum * memberscale + neighsum * neighscale;
       *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
@@ -472,7 +473,7 @@
 
   downsample = (my_downsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_downsampler));
+                                SIZEOF(my_downsampler));
   cinfo->downsample = (struct jpeg_downsampler *) downsample;
   downsample->pub.start_pass = start_pass_downsample;
   downsample->pub.downsample = sep_downsample;
@@ -485,35 +486,38 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
-	compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+        compptr->v_samp_factor == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
       if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = fullsize_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
+        downsample->methods[ci] = fullsize_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
       } else
 #endif
-	downsample->methods[ci] = fullsize_downsample;
+        downsample->methods[ci] = fullsize_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
-	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+               compptr->v_samp_factor == cinfo->max_v_samp_factor) {
       smoothok = FALSE;
       if (jsimd_can_h2v1_downsample())
         downsample->methods[ci] = jsimd_h2v1_downsample;
       else
         downsample->methods[ci] = h2v1_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
-	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+               compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
       if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = h2v2_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
+        downsample->methods[ci] = h2v2_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
       } else
 #endif
-	if (jsimd_can_h2v2_downsample())
-	  downsample->methods[ci] = jsimd_h2v2_downsample;
-	else
-	  downsample->methods[ci] = h2v2_downsample;
+      {
+        /* Braces sit outside the #ifdef so they balance in either build */
+        if (jsimd_can_h2v2_downsample())
+          downsample->methods[ci] = jsimd_h2v2_downsample;
+        else
+          downsample->methods[ci] = h2v2_downsample;
+      }
     } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
-	       (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+               (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
       smoothok = FALSE;
       downsample->methods[ci] = int_downsample;
     } else
diff --git a/jcstest.c b/jcstest.c
index 98f16da..358ed25 100644
--- a/jcstest.c
+++ b/jcstest.c
@@ -78,7 +78,7 @@
   jerr.pub.output_message = my_output_message;
 
   if(setjmp(jerr.jb)) {
-    // this will execute if libjpeg has an error
+    /* this will execute if libjpeg has an error */
     jcs_valid = 0;
     goto done;
   }
@@ -105,7 +105,7 @@
   #endif
 
   if(setjmp(jerr.jb)) {
-    // this will execute if libjpeg has an error
+    /* this will execute if libjpeg has an error */
     jcs_alpha_valid = 0;
     goto done2;
   }
diff --git a/jctrans.c b/jctrans.c
index 916e872..c353293 100644
--- a/jctrans.c
+++ b/jctrans.c
@@ -18,9 +18,9 @@
 
 /* Forward declarations */
 LOCAL(void) transencode_master_selection
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+        JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
 LOCAL(void) transencode_coef_controller
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+        JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
 
 
 /*
@@ -48,7 +48,7 @@
   /* Perform master selection of active modules */
   transencode_master_selection(cinfo, coef_arrays);
   /* Wait for jpeg_finish_compress() call */
-  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->next_scanline = 0;     /* so jpeg_write_marker works */
   cinfo->global_state = CSTATE_WRCOEFS;
 }
 
@@ -62,7 +62,7 @@
 
 GLOBAL(void)
 jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
-			       j_compress_ptr dstinfo)
+                               j_compress_ptr dstinfo)
 {
   JQUANT_TBL ** qtblptr;
   jpeg_component_info *incomp, *outcomp;
@@ -96,10 +96,10 @@
     if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
       qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
       if (*qtblptr == NULL)
-	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+        *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
       MEMCOPY((*qtblptr)->quantval,
-	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
-	      SIZEOF((*qtblptr)->quantval));
+              srcinfo->quant_tbl_ptrs[tblno]->quantval,
+              SIZEOF((*qtblptr)->quantval));
       (*qtblptr)->sent_table = FALSE;
     }
   }
@@ -109,7 +109,7 @@
   dstinfo->num_components = srcinfo->num_components;
   if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
   for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
        ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
     outcomp->component_id = incomp->component_id;
@@ -122,14 +122,14 @@
      */
     tblno = outcomp->quant_tbl_no;
     if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
-	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+        srcinfo->quant_tbl_ptrs[tblno] == NULL)
       ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
     slot_quant = srcinfo->quant_tbl_ptrs[tblno];
     c_quant = incomp->quant_table;
     if (c_quant != NULL) {
       for (coefi = 0; coefi < DCTSIZE2; coefi++) {
-	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
-	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+        if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+          ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
       }
     }
     /* Note: we do not copy the source's Huffman table assignments;
@@ -163,7 +163,7 @@
 
 LOCAL(void)
 transencode_master_selection (j_compress_ptr cinfo,
-			      jvirt_barray_ptr * coef_arrays)
+                              jvirt_barray_ptr * coef_arrays)
 {
   /* Although we don't actually use input_components for transcoding,
    * jcmaster.c's initial_setup will complain if input_components is 0.
@@ -219,10 +219,10 @@
 typedef struct {
   struct jpeg_c_coef_controller pub; /* public fields */
 
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* Virtual block array for each component. */
   jvirt_barray_ptr * whole_image;
@@ -289,7 +289,7 @@
 compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, ci, xindex, yindex, yoffset, blockcnt;
@@ -312,44 +312,44 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yindex+yoffset < compptr->last_row_height) {
-	    /* Fill in pointers to real blocks in this row */
-	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	    for (xindex = 0; xindex < blockcnt; xindex++)
-	      MCU_buffer[blkn++] = buffer_ptr++;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = 0;
-	  }
-	  /* Fill in any dummy blocks needed in this row.
-	   * Dummy blocks are filled in the same way as in jccoefct.c:
-	   * all zeroes in the AC entries, DC entries equal to previous
-	   * block's DC value.  The init routine has already zeroed the
-	   * AC entries, so we need only set the DC entries correctly.
-	   */
-	  for (; xindex < compptr->MCU_width; xindex++) {
-	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
-	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
-	    blkn++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                : compptr->last_col_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yindex+yoffset < compptr->last_row_height) {
+            /* Fill in pointers to real blocks in this row */
+            buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+            for (xindex = 0; xindex < blockcnt; xindex++)
+              MCU_buffer[blkn++] = buffer_ptr++;
+          } else {
+            /* At bottom of image, need a whole row of dummy blocks */
+            xindex = 0;
+          }
+          /* Fill in any dummy blocks needed in this row.
+           * Dummy blocks are filled in the same way as in jccoefct.c:
+           * all zeroes in the AC entries, DC entries equal to previous
+           * block's DC value.  The init routine has already zeroed the
+           * AC entries, so we need only set the DC entries correctly.
+           */
+          for (; xindex < compptr->MCU_width; xindex++) {
+            MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+            MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
+            blkn++;
+          }
+        }
       }
       /* Try to write the MCU. */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -372,7 +372,7 @@
 
 LOCAL(void)
 transencode_coef_controller (j_compress_ptr cinfo,
-			     jvirt_barray_ptr * coef_arrays)
+                             jvirt_barray_ptr * coef_arrays)
 {
   my_coef_ptr coef;
   JBLOCKROW buffer;
@@ -380,7 +380,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                SIZEOF(my_coef_controller));
   cinfo->coef = (struct jpeg_c_coef_controller *) coef;
   coef->pub.start_pass = start_pass_coef;
   coef->pub.compress_data = compress_output;
@@ -391,7 +391,7 @@
   /* Allocate and pre-zero space for dummy DCT blocks. */
   buffer = (JBLOCKROW)
     (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
   jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
   for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
     coef->dummy_buffer[i] = buffer + i;
diff --git a/jdapimin.c b/jdapimin.c
index cadb59f..db53fdc 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -32,12 +32,12 @@
   int i;
 
   /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
   if (version != JPEG_LIB_VERSION)
     ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
   if (structsize != SIZEOF(struct jpeg_decompress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
 
   /* For debugging purposes, we zero the whole master structure.
    * But the application has already set the err pointer, and may have set
@@ -121,22 +121,22 @@
     cinfo->jpeg_color_space = JCS_GRAYSCALE;
     cinfo->out_color_space = JCS_GRAYSCALE;
     break;
-    
+
   case 3:
     if (cinfo->saw_JFIF_marker) {
       cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
     } else if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
-	cinfo->jpeg_color_space = JCS_RGB;
-	break;
+        cinfo->jpeg_color_space = JCS_RGB;
+        break;
       case 1:
-	cinfo->jpeg_color_space = JCS_YCbCr;
-	break;
+        cinfo->jpeg_color_space = JCS_YCbCr;
+        break;
       default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-	break;
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        break;
       }
     } else {
       /* Saw no special markers, try to guess from the component IDs */
@@ -145,31 +145,31 @@
       int cid2 = cinfo->comp_info[2].component_id;
 
       if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
       else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
-	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+        cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
       else {
-	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
       }
     }
     /* Always guess RGB is proper output colorspace. */
     cinfo->out_color_space = JCS_RGB;
     break;
-    
+
   case 4:
     if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
-	cinfo->jpeg_color_space = JCS_CMYK;
-	break;
+        cinfo->jpeg_color_space = JCS_CMYK;
+        break;
       case 2:
-	cinfo->jpeg_color_space = JCS_YCCK;
-	break;
+        cinfo->jpeg_color_space = JCS_YCCK;
+        break;
       default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
-	break;
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+        break;
       }
     } else {
       /* No special markers, assume straight CMYK. */
@@ -177,7 +177,7 @@
     }
     cinfo->out_color_space = JCS_CMYK;
     break;
-    
+
   default:
     cinfo->jpeg_color_space = JCS_UNKNOWN;
     cinfo->out_color_space = JCS_UNKNOWN;
@@ -185,7 +185,7 @@
   }
 
   /* Set defaults for other decompression parameters. */
-  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_num = 1;         /* 1:1 scaling */
   cinfo->scale_denom = 1;
   cinfo->output_gamma = 1.0;
   cinfo->buffered_image = FALSE;
@@ -253,7 +253,7 @@
     retcode = JPEG_HEADER_OK;
     break;
   case JPEG_REACHED_EOI:
-    if (require_image)		/* Complain if application wanted an image */
+    if (require_image)          /* Complain if application wanted an image */
       ERREXIT(cinfo, JERR_NO_IMAGE);
     /* Reset to start state; it would be safer to require the application to
      * call jpeg_abort, but we can't change it now for compatibility reasons.
@@ -385,7 +385,7 @@
   /* Read until EOI */
   while (! cinfo->inputctl->eoi_reached) {
     if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
+      return FALSE;             /* Suspend, come back later */
   }
   /* Do final cleanup */
   (*cinfo->src->term_source) (cinfo);
diff --git a/jdapistd.c b/jdapistd.c
index 89d2c7c..f4ac50d 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -55,24 +55,24 @@
     if (cinfo->inputctl->has_multiple_scans) {
 #ifdef D_MULTISCAN_FILES_SUPPORTED
       for (;;) {
-	int retcode;
-	/* Call progress monitor hook if present */
-	if (cinfo->progress != NULL)
-	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-	/* Absorb some more input */
-	retcode = (*cinfo->inputctl->consume_input) (cinfo);
-	if (retcode == JPEG_SUSPENDED)
-	  return FALSE;
-	if (retcode == JPEG_REACHED_EOI)
-	  break;
-	/* Advance progress counter if appropriate */
-	if (cinfo->progress != NULL &&
-	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	    /* jdmaster underestimated number of scans; ratchet up one scan */
-	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	  }
-	}
+        int retcode;
+        /* Call progress monitor hook if present */
+        if (cinfo->progress != NULL)
+          (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        /* Absorb some more input */
+        retcode = (*cinfo->inputctl->consume_input) (cinfo);
+        if (retcode == JPEG_SUSPENDED)
+          return FALSE;
+        if (retcode == JPEG_REACHED_EOI)
+          break;
+        /* Advance progress counter if appropriate */
+        if (cinfo->progress != NULL &&
+            (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+          if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+            /* jdmaster underestimated number of scans; ratchet up one scan */
+            cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+          }
+        }
       }
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -111,16 +111,16 @@
       JDIMENSION last_scanline;
       /* Call progress monitor hook if present */
       if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-	cinfo->progress->pass_limit = (long) cinfo->output_height;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+        cinfo->progress->pass_limit = (long) cinfo->output_height;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       }
       /* Process some data */
       last_scanline = cinfo->output_scanline;
       (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
-				    &cinfo->output_scanline, (JDIMENSION) 0);
+                                    &cinfo->output_scanline, (JDIMENSION) 0);
       if (cinfo->output_scanline == last_scanline)
-	return FALSE;		/* No progress made, must suspend */
+        return FALSE;           /* No progress made, must suspend */
     }
     /* Finish up dummy pass, and set up for another one */
     (*cinfo->master->finish_output_pass) (cinfo);
@@ -153,7 +153,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-		     JDIMENSION max_lines)
+                     JDIMENSION max_lines)
 {
   JDIMENSION row_ctr;
 
@@ -186,7 +186,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
-		    JDIMENSION max_lines)
+                    JDIMENSION max_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
@@ -211,7 +211,7 @@
 
   /* Decompress directly into user's buffer. */
   if (! (*cinfo->coef->decompress_data) (cinfo, data))
-    return 0;			/* suspension forced, can do nothing more */
+    return 0;                   /* suspension forced, can do nothing more */
 
   /* OK, we processed one iMCU row. */
   cinfo->output_scanline += lines_per_iMCU_row;
@@ -267,9 +267,9 @@
   }
   /* Read markers looking for SOS or EOI */
   while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
+         ! cinfo->inputctl->eoi_reached) {
     if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
+      return FALSE;             /* Suspend, come back later */
   }
   cinfo->global_state = DSTATE_BUFIMAGE;
   return TRUE;
diff --git a/jdarith.c b/jdarith.c
index d556733..b945d64 100644
--- a/jdarith.c
+++ b/jdarith.c
@@ -32,7 +32,7 @@
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
   int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
 
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
 
   /* Pointers to statistics areas (these workspaces have image lifespan) */
   unsigned char * dc_stats[NUM_ARITH_TBLS];
@@ -115,32 +115,32 @@
     if (--e->ct < 0) {
       /* Need to fetch next data byte */
       if (cinfo->unread_marker)
-	data = 0;		/* stuff zero data */
+        data = 0;               /* stuff zero data */
       else {
-	data = get_byte(cinfo);	/* read next input byte */
-	if (data == 0xFF) {	/* zero stuff or marker code */
-	  do data = get_byte(cinfo);
-	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
-	  if (data == 0)
-	    data = 0xFF;	/* discard stuffed zero byte */
-	  else {
-	    /* Note: Different from the Huffman decoder, hitting
-	     * a marker while processing the compressed data
-	     * segment is legal in arithmetic coding.
-	     * The convention is to supply zero data
-	     * then until decoding is complete.
-	     */
-	    cinfo->unread_marker = data;
-	    data = 0;
-	  }
-	}
+        data = get_byte(cinfo); /* read next input byte */
+        if (data == 0xFF) {     /* zero stuff or marker code */
+          do data = get_byte(cinfo);
+          while (data == 0xFF); /* swallow extra 0xFF bytes */
+          if (data == 0)
+            data = 0xFF;        /* discard stuffed zero byte */
+          else {
+            /* Note: Different from the Huffman decoder, hitting
+             * a marker while processing the compressed data
+             * segment is legal in arithmetic coding.
+             * The convention is to supply zero data
+             * then until decoding is complete.
+             */
+            cinfo->unread_marker = data;
+            data = 0;
+          }
+        }
       }
       e->c = (e->c << 8) | data; /* insert data into C register */
-      if ((e->ct += 8) < 0)	 /* update bit shift counter */
-	/* Need more initial bytes */
-	if (++e->ct == 0)
-	  /* Got 2 initial bytes -> re-init A and exit loop */
-	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+      if ((e->ct += 8) < 0)      /* update bit shift counter */
+        /* Need more initial bytes */
+        if (++e->ct == 0)
+          /* Got 2 initial bytes -> re-init A and exit loop */
+          e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
     }
     e->a <<= 1;
   }
@@ -149,9 +149,9 @@
    * Qe values and probability estimation state machine
    */
   sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;     /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;     /* Next_Index_MPS */
 
   /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
   temp = e->a - qe;
@@ -162,19 +162,19 @@
     /* Conditional LPS (less probable symbol) exchange */
     if (e->a < qe) {
       e->a = qe;
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
     } else {
       e->a = qe;
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
     }
   } else if (e->a < 0x8000L) {
     /* Conditional MPS (more probable symbol) exchange */
     if (e->a < qe) {
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
     } else {
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
     }
   }
 
@@ -214,7 +214,7 @@
   /* Reset arithmetic decoding variables */
   entropy->c = 0;
   entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
 
   /* Reset restart counter */
   entropy->restarts_to_go = cinfo->restart_interval;
@@ -253,7 +253,7 @@
     entropy->restarts_to_go--;
   }
 
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
 
   /* Outer loop handles each block in the MCU */
 
@@ -277,28 +277,28 @@
       st += 2; st += sign;
       /* Figure F.23: Decoding the magnitude category of v */
       if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
       }
       /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
       if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
+        entropy->dc_context[ci] = 0;               /* zero diff category */
       else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
       else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
       v = m;
       /* Figure F.24: Decoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
+        if (arith_decode(cinfo, st)) v |= m;
       v += 1; if (sign) v = -v;
       entropy->last_dc_val[ci] += v;
     }
@@ -332,7 +332,7 @@
     entropy->restarts_to_go--;
   }
 
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
 
   /* There is always only one block per MCU */
   block = MCU_data[0];
@@ -343,13 +343,13 @@
   /* Figure F.20: Decode_AC_coefficients */
   for (k = cinfo->Ss; k <= cinfo->Se; k++) {
     st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    if (arith_decode(cinfo, st)) break;		/* EOB flag */
+    if (arith_decode(cinfo, st)) break;         /* EOB flag */
     while (arith_decode(cinfo, st + 1) == 0) {
       st += 3; k++;
       if (k > cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
       }
     }
     /* Figure F.21: Decoding nonzero value v */
@@ -359,17 +359,17 @@
     /* Figure F.23: Decoding the magnitude category of v */
     if ((m = arith_decode(cinfo, st)) != 0) {
       if (arith_decode(cinfo, st)) {
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
       }
     }
     v = m;
@@ -404,8 +404,8 @@
     entropy->restarts_to_go--;
   }
 
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+  st = entropy->fixed_bin;      /* use fixed probability estimation */
+  p1 = 1 << cinfo->Al;          /* 1 in the bit position being coded */
 
   /* Outer loop handles each block in the MCU */
 
@@ -440,14 +440,14 @@
     entropy->restarts_to_go--;
   }
 
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
 
   /* There is always only one block per MCU */
   block = MCU_data[0];
   tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
 
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-  m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+  p1 = 1 << cinfo->Al;          /* 1 in the bit position being coded */
+  m1 = (-1) << cinfo->Al;       /* -1 in the bit position being coded */
 
   /* Establish EOBx (previous stage end-of-block) index */
   for (kex = cinfo->Se; kex > 0; kex--)
@@ -456,30 +456,30 @@
   for (k = cinfo->Ss; k <= cinfo->Se; k++) {
     st = entropy->ac_stats[tbl] + 3 * (k - 1);
     if (k > kex)
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
     for (;;) {
       thiscoef = *block + jpeg_natural_order[k];
-      if (*thiscoef) {				/* previously nonzero coef */
-	if (arith_decode(cinfo, st + 2)) {
-	  if (*thiscoef < 0)
-	    *thiscoef += m1;
-	  else
-	    *thiscoef += p1;
-	}
-	break;
+      if (*thiscoef) {                          /* previously nonzero coef */
+        if (arith_decode(cinfo, st + 2)) {
+          if (*thiscoef < 0)
+            *thiscoef += m1;
+          else
+            *thiscoef += p1;
+        }
+        break;
       }
-      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
-	if (arith_decode(cinfo, entropy->fixed_bin))
-	  *thiscoef = m1;
-	else
-	  *thiscoef = p1;
-	break;
+      if (arith_decode(cinfo, st + 1)) {        /* newly nonzero coef */
+        if (arith_decode(cinfo, entropy->fixed_bin))
+          *thiscoef = m1;
+        else
+          *thiscoef = p1;
+        break;
       }
       st += 3; k++;
       if (k > cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
       }
     }
   }
@@ -509,7 +509,7 @@
     entropy->restarts_to_go--;
   }
 
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
 
   /* Outer loop handles each block in the MCU */
 
@@ -535,28 +535,28 @@
       st += 2; st += sign;
       /* Figure F.23: Decoding the magnitude category of v */
       if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
       }
       /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
       if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
+        entropy->dc_context[ci] = 0;               /* zero diff category */
       else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
       else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
       v = m;
       /* Figure F.24: Decoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
+        if (arith_decode(cinfo, st)) v |= m;
       v += 1; if (sign) v = -v;
       entropy->last_dc_val[ci] += v;
     }
@@ -570,14 +570,14 @@
     /* Figure F.20: Decode_AC_coefficients */
     for (k = 1; k <= DCTSIZE2 - 1; k++) {
       st = entropy->ac_stats[tbl] + 3 * (k - 1);
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
       while (arith_decode(cinfo, st + 1) == 0) {
-	st += 3; k++;
-	if (k > DCTSIZE2 - 1) {
-	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	  entropy->ct = -1;			/* spectral overflow */
-	  return TRUE;
-	}
+        st += 3; k++;
+        if (k > DCTSIZE2 - 1) {
+          WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+          entropy->ct = -1;                     /* spectral overflow */
+          return TRUE;
+        }
       }
       /* Figure F.21: Decoding nonzero value v */
       /* Figure F.22: Decoding the sign of v */
@@ -585,25 +585,25 @@
       st += 2;
       /* Figure F.23: Decoding the magnitude category of v */
       if ((m = arith_decode(cinfo, st)) != 0) {
-	if (arith_decode(cinfo, st)) {
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (arith_decode(cinfo, st)) {
-	    if ((m <<= 1) == 0x8000) {
-	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	      entropy->ct = -1;			/* magnitude overflow */
-	      return TRUE;
-	    }
-	    st += 1;
-	  }
-	}
+        if (arith_decode(cinfo, st)) {
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (arith_decode(cinfo, st)) {
+            if ((m <<= 1) == 0x8000) {
+              WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+              entropy->ct = -1;                 /* magnitude overflow */
+              return TRUE;
+            }
+            st += 1;
+          }
+        }
       }
       v = m;
       /* Figure F.24: Decoding the magnitude bit pattern of v */
       st += 14;
       while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
+        if (arith_decode(cinfo, st)) v |= m;
       v += 1; if (sign) v = -v;
       (*block)[jpeg_natural_order[k]] = (JCOEF) v;
     }
@@ -628,24 +628,24 @@
     /* Validate progressive scan parameters */
     if (cinfo->Ss == 0) {
       if (cinfo->Se != 0)
-	goto bad;
+        goto bad;
     } else {
       /* need not check Ss/Se < 0 since they came from unsigned bytes */
       if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1)
-	goto bad;
+        goto bad;
       /* AC scans may have only one component */
       if (cinfo->comps_in_scan != 1)
-	goto bad;
+        goto bad;
     }
     if (cinfo->Ah != 0) {
       /* Successive approximation refinement scan: must have Al = Ah-1. */
       if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
+        goto bad;
     }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
+    if (cinfo->Al > 13) {       /* need not check for < 0 */
       bad:
       ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+               cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
     }
     /* Update progression status, and verify that scan order is legal.
      * Note that inter-scan inconsistencies are treated as warnings
@@ -655,32 +655,32 @@
       int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
       int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
       if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
       for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
+        int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+        if (cinfo->Ah != expected)
+          WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+        coef_bit_ptr[coefi] = cinfo->Al;
       }
     }
     /* Select MCU decoding routine */
     if (cinfo->Ah == 0) {
       if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
+        entropy->pub.decode_mcu = decode_mcu_DC_first;
       else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
+        entropy->pub.decode_mcu = decode_mcu_AC_first;
     } else {
       if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+        entropy->pub.decode_mcu = decode_mcu_DC_refine;
       else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+        entropy->pub.decode_mcu = decode_mcu_AC_refine;
     }
   } else {
     /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
      * This ought to be an error condition, but we make it a warning.
      */
     if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	(cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
+        (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
       WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
     /* Select MCU decoding routine */
     entropy->pub.decode_mcu = decode_mcu;
@@ -692,10 +692,10 @@
     if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
       tbl = compptr->dc_tbl_no;
       if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
       if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+        entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
       MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
       /* Initialize DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
@@ -704,10 +704,10 @@
     if (! cinfo->progressive_mode || cinfo->Ss) {
       tbl = compptr->ac_tbl_no;
       if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
       if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+        entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
       MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
     }
   }
@@ -715,7 +715,7 @@
   /* Initialize arithmetic decoding variables */
   entropy->c = 0;
   entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
 
   /* Initialize restart counter */
   entropy->restarts_to_go = cinfo->restart_interval;
@@ -734,7 +734,7 @@
 
   entropy = (arith_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(arith_entropy_decoder));
+                                SIZEOF(arith_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass;
 
@@ -752,10 +752,10 @@
     int *coef_bit_ptr, ci;
     cinfo->coef_bits = (int (*)[DCTSIZE2])
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
+                                  cinfo->num_components*DCTSIZE2*SIZEOF(int));
     coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (ci = 0; ci < cinfo->num_components; ci++)
       for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
+        *coef_bit_ptr++ = -1;
   }
 }
diff --git a/jdatadst-tj.c b/jdatadst-tj.c
index 1939f23..95d9823 100644
--- a/jdatadst-tj.c
+++ b/jdatadst-tj.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * Modifications:
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -22,13 +22,13 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
 extern void * malloc JPP((size_t size));
 extern void free JPP((void *ptr));
 #endif
 
 
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
 
 
 /* Expanded data destination object for memory output */
@@ -36,10 +36,10 @@
 typedef struct {
   struct jpeg_destination_mgr pub; /* public fields */
 
-  unsigned char ** outbuffer;	/* target buffer */
+  unsigned char ** outbuffer;   /* target buffer */
   unsigned long * outsize;
-  unsigned char * newbuffer;	/* newly allocated buffer */
-  JOCTET * buffer;		/* start of buffer */
+  unsigned char * newbuffer;    /* newly allocated buffer */
+  JOCTET * buffer;              /* start of buffer */
   size_t bufsize;
   boolean alloc;
 } my_mem_destination_mgr;
@@ -93,7 +93,7 @@
 
   /* Try to allocate new buffer with double size */
   nextsize = dest->bufsize * 2;
-  nextbuffer = malloc(nextsize);
+  nextbuffer = (JOCTET *) malloc(nextsize);
 
   if (nextbuffer == NULL)
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
@@ -147,21 +147,21 @@
 
 GLOBAL(void)
 jpeg_mem_dest_tj (j_compress_ptr cinfo,
-	       unsigned char ** outbuffer, unsigned long * outsize,
-	       boolean alloc)
+               unsigned char ** outbuffer, unsigned long * outsize,
+               boolean alloc)
 {
   my_mem_dest_ptr dest;
 
-  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
   /* The destination object is made permanent so that multiple JPEG images
    * can be written to the same buffer without re-executing jpeg_mem_dest.
    */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
     cinfo->dest = (struct jpeg_destination_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_mem_destination_mgr));
+                                  SIZEOF(my_mem_destination_mgr));
     dest = (my_mem_dest_ptr) cinfo->dest;
     dest->newbuffer = NULL;
   }
@@ -177,7 +177,7 @@
   if (*outbuffer == NULL || *outsize == 0) {
     if (alloc) {
       /* Allocate initial buffer */
-      dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE);
+      dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
       if (dest->newbuffer == NULL)
         ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
       *outsize = OUTPUT_BUF_SIZE;
diff --git a/jdatadst.c b/jdatadst.c
index 2f48869..3fbc449 100644
--- a/jdatadst.c
+++ b/jdatadst.c
@@ -1,9 +1,11 @@
 /*
  * jdatadst.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains compression data destination routines for the case of
@@ -20,7 +22,7 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
 extern void * malloc JPP((size_t size));
 extern void free JPP((void *ptr));
 #endif
@@ -31,25 +33,25 @@
 typedef struct {
   struct jpeg_destination_mgr pub; /* public fields */
 
-  FILE * outfile;		/* target stream */
-  JOCTET * buffer;		/* start of buffer */
+  FILE * outfile;               /* target stream */
+  JOCTET * buffer;              /* start of buffer */
 } my_destination_mgr;
 
 typedef my_destination_mgr * my_dest_ptr;
 
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
 
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /* Expanded data destination object for memory output */
 
 typedef struct {
   struct jpeg_destination_mgr pub; /* public fields */
 
-  unsigned char ** outbuffer;	/* target buffer */
+  unsigned char ** outbuffer;   /* target buffer */
   unsigned long * outsize;
-  unsigned char * newbuffer;	/* newly allocated buffer */
-  JOCTET * buffer;		/* start of buffer */
+  unsigned char * newbuffer;    /* newly allocated buffer */
+  JOCTET * buffer;              /* start of buffer */
   size_t bufsize;
 } my_mem_destination_mgr;
 
@@ -70,13 +72,13 @@
   /* Allocate the output buffer --- it will be released when done with image */
   dest->buffer = (JOCTET *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+                                  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
 
   dest->pub.next_output_byte = dest->buffer;
   dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
 }
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 init_mem_destination (j_compress_ptr cinfo)
 {
@@ -123,7 +125,7 @@
   return TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(boolean)
 empty_mem_output_buffer (j_compress_ptr cinfo)
 {
@@ -133,7 +135,7 @@
 
   /* Try to allocate new buffer with double size */
   nextsize = dest->bufsize * 2;
-  nextbuffer = malloc(nextsize);
+  nextbuffer = (JOCTET *) malloc(nextsize);
 
   if (nextbuffer == NULL)
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
@@ -182,14 +184,14 @@
     ERREXIT(cinfo, JERR_FILE_WRITE);
 }
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 term_mem_destination (j_compress_ptr cinfo)
 {
   my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
 
   *dest->outbuffer = dest->buffer;
-  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
+  *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer);
 }
 #endif
 
@@ -211,10 +213,10 @@
    * manager serially with the same JPEG object, because their private object
    * sizes may be different.  Caveat programmer.
    */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
     cinfo->dest = (struct jpeg_destination_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_destination_mgr));
+                                  SIZEOF(my_destination_mgr));
   }
 
   dest = (my_dest_ptr) cinfo->dest;
@@ -225,7 +227,7 @@
 }
 
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /*
  * Prepare for output to a memory buffer.
  * The caller may supply an own initial buffer with appropriate size.
@@ -239,20 +241,20 @@
 
 GLOBAL(void)
 jpeg_mem_dest (j_compress_ptr cinfo,
-	       unsigned char ** outbuffer, unsigned long * outsize)
+               unsigned char ** outbuffer, unsigned long * outsize)
 {
   my_mem_dest_ptr dest;
 
-  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
   /* The destination object is made permanent so that multiple JPEG images
    * can be written to the same buffer without re-executing jpeg_mem_dest.
    */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
     cinfo->dest = (struct jpeg_destination_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_mem_destination_mgr));
+                                  SIZEOF(my_mem_destination_mgr));
   }
 
   dest = (my_mem_dest_ptr) cinfo->dest;
@@ -265,7 +267,7 @@
 
   if (*outbuffer == NULL || *outsize == 0) {
     /* Allocate initial buffer */
-    dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE);
+    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
     if (dest->newbuffer == NULL)
       ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
     *outsize = OUTPUT_BUF_SIZE;
diff --git a/jdatasrc-tj.c b/jdatasrc-tj.c
index e980529..f023a8b 100644
--- a/jdatasrc-tj.c
+++ b/jdatasrc-tj.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2010 by Guido Vollbeding.
- * Modifications:
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -71,16 +71,17 @@
 METHODDEF(boolean)
 fill_mem_input_buffer (j_decompress_ptr cinfo)
 {
-  static JOCTET mybuffer[4];
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
 
   /* The whole JPEG data is expected to reside in the supplied memory
    * buffer, so any request for more data beyond the given buffer size
    * is treated as an error.
    */
   WARNMS(cinfo, JWRN_JPEG_EOF);
+
   /* Insert a fake EOI marker */
-  mybuffer[0] = (JOCTET) 0xFF;
-  mybuffer[1] = (JOCTET) JPEG_EOI;
 
   cinfo->src->next_input_byte = mybuffer;
   cinfo->src->bytes_in_buffer = 2;
@@ -156,21 +157,21 @@
 
 GLOBAL(void)
 jpeg_mem_src_tj (j_decompress_ptr cinfo,
-	      unsigned char * inbuffer, unsigned long insize)
+              unsigned char * inbuffer, unsigned long insize)
 {
   struct jpeg_source_mgr * src;
 
-  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
     ERREXIT(cinfo, JERR_INPUT_EMPTY);
 
   /* The source object is made permanent so that a series of JPEG images
    * can be read from the same buffer by calling jpeg_mem_src only before
    * the first one.
    */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
     cinfo->src = (struct jpeg_source_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(struct jpeg_source_mgr));
+                                  SIZEOF(struct jpeg_source_mgr));
   }
 
   src = cinfo->src;
diff --git a/jdatasrc.c b/jdatasrc.c
index 7609f76..5b4c17d 100644
--- a/jdatasrc.c
+++ b/jdatasrc.c
@@ -1,9 +1,11 @@
 /*
  * jdatasrc.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains decompression data source routines for the case of
@@ -24,16 +26,16 @@
 /* Expanded data source object for stdio input */
 
 typedef struct {
-  struct jpeg_source_mgr pub;	/* public fields */
+  struct jpeg_source_mgr pub;   /* public fields */
 
-  FILE * infile;		/* source stream */
-  JOCTET * buffer;		/* start of buffer */
-  boolean start_of_file;	/* have we gotten any data yet? */
+  FILE * infile;                /* source stream */
+  JOCTET * buffer;              /* start of buffer */
+  boolean start_of_file;        /* have we gotten any data yet? */
 } my_source_mgr;
 
 typedef my_source_mgr * my_src_ptr;
 
-#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+#define INPUT_BUF_SIZE  4096    /* choose an efficiently fread'able size */
 
 
 /*
@@ -53,7 +55,7 @@
   src->start_of_file = TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 init_mem_source (j_decompress_ptr cinfo)
 {
@@ -104,7 +106,7 @@
   nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
 
   if (nbytes <= 0) {
-    if (src->start_of_file)	/* Treat empty input file as fatal error */
+    if (src->start_of_file)     /* Treat empty input file as fatal error */
       ERREXIT(cinfo, JERR_INPUT_EMPTY);
     WARNMS(cinfo, JWRN_JPEG_EOF);
     /* Insert a fake EOI marker */
@@ -120,20 +122,21 @@
   return TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(boolean)
 fill_mem_input_buffer (j_decompress_ptr cinfo)
 {
-  static JOCTET mybuffer[4];
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
 
   /* The whole JPEG data is expected to reside in the supplied memory
    * buffer, so any request for more data beyond the given buffer size
    * is treated as an error.
    */
   WARNMS(cinfo, JWRN_JPEG_EOF);
+
   /* Insert a fake EOI marker */
-  mybuffer[0] = (JOCTET) 0xFF;
-  mybuffer[1] = (JOCTET) JPEG_EOI;
 
   cinfo->src->next_input_byte = mybuffer;
   cinfo->src->bytes_in_buffer = 2;
@@ -221,14 +224,14 @@
    * This makes it unsafe to use this manager and a different source
    * manager serially with the same JPEG object.  Caveat programmer.
    */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
     cinfo->src = (struct jpeg_source_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_source_mgr));
+                                  SIZEOF(my_source_mgr));
     src = (my_src_ptr) cinfo->src;
     src->buffer = (JOCTET *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+                                  INPUT_BUF_SIZE * SIZEOF(JOCTET));
   }
 
   src = (my_src_ptr) cinfo->src;
@@ -243,7 +246,7 @@
 }
 
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /*
  * Prepare for input from a supplied memory buffer.
  * The buffer must contain the whole JPEG data.
@@ -251,21 +254,21 @@
 
 GLOBAL(void)
 jpeg_mem_src (j_decompress_ptr cinfo,
-	      unsigned char * inbuffer, unsigned long insize)
+              unsigned char * inbuffer, unsigned long insize)
 {
   struct jpeg_source_mgr * src;
 
-  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
     ERREXIT(cinfo, JERR_INPUT_EMPTY);
 
   /* The source object is made permanent so that a series of JPEG images
    * can be read from the same buffer by calling jpeg_mem_src only before
    * the first one.
    */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
     cinfo->src = (struct jpeg_source_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(struct jpeg_source_mgr));
+                                  SIZEOF(struct jpeg_source_mgr));
   }
 
   src = cinfo->src;
diff --git a/jdcoefct.c b/jdcoefct.c
index 5315e80..1d18a74 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -33,9 +33,9 @@
 
   /* These variables keep track of the current location of the input side. */
   /* cinfo->input_iMCU_row is also used for this. */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* The output side's location is represented by cinfo->output_iMCU_row. */
 
@@ -61,7 +61,7 @@
 #ifdef BLOCK_SMOOTHING_SUPPORTED
   /* When doing block smoothing, we latch coefficient Al values here */
   int * coef_bits_latch;
-#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#define SAVED_COEFS  6          /* we save coef_bits[0..5] */
 #endif
 } my_coef_controller;
 
@@ -69,15 +69,15 @@
 
 /* Forward declarations */
 METHODDEF(int) decompress_onepass
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
 #ifdef D_MULTISCAN_FILES_SUPPORTED
 METHODDEF(int) decompress_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
 #endif
 #ifdef BLOCK_SMOOTHING_SUPPORTED
 LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
 METHODDEF(int) decompress_smooth_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
 #endif
 
 
@@ -153,7 +153,7 @@
 decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, ci, xindex, yindex, yoffset, useful_width;
@@ -166,49 +166,49 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
       jzero_far((void FAR *) coef->MCU_buffer[0],
-		(size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+                (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
       }
       /* Determine where data should go in output_buf and do the IDCT thing.
        * We skip dummy blocks at the right and bottom edges (but blkn gets
        * incremented past them!).  Note the inner loop relies on having
        * allocated the MCU_buffer[] blocks sequentially.
        */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	/* Don't bother to IDCT an uninteresting component. */
-	if (! compptr->component_needed) {
-	  blkn += compptr->MCU_blocks;
-	  continue;
-	}
-	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						    : compptr->last_col_width;
-	output_ptr = output_buf[compptr->component_index] +
-	  yoffset * compptr->_DCT_scaled_size;
-	start_col = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (cinfo->input_iMCU_row < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    output_col = start_col;
-	    for (xindex = 0; xindex < useful_width; xindex++) {
-	      (*inverse_DCT) (cinfo, compptr,
-			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
-			      output_ptr, output_col);
-	      output_col += compptr->_DCT_scaled_size;
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  output_ptr += compptr->_DCT_scaled_size;
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        /* Don't bother to IDCT an uninteresting component. */
+        if (! compptr->component_needed) {
+          blkn += compptr->MCU_blocks;
+          continue;
+        }
+        inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+        useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                    : compptr->last_col_width;
+        output_ptr = output_buf[compptr->component_index] +
+          yoffset * compptr->_DCT_scaled_size;
+        start_col = MCU_col_num * compptr->MCU_sample_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (cinfo->input_iMCU_row < last_iMCU_row ||
+              yoffset+yindex < compptr->last_row_height) {
+            output_col = start_col;
+            for (xindex = 0; xindex < useful_width; xindex++) {
+              (*inverse_DCT) (cinfo, compptr,
+                              (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+                              output_ptr, output_col);
+              output_col += compptr->_DCT_scaled_size;
+            }
+          }
+          blkn += compptr->MCU_width;
+          output_ptr += compptr->_DCT_scaled_size;
+        }
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -233,7 +233,7 @@
 METHODDEF(int)
 dummy_consume_data (j_decompress_ptr cinfo)
 {
-  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+  return JPEG_SUSPENDED;        /* Always indicate nothing was done */
 }
 
 
@@ -250,7 +250,7 @@
 consume_data (j_decompress_ptr cinfo)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   int blkn, ci, xindex, yindex, yoffset;
   JDIMENSION start_col;
   JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
@@ -274,25 +274,25 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
       }
       /* Try to fetch the MCU. */
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -333,8 +333,8 @@
 
   /* Force some input to be done if we are getting ahead of the input. */
   while (cinfo->input_scan_number < cinfo->output_scan_number ||
-	 (cinfo->input_scan_number == cinfo->output_scan_number &&
-	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+         (cinfo->input_scan_number == cinfo->output_scan_number &&
+          cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
     if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
       return JPEG_SUSPENDED;
   }
@@ -365,10 +365,10 @@
       buffer_ptr = buffer[block_row];
       output_col = 0;
       for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
-			output_ptr, output_col);
-	buffer_ptr++;
-	output_col += compptr->_DCT_scaled_size;
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+                        output_ptr, output_col);
+        buffer_ptr++;
+        output_col += compptr->_DCT_scaled_size;
       }
       output_ptr += compptr->_DCT_scaled_size;
     }
@@ -425,8 +425,8 @@
   if (coef->coef_bits_latch == NULL)
     coef->coef_bits_latch = (int *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components *
-				  (SAVED_COEFS * SIZEOF(int)));
+                                  cinfo->num_components *
+                                  (SAVED_COEFS * SIZEOF(int)));
   coef_bits_latch = coef->coef_bits_latch;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
@@ -436,11 +436,11 @@
       return FALSE;
     /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
     if (qtable->quantval[0] == 0 ||
-	qtable->quantval[Q01_POS] == 0 ||
-	qtable->quantval[Q10_POS] == 0 ||
-	qtable->quantval[Q20_POS] == 0 ||
-	qtable->quantval[Q11_POS] == 0 ||
-	qtable->quantval[Q02_POS] == 0)
+        qtable->quantval[Q01_POS] == 0 ||
+        qtable->quantval[Q10_POS] == 0 ||
+        qtable->quantval[Q20_POS] == 0 ||
+        qtable->quantval[Q11_POS] == 0 ||
+        qtable->quantval[Q02_POS] == 0)
       return FALSE;
     /* DC values must be at least partly known for all components. */
     coef_bits = cinfo->coef_bits[ci];
@@ -450,7 +450,7 @@
     for (coefi = 1; coefi <= 5; coefi++) {
       coef_bits_latch[coefi] = coef_bits[coefi];
       if (coef_bits[coefi] != 0)
-	smoothing_useful = TRUE;
+        smoothing_useful = TRUE;
     }
     coef_bits_latch += SAVED_COEFS;
   }
@@ -489,7 +489,7 @@
 
   /* Force some input to be done if we are getting ahead of the input. */
   while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
+         ! cinfo->inputctl->eoi_reached) {
     if (cinfo->input_scan_number == cinfo->output_scan_number) {
       /* If input is working on current scan, we ordinarily want it to
        * have completed the current row.  But if input scan is DC,
@@ -498,7 +498,7 @@
        */
       JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
       if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
-	break;
+        break;
     }
     if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
       return JPEG_SUSPENDED;
@@ -526,15 +526,15 @@
     if (cinfo->output_iMCU_row > 0) {
       access_rows += compptr->v_samp_factor; /* prior iMCU row too */
       buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
-	 (JDIMENSION) access_rows, FALSE);
-      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+        ((j_common_ptr) cinfo, coef->whole_image[ci],
+         (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+         (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor; /* point to current iMCU row */
       first_row = FALSE;
     } else {
       buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+        ((j_common_ptr) cinfo, coef->whole_image[ci],
+         (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
       first_row = TRUE;
     }
     /* Fetch component-dependent info */
@@ -552,13 +552,13 @@
     for (block_row = 0; block_row < block_rows; block_row++) {
       buffer_ptr = buffer[block_row];
       if (first_row && block_row == 0)
-	prev_block_row = buffer_ptr;
+        prev_block_row = buffer_ptr;
       else
-	prev_block_row = buffer[block_row-1];
+        prev_block_row = buffer[block_row-1];
       if (last_row && block_row == block_rows-1)
-	next_block_row = buffer_ptr;
+        next_block_row = buffer_ptr;
       else
-	next_block_row = buffer[block_row+1];
+        next_block_row = buffer[block_row+1];
       /* We fetch the surrounding DC values using a sliding-register approach.
        * Initialize all nine here so as to do the right thing on narrow pics.
        */
@@ -568,102 +568,102 @@
       output_col = 0;
       last_block_column = compptr->width_in_blocks - 1;
       for (block_num = 0; block_num <= last_block_column; block_num++) {
-	/* Fetch current DCT block into workspace so we can modify it. */
-	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
-	/* Update DC values */
-	if (block_num < last_block_column) {
-	  DC3 = (int) prev_block_row[1][0];
-	  DC6 = (int) buffer_ptr[1][0];
-	  DC9 = (int) next_block_row[1][0];
-	}
-	/* Compute coefficient estimates per K.8.
-	 * An estimate is applied only if coefficient is still zero,
-	 * and is not known to be fully accurate.
-	 */
-	/* AC01 */
-	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
-	  num = 36 * Q00 * (DC4 - DC6);
-	  if (num >= 0) {
-	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[1] = (JCOEF) pred;
-	}
-	/* AC10 */
-	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
-	  num = 36 * Q00 * (DC2 - DC8);
-	  if (num >= 0) {
-	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[8] = (JCOEF) pred;
-	}
-	/* AC20 */
-	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
-	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[16] = (JCOEF) pred;
-	}
-	/* AC11 */
-	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
-	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
-	  if (num >= 0) {
-	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[9] = (JCOEF) pred;
-	}
-	/* AC02 */
-	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
-	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[2] = (JCOEF) pred;
-	}
-	/* OK, do the IDCT */
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
-			output_ptr, output_col);
-	/* Advance for next column */
-	DC1 = DC2; DC2 = DC3;
-	DC4 = DC5; DC5 = DC6;
-	DC7 = DC8; DC8 = DC9;
-	buffer_ptr++, prev_block_row++, next_block_row++;
-	output_col += compptr->_DCT_scaled_size;
+        /* Fetch current DCT block into workspace so we can modify it. */
+        jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+        /* Update DC values */
+        if (block_num < last_block_column) {
+          DC3 = (int) prev_block_row[1][0];
+          DC6 = (int) buffer_ptr[1][0];
+          DC9 = (int) next_block_row[1][0];
+        }
+        /* Compute coefficient estimates per K.8.
+         * An estimate is applied only if coefficient is still zero,
+         * and is not known to be fully accurate.
+         */
+        /* AC01 */
+        if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+          num = 36 * Q00 * (DC4 - DC6);
+          if (num >= 0) {
+            pred = (int) (((Q01<<7) + num) / (Q01<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q01<<7) - num) / (Q01<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[1] = (JCOEF) pred;
+        }
+        /* AC10 */
+        if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+          num = 36 * Q00 * (DC2 - DC8);
+          if (num >= 0) {
+            pred = (int) (((Q10<<7) + num) / (Q10<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q10<<7) - num) / (Q10<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[8] = (JCOEF) pred;
+        }
+        /* AC20 */
+        if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+          num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+          if (num >= 0) {
+            pred = (int) (((Q20<<7) + num) / (Q20<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q20<<7) - num) / (Q20<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[16] = (JCOEF) pred;
+        }
+        /* AC11 */
+        if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+          num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+          if (num >= 0) {
+            pred = (int) (((Q11<<7) + num) / (Q11<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q11<<7) - num) / (Q11<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[9] = (JCOEF) pred;
+        }
+        /* AC02 */
+        if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+          num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+          if (num >= 0) {
+            pred = (int) (((Q02<<7) + num) / (Q02<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q02<<7) - num) / (Q02<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[2] = (JCOEF) pred;
+        }
+        /* OK, do the IDCT */
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+                        output_ptr, output_col);
+        /* Advance for next column */
+        DC1 = DC2; DC2 = DC3;
+        DC4 = DC5; DC5 = DC6;
+        DC7 = DC8; DC8 = DC9;
+        buffer_ptr++, prev_block_row++, next_block_row++;
+        output_col += compptr->_DCT_scaled_size;
       }
       output_ptr += compptr->_DCT_scaled_size;
     }
@@ -688,7 +688,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                SIZEOF(my_coef_controller));
   cinfo->coef = (struct jpeg_d_coef_controller *) coef;
   coef->pub.start_input_pass = start_input_pass;
   coef->pub.start_output_pass = start_output_pass;
@@ -706,20 +706,20 @@
     jpeg_component_info *compptr;
 
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       access_rows = compptr->v_samp_factor;
 #ifdef BLOCK_SMOOTHING_SUPPORTED
       /* If block smoothing could be used, need a bigger window */
       if (cinfo->progressive_mode)
-	access_rows *= 3;
+        access_rows *= 3;
 #endif
       coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) access_rows);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+         (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+                                (long) compptr->h_samp_factor),
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor),
+         (JDIMENSION) access_rows);
     }
     coef->pub.consume_data = consume_data;
     coef->pub.decompress_data = decompress_data;
@@ -734,7 +734,7 @@
 
     buffer = (JBLOCKROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
     for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
diff --git a/jdcolext.c b/jdcolext.c
index a8e67c3..f72cab0 100644
--- a/jdcolext.c
+++ b/jdcolext.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009, 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -58,8 +58,8 @@
       /* Range-limiting is essential due to noise introduced by DCT losses. */
       outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
       outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS))];
       outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
       /* Set unused byte to 0xFF so it can be interpreted as an opaque */
       /* alpha channel value */
diff --git a/jdcolor.c b/jdcolor.c
index 7a8ed22..8dae08d 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -3,7 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * Modified 2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2009, 2011-2012, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
@@ -15,7 +16,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "config.h"
+#include "jconfigint.h"
 
 
 /* Private subobject */
@@ -24,24 +25,32 @@
   struct jpeg_color_deconverter pub; /* public fields */
 
   /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+  int * Cr_r_tab;               /* => table for Cr to R conversion */
+  int * Cb_b_tab;               /* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
+
+  /* Private state for RGB->Y conversion */
+  INT32 * rgb_y_tab;            /* => table for RGB to Y conversion */
 } my_color_deconverter;
 
 typedef my_color_deconverter * my_cconvert_ptr;
 
 
 /**************** YCbCr -> RGB conversion: most common case **************/
+/****************   RGB -> Y   conversion: less common case **************/
 
 /*
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
  * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
- *	R = Y                + 1.40200 * Cr
- *	G = Y - 0.34414 * Cb - 0.71414 * Cr
- *	B = Y + 1.77200 * Cb
+ *
+ *      R = Y                + 1.40200 * Cr
+ *      G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *      B = Y + 1.77200 * Cb
+ *
+ *      Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *
  * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  *
@@ -62,9 +71,21 @@
  * together before rounding.
  */
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table for RGB->Y conversion and divide it up into
+ * three parts, instead of doing three alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all three addresses,
+ * anyway).
+ */
+
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1*(MAXJSAMPLE+1))      /* offset to G => Y section */
+#define B_Y_OFF         (2*(MAXJSAMPLE+1))      /* etc. */
+#define TABLE_SIZE      (3*(MAXJSAMPLE+1))
 
 
 /* Include inline routines for colorspace extensions */
@@ -194,26 +215,26 @@
 
   cconvert->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * SIZEOF(int));
   cconvert->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * SIZEOF(int));
   cconvert->Cr_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
   cconvert->Cb_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     cconvert->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
     /* Cb=>B value is nearest int to 1.77200 * x */
     cconvert->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
     /* Cr=>G value is scaled-up -0.71414 * x */
     cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
     /* Cb=>G value is scaled-up -0.34414 * x */
@@ -229,8 +250,8 @@
 
 METHODDEF(void)
 ycc_rgb_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
+                 JSAMPIMAGE input_buf, JDIMENSION input_row,
+                 JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
     case JCS_EXT_RGB:
@@ -273,14 +294,74 @@
 
 
 /*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * rgb_y_tab;
+  INT32 i;
+
+  /* Allocate one combined table and fill in its R, G, and B => Y sections. */
+  cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (TABLE_SIZE * SIZEOF(INT32)));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_decompress_ptr cinfo,
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_y_tab;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Y; the rounding term ONE_HALF is already folded into the B section */
+      outptr[col] = (JSAMPLE)
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
  * Color conversion for no colorspace change: just copy the data,
  * converting from separate-planes to interleaved representation.
  */
 
 METHODDEF(void)
 null_convert (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION input_row,
-	      JSAMPARRAY output_buf, int num_rows)
+              JSAMPIMAGE input_buf, JDIMENSION input_row,
+              JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW inptr, outptr;
   register JDIMENSION count;
@@ -293,8 +374,8 @@
       inptr = input_buf[ci][input_row];
       outptr = output_buf[0] + ci;
       for (count = num_cols; count > 0; count--) {
-	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-	outptr += num_components;
+        *outptr = *inptr++;     /* needn't bother with GETJSAMPLE() here */
+        outptr += num_components;
       }
     }
     input_row++;
@@ -311,11 +392,11 @@
 
 METHODDEF(void)
 grayscale_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
+                   JSAMPIMAGE input_buf, JDIMENSION input_row,
+                   JSAMPARRAY output_buf, int num_rows)
 {
   jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
-		    num_rows, cinfo->output_width);
+                    num_rows, cinfo->output_width);
 }
 
 
@@ -325,8 +406,8 @@
 
 METHODDEF(void)
 gray_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
     case JCS_EXT_RGB:
@@ -371,8 +452,8 @@
 
 METHODDEF(void)
 rgb_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
     case JCS_EXT_RGB:
@@ -410,6 +491,7 @@
   }
 }
 
+
 /*
  * Adobe-style YCCK->CMYK conversion.
  * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
@@ -419,8 +501,8 @@
 
 METHODDEF(void)
 ycck_cmyk_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
+                   JSAMPIMAGE input_buf, JDIMENSION input_row,
+                   JSAMPARRAY output_buf, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int y, cb, cr;
@@ -448,13 +530,13 @@
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
       /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
       /* K passes through unchanged */
-      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr[3] = inptr3[col];  /* don't need GETJSAMPLE here */
       outptr += 4;
     }
   }
@@ -484,7 +566,7 @@
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_deconverter));
+                                SIZEOF(my_color_deconverter));
   cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
   cconvert->pub.start_pass = start_pass_dcolor;
 
@@ -507,7 +589,7 @@
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
 
-  default:			/* JCS_UNKNOWN can be anything */
+  default:                      /* JCS_UNKNOWN can be anything */
     if (cinfo->num_components < 1)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
@@ -522,11 +604,14 @@
   case JCS_GRAYSCALE:
     cinfo->out_color_components = 1;
     if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
-	cinfo->jpeg_color_space == JCS_YCbCr) {
+        cinfo->jpeg_color_space == JCS_YCbCr) {
       cconvert->pub.color_convert = grayscale_convert;
       /* For color->grayscale conversion, only the Y (0) component is needed */
       for (ci = 1; ci < cinfo->num_components; ci++)
-	cinfo->comp_info[ci].component_needed = FALSE;
+        cinfo->comp_info[ci].component_needed = FALSE;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      cconvert->pub.color_convert = rgb_gray_convert;
+      build_rgb_y_table(cinfo);
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
@@ -580,7 +665,7 @@
     if (cinfo->out_color_space == cinfo->jpeg_color_space) {
       cinfo->out_color_components = cinfo->num_components;
       cconvert->pub.color_convert = null_convert;
-    } else			/* unsupported non-null conversion */
+    } else                      /* unsupported non-null conversion */
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
   }
diff --git a/jdct.h b/jdct.h
index 7b49a97..310f2ca 100644
--- a/jdct.h
+++ b/jdct.h
@@ -8,7 +8,7 @@
  * This include file contains common declarations for the forward and
  * inverse DCT modules.  These declarations are private to the DCT managers
  * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
- * The individual DCT algorithms are kept in separate files to ease 
+ * The individual DCT algorithms are kept in separate files to ease
  * machine-dependent tuning (e.g., assembly coding).
  */
 
@@ -29,7 +29,7 @@
 
 #if BITS_IN_JSAMPLE == 8
 #ifndef WITH_SIMD
-typedef int DCTELEM;		/* 16 or 32 bits is fine */
+typedef int DCTELEM;            /* 16 or 32 bits is fine */
 typedef unsigned int UDCTELEM;
 typedef unsigned long long UDCTELEM2;
 #else
@@ -38,7 +38,7 @@
 typedef unsigned int UDCTELEM2;
 #endif
 #else
-typedef INT32 DCTELEM;		/* must have 32 bits */
+typedef INT32 DCTELEM;          /* must have 32 bits */
 typedef UINT32 UDCTELEM;
 typedef unsigned long long UDCTELEM2;
 #endif
@@ -64,10 +64,10 @@
 typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
 #if BITS_IN_JSAMPLE == 8
 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
-#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#define IFAST_SCALE_BITS  2     /* fractional bits in scale factors */
 #else
-typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+typedef INT32 IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13    /* fractional bits in scale factors */
 #endif
 typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
 
@@ -89,15 +89,27 @@
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow		jFDislow
-#define jpeg_fdct_ifast		jFDifast
-#define jpeg_fdct_float		jFDfloat
-#define jpeg_idct_islow		jRDislow
-#define jpeg_idct_ifast		jRDifast
-#define jpeg_idct_float		jRDfloat
-#define jpeg_idct_4x4		jRD4x4
-#define jpeg_idct_2x2		jRD2x2
-#define jpeg_idct_1x1		jRD1x1
+#define jpeg_fdct_islow         jFDislow
+#define jpeg_fdct_ifast         jFDifast
+#define jpeg_fdct_float         jFDfloat
+#define jpeg_idct_islow         jRDislow
+#define jpeg_idct_ifast         jRDifast
+#define jpeg_idct_float         jRDfloat
+#define jpeg_idct_7x7           jRD7x7
+#define jpeg_idct_6x6           jRD6x6
+#define jpeg_idct_5x5           jRD5x5
+#define jpeg_idct_4x4           jRD4x4
+#define jpeg_idct_3x3           jRD3x3
+#define jpeg_idct_2x2           jRD2x2
+#define jpeg_idct_1x1           jRD1x1
+#define jpeg_idct_9x9           jRD9x9
+#define jpeg_idct_10x10         jRD10x10
+#define jpeg_idct_11x11         jRD11x11
+#define jpeg_idct_12x12         jRD12x12
+#define jpeg_idct_13x13         jRD13x13
+#define jpeg_idct_14x14         jRD14x14
+#define jpeg_idct_15x15         jRD15x15
+#define jpeg_idct_16x16         jRD16x16
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 /* Extern declarations for the forward and inverse DCT routines. */
@@ -108,22 +120,58 @@
 
 EXTERN(void) jpeg_idct_islow
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 EXTERN(void) jpeg_idct_ifast
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 EXTERN(void) jpeg_idct_float
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 EXTERN(void) jpeg_idct_4x4
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 EXTERN(void) jpeg_idct_2x2
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 EXTERN(void) jpeg_idct_1x1
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_9x9
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_11x11
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_13x13
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_15x15
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 
 
 /*
@@ -136,7 +184,7 @@
  * and may differ from one module to the next.
  */
 
-#define ONE	((INT32) 1)
+#define ONE     ((INT32) 1)
 #define CONST_SCALE (ONE << CONST_BITS)
 
 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
@@ -144,7 +192,7 @@
  * thus causing a lot of useless floating-point operations at run time.
  */
 
-#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+#define FIX(x)  ((INT32) ((x) * CONST_SCALE + 0.5))
 
 /* Descale and correctly round an INT32 value that's scaled by N bits.
  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
@@ -162,23 +210,23 @@
  * correct combination of casts.
  */
 
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
 #endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
 #endif
 
-#ifndef MULTIPLY16C16		/* default definition */
+#ifndef MULTIPLY16C16           /* default definition */
 #define MULTIPLY16C16(var,const)  ((var) * (const))
 #endif
 
 /* Same except both inputs are variables. */
 
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
 #define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
 #endif
 
-#ifndef MULTIPLY16V16		/* default definition */
+#ifndef MULTIPLY16V16           /* default definition */
 #define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
 #endif
diff --git a/jddctmgr.c b/jddctmgr.c
index be401d5..1a6032d 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -3,7 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modifications:
+ * Modified 2002-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
@@ -21,7 +22,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 #include "jsimddct.h"
 #include "jpegcomp.h"
 
@@ -46,7 +47,7 @@
 /* Private subobject for this module */
 
 typedef struct {
-  struct jpeg_inverse_dct pub;	/* public fields */
+  struct jpeg_inverse_dct pub;  /* public fields */
 
   /* This array contains the IDCT method code that each multiplier table
    * is currently set up for, or -1 if it's not yet set up.
@@ -107,57 +108,105 @@
 #ifdef IDCT_SCALING_SUPPORTED
     case 1:
       method_ptr = jpeg_idct_1x1;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
       break;
     case 2:
       if (jsimd_can_idct_2x2())
         method_ptr = jsimd_idct_2x2;
       else
         method_ptr = jpeg_idct_2x2;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 3:
+      method_ptr = jpeg_idct_3x3;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 4:
       if (jsimd_can_idct_4x4())
         method_ptr = jsimd_idct_4x4;
       else
         method_ptr = jpeg_idct_4x4;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 5:
+      method_ptr = jpeg_idct_5x5;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 6:
+      method_ptr = jpeg_idct_6x6;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 7:
+      method_ptr = jpeg_idct_7x7;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
 #endif
     case DCTSIZE:
       switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
       case JDCT_ISLOW:
-	if (jsimd_can_idct_islow())
-	  method_ptr = jsimd_idct_islow;
-	else
-	  method_ptr = jpeg_idct_islow;
-	method = JDCT_ISLOW;
-	break;
+        if (jsimd_can_idct_islow())
+          method_ptr = jsimd_idct_islow;
+        else
+          method_ptr = jpeg_idct_islow;
+        method = JDCT_ISLOW;
+        break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
       case JDCT_IFAST:
-	if (jsimd_can_idct_ifast())
-	  method_ptr = jsimd_idct_ifast;
-	else
-	  method_ptr = jpeg_idct_ifast;
-	method = JDCT_IFAST;
-	break;
+        if (jsimd_can_idct_ifast())
+          method_ptr = jsimd_idct_ifast;
+        else
+          method_ptr = jpeg_idct_ifast;
+        method = JDCT_IFAST;
+        break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
       case JDCT_FLOAT:
-	if (jsimd_can_idct_float())
-	  method_ptr = jsimd_idct_float;
-	else
-	  method_ptr = jpeg_idct_float;
-	method = JDCT_FLOAT;
-	break;
+        if (jsimd_can_idct_float())
+          method_ptr = jsimd_idct_float;
+        else
+          method_ptr = jpeg_idct_float;
+        method = JDCT_FLOAT;
+        break;
 #endif
       default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+        break;
       }
       break;
+    case 9:
+      method_ptr = jpeg_idct_9x9;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 10:
+      method_ptr = jpeg_idct_10x10;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 11:
+      method_ptr = jpeg_idct_11x11;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 12:
+      method_ptr = jpeg_idct_12x12;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 13:
+      method_ptr = jpeg_idct_13x13;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 14:
+      method_ptr = jpeg_idct_14x14;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 15:
+      method_ptr = jpeg_idct_15x15;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 16:
+      method_ptr = jpeg_idct_16x16;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
     default:
       ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
       break;
@@ -173,81 +222,81 @@
     if (! compptr->component_needed || idct->cur_method[ci] == method)
       continue;
     qtbl = compptr->quant_table;
-    if (qtbl == NULL)		/* happens if no data yet for component */
+    if (qtbl == NULL)           /* happens if no data yet for component */
       continue;
     idct->cur_method[ci] = method;
     switch (method) {
 #ifdef PROVIDE_ISLOW_TABLES
     case JDCT_ISLOW:
       {
-	/* For LL&M IDCT method, multipliers are equal to raw quantization
-	 * coefficients, but are stored as ints to ensure access efficiency.
-	 */
-	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
-	}
+        /* For LL&M IDCT method, multipliers are equal to raw quantization
+         * coefficients, but are stored as ints to ensure access efficiency.
+         */
+        ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+        for (i = 0; i < DCTSIZE2; i++) {
+          ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+        }
       }
       break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
     case JDCT_IFAST:
       {
-	/* For AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * For integer operation, the multiplier table is to be scaled by
-	 * IFAST_SCALE_BITS.
-	 */
-	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+        /* For AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * For integer operation, the multiplier table is to be scaled by
+         * IFAST_SCALE_BITS.
+         */
+        IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
 #define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
 
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ifmtbl[i] = (IFAST_MULT_TYPE)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-IFAST_SCALE_BITS);
-	}
+        for (i = 0; i < DCTSIZE2; i++) {
+          ifmtbl[i] = (IFAST_MULT_TYPE)
+            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                  (INT32) aanscales[i]),
+                    CONST_BITS-IFAST_SCALE_BITS);
+        }
       }
       break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
     case JDCT_FLOAT:
       {
-	/* For float AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 */
-	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
+        /* For float AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         */
+        FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
 
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fmtbl[i] = (FLOAT_MULT_TYPE)
-	      ((double) qtbl->quantval[i] *
-	       aanscalefactor[row] * aanscalefactor[col]);
-	    i++;
-	  }
-	}
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fmtbl[i] = (FLOAT_MULT_TYPE)
+              ((double) qtbl->quantval[i] *
+               aanscalefactor[row] * aanscalefactor[col]);
+            i++;
+          }
+        }
       }
       break;
 #endif
@@ -272,7 +321,7 @@
 
   idct = (my_idct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_idct_controller));
+                                SIZEOF(my_idct_controller));
   cinfo->idct = (struct jpeg_inverse_dct *) idct;
   idct->pub.start_pass = start_pass;
 
@@ -281,7 +330,7 @@
     /* Allocate and pre-zero a multiplier table for each component */
     compptr->dct_table =
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(multiplier_table));
+                                  SIZEOF(multiplier_table));
     MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
     /* Mark multiplier table not yet set up for any method */
     idct->cur_method[ci] = -1;
diff --git a/jdhuff.c b/jdhuff.c
index 4197cc5..b545e66 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -19,7 +19,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+#include "jdhuff.h"             /* Declarations shared with jdphuff.c */
 #include "jpegcomp.h"
 
 
@@ -44,10 +44,10 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -58,11 +58,11 @@
   /* These fields are loaded into local variables at start of each MCU.
    * In case of suspension, we exit WITHOUT updating them.
    */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
@@ -107,9 +107,9 @@
     /* Compute derived values for Huffman tables */
     /* We may do this more than once for a table, but it's not expensive */
     jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
-			    & entropy->dc_derived_tbls[dctbl]);
+                            & entropy->dc_derived_tbls[dctbl]);
     jpeg_make_d_derived_tbl(cinfo, FALSE, actbl,
-			    & entropy->ac_derived_tbls[actbl]);
+                            & entropy->ac_derived_tbls[actbl]);
     /* Initialize DC predictions to 0 */
     entropy->saved.last_dc_val[ci] = 0;
   }
@@ -150,7 +150,7 @@
 
 GLOBAL(void)
 jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-			 d_derived_tbl ** pdtbl)
+                         d_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *htbl;
   d_derived_tbl *dtbl;
@@ -176,26 +176,26 @@
   if (*pdtbl == NULL)
     *pdtbl = (d_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(d_derived_tbl));
+                                  SIZEOF(d_derived_tbl));
   dtbl = *pdtbl;
-  dtbl->pub = htbl;		/* fill in back link */
-  
+  dtbl->pub = htbl;             /* fill in back link */
+
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
   for (l = 1; l <= 16; l++) {
     i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (i--)
       huffsize[p++] = (char) l;
   }
   huffsize[p] = 0;
   numsymbols = p;
-  
+
   /* Figure C.2: generate the codes themselves */
   /* We also validate that the counts represent a legal Huffman code tree. */
-  
+
   code = 0;
   si = huffsize[0];
   p = 0;
@@ -225,7 +225,7 @@
       p += htbl->bits[l];
       dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
     } else {
-      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
     }
   }
   dtbl->valoffset[17] = 0;
@@ -248,8 +248,8 @@
       /* Generate left-justified code followed by all possible bit sequences */
       lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
       for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
-	lookbits++;
+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+        lookbits++;
       }
     }
   }
@@ -264,7 +264,7 @@
     for (i = 0; i < numsymbols; i++) {
       int sym = htbl->huffval[i];
       if (sym < 0 || sym > 15)
-	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     }
   }
 }
@@ -286,7 +286,7 @@
  */
 
 #ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15	/* minimum allowable value */
+#define MIN_GET_BITS  15        /* minimum allowable value */
 #else
 #define MIN_GET_BITS  (BIT_BUF_SIZE-7)
 #endif
@@ -294,8 +294,8 @@
 
 GLOBAL(boolean)
 jpeg_fill_bit_buffer (bitread_working_state * state,
-		      register bit_buf_type get_buffer, register int bits_left,
-		      int nbits)
+                      register bit_buf_type get_buffer, register int bits_left,
+                      int nbits)
 /* Load up the bit buffer to a depth of at least nbits */
 {
   /* Copy heavily used state fields into locals (hopefully registers) */
@@ -307,54 +307,54 @@
   /* (It is assumed that no request will be for more than that many bits.) */
   /* We fail to do so only if we hit a marker or are forced to suspend. */
 
-  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+  if (cinfo->unread_marker == 0) {      /* cannot advance past a marker */
     while (bits_left < MIN_GET_BITS) {
       register int c;
 
       /* Attempt to read a byte */
       if (bytes_in_buffer == 0) {
-	if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	  return FALSE;
-	next_input_byte = cinfo->src->next_input_byte;
-	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+        if (! (*cinfo->src->fill_input_buffer) (cinfo))
+          return FALSE;
+        next_input_byte = cinfo->src->next_input_byte;
+        bytes_in_buffer = cinfo->src->bytes_in_buffer;
       }
       bytes_in_buffer--;
       c = GETJOCTET(*next_input_byte++);
 
       /* If it's 0xFF, check and discard stuffed zero byte */
       if (c == 0xFF) {
-	/* Loop here to discard any padding FF's on terminating marker,
-	 * so that we can save a valid unread_marker value.  NOTE: we will
-	 * accept multiple FF's followed by a 0 as meaning a single FF data
-	 * byte.  This data pattern is not valid according to the standard.
-	 */
-	do {
-	  if (bytes_in_buffer == 0) {
-	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	      return FALSE;
-	    next_input_byte = cinfo->src->next_input_byte;
-	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
-	  }
-	  bytes_in_buffer--;
-	  c = GETJOCTET(*next_input_byte++);
-	} while (c == 0xFF);
+        /* Loop here to discard any padding FF's on terminating marker,
+         * so that we can save a valid unread_marker value.  NOTE: we will
+         * accept multiple FF's followed by a 0 as meaning a single FF data
+         * byte.  This data pattern is not valid according to the standard.
+         */
+        do {
+          if (bytes_in_buffer == 0) {
+            if (! (*cinfo->src->fill_input_buffer) (cinfo))
+              return FALSE;
+            next_input_byte = cinfo->src->next_input_byte;
+            bytes_in_buffer = cinfo->src->bytes_in_buffer;
+          }
+          bytes_in_buffer--;
+          c = GETJOCTET(*next_input_byte++);
+        } while (c == 0xFF);
 
-	if (c == 0) {
-	  /* Found FF/00, which represents an FF data byte */
-	  c = 0xFF;
-	} else {
-	  /* Oops, it's actually a marker indicating end of compressed data.
-	   * Save the marker code for later use.
-	   * Fine point: it might appear that we should save the marker into
-	   * bitread working state, not straight into permanent state.  But
-	   * once we have hit a marker, we cannot need to suspend within the
-	   * current MCU, because we will read no more bytes from the data
-	   * source.  So it is OK to update permanent state right away.
-	   */
-	  cinfo->unread_marker = c;
-	  /* See if we need to insert some fake zero bits. */
-	  goto no_more_bytes;
-	}
+        if (c == 0) {
+          /* Found FF/00, which represents an FF data byte */
+          c = 0xFF;
+        } else {
+          /* Oops, it's actually a marker indicating end of compressed data.
+           * Save the marker code for later use.
+           * Fine point: it might appear that we should save the marker into
+           * bitread working state, not straight into permanent state.  But
+           * once we have hit a marker, we cannot need to suspend within the
+           * current MCU, because we will read no more bytes from the data
+           * source.  So it is OK to update permanent state right away.
+           */
+          cinfo->unread_marker = c;
+          /* See if we need to insert some fake zero bits. */
+          goto no_more_bytes;
+        }
       }
 
       /* OK, load c into get_buffer */
@@ -374,8 +374,8 @@
        * appears per data segment.
        */
       if (! cinfo->entropy->insufficient_data) {
-	WARNMS(cinfo, JWRN_HIT_MARKER);
-	cinfo->entropy->insufficient_data = TRUE;
+        WARNMS(cinfo, JWRN_HIT_MARKER);
+        cinfo->entropy->insufficient_data = TRUE;
       }
       /* Fill the buffer with zero bits */
       get_buffer <<= MIN_GET_BITS - bits_left;
@@ -444,8 +444,8 @@
 
 GLOBAL(int)
 jpeg_huff_decode (bitread_working_state * state,
-		  register bit_buf_type get_buffer, register int bits_left,
-		  d_derived_tbl * htbl, int min_bits)
+                  register bit_buf_type get_buffer, register int bits_left,
+                  d_derived_tbl * htbl, int min_bits)
 {
   register int l = min_bits;
   register INT32 code;
@@ -474,7 +474,7 @@
 
   if (l > 16) {
     WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;			/* fake a zero as the safest result */
+    return 0;                   /* fake a zero as the safest result */
   }
 
   return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
@@ -594,7 +594,7 @@
 
         r = s >> 4;
         s &= 15;
-      
+
         if (s) {
           k += r;
           CHECK_BIT_BUFFER(br_state, s, return FALSE);
@@ -683,7 +683,7 @@
         HUFF_DECODE_FAST(s, l, actbl);
         r = s >> 4;
         s &= 15;
-      
+
         if (s) {
           k += r;
           FILL_BIT_BUFFER_FAST
@@ -755,7 +755,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
     usefast = 0;
   }
 
@@ -797,7 +797,7 @@
 
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_decoder));
+                                SIZEOF(huff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_huff_decoder;
   entropy->pub.decode_mcu = decode_mcu;
diff --git a/jdhuff.h b/jdhuff.h
index 2201436..9ce3399 100644
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -15,21 +15,21 @@
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_make_d_derived_tbl	jMkDDerived
-#define jpeg_fill_bit_buffer	jFilBitBuf
-#define jpeg_huff_decode	jHufDecode
+#define jpeg_make_d_derived_tbl jMkDDerived
+#define jpeg_fill_bit_buffer    jFilBitBuf
+#define jpeg_huff_decode        jHufDecode
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
 /* Derived data constructed for each Huffman table */
 
-#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
 
 typedef struct {
   /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  INT32 maxcode[18];            /* largest code of length k (-1 if none) */
   /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[18];		/* huffval[] offset for codes of length k */
+  INT32 valoffset[18];          /* huffval[] offset for codes of length k */
   /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
    * the smallest code of length k; so given a code of length k, the
    * corresponding symbol is huffval[code + valoffset[k]]
@@ -53,8 +53,8 @@
 
 /* Expand a Huffman table definition into the derived format */
 EXTERN(void) jpeg_make_d_derived_tbl
-	JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
-	     d_derived_tbl ** pdtbl));
+        JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
+             d_derived_tbl ** pdtbl));
 
 
 /*
@@ -77,13 +77,13 @@
 
 #if __WORDSIZE == 64 || defined(_WIN64)
 
-typedef size_t bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  64		/* size of buffer in bits */
+typedef size_t bit_buf_type;    /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64                /* size of buffer in bits */
 
 #else
 
-typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32		/* size of buffer in bits */
+typedef INT32 bit_buf_type;     /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32                /* size of buffer in bits */
 
 #endif
 
@@ -94,43 +94,43 @@
  * because not all machines measure sizeof in 8-bit bytes.
  */
 
-typedef struct {		/* Bitreading state saved across MCUs */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
+typedef struct {                /* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
 } bitread_perm_state;
 
-typedef struct {		/* Bitreading working state within an MCU */
+typedef struct {                /* Bitreading working state within an MCU */
   /* Current data source location */
   /* We need a copy, rather than munging the original, in case of suspension */
   const JOCTET * next_input_byte; /* => next byte to read from source */
-  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  size_t bytes_in_buffer;       /* # of bytes remaining in source buffer */
   /* Bit input buffer --- note these values are kept in register variables,
    * not in this struct, inside the inner loops.
    */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
   /* Pointer needed by jpeg_fill_bit_buffer. */
-  j_decompress_ptr cinfo;	/* back link to decompress master record */
+  j_decompress_ptr cinfo;       /* back link to decompress master record */
 } bitread_working_state;
 
 /* Macros to declare and load/save bitread local variables. */
 #define BITREAD_STATE_VARS  \
-	register bit_buf_type get_buffer;  \
-	register int bits_left;  \
-	bitread_working_state br_state
+        register bit_buf_type get_buffer;  \
+        register int bits_left;  \
+        bitread_working_state br_state
 
 #define BITREAD_LOAD_STATE(cinfop,permstate)  \
-	br_state.cinfo = cinfop; \
-	br_state.next_input_byte = cinfop->src->next_input_byte; \
-	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
-	get_buffer = permstate.get_buffer; \
-	bits_left = permstate.bits_left;
+        br_state.cinfo = cinfop; \
+        br_state.next_input_byte = cinfop->src->next_input_byte; \
+        br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+        get_buffer = permstate.get_buffer; \
+        bits_left = permstate.bits_left;
 
 #define BITREAD_SAVE_STATE(cinfop,permstate)  \
-	cinfop->src->next_input_byte = br_state.next_input_byte; \
-	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
-	permstate.get_buffer = get_buffer; \
-	permstate.bits_left = bits_left
+        cinfop->src->next_input_byte = br_state.next_input_byte; \
+        cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+        permstate.get_buffer = get_buffer; \
+        permstate.bits_left = bits_left
 
 /*
  * These macros provide the in-line portion of bit fetching.
@@ -138,37 +138,37 @@
  * before using GET_BITS, PEEK_BITS, or DROP_BITS.
  * The variables get_buffer and bits_left are assumed to be locals,
  * but the state struct might not be (jpeg_huff_decode needs this).
- *	CHECK_BIT_BUFFER(state,n,action);
- *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      CHECK_BIT_BUFFER(state,n,action);
+ *              Ensure there are N bits in get_buffer; if suspend, take action.
  *      val = GET_BITS(n);
- *		Fetch next N bits.
+ *              Fetch next N bits.
  *      val = PEEK_BITS(n);
- *		Fetch next N bits without removing them from the buffer.
- *	DROP_BITS(n);
- *		Discard next N bits.
+ *              Fetch next N bits without removing them from the buffer.
+ *      DROP_BITS(n);
+ *              Discard next N bits.
  * The value N should be a simple variable, not an expression, because it
  * is evaluated multiple times.
  */
 
 #define CHECK_BIT_BUFFER(state,nbits,action) \
-	{ if (bits_left < (nbits)) {  \
-	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
-	      { action; }  \
-	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+        { if (bits_left < (nbits)) {  \
+            if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+              { action; }  \
+            get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
 
 #define GET_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
+        (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
 
 #define PEEK_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
+        (((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
 
 #define DROP_BITS(nbits) \
-	(bits_left -= (nbits))
+        (bits_left -= (nbits))
 
 /* Load up the bit buffer to a depth of at least nbits */
 EXTERN(boolean) jpeg_fill_bit_buffer
-	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
-	     register int bits_left, int nbits));
+        JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+             register int bits_left, int nbits));
 
 
 /*
@@ -204,7 +204,7 @@
   } else { \
 slowlabel: \
     if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-	{ failaction; } \
+        { failaction; } \
     get_buffer = state.get_buffer; bits_left = state.bits_left; \
   } \
 }
@@ -231,5 +231,5 @@
 
 /* Out-of-line case for Huffman code fetching */
 EXTERN(int) jpeg_huff_decode
-	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
-	     register int bits_left, d_derived_tbl * htbl, int min_bits));
+        JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+             register int bits_left, d_derived_tbl * htbl, int min_bits));
diff --git a/jdinput.c b/jdinput.c
index aa17d25..4afb074 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -3,8 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2009 by Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -25,7 +24,7 @@
 typedef struct {
   struct jpeg_input_controller pub; /* public fields */
 
-  boolean inheaders;		/* TRUE until first SOS is reached */
+  boolean inheaders;            /* TRUE until first SOS is reached */
 } my_input_controller;
 
 typedef my_input_controller * my_inputctl_ptr;
@@ -39,79 +38,6 @@
  * Routines to calculate various quantities related to the size of the image.
  */
 
-
-#if JPEG_LIB_VERSION >= 80
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_core_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for transcoding and full decompression.
- */
-{
-#ifdef IDCT_SCALING_SUPPORTED
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
-    /* Provide 1/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
-    /* Provide 2/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
-    /* Provide 4/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
-    /* Provide 8/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  }
-  /* Recompute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
-
-#endif /* IDCT_SCALING_SUPPORTED */
-}
-#endif
-
-
 LOCAL(void)
 initial_setup (j_decompress_ptr cinfo)
 /* Called once, when first SOS marker is reached */
@@ -131,7 +57,7 @@
   /* Check that number of components won't exceed internal array sizes */
   if (cinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
 
   /* Compute maximum sampling factors; check factor validity */
   cinfo->max_h_samp_factor = 1;
@@ -139,12 +65,12 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+        compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
       ERREXIT(cinfo, JERR_BAD_SAMPLING);
     cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
+                                   compptr->h_samp_factor);
     cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
+                                   compptr->v_samp_factor);
   }
 
 #if JPEG_LIB_VERSION >=80
@@ -174,10 +100,10 @@
     /* Size in DCT blocks */
     compptr->width_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->height_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
     /* downsampled_width and downsampled_height will also be overridden by
      * jdmaster.c if we are doing full decompression.  The transcoder library
      * doesn't use these values, but the calling application might.
@@ -185,10 +111,10 @@
     /* Size in samples */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
+                    (long) cinfo->max_h_samp_factor);
     compptr->downsampled_height = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
+                    (long) cinfo->max_v_samp_factor);
     /* Mark component needed, until color conversion says otherwise */
     compptr->component_needed = TRUE;
     /* Mark no quantization table yet saved for component */
@@ -198,7 +124,7 @@
   /* Compute number of fully interleaved MCU rows. */
   cinfo->total_iMCU_rows = (JDIMENSION)
     jdiv_round_up((long) cinfo->image_height,
-		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+                  (long) (cinfo->max_v_samp_factor*DCTSIZE));
 
   /* Decide whether file contains multiple scans */
   if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
@@ -215,16 +141,16 @@
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
-  
+
   if (cinfo->comps_in_scan == 1) {
-    
+
     /* Noninterleaved (single-component) scan */
     compptr = cinfo->cur_comp_info[0];
-    
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = compptr->width_in_blocks;
     cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
+
     /* For noninterleaved scan, always one block per MCU */
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
@@ -237,28 +163,28 @@
     tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
     if (tmp == 0) tmp = compptr->v_samp_factor;
     compptr->last_row_height = tmp;
-    
+
     /* Prepare array describing MCU composition */
     cinfo->blocks_in_MCU = 1;
     cinfo->MCU_membership[0] = 0;
-    
+
   } else {
-    
+
     /* Interleaved (multi-component) scan */
     if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
+               MAX_COMPS_IN_SCAN);
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor*DCTSIZE));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height,
-		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
-    
+                    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
     cinfo->blocks_in_MCU = 0;
-    
+
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       compptr = cinfo->cur_comp_info[ci];
       /* Sampling factors give # of blocks of component in each MCU */
@@ -276,12 +202,12 @@
       /* Prepare array describing MCU composition */
       mcublks = compptr->MCU_blocks;
       if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
       while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
       }
     }
-    
+
   }
 }
 
@@ -322,12 +248,12 @@
     /* Make sure specified quantization table is present */
     qtblno = compptr->quant_tbl_no;
     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     /* OK, save away the quantization table */
     qtbl = (JQUANT_TBL *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(JQUANT_TBL));
+                                  SIZEOF(JQUANT_TBL));
     MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
     compptr->quant_table = qtbl;
   }
@@ -387,31 +313,31 @@
   val = (*cinfo->marker->read_markers) (cinfo);
 
   switch (val) {
-  case JPEG_REACHED_SOS:	/* Found SOS */
-    if (inputctl->inheaders) {	/* 1st SOS */
+  case JPEG_REACHED_SOS:        /* Found SOS */
+    if (inputctl->inheaders) {  /* 1st SOS */
       initial_setup(cinfo);
       inputctl->inheaders = FALSE;
       /* Note: start_input_pass must be called by jdmaster.c
        * before any more input can be consumed.  jdapimin.c is
        * responsible for enforcing this sequencing.
        */
-    } else {			/* 2nd or later SOS marker */
+    } else {                    /* 2nd or later SOS marker */
       if (! inputctl->pub.has_multiple_scans)
-	ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+        ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
       start_input_pass(cinfo);
     }
     break;
-  case JPEG_REACHED_EOI:	/* Found EOI */
+  case JPEG_REACHED_EOI:        /* Found EOI */
     inputctl->pub.eoi_reached = TRUE;
-    if (inputctl->inheaders) {	/* Tables-only datastream, apparently */
+    if (inputctl->inheaders) {  /* Tables-only datastream, apparently */
       if (cinfo->marker->saw_SOF)
-	ERREXIT(cinfo, JERR_SOF_NO_SOS);
+        ERREXIT(cinfo, JERR_SOF_NO_SOS);
     } else {
       /* Prevent infinite loop in coef ctlr's decompress_data routine
        * if user set output_scan_number larger than number of scans.
        */
       if (cinfo->output_scan_number > cinfo->input_scan_number)
-	cinfo->output_scan_number = cinfo->input_scan_number;
+        cinfo->output_scan_number = cinfo->input_scan_number;
     }
     break;
   case JPEG_SUSPENDED:
@@ -456,7 +382,7 @@
   /* Create subobject in permanent pool */
   inputctl = (my_inputctl_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_input_controller));
+                                SIZEOF(my_input_controller));
   cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
   /* Initialize method pointers */
   inputctl->pub.consume_input = consume_markers;
diff --git a/jdmainct.c b/jdmainct.c
index 2a69c53..922f649 100644
--- a/jdmainct.c
+++ b/jdmainct.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -120,39 +120,39 @@
   /* Pointer to allocated workspace (M or M+2 row groups). */
   JSAMPARRAY buffer[MAX_COMPONENTS];
 
-  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
-  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+  boolean buffer_full;          /* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;      /* counts row groups output to postprocessor */
 
   /* Remaining fields are only used in the context case. */
 
   /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+  JSAMPIMAGE xbuffer[2];        /* pointers to weird pointer lists */
 
-  int whichptr;			/* indicates which pointer set is now in use */
-  int context_state;		/* process_data state machine status */
-  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
-  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+  int whichptr;                 /* indicates which pointer set is now in use */
+  int context_state;            /* process_data state machine status */
+  JDIMENSION rowgroups_avail;   /* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;      /* counts iMCU rows to detect image top/bot */
 } my_main_controller;
 
 typedef my_main_controller * my_main_ptr;
 
 /* context_state values: */
-#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
-#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
-#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+#define CTX_PREPARE_FOR_IMCU    0       /* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU        1       /* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW       2       /* feeding postponed row group */
 
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+             JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
 METHODDEF(void) process_data_context_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+             JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
 #ifdef QUANT_2PASS_SUPPORTED
 METHODDEF(void) process_data_crank_post
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+             JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
 #endif
 
 
@@ -173,7 +173,7 @@
    */
   main_ptr->xbuffer[0] = (JSAMPIMAGE)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+                                cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
   main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
@@ -185,8 +185,8 @@
      */
     xbuf = (JSAMPARRAY)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
-    xbuf += rgroup;		/* want one row group at negative offsets */
+                                  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
+    xbuf += rgroup;             /* want one row group at negative offsets */
     main_ptr->xbuffer[0][ci] = xbuf;
     xbuf += rgroup * (M + 4);
     main_ptr->xbuffer[1][ci] = xbuf;
@@ -316,14 +316,14 @@
     if (cinfo->upsample->need_context_rows) {
       main_ptr->pub.process_data = process_data_context_main;
       make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      main_ptr->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      main_ptr->whichptr = 0;   /* Read first iMCU row into xbuffer[0] */
       main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
       main_ptr->iMCU_row_ctr = 0;
     } else {
       /* Simple case with no context needed */
       main_ptr->pub.process_data = process_data_simple_main;
     }
-    main_ptr->buffer_full = FALSE;	/* Mark buffer empty */
+    main_ptr->buffer_full = FALSE;      /* Mark buffer empty */
     main_ptr->rowgroup_ctr = 0;
     break;
 #ifdef QUANT_2PASS_SUPPORTED
@@ -346,8 +346,8 @@
 
 METHODDEF(void)
 process_data_simple_main (j_decompress_ptr cinfo,
-			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			  JDIMENSION out_rows_avail)
+                          JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                          JDIMENSION out_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   JDIMENSION rowgroups_avail;
@@ -355,8 +355,8 @@
   /* Read input data if we haven't filled the main buffer yet */
   if (! main_ptr->buffer_full) {
     if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
-      return;			/* suspension forced, can do nothing more */
-    main_ptr->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
   }
 
   /* There are always min_DCT_scaled_size row groups in an iMCU row. */
@@ -368,8 +368,8 @@
 
   /* Feed the postprocessor */
   (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
-				     &main_ptr->rowgroup_ctr, rowgroups_avail,
-				     output_buf, out_row_ctr, out_rows_avail);
+                                     &main_ptr->rowgroup_ctr, rowgroups_avail,
+                                     output_buf, out_row_ctr, out_rows_avail);
 
   /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
   if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
@@ -386,18 +386,18 @@
 
 METHODDEF(void)
 process_data_context_main (j_decompress_ptr cinfo,
-			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail)
+                           JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                           JDIMENSION out_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
   /* Read input data if we haven't filled the main buffer yet */
   if (! main_ptr->buffer_full) {
     if (! (*cinfo->coef->decompress_data) (cinfo,
-					   main_ptr->xbuffer[main_ptr->whichptr]))
-      return;			/* suspension forced, can do nothing more */
-    main_ptr->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    main_ptr->iMCU_row_ctr++;	/* count rows received */
+                                           main_ptr->xbuffer[main_ptr->whichptr]))
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
+    main_ptr->iMCU_row_ctr++;   /* count rows received */
   }
 
   /* Postprocessor typically will not swallow all the input data it is handed
@@ -409,13 +409,13 @@
   case CTX_POSTPONED_ROW:
     /* Call postprocessor using previously set pointers for postponed row */
     (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
-			&main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
+                        &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
+                        output_buf, out_row_ctr, out_rows_avail);
     if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
-      return;			/* Need to suspend */
+      return;                   /* Need to suspend */
     main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
     if (*out_row_ctr >= out_rows_avail)
-      return;			/* Postprocessor exactly filled output buf */
+      return;                   /* Postprocessor exactly filled output buf */
     /*FALLTHROUGH*/
   case CTX_PREPARE_FOR_IMCU:
     /* Prepare to process first M-1 row groups of this iMCU row */
@@ -431,15 +431,15 @@
   case CTX_PROCESS_IMCU:
     /* Call postprocessor using previously set pointers */
     (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
-			&main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
+                        &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
+                        output_buf, out_row_ctr, out_rows_avail);
     if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
-      return;			/* Need to suspend */
+      return;                   /* Need to suspend */
     /* After the first iMCU, change wraparound pointers to normal state */
     if (main_ptr->iMCU_row_ctr == 1)
       set_wraparound_pointers(cinfo);
     /* Prepare to load new iMCU row using other xbuffer list */
-    main_ptr->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    main_ptr->whichptr ^= 1;    /* 0=>1 or 1=>0 */
     main_ptr->buffer_full = FALSE;
     /* Still need to process last row group of this iMCU row, */
     /* which is saved at index M+1 of the other xbuffer */
@@ -460,12 +460,12 @@
 
 METHODDEF(void)
 process_data_crank_post (j_decompress_ptr cinfo,
-			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			 JDIMENSION out_rows_avail)
+                         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                         JDIMENSION out_rows_avail)
 {
   (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
-				     (JDIMENSION *) NULL, (JDIMENSION) 0,
-				     output_buf, out_row_ctr, out_rows_avail);
+                                     (JDIMENSION *) NULL, (JDIMENSION) 0,
+                                     output_buf, out_row_ctr, out_rows_avail);
 }
 
 #endif /* QUANT_2PASS_SUPPORTED */
@@ -484,11 +484,11 @@
 
   main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
+                                SIZEOF(my_main_controller));
   cinfo->main = (struct jpeg_d_main_controller *) main_ptr;
   main_ptr->pub.start_pass = start_pass_main;
 
-  if (need_full_buffer)		/* shouldn't happen */
+  if (need_full_buffer)         /* shouldn't happen */
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 
   /* Allocate the workspace.
@@ -508,8 +508,8 @@
     rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
       cinfo->_min_DCT_scaled_size; /* height of a row group of component */
     main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 compptr->width_in_blocks * compptr->_DCT_scaled_size,
-			 (JDIMENSION) (rgroup * ngroups));
+                        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                         compptr->width_in_blocks * compptr->_DCT_scaled_size,
+                         (JDIMENSION) (rgroup * ngroups));
   }
 }
diff --git a/jdmarker.c b/jdmarker.c
index 914e298..d996f6b 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2012, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -19,29 +19,29 @@
 #include "jpeglib.h"
 
 
-typedef enum {			/* JPEG marker codes */
+typedef enum {                  /* JPEG marker codes */
   M_SOF0  = 0xc0,
   M_SOF1  = 0xc1,
   M_SOF2  = 0xc2,
   M_SOF3  = 0xc3,
-  
+
   M_SOF5  = 0xc5,
   M_SOF6  = 0xc6,
   M_SOF7  = 0xc7,
-  
+
   M_JPG   = 0xc8,
   M_SOF9  = 0xc9,
   M_SOF10 = 0xca,
   M_SOF11 = 0xcb,
-  
+
   M_SOF13 = 0xcd,
   M_SOF14 = 0xce,
   M_SOF15 = 0xcf,
-  
+
   M_DHT   = 0xc4,
-  
+
   M_DAC   = 0xcc,
-  
+
   M_RST0  = 0xd0,
   M_RST1  = 0xd1,
   M_RST2  = 0xd2,
@@ -50,7 +50,7 @@
   M_RST5  = 0xd5,
   M_RST6  = 0xd6,
   M_RST7  = 0xd7,
-  
+
   M_SOI   = 0xd8,
   M_EOI   = 0xd9,
   M_SOS   = 0xda,
@@ -59,7 +59,7 @@
   M_DRI   = 0xdd,
   M_DHP   = 0xde,
   M_EXP   = 0xdf,
-  
+
   M_APP0  = 0xe0,
   M_APP1  = 0xe1,
   M_APP2  = 0xe2,
@@ -76,13 +76,13 @@
   M_APP13 = 0xed,
   M_APP14 = 0xee,
   M_APP15 = 0xef,
-  
+
   M_JPG0  = 0xf0,
   M_JPG13 = 0xfd,
   M_COM   = 0xfe,
-  
+
   M_TEM   = 0x01,
-  
+
   M_ERROR = 0x100
 } JPEG_MARKER;
 
@@ -101,8 +101,8 @@
   unsigned int length_limit_APPn[16];
 
   /* Status of COM/APPn marker saving */
-  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
-  unsigned int bytes_read;		/* data bytes read so far in marker */
+  jpeg_saved_marker_ptr cur_marker;     /* NULL if not processing a marker */
+  unsigned int bytes_read;              /* data bytes read so far in marker */
   /* Note: cur_marker is not linked into marker_list until it's all read. */
 } my_marker_reader;
 
@@ -119,49 +119,49 @@
 
 /* Declare and initialize local copies of input pointer/count */
 #define INPUT_VARS(cinfo)  \
-	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
-	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
-	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+        struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+        const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+        size_t bytes_in_buffer = datasrc->bytes_in_buffer
 
 /* Unload the local copies --- do this only at a restart boundary */
 #define INPUT_SYNC(cinfo)  \
-	( datasrc->next_input_byte = next_input_byte,  \
-	  datasrc->bytes_in_buffer = bytes_in_buffer )
+        ( datasrc->next_input_byte = next_input_byte,  \
+          datasrc->bytes_in_buffer = bytes_in_buffer )
 
 /* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
 #define INPUT_RELOAD(cinfo)  \
-	( next_input_byte = datasrc->next_input_byte,  \
-	  bytes_in_buffer = datasrc->bytes_in_buffer )
+        ( next_input_byte = datasrc->next_input_byte,  \
+          bytes_in_buffer = datasrc->bytes_in_buffer )
 
 /* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
  * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
  * but we must reload the local copies after a successful fill.
  */
 #define MAKE_BYTE_AVAIL(cinfo,action)  \
-	if (bytes_in_buffer == 0) {  \
-	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
-	    { action; }  \
-	  INPUT_RELOAD(cinfo);  \
-	}
+        if (bytes_in_buffer == 0) {  \
+          if (! (*datasrc->fill_input_buffer) (cinfo))  \
+            { action; }  \
+          INPUT_RELOAD(cinfo);  \
+        }
 
 /* Read a byte into variable V.
  * If must suspend, take the specified action (typically "return FALSE").
  */
 #define INPUT_BYTE(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = GETJOCTET(*next_input_byte++); )
+        MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V = GETJOCTET(*next_input_byte++); )
 
 /* As above, but read two bytes interpreted as an unsigned 16-bit integer.
  * V should be declared unsigned int or perhaps INT32.
  */
 #define INPUT_2BYTES(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
-		  MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V += GETJOCTET(*next_input_byte++); )
+        MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+                  MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V += GETJOCTET(*next_input_byte++); )
 
 
 /*
@@ -200,7 +200,7 @@
 /* Process an SOI marker */
 {
   int i;
-  
+
   TRACEMS(cinfo, 1, JTRC_SOI);
 
   if (cinfo->marker->saw_SOI)
@@ -257,8 +257,8 @@
   length -= 8;
 
   TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
-	   (int) cinfo->image_width, (int) cinfo->image_height,
-	   cinfo->num_components);
+           (int) cinfo->image_width, (int) cinfo->image_height,
+           cinfo->num_components);
 
   if (cinfo->marker->saw_SOF)
     ERREXIT(cinfo, JERR_SOF_DUPLICATE);
@@ -273,11 +273,11 @@
   if (length != (cinfo->num_components * 3))
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
-  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+  if (cinfo->comp_info == NULL) /* do only once, even if suspend */
     cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 cinfo->num_components * SIZEOF(jpeg_component_info));
-  
+                        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                         cinfo->num_components * SIZEOF(jpeg_component_info));
+
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     compptr->component_index = ci;
@@ -288,8 +288,8 @@
     INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
 
     TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
-	     compptr->component_id, compptr->h_samp_factor,
-	     compptr->v_samp_factor, compptr->quant_tbl_no);
+             compptr->component_id, compptr->h_samp_factor,
+             compptr->v_samp_factor, compptr->quant_tbl_no);
   }
 
   cinfo->marker->saw_SOF = TRUE;
@@ -330,12 +330,12 @@
   for (i = 0; i < n; i++) {
     INPUT_BYTE(cinfo, cc, return FALSE);
     INPUT_BYTE(cinfo, c, return FALSE);
-    
+
     for (ci = 0, compptr = cinfo->comp_info;
-	 ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN;
-	 ci++, compptr++) {
+         ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN;
+         ci++, compptr++) {
       if (cc == compptr->component_id && !cinfo->cur_comp_info[ci])
-	goto id_found;
+        goto id_found;
     }
 
     ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
@@ -345,9 +345,9 @@
     cinfo->cur_comp_info[i] = compptr;
     compptr->dc_tbl_no = (c >> 4) & 15;
     compptr->ac_tbl_no = (c     ) & 15;
-    
+
     TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
-	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+             compptr->dc_tbl_no, compptr->ac_tbl_no);
 
     /* This CSi (cc) should differ from the previous CSi */
     for (pi = 0; pi < i; pi++) {
@@ -367,7 +367,7 @@
   cinfo->Al = (c     ) & 15;
 
   TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
-	   cinfo->Ah, cinfo->Al);
+           cinfo->Ah, cinfo->Al);
 
   /* Prepare to scan data & restart markers */
   cinfo->marker->next_restart_num = 0;
@@ -392,7 +392,7 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   while (length > 0) {
     INPUT_BYTE(cinfo, index, return FALSE);
     INPUT_BYTE(cinfo, val, return FALSE);
@@ -406,11 +406,11 @@
 
     if (index >= NUM_ARITH_TBLS) { /* define AC table */
       cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
-    } else {			/* define DC table */
+    } else {                    /* define DC table */
       cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
       cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
       if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
-	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+        ERREXIT1(cinfo, JERR_DAC_VALUE, val);
     }
   }
 
@@ -441,12 +441,12 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   while (length > 16) {
     INPUT_BYTE(cinfo, index, return FALSE);
 
     TRACEMS1(cinfo, 1, JTRC_DHT, index);
-      
+
     bits[0] = 0;
     count = 0;
     for (i = 1; i <= 16; i++) {
@@ -457,11 +457,11 @@
     length -= 1 + 16;
 
     TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[1], bits[2], bits[3], bits[4],
-	     bits[5], bits[6], bits[7], bits[8]);
+             bits[1], bits[2], bits[3], bits[4],
+             bits[5], bits[6], bits[7], bits[8]);
     TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[9], bits[10], bits[11], bits[12],
-	     bits[13], bits[14], bits[15], bits[16]);
+             bits[9], bits[10], bits[11], bits[12],
+             bits[13], bits[14], bits[15], bits[16]);
 
     /* Here we just do minimal validation of the counts to avoid walking
      * off the end of our table space.  jdhuff.c will check more carefully.
@@ -476,19 +476,20 @@
 
     length -= count;
 
-    if (index & 0x10) {		/* AC table definition */
+    if (index & 0x10) {         /* AC table definition */
       index -= 0x10;
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
       htblptr = &cinfo->ac_huff_tbl_ptrs[index];
-    } else {			/* DC table definition */
+    } else {                    /* DC table definition */
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
       htblptr = &cinfo->dc_huff_tbl_ptrs[index];
     }
 
-    if (index < 0 || index >= NUM_HUFF_TBLS)
-      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
-
     if (*htblptr == NULL)
       *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-  
+
     MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
     MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
   }
@@ -523,27 +524,27 @@
 
     if (n >= NUM_QUANT_TBLS)
       ERREXIT1(cinfo, JERR_DQT_INDEX, n);
-      
+
     if (cinfo->quant_tbl_ptrs[n] == NULL)
       cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
     quant_ptr = cinfo->quant_tbl_ptrs[n];
 
     for (i = 0; i < DCTSIZE2; i++) {
       if (prec)
-	INPUT_2BYTES(cinfo, tmp, return FALSE);
+        INPUT_2BYTES(cinfo, tmp, return FALSE);
       else
-	INPUT_BYTE(cinfo, tmp, return FALSE);
+        INPUT_BYTE(cinfo, tmp, return FALSE);
       /* We convert the zigzag-order table to natural array order. */
       quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp;
     }
 
     if (cinfo->err->trace_level >= 2) {
       for (i = 0; i < DCTSIZE2; i += 8) {
-	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
-		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
-		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
-		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
-		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+        TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+                 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+                 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+                 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+                 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
       }
     }
 
@@ -568,7 +569,7 @@
   INPUT_VARS(cinfo);
 
   INPUT_2BYTES(cinfo, length, return FALSE);
-  
+
   if (length != 4)
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
@@ -590,14 +591,14 @@
  * JFIF and Adobe markers, respectively.
  */
 
-#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
-#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
-#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+#define APP0_DATA_LEN   14      /* Length of interesting data in APP0 */
+#define APP14_DATA_LEN  12      /* Length of interesting data in APP14 */
+#define APPN_DATA_LEN   14      /* Must be the largest of the above!! */
 
 
 LOCAL(void)
 examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	      unsigned int datalen, INT32 remaining)
+              unsigned int datalen, INT32 remaining)
 /* Examine first few bytes from an APP0.
  * Take appropriate action if it is a JFIF marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
@@ -626,18 +627,18 @@
      */
     if (cinfo->JFIF_major_version != 1)
       WARNMS2(cinfo, JWRN_JFIF_MAJOR,
-	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+              cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
     /* Generate trace messages */
     TRACEMS5(cinfo, 1, JTRC_JFIF,
-	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
-	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+             cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+             cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
     /* Validate thumbnail dimensions and issue appropriate messages */
     if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
       TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-	       GETJOCTET(data[12]), GETJOCTET(data[13]));
+               GETJOCTET(data[12]), GETJOCTET(data[13]));
     totallen -= APP0_DATA_LEN;
     if (totallen !=
-	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+        ((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
       TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
   } else if (datalen >= 6 &&
       GETJOCTET(data[0]) == 0x4A &&
@@ -661,7 +662,7 @@
       break;
     default:
       TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-	       GETJOCTET(data[5]), (int) totallen);
+               GETJOCTET(data[5]), (int) totallen);
       break;
     }
   } else {
@@ -673,7 +674,7 @@
 
 LOCAL(void)
 examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	       unsigned int datalen, INT32 remaining)
+               unsigned int datalen, INT32 remaining)
 /* Examine first few bytes from an APP14.
  * Take appropriate action if it is an Adobe marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
@@ -765,19 +766,19 @@
     /* begin reading a marker */
     INPUT_2BYTES(cinfo, length, return FALSE);
     length -= 2;
-    if (length >= 0) {		/* watch out for bogus length word */
+    if (length >= 0) {          /* watch out for bogus length word */
       /* figure out how much we want to save */
       unsigned int limit;
       if (cinfo->unread_marker == (int) M_COM)
-	limit = marker->length_limit_COM;
+        limit = marker->length_limit_COM;
       else
-	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+        limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
       if ((unsigned int) length < limit)
-	limit = (unsigned int) length;
+        limit = (unsigned int) length;
       /* allocate and initialize the marker item */
       cur_marker = (jpeg_saved_marker_ptr)
-	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				    SIZEOF(struct jpeg_marker_struct) + limit);
+        (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                    SIZEOF(struct jpeg_marker_struct) + limit);
       cur_marker->next = NULL;
       cur_marker->marker = (UINT8) cinfo->unread_marker;
       cur_marker->original_length = (unsigned int) length;
@@ -801,7 +802,7 @@
   }
 
   while (bytes_read < data_length) {
-    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    INPUT_SYNC(cinfo);          /* move the restart point to here */
     marker->bytes_read = bytes_read;
     /* If there's not at least one byte in buffer, suspend */
     MAKE_BYTE_AVAIL(cinfo, return FALSE);
@@ -814,14 +815,14 @@
   }
 
   /* Done reading what we want to read */
-  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+  if (cur_marker != NULL) {     /* will be NULL if bogus length word */
     /* Add new marker to end of list */
     if (cinfo->marker_list == NULL) {
       cinfo->marker_list = cur_marker;
     } else {
       jpeg_saved_marker_ptr prev = cinfo->marker_list;
       while (prev->next != NULL)
-	prev = prev->next;
+        prev = prev->next;
       prev->next = cur_marker;
     }
     /* Reset pointer & calc remaining data length */
@@ -841,12 +842,12 @@
     break;
   default:
     TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
-	     (int) (data_length + length));
+             (int) (data_length + length));
     break;
   }
 
   /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
   if (length > 0)
     (*cinfo->src->skip_input_data) (cinfo, (long) length);
 
@@ -865,10 +866,10 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
 
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
   if (length > 0)
     (*cinfo->src->skip_input_data) (cinfo, (long) length);
 
@@ -912,7 +913,7 @@
       INPUT_BYTE(cinfo, c, return FALSE);
     } while (c == 0xFF);
     if (c != 0)
-      break;			/* found a valid marker, exit loop */
+      break;                    /* found a valid marker, exit loop */
     /* Reach here if we found a stuffed-zero data sequence (FF/00).
      * Discard it and loop back to try again.
      */
@@ -972,11 +973,11 @@
     /* NB: first_marker() enforces the requirement that SOI appear first. */
     if (cinfo->unread_marker == 0) {
       if (! cinfo->marker->saw_SOI) {
-	if (! first_marker(cinfo))
-	  return JPEG_SUSPENDED;
+        if (! first_marker(cinfo))
+          return JPEG_SUSPENDED;
       } else {
-	if (! next_marker(cinfo))
-	  return JPEG_SUSPENDED;
+        if (! next_marker(cinfo))
+          return JPEG_SUSPENDED;
       }
     }
     /* At this point cinfo->unread_marker contains the marker code and the
@@ -986,74 +987,74 @@
     switch (cinfo->unread_marker) {
     case M_SOI:
       if (! get_soi(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
       if (! get_sof(cinfo, FALSE, FALSE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF2:		/* Progressive, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
       if (! get_sof(cinfo, TRUE, FALSE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF9:		/* Extended sequential, arithmetic */
+    case M_SOF9:                /* Extended sequential, arithmetic */
       if (! get_sof(cinfo, FALSE, TRUE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF10:		/* Progressive, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
       if (! get_sof(cinfo, TRUE, TRUE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
     /* Currently unsupported SOFn types */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_JPG:			/* Reserved for JPEG extensions */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_JPG:                 /* Reserved for JPEG extensions */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
       break;
 
     case M_SOS:
       if (! get_sos(cinfo))
-	return JPEG_SUSPENDED;
-      cinfo->unread_marker = 0;	/* processed the marker */
+        return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0; /* processed the marker */
       return JPEG_REACHED_SOS;
-    
+
     case M_EOI:
       TRACEMS(cinfo, 1, JTRC_EOI);
-      cinfo->unread_marker = 0;	/* processed the marker */
+      cinfo->unread_marker = 0; /* processed the marker */
       return JPEG_REACHED_EOI;
-      
+
     case M_DAC:
       if (! get_dac(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DHT:
       if (! get_dht(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DQT:
       if (! get_dqt(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DRI:
       if (! get_dri(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_APP0:
     case M_APP1:
     case M_APP2:
@@ -1071,16 +1072,16 @@
     case M_APP14:
     case M_APP15:
       if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
-		cinfo->unread_marker - (int) M_APP0]) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_COM:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
-	return JPEG_SUSPENDED;
+                cinfo->unread_marker - (int) M_APP0]) (cinfo))
+        return JPEG_SUSPENDED;
       break;
 
-    case M_RST0:		/* these are all parameterless */
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:                /* these are all parameterless */
     case M_RST1:
     case M_RST2:
     case M_RST3:
@@ -1092,12 +1093,12 @@
       TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
       break;
 
-    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+    case M_DNL:                 /* Ignore DNL ... perhaps the wrong thing */
       if (! skip_variable(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    default:			/* must be DHP, EXP, JPGn, or RESn */
+    default:                    /* must be DHP, EXP, JPGn, or RESn */
       /* For now, we treat the reserved markers as fatal errors since they are
        * likely to be used to signal incompatible JPEG Part 3 extensions.
        * Once the JPEG 3 version-number marker is well defined, this code
@@ -1143,7 +1144,7 @@
     /* Uh-oh, the restart markers have been messed up. */
     /* Let the data source manager determine how to resync. */
     if (! (*cinfo->src->resync_to_restart) (cinfo,
-					    cinfo->marker->next_restart_num))
+                                            cinfo->marker->next_restart_num))
       return FALSE;
   }
 
@@ -1208,25 +1209,25 @@
 {
   int marker = cinfo->unread_marker;
   int action = 1;
-  
+
   /* Always put up a warning. */
   WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
-  
+
   /* Outer loop handles repeated decision after scanning forward. */
   for (;;) {
     if (marker < (int) M_SOF0)
-      action = 2;		/* invalid marker */
+      action = 2;               /* invalid marker */
     else if (marker < (int) M_RST0 || marker > (int) M_RST7)
-      action = 3;		/* valid non-restart marker */
+      action = 3;               /* valid non-restart marker */
     else {
       if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
-	  marker == ((int) M_RST0 + ((desired+2) & 7)))
-	action = 3;		/* one of the next two expected restarts */
+          marker == ((int) M_RST0 + ((desired+2) & 7)))
+        action = 3;             /* one of the next two expected restarts */
       else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
-	       marker == ((int) M_RST0 + ((desired-2) & 7)))
-	action = 2;		/* a prior restart, so advance */
+               marker == ((int) M_RST0 + ((desired-2) & 7)))
+        action = 2;             /* a prior restart, so advance */
       else
-	action = 1;		/* desired restart or too far away */
+        action = 1;             /* desired restart or too far away */
     }
     TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
     switch (action) {
@@ -1237,7 +1238,7 @@
     case 2:
       /* Scan to the next marker, and repeat the decision loop. */
       if (! next_marker(cinfo))
-	return FALSE;
+        return FALSE;
       marker = cinfo->unread_marker;
       break;
     case 3:
@@ -1258,10 +1259,10 @@
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
-  cinfo->comp_info = NULL;		/* until allocated by get_sof */
-  cinfo->input_scan_number = 0;		/* no SOS seen yet */
-  cinfo->unread_marker = 0;		/* no pending marker */
-  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  cinfo->comp_info = NULL;              /* until allocated by get_sof */
+  cinfo->input_scan_number = 0;         /* no SOS seen yet */
+  cinfo->unread_marker = 0;             /* no pending marker */
+  marker->pub.saw_SOI = FALSE;          /* set internal state too */
   marker->pub.saw_SOF = FALSE;
   marker->pub.discarded_bytes = 0;
   marker->cur_marker = NULL;
@@ -1282,7 +1283,7 @@
   /* Create subobject in permanent pool */
   marker = (my_marker_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_marker_reader));
+                                SIZEOF(my_marker_reader));
   cinfo->marker = (struct jpeg_marker_reader *) marker;
   /* Initialize public method pointers */
   marker->pub.reset_marker_reader = reset_marker_reader;
@@ -1313,7 +1314,7 @@
 
 GLOBAL(void)
 jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
-		   unsigned int length_limit)
+                   unsigned int length_limit)
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
   long maxlength;
@@ -1362,7 +1363,7 @@
 
 GLOBAL(void)
 jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
-			   jpeg_marker_parser_method routine)
+                           jpeg_marker_parser_method routine)
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
diff --git a/jdmaster.c b/jdmaster.c
index 25ad416..b9f78fd 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -3,7 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * Modified 2002-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -24,7 +25,7 @@
 typedef struct {
   struct jpeg_decomp_master pub; /* public fields */
 
-  int pass_number;		/* # of passes completed */
+  int pass_number;              /* # of passes completed */
 
   boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
 
@@ -79,7 +80,7 @@
       cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
     return FALSE;
   /* ??? also need to test for upsample-time rescaling, when & if supported */
-  return TRUE;			/* by golly, it'll work... */
+  return TRUE;                  /* by golly, it'll work... */
 #else
   return FALSE;
 #endif
@@ -90,6 +91,177 @@
  * Compute output image dimensions and related values.
  * NOTE: this is exported for possible use by application.
  * Hence it mustn't do anything that can't be done twice.
+ */
+
+#if JPEG_LIB_VERSION >= 80
+GLOBAL(void)
+#else
+LOCAL(void)
+#endif
+jpeg_core_output_dimensions (j_decompress_ptr cinfo)
+/* Derive output dimensions and DCT scaled sizes from scale_num/scale_denom.
+ * Needed before master selection; used by transcoding and full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Smallest N <= 15 with scale_num*DCTSIZE <= scale_denom*N, else N = 16. */
+  if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
+    /* Provide 1/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 1;
+    cinfo->_min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
+    /* Provide 2/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 2;
+    cinfo->_min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
+    /* Provide 3/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 3;
+    cinfo->_min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
+    /* Provide 4/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 4;
+    cinfo->_min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
+    /* Provide 5/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 5;
+    cinfo->_min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
+    /* Provide 6/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 6;
+    cinfo->_min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
+    /* Provide 7/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 7;
+    cinfo->_min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
+    /* Provide 8/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 8;
+    cinfo->_min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
+    /* Provide 9/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 9;
+    cinfo->_min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
+    /* Provide 10/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 10;
+    cinfo->_min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
+    /* Provide 11/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 11;
+    cinfo->_min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
+    /* Provide 12/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 12;
+    cinfo->_min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
+    /* Provide 13/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 13;
+    cinfo->_min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
+    /* Provide 14/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 14;
+    cinfo->_min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
+    /* Provide 15/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 15;
+    cinfo->_min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide 16/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 16;
+    cinfo->_min_DCT_v_scaled_size = 16;
+  }
+
+  /* Give every component the minimum DCT scaled size chosen above */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
+    compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
  * Also note that it may be called before the master module is initialized!
  */
 
@@ -106,65 +278,24 @@
   if (cinfo->global_state != DSTATE_READY)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
 #ifdef IDCT_SCALING_SUPPORTED
 
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
-    /* Provide 1/8 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 8L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 8L);
-#if JPEG_LIB_VERSION >= 70
-    cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 1;
-#else
-    cinfo->min_DCT_scaled_size = 1;
-#endif
-  } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
-    /* Provide 1/4 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 4L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 4L);
-#if JPEG_LIB_VERSION >= 70
-    cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 2;
-#else
-    cinfo->min_DCT_scaled_size = 2;
-#endif
-  } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
-    /* Provide 1/2 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 2L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 2L);
-#if JPEG_LIB_VERSION >= 70
-    cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 4;
-#else
-    cinfo->min_DCT_scaled_size = 4;
-#endif
-  } else {
-    /* Provide 1/1 scaling */
-    cinfo->output_width = cinfo->image_width;
-    cinfo->output_height = cinfo->image_height;
-#if JPEG_LIB_VERSION >= 70
-    cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
-#else
-    cinfo->min_DCT_scaled_size = DCTSIZE;
-#endif
-  }
   /* In selecting the actual DCT scaling for each component, we try to
    * scale up the chroma components via IDCT scaling rather than upsampling.
    * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code assumes that the supported DCT scalings are powers of 2.
+   * Note that this code adapts to subsampling ratios that are powers of 2.
    */
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     int ssize = cinfo->_min_DCT_scaled_size;
     while (ssize < DCTSIZE &&
-	   (compptr->h_samp_factor * ssize * 2 <=
-	    cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) &&
-	   (compptr->v_samp_factor * ssize * 2 <=
-	    cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size)) {
+           ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
+            (compptr->h_samp_factor * ssize * 2) == 0) &&
+           ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
+            (compptr->v_samp_factor * ssize * 2) == 0)) {
       ssize = ssize * 2;
     }
 #if JPEG_LIB_VERSION >= 70
@@ -182,12 +313,12 @@
     /* Size in samples, after IDCT scaling */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width *
-		    (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+                    (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size),
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->downsampled_height = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height *
-		    (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+                    (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size),
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
   }
 
 #else /* !IDCT_SCALING_SUPPORTED */
@@ -227,12 +358,12 @@
   case JCS_YCCK:
     cinfo->out_color_components = 4;
     break;
-  default:			/* else must be same colorspace as in file */
+  default:                      /* else must be same colorspace as in file */
     cinfo->out_color_components = cinfo->num_components;
     break;
   }
   cinfo->output_components = (cinfo->quantize_colors ? 1 :
-			      cinfo->out_color_components);
+                              cinfo->out_color_components);
 
   /* See if upsampler will want to emit more than one row at a time */
   if (use_merged_upsample(cinfo))
@@ -249,20 +380,20 @@
  * processes are inner loops and need to be as fast as possible.  On most
  * machines, particularly CPUs with pipelines or instruction prefetch,
  * a (subscript-check-less) C table lookup
- *		x = sample_range_limit[x];
+ *              x = sample_range_limit[x];
  * is faster than explicit tests
- *		if (x < 0)  x = 0;
- *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ *              if (x < 0)  x = 0;
+ *              else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
  * These processes all use a common table prepared by the routine below.
  *
  * For most steps we can mathematically guarantee that the initial value
  * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
  * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
- * limiting step (just after the IDCT), a wildly out-of-range value is 
+ * limiting step (just after the IDCT), a wildly out-of-range value is
  * possible if the input data is corrupt.  To avoid any chance of indexing
  * off the end of memory and getting a bad-pointer trap, we perform the
  * post-IDCT limiting thus:
- *		x = range_limit[x & MASK];
+ *              x = range_limit[x & MASK];
  * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
  * samples.  Under normal circumstances this is more than enough range and
  * a correct output will be generated; with bogus input data the mask will
@@ -294,23 +425,23 @@
 
   table = (JSAMPLE *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
-  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+                (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  table += (MAXJSAMPLE+1);      /* allow negative subscripts of simple table */
   cinfo->sample_range_limit = table;
   /* First segment of "simple" table: limit[x] = 0 for x < 0 */
   MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
   /* Main part of "simple" table: limit[x] = x */
   for (i = 0; i <= MAXJSAMPLE; i++)
     table[i] = (JSAMPLE) i;
-  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  table += CENTERJSAMPLE;       /* Point to where post-IDCT table starts */
   /* End of simple table, rest of first half of post-IDCT table */
   for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
     table[i] = MAXJSAMPLE;
   /* Second half of post-IDCT table */
   MEMZERO(table + (2 * (MAXJSAMPLE+1)),
-	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+          (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
   MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
-	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
+          cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
 }
 
 
@@ -498,24 +629,24 @@
     if (cinfo->quantize_colors && cinfo->colormap == NULL) {
       /* Select new quantization method */
       if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
-	cinfo->cquantize = master->quantizer_2pass;
-	master->pub.is_dummy_pass = TRUE;
+        cinfo->cquantize = master->quantizer_2pass;
+        master->pub.is_dummy_pass = TRUE;
       } else if (cinfo->enable_1pass_quant) {
-	cinfo->cquantize = master->quantizer_1pass;
+        cinfo->cquantize = master->quantizer_1pass;
       } else {
-	ERREXIT(cinfo, JERR_MODE_CHANGE);
+        ERREXIT(cinfo, JERR_MODE_CHANGE);
       }
     }
     (*cinfo->idct->start_pass) (cinfo);
     (*cinfo->coef->start_output_pass) (cinfo);
     if (! cinfo->raw_data_out) {
       if (! master->using_merged_upsample)
-	(*cinfo->cconvert->start_pass) (cinfo);
+        (*cinfo->cconvert->start_pass) (cinfo);
       (*cinfo->upsample->start_pass) (cinfo);
       if (cinfo->quantize_colors)
-	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+        (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
       (*cinfo->post->start_pass) (cinfo,
-	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+            (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
       (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
     }
   }
@@ -524,7 +655,7 @@
   if (cinfo->progress != NULL) {
     cinfo->progress->completed_passes = master->pass_number;
     cinfo->progress->total_passes = master->pass_number +
-				    (master->pub.is_dummy_pass ? 2 : 1);
+                                    (master->pub.is_dummy_pass ? 2 : 1);
     /* In buffered-image mode, we assume one more output pass if EOI not
      * yet reached, but no more passes if EOI has been reached.
      */
@@ -591,7 +722,7 @@
 
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_decomp_master));
+                                  SIZEOF(my_decomp_master));
   cinfo->master = (struct jpeg_decomp_master *) master;
   master->pub.prepare_for_output_pass = prepare_for_output_pass;
   master->pub.finish_output_pass = finish_output_pass;
diff --git a/jdmerge.c b/jdmerge.c
index 17d28f1..c669b17 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009, 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -17,19 +17,19 @@
  * (ie, box filtering), we can save some work in color conversion by
  * calculating all the output pixels corresponding to a pair of chroma
  * samples at one time.  In the conversion equations
- *	R = Y           + K1 * Cr
- *	G = Y + K2 * Cb + K3 * Cr
- *	B = Y + K4 * Cb
+ *      R = Y           + K1 * Cr
+ *      G = Y + K2 * Cb + K3 * Cr
+ *      B = Y + K4 * Cb
  * only the Y term varies among the group of pixels corresponding to a pair
  * of chroma samples, so the rest of the terms can be calculated just once.
  * At typical sampling ratios, this eliminates half or three-quarters of the
  * multiplications needed for color conversion.
  *
  * This file currently provides implementations for the following cases:
- *	YCbCr => RGB color conversion only.
- *	Sampling ratios of 2h1v or 2h2v.
- *	No scaling needed at upsample time.
- *	Corner-aligned (non-CCIR601) sampling alignment.
+ *      YCbCr => RGB color conversion only.
+ *      Sampling ratios of 2h1v or 2h2v.
+ *      No scaling needed at upsample time.
+ *      Corner-aligned (non-CCIR601) sampling alignment.
  * Other special cases could be added, but in most applications these are
  * the only common cases.  (For uncommon cases we fall back on the more
  * general code in jdsample.c and jdcolor.c.)
@@ -39,7 +39,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "config.h"
+#include "jconfigint.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
@@ -47,18 +47,18 @@
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
+  struct jpeg_upsampler pub;    /* public fields */
 
   /* Pointer to routine to do actual upsampling/conversion of one row group */
   JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-			   JSAMPARRAY output_buf));
+                           JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                           JSAMPARRAY output_buf));
 
   /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+  int * Cr_r_tab;               /* => table for Cr to R conversion */
+  int * Cb_b_tab;               /* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
 
   /* For 2:1 vertical sampling, we produce two output rows at a time.
    * We need a "spare" row buffer to hold the second output row if the
@@ -66,17 +66,17 @@
    * to discard the dummy last row if the image height is odd.
    */
   JSAMPROW spare_row;
-  boolean spare_full;		/* T if spare buffer is occupied */
+  boolean spare_full;           /* T if spare buffer is occupied */
 
-  JDIMENSION out_row_width;	/* samples per output row */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+  JDIMENSION out_row_width;     /* samples per output row */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
 } my_upsampler;
 
 typedef my_upsampler * my_upsample_ptr;
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
 
 
 /* Include inline routines for colorspace extensions */
@@ -195,26 +195,26 @@
 
   upsample->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * SIZEOF(int));
   upsample->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * SIZEOF(int));
   upsample->Cr_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
   upsample->Cb_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     upsample->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
     /* Cb=>B value is nearest int to 1.77200 * x */
     upsample->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
     /* Cr=>G value is scaled-up -0.71414 * x */
     upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
     /* Cb=>G value is scaled-up -0.34414 * x */
@@ -248,20 +248,20 @@
 
 METHODDEF(void)
 merged_2v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 /* 2:1 vertical sampling case: may need a spare row. */
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   JSAMPROW work_ptrs[2];
-  JDIMENSION num_rows;		/* number of rows returned to caller */
+  JDIMENSION num_rows;          /* number of rows returned to caller */
 
   if (upsample->spare_full) {
     /* If we have a spare row saved from a previous cycle, just return it. */
     jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
-		      1, upsample->out_row_width);
+                      1, upsample->out_row_width);
     num_rows = 1;
     upsample->spare_full = FALSE;
   } else {
@@ -297,17 +297,17 @@
 
 METHODDEF(void)
 merged_1v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 /* 1:1 vertical sampling case: much easier, never need a spare row. */
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
 
   /* Just do the upsampling. */
   (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
-			 output_buf + *out_row_ctr);
+                         output_buf + *out_row_ctr);
   /* Adjust counts */
   (*out_row_ctr)++;
   (*in_row_group_ctr)++;
@@ -330,8 +330,8 @@
 
 METHODDEF(void)
 h2v1_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
+                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                      JSAMPARRAY output_buf)
 {
   switch (cinfo->out_color_space) {
     case JCS_EXT_RGB:
@@ -376,8 +376,8 @@
 
 METHODDEF(void)
 h2v2_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
+                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                      JSAMPARRAY output_buf)
 {
   switch (cinfo->out_color_space) {
     case JCS_EXT_RGB:
@@ -431,7 +431,7 @@
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
+                                SIZEOF(my_upsampler));
   cinfo->upsample = (struct jpeg_upsampler *) upsample;
   upsample->pub.start_pass = start_pass_merged_upsample;
   upsample->pub.need_context_rows = FALSE;
@@ -447,7 +447,7 @@
     /* Allocate a spare row buffer */
     upsample->spare_row = (JSAMPROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+                (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
   } else {
     upsample->pub.upsample = merged_1v_upsample;
     if (jsimd_can_h2v1_merged_upsample())
diff --git a/jdmrgext.c b/jdmrgext.c
index e1ab1e5..1f0a550 100644
--- a/jdmrgext.c
+++ b/jdmrgext.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
diff --git a/jdphuff.c b/jdphuff.c
index 2267809..783d8a8 100644
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -17,7 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"		/* Declarations shared with jdhuff.c */
+#include "jdhuff.h"             /* Declarations shared with jdhuff.c */
 
 
 #ifdef D_PROGRESSIVE_SUPPORTED
@@ -30,8 +30,8 @@
  */
 
 typedef struct {
-  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
-  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+  unsigned int EOBRUN;                  /* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
 } savable_state;
 
 /* This macro is to work around compilers with missing or broken
@@ -44,11 +44,11 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).EOBRUN = (src).EOBRUN, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).EOBRUN = (src).EOBRUN, \
+         (dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -59,11 +59,11 @@
   /* These fields are loaded into local variables at start of each MCU.
    * In case of suspension, we exit WITHOUT updating them.
    */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
@@ -75,13 +75,13 @@
 
 /* Forward declarations */
 METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
+                                            JBLOCKROW *MCU_data));
 METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
+                                            JBLOCKROW *MCU_data));
 METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
+                                             JBLOCKROW *MCU_data));
 METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
+                                             JBLOCKROW *MCU_data));
 
 
 /*
@@ -117,7 +117,7 @@
     if (cinfo->Al != cinfo->Ah-1)
       bad = TRUE;
   }
-  if (cinfo->Al > 13)		/* need not check for < 0 */
+  if (cinfo->Al > 13)           /* need not check for < 0 */
     bad = TRUE;
   /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
    * but the spec doesn't say so, and we try to be liberal about what we
@@ -127,7 +127,7 @@
    */
   if (bad)
     ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	     cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
   /* Update progression status, and verify that scan order is legal.
    * Note that inter-scan inconsistencies are treated as warnings
    * not fatal errors ... not clear if this is right way to behave.
@@ -140,7 +140,7 @@
     for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
       int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
       if (cinfo->Ah != expected)
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
       coef_bit_ptr[coefi] = cinfo->Al;
     }
   }
@@ -164,15 +164,15 @@
      * We may build same derived table more than once, but it's not expensive.
      */
     if (is_DC_band) {
-      if (cinfo->Ah == 0) {	/* DC refinement needs no table */
-	tbl = compptr->dc_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-				& entropy->derived_tbls[tbl]);
+      if (cinfo->Ah == 0) {     /* DC refinement needs no table */
+        tbl = compptr->dc_tbl_no;
+        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+                                & entropy->derived_tbls[tbl]);
       }
     } else {
       tbl = compptr->ac_tbl_no;
       jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-			      & entropy->derived_tbls[tbl]);
+                              & entropy->derived_tbls[tbl]);
       /* remember the single active table */
       entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
     }
@@ -198,6 +198,7 @@
  * On some machines, a shift and add will be faster than a table lookup.
  */
 
+#define AVOID_TABLES
 #ifdef AVOID_TABLES
 
 #define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
@@ -263,7 +264,7 @@
 /*
  * Huffman MCU decoding.
  * Each of these routines decodes and returns one MCU's worth of
- * Huffman-compressed coefficients. 
+ * Huffman-compressed coefficients.
  * The coefficients are reordered from zigzag order into natural array order,
  * but are not dequantized.
  *
@@ -284,7 +285,7 @@
 
 METHODDEF(boolean)
 decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Al = cinfo->Al;
   register int s, r;
@@ -299,7 +300,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, just leave the MCU set to zeroes.
@@ -324,9 +325,9 @@
       /* Section F.2.2.1: decode the DC coefficient difference */
       HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
       if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
+        CHECK_BIT_BUFFER(br_state, s, return FALSE);
+        r = GET_BITS(s);
+        s = HUFF_EXTEND(r, s);
       }
 
       /* Convert DC difference to actual value, update last_dc_val */
@@ -355,7 +356,7 @@
 
 METHODDEF(boolean)
 decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
   int Al = cinfo->Al;
@@ -369,7 +370,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, just leave the MCU set to zeroes.
@@ -380,49 +381,49 @@
     /* Load up working state.
      * We can avoid loading/saving bitread state if in an EOB run.
      */
-    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+    EOBRUN = entropy->saved.EOBRUN;     /* only part of saved state we need */
 
     /* There is always only one block per MCU */
 
-    if (EOBRUN > 0)		/* if it's a band of zeroes... */
-      EOBRUN--;			/* ...process it now (we do nothing) */
+    if (EOBRUN > 0)             /* if it's a band of zeroes... */
+      EOBRUN--;                 /* ...process it now (we do nothing) */
     else {
       BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
       block = MCU_data[0];
       tbl = entropy->ac_derived_tbl;
 
       for (k = cinfo->Ss; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	  /* Scale and output coefficient in natural (dezigzagged) order */
-	  (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
-	} else {
-	  if (r == 15) {	/* ZRL */
-	    k += 15;		/* skip 15 zeroes in band */
-	  } else {		/* EOBr, run length is 2^r + appended bits */
-	    EOBRUN = 1 << r;
-	    if (r) {		/* EOBr, r > 0 */
-	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    EOBRUN--;		/* this band is processed at this moment */
-	    break;		/* force end-of-band */
-	  }
-	}
+        HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Scale and output coefficient in natural (dezigzagged) order */
+          (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
+        } else {
+          if (r == 15) {        /* ZRL */
+            k += 15;            /* skip 15 zeroes in band */
+          } else {              /* EOBr, run length is 2^r + appended bits */
+            EOBRUN = 1 << r;
+            if (r) {            /* EOBr, r > 0 */
+              CHECK_BIT_BUFFER(br_state, r, return FALSE);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            EOBRUN--;           /* this band is processed at this moment */
+            break;              /* force end-of-band */
+          }
+        }
       }
 
       BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
     }
 
     /* Completed MCU, so update state */
-    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+    entropy->saved.EOBRUN = EOBRUN;     /* only part of saved state we need */
   }
 
   /* Account for restart interval (no-op if not using restarts) */
@@ -440,9 +441,9 @@
 
 METHODDEF(boolean)
 decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int p1 = 1 << cinfo->Al;      /* 1 in the bit position being coded */
   int blkn;
   JBLOCKROW block;
   BITREAD_STATE_VARS;
@@ -451,7 +452,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* Not worth the cycles to check insufficient_data here,
@@ -489,11 +490,11 @@
 
 METHODDEF(boolean)
 decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-  int m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+  int p1 = 1 << cinfo->Al;      /* 1 in the bit position being coded */
+  int m1 = (-1) << cinfo->Al;   /* -1 in the bit position being coded */
   register int s, k, r;
   unsigned int EOBRUN;
   JBLOCKROW block;
@@ -507,7 +508,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, don't modify the MCU.
@@ -535,58 +536,58 @@
 
     if (EOBRUN == 0) {
       for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  if (s != 1)		/* size of new coef should always be 1 */
-	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1))
-	    s = p1;		/* newly nonzero coef is positive */
-	  else
-	    s = m1;		/* newly nonzero coef is negative */
-	} else {
-	  if (r != 15) {
-	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {
-	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    break;		/* rest of block is handled by EOB logic */
-	  }
-	  /* note s = 0 for processing ZRL */
-	}
-	/* Advance over already-nonzero coefs and r still-zero coefs,
-	 * appending correction bits to the nonzeroes.  A correction bit is 1
-	 * if the absolute value of the coefficient must be increased.
-	 */
-	do {
-	  thiscoef = *block + jpeg_natural_order[k];
-	  if (*thiscoef != 0) {
-	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	    if (GET_BITS(1)) {
-	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-		if (*thiscoef >= 0)
-		  *thiscoef += p1;
-		else
-		  *thiscoef += m1;
-	      }
-	    }
-	  } else {
-	    if (--r < 0)
-	      break;		/* reached target zero coefficient */
-	  }
-	  k++;
-	} while (k <= Se);
-	if (s) {
-	  int pos = jpeg_natural_order[k];
-	  /* Output newly nonzero coefficient */
-	  (*block)[pos] = (JCOEF) s;
-	  /* Remember its position in case we have to suspend */
-	  newnz_pos[num_newnz++] = pos;
-	}
+        HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          if (s != 1)           /* size of new coef should always be 1 */
+            WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1))
+            s = p1;             /* newly nonzero coef is positive */
+          else
+            s = m1;             /* newly nonzero coef is negative */
+        } else {
+          if (r != 15) {
+            EOBRUN = 1 << r;    /* EOBr, run length is 2^r + appended bits */
+            if (r) {
+              CHECK_BIT_BUFFER(br_state, r, goto undoit);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            break;              /* rest of block is handled by EOB logic */
+          }
+          /* note s = 0 for processing ZRL */
+        }
+        /* Advance over already-nonzero coefs and r still-zero coefs,
+         * appending correction bits to the nonzeroes.  A correction bit is 1
+         * if the absolute value of the coefficient must be increased.
+         */
+        do {
+          thiscoef = *block + jpeg_natural_order[k];
+          if (*thiscoef != 0) {
+            CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+            if (GET_BITS(1)) {
+              if ((*thiscoef & p1) == 0) { /* do nothing if already set */
+                if (*thiscoef >= 0)
+                  *thiscoef += p1;
+                else
+                  *thiscoef += m1;
+              }
+            }
+          } else {
+            if (--r < 0)
+              break;            /* reached target zero coefficient */
+          }
+          k++;
+        } while (k <= Se);
+        if (s) {
+          int pos = jpeg_natural_order[k];
+          /* Output newly nonzero coefficient */
+          (*block)[pos] = (JCOEF) s;
+          /* Remember its position in case we have to suspend */
+          newnz_pos[num_newnz++] = pos;
+        }
       }
     }
 
@@ -597,18 +598,18 @@
        * if the absolute value of the coefficient must be increased.
        */
       for (; k <= Se; k++) {
-	thiscoef = *block + jpeg_natural_order[k];
-	if (*thiscoef != 0) {
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1)) {
-	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-	      if (*thiscoef >= 0)
-		*thiscoef += p1;
-	      else
-		*thiscoef += m1;
-	    }
-	  }
-	}
+        thiscoef = *block + jpeg_natural_order[k];
+        if (*thiscoef != 0) {
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1)) {
+            if ((*thiscoef & p1) == 0) { /* do nothing if already changed */
+              if (*thiscoef >= 0)
+                *thiscoef += p1;
+              else
+                *thiscoef += m1;
+            }
+          }
+        }
       }
       /* Count one block completed in EOB run */
       EOBRUN--;
@@ -646,7 +647,7 @@
 
   entropy = (phuff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(phuff_entropy_decoder));
+                                SIZEOF(phuff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_phuff_decoder;
 
@@ -658,9 +659,9 @@
   /* Create progression status table */
   cinfo->coef_bits = (int (*)[DCTSIZE2])
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				cinfo->num_components*DCTSIZE2*SIZEOF(int));
+                                cinfo->num_components*DCTSIZE2*SIZEOF(int));
   coef_bit_ptr = & cinfo->coef_bits[0][0];
-  for (ci = 0; ci < cinfo->num_components; ci++) 
+  for (ci = 0; ci < cinfo->num_components; ci++)
     for (i = 0; i < DCTSIZE2; i++)
       *coef_bit_ptr++ = -1;
 }
diff --git a/jdpostct.c b/jdpostct.c
index 571563d..aa2af07 100644
--- a/jdpostct.c
+++ b/jdpostct.c
@@ -31,12 +31,12 @@
    * For two-pass color quantization, we need a full-image buffer;
    * for one-pass operation, a strip buffer is sufficient.
    */
-  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
-  JDIMENSION strip_height;	/* buffer size in rows */
+  jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;            /* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;      /* buffer size in rows */
   /* for two-pass mode only: */
-  JDIMENSION starting_row;	/* row # of first row in current strip */
-  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+  JDIMENSION starting_row;      /* row # of first row in current strip */
+  JDIMENSION next_row;          /* index of next row to fill/empty in strip */
 } my_post_controller;
 
 typedef my_post_controller * my_post_ptr;
@@ -44,24 +44,24 @@
 
 /* Forward declarations */
 METHODDEF(void) post_process_1pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo,
+             JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+             JDIMENSION in_row_groups_avail,
+             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+             JDIMENSION out_rows_avail));
 #ifdef QUANT_2PASS_SUPPORTED
 METHODDEF(void) post_process_prepass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo,
+             JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+             JDIMENSION in_row_groups_avail,
+             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+             JDIMENSION out_rows_avail));
 METHODDEF(void) post_process_2pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        JPP((j_decompress_ptr cinfo,
+             JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+             JDIMENSION in_row_groups_avail,
+             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+             JDIMENSION out_rows_avail));
 #endif
 
 
@@ -84,9 +84,9 @@
        * allocate a strip buffer.  Use the virtual-array buffer as workspace.
        */
       if (post->buffer == NULL) {
-	post->buffer = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, post->whole_image,
-	   (JDIMENSION) 0, post->strip_height, TRUE);
+        post->buffer = (*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr) cinfo, post->whole_image,
+           (JDIMENSION) 0, post->strip_height, TRUE);
       }
     } else {
       /* For single-pass processing without color quantization,
@@ -124,10 +124,10 @@
 
 METHODDEF(void)
 post_process_1pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION num_rows, max_rows;
@@ -139,11 +139,11 @@
     max_rows = post->strip_height;
   num_rows = 0;
   (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &num_rows, max_rows);
+                input_buf, in_row_group_ctr, in_row_groups_avail,
+                post->buffer, &num_rows, max_rows);
   /* Quantize and emit data. */
   (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+                post->buffer, output_buf + *out_row_ctr, (int) num_rows);
   *out_row_ctr += num_rows;
 }
 
@@ -156,10 +156,10 @@
 
 METHODDEF(void)
 post_process_prepass (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		      JDIMENSION in_row_groups_avail,
-		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		      JDIMENSION out_rows_avail)
+                      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                      JDIMENSION in_row_groups_avail,
+                      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                      JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION old_next_row, num_rows;
@@ -167,22 +167,22 @@
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
     post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, TRUE);
+        ((j_common_ptr) cinfo, post->whole_image,
+         post->starting_row, post->strip_height, TRUE);
   }
 
   /* Upsample some data (up to a strip height's worth). */
   old_next_row = post->next_row;
   (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &post->next_row, post->strip_height);
+                input_buf, in_row_group_ctr, in_row_groups_avail,
+                post->buffer, &post->next_row, post->strip_height);
 
   /* Allow quantizer to scan new data.  No data is emitted, */
   /* but we advance out_row_ctr so outer loop can tell when we're done. */
   if (post->next_row > old_next_row) {
     num_rows = post->next_row - old_next_row;
     (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-					 (JSAMPARRAY) NULL, (int) num_rows);
+                                         (JSAMPARRAY) NULL, (int) num_rows);
     *out_row_ctr += num_rows;
   }
 
@@ -200,10 +200,10 @@
 
 METHODDEF(void)
 post_process_2pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION num_rows, max_rows;
@@ -211,8 +211,8 @@
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
     post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, FALSE);
+        ((j_common_ptr) cinfo, post->whole_image,
+         post->starting_row, post->strip_height, FALSE);
   }
 
   /* Determine number of rows to emit. */
@@ -227,8 +227,8 @@
 
   /* Quantize and emit data. */
   (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer + post->next_row, output_buf + *out_row_ctr,
-		(int) num_rows);
+                post->buffer + post->next_row, output_buf + *out_row_ctr,
+                (int) num_rows);
   *out_row_ctr += num_rows;
 
   /* Advance if we filled the strip. */
@@ -253,11 +253,11 @@
 
   post = (my_post_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_post_controller));
+                                SIZEOF(my_post_controller));
   cinfo->post = (struct jpeg_d_post_controller *) post;
   post->pub.start_pass = start_pass_dpost;
-  post->whole_image = NULL;	/* flag for no virtual arrays */
-  post->buffer = NULL;		/* flag for no strip buffer */
+  post->whole_image = NULL;     /* flag for no virtual arrays */
+  post->buffer = NULL;          /* flag for no strip buffer */
 
   /* Create the quantization buffer, if needed */
   if (cinfo->quantize_colors) {
@@ -271,20 +271,20 @@
       /* We round up the number of rows to a multiple of the strip height. */
 #ifdef QUANT_2PASS_SUPPORTED
       post->whole_image = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 (JDIMENSION) jround_up((long) cinfo->output_height,
-				(long) post->strip_height),
-	 post->strip_height);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         cinfo->output_width * cinfo->out_color_components,
+         (JDIMENSION) jround_up((long) cinfo->output_height,
+                                (long) post->strip_height),
+         post->strip_height);
 #else
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 #endif /* QUANT_2PASS_SUPPORTED */
     } else {
       /* One-pass color quantization: just make a strip buffer. */
       post->buffer = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 post->strip_height);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         cinfo->output_width * cinfo->out_color_components,
+         post->strip_height);
     }
   }
 }
diff --git a/jdsample.c b/jdsample.c
index 5211785..3da1d06 100644
--- a/jdsample.c
+++ b/jdsample.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
@@ -30,13 +30,13 @@
 
 /* Pointer to routine to upsample a single component */
 typedef JMETHOD(void, upsample1_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+                (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
 
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
+  struct jpeg_upsampler pub;    /* public fields */
 
   /* Color conversion buffer.  When using separate upsampling and color
    * conversion steps, this buffer holds one upsampled row group until it
@@ -50,8 +50,8 @@
   /* Per-component upsampling method pointers */
   upsample1_ptr methods[MAX_COMPONENTS];
 
-  int next_row_out;		/* counts rows emitted from color_buf */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+  int next_row_out;             /* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
 
   /* Height of an input row group for each component. */
   int rowgroup_height[MAX_COMPONENTS];
@@ -92,10 +92,10 @@
 
 METHODDEF(void)
 sep_upsample (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	      JDIMENSION in_row_groups_avail,
-	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	      JDIMENSION out_rows_avail)
+              JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+              JDIMENSION in_row_groups_avail,
+              JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+              JDIMENSION out_rows_avail)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   int ci;
@@ -105,13 +105,13 @@
   /* Fill the conversion buffer, if it's empty */
   if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       /* Invoke per-component upsample method.  Notice we pass a POINTER
        * to color_buf[ci], so that fullsize_upsample can change it.
        */
       (*upsample->methods[ci]) (cinfo, compptr,
-	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
-	upsample->color_buf + ci);
+        input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+        upsample->color_buf + ci);
     }
     upsample->next_row_out = 0;
   }
@@ -123,7 +123,7 @@
   /* Not more than the distance to the end of the image.  Need this test
    * in case the image height is not a multiple of max_v_samp_factor:
    */
-  if (num_rows > upsample->rows_to_go) 
+  if (num_rows > upsample->rows_to_go)
     num_rows = upsample->rows_to_go;
   /* And not more than what the client can accept: */
   out_rows_avail -= *out_row_ctr;
@@ -131,9 +131,9 @@
     num_rows = out_rows_avail;
 
   (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-				     (JDIMENSION) upsample->next_row_out,
-				     output_buf + *out_row_ctr,
-				     (int) num_rows);
+                                     (JDIMENSION) upsample->next_row_out,
+                                     output_buf + *out_row_ctr,
+                                     (int) num_rows);
 
   /* Adjust counts */
   *out_row_ctr += num_rows;
@@ -160,7 +160,7 @@
 
 METHODDEF(void)
 fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   *output_data_ptr = input_data;
 }
@@ -173,9 +173,9 @@
 
 METHODDEF(void)
 noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
-  *output_data_ptr = NULL;	/* safety check */
+  *output_data_ptr = NULL;      /* safety check */
 }
 
 
@@ -192,7 +192,7 @@
 
 METHODDEF(void)
 int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+              JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   JSAMPARRAY output_data = *output_data_ptr;
@@ -213,15 +213,15 @@
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       for (h = h_expand; h > 0; h--) {
-	*outptr++ = invalue;
+        *outptr++ = invalue;
       }
     }
     /* Generate any additional output rows by duplicating the first one */
     if (v_expand > 1) {
       jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-			v_expand-1, cinfo->output_width);
+                        v_expand-1, cinfo->output_width);
     }
     inrow++;
     outrow += v_expand;
@@ -236,7 +236,7 @@
 
 METHODDEF(void)
 h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -249,7 +249,7 @@
     outptr = output_data[inrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
@@ -264,7 +264,7 @@
 
 METHODDEF(void)
 h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -278,12 +278,12 @@
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
     jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-		      1, cinfo->output_width);
+                      1, cinfo->output_width);
     inrow++;
     outrow += 2;
   }
@@ -307,7 +307,7 @@
 
 METHODDEF(void)
 h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -348,7 +348,7 @@
 
 METHODDEF(void)
 h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr0, inptr1, outptr;
@@ -365,10 +365,10 @@
     for (v = 0; v < 2; v++) {
       /* inptr0 points to nearest input row, inptr1 points to next nearest */
       inptr0 = input_data[inrow];
-      if (v == 0)		/* next nearest is row above */
-	inptr1 = input_data[inrow-1];
-      else			/* next nearest is row below */
-	inptr1 = input_data[inrow+1];
+      if (v == 0)               /* next nearest is row above */
+        inptr1 = input_data[inrow-1];
+      else                      /* next nearest is row below */
+        inptr1 = input_data[inrow+1];
       outptr = output_data[outrow++];
 
       /* Special case for first column */
@@ -379,12 +379,12 @@
       lastcolsum = thiscolsum; thiscolsum = nextcolsum;
 
       for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
-	/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
-	/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
-	nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
-	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
-	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
-	lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+        /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+        /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+        nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+        *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+        lastcolsum = thiscolsum; thiscolsum = nextcolsum;
       }
 
       /* Special case for last column */
@@ -411,13 +411,13 @@
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
+                                SIZEOF(my_upsampler));
   cinfo->upsample = (struct jpeg_upsampler *) upsample;
   upsample->pub.start_pass = start_pass_upsample;
   upsample->pub.upsample = sep_upsample;
   upsample->pub.need_context_rows = FALSE; /* until we find out differently */
 
-  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+  if (cinfo->CCIR601_sampling)  /* this isn't supported */
     ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
 
   /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
@@ -434,9 +434,9 @@
      * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
      */
     h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) /
-		 cinfo->_min_DCT_scaled_size;
+                 cinfo->_min_DCT_scaled_size;
     v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
-		 cinfo->_min_DCT_scaled_size;
+                 cinfo->_min_DCT_scaled_size;
     h_out_group = cinfo->max_h_samp_factor;
     v_out_group = cinfo->max_v_samp_factor;
     upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
@@ -450,36 +450,36 @@
       upsample->methods[ci] = fullsize_upsample;
       need_buffer = FALSE;
     } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group == v_out_group) {
+               v_in_group == v_out_group) {
       /* Special cases for 2h1v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
-	if (jsimd_can_h2v1_fancy_upsample())
-	  upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
-	else
-	  upsample->methods[ci] = h2v1_fancy_upsample;
+        if (jsimd_can_h2v1_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
+        else
+          upsample->methods[ci] = h2v1_fancy_upsample;
       } else {
-	if (jsimd_can_h2v1_upsample())
-	  upsample->methods[ci] = jsimd_h2v1_upsample;
-	else
-	  upsample->methods[ci] = h2v1_upsample;
+        if (jsimd_can_h2v1_upsample())
+          upsample->methods[ci] = jsimd_h2v1_upsample;
+        else
+          upsample->methods[ci] = h2v1_upsample;
       }
     } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group * 2 == v_out_group) {
+               v_in_group * 2 == v_out_group) {
       /* Special cases for 2h2v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
-	if (jsimd_can_h2v2_fancy_upsample())
-	  upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
-	else
-	  upsample->methods[ci] = h2v2_fancy_upsample;
-	upsample->pub.need_context_rows = TRUE;
+        if (jsimd_can_h2v2_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
+        else
+          upsample->methods[ci] = h2v2_fancy_upsample;
+        upsample->pub.need_context_rows = TRUE;
       } else {
-	if (jsimd_can_h2v2_upsample())
-	  upsample->methods[ci] = jsimd_h2v2_upsample;
-	else
-	  upsample->methods[ci] = h2v2_upsample;
+        if (jsimd_can_h2v2_upsample())
+          upsample->methods[ci] = jsimd_h2v2_upsample;
+        else
+          upsample->methods[ci] = h2v2_upsample;
       }
     } else if ((h_out_group % h_in_group) == 0 &&
-	       (v_out_group % v_in_group) == 0) {
+               (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
       upsample->methods[ci] = int_upsample;
       upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
@@ -488,10 +488,10 @@
       ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
     if (need_buffer) {
       upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) jround_up((long) cinfo->output_width,
-				(long) cinfo->max_h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         (JDIMENSION) jround_up((long) cinfo->output_width,
+                                (long) cinfo->max_h_samp_factor),
+         (JDIMENSION) cinfo->max_v_samp_factor);
     }
   }
 }
diff --git a/jdtrans.c b/jdtrans.c
index f0cd0ae..0a163c0 100644
--- a/jdtrans.c
+++ b/jdtrans.c
@@ -55,20 +55,20 @@
       int retcode;
       /* Call progress monitor hook if present */
       if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       /* Absorb some more input */
       retcode = (*cinfo->inputctl->consume_input) (cinfo);
       if (retcode == JPEG_SUSPENDED)
-	return NULL;
+        return NULL;
       if (retcode == JPEG_REACHED_EOI)
-	break;
+        break;
       /* Advance progress counter if appropriate */
       if (cinfo->progress != NULL &&
-	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
+          (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+          /* startup underestimated number of scans; ratchet up one scan */
+          cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
       }
     }
     /* Set state so that jpeg_finish_decompress does the right thing */
@@ -84,7 +84,7 @@
   }
   /* Oops, improper usage */
   ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return NULL;			/* keep compiler happy */
+  return NULL;                  /* keep compiler happy */
 }
 
 
diff --git a/jerror.c b/jerror.c
index 3da7be8..cd4c2a3 100644
--- a/jerror.c
+++ b/jerror.c
@@ -28,7 +28,7 @@
 #include <windows.h>
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 
@@ -42,10 +42,10 @@
  */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_message_table	jMsgTable
+#define jpeg_std_message_table  jMsgTable
 #endif
 
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 const char * const jpeg_std_message_table[] = {
 #include "jerror.h"
@@ -105,7 +105,7 @@
 #ifdef USE_WINDOWS_MESSAGEBOX
   /* Display it in a message dialog box */
   MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
-	     MB_OK | MB_ICONERROR);
+             MB_OK | MB_ICONERROR);
 #else
   /* Send it to stderr, adding a newline */
   fprintf(stderr, "%s\n", buffer);
@@ -167,8 +167,8 @@
   if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
     msgtext = err->jpeg_message_table[msg_code];
   } else if (err->addon_message_table != NULL &&
-	     msg_code >= err->first_addon_message &&
-	     msg_code <= err->last_addon_message) {
+             msg_code >= err->first_addon_message &&
+             msg_code <= err->last_addon_message) {
     msgtext = err->addon_message_table[msg_code - err->first_addon_message];
   }
 
@@ -193,10 +193,10 @@
     sprintf(buffer, msgtext, err->msg_parm.s);
   else
     sprintf(buffer, msgtext,
-	    err->msg_parm.i[0], err->msg_parm.i[1],
-	    err->msg_parm.i[2], err->msg_parm.i[3],
-	    err->msg_parm.i[4], err->msg_parm.i[5],
-	    err->msg_parm.i[6], err->msg_parm.i[7]);
+            err->msg_parm.i[0], err->msg_parm.i[1],
+            err->msg_parm.i[2], err->msg_parm.i[3],
+            err->msg_parm.i[4], err->msg_parm.i[5],
+            err->msg_parm.i[6], err->msg_parm.i[7]);
 }
 
 
@@ -213,17 +213,17 @@
 {
   cinfo->err->num_warnings = 0;
   /* trace_level is not reset since it is an application-supplied parameter */
-  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+  cinfo->err->msg_code = 0;     /* may be useful as a flag for "no error" */
 }
 
 
 /*
  * Fill in the standard error-handling methods in a jpeg_error_mgr object.
  * Typical call is:
- *	struct jpeg_compress_struct cinfo;
- *	struct jpeg_error_mgr err;
+ *      struct jpeg_compress_struct cinfo;
+ *      struct jpeg_error_mgr err;
  *
- *	cinfo.err = jpeg_std_error(&err);
+ *      cinfo.err = jpeg_std_error(&err);
  * after which the application may override some of the methods.
  */
 
@@ -236,16 +236,16 @@
   err->format_message = format_message;
   err->reset_error_mgr = reset_error_mgr;
 
-  err->trace_level = 0;		/* default = no tracing */
-  err->num_warnings = 0;	/* no warnings emitted yet */
-  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+  err->trace_level = 0;         /* default = no tracing */
+  err->num_warnings = 0;        /* no warnings emitted yet */
+  err->msg_code = 0;            /* may be useful as a flag for "no error" */
 
   /* Initialize message table pointers */
   err->jpeg_message_table = jpeg_std_message_table;
   err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
 
   err->addon_message_table = NULL;
-  err->first_addon_message = 0;	/* for safety */
+  err->first_addon_message = 0; /* for safety */
   err->last_addon_message = 0;
 
   return err;
diff --git a/jerror.h b/jerror.h
index ea6ca4e..402613e 100644
--- a/jerror.h
+++ b/jerror.h
@@ -35,7 +35,7 @@
 
 typedef enum {
 
-#define JMESSAGE(code,string)	code ,
+#define JMESSAGE(code,string)   code ,
 
 #endif /* JMAKE_ENUM_LIST */
 
@@ -44,7 +44,7 @@
 /* For maintenance convenience, list is alphabetical by message code name */
 #if JPEG_LIB_VERSION < 70
 JMESSAGE(JERR_ARITH_NOTIMPL,
-	 "Sorry, arithmetic coding is not implemented")
+         "Sorry, arithmetic coding is not implemented")
 #endif
 JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
 JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
@@ -57,26 +57,26 @@
 JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_BAD_DROP_SAMPLING,
-	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
 #endif
 JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
 JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
 JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
 JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
 JMESSAGE(JERR_BAD_LIB_VERSION,
-	 "Wrong JPEG library version: library is %d, caller expects %d")
+         "Wrong JPEG library version: library is %d, caller expects %d")
 JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
 JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
 JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
 JMESSAGE(JERR_BAD_PROGRESSION,
-	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+         "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
 JMESSAGE(JERR_BAD_PROG_SCRIPT,
-	 "Invalid progressive parameters at scan script entry %d")
+         "Invalid progressive parameters at scan script entry %d")
 JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
 JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
 JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
 JMESSAGE(JERR_BAD_STRUCT_SIZE,
-	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+         "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
 JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
 JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
 JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
@@ -100,7 +100,7 @@
 JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
 JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
 JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
-	 "Cannot transcode due to multiple use of quantization table %d")
+         "Cannot transcode due to multiple use of quantization table %d")
 JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
 JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
 JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
@@ -115,7 +115,7 @@
 JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
 JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
 JMESSAGE(JERR_QUANT_COMPONENTS,
-	 "Cannot quantize more than %d color components")
+         "Cannot quantize more than %d color components")
 JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
 JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
 JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
@@ -127,7 +127,7 @@
 JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
 JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
 JMESSAGE(JERR_TFILE_WRITE,
-	 "Write failed on temporary file --- out of disk space?")
+         "Write failed on temporary file --- out of disk space?")
 JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
 JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
 JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
@@ -137,9 +137,9 @@
 JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT_SHORT)
 JMESSAGE(JMSG_VERSION, JVERSION)
 JMESSAGE(JTRC_16BIT_TABLES,
-	 "Caution: quantization tables are too coarse for baseline JPEG")
+         "Caution: quantization tables are too coarse for baseline JPEG")
 JMESSAGE(JTRC_ADOBE,
-	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+         "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
 JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
 JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
 JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
@@ -152,9 +152,9 @@
 JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
 JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
 JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
-	 "Warning: thumbnail image size does not match data length %u")
+         "Warning: thumbnail image size does not match data length %u")
 JMESSAGE(JTRC_JFIF_EXTENSION,
-	 "JFIF extension marker: type 0x%02x, length %u")
+         "JFIF extension marker: type 0x%02x, length %u")
 JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
 JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
 JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
@@ -165,7 +165,7 @@
 JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
 JMESSAGE(JTRC_RST, "RST%d")
 JMESSAGE(JTRC_SMOOTH_NOTIMPL,
-	 "Smoothing not supported with nonstandard sampling ratios")
+         "Smoothing not supported with nonstandard sampling ratios")
 JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
 JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
 JMESSAGE(JTRC_SOI, "Start of Image")
@@ -175,13 +175,13 @@
 JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
 JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
 JMESSAGE(JTRC_THUMB_JPEG,
-	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+         "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
 JMESSAGE(JTRC_THUMB_PALETTE,
-	 "JFIF extension marker: palette thumbnail image, length %u")
+         "JFIF extension marker: palette thumbnail image, length %u")
 JMESSAGE(JTRC_THUMB_RGB,
-	 "JFIF extension marker: RGB thumbnail image, length %u")
+         "JFIF extension marker: RGB thumbnail image, length %u")
 JMESSAGE(JTRC_UNKNOWN_IDS,
-	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+         "Unrecognized component IDs %d %d %d, assuming YCbCr")
 JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
 JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
 JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
@@ -189,15 +189,15 @@
 JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
 #endif
 JMESSAGE(JWRN_BOGUS_PROGRESSION,
-	 "Inconsistent progression sequence for component %d coefficient %d")
+         "Inconsistent progression sequence for component %d coefficient %d")
 JMESSAGE(JWRN_EXTRANEOUS_DATA,
-	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+         "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
 JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
 JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
 JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
 JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
 JMESSAGE(JWRN_MUST_RESYNC,
-	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+         "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
 JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
 JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
 #if JPEG_LIB_VERSION < 70
@@ -257,7 +257,7 @@
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
    (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
 
-#define MAKESTMT(stuff)		do { stuff } while (0)
+#define MAKESTMT(stuff)         do { stuff } while (0)
 
 /* Nonfatal errors (we can keep going, but the data is probably corrupt) */
 #define WARNMS(cinfo,code)  \
@@ -288,26 +288,26 @@
    (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
 #define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           _mp[4] = (p5); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMSS(cinfo,lvl,code,str)  \
   ((cinfo)->err->msg_code = (code), \
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
diff --git a/jfdctflt.c b/jfdctflt.c
index 79d7a00..a8367c6 100644
--- a/jfdctflt.c
+++ b/jfdctflt.c
@@ -37,7 +37,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_FLOAT_SUPPORTED
 
@@ -76,24 +76,24 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     dataptr[4] = tmp10 - tmp11;
-    
+
     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
     dataptr[6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -103,15 +103,15 @@
     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
-    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[5] = z13 + z2;      /* phase 6 */
     dataptr[3] = z13 - z2;
     dataptr[1] = z11 + z4;
     dataptr[7] = z11 - z4;
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -126,24 +126,24 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     dataptr[DCTSIZE*6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -153,7 +153,7 @@
     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@@ -161,7 +161,7 @@
     dataptr[DCTSIZE*1] = z11 + z4;
     dataptr[DCTSIZE*7] = z11 - z4;
 
-    dataptr++;			/* advance pointer to next column */
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jfdctfst.c b/jfdctfst.c
index ccb378a..4936d47 100644
--- a/jfdctfst.c
+++ b/jfdctfst.c
@@ -33,7 +33,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_IFAST_SUPPORTED
 
@@ -76,10 +76,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#define FIX_0_382683433  ((INT32)   98)         /* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)         /* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)         /* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)         /* FIX(1.306562965) */
 #else
 #define FIX_0_382683433  FIX(0.382683433)
 #define FIX_0_541196100  FIX(0.541196100)
@@ -132,24 +132,24 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     dataptr[4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
     dataptr[6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -159,15 +159,15 @@
     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
-    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[5] = z13 + z2;      /* phase 6 */
     dataptr[3] = z13 - z2;
     dataptr[1] = z11 + z4;
     dataptr[7] = z11 - z4;
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -182,24 +182,24 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     dataptr[DCTSIZE*6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -209,7 +209,7 @@
     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@@ -217,7 +217,7 @@
     dataptr[DCTSIZE*1] = z11 + z4;
     dataptr[DCTSIZE*7] = z11 - z4;
 
-    dataptr++;			/* advance pointer to next column */
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jfdctint.c b/jfdctint.c
index 0a78b64..14f486c 100644
--- a/jfdctint.c
+++ b/jfdctint.c
@@ -26,7 +26,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_ISLOW_SUPPORTED
 
@@ -79,7 +79,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -90,18 +90,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -160,36 +160,36 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
+                                   CONST_BITS-PASS1_BITS);
     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
-    
+                                   CONST_BITS-PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -198,16 +198,16 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-    
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns.
@@ -225,36 +225,36 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS);
-    
+                                           CONST_BITS+PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -263,20 +263,20 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
-					   CONST_BITS+PASS1_BITS);
-    
-    dataptr++;			/* advance pointer to next column */
+                                           CONST_BITS+PASS1_BITS);
+
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jidctflt.c b/jidctflt.c
index 0188ce3..c172ea1 100644
--- a/jidctflt.c
+++ b/jidctflt.c
@@ -39,7 +39,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_FLOAT_SUPPORTED
 
@@ -66,8 +66,8 @@
 
 GLOBAL(void)
 jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
@@ -95,14 +95,14 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -111,13 +111,13 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part */
 
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
@@ -125,17 +125,17 @@
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
     tmp11 = tmp0 - tmp2;
 
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
 
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
     tmp3 = tmp10 - tmp13;
     tmp1 = tmp11 + tmp12;
     tmp2 = tmp11 - tmp12;
-    
+
     /* Odd part */
 
     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
@@ -143,19 +143,19 @@
     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
 
-    z13 = tmp6 + tmp5;		/* phase 6 */
+    z13 = tmp6 + tmp5;          /* phase 6 */
     z10 = tmp6 - tmp5;
     z11 = tmp4 + tmp7;
     z12 = tmp4 - tmp7;
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
 
     z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
     tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
     tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
     tmp4 = tmp10 + tmp5;
 
@@ -168,11 +168,11 @@
     wsptr[DCTSIZE*4] = tmp3 + tmp4;
     wsptr[DCTSIZE*3] = tmp3 - tmp4;
 
-    inptr++;			/* advance pointers to next column */
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
   /* Note that we must descale the results by a factor of 8 == 2**3. */
 
@@ -184,7 +184,7 @@
      * the simplification applies less often (typically 5% to 10% of the time).
      * And testing floats for zero is relatively expensive, so we don't bother.
      */
-    
+
     /* Even part */
 
     tmp10 = wsptr[0] + wsptr[4];
@@ -219,23 +219,23 @@
     /* Final output stage: scale down by a factor of 8 and range-limit */
 
     outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
diff --git a/jidctfst.c b/jidctfst.c
index dba4216..cae22b9 100644
--- a/jidctfst.c
+++ b/jidctfst.c
@@ -35,7 +35,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_IFAST_SUPPORTED
 
@@ -78,7 +78,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  8
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -89,10 +89,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#define FIX_1_082392200  ((INT32)  277)         /* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)         /* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)         /* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)         /* FIX(2.613125930) */
 #else
 #define FIX_1_082392200  FIX(1.082392200)
 #define FIX_1_414213562  FIX(1.414213562)
@@ -129,7 +129,7 @@
 #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
 #else
 #define DEQUANTIZE(coef,quantval)  \
-	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+        DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
 #endif
 
 
@@ -138,11 +138,11 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#define ISHIFT_TEMPS    DCTELEM ishift_temp;
 #if BITS_IN_JSAMPLE == 8
-#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#define DCTELEMBITS  16         /* DCTELEM may be 16 or 32 bits */
 #else
-#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#define DCTELEMBITS  32         /* DCTELEM must be 32 bits */
 #endif
 #define IRIGHT_SHIFT(x,shft)  \
     ((ishift_temp = (x)) < 0 ? \
@@ -150,7 +150,7 @@
      (ishift_temp >> (shft)))
 #else
 #define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
 #endif
 
 #ifdef USE_ACCURATE_ROUNDING
@@ -166,8 +166,8 @@
 
 GLOBAL(void)
 jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   DCTELEM tmp10, tmp11, tmp12, tmp13;
@@ -178,9 +178,9 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS			/* for DESCALE */
-  ISHIFT_TEMPS			/* for IDESCALE */
+  int workspace[DCTSIZE2];      /* buffers data between passes */
+  SHIFT_TEMPS                   /* for DESCALE */
+  ISHIFT_TEMPS                  /* for IDESCALE */
 
   /* Pass 1: process columns from input, store into work array. */
 
@@ -196,11 +196,11 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
 
@@ -212,13 +212,13 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part */
 
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
@@ -226,17 +226,17 @@
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
     tmp11 = tmp0 - tmp2;
 
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
 
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
     tmp3 = tmp10 - tmp13;
     tmp1 = tmp11 + tmp12;
     tmp2 = tmp11 - tmp12;
-    
+
     /* Odd part */
 
     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
@@ -244,19 +244,19 @@
     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
 
-    z13 = tmp6 + tmp5;		/* phase 6 */
+    z13 = tmp6 + tmp5;          /* phase 6 */
     z10 = tmp6 - tmp5;
     z11 = tmp4 + tmp7;
     z12 = tmp4 - tmp7;
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
 
     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
     tmp4 = tmp10 + tmp5;
 
@@ -269,11 +269,11 @@
     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
 
-    inptr++;			/* advance pointers to next column */
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
   /* Note that we must descale the results by a factor of 8 == 2**3, */
   /* and also undo the PASS1_BITS scaling. */
@@ -288,14 +288,14 @@
      * test takes more time than it's worth.  In that case this section
      * may be commented out.
      */
-    
+
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
@@ -305,11 +305,11 @@
       outptr[6] = dcval;
       outptr[7] = dcval;
 
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
 
     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
@@ -317,7 +317,7 @@
 
     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
-	    - tmp13;
+            - tmp13;
 
     tmp0 = tmp10 + tmp13;
     tmp3 = tmp10 - tmp13;
@@ -331,37 +331,37 @@
     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
 
     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
     tmp4 = tmp10 + tmp5;
 
     /* Final output stage: scale down by a factor of 8 and range-limit */
 
     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
 
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
diff --git a/jidctint.c b/jidctint.c
index a72b320..688fd22 100644
--- a/jidctint.c
+++ b/jidctint.c
@@ -2,6 +2,7 @@
  * jidctint.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2009 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -23,12 +24,33 @@
  * The advantage of this method is that no data path contains more than one
  * multiplication; this allows a very simple and accurate implementation in
  * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement without additional resampling:
+ * NxN (N=1...16) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients as
+ * the lower frequencies and assuming the higher frequencies to be zero.
+ * It turns out that the computational effort, relative to the output size,
+ * is similar to that of the 8x8 IDCT.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_ISLOW_SUPPORTED
 
@@ -38,7 +60,7 @@
  */
 
 #if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
 #endif
 
 
@@ -79,7 +101,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -90,18 +112,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -146,8 +168,8 @@
 
 GLOBAL(void)
 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp1, tmp2, tmp3;
   INT32 tmp10, tmp11, tmp12, tmp13;
@@ -158,7 +180,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
+  int workspace[DCTSIZE2];      /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -177,14 +199,14 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -193,49 +215,49 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
+
     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
 
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z1 = tmp0 + tmp3;
     z2 = tmp1 + tmp2;
     z3 = tmp0 + tmp2;
     z4 = tmp1 + tmp3;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -244,17 +266,17 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     tmp0 += z1 + z3;
     tmp1 += z2 + z4;
     tmp2 += z2 + z3;
     tmp3 += z1 + z4;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
@@ -263,12 +285,12 @@
     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
-    inptr++;			/* advance pointers to next column */
+
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
   /* Note that we must descale the results by a factor of 8 == 2**3, */
   /* and also undo the PASS1_BITS scaling. */
@@ -283,14 +305,14 @@
      * test takes more time than it's worth.  In that case this section
      * may be commented out.
      */
-    
+
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
@@ -300,44 +322,44 @@
       outptr[6] = dcval;
       outptr[7] = dcval;
 
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
-    
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
+
     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    
+
     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = (INT32) wsptr[7];
     tmp1 = (INT32) wsptr[5];
     tmp2 = (INT32) wsptr[3];
     tmp3 = (INT32) wsptr[1];
-    
+
     z1 = tmp0 + tmp3;
     z2 = tmp1 + tmp2;
     z3 = tmp0 + tmp2;
     z4 = tmp1 + tmp3;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -346,44 +368,2256 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     tmp0 += z1 + z3;
     tmp1 += z2 + z4;
     tmp2 += z2 + z3;
     tmp3 += z1 + z4;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*7];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp13 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp13 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 7;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.  Only coefficient rows 0-5 and
+ * columns 0-5 are read; 6 samples are stored in each of 6 output rows.
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;               /* steps across the input columns */
+  ISLOW_MULT_TYPE * quantptr;   /* steps through dct_table with inptr */
+  int * wsptr;                  /* cursor into workspace[] */
+  JSAMPROW outptr;              /* first sample written in current row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* output lookup table */
+  int ctr;
+  int workspace[6*6];   /* buffers data between passes; row stride is 6 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 6 columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 2, 4 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;        /* DC term scaled up by 2**CONST_BITS */
+    /* Add fudge factor (half the pass-1 divisor) so the descale rounds. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part: input rows 1, 3, 5 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;   /* no multiply: output scale */
+
+    /* Final output stage (tmp11 and tmp1 were already descaled above) */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) (tmp11 + tmp1);
+    wsptr[6*4] = (int) (tmp11 - tmp1);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace columns 0, 2, 4 */
+
+    /* Add fudge factor: once shifted below it is half the final divisor. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part: workspace columns 1, 3, 5 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;   /* unlike pass 1, kept scaled */
+
+    /* Final output stage: descale, mask the index, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 6;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.  Only coefficient rows 0-4 and
+ * columns 0-4 are read; 5 samples are stored in each of 5 output rows.
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;               /* steps across the input columns */
+  ISLOW_MULT_TYPE * quantptr;   /* steps through dct_table with inptr */
+  int * wsptr;                  /* cursor into workspace[] */
+  JSAMPROW outptr;              /* first sample written in current row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* output lookup table */
+  int ctr;
+  int workspace[5*5];   /* buffers data between passes; row stride is 5 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 5 columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 2, 4 */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;       /* DC term scaled up by 2**CONST_BITS */
+    /* Add fudge factor (half the pass-1 divisor) so the descale rounds. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;           /* subtracts (tmp0-tmp1) * 2*(c2-c4) */
+
+    /* Odd part: input rows 1, 3 */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage: tmp12 is the middle row and has no odd term */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace columns 0, 2, 4 */
+
+    /* Add fudge factor: once shifted below it is half the final divisor. */
+    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp12 <<= CONST_BITS;
+    tmp0 = (INT32) wsptr[2];
+    tmp1 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;           /* subtracts (tmp0-tmp1) * 2*(c2-c4) */
+
+    /* Odd part: workspace columns 1, 3 */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage: descale, mask the index, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 5;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.  Only coefficient rows 0-2 and
+ * columns 0-2 are read; 3 samples are stored in each of 3 output rows.
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;               /* steps across the input columns */
+  ISLOW_MULT_TYPE * quantptr;   /* steps through dct_table with inptr */
+  int * wsptr;                  /* cursor into workspace[] */
+  JSAMPROW outptr;              /* first sample written in current row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* output lookup table */
+  int ctr;
+  int workspace[3*3];   /* buffers data between passes; row stride is 3 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 3 columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 2 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;        /* DC term scaled up by 2**CONST_BITS */
+    /* Add fudge factor (half the pass-1 divisor) so the descale rounds. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;  /* middle row: DC minus twice the c2 term */
+
+    /* Odd part: input row 1 */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: descale, leaving PASS1_BITS extra bits */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace columns 0, 2 */
+
+    /* Add fudge factor: once shifted below it is half the final divisor. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;  /* middle sample: DC minus twice c2 term */
+
+    /* Odd part: workspace column 1 */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: descale, mask the index, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 3;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.  Pass 1 expands each of 8 input columns to
+ * 9 values; pass 2 expands each of those 9 rows of 8 values to 9 samples.
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;               /* steps across the input columns */
+  ISLOW_MULT_TYPE * quantptr;   /* steps through dct_table with inptr */
+  int * wsptr;                  /* cursor into workspace[] */
+  JSAMPROW outptr;              /* first sample written in current row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* output lookup table */
+  int ctr;
+  int workspace[8*9];   /* buffers data between passes: 9 rows, stride 8 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 columns from input, store 9 values per column. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 2, 4, 6 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;        /* DC term scaled up by 2**CONST_BITS */
+    /* Add fudge factor (half the pass-1 divisor) so the descale rounds. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;                 /* middle row (row 4) */
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part: input rows 1, 3, 5, 7 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage: rows k and 8-k are even part +/- odd part */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace columns 0, 2, 4, 6 */
+
+    /* Add fudge factor: once shifted below it is half the final divisor. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;                 /* middle sample (index 4) */
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part: workspace columns 1, 3, 5, 7 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage: descale, mask the index, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.  Pass 1 expands each of 8 input columns to
+ * 10 values; pass 2 expands each of those 10 rows of 8 values to 10 samples.
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;               /* steps across the input columns */
+  ISLOW_MULT_TYPE * quantptr;   /* steps through dct_table with inptr */
+  int * wsptr;                  /* cursor into workspace[] */
+  JSAMPROW outptr;              /* first sample written in current row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* output lookup table */
+  int ctr;
+  int workspace[8*10];  /* buffers data between passes: 10 rows, stride 8 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 columns from input, store 10 values per column. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 2, 4, 6 */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;          /* DC term scaled up by 2**CONST_BITS */
+    /* Add fudge factor (half the pass-1 divisor) so the descale rounds. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+                        CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part: input rows 1, 3, 5, 7 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;      /* z3 scaled up; unscaled z3 is reused below */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;  /* no multiply: output scale */
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage (tmp22 and tmp12 were already descaled above) */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) (tmp22 + tmp12);
+    wsptr[8*7] = (int) (tmp22 - tmp12);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace columns 0, 2, 4, 6 */
+
+    /* Add fudge factor: once shifted below it is half the final divisor. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part: workspace columns 1, 3, 5, 7 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;          /* from here on z3 is the scaled value */
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;  /* z3 is already scaled */
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage: descale, mask the index, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 11x11 output block.
+ *
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*11];  /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp10 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block.  Pass 1 turns the 8 input columns into an
+ * 8x12 int work array; pass 2 turns its 12 rows into 12 output samples each.
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;  /* scratch, then odd part */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;  /* even-part results */
+  INT32 z1, z2, z3, z4;         /* loaded inputs, reused as scratch */
+  JCOEFPTR inptr;               /* current column within coef_block */
+  ISLOW_MULT_TYPE * quantptr;   /* same column within compptr->dct_table */
+  int * wsptr;                  /* cursor into workspace */
+  JSAMPROW outptr;              /* output_buf[ctr] offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);  /* final sample lookup */
+  int ctr;                      /* column (pass 1) or row (pass 2) counter */
+  int workspace[8*12];  /* buffers data between passes */
+  SHIFT_TEMPS           /* macro defined elsewhere; presumably for RIGHT_SHIFT */
+
+  /* Pass 1: process the 8 columns from input, store into work array. */
+  /* Each column yields 12 values, written 8 ints apart (wsptr[8*k]). */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: built from inptr[DCTSIZE*k] for k = 0, 4, 2, 6. */
+    /* NOTE(review): << on a negative signed value is UB in ISO C; confirm. */
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale: ONE << (pass-1 shift - 1). */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;          /* z1 itself is kept, scaled like z3 */
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;            /* tmp12 is reused for three different terms */
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: built from inptr[DCTSIZE*k] for k = 1, 3, 5, 7. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    /* tmp11 and tmp14 are intermediates here; both are overwritten below. */
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    /* The last two odd terms are formed from (z1 - z4) and (z2 - z3). */
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: rows k and 11-k get (even + odd), (even - odd). */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+  /* Same kernel as pass 1, reading wsptr[0..7] instead of dequantizing. */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: built from wsptr[0], wsptr[4], wsptr[2], wsptr[6]. */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;  /* bias becomes ONE << (CONST_BITS+PASS1_BITS+2) */
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;            /* tmp12 is reused for three different terms */
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: built from wsptr[1], wsptr[3], wsptr[5], wsptr[7]. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    /* tmp11 and tmp14 are intermediates here; both are overwritten below. */
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    /* The last two odd terms are formed from (z1 - z4) and (z2 - z3). */
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: outptr[k], outptr[11-k] = even + odd, even - odd, */
+    /* shifted right, masked with RANGE_MASK and mapped through range_limit. */
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.  Pass 1 turns the 8 input columns into an
+ * 8x13 int work array; pass 2 turns its 13 rows into 13 output samples each.
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;  /* scratch, then odd part */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part results */
+  INT32 z1, z2, z3, z4;         /* loaded inputs, reused as scratch */
+  JCOEFPTR inptr;               /* current column within coef_block */
+  ISLOW_MULT_TYPE * quantptr;   /* same column within compptr->dct_table */
+  int * wsptr;                  /* cursor into workspace */
+  JSAMPROW outptr;              /* output_buf[ctr] offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);  /* final sample lookup */
+  int ctr;                      /* column (pass 1) or row (pass 2) counter */
+  int workspace[8*13];  /* buffers data between passes */
+  SHIFT_TEMPS           /* macro defined elsewhere; presumably for RIGHT_SHIFT */
+
+  /* Pass 1: process the 8 columns from input, store into work array. */
+  /* Each column yields 13 values, written 8 ints apart (wsptr[8*k]). */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: built from inptr[DCTSIZE*k] for k = 0, 2, 4, 6. */
+    /* NOTE(review): << on a negative signed value is UB in ISO C; confirm. */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale: ONE << (pass-1 shift - 1). */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    /* tmp12/tmp13 are recomputed three times; each pair feeds two outputs. */
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    /* Here tmp13 carries -z1; it is subtracted in tmp23 and tmp24 below. */
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part: built from inptr[DCTSIZE*k] for k = 1, 3, 5, 7. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;            /* kept as a sum: scaled by c7 and later c11 */
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage: rows k and 12-k get (even + odd), (even - odd); */
+    /* the middle row 6 is tmp26 alone. */
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+  /* Same kernel as pass 1, reading wsptr[0..7] instead of dequantizing. */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: built from wsptr[0], wsptr[2], wsptr[4], wsptr[6]. */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;  /* bias becomes ONE << (CONST_BITS+PASS1_BITS+2) */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    /* tmp12/tmp13 are recomputed three times; each pair feeds two outputs. */
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    /* Here tmp13 carries -z1; it is subtracted in tmp23 and tmp24 below. */
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part: built from wsptr[1], wsptr[3], wsptr[5], wsptr[7]. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;            /* kept as a sum: scaled by c7 and later c11 */
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage: outptr[k], outptr[12-k] = even + odd, even - odd, */
+    /* shifted right, masked with RANGE_MASK and mapped through range_limit. */
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.  Pass 1 turns the 8 input columns into an
+ * 8x14 int work array; pass 2 turns its 14 rows into 14 output samples each.
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; /* scratch, then odd */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part results */
+  INT32 z1, z2, z3, z4;         /* loaded inputs, reused as scratch */
+  JCOEFPTR inptr;               /* current column within coef_block */
+  ISLOW_MULT_TYPE * quantptr;   /* same column within compptr->dct_table */
+  int * wsptr;                  /* cursor into workspace */
+  JSAMPROW outptr;              /* output_buf[ctr] offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);  /* final sample lookup */
+  int ctr;                      /* column (pass 1) or row (pass 2) counter */
+  int workspace[8*14];  /* buffers data between passes */
+  SHIFT_TEMPS           /* macro defined elsewhere; presumably for RIGHT_SHIFT */
+
+  /* Pass 1: process the 8 columns from input, store into work array. */
+  /* Each column yields 14 values, written 8 ints apart (wsptr[8*k]). */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: built from inptr[DCTSIZE*k] for k = 0, 4, 2, 6. */
+    /* NOTE(review): << on a negative signed value is UB in ISO C; confirm. */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale: ONE << (pass-1 shift - 1). */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+    /* tmp23 is descaled right away; rows 3 and 10 are stored unshifted. */
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+                        CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: built from inptr[DCTSIZE*k] for k = 1, 3, 5, 7. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;   /* z4 scaled; raw z4 is still used once below */
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;                /* z1 now holds z1 - z2 + z4 of the raw inputs */
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+    /* Odd term for rows 3/10: shifted by PASS1_BITS, added to descaled tmp23. */
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage: rows k and 13-k get (even + odd), (even - odd). */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) (tmp23 + tmp13);
+    wsptr[8*10] = (int) (tmp23 - tmp13);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+  /* Unlike pass 1, all 14 outputs here go through the same RIGHT_SHIFT. */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: built from wsptr[0], wsptr[4], wsptr[2], wsptr[6]. */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;  /* bias becomes ONE << (CONST_BITS+PASS1_BITS+2) */
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: built from wsptr[1], wsptr[3], wsptr[5], wsptr[7]. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;          /* z4 stays scaled for the rest of this row */
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+    /* Odd term for outputs 3/10; z1 is (wsptr[1] - wsptr[3]) at this point. */
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage: outptr[k], outptr[13-k] = even + odd, even - odd, */
+    /* shifted right, masked with RANGE_MASK and mapped through range_limit. */
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block.  Pass 1 expands each of the 8 input
+ * columns to 15 values; pass 2 expands each of the 15 rows to 15 samples.
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*15];  /* 15 rows x 8 columns, filled by pass 1 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 columns from input, store 15 rows into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: uses input rows 0, 2, 4, 6 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add rounding term (half of the pass-1 descale divisor) up front. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: uses input rows 1, 3, 5, 7 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: even+odd -> row k, even-odd -> row 14-k */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: uses workspace columns 0, 2, 4, 6 */
+
+    /* Add rounding term here; it is scaled up by CONST_BITS just below. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: uses workspace columns 1, 3, 5, 7 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[5];
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (INT32) wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: descale, mask, and map through range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* step to the next 8-int workspace row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block.  Pass 1 expands each of the 8 input
+ * columns to 16 values; pass 2 expands each of the 16 rows to 16 samples.
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];  /* 16 rows x 8 columns, filled by pass 1 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 columns from input, store 16 rows into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: uses input rows 0, 2, 4, 6 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add rounding term (half of the pass-1 descale divisor) up front. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: uses input rows 1, 3, 5, 7 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: even+odd -> row k, even-odd -> row 15-k */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: uses workspace columns 0, 2, 4, 6 */
+
+    /* Add rounding term here; it is scaled up by CONST_BITS just below. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: uses workspace columns 1, 3, 5, 7 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: descale, mask, and map through range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* step to the next 8-int workspace row */
+  }
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
 #endif /* DCT_ISLOW_SUPPORTED */
diff --git a/jidctred.c b/jidctred.c
index 421f3c7..2b385f8 100644
--- a/jidctred.c
+++ b/jidctred.c
@@ -23,7 +23,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef IDCT_SCALING_SUPPORTED
 
@@ -44,7 +44,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -55,20 +55,20 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_211164243  ((INT32)  1730)	/* FIX(0.211164243) */
-#define FIX_0_509795579  ((INT32)  4176)	/* FIX(0.509795579) */
-#define FIX_0_601344887  ((INT32)  4926)	/* FIX(0.601344887) */
-#define FIX_0_720959822  ((INT32)  5906)	/* FIX(0.720959822) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_850430095  ((INT32)  6967)	/* FIX(0.850430095) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_061594337  ((INT32)  8697)	/* FIX(1.061594337) */
-#define FIX_1_272758580  ((INT32)  10426)	/* FIX(1.272758580) */
-#define FIX_1_451774981  ((INT32)  11893)	/* FIX(1.451774981) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_2_172734803  ((INT32)  17799)	/* FIX(2.172734803) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_624509785  ((INT32)  29692)	/* FIX(3.624509785) */
+#define FIX_0_211164243  ((INT32)  1730)        /* FIX(0.211164243) */
+#define FIX_0_509795579  ((INT32)  4176)        /* FIX(0.509795579) */
+#define FIX_0_601344887  ((INT32)  4926)        /* FIX(0.601344887) */
+#define FIX_0_720959822  ((INT32)  5906)        /* FIX(0.720959822) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_850430095  ((INT32)  6967)        /* FIX(0.850430095) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_061594337  ((INT32)  8697)        /* FIX(1.061594337) */
+#define FIX_1_272758580  ((INT32)  10426)       /* FIX(1.272758580) */
+#define FIX_1_451774981  ((INT32)  11893)       /* FIX(1.451774981) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_2_172734803  ((INT32)  17799)       /* FIX(2.172734803) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_624509785  ((INT32)  29692)       /* FIX(3.624509785) */
 #else
 #define FIX_0_211164243  FIX(0.211164243)
 #define FIX_0_509795579  FIX(0.509795579)
@@ -116,8 +116,8 @@
 
 GLOBAL(void)
 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp2, tmp10, tmp12;
   INT32 z1, z2, z3, z4;
@@ -127,7 +127,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE*4];	/* buffers data between passes */
+  int workspace[DCTSIZE*4];     /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -140,57 +140,57 @@
     if (ctr == DCTSIZE-4)
       continue;
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
-	inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
+        inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero; we need not examine term 4 for 4x4 output */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
       wsptr[DCTSIZE*3] = dcval;
-      
+
       continue;
     }
-    
+
     /* Even part */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     tmp0 <<= (CONST_BITS+1);
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
     tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
-    
+
     tmp10 = tmp0 + tmp2;
     tmp12 = tmp0 - tmp2;
-    
+
     /* Odd part */
-    
+
     z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
-	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
-	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
-	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
-    
+         + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+         + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+         + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+
     tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
-	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
-	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
-	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+         + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+         + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+         + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
 
     /* Final output stage */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
   }
-  
+
   /* Pass 2: process 4 rows from work array, store into output array. */
 
   wsptr = workspace;
@@ -200,64 +200,64 @@
 
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
       outptr[3] = dcval;
-      
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
-    
+
     tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
-    
+
     tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
-	 + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
-    
+         + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+
     tmp10 = tmp0 + tmp2;
     tmp12 = tmp0 - tmp2;
-    
+
     /* Odd part */
-    
+
     z1 = (INT32) wsptr[7];
     z2 = (INT32) wsptr[5];
     z3 = (INT32) wsptr[3];
     z4 = (INT32) wsptr[1];
-    
+
     tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
-	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
-	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
-	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
-    
+         + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+         + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+         + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+
     tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
-	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
-	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
-	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+         + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+         + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+         + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
 
     /* Final output stage */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
@@ -269,8 +269,8 @@
 
 GLOBAL(void)
 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp10, z1;
   JCOEFPTR inptr;
@@ -279,7 +279,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE*2];	/* buffers data between passes */
+  int workspace[DCTSIZE*2];     /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -292,21 +292,21 @@
     if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6)
       continue;
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
-      
+
       continue;
     }
-    
+
     /* Even part */
-    
+
     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     tmp10 = z1 << (CONST_BITS+2);
-    
+
     /* Odd part */
 
     z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
@@ -319,11 +319,11 @@
     tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
 
     /* Final output stage */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
   }
-  
+
   /* Pass 2: process 2 rows from work array, store into output array. */
 
   wsptr = workspace;
@@ -335,37 +335,37 @@
     if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
-      
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
-    
+
     tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
-    
+
     /* Odd part */
 
     tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
-	 + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
-	 + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
-	 + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+         + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+         + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+         + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
 
     /* Final output stage */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
-					  CONST_BITS+PASS1_BITS+3+2)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+2)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
-					  CONST_BITS+PASS1_BITS+3+2)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3+2)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
@@ -377,8 +377,8 @@
 
 GLOBAL(void)
 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   int dcval;
   ISLOW_MULT_TYPE * quantptr;
diff --git a/jinclude.h b/jinclude.h
index 0a4f151..b14a3fc 100644
--- a/jinclude.h
+++ b/jinclude.h
@@ -17,8 +17,8 @@
 
 /* Include auto-config file to find out which system include files we need. */
 
-#include "jconfig.h"		/* auto configuration options */
-#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+#include "jconfig.h"            /* auto configuration options */
+#define JCONFIG_INCLUDED        /* so that jpeglib.h doesn't do it again */
 
 /*
  * We need the NULL macro and size_t typedef.
@@ -58,14 +58,14 @@
 #ifdef NEED_BSD_STRINGS
 
 #include <strings.h>
-#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+#define MEMZERO(target,size)    bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)  bcopy((const void *)(src), (void *)(dest), (size_t)(size))
 
 #else /* not BSD, assume ANSI/SysV string lib */
 
 #include <string.h>
-#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+#define MEMZERO(target,size)    memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)  memcpy((void *)(dest), (const void *)(src), (size_t)(size))
 
 #endif
 
@@ -77,7 +77,7 @@
  * we always use this SIZEOF() macro in place of using sizeof() directly.
  */
 
-#define SIZEOF(object)	((size_t) sizeof(object))
+#define SIZEOF(object)  ((size_t) sizeof(object))
 
 /*
  * The modules that use fread() and fwrite() always invoke them through
diff --git a/jmemmgr.c b/jmemmgr.c
index cf32524..15b6325 100644
--- a/jmemmgr.c
+++ b/jmemmgr.c
@@ -25,13 +25,13 @@
  */
 
 #define JPEG_INTERNALS
-#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#define AM_MEMORY_MANAGER       /* we define jvirt_Xarray_control structs */
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
+#include "jmemsys.h"            /* import the system-dependent declarations */
 
 #ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare getenv() */
 extern char * getenv JPP((const char * name));
 #endif
 #endif
@@ -78,7 +78,7 @@
  * such a compiler.
  */
 
-#ifndef ALIGN_SIZE		/* so can override from jconfig.h */
+#ifndef ALIGN_SIZE              /* so can override from jconfig.h */
 #ifndef WITH_SIMD
 #define ALIGN_SIZE  SIZEOF(double)
 #else
@@ -98,17 +98,17 @@
 typedef struct small_pool_struct * small_pool_ptr;
 
 typedef struct small_pool_struct {
-  small_pool_ptr next;	/* next in list of pools */
-  size_t bytes_used;		/* how many bytes already used within pool */
-  size_t bytes_left;		/* bytes still available in this pool */
+  small_pool_ptr next;  /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
 } small_pool_hdr;
 
 typedef struct large_pool_struct FAR * large_pool_ptr;
 
 typedef struct large_pool_struct {
-  large_pool_ptr next;	/* next in list of pools */
-  size_t bytes_used;		/* how many bytes already used within pool */
-  size_t bytes_left;		/* bytes still available in this pool */
+  large_pool_ptr next;  /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
 } large_pool_hdr;
 
 /*
@@ -116,7 +116,7 @@
  */
 
 typedef struct {
-  struct jpeg_memory_mgr pub;	/* public fields */
+  struct jpeg_memory_mgr pub;   /* public fields */
 
   /* Each pool identifier (lifetime class) names a linked list of pools. */
   small_pool_ptr small_list[JPOOL_NUMPOOLS];
@@ -136,7 +136,7 @@
   /* alloc_sarray and alloc_barray set this value for use by virtual
    * array routines.
    */
-  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+  JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */
 } my_memory_mgr;
 
 typedef my_memory_mgr * my_mem_ptr;
@@ -150,39 +150,39 @@
  */
 
 struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
+  JSAMPARRAY mem_buffer;        /* => the in-memory buffer */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION samplesperrow;     /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_sarray_ptr next;        /* link to next virtual sarray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
 };
 
 struct jvirt_barray_control {
-  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_barray_ptr next;	/* link to next virtual barray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
+  JBLOCKARRAY mem_buffer;       /* => the in-memory buffer */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION blocksperrow;      /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_barray_ptr next;        /* link to next virtual barray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
 };
 
 
-#ifdef MEM_STATS		/* optional extra stuff for statistics */
+#ifdef MEM_STATS                /* optional extra stuff for statistics */
 
 LOCAL(void)
 print_mem_stats (j_common_ptr cinfo, int pool_id)
@@ -196,19 +196,19 @@
    * This is helpful because message parm array can't handle longs.
    */
   fprintf(stderr, "Freeing pool %d, total space = %ld\n",
-	  pool_id, mem->total_space_allocated);
+          pool_id, mem->total_space_allocated);
 
   for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
        lhdr_ptr = lhdr_ptr->next) {
     fprintf(stderr, "  Large chunk used %ld\n",
-	    (long) lhdr_ptr->bytes_used);
+            (long) lhdr_ptr->bytes_used);
   }
 
   for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
        shdr_ptr = shdr_ptr->next) {
     fprintf(stderr, "  Small chunk used %ld free %ld\n",
-	    (long) shdr_ptr->bytes_used,
-	    (long) shdr_ptr->bytes_left);
+            (long) shdr_ptr->bytes_used,
+            (long) shdr_ptr->bytes_left);
   }
 }
 
@@ -221,7 +221,7 @@
 /* If we compiled MEM_STATS support, report alloc requests before dying */
 {
 #ifdef MEM_STATS
-  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+  cinfo->err->trace_level = 2;  /* force self_destruct to report stats */
 #endif
   ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
 }
@@ -244,19 +244,19 @@
  * adjustment.
  */
 
-static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] =
 {
-	1600,			/* first PERMANENT pool */
-	16000			/* first IMAGE pool */
+        1600,                   /* first PERMANENT pool */
+        16000                   /* first IMAGE pool */
 };
 
-static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] =
 {
-	0,			/* additional PERMANENT pools */
-	5000			/* additional IMAGE pools */
+        0,                      /* additional PERMANENT pools */
+        5000                    /* additional IMAGE pools */
 };
 
-#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+#define MIN_SLOP  50            /* greater than 0 to avoid futile looping */
 
 
 METHODDEF(void *)
@@ -278,16 +278,16 @@
 
   /* Check for unsatisfiable request (do now to ensure no overflow below) */
   if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
-    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+    out_of_memory(cinfo, 1);    /* request exceeds malloc's ability */
 
   /* See if space is available in any existing pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
   prev_hdr_ptr = NULL;
   hdr_ptr = mem->small_list[pool_id];
   while (hdr_ptr != NULL) {
     if (hdr_ptr->bytes_left >= sizeofobject)
-      break;			/* found pool with enough space */
+      break;                    /* found pool with enough space */
     prev_hdr_ptr = hdr_ptr;
     hdr_ptr = hdr_ptr->next;
   }
@@ -296,7 +296,7 @@
   if (hdr_ptr == NULL) {
     /* min_request is what we need now, slop is what will be leftover */
     min_request = SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
       slop = first_pool_slop[pool_id];
     else
       slop = extra_pool_slop[pool_id];
@@ -307,17 +307,17 @@
     for (;;) {
       hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
       if (hdr_ptr != NULL)
-	break;
+        break;
       slop /= 2;
-      if (slop < MIN_SLOP)	/* give up when it gets real small */
-	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+      if (slop < MIN_SLOP)      /* give up when it gets real small */
+        out_of_memory(cinfo, 2); /* jpeg_get_small failed */
     }
     mem->total_space_allocated += min_request + slop;
     /* Success, initialize the new pool header and add to end of list */
     hdr_ptr->next = NULL;
     hdr_ptr->bytes_used = 0;
     hdr_ptr->bytes_left = sizeofobject + slop;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
       mem->small_list[pool_id] = hdr_ptr;
     else
       prev_hdr_ptr->next = hdr_ptr;
@@ -367,17 +367,17 @@
 
   /* Check for unsatisfiable request (do now to ensure no overflow below) */
   if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
-    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+    out_of_memory(cinfo, 3);    /* request exceeds malloc's ability */
 
   /* Always make a new pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
-					    SIZEOF(large_pool_hdr) +
-					    ALIGN_SIZE - 1);
+                                            SIZEOF(large_pool_hdr) +
+                                            ALIGN_SIZE - 1);
   if (hdr_ptr == NULL)
-    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+    out_of_memory(cinfo, 4);    /* jpeg_get_large failed */
   mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr) + ALIGN_SIZE - 1;
 
   /* Success, initialize the new pool header and add to list */
@@ -417,7 +417,7 @@
 
 METHODDEF(JSAMPARRAY)
 alloc_sarray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION samplesperrow, JDIMENSION numrows)
+              JDIMENSION samplesperrow, JDIMENSION numrows)
 /* Allocate a 2-D sample array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -428,12 +428,12 @@
 
   /* Make sure each row is properly aligned */
   if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0)
-    out_of_memory(cinfo, 5);	/* safety check */
+    out_of_memory(cinfo, 5);    /* safety check */
   samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));
 
   /* Calculate max # of rows allowed in one allocation chunk */
   ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+          ((long) samplesperrow * SIZEOF(JSAMPLE));
   if (ltemp <= 0)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
   if (ltemp < (long) numrows)
@@ -444,15 +444,15 @@
 
   /* Get space for row pointers (small object) */
   result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
-				    (size_t) (numrows * SIZEOF(JSAMPROW)));
+                                    (size_t) (numrows * SIZEOF(JSAMPROW)));
 
   /* Get the rows themselves (large objects) */
   currow = 0;
   while (currow < numrows) {
     rowsperchunk = MIN(rowsperchunk, numrows - currow);
     workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
-		  * SIZEOF(JSAMPLE)));
+        (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+                  * SIZEOF(JSAMPLE)));
     for (i = rowsperchunk; i > 0; i--) {
       result[currow++] = workspace;
       workspace += samplesperrow;
@@ -470,7 +470,7 @@
 
 METHODDEF(JBLOCKARRAY)
 alloc_barray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION blocksperrow, JDIMENSION numrows)
+              JDIMENSION blocksperrow, JDIMENSION numrows)
 /* Allocate a 2-D coefficient-block array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -481,11 +481,11 @@
 
   /* Make sure each row is properly aligned */
   if ((SIZEOF(JBLOCK) % ALIGN_SIZE) != 0)
-    out_of_memory(cinfo, 6);	/* safety check */
+    out_of_memory(cinfo, 6);    /* safety check */
 
   /* Calculate max # of rows allowed in one allocation chunk */
   ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) blocksperrow * SIZEOF(JBLOCK));
+          ((long) blocksperrow * SIZEOF(JBLOCK));
   if (ltemp <= 0)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
   if (ltemp < (long) numrows)
@@ -496,15 +496,15 @@
 
   /* Get space for row pointers (small object) */
   result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
-				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
+                                     (size_t) (numrows * SIZEOF(JBLOCKROW)));
 
   /* Get the rows themselves (large objects) */
   currow = 0;
   while (currow < numrows) {
     rowsperchunk = MIN(rowsperchunk, numrows - currow);
     workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
-		  * SIZEOF(JBLOCK)));
+        (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+                  * SIZEOF(JBLOCK)));
     for (i = rowsperchunk; i > 0; i--) {
       result[currow++] = workspace;
       workspace += blocksperrow;
@@ -554,8 +554,8 @@
 
 METHODDEF(jvirt_sarray_ptr)
 request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION samplesperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
+                     JDIMENSION samplesperrow, JDIMENSION numrows,
+                     JDIMENSION maxaccess)
 /* Request a virtual 2-D sample array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -563,18 +563,18 @@
 
   /* Only IMAGE-lifetime virtual arrays are currently supported */
   if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   /* get control block */
   result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_sarray_control));
+                                          SIZEOF(struct jvirt_sarray_control));
 
-  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->mem_buffer = NULL;    /* marks array not yet realized */
   result->rows_in_array = numrows;
   result->samplesperrow = samplesperrow;
   result->maxaccess = maxaccess;
   result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->b_s_open = FALSE;     /* no associated backing-store object */
   result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
   mem->virt_sarray_list = result;
 
@@ -584,8 +584,8 @@
 
 METHODDEF(jvirt_barray_ptr)
 request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION blocksperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
+                     JDIMENSION blocksperrow, JDIMENSION numrows,
+                     JDIMENSION maxaccess)
 /* Request a virtual 2-D coefficient-block array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -593,18 +593,18 @@
 
   /* Only IMAGE-lifetime virtual arrays are currently supported */
   if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   /* get control block */
   result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_barray_control));
+                                          SIZEOF(struct jvirt_barray_control));
 
-  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->mem_buffer = NULL;    /* marks array not yet realized */
   result->rows_in_array = numrows;
   result->blocksperrow = blocksperrow;
   result->maxaccess = maxaccess;
   result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->b_s_open = FALSE;     /* no associated backing-store object */
   result->next = mem->virt_barray_list; /* add to list of virtual arrays */
   mem->virt_barray_list = result;
 
@@ -631,26 +631,26 @@
   for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
     if (sptr->mem_buffer == NULL) { /* if not realized yet */
       space_per_minheight += (long) sptr->maxaccess *
-			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+                             (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
       maximum_space += (long) sptr->rows_in_array *
-		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+                       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
     }
   }
   for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
     if (bptr->mem_buffer == NULL) { /* if not realized yet */
       space_per_minheight += (long) bptr->maxaccess *
-			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+                             (long) bptr->blocksperrow * SIZEOF(JBLOCK);
       maximum_space += (long) bptr->rows_in_array *
-		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+                       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
     }
   }
 
   if (space_per_minheight <= 0)
-    return;			/* no unrealized arrays, no work */
+    return;                     /* no unrealized arrays, no work */
 
   /* Determine amount of memory to actually use; this is system-dependent. */
   avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
-				 mem->total_space_allocated);
+                                 mem->total_space_allocated);
 
   /* If the maximum space needed is available, make all the buffers full
    * height; otherwise parcel it out with the same number of minheights
@@ -673,19 +673,19 @@
     if (sptr->mem_buffer == NULL) { /* if not realized yet */
       minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
       if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	sptr->rows_in_mem = sptr->rows_in_array;
+        /* This buffer fits in memory */
+        sptr->rows_in_mem = sptr->rows_in_array;
       } else {
-	/* It doesn't fit in memory, create backing store. */
-	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
-				(long) sptr->rows_in_array *
-				(long) sptr->samplesperrow *
-				(long) SIZEOF(JSAMPLE));
-	sptr->b_s_open = TRUE;
+        /* It doesn't fit in memory, create backing store. */
+        sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+        jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+                                (long) sptr->rows_in_array *
+                                (long) sptr->samplesperrow *
+                                (long) SIZEOF(JSAMPLE));
+        sptr->b_s_open = TRUE;
       }
       sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
-				      sptr->samplesperrow, sptr->rows_in_mem);
+                                      sptr->samplesperrow, sptr->rows_in_mem);
       sptr->rowsperchunk = mem->last_rowsperchunk;
       sptr->cur_start_row = 0;
       sptr->first_undef_row = 0;
@@ -697,19 +697,19 @@
     if (bptr->mem_buffer == NULL) { /* if not realized yet */
       minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
       if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	bptr->rows_in_mem = bptr->rows_in_array;
+        /* This buffer fits in memory */
+        bptr->rows_in_mem = bptr->rows_in_array;
       } else {
-	/* It doesn't fit in memory, create backing store. */
-	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
-				(long) bptr->rows_in_array *
-				(long) bptr->blocksperrow *
-				(long) SIZEOF(JBLOCK));
-	bptr->b_s_open = TRUE;
+        /* It doesn't fit in memory, create backing store. */
+        bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+        jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+                                (long) bptr->rows_in_array *
+                                (long) bptr->blocksperrow *
+                                (long) SIZEOF(JBLOCK));
+        bptr->b_s_open = TRUE;
       }
       bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
-				      bptr->blocksperrow, bptr->rows_in_mem);
+                                      bptr->blocksperrow, bptr->rows_in_mem);
       bptr->rowsperchunk = mem->last_rowsperchunk;
       bptr->cur_start_row = 0;
       bptr->first_undef_row = 0;
@@ -736,17 +736,17 @@
     rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
     /* Transfer no more than fits in file */
     rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
+    if (rows <= 0)              /* this chunk might be past end of file! */
       break;
     byte_count = rows * bytesperrow;
     if (writing)
       (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
+                                            (void FAR *) ptr->mem_buffer[i],
+                                            file_offset, byte_count);
     else
       (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
+                                           (void FAR *) ptr->mem_buffer[i],
+                                           file_offset, byte_count);
     file_offset += byte_count;
   }
 }
@@ -769,17 +769,17 @@
     rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
     /* Transfer no more than fits in file */
     rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
+    if (rows <= 0)              /* this chunk might be past end of file! */
       break;
     byte_count = rows * bytesperrow;
     if (writing)
       (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
+                                            (void FAR *) ptr->mem_buffer[i],
+                                            file_offset, byte_count);
     else
       (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
+                                           (void FAR *) ptr->mem_buffer[i],
+                                           file_offset, byte_count);
     file_offset += byte_count;
   }
 }
@@ -787,8 +787,8 @@
 
 METHODDEF(JSAMPARRAY)
 access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
+                    JDIMENSION start_row, JDIMENSION num_rows,
+                    boolean writable)
 /* Access the part of a virtual sample array starting at start_row */
 /* and extending for num_rows rows.  writable is true if  */
 /* caller intends to modify the accessed area. */
@@ -826,7 +826,7 @@
 
       ltemp = (long) end_row - (long) ptr->rows_in_mem;
       if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
+        ltemp = 0;              /* don't fall off front end of file */
       ptr->cur_start_row = (JDIMENSION) ltemp;
     }
     /* Read in the selected part of the array.
@@ -841,9 +841,9 @@
    */
   if (ptr->first_undef_row < end_row) {
     if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
     } else {
       undef_row = ptr->first_undef_row;
     }
@@ -854,12 +854,12 @@
       undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
       end_row -= ptr->cur_start_row;
       while (undef_row < end_row) {
-	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
+        jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
       }
     } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      if (! writable)           /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
     }
   }
   /* Flag the buffer dirty if caller will write in it */
@@ -872,8 +872,8 @@
 
 METHODDEF(JBLOCKARRAY)
 access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
+                    JDIMENSION start_row, JDIMENSION num_rows,
+                    boolean writable)
 /* Access the part of a virtual block array starting at start_row */
 /* and extending for num_rows rows.  writable is true if  */
 /* caller intends to modify the accessed area. */
@@ -911,7 +911,7 @@
 
       ltemp = (long) end_row - (long) ptr->rows_in_mem;
       if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
+        ltemp = 0;              /* don't fall off front end of file */
       ptr->cur_start_row = (JDIMENSION) ltemp;
     }
     /* Read in the selected part of the array.
@@ -926,9 +926,9 @@
    */
   if (ptr->first_undef_row < end_row) {
     if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
     } else {
       undef_row = ptr->first_undef_row;
     }
@@ -939,12 +939,12 @@
       undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
       end_row -= ptr->cur_start_row;
       while (undef_row < end_row) {
-	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
+        jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
       }
     } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      if (! writable)           /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
     }
   }
   /* Flag the buffer dirty if caller will write in it */
@@ -968,7 +968,7 @@
   size_t space_freed;
 
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
 #ifdef MEM_STATS
   if (cinfo->err->trace_level > 1)
@@ -981,16 +981,16 @@
     jvirt_barray_ptr bptr;
 
     for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-      if (sptr->b_s_open) {	/* there may be no backing store */
-	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      if (sptr->b_s_open) {     /* there may be no backing store */
+        sptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
       }
     }
     mem->virt_sarray_list = NULL;
     for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-      if (bptr->b_s_open) {	/* there may be no backing store */
-	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      if (bptr->b_s_open) {     /* there may be no backing store */
+        bptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
       }
     }
     mem->virt_barray_list = NULL;
@@ -1003,8 +1003,8 @@
   while (lhdr_ptr != NULL) {
     large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
     space_freed = lhdr_ptr->bytes_used +
-		  lhdr_ptr->bytes_left +
-		  SIZEOF(large_pool_hdr);
+                  lhdr_ptr->bytes_left +
+                  SIZEOF(large_pool_hdr);
     jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     lhdr_ptr = next_lhdr_ptr;
@@ -1017,8 +1017,8 @@
   while (shdr_ptr != NULL) {
     small_pool_ptr next_shdr_ptr = shdr_ptr->next;
     space_freed = shdr_ptr->bytes_used +
-		  shdr_ptr->bytes_left +
-		  SIZEOF(small_pool_hdr);
+                  shdr_ptr->bytes_left +
+                  SIZEOF(small_pool_hdr);
     jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     shdr_ptr = next_shdr_ptr;
@@ -1046,9 +1046,9 @@
 
   /* Release the memory manager control block too. */
   jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
-  cinfo->mem = NULL;		/* ensures I will be called only once */
+  cinfo->mem = NULL;            /* ensures I will be called only once */
 
-  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+  jpeg_mem_term(cinfo);         /* system-dependent cleanup */
 }
 
 
@@ -1065,7 +1065,7 @@
   int pool;
   size_t test_mac;
 
-  cinfo->mem = NULL;		/* for safety if init fails */
+  cinfo->mem = NULL;            /* for safety if init fails */
 
   /* Check for configuration errors.
    * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
@@ -1092,7 +1092,7 @@
   mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
 
   if (mem == NULL) {
-    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    jpeg_mem_term(cinfo);       /* system-dependent cleanup */
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
   }
 
@@ -1140,9 +1140,9 @@
       char ch = 'x';
 
       if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
-	if (ch == 'm' || ch == 'M')
-	  max_to_use *= 1000L;
-	mem->pub.max_memory_to_use = max_to_use * 1000L;
+        if (ch == 'm' || ch == 'M')
+          max_to_use *= 1000L;
+        mem->pub.max_memory_to_use = max_to_use * 1000L;
       }
     }
   }
diff --git a/jmemnobs.c b/jmemnobs.c
index 34b1895..2e4de09 100644
--- a/jmemnobs.c
+++ b/jmemnobs.c
@@ -18,9 +18,9 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
+#include "jmemsys.h"            /* import the system-dependent declarations */
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
 extern void * malloc JPP((size_t size));
 extern void free JPP((void *ptr));
 #endif
@@ -71,7 +71,7 @@
 
 GLOBAL(size_t)
 jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed,
-		    size_t max_bytes_needed, size_t already_allocated)
+                    size_t max_bytes_needed, size_t already_allocated)
 {
   return max_bytes_needed;
 }
@@ -85,7 +85,7 @@
 
 GLOBAL(void)
 jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
+                         long total_bytes_needed)
 {
   ERREXIT(cinfo, JERR_NO_BACKING_STORE);
 }
@@ -99,7 +99,7 @@
 GLOBAL(long)
 jpeg_mem_init (j_common_ptr cinfo)
 {
-  return 0;			/* just set max_memory_to_use to 0 */
+  return 0;                     /* just set max_memory_to_use to 0 */
 }
 
 GLOBAL(void)
diff --git a/jmemsys.h b/jmemsys.h
index b190945..5b6b7c4 100644
--- a/jmemsys.h
+++ b/jmemsys.h
@@ -22,14 +22,14 @@
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_get_small		jGetSmall
-#define jpeg_free_small		jFreeSmall
-#define jpeg_get_large		jGetLarge
-#define jpeg_free_large		jFreeLarge
-#define jpeg_mem_available	jMemAvail
-#define jpeg_open_backing_store	jOpenBackStore
-#define jpeg_mem_init		jMemInit
-#define jpeg_mem_term		jMemTerm
+#define jpeg_get_small          jGetSmall
+#define jpeg_free_small         jFreeSmall
+#define jpeg_get_large          jGetLarge
+#define jpeg_free_large         jFreeLarge
+#define jpeg_mem_available      jMemAvail
+#define jpeg_open_backing_store jOpenBackStore
+#define jpeg_mem_init           jMemInit
+#define jpeg_mem_term           jMemTerm
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
@@ -46,7 +46,7 @@
 
 EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
 EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
-				  size_t sizeofobject));
+                                  size_t sizeofobject));
 
 /*
  * These two functions are used to allocate and release large chunks of
@@ -58,9 +58,9 @@
  */
 
 EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
-				       size_t sizeofobject));
+                                       size_t sizeofobject));
 EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
-				  size_t sizeofobject));
+                                  size_t sizeofobject));
 
 /*
  * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
@@ -74,7 +74,7 @@
  * size_t and will be a multiple of sizeof(align_type).
  */
 
-#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#ifndef MAX_ALLOC_CHUNK         /* may be overridden in jconfig.h */
 #define MAX_ALLOC_CHUNK  1000000000L
 #endif
 
@@ -101,9 +101,9 @@
  */
 
 EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo,
-				     size_t min_bytes_needed,
-				     size_t max_bytes_needed,
-				     size_t already_allocated));
+                                     size_t min_bytes_needed,
+                                     size_t max_bytes_needed,
+                                     size_t already_allocated));
 
 
 /*
@@ -113,23 +113,23 @@
  * are private to the system-dependent backing store routines.
  */
 
-#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+#define TEMP_NAME_LENGTH   64   /* max length of a temporary file's name */
 
 
-#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+#ifdef USE_MSDOS_MEMMGR         /* DOS-specific junk */
 
-typedef unsigned short XMSH;	/* type of extended-memory handles */
-typedef unsigned short EMSH;	/* type of expanded-memory handles */
+typedef unsigned short XMSH;    /* type of extended-memory handles */
+typedef unsigned short EMSH;    /* type of expanded-memory handles */
 
 typedef union {
-  short file_handle;		/* DOS file handle if it's a temp file */
-  XMSH xms_handle;		/* handle if it's a chunk of XMS */
-  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+  short file_handle;            /* DOS file handle if it's a temp file */
+  XMSH xms_handle;              /* handle if it's a chunk of XMS */
+  EMSH ems_handle;              /* handle if it's a chunk of EMS */
 } handle_union;
 
 #endif /* USE_MSDOS_MEMMGR */
 
-#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#ifdef USE_MAC_MEMMGR           /* Mac-specific junk */
 #include <Files.h>
 #endif /* USE_MAC_MEMMGR */
 
@@ -139,30 +139,30 @@
 typedef struct backing_store_struct {
   /* Methods for reading/writing/closing this backing-store object */
   JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
-				     backing_store_ptr info,
-				     void FAR * buffer_address,
-				     long file_offset, long byte_count));
+                                     backing_store_ptr info,
+                                     void FAR * buffer_address,
+                                     long file_offset, long byte_count));
   JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info,
-				      void FAR * buffer_address,
-				      long file_offset, long byte_count));
+                                      backing_store_ptr info,
+                                      void FAR * buffer_address,
+                                      long file_offset, long byte_count));
   JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info));
+                                      backing_store_ptr info));
 
   /* Private fields for system-dependent backing-store management */
 #ifdef USE_MSDOS_MEMMGR
   /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;		/* reference to backing-store storage object */
+  handle_union handle;          /* reference to backing-store storage object */
   char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
 #else
 #ifdef USE_MAC_MEMMGR
   /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;		/* file reference number to temp file */
-  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  short temp_file;              /* file reference number to temp file */
+  FSSpec tempSpec;              /* the FSSpec for the temp file */
   char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
 #else
   /* For a typical implementation with temp files, we need: */
-  FILE * temp_file;		/* stdio reference to temp file */
+  FILE * temp_file;             /* stdio reference to temp file */
   char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
 #endif
 #endif
@@ -178,8 +178,8 @@
  */
 
 EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
-					  backing_store_ptr info,
-					  long total_bytes_needed));
+                                          backing_store_ptr info,
+                                          long total_bytes_needed));
 
 
 /*
diff --git a/jmorecfg.h b/jmorecfg.h
index 55af056..235f507 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009, 2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -22,7 +22,7 @@
  * We do not support run-time selection of data precision, sorry.
  */
 
-#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+#define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
 
 
 /*
@@ -34,7 +34,7 @@
  * bytes of storage, whether actually used in an image or not.)
  */
 
-#define MAX_COMPONENTS  10	/* maximum number of image components */
+#define MAX_COMPONENTS  10      /* maximum number of image components */
 
 
 /*
@@ -72,8 +72,8 @@
 
 #endif /* HAVE_UNSIGNED_CHAR */
 
-#define MAXJSAMPLE	255
-#define CENTERJSAMPLE	128
+#define MAXJSAMPLE      255
+#define CENTERJSAMPLE   128
 
 #endif /* BITS_IN_JSAMPLE == 8 */
 
@@ -86,8 +86,8 @@
 typedef short JSAMPLE;
 #define GETJSAMPLE(value)  ((int) (value))
 
-#define MAXJSAMPLE	4095
-#define CENTERJSAMPLE	2048
+#define MAXJSAMPLE      4095
+#define CENTERJSAMPLE   2048
 
 #endif /* BITS_IN_JSAMPLE == 12 */
 
@@ -153,13 +153,13 @@
 
 /* INT16 must hold at least the values -32768..32767. */
 
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT16 */
 typedef short INT16;
 #endif
 
 /* INT32 must hold at least signed 32-bit values. */
 
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT32 */
 typedef long INT32;
 #endif
 
@@ -183,13 +183,13 @@
  */
 
 /* a function called through method pointers: */
-#define METHODDEF(type)		static type
+#define METHODDEF(type)         static type
 /* a function used only in its module: */
-#define LOCAL(type)		static type
+#define LOCAL(type)             static type
 /* a function referenced thru EXTERNs: */
-#define GLOBAL(type)		type
+#define GLOBAL(type)            type
 /* a reference to a GLOBAL function: */
-#define EXTERN(type)		extern type
+#define EXTERN(type)            extern type
 
 
 /* This macro is used to declare a "method", that is, a function pointer.
@@ -231,11 +231,11 @@
 #ifndef HAVE_BOOLEAN
 typedef int boolean;
 #endif
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
+#ifndef FALSE                   /* in case these macros already exist */
+#define FALSE   0               /* values of boolean */
 #endif
 #ifndef TRUE
-#define TRUE	1
+#define TRUE    1
 #endif
 
 
@@ -263,15 +263,15 @@
 
 /* Capability options common to encoder and decoder: */
 
-#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+#define DCT_ISLOW_SUPPORTED     /* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED     /* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED     /* floating-point: accurate, fast on fast HW */
 
 /* Encoder capability options: */
 
 #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+#define C_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define ENTROPY_OPT_SUPPORTED       /* Optimization of entropy coding parms? */
 /* Note: if you selected 12-bit data precision, it is dangerous to turn off
  * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
  * precision, so jchuff.c normally uses entropy optimization to compute
@@ -285,37 +285,43 @@
 /* Decoder capability options: */
 
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define D_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define SAVE_MARKERS_SUPPORTED      /* jpeg_save_markers() needed? */
 #define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
-#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#define IDCT_SCALING_SUPPORTED      /* Output rescaling via IDCT? */
 #undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
 #define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
-#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
-#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+#define QUANT_1PASS_SUPPORTED       /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED       /* 2-pass color quantization? */
 
 /* more capability options later, no doubt */
 
 
 /*
- * Ordering of RGB data in scanlines passed to or from the application.
- * If your application wants to deal with data in the order B,G,R, just
- * change these macros.  You can also deal with formats such as R,G,B,X
- * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
- * the offsets will also change the order in which colormap data is organized.
- * RESTRICTIONS:
- * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
- * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not
- *    useful if you are using JPEG color spaces other than YCbCr or grayscale.
- * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
- *    is not 3 (they don't understand about dummy color components!).  So you
- *    can't use color quantization if you change that value.
+ * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial
+ * feature of libjpeg.  The idea was that, if an application developer needed
+ * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could
+ * change these macros, rebuild libjpeg, and link their application statically
+ * with it.  In reality, few people ever did this, because there were some
+ * severe restrictions involved (cjpeg and djpeg no longer worked properly,
+ * compressing/decompressing RGB JPEGs no longer worked properly, and the color
+ * quantizer wouldn't work with pixel sizes other than 3).  Further, since all
+ * of the O/S-supplied versions of libjpeg were built with the default values
+ * of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications have
+ * come to regard these values as immutable.
+ *
+ * The libjpeg-turbo colorspace extensions provide a much cleaner way of
+ * compressing from/decompressing to buffers with arbitrary component orders
+ * and pixel sizes.  Thus, we do not support changing the values of RGB_RED,
+ * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE.  In addition to the restrictions
+ * listed above, changing these values will also break the SIMD extensions and
+ * the regression tests.
  */
 
-#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	2	/* Offset of Blue */
-#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+#define RGB_RED         0       /* Offset of Red in an RGB scanline element */
+#define RGB_GREEN       1       /* Offset of Green */
+#define RGB_BLUE        2       /* Offset of Blue */
+#define RGB_PIXELSIZE   3       /* JSAMPLEs per RGB scanline element */
 
 #define JPEG_NUMCS 16
 
@@ -382,7 +388,7 @@
 
 #ifndef MULTIPLIER
 #ifndef WITH_SIMD
-#define MULTIPLIER  int		/* type for fastest integer multiply */
+#define MULTIPLIER  int         /* type for fastest integer multiply */
 #else
 #define MULTIPLIER short  /* prefer 16-bit with SIMD for parellelism */
 #endif
diff --git a/jpegcomp.h b/jpegcomp.h
index 1b9e0a4..ed9eeab 100644
--- a/jpegcomp.h
+++ b/jpegcomp.h
@@ -11,6 +11,8 @@
 
 #if JPEG_LIB_VERSION >= 70
 #define _DCT_scaled_size DCT_h_scaled_size
+#define _DCT_h_scaled_size DCT_h_scaled_size
+#define _DCT_v_scaled_size DCT_v_scaled_size
 #define _min_DCT_scaled_size min_DCT_h_scaled_size
 #define _min_DCT_h_scaled_size min_DCT_h_scaled_size
 #define _min_DCT_v_scaled_size min_DCT_v_scaled_size
@@ -18,6 +20,8 @@
 #define _jpeg_height jpeg_height
 #else
 #define _DCT_scaled_size DCT_scaled_size
+#define _DCT_h_scaled_size DCT_scaled_size
+#define _DCT_v_scaled_size DCT_scaled_size
 #define _min_DCT_scaled_size min_DCT_scaled_size
 #define _min_DCT_h_scaled_size min_DCT_scaled_size
 #define _min_DCT_v_scaled_size min_DCT_scaled_size
diff --git a/jpegint.h b/jpegint.h
index 7871748..44a330d 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -14,30 +14,30 @@
 
 /* Declarations for both compression & decompression */
 
-typedef enum {			/* Operating modes for buffer controllers */
-	JBUF_PASS_THRU,		/* Plain stripwise operation */
-	/* Remaining modes require a full-image buffer to have been created */
-	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
-	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
-	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+typedef enum {            /* Operating modes for buffer controllers */
+  JBUF_PASS_THRU,         /* Plain stripwise operation */
+  /* Remaining modes require a full-image buffer to have been created */
+  JBUF_SAVE_SOURCE,       /* Run source subobject only, save output */
+  JBUF_CRANK_DEST,        /* Run dest subobject only, using saved data */
+  JBUF_SAVE_AND_PASS      /* Run both subobjects, save output */
 } J_BUF_MODE;
 
 /* Values of global_state field (jdapi.c has some dependencies on ordering!) */
-#define CSTATE_START	100	/* after create_compress */
-#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
-#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
-#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
-#define DSTATE_START	200	/* after create_decompress */
-#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
-#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
-#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
-#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
-#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
-#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
-#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
-#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
-#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
-#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+#define CSTATE_START    100     /* after create_compress */
+#define CSTATE_SCANNING 101     /* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK   102     /* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS  103     /* jpeg_write_coefficients done */
+#define DSTATE_START    200     /* after create_decompress */
+#define DSTATE_INHEADER 201     /* reading header markers, no SOS yet */
+#define DSTATE_READY    202     /* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD  203     /* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN  204     /* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING 205     /* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK   206     /* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE 207     /* expecting jpeg_start_output */
+#define DSTATE_BUFPOST  208     /* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS  209     /* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING 210     /* looking for EOI in jpeg_finish_decompress */
 
 
 /* Declarations for compression modules */
@@ -49,54 +49,54 @@
   JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
 
   /* State variables made visible to other modules */
-  boolean call_pass_startup;	/* True if pass_startup must be called */
-  boolean is_last_pass;		/* True during last pass */
+  boolean call_pass_startup;    /* True if pass_startup must be called */
+  boolean is_last_pass;         /* True during last pass */
 };
 
 /* Main buffer control (downsampled-data buffer) */
 struct jpeg_c_main_controller {
   JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
   JMETHOD(void, process_data, (j_compress_ptr cinfo,
-			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			       JDIMENSION in_rows_avail));
+                               JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                               JDIMENSION in_rows_avail));
 };
 
 /* Compression preprocessing (downsampling input buffer control) */
 struct jpeg_c_prep_controller {
   JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
   JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
-				   JSAMPARRAY input_buf,
-				   JDIMENSION *in_row_ctr,
-				   JDIMENSION in_rows_avail,
-				   JSAMPIMAGE output_buf,
-				   JDIMENSION *out_row_group_ctr,
-				   JDIMENSION out_row_groups_avail));
+                                   JSAMPARRAY input_buf,
+                                   JDIMENSION *in_row_ctr,
+                                   JDIMENSION in_rows_avail,
+                                   JSAMPIMAGE output_buf,
+                                   JDIMENSION *out_row_group_ctr,
+                                   JDIMENSION out_row_groups_avail));
 };
 
 /* Coefficient buffer control */
 struct jpeg_c_coef_controller {
   JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
   JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
-				   JSAMPIMAGE input_buf));
+                                   JSAMPIMAGE input_buf));
 };
 
 /* Colorspace conversion */
 struct jpeg_color_converter {
   JMETHOD(void, start_pass, (j_compress_ptr cinfo));
   JMETHOD(void, color_convert, (j_compress_ptr cinfo,
-				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-				JDIMENSION output_row, int num_rows));
+                                JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                                JDIMENSION output_row, int num_rows));
 };
 
 /* Downsampling */
 struct jpeg_downsampler {
   JMETHOD(void, start_pass, (j_compress_ptr cinfo));
   JMETHOD(void, downsample, (j_compress_ptr cinfo,
-			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-			     JSAMPIMAGE output_buf,
-			     JDIMENSION out_row_group_index));
+                             JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+                             JSAMPIMAGE output_buf,
+                             JDIMENSION out_row_group_index));
 
-  boolean need_context_rows;	/* TRUE if need rows above & below */
+  boolean need_context_rows;    /* TRUE if need rows above & below */
 };
 
 /* Forward DCT (also controls coefficient quantization) */
@@ -104,10 +104,10 @@
   JMETHOD(void, start_pass, (j_compress_ptr cinfo));
   /* perhaps this should be an array??? */
   JMETHOD(void, forward_DCT, (j_compress_ptr cinfo,
-			      jpeg_component_info * compptr,
-			      JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-			      JDIMENSION start_row, JDIMENSION start_col,
-			      JDIMENSION num_blocks));
+                              jpeg_component_info * compptr,
+                              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                              JDIMENSION start_row, JDIMENSION start_col,
+                              JDIMENSION num_blocks));
 };
 
 /* Entropy encoding */
@@ -127,7 +127,7 @@
   /* These routines are exported to allow insertion of extra markers */
   /* Probably only COM and APPn markers should be written this way */
   JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
-				      unsigned int datalen));
+                                      unsigned int datalen));
   JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
 };
 
@@ -140,7 +140,7 @@
   JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
 
   /* State variables made visible to other modules */
-  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+  boolean is_dummy_pass;        /* True during 1st pass for 2-pass quant */
 };
 
 /* Input control module */
@@ -151,16 +151,16 @@
   JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
 
   /* State variables made visible to other modules */
-  boolean has_multiple_scans;	/* True if file has multiple scans */
-  boolean eoi_reached;		/* True when EOI has been consumed */
+  boolean has_multiple_scans;   /* True if file has multiple scans */
+  boolean eoi_reached;          /* True when EOI has been consumed */
 };
 
 /* Main buffer control (downsampled-data buffer) */
 struct jpeg_d_main_controller {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
   JMETHOD(void, process_data, (j_decompress_ptr cinfo,
-			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			       JDIMENSION out_rows_avail));
+                               JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                               JDIMENSION out_rows_avail));
 };
 
 /* Coefficient buffer control */
@@ -169,7 +169,7 @@
   JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
   JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
   JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
-				 JSAMPIMAGE output_buf));
+                                 JSAMPIMAGE output_buf));
   /* Pointer to array of coefficient virtual arrays, or NULL if none */
   jvirt_barray_ptr *coef_arrays;
 };
@@ -178,12 +178,12 @@
 struct jpeg_d_post_controller {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
   JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
-				    JSAMPIMAGE input_buf,
-				    JDIMENSION *in_row_group_ctr,
-				    JDIMENSION in_row_groups_avail,
-				    JSAMPARRAY output_buf,
-				    JDIMENSION *out_row_ctr,
-				    JDIMENSION out_rows_avail));
+                                    JSAMPIMAGE input_buf,
+                                    JDIMENSION *in_row_group_ctr,
+                                    JDIMENSION in_row_groups_avail,
+                                    JSAMPARRAY output_buf,
+                                    JDIMENSION *out_row_ctr,
+                                    JDIMENSION out_rows_avail));
 };
 
 /* Marker reading & parsing */
@@ -200,28 +200,28 @@
   /* State of marker reader --- nominally internal, but applications
    * supplying COM or APPn handlers might like to know the state.
    */
-  boolean saw_SOI;		/* found SOI? */
-  boolean saw_SOF;		/* found SOF? */
-  int next_restart_num;		/* next restart number expected (0-7) */
-  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+  boolean saw_SOI;              /* found SOI? */
+  boolean saw_SOF;              /* found SOF? */
+  int next_restart_num;         /* next restart number expected (0-7) */
+  unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */
 };
 
 /* Entropy decoding */
 struct jpeg_entropy_decoder {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
   JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
-				JBLOCKROW *MCU_data));
+                                JBLOCKROW *MCU_data));
 
   /* This is here to share code between baseline and progressive decoders; */
   /* other modules probably should not use it */
-  boolean insufficient_data;	/* set TRUE after emitting warning */
+  boolean insufficient_data;    /* set TRUE after emitting warning */
 };
 
 /* Inverse DCT (also performs dequantization) */
 typedef JMETHOD(void, inverse_DCT_method_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col));
+                (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col));
 
 struct jpeg_inverse_dct {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
@@ -233,30 +233,30 @@
 struct jpeg_upsampler {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
   JMETHOD(void, upsample, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf,
-			   JDIMENSION *in_row_group_ctr,
-			   JDIMENSION in_row_groups_avail,
-			   JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail));
+                           JSAMPIMAGE input_buf,
+                           JDIMENSION *in_row_group_ctr,
+                           JDIMENSION in_row_groups_avail,
+                           JSAMPARRAY output_buf,
+                           JDIMENSION *out_row_ctr,
+                           JDIMENSION out_rows_avail));
 
-  boolean need_context_rows;	/* TRUE if need rows above & below */
+  boolean need_context_rows;    /* TRUE if need rows above & below */
 };
 
 /* Colorspace conversion */
 struct jpeg_color_deconverter {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
   JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
-				JSAMPIMAGE input_buf, JDIMENSION input_row,
-				JSAMPARRAY output_buf, int num_rows));
+                                JSAMPIMAGE input_buf, JDIMENSION input_row,
+                                JSAMPARRAY output_buf, int num_rows));
 };
 
 /* Color quantization or color precision reduction */
 struct jpeg_color_quantizer {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
   JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
-				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
-				 int num_rows));
+                                 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+                                 int num_rows));
   JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
   JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
 };
@@ -265,9 +265,9 @@
 /* Miscellaneous useful macros */
 
 #undef MAX
-#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#define MAX(a,b)        ((a) > (b) ? (a) : (b))
 #undef MIN
-#define MIN(a,b)	((a) < (b) ? (a) : (b))
+#define MIN(a,b)        ((a) < (b) ? (a) : (b))
 
 
 /* We assume that right shift corresponds to signed division by 2 with
@@ -281,69 +281,69 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS	INT32 shift_temp;
+#define SHIFT_TEMPS     INT32 shift_temp;
 #define RIGHT_SHIFT(x,shft)  \
-	((shift_temp = (x)) < 0 ? \
-	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
-	 (shift_temp >> (shft)))
+        ((shift_temp = (x)) < 0 ? \
+         (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+         (shift_temp >> (shft)))
 #else
 #define SHIFT_TEMPS
-#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 #endif
 
 
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_compress_master	jICompress
-#define jinit_c_master_control	jICMaster
-#define jinit_c_main_controller	jICMainC
-#define jinit_c_prep_controller	jICPrepC
-#define jinit_c_coef_controller	jICCoefC
-#define jinit_color_converter	jICColor
-#define jinit_downsampler	jIDownsampler
-#define jinit_forward_dct	jIFDCT
-#define jinit_huff_encoder	jIHEncoder
-#define jinit_phuff_encoder	jIPHEncoder
-#define jinit_arith_encoder	jIAEncoder
-#define jinit_marker_writer	jIMWriter
-#define jinit_master_decompress	jIDMaster
-#define jinit_d_main_controller	jIDMainC
-#define jinit_d_coef_controller	jIDCoefC
-#define jinit_d_post_controller	jIDPostC
-#define jinit_input_controller	jIInCtlr
-#define jinit_marker_reader	jIMReader
-#define jinit_huff_decoder	jIHDecoder
-#define jinit_phuff_decoder	jIPHDecoder
-#define jinit_arith_decoder	jIADecoder
-#define jinit_inverse_dct	jIIDCT
-#define jinit_upsampler		jIUpsampler
-#define jinit_color_deconverter	jIDColor
-#define jinit_1pass_quantizer	jI1Quant
-#define jinit_2pass_quantizer	jI2Quant
-#define jinit_merged_upsampler	jIMUpsampler
-#define jinit_memory_mgr	jIMemMgr
-#define jdiv_round_up		jDivRound
-#define jround_up		jRound
-#define jcopy_sample_rows	jCopySamples
-#define jcopy_block_row		jCopyBlocks
-#define jzero_far		jZeroFar
-#define jpeg_zigzag_order	jZIGTable
-#define jpeg_natural_order	jZAGTable
-#define jpeg_aritab		jAriTab
+#define jinit_compress_master   jICompress
+#define jinit_c_master_control  jICMaster
+#define jinit_c_main_controller jICMainC
+#define jinit_c_prep_controller jICPrepC
+#define jinit_c_coef_controller jICCoefC
+#define jinit_color_converter   jICColor
+#define jinit_downsampler       jIDownsampler
+#define jinit_forward_dct       jIFDCT
+#define jinit_huff_encoder      jIHEncoder
+#define jinit_phuff_encoder     jIPHEncoder
+#define jinit_arith_encoder     jIAEncoder
+#define jinit_marker_writer     jIMWriter
+#define jinit_master_decompress jIDMaster
+#define jinit_d_main_controller jIDMainC
+#define jinit_d_coef_controller jIDCoefC
+#define jinit_d_post_controller jIDPostC
+#define jinit_input_controller  jIInCtlr
+#define jinit_marker_reader     jIMReader
+#define jinit_huff_decoder      jIHDecoder
+#define jinit_phuff_decoder     jIPHDecoder
+#define jinit_arith_decoder     jIADecoder
+#define jinit_inverse_dct       jIIDCT
+#define jinit_upsampler         jIUpsampler
+#define jinit_color_deconverter jIDColor
+#define jinit_1pass_quantizer   jI1Quant
+#define jinit_2pass_quantizer   jI2Quant
+#define jinit_merged_upsampler  jIMUpsampler
+#define jinit_memory_mgr        jIMemMgr
+#define jdiv_round_up           jDivRound
+#define jround_up               jRound
+#define jcopy_sample_rows       jCopySamples
+#define jcopy_block_row         jCopyBlocks
+#define jzero_far               jZeroFar
+#define jpeg_zigzag_order       jZIGTable
+#define jpeg_natural_order      jZAGTable
+#define jpeg_aritab             jAriTab
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
 /* Compression module initialization routines */
 EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
 EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
-					 boolean transcode_only));
+                                         boolean transcode_only));
 EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
 EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
 EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
@@ -354,11 +354,11 @@
 /* Decompression module initialization routines */
 EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
+                                          boolean need_full_buffer));
 EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
@@ -377,13 +377,13 @@
 EXTERN(long) jdiv_round_up JPP((long a, long b));
 EXTERN(long) jround_up JPP((long a, long b));
 EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
-				    JSAMPARRAY output_array, int dest_row,
-				    int num_rows, JDIMENSION num_cols));
+                                    JSAMPARRAY output_array, int dest_row,
+                                    int num_rows, JDIMENSION num_cols));
 EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
-				  JDIMENSION num_blocks));
+                                  JDIMENSION num_blocks));
 EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
 /* Constant tables in jutils.c */
-#if 0				/* This table is not actually needed in v6a */
+#if 0                           /* This table is not actually needed in v6a */
 extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
 #endif
 extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
@@ -394,7 +394,7 @@
 /* Suppress undefined-structure complaints if necessary. */
 
 #ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+#ifndef AM_MEMORY_MANAGER       /* only jmemmgr.c defines these */
 struct jvirt_sarray_control { long dummy; };
 struct jvirt_barray_control { long dummy; };
 #endif
diff --git a/jpeglib.h b/jpeglib.h
index 6bf0b78..4b36f87 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -4,8 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
- * Modifications:
- * Copyright (C) 2009-2011, D. R. Commander.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file defines the application interface for the JPEG library.
@@ -23,10 +23,10 @@
  * manual configuration options that most people need not worry about.
  */
 
-#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
-#include "jconfig.h"		/* widely used configuration options */
+#ifndef JCONFIG_INCLUDED        /* in case jinclude.h already did */
+#include "jconfig.h"            /* widely used configuration options */
 #endif
-#include "jmorecfg.h"		/* seldom changed options */
+#include "jmorecfg.h"           /* seldom changed options */
 
 
 #ifdef __cplusplus
@@ -41,13 +41,13 @@
  * if you want to be compatible.
  */
 
-#define DCTSIZE		    8	/* The basic DCT block is 8x8 samples */
-#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
-#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
-#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
-#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
-#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
-#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+#define DCTSIZE             8   /* The basic DCT block is 8x8 samples */
+#define DCTSIZE2            64  /* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4   /* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4   /* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16  /* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4   /* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4   /* JPEG limit on sampling factors */
 /* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
  * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
  * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
@@ -66,16 +66,16 @@
  * but the pointer arrays can fit in near memory.
  */
 
-typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
-typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
-typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+typedef JSAMPLE FAR *JSAMPROW;  /* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;   /* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */
 
-typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
-typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
-typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
-typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */
+typedef JBLOCK FAR *JBLOCKROW;  /* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;         /* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;       /* a 3-D array of coefficient blocks */
 
-typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
+typedef JCOEF FAR *JCOEFPTR;    /* useful in a couple of places */
 
 
 /* Types for JPEG compression parameters and working tables. */
@@ -88,13 +88,13 @@
    * (not the zigzag order in which they are stored in a JPEG DQT marker).
    * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
    */
-  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  UINT16 quantval[DCTSIZE2];    /* quantization step for each coefficient */
   /* This field is used only during compression.  It's initialized FALSE when
    * the table is created, and set TRUE when it's been output to the file.
    * You could suppress output of a table by setting this to TRUE.
    * (See jpeg_suppress_tables for an example.)
    */
-  boolean sent_table;		/* TRUE when table has been output */
+  boolean sent_table;           /* TRUE when table has been output */
 } JQUANT_TBL;
 
 
@@ -102,15 +102,15 @@
 
 typedef struct {
   /* These two fields directly represent the contents of a JPEG DHT marker */
-  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
-				/* length k bits; bits[0] is unused */
-  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  UINT8 bits[17];               /* bits[k] = # of symbols with codes of */
+                                /* length k bits; bits[0] is unused */
+  UINT8 huffval[256];           /* The symbols, in order of incr code length */
   /* This field is used only during compression.  It's initialized FALSE when
    * the table is created, and set TRUE when it's been output to the file.
    * You could suppress output of a table by setting this to TRUE.
    * (See jpeg_suppress_tables for an example.)
    */
-  boolean sent_table;		/* TRUE when table has been output */
+  boolean sent_table;           /* TRUE when table has been output */
 } JHUFF_TBL;
 
 
@@ -120,20 +120,20 @@
   /* These values are fixed over the whole image. */
   /* For compression, they must be supplied by parameter setup; */
   /* for decompression, they are read from the SOF marker. */
-  int component_id;		/* identifier for this component (0..255) */
-  int component_index;		/* its index in SOF or cinfo->comp_info[] */
-  int h_samp_factor;		/* horizontal sampling factor (1..4) */
-  int v_samp_factor;		/* vertical sampling factor (1..4) */
-  int quant_tbl_no;		/* quantization table selector (0..3) */
+  int component_id;             /* identifier for this component (0..255) */
+  int component_index;          /* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;            /* horizontal sampling factor (1..4) */
+  int v_samp_factor;            /* vertical sampling factor (1..4) */
+  int quant_tbl_no;             /* quantization table selector (0..3) */
   /* These values may vary between scans. */
   /* For compression, they must be supplied by parameter setup; */
   /* for decompression, they are read from the SOS marker. */
   /* The decompressor output side may not use these variables. */
-  int dc_tbl_no;		/* DC entropy table selector (0..3) */
-  int ac_tbl_no;		/* AC entropy table selector (0..3) */
-  
+  int dc_tbl_no;                /* DC entropy table selector (0..3) */
+  int ac_tbl_no;                /* AC entropy table selector (0..3) */
+
   /* Remaining fields should be treated as private by applications. */
-  
+
   /* These values are computed during compression or decompression startup: */
   /* Component's size in DCT blocks.
    * Any dummy blocks added to complete an MCU are not counted; therefore
@@ -144,8 +144,8 @@
   /* Size of a DCT block in samples.  Always DCTSIZE for compression.
    * For decompression this is the size of the output from one DCT block,
    * reflecting any scaling we choose to apply during the IDCT step.
-   * Values of 1,2,4,8 are likely to be supported.  Note that different
-   * components may receive different IDCT scalings.
+   * Values from 1 to 16 are supported.
+   * Note that different components may receive different IDCT scalings.
    */
 #if JPEG_LIB_VERSION >= 70
   int DCT_h_scaled_size;
@@ -159,22 +159,22 @@
    * and similarly for height.  For decompression, IDCT scaling is included, so
    * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
    */
-  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_width;  /* actual width in samples */
   JDIMENSION downsampled_height; /* actual height in samples */
   /* This flag is used only for decompression.  In cases where some of the
    * components will be ignored (eg grayscale output from YCbCr image),
    * we can skip most computations for the unused components.
    */
-  boolean component_needed;	/* do we need the value of this component? */
+  boolean component_needed;     /* do we need the value of this component? */
 
   /* These values are computed before starting a scan of the component. */
   /* The decompressor output side may not use these variables. */
-  int MCU_width;		/* number of blocks per MCU, horizontally */
-  int MCU_height;		/* number of blocks per MCU, vertically */
-  int MCU_blocks;		/* MCU_width * MCU_height */
-  int MCU_sample_width;		/* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
-  int last_col_width;		/* # of non-dummy blocks across in last MCU */
-  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+  int MCU_width;                /* number of blocks per MCU, horizontally */
+  int MCU_height;               /* number of blocks per MCU, vertically */
+  int MCU_blocks;               /* MCU_width * MCU_height */
+  int MCU_sample_width;         /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
+  int last_col_width;           /* # of non-dummy blocks across in last MCU */
+  int last_row_height;          /* # of non-dummy blocks down in last MCU */
 
   /* Saved quantization table for component; NULL if none yet saved.
    * See jdinput.c comments about the need for this information.
@@ -190,10 +190,10 @@
 /* The script for encoding a multiple-scan file is an array of these: */
 
 typedef struct {
-  int comps_in_scan;		/* number of components encoded in this scan */
+  int comps_in_scan;            /* number of components encoded in this scan */
   int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;			/* progressive JPEG spectral selection parms */
-  int Ah, Al;			/* progressive JPEG successive approx. parms */
+  int Ss, Se;                   /* progressive JPEG spectral selection parms */
+  int Ah, Al;                   /* progressive JPEG successive approx. parms */
 } jpeg_scan_info;
 
 /* The decompressor can save APPn and COM markers in a list of these: */
@@ -201,11 +201,11 @@
 typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
 
 struct jpeg_marker_struct {
-  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
-  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
-  unsigned int original_length;	/* # bytes of data in the file */
-  unsigned int data_length;	/* # bytes of data saved at data[] */
-  JOCTET FAR * data;		/* the data contained in the marker */
+  jpeg_saved_marker_ptr next;   /* next in list, or NULL */
+  UINT8 marker;                 /* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length; /* # bytes of data in the file */
+  unsigned int data_length;     /* # bytes of data saved at data[] */
+  JOCTET FAR * data;            /* the data contained in the marker */
   /* the marker length word is not counted in data_length or original_length */
 };
 
@@ -215,72 +215,71 @@
 #define JCS_ALPHA_EXTENSIONS 1
 
 typedef enum {
-	JCS_UNKNOWN,		/* error/unspecified */
-	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue as specified by the RGB_RED, RGB_GREEN,
-				   RGB_BLUE, and RGB_PIXELSIZE macros */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
-	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK,		/* Y/Cb/Cr/K */
-	JCS_EXT_RGB,		/* red/green/blue */
-	JCS_EXT_RGBX,		/* red/green/blue/x */
-	JCS_EXT_BGR,		/* blue/green/red */
-	JCS_EXT_BGRX,		/* blue/green/red/x */
-	JCS_EXT_XBGR,		/* x/blue/green/red */
-	JCS_EXT_XRGB,		/* x/red/green/blue */
-	/* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX,
-	   JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is
-	   undefined, and in order to ensure the best performance,
-	   libjpeg-turbo can set that byte to whatever value it wishes.  Use
-	   the following colorspace constants to ensure that the X byte is set
-	   to 0xFF, so that it can be interpreted as an opaque alpha
-	   channel. */
-	JCS_EXT_RGBA,		/* red/green/blue/alpha */
-	JCS_EXT_BGRA,		/* blue/green/red/alpha */
-	JCS_EXT_ABGR,		/* alpha/blue/green/red */
-	JCS_EXT_ARGB		/* alpha/red/green/blue */
+  JCS_UNKNOWN,            /* error/unspecified */
+  JCS_GRAYSCALE,          /* monochrome */
+  JCS_RGB,                /* red/green/blue as specified by the RGB_RED,
+                             RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros */
+  JCS_YCbCr,              /* Y/Cb/Cr (also known as YUV) */
+  JCS_CMYK,               /* C/M/Y/K */
+  JCS_YCCK,               /* Y/Cb/Cr/K */
+  JCS_EXT_RGB,            /* red/green/blue */
+  JCS_EXT_RGBX,           /* red/green/blue/x */
+  JCS_EXT_BGR,            /* blue/green/red */
+  JCS_EXT_BGRX,           /* blue/green/red/x */
+  JCS_EXT_XBGR,           /* x/blue/green/red */
+  JCS_EXT_XRGB,           /* x/red/green/blue */
+  /* When out_color_space is set to JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR,
+     or JCS_EXT_XRGB during decompression, the X byte is undefined, and in
+     order to ensure the best performance, libjpeg-turbo can set that byte to
+     whatever value it wishes.  Use the following colorspace constants to
+     ensure that the X byte is set to 0xFF, so that it can be interpreted as an
+     opaque alpha channel. */
+  JCS_EXT_RGBA,           /* red/green/blue/alpha */
+  JCS_EXT_BGRA,           /* blue/green/red/alpha */
+  JCS_EXT_ABGR,           /* alpha/blue/green/red */
+  JCS_EXT_ARGB            /* alpha/red/green/blue */
 } J_COLOR_SPACE;
 
 /* DCT/IDCT algorithm options. */
 
 typedef enum {
-	JDCT_ISLOW,		/* slow but accurate integer algorithm */
-	JDCT_IFAST,		/* faster, less accurate integer method */
-	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+  JDCT_ISLOW,             /* slow but accurate integer algorithm */
+  JDCT_IFAST,             /* faster, less accurate integer method */
+  JDCT_FLOAT              /* floating-point: accurate, fast on fast HW */
 } J_DCT_METHOD;
 
-#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#ifndef JDCT_DEFAULT            /* may be overridden in jconfig.h */
 #define JDCT_DEFAULT  JDCT_ISLOW
 #endif
-#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#ifndef JDCT_FASTEST            /* may be overridden in jconfig.h */
 #define JDCT_FASTEST  JDCT_IFAST
 #endif
 
 /* Dithering options for decompression. */
 
 typedef enum {
-	JDITHER_NONE,		/* no dithering */
-	JDITHER_ORDERED,	/* simple ordered dither */
-	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+  JDITHER_NONE,           /* no dithering */
+  JDITHER_ORDERED,        /* simple ordered dither */
+  JDITHER_FS              /* Floyd-Steinberg error diffusion dither */
 } J_DITHER_MODE;
 
 
 /* Common fields between JPEG compression and decompression master structs. */
 
 #define jpeg_common_fields \
-  struct jpeg_error_mgr * err;	/* Error handler module */\
-  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_error_mgr * err;  /* Error handler module */\
+  struct jpeg_memory_mgr * mem; /* Memory manager module */\
   struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
-  void * client_data;		/* Available for use by application */\
-  boolean is_decompressor;	/* So common code can tell which is which */\
-  int global_state		/* For checking call sequence validity */
+  void * client_data;           /* Available for use by application */\
+  boolean is_decompressor;      /* So common code can tell which is which */\
+  int global_state              /* For checking call sequence validity */
 
 /* Routines that are to be used by both halves of the library are declared
  * to receive a pointer to this structure.  There are no actual instances of
  * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
  */
 struct jpeg_common_struct {
-  jpeg_common_fields;		/* Fields common to both master struct types */
+  jpeg_common_fields;           /* Fields common to both master struct types */
   /* Additional fields follow in an actual jpeg_compress_struct or
    * jpeg_decompress_struct.  All three structs must agree on these
    * initial fields!  (This would be a lot cleaner in C++.)
@@ -295,7 +294,7 @@
 /* Master record for a compression instance */
 
 struct jpeg_compress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+  jpeg_common_fields;           /* Fields shared with jpeg_decompress_struct */
 
   /* Destination for compressed data */
   struct jpeg_destination_mgr * dest;
@@ -305,12 +304,12 @@
    * be correct before you can even call jpeg_set_defaults().
    */
 
-  JDIMENSION image_width;	/* input image width */
-  JDIMENSION image_height;	/* input image height */
-  int input_components;		/* # of color components in input image */
-  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+  JDIMENSION image_width;       /* input image width */
+  JDIMENSION image_height;      /* input image height */
+  int input_components;         /* # of color components in input image */
+  J_COLOR_SPACE in_color_space; /* colorspace of input image */
 
-  double input_gamma;		/* image gamma of input image */
+  double input_gamma;           /* image gamma of input image */
 
   /* Compression parameters --- these fields must be set before calling
    * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
@@ -323,8 +322,8 @@
 #if JPEG_LIB_VERSION >= 70
   unsigned int scale_num, scale_denom; /* fraction by which to scale image */
 
-  JDIMENSION jpeg_width;	/* scaled JPEG image width */
-  JDIMENSION jpeg_height;	/* scaled JPEG image height */
+  JDIMENSION jpeg_width;        /* scaled JPEG image width */
+  JDIMENSION jpeg_height;       /* scaled JPEG image height */
   /* Dimensions of actual JPEG image that will be written to file,
    * derived from input dimensions by scaling factors above.
    * These fields are computed by jpeg_start_compress().
@@ -333,9 +332,9 @@
    */
 #endif
 
-  int data_precision;		/* bits of precision in image data */
+  int data_precision;           /* bits of precision in image data */
 
-  int num_components;		/* # of color components in JPEG image */
+  int num_components;           /* # of color components in JPEG image */
   J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
 
   jpeg_component_info * comp_info;
@@ -357,22 +356,22 @@
   UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
   UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
 
-  int num_scans;		/* # of entries in scan_info array */
+  int num_scans;                /* # of entries in scan_info array */
   const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
   /* The default value of scan_info is NULL, which causes a single-scan
    * sequential JPEG file to be emitted.  To create a multi-scan file,
    * set num_scans and scan_info to point to an array of scan definitions.
    */
 
-  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean raw_data_in;          /* TRUE=caller supplies downsampled data */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;      /* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
 #if JPEG_LIB_VERSION >= 70
   boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
 #endif
-  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
-  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+  int smoothing_factor;         /* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;      /* DCT algorithm selector */
 
   /* The restart interval can be specified in absolute MCUs by setting
    * restart_interval, or in MCU rows by setting restart_in_rows
@@ -380,28 +379,28 @@
    * for each scan).
    */
   unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
-  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+  int restart_in_rows;          /* if > 0, MCU rows per restart interval */
 
   /* Parameters controlling emission of special markers. */
 
-  boolean write_JFIF_header;	/* should a JFIF marker be written? */
-  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  boolean write_JFIF_header;    /* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;     /* What to write for the JFIF version number */
   UINT8 JFIF_minor_version;
   /* These three values are not used by the JPEG code, merely copied */
   /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
   /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
   /* ratio is defined by X_density/Y_density even when density_unit=0. */
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
-  
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean write_Adobe_marker;   /* should an Adobe marker be written? */
+
   /* State variable: index of next scanline to be written to
    * jpeg_write_scanlines().  Application may use this to control its
    * processing loop, e.g., "while (next_scanline < image_height)".
    */
 
-  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
+  JDIMENSION next_scanline;     /* 0 .. image_height-1  */
 
   /* Remaining fields are known throughout compressor, but generally
    * should not be touched by a surrounding application.
@@ -410,44 +409,44 @@
   /*
    * These fields are computed during compression startup
    */
-  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
+  boolean progressive_mode;     /* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
 
 #if JPEG_LIB_VERSION >= 70
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
 #endif
 
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows to be input to coef ctlr */
   /* The coefficient controller receives data in units of MCU rows as defined
    * for fully interleaved scans (whether the JPEG file is interleaved or not).
    * There are v_samp_factor * DCTSIZE sample rows of each component in an
    * "iMCU" (interleaved MCU) row.
    */
-  
+
   /*
    * These fields are valid during any one scan.
    * They describe the components and MCUs actually appearing in the scan.
    */
-  int comps_in_scan;		/* # of JPEG components in this scan */
+  int comps_in_scan;            /* # of JPEG components in this scan */
   jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
   /* *cur_comp_info[i] describes component that appears i'th in SOS */
-  
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-  
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
+
+  int blocks_in_MCU;            /* # of DCT blocks per MCU */
   int MCU_membership[C_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
   /* i'th block in an MCU */
 
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
 
 #if JPEG_LIB_VERSION >= 80
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order;	/* natural-order position array */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
+  int block_size;               /* the basic DCT block size: 1..16 */
+  const int * natural_order;    /* natural-order position array */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) */
 #endif
 
   /*
@@ -470,7 +469,7 @@
 /* Master record for a decompression instance */
 
 struct jpeg_decompress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+  jpeg_common_fields;           /* Fields shared with jpeg_compress_struct */
 
   /* Source of compressed data */
   struct jpeg_source_mgr * src;
@@ -478,9 +477,9 @@
   /* Basic description of image --- filled in by jpeg_read_header(). */
   /* Application may inspect these values to decide how to process image. */
 
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
-  JDIMENSION image_height;	/* nominal image height */
-  int num_components;		/* # of color components in JPEG image */
+  JDIMENSION image_width;       /* nominal image width (from SOF marker) */
+  JDIMENSION image_height;      /* nominal image height */
+  int num_components;           /* # of color components in JPEG image */
   J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
 
   /* Decompression processing parameters --- these fields must be set before
@@ -492,24 +491,24 @@
 
   unsigned int scale_num, scale_denom; /* fraction by which to scale image */
 
-  double output_gamma;		/* image gamma wanted in output */
+  double output_gamma;          /* image gamma wanted in output */
 
-  boolean buffered_image;	/* TRUE=multiple output passes */
-  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+  boolean buffered_image;       /* TRUE=multiple output passes */
+  boolean raw_data_out;         /* TRUE=downsampled data wanted */
 
-  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
-  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
-  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+  J_DCT_METHOD dct_method;      /* IDCT algorithm selector */
+  boolean do_fancy_upsampling;  /* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;   /* TRUE=apply interblock smoothing */
 
-  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  boolean quantize_colors;      /* TRUE=colormapped output wanted */
   /* the following are ignored if not quantize_colors: */
-  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
-  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
-  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  J_DITHER_MODE dither_mode;    /* type of color dithering to use */
+  boolean two_pass_quantize;    /* TRUE=use two-pass color quantization */
+  int desired_number_of_colors; /* max # colors to use in created colormap */
   /* these are significant only in buffered-image mode: */
-  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_1pass_quant;   /* enable future use of 1-pass quantizer */
   boolean enable_external_quant;/* enable future use of external colormap */
-  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+  boolean enable_2pass_quant;   /* enable future use of 2-pass quantizer */
 
   /* Description of actual output image that will be returned to application.
    * These fields are computed by jpeg_start_decompress().
@@ -517,14 +516,14 @@
    * in advance of calling jpeg_start_decompress().
    */
 
-  JDIMENSION output_width;	/* scaled image width */
-  JDIMENSION output_height;	/* scaled image height */
-  int out_color_components;	/* # of color components in out_color_space */
-  int output_components;	/* # of color components returned */
+  JDIMENSION output_width;      /* scaled image width */
+  JDIMENSION output_height;     /* scaled image height */
+  int out_color_components;     /* # of color components in out_color_space */
+  int output_components;        /* # of color components returned */
   /* output_components is 1 (a colormap index) when quantizing colors;
    * otherwise it equals out_color_components.
    */
-  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  int rec_outbuf_height;        /* min recommended height of scanline buffer */
   /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
    * high, space and time will be wasted due to unnecessary data copying.
    * Usually rec_outbuf_height will be 1 or 2, at most 4.
@@ -536,8 +535,8 @@
    * jpeg_start_decompress or jpeg_start_output.
    * The map has out_color_components rows and actual_number_of_colors columns.
    */
-  int actual_number_of_colors;	/* number of entries in use */
-  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+  int actual_number_of_colors;  /* number of entries in use */
+  JSAMPARRAY colormap;          /* The color map as a 2-D pixel array */
 
   /* State variables: these variables indicate the progress of decompression.
    * The application may examine these but must not modify them.
@@ -547,20 +546,20 @@
    * Application may use this to control its processing loop, e.g.,
    * "while (output_scanline < output_height)".
    */
-  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+  JDIMENSION output_scanline;   /* 0 .. output_height-1  */
 
   /* Current input scan number and number of iMCU rows completed in scan.
    * These indicate the progress of the decompressor input side.
    */
-  int input_scan_number;	/* Number of SOS markers seen so far */
-  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+  int input_scan_number;        /* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;    /* Number of iMCU rows completed */
 
   /* The "output scan number" is the notional scan being displayed by the
    * output side.  The decompressor will not allow output scan/row number
    * to get ahead of input scan/row, but it can fall arbitrarily far behind.
    */
-  int output_scan_number;	/* Nominal scan number being displayed */
-  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+  int output_scan_number;       /* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;   /* Number of iMCU rows read */
 
   /* Current progression status.  coef_bits[c][i] indicates the precision
    * with which component c's DCT coefficient i (in zigzag order) is known.
@@ -569,7 +568,7 @@
    * (thus, 0 at completion of the progression).
    * This pointer is NULL when reading a non-progressive file.
    */
-  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+  int (*coef_bits)[DCTSIZE2];   /* -1 or current Al value for each coef */
 
   /* Internal JPEG parameters --- the application usually need not look at
    * these fields.  Note that the decompressor output side may not use
@@ -591,16 +590,16 @@
    * are given in SOF/SOS markers or defined to be reset by SOI.
    */
 
-  int data_precision;		/* bits of precision in image data */
+  int data_precision;           /* bits of precision in image data */
 
   jpeg_component_info * comp_info;
   /* comp_info[i] describes component that appears i'th in SOF */
 
 #if JPEG_LIB_VERSION >= 80
-  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
+  boolean is_baseline;          /* TRUE if Baseline SOF0 encountered */
 #endif
-  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean progressive_mode;     /* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
 
   UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
   UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
@@ -611,17 +610,17 @@
   /* These fields record data obtained from optional markers recognized by
    * the JPEG library.
    */
-  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  boolean saw_JFIF_marker;      /* TRUE iff a JFIF APP0 marker was found */
   /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
-  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_major_version;     /* JFIF version number */
   UINT8 JFIF_minor_version;
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
-  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean saw_Adobe_marker;     /* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;        /* Color transform code from Adobe marker */
 
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
 
   /* Aside from the specific data retained from APPn markers known to the
    * library, the uninterpreted contents of any or all APPn and COM markers
@@ -636,17 +635,17 @@
   /*
    * These fields are computed during decompression startup
    */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
 
 #if JPEG_LIB_VERSION >= 70
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
 #else
-  int min_DCT_scaled_size;	/* smallest DCT_scaled_size of any component */
+  int min_DCT_scaled_size;      /* smallest DCT_scaled_size of any component */
 #endif
 
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows in image */
   /* The coefficient controller's input and output progress is measured in
    * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
    * in fully interleaved JPEG scans, but are used whether the scan is
@@ -662,26 +661,26 @@
    * They describe the components and MCUs actually appearing in the scan.
    * Note that the decompressor output side must not use these fields.
    */
-  int comps_in_scan;		/* # of JPEG components in this scan */
+  int comps_in_scan;            /* # of JPEG components in this scan */
   jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
   /* *cur_comp_info[i] describes component that appears i'th in SOS */
 
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
 
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int blocks_in_MCU;            /* # of DCT blocks per MCU */
   int MCU_membership[D_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
   /* i'th block in an MCU */
 
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
 
 #if JPEG_LIB_VERSION >= 80
   /* These fields are derived from Se of first SOS marker.
    */
-  int block_size;		/* the basic DCT block size: 1..16 */
+  int block_size;               /* the basic DCT block size: 1..16 */
   const int * natural_order; /* natural-order position array for entropy decode */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) for entropy decode */
 #endif
 
   /* This field is shared between entropy decoder and marker parser.
@@ -726,10 +725,10 @@
   JMETHOD(void, output_message, (j_common_ptr cinfo));
   /* Format a message string for the most recent JPEG error or message */
   JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
-#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+#define JMSG_LENGTH_MAX  200    /* recommended size of format_message buffer */
   /* Reset error state variables at start of a new image */
   JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
-  
+
   /* The message ID code and any parameters are saved here.
    * A message can have one string parameter or up to 8 int parameters.
    */
@@ -739,18 +738,18 @@
     int i[8];
     char s[JMSG_STR_PARM_MAX];
   } msg_parm;
-  
+
   /* Standard state variables for error facility */
-  
-  int trace_level;		/* max msg_level that will be displayed */
-  
+
+  int trace_level;              /* max msg_level that will be displayed */
+
   /* For recoverable corrupt-data errors, we emit a warning message,
    * but keep going unless emit_message chooses to abort.  emit_message
    * should count warnings in num_warnings.  The surrounding application
    * can check for bad data by seeing if num_warnings is nonzero at the
    * end of processing.
    */
-  long num_warnings;		/* number of corrupt-data warnings */
+  long num_warnings;            /* number of corrupt-data warnings */
 
   /* These fields point to the table(s) of error message strings.
    * An application can change the table pointer to switch to a different
@@ -768,8 +767,8 @@
    * It contains strings numbered first_addon_message..last_addon_message.
    */
   const char * const * addon_message_table; /* Non-library errors */
-  int first_addon_message;	/* code for first string in addon table */
-  int last_addon_message;	/* code for last string in addon table */
+  int first_addon_message;      /* code for first string in addon table */
+  int last_addon_message;       /* code for last string in addon table */
 };
 
 
@@ -778,18 +777,18 @@
 struct jpeg_progress_mgr {
   JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
 
-  long pass_counter;		/* work units completed in this pass */
-  long pass_limit;		/* total number of work units in this pass */
-  int completed_passes;		/* passes completed so far */
-  int total_passes;		/* total number of passes expected */
+  long pass_counter;            /* work units completed in this pass */
+  long pass_limit;              /* total number of work units in this pass */
+  int completed_passes;         /* passes completed so far */
+  int total_passes;             /* total number of passes expected */
 };
 
 
 /* Data destination object for compression */
 
 struct jpeg_destination_mgr {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
 
   JMETHOD(void, init_destination, (j_compress_ptr cinfo));
   JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
@@ -801,7 +800,7 @@
 
 struct jpeg_source_mgr {
   const JOCTET * next_input_byte; /* => next byte to read from buffer */
-  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+  size_t bytes_in_buffer;       /* # of bytes remaining in buffer */
 
   JMETHOD(void, init_source, (j_decompress_ptr cinfo));
   JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
@@ -822,9 +821,9 @@
  * successful.
  */
 
-#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
-#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
-#define JPOOL_NUMPOOLS	2
+#define JPOOL_PERMANENT 0       /* lasts until master record is destroyed */
+#define JPOOL_IMAGE     1       /* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS  2
 
 typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
 typedef struct jvirt_barray_control * jvirt_barray_ptr;
@@ -833,38 +832,38 @@
 struct jpeg_memory_mgr {
   /* Method pointers */
   JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
-				size_t sizeofobject));
+                                size_t sizeofobject));
   JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
-				     size_t sizeofobject));
+                                     size_t sizeofobject));
   JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
-				     JDIMENSION samplesperrow,
-				     JDIMENSION numrows));
+                                     JDIMENSION samplesperrow,
+                                     JDIMENSION numrows));
   JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
-				      JDIMENSION blocksperrow,
-				      JDIMENSION numrows));
+                                      JDIMENSION blocksperrow,
+                                      JDIMENSION numrows));
   JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION samplesperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
+                                                  int pool_id,
+                                                  boolean pre_zero,
+                                                  JDIMENSION samplesperrow,
+                                                  JDIMENSION numrows,
+                                                  JDIMENSION maxaccess));
   JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION blocksperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
+                                                  int pool_id,
+                                                  boolean pre_zero,
+                                                  JDIMENSION blocksperrow,
+                                                  JDIMENSION numrows,
+                                                  JDIMENSION maxaccess));
   JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
   JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
-					   jvirt_sarray_ptr ptr,
-					   JDIMENSION start_row,
-					   JDIMENSION num_rows,
-					   boolean writable));
+                                           jvirt_sarray_ptr ptr,
+                                           JDIMENSION start_row,
+                                           JDIMENSION num_rows,
+                                           boolean writable));
   JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
-					    jvirt_barray_ptr ptr,
-					    JDIMENSION start_row,
-					    JDIMENSION num_rows,
-					    boolean writable));
+                                            jvirt_barray_ptr ptr,
+                                            JDIMENSION start_row,
+                                            JDIMENSION num_rows,
+                                            boolean writable));
   JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
   JMETHOD(void, self_destruct, (j_common_ptr cinfo));
 
@@ -892,87 +891,87 @@
  */
 
 #ifdef HAVE_PROTOTYPES
-#define JPP(arglist)	arglist
+#define JPP(arglist)    arglist
 #else
-#define JPP(arglist)	()
+#define JPP(arglist)    ()
 #endif
 
 
 /* Short forms of external names for systems with brain-damaged linkers.
  * We shorten external names to be unique in the first six letters, which
  * is good enough for all known systems.
- * (If your compiler itself needs names to be unique in less than 15 
+ * (If your compiler itself needs names to be unique in less than 15
  * characters, you are out of luck.  Get a better compiler.)
  */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_error		jStdError
-#define jpeg_CreateCompress	jCreaCompress
-#define jpeg_CreateDecompress	jCreaDecompress
-#define jpeg_destroy_compress	jDestCompress
-#define jpeg_destroy_decompress	jDestDecompress
-#define jpeg_stdio_dest		jStdDest
-#define jpeg_stdio_src		jStdSrc
-#if JPEG_LIB_VERSION >= 80
-#define jpeg_mem_dest		jMemDest
-#define jpeg_mem_src		jMemSrc
+#define jpeg_std_error          jStdError
+#define jpeg_CreateCompress     jCreaCompress
+#define jpeg_CreateDecompress   jCreaDecompress
+#define jpeg_destroy_compress   jDestCompress
+#define jpeg_destroy_decompress jDestDecompress
+#define jpeg_stdio_dest         jStdDest
+#define jpeg_stdio_src          jStdSrc
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+#define jpeg_mem_dest           jMemDest
+#define jpeg_mem_src            jMemSrc
 #endif
-#define jpeg_set_defaults	jSetDefaults
-#define jpeg_set_colorspace	jSetColorspace
-#define jpeg_default_colorspace	jDefColorspace
-#define jpeg_set_quality	jSetQuality
-#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_set_defaults       jSetDefaults
+#define jpeg_set_colorspace     jSetColorspace
+#define jpeg_default_colorspace jDefColorspace
+#define jpeg_set_quality        jSetQuality
+#define jpeg_set_linear_quality jSetLQuality
 #if JPEG_LIB_VERSION >= 70
-#define jpeg_default_qtables	jDefQTables
+#define jpeg_default_qtables    jDefQTables
 #endif
-#define jpeg_add_quant_table	jAddQuantTable
-#define jpeg_quality_scaling	jQualityScaling
-#define jpeg_simple_progression	jSimProgress
-#define jpeg_suppress_tables	jSuppressTables
-#define jpeg_alloc_quant_table	jAlcQTable
-#define jpeg_alloc_huff_table	jAlcHTable
-#define jpeg_start_compress	jStrtCompress
-#define jpeg_write_scanlines	jWrtScanlines
-#define jpeg_finish_compress	jFinCompress
+#define jpeg_add_quant_table    jAddQuantTable
+#define jpeg_quality_scaling    jQualityScaling
+#define jpeg_simple_progression jSimProgress
+#define jpeg_suppress_tables    jSuppressTables
+#define jpeg_alloc_quant_table  jAlcQTable
+#define jpeg_alloc_huff_table   jAlcHTable
+#define jpeg_start_compress     jStrtCompress
+#define jpeg_write_scanlines    jWrtScanlines
+#define jpeg_finish_compress    jFinCompress
 #if JPEG_LIB_VERSION >= 70
-#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
+#define jpeg_calc_jpeg_dimensions       jCjpegDimensions
 #endif
-#define jpeg_write_raw_data	jWrtRawData
-#define jpeg_write_marker	jWrtMarker
-#define jpeg_write_m_header	jWrtMHeader
-#define jpeg_write_m_byte	jWrtMByte
-#define jpeg_write_tables	jWrtTables
-#define jpeg_read_header	jReadHeader
-#define jpeg_start_decompress	jStrtDecompress
-#define jpeg_read_scanlines	jReadScanlines
-#define jpeg_finish_decompress	jFinDecompress
-#define jpeg_read_raw_data	jReadRawData
-#define jpeg_has_multiple_scans	jHasMultScn
-#define jpeg_start_output	jStrtOutput
-#define jpeg_finish_output	jFinOutput
-#define jpeg_input_complete	jInComplete
-#define jpeg_new_colormap	jNewCMap
-#define jpeg_consume_input	jConsumeInput
+#define jpeg_write_raw_data     jWrtRawData
+#define jpeg_write_marker       jWrtMarker
+#define jpeg_write_m_header     jWrtMHeader
+#define jpeg_write_m_byte       jWrtMByte
+#define jpeg_write_tables       jWrtTables
+#define jpeg_read_header        jReadHeader
+#define jpeg_start_decompress   jStrtDecompress
+#define jpeg_read_scanlines     jReadScanlines
+#define jpeg_finish_decompress  jFinDecompress
+#define jpeg_read_raw_data      jReadRawData
+#define jpeg_has_multiple_scans jHasMultScn
+#define jpeg_start_output       jStrtOutput
+#define jpeg_finish_output      jFinOutput
+#define jpeg_input_complete     jInComplete
+#define jpeg_new_colormap       jNewCMap
+#define jpeg_consume_input      jConsumeInput
 #if JPEG_LIB_VERSION >= 80
-#define jpeg_core_output_dimensions	jCoreDimensions
+#define jpeg_core_output_dimensions     jCoreDimensions
 #endif
-#define jpeg_calc_output_dimensions	jCalcDimensions
-#define jpeg_save_markers	jSaveMarkers
-#define jpeg_set_marker_processor	jSetMarker
-#define jpeg_read_coefficients	jReadCoefs
-#define jpeg_write_coefficients	jWrtCoefs
-#define jpeg_copy_critical_parameters	jCopyCrit
-#define jpeg_abort_compress	jAbrtCompress
-#define jpeg_abort_decompress	jAbrtDecompress
-#define jpeg_abort		jAbort
-#define jpeg_destroy		jDestroy
-#define jpeg_resync_to_restart	jResyncRestart
+#define jpeg_calc_output_dimensions     jCalcDimensions
+#define jpeg_save_markers       jSaveMarkers
+#define jpeg_set_marker_processor       jSetMarker
+#define jpeg_read_coefficients  jReadCoefs
+#define jpeg_write_coefficients jWrtCoefs
+#define jpeg_copy_critical_parameters   jCopyCrit
+#define jpeg_abort_compress     jAbrtCompress
+#define jpeg_abort_decompress   jAbrtDecompress
+#define jpeg_abort              jAbort
+#define jpeg_destroy            jDestroy
+#define jpeg_resync_to_restart  jResyncRestart
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
 /* Default error-management setup */
 EXTERN(struct jpeg_error_mgr *) jpeg_std_error
-	JPP((struct jpeg_error_mgr * err));
+        JPP((struct jpeg_error_mgr * err));
 
 /* Initialization of JPEG compression objects.
  * jpeg_create_compress() and jpeg_create_decompress() are the exported
@@ -983,14 +982,14 @@
  */
 #define jpeg_create_compress(cinfo) \
     jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
-			(size_t) sizeof(struct jpeg_compress_struct))
+                        (size_t) sizeof(struct jpeg_compress_struct))
 #define jpeg_create_decompress(cinfo) \
     jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
-			  (size_t) sizeof(struct jpeg_decompress_struct))
+                          (size_t) sizeof(struct jpeg_decompress_struct))
 EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
-				      int version, size_t structsize));
+                                      int version, size_t structsize));
 EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
-					int version, size_t structsize));
+                                        int version, size_t structsize));
 /* Destruction of JPEG compression objects */
 EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
 EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
@@ -1000,48 +999,48 @@
 EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
 EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
 
-#if JPEG_LIB_VERSION >= 80
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /* Data source and destination managers: memory buffers. */
 EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
-			       unsigned char ** outbuffer,
-			       unsigned long * outsize));
+                               unsigned char ** outbuffer,
+                               unsigned long * outsize));
 EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
-			      unsigned char * inbuffer,
-			      unsigned long insize));
+                              unsigned char * inbuffer,
+                              unsigned long insize));
 #endif
 
 /* Default parameter setup for compression */
 EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
 /* Compression parameter setup aids */
 EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
-				      J_COLOR_SPACE colorspace));
+                                      J_COLOR_SPACE colorspace));
 EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
 EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
-				   boolean force_baseline));
+                                   boolean force_baseline));
 EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
-					  int scale_factor,
-					  boolean force_baseline));
+                                          int scale_factor,
+                                          boolean force_baseline));
 #if JPEG_LIB_VERSION >= 70
 EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
-				       boolean force_baseline));
+                                       boolean force_baseline));
 #endif
 EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
-				       const unsigned int *basic_table,
-				       int scale_factor,
-				       boolean force_baseline));
+                                       const unsigned int *basic_table,
+                                       int scale_factor,
+                                       boolean force_baseline));
 EXTERN(int) jpeg_quality_scaling JPP((int quality));
 EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
 EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
-				       boolean suppress));
+                                       boolean suppress));
 EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
 EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
 
 /* Main entry points for compression */
 EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
-				      boolean write_all_tables));
+                                      boolean write_all_tables));
 EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
-					     JSAMPARRAY scanlines,
-					     JDIMENSION num_lines));
+                                             JSAMPARRAY scanlines,
+                                             JDIMENSION num_lines));
 EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
 
 #if JPEG_LIB_VERSION >= 70
@@ -1051,29 +1050,29 @@
 
 /* Replaces jpeg_write_scanlines when writing raw downsampled data. */
 EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
-					    JSAMPIMAGE data,
-					    JDIMENSION num_lines));
+                                            JSAMPIMAGE data,
+                                            JDIMENSION num_lines));
 
 /* Write a special marker.  See libjpeg.txt concerning safe usage. */
 EXTERN(void) jpeg_write_marker
-	JPP((j_compress_ptr cinfo, int marker,
-	     const JOCTET * dataptr, unsigned int datalen));
+        JPP((j_compress_ptr cinfo, int marker,
+             const JOCTET * dataptr, unsigned int datalen));
 /* Same, but piecemeal. */
 EXTERN(void) jpeg_write_m_header
-	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+        JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
 EXTERN(void) jpeg_write_m_byte
-	JPP((j_compress_ptr cinfo, int val));
+        JPP((j_compress_ptr cinfo, int val));
 
 /* Alternate compression function: just write an abbreviated table file */
 EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
 
 /* Decompression startup: read start of JPEG datastream to see what's there */
 EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
-				  boolean require_image));
+                                  boolean require_image));
 /* Return value is one of: */
-#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
-#define JPEG_HEADER_OK		1 /* Found valid image datastream */
-#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+#define JPEG_SUSPENDED          0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK          1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */
 /* If you pass require_image = TRUE (normal case), you need not check for
  * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
  * JPEG_SUSPENDED is only possible if you use a data source module that can
@@ -1083,29 +1082,29 @@
 /* Main entry points for decompression */
 EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
 EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
-					    JSAMPARRAY scanlines,
-					    JDIMENSION max_lines));
+                                            JSAMPARRAY scanlines,
+                                            JDIMENSION max_lines));
 EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
 
 /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
 EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
-					   JSAMPIMAGE data,
-					   JDIMENSION max_lines));
+                                           JSAMPIMAGE data,
+                                           JDIMENSION max_lines));
 
 /* Additional entry points for buffered-image mode. */
 EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
 EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
-				       int scan_number));
+                                       int scan_number));
 EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
 EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
 EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
 EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
 /* Return value is one of: */
-/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
-#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
-#define JPEG_REACHED_EOI	2 /* Reached end of image */
-#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
-#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+/* #define JPEG_SUSPENDED       0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS        1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI        2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED      3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED     4 /* Completed last iMCU row of a scan */
 
 /* Precalculate output dimensions for current decompression parameters. */
 #if JPEG_LIB_VERSION >= 80
@@ -1115,20 +1114,20 @@
 
 /* Control saving of COM and APPn markers into marker_list. */
 EXTERN(void) jpeg_save_markers
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     unsigned int length_limit));
+        JPP((j_decompress_ptr cinfo, int marker_code,
+             unsigned int length_limit));
 
 /* Install a special processing method for COM or APPn markers. */
 EXTERN(void) jpeg_set_marker_processor
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     jpeg_marker_parser_method routine));
+        JPP((j_decompress_ptr cinfo, int marker_code,
+             jpeg_marker_parser_method routine));
 
 /* Read or write raw DCT coefficients --- useful for lossless transcoding. */
 EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
 EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
-					  jvirt_barray_ptr * coef_arrays));
+                                          jvirt_barray_ptr * coef_arrays));
 EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
-						j_compress_ptr dstinfo));
+                                                j_compress_ptr dstinfo));
 
 /* If you choose to abort compression or decompression before completing
  * jpeg_finish_(de)compress, then you need to clean up to release memory,
@@ -1147,17 +1146,17 @@
 
 /* Default restart-marker-resync procedure for use by data source modules */
 EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
-					    int desired));
+                                            int desired));
 
 
 /* These marker codes are exported since applications and data source modules
  * are likely to want to use them.
  */
 
-#define JPEG_RST0	0xD0	/* RST0 marker code */
-#define JPEG_EOI	0xD9	/* EOI marker code */
-#define JPEG_APP0	0xE0	/* APP0 marker code */
-#define JPEG_COM	0xFE	/* COM marker code */
+#define JPEG_RST0       0xD0    /* RST0 marker code */
+#define JPEG_EOI        0xD9    /* EOI marker code */
+#define JPEG_APP0       0xE0    /* APP0 marker code */
+#define JPEG_COM        0xFE    /* COM marker code */
 
 
 /* If we have a brain-damaged compiler that emits warnings (or worse, errors)
@@ -1166,7 +1165,7 @@
  */
 
 #ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+#ifndef JPEG_INTERNALS          /* will be defined in jpegint.h */
 struct jvirt_sarray_control { long dummy; };
 struct jvirt_barray_control { long dummy; };
 struct jpeg_comp_master { long dummy; };
@@ -1201,8 +1200,8 @@
  */
 
 #ifdef JPEG_INTERNALS
-#include "jpegint.h"		/* fetch private declarations */
-#include "jerror.h"		/* fetch error codes too */
+#include "jpegint.h"            /* fetch private declarations */
+#include "jerror.h"             /* fetch error codes too */
 #endif
 
 #ifdef __cplusplus
diff --git a/jpegtran.c b/jpegtran.c
index 19c068b..c7906f4 100644
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-2010, Thomas G. Lane, Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -13,18 +13,18 @@
  * provides some lossless and sort-of-lossless transformations of JPEG data.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "transupp.h"		/* Support routines for jpegtran */
-#include "jversion.h"		/* for version message */
-#include "config.h"
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "transupp.h"           /* Support routines for jpegtran */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
@@ -38,9 +38,9 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
-static JCOPY_OPTION copyoption;	/* -copy switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+static JCOPY_OPTION copyoption; /* -copy switch */
 static jpeg_transform_info transformoption; /* image transformation options */
 
 
@@ -106,12 +106,12 @@
     transformoption.transform = transform;
   } else {
     fprintf(stderr, "%s: can only do one image transformation at a time\n",
-	    progname);
+            progname);
     usage();
   }
 #else
   fprintf(stderr, "%s: sorry, image transformation was not compiled\n",
-	  progname);
+          progname);
   exit(EXIT_FAILURE);
 #endif
 }
@@ -119,7 +119,7 @@
 
 LOCAL(int)
 parse_switches (j_compress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -132,7 +132,7 @@
   int argn;
   char * arg;
   boolean simple_progressive;
-  char * scansarg = NULL;	/* saves -scans parm if any */
+  char * scansarg = NULL;       /* saves -scans parm if any */
 
   /* Set up default JPEG parameters. */
   simple_progressive = FALSE;
@@ -153,12 +153,12 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "arithmetic", 1)) {
       /* Use arithmetic coding. */
@@ -166,35 +166,35 @@
       cinfo->arith_code = TRUE;
 #else
       fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "copy", 2)) {
       /* Select which extra markers to copy. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "none", 1)) {
-	copyoption = JCOPYOPT_NONE;
+        copyoption = JCOPYOPT_NONE;
       } else if (keymatch(argv[argn], "comments", 1)) {
-	copyoption = JCOPYOPT_COMMENTS;
+        copyoption = JCOPYOPT_COMMENTS;
       } else if (keymatch(argv[argn], "all", 1)) {
-	copyoption = JCOPYOPT_ALL;
+        copyoption = JCOPYOPT_ALL;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "crop", 2)) {
       /* Perform lossless cropping. */
 #if TRANSFORMS_SUPPORTED
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
-	fprintf(stderr, "%s: bogus -crop argument '%s'\n",
-		progname, argv[argn]);
-	exit(EXIT_FAILURE);
+        fprintf(stderr, "%s: bogus -crop argument '%s'\n",
+                progname, argv[argn]);
+        exit(EXIT_FAILURE);
       }
 #else
-      select_transform(JXFORM_NONE);	/* force an error */
+      select_transform(JXFORM_NONE);    /* force an error */
 #endif
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
@@ -203,32 +203,32 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "%s version %s (build %s)\n",
-		PACKAGE_NAME, VERSION, BUILD);
-	fprintf(stderr, "%s\n\n", JCOPYRIGHT);
-	fprintf(stderr, "Emulating The Independent JPEG Group's libjpeg, version %s\n\n",
-		JVERSION);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
     } else if (keymatch(arg, "flip", 1)) {
       /* Mirror left-right or top-bottom. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "horizontal", 1))
-	select_transform(JXFORM_FLIP_H);
+        select_transform(JXFORM_FLIP_H);
       else if (keymatch(argv[argn], "vertical", 1))
-	select_transform(JXFORM_FLIP_V);
+        select_transform(JXFORM_FLIP_V);
       else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) {
       /* Force to grayscale. */
 #if TRANSFORMS_SUPPORTED
       transformoption.force_grayscale = TRUE;
 #else
-      select_transform(JXFORM_NONE);	/* force an error */
+      select_transform(JXFORM_NONE);    /* force an error */
 #endif
 
     } else if (keymatch(arg, "maxmemory", 3)) {
@@ -236,12 +236,12 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
@@ -250,15 +250,15 @@
       cinfo->optimize_coding = TRUE;
 #else
       fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
 
     } else if (keymatch(arg, "perfect", 2)) {
       /* Fail if there is any partial edge MCUs that the transform can't
@@ -272,7 +272,7 @@
       /* We must postpone execution until num_components is known. */
 #else
       fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -281,43 +281,43 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (lval < 0 || lval > 65535L)
-	usage();
+        usage();
       if (ch == 'b' || ch == 'B') {
-	cinfo->restart_interval = (unsigned int) lval;
-	cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+        cinfo->restart_interval = (unsigned int) lval;
+        cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
       } else {
-	cinfo->restart_in_rows = (int) lval;
-	/* restart_interval will be computed during startup */
+        cinfo->restart_in_rows = (int) lval;
+        /* restart_interval will be computed during startup */
       }
 
     } else if (keymatch(arg, "rotate", 2)) {
       /* Rotate 90, 180, or 270 degrees (measured clockwise). */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "90", 2))
-	select_transform(JXFORM_ROT_90);
+        select_transform(JXFORM_ROT_90);
       else if (keymatch(argv[argn], "180", 3))
-	select_transform(JXFORM_ROT_180);
+        select_transform(JXFORM_ROT_180);
       else if (keymatch(argv[argn], "270", 3))
-	select_transform(JXFORM_ROT_270);
+        select_transform(JXFORM_ROT_270);
       else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "scans", 1)) {
       /* Set scan script. */
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       scansarg = argv[argn];
       /* We must postpone reading the file in case -progressive appears. */
 #else
       fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -334,7 +334,7 @@
       transformoption.trim = TRUE;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
@@ -343,18 +343,18 @@
   if (for_real) {
 
 #ifdef C_PROGRESSIVE_SUPPORTED
-    if (simple_progressive)	/* process -progressive; -scans can override */
+    if (simple_progressive)     /* process -progressive; -scans can override */
       jpeg_simple_progression(cinfo);
 #endif
 
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-    if (scansarg != NULL)	/* process -scans if it was present */
+    if (scansarg != NULL)       /* process -scans if it was present */
       if (! read_scan_script(cinfo, scansarg))
-	usage();
+        usage();
 #endif
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -375,7 +375,7 @@
   jvirt_barray_ptr * dst_coef_arrays;
   int file_index;
   /* We assume all-in-memory processing and can therefore use only a
-   * single file pointer for sequential input and output operation. 
+   * single file pointer for sequential input and output operation.
    */
   FILE * fp;
 
@@ -386,7 +386,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "jpegtran";	/* in case C library doesn't provide it */
+    progname = "jpegtran";      /* in case C library doesn't provide it */
 
   /* Initialize the JPEG decompression object with default error handling. */
   srcinfo.err = jpeg_std_error(&jsrcerr);
@@ -419,14 +419,14 @@
   if (outfilename == NULL) {
     if (file_index != argc-2) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
     outfilename = argv[file_index+1];
   } else {
     if (file_index != argc-1) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
   }
@@ -485,8 +485,8 @@
    */
 #if TRANSFORMS_SUPPORTED
   dst_coef_arrays = jtransform_adjust_parameters(&srcinfo, &dstinfo,
-						 src_coef_arrays,
-						 &transformoption);
+                                                 src_coef_arrays,
+                                                 &transformoption);
 #else
   dst_coef_arrays = src_coef_arrays;
 #endif
@@ -527,8 +527,8 @@
   /* Execute image transformation, if any */
 #if TRANSFORMS_SUPPORTED
   jtransform_execute_transformation(&srcinfo, &dstinfo,
-				    src_coef_arrays,
-				    &transformoption);
+                                    src_coef_arrays,
+                                    &transformoption);
 #endif
 
   /* Finish compression and release memory */
@@ -547,5 +547,5 @@
 
   /* All done. */
   exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/jquant1.c b/jquant1.c
index 9da420d..dbcdd27 100644
--- a/jquant1.c
+++ b/jquant1.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009, D. R. Commander
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -70,9 +70,9 @@
  * table in both directions.
  */
 
-#define ODITHER_SIZE  16	/* dimension of dither matrix */
+#define ODITHER_SIZE  16        /* dimension of dither matrix */
 /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
-#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)       /* # cells in matrix */
 #define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
 
 typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
@@ -107,8 +107,8 @@
  * Errors are accumulated into the array fserrors[], at a resolution of
  * 1/16th of a pixel count.  The error at a given pixel is propagated
  * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
  * We work left-to-right on even rows, right-to-left on odd rows.
  *
  * We can get away with a single array (holding one row's worth of errors)
@@ -127,43 +127,43 @@
  */
 
 #if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+typedef INT32 FSERROR;          /* may need more than 16 bits */
+typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+typedef FSERROR FAR *FSERRPTR;  /* pointer to error array (in FAR storage!) */
 
 
 /* Private subobject */
 
-#define MAX_Q_COMPS 4		/* max components I can handle */
+#define MAX_Q_COMPS 4           /* max components I can handle */
 
 typedef struct {
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
-  int sv_actual;		/* number of entries in use */
+  JSAMPARRAY sv_colormap;       /* The color map as a 2-D pixel array */
+  int sv_actual;                /* number of entries in use */
 
-  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  JSAMPARRAY colorindex;        /* Precomputed mapping for speed */
   /* colorindex[i][j] = index of color closest to pixel value j in component i,
    * premultiplied as described above.  Since colormap indexes must fit into
    * JSAMPLEs, the entries of this array will too.
    */
-  boolean is_padded;		/* is the colorindex padded for odither? */
+  boolean is_padded;            /* is the colorindex padded for odither? */
 
-  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+  int Ncolors[MAX_Q_COMPS];     /* # of values alloced to each component */
 
   /* Variables for ordered dithering */
-  int row_index;		/* cur row's vertical index in dither matrix */
+  int row_index;                /* cur row's vertical index in dither matrix */
   ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
 
   /* Variables for Floyd-Steinberg dithering */
   FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
+  boolean on_odd_row;           /* flag to remember which row we are on */
 } my_cquantizer;
 
 typedef my_cquantizer * my_cquantize_ptr;
@@ -205,11 +205,11 @@
   iroot = 1;
   do {
     iroot++;
-    temp = iroot;		/* set temp = iroot ** nc */
+    temp = iroot;               /* set temp = iroot ** nc */
     for (i = 1; i < nc; i++)
       temp *= iroot;
   } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
-  iroot--;			/* now iroot = floor(root) */
+  iroot--;                      /* now iroot = floor(root) */
 
   /* Must have at least 2 color values per component */
   if (iroot < 2)
@@ -233,10 +233,10 @@
       j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
       /* calculate new total_colors if Ncolors[j] is incremented */
       temp = total_colors / Ncolors[j];
-      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      temp *= Ncolors[j]+1;     /* done in long arith to avoid oflo */
       if (temp > (long) max_colors)
-	break;			/* won't fit, done with this pass */
-      Ncolors[j]++;		/* OK, apply the increment */
+        break;                  /* won't fit, done with this pass */
+      Ncolors[j]++;             /* OK, apply the increment */
       total_colors = (int) temp;
       changed = TRUE;
     }
@@ -278,8 +278,8 @@
 create_colormap (j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colormap;		/* Created colormap */
-  int total_colors;		/* Number of distinct output colors */
+  JSAMPARRAY colormap;          /* Created colormap */
+  int total_colors;             /* Number of distinct output colors */
   int i,j,k, nci, blksize, blkdist, ptr, val;
 
   /* Select number of colors for each component */
@@ -288,8 +288,8 @@
   /* Report selected color counts */
   if (cinfo->out_color_components == 3)
     TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
-	     total_colors, cquantize->Ncolors[0],
-	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+             total_colors, cquantize->Ncolors[0],
+             cquantize->Ncolors[1], cquantize->Ncolors[2]);
   else
     TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
 
@@ -314,12 +314,12 @@
       val = output_value(cinfo, i, j, nci-1);
       /* Fill in all colormap entries that have this value of this component */
       for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
-	/* fill in blksize entries beginning at ptr */
-	for (k = 0; k < blksize; k++)
-	  colormap[i][ptr+k] = (JSAMPLE) val;
+        /* fill in blksize entries beginning at ptr */
+        for (k = 0; k < blksize; k++)
+          colormap[i][ptr+k] = (JSAMPLE) val;
       }
     }
-    blkdist = blksize;		/* blksize of this color is blkdist of next */
+    blkdist = blksize;          /* blksize of this color is blkdist of next */
   }
 
   /* Save the colormap in private storage,
@@ -377,16 +377,16 @@
     val = 0;
     k = largest_input_value(cinfo, i, 0, nci-1);
     for (j = 0; j <= MAXJSAMPLE; j++) {
-      while (j > k)		/* advance val if past boundary */
-	k = largest_input_value(cinfo, i, ++val, nci-1);
+      while (j > k)             /* advance val if past boundary */
+        k = largest_input_value(cinfo, i, ++val, nci-1);
       /* premultiply so that no multiplication needed in main processing */
       indexptr[j] = (JSAMPLE) (val * blksize);
     }
     /* Pad at both ends if necessary */
     if (pad)
       for (j = 1; j <= MAXJSAMPLE; j++) {
-	indexptr[-j] = indexptr[0];
-	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+        indexptr[-j] = indexptr[0];
+        indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
       }
   }
 }
@@ -406,7 +406,7 @@
 
   odither = (ODITHER_MATRIX_PTR)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(ODITHER_MATRIX));
+                                SIZEOF(ODITHER_MATRIX));
   /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
    * Hence the dither value for the matrix cell with fill order f
    * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
@@ -416,7 +416,7 @@
   for (j = 0; j < ODITHER_SIZE; j++) {
     for (k = 0; k < ODITHER_SIZE; k++) {
       num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
-	    * MAXJSAMPLE;
+            * MAXJSAMPLE;
       /* Ensure round towards zero despite C's lack of consistency
        * about rounding negative values in integer division...
        */
@@ -429,7 +429,7 @@
 
 /*
  * Create the ordered-dither tables.
- * Components having the same number of representative colors may 
+ * Components having the same number of representative colors may
  * share a dither table.
  */
 
@@ -442,14 +442,14 @@
 
   for (i = 0; i < cinfo->out_color_components; i++) {
     nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    odither = NULL;		/* search for matching prior component */
+    odither = NULL;             /* search for matching prior component */
     for (j = 0; j < i; j++) {
       if (nci == cquantize->Ncolors[j]) {
-	odither = cquantize->odither[j];
-	break;
+        odither = cquantize->odither[j];
+        break;
       }
     }
-    if (odither == NULL)	/* need a new table? */
+    if (odither == NULL)        /* need a new table? */
       odither = make_odither_array(cinfo, nci);
     cquantize->odither[i] = odither;
   }
@@ -462,7 +462,7 @@
 
 METHODDEF(void)
 color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		JSAMPARRAY output_buf, int num_rows)
+                JSAMPARRAY output_buf, int num_rows)
 /* General case, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -480,7 +480,7 @@
     for (col = width; col > 0; col--) {
       pixcode = 0;
       for (ci = 0; ci < nc; ci++) {
-	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+        pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
       }
       *ptrout++ = (JSAMPLE) pixcode;
     }
@@ -490,7 +490,7 @@
 
 METHODDEF(void)
 color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		 JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -518,15 +518,15 @@
 
 METHODDEF(void)
 quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		     JSAMPARRAY output_buf, int num_rows)
+                     JSAMPARRAY output_buf, int num_rows)
 /* General case, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   register JSAMPROW input_ptr;
   register JSAMPROW output_ptr;
   JSAMPROW colorindex_ci;
-  int * dither;			/* points to active row of dither matrix */
-  int row_index, col_index;	/* current indexes into dither matrix */
+  int * dither;                 /* points to active row of dither matrix */
+  int row_index, col_index;     /* current indexes into dither matrix */
   int nc = cinfo->out_color_components;
   int ci;
   int row;
@@ -536,7 +536,7 @@
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
     jzero_far((void FAR *) output_buf[row],
-	      (size_t) (width * SIZEOF(JSAMPLE)));
+              (size_t) (width * SIZEOF(JSAMPLE)));
     row_index = cquantize->row_index;
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
@@ -546,17 +546,17 @@
       col_index = 0;
 
       for (col = width; col > 0; col--) {
-	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
-	 * select output value, accumulate into output code for this pixel.
-	 * Range-limiting need not be done explicitly, as we have extended
-	 * the colorindex table to produce the right answers for out-of-range
-	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
-	 * required amount of padding.
-	 */
-	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
-	input_ptr += nc;
-	output_ptr++;
-	col_index = (col_index + 1) & ODITHER_MASK;
+        /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+         * select output value, accumulate into output code for this pixel.
+         * Range-limiting need not be done explicitly, as we have extended
+         * the colorindex table to produce the right answers for out-of-range
+         * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+         * required amount of padding.
+         */
+        *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+        input_ptr += nc;
+        output_ptr++;
+        col_index = (col_index + 1) & ODITHER_MASK;
       }
     }
     /* Advance row index for next row */
@@ -568,7 +568,7 @@
 
 METHODDEF(void)
 quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		      JSAMPARRAY output_buf, int num_rows)
+                      JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -578,10 +578,10 @@
   JSAMPROW colorindex0 = cquantize->colorindex[0];
   JSAMPROW colorindex1 = cquantize->colorindex[1];
   JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int * dither0;		/* points to active row of dither matrix */
+  int * dither0;                /* points to active row of dither matrix */
   int * dither1;
   int * dither2;
-  int row_index, col_index;	/* current indexes into dither matrix */
+  int row_index, col_index;     /* current indexes into dither matrix */
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -597,11 +597,11 @@
 
     for (col = width; col > 0; col--) {
       pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
-					dither0[col_index]]);
+                                        dither0[col_index]]);
       pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
-					dither1[col_index]]);
+                                        dither1[col_index]]);
       pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
-					dither2[col_index]]);
+                                        dither2[col_index]]);
       *output_ptr++ = (JSAMPLE) pixcode;
       col_index = (col_index + 1) & ODITHER_MASK;
     }
@@ -613,24 +613,24 @@
 
 METHODDEF(void)
 quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		    JSAMPARRAY output_buf, int num_rows)
+                    JSAMPARRAY output_buf, int num_rows)
 /* General case, with Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register LOCFSERROR cur;	/* current error or pixel value */
-  LOCFSERROR belowerr;		/* error for pixel below cur */
-  LOCFSERROR bpreverr;		/* error for below/prev col */
-  LOCFSERROR bnexterr;		/* error for below/next col */
+  register LOCFSERROR cur;      /* current error or pixel value */
+  LOCFSERROR belowerr;          /* error for pixel below cur */
+  LOCFSERROR bpreverr;          /* error for below/prev col */
+  LOCFSERROR bnexterr;          /* error for below/next col */
   LOCFSERROR delta;
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
   register JSAMPROW input_ptr;
   register JSAMPROW output_ptr;
   JSAMPROW colorindex_ci;
   JSAMPROW colormap_ci;
   int pixcode;
   int nc = cinfo->out_color_components;
-  int dir;			/* 1 for left-to-right, -1 for right-to-left */
-  int dirnc;			/* dir * nc */
+  int dir;                      /* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;                    /* dir * nc */
   int ci;
   int row;
   JDIMENSION col;
@@ -641,22 +641,22 @@
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
     jzero_far((void FAR *) output_buf[row],
-	      (size_t) (width * SIZEOF(JSAMPLE)));
+              (size_t) (width * SIZEOF(JSAMPLE)));
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
       output_ptr = output_buf[row];
       if (cquantize->on_odd_row) {
-	/* work right to left in this row */
-	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
-	output_ptr += width-1;
-	dir = -1;
-	dirnc = -nc;
-	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+        /* work right to left in this row */
+        input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+        output_ptr += width-1;
+        dir = -1;
+        dirnc = -nc;
+        errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
       } else {
-	/* work left to right in this row */
-	dir = 1;
-	dirnc = nc;
-	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+        /* work left to right in this row */
+        dir = 1;
+        dirnc = nc;
+        errorptr = cquantize->fserrors[ci]; /* => entry before first column */
       }
       colorindex_ci = cquantize->colorindex[ci];
       colormap_ci = cquantize->sv_colormap[ci];
@@ -666,47 +666,47 @@
       belowerr = bpreverr = 0;
 
       for (col = width; col > 0; col--) {
-	/* cur holds the error propagated from the previous pixel on the
-	 * current line.  Add the error propagated from the previous line
-	 * to form the complete error correction term for this pixel, and
-	 * round the error term (which is expressed * 16) to an integer.
-	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-	 * for either sign of the error value.
-	 * Note: errorptr points to *previous* column's array entry.
-	 */
-	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-	 * The maximum error is +- MAXJSAMPLE; this sets the required size
-	 * of the range_limit array.
-	 */
-	cur += GETJSAMPLE(*input_ptr);
-	cur = GETJSAMPLE(range_limit[cur]);
-	/* Select output value, accumulate into output code for this pixel */
-	pixcode = GETJSAMPLE(colorindex_ci[cur]);
-	*output_ptr += (JSAMPLE) pixcode;
-	/* Compute actual representation error at this pixel */
-	/* Note: we can do this even though we don't have the final */
-	/* pixel code, because the colormap is orthogonal. */
-	cur -= GETJSAMPLE(colormap_ci[pixcode]);
-	/* Compute error fractions to be propagated to adjacent pixels.
-	 * Add these into the running sums, and simultaneously shift the
-	 * next-line error sums left by 1 column.
-	 */
-	bnexterr = cur;
-	delta = cur * 2;
-	cur += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr + cur);
-	cur += delta;		/* form error * 5 */
-	bpreverr = belowerr + cur;
-	belowerr = bnexterr;
-	cur += delta;		/* form error * 7 */
-	/* At this point cur contains the 7/16 error value to be propagated
-	 * to the next pixel on the current line, and all the errors for the
-	 * next line have been shifted over. We are therefore ready to move on.
-	 */
-	input_ptr += dirnc;	/* advance input ptr to next column */
-	output_ptr += dir;	/* advance output ptr to next column */
-	errorptr += dir;	/* advance errorptr to current column */
+        /* cur holds the error propagated from the previous pixel on the
+         * current line.  Add the error propagated from the previous line
+         * to form the complete error correction term for this pixel, and
+         * round the error term (which is expressed * 16) to an integer.
+         * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+         * for either sign of the error value.
+         * Note: errorptr points to *previous* column's array entry.
+         */
+        cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+        /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+         * The maximum error is +- MAXJSAMPLE; this sets the required size
+         * of the range_limit array.
+         */
+        cur += GETJSAMPLE(*input_ptr);
+        cur = GETJSAMPLE(range_limit[cur]);
+        /* Select output value, accumulate into output code for this pixel */
+        pixcode = GETJSAMPLE(colorindex_ci[cur]);
+        *output_ptr += (JSAMPLE) pixcode;
+        /* Compute actual representation error at this pixel */
+        /* Note: we can do this even though we don't have the final */
+        /* pixel code, because the colormap is orthogonal. */
+        cur -= GETJSAMPLE(colormap_ci[pixcode]);
+        /* Compute error fractions to be propagated to adjacent pixels.
+         * Add these into the running sums, and simultaneously shift the
+         * next-line error sums left by 1 column.
+         */
+        bnexterr = cur;
+        delta = cur * 2;
+        cur += delta;           /* form error * 3 */
+        errorptr[0] = (FSERROR) (bpreverr + cur);
+        cur += delta;           /* form error * 5 */
+        bpreverr = belowerr + cur;
+        belowerr = bnexterr;
+        cur += delta;           /* form error * 7 */
+        /* At this point cur contains the 7/16 error value to be propagated
+         * to the next pixel on the current line, and all the errors for the
+         * next line have been shifted over. We are therefore ready to move on.
+         */
+        input_ptr += dirnc;     /* advance input ptr to next column */
+        output_ptr += dir;      /* advance output ptr to next column */
+        errorptr += dir;        /* advance errorptr to current column */
       }
       /* Post-loop cleanup: we must unload the final error value into the
        * final fserrors[] entry.  Note we need not unload belowerr because
@@ -766,7 +766,7 @@
       cquantize->pub.color_quantize = quantize3_ord_dither;
     else
       cquantize->pub.color_quantize = quantize_ord_dither;
-    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    cquantize->row_index = 0;   /* initialize state for ordered dither */
     /* If user changed to ordered dither from another mode,
      * we must recreate the color index table with padding.
      * This will cost extra space, but probably isn't very likely.
@@ -829,13 +829,13 @@
 
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
+                                SIZEOF(my_cquantizer));
   cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
   cquantize->pub.start_pass = start_pass_1_quant;
   cquantize->pub.finish_pass = finish_pass_1_quant;
   cquantize->pub.new_color_map = new_color_map_1_quant;
   cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
-  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+  cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */
 
   /* Make sure my internal arrays won't overflow */
   if (cinfo->out_color_components > MAX_Q_COMPS)
diff --git a/jquant2.c b/jquant2.c
index 3952e1c..0fdb197 100644
--- a/jquant2.c
+++ b/jquant2.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2009, 2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -72,9 +72,9 @@
  * probably need to change these scale factors.
  */
 
-#define R_SCALE 2		/* scale R distances by this much */
-#define G_SCALE 3		/* scale G distances by this much */
-#define B_SCALE 1		/* and B by this much */
+#define R_SCALE 2               /* scale R distances by this much */
+#define G_SCALE 3               /* scale G distances by this much */
+#define B_SCALE 1               /* and B by this much */
 
 static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE};
 #define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]]
@@ -112,9 +112,9 @@
 /* These will do the right thing for either R,G,B or B,G,R color order,
  * but you may not like the results for other color orders.
  */
-#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
-#define HIST_C1_BITS  6		/* bits of precision in G histogram */
-#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+#define HIST_C0_BITS  5         /* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6         /* bits of precision in G histogram */
+#define HIST_C2_BITS  5         /* bits of precision in B/R histogram */
 
 /* Number of elements along histogram axes. */
 #define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
@@ -127,13 +127,13 @@
 #define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
 
 
-typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+typedef UINT16 histcell;        /* histogram cell; prefer an unsigned type */
 
-typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+typedef histcell FAR * histptr; /* for pointers to histogram cells */
 
 typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
-typedef hist2d * hist3d;	/* type for top-level pointer */
+typedef hist1d FAR * hist2d;    /* type for the 2nd-level pointers */
+typedef hist2d * hist3d;        /* type for top-level pointer */
 
 
 /* Declarations for Floyd-Steinberg dithering.
@@ -141,8 +141,8 @@
  * Errors are accumulated into the array fserrors[], at a resolution of
  * 1/16th of a pixel count.  The error at a given pixel is propagated
  * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
  * We work left-to-right on even rows, right-to-left on odd rows.
  *
  * We can get away with a single array (holding one row's worth of errors)
@@ -161,14 +161,14 @@
  */
 
 #if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+typedef INT32 FSERROR;          /* may need more than 16 bits */
+typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+typedef FSERROR FAR *FSERRPTR;  /* pointer to error array (in FAR storage!) */
 
 
 /* Private subobject */
@@ -177,18 +177,18 @@
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
-  int desired;			/* desired # of colors = size of colormap */
+  JSAMPARRAY sv_colormap;       /* colormap allocated at init time */
+  int desired;                  /* desired # of colors = size of colormap */
 
   /* Variables for accumulating image statistics */
-  hist3d histogram;		/* pointer to the histogram */
+  hist3d histogram;             /* pointer to the histogram */
 
-  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+  boolean needs_zeroed;         /* TRUE if next pass must zero histogram */
 
   /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors;		/* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-  int * error_limiter;		/* table for clamping the applied error */
+  FSERRPTR fserrors;            /* accumulated errors */
+  boolean on_odd_row;           /* flag to remember which row we are on */
+  int * error_limiter;          /* table for clamping the applied error */
 } my_cquantizer;
 
 typedef my_cquantizer * my_cquantize_ptr;
@@ -205,7 +205,7 @@
 
 METHODDEF(void)
 prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		  JSAMPARRAY output_buf, int num_rows)
+                  JSAMPARRAY output_buf, int num_rows)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   register JSAMPROW ptr;
@@ -220,11 +220,11 @@
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the histogram */
       histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+                         [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+                         [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
       /* increment, check for overflow and undo increment if so. */
       if (++(*histp) <= 0)
-	(*histp)--;
+        (*histp)--;
       ptr += 3;
     }
   }
@@ -312,67 +312,67 @@
   if (c0max > c0min)
     for (c0 = c0min; c0 <= c0max; c0++)
       for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0min = c0min = c0;
-	    goto have_c0min;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0min = c0min = c0;
+            goto have_c0min;
+          }
       }
  have_c0min:
   if (c0max > c0min)
     for (c0 = c0max; c0 >= c0min; c0--)
       for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0max = c0max = c0;
-	    goto have_c0max;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0max = c0max = c0;
+            goto have_c0max;
+          }
       }
  have_c0max:
   if (c1max > c1min)
     for (c1 = c1min; c1 <= c1max; c1++)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1min = c1min = c1;
-	    goto have_c1min;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1min = c1min = c1;
+            goto have_c1min;
+          }
       }
  have_c1min:
   if (c1max > c1min)
     for (c1 = c1max; c1 >= c1min; c1--)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1max = c1max = c1;
-	    goto have_c1max;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1max = c1max = c1;
+            goto have_c1max;
+          }
       }
  have_c1max:
   if (c2max > c2min)
     for (c2 = c2min; c2 <= c2max; c2++)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2min = c2min = c2;
-	    goto have_c2min;
-	  }
+        histp = & histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2min = c2min = c2;
+            goto have_c2min;
+          }
       }
  have_c2min:
   if (c2max > c2min)
     for (c2 = c2max; c2 >= c2min; c2--)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2max = c2max = c2;
-	    goto have_c2max;
-	  }
+        histp = & histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2max = c2max = c2;
+            goto have_c2max;
+          }
       }
  have_c2max:
 
@@ -395,9 +395,9 @@
     for (c1 = c1min; c1 <= c1max; c1++) {
       histp = & histogram[c0][c1][c2min];
       for (c2 = c2min; c2 <= c2max; c2++, histp++)
-	if (*histp != 0) {
-	  ccount++;
-	}
+        if (*histp != 0) {
+          ccount++;
+        }
     }
   boxp->colorcount = ccount;
 }
@@ -405,7 +405,7 @@
 
 LOCAL(int)
 median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
-	    int desired_colors)
+            int desired_colors)
 /* Repeatedly select and split the largest box until we have enough boxes */
 {
   int n,lb;
@@ -421,9 +421,9 @@
     } else {
       b1 = find_biggest_volume(boxlist, numboxes);
     }
-    if (b1 == NULL)		/* no splittable boxes left! */
+    if (b1 == NULL)             /* no splittable boxes left! */
       break;
-    b2 = &boxlist[numboxes];	/* where new box will go */
+    b2 = &boxlist[numboxes];    /* where new box will go */
     /* Copy the color bounds to the new box. */
     b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
     b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
@@ -504,12 +504,12 @@
     for (c1 = c1min; c1 <= c1max; c1++) {
       histp = & histogram[c0][c1][c2min];
       for (c2 = c2min; c2 <= c2max; c2++) {
-	if ((count = *histp++) != 0) {
-	  total += count;
-	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
-	}
+        if ((count = *histp++) != 0) {
+          total += count;
+          c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+          c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+          c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+        }
       }
     }
 
@@ -628,7 +628,7 @@
 
 LOCAL(int)
 find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		    JSAMPLE colorlist[])
+                    JSAMPLE colorlist[])
 /* Locate the colormap entries close enough to an update box to be candidates
  * for the nearest entry to some cell(s) in the update box.  The update box
  * is specified by the center coordinates of its first cell.  The number of
@@ -643,7 +643,7 @@
   int centerc0, centerc1, centerc2;
   int i, x, ncolors;
   INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+  INT32 mindist[MAXNUMCOLORS];  /* min distance to colormap entry i */
 
   /* Compute true coordinates of update box's upper corner and center.
    * Actually we compute the coordinates of the center of the upper-corner
@@ -685,11 +685,11 @@
       /* within cell range so no contribution to min_dist */
       min_dist = 0;
       if (x <= centerc0) {
-	tdist = (x - maxc0) * C0_SCALE;
-	max_dist = tdist*tdist;
+        tdist = (x - maxc0) * C0_SCALE;
+        max_dist = tdist*tdist;
       } else {
-	tdist = (x - minc0) * C0_SCALE;
-	max_dist = tdist*tdist;
+        tdist = (x - minc0) * C0_SCALE;
+        max_dist = tdist*tdist;
       }
     }
 
@@ -707,11 +707,11 @@
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc1) {
-	tdist = (x - maxc1) * C1_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - maxc1) * C1_SCALE;
+        max_dist += tdist*tdist;
       } else {
-	tdist = (x - minc1) * C1_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - minc1) * C1_SCALE;
+        max_dist += tdist*tdist;
       }
     }
 
@@ -729,15 +729,15 @@
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc2) {
-	tdist = (x - maxc2) * C2_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - maxc2) * C2_SCALE;
+        max_dist += tdist*tdist;
       } else {
-	tdist = (x - minc2) * C2_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - minc2) * C2_SCALE;
+        max_dist += tdist*tdist;
       }
     }
 
-    mindist[i] = min_dist;	/* save away the results */
+    mindist[i] = min_dist;      /* save away the results */
     if (max_dist < minmaxdist)
       minmaxdist = max_dist;
   }
@@ -757,7 +757,7 @@
 
 LOCAL(void)
 find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+                  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
 /* Find the closest colormap entry for each cell in the update box,
  * given the list of candidate colors prepared by find_nearby_colors.
  * Return the indexes of the closest entries in the bestcolor[] array.
@@ -767,13 +767,13 @@
 {
   int ic0, ic1, ic2;
   int i, icolor;
-  register INT32 * bptr;	/* pointer into bestdist[] array */
-  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
-  INT32 dist0, dist1;		/* initial distance values */
-  register INT32 dist2;		/* current distance in inner loop */
-  INT32 xx0, xx1;		/* distance increments */
+  register INT32 * bptr;        /* pointer into bestdist[] array */
+  JSAMPLE * cptr;               /* pointer into bestcolor[] array */
+  INT32 dist0, dist1;           /* initial distance values */
+  register INT32 dist2;         /* current distance in inner loop */
+  INT32 xx0, xx1;               /* distance increments */
   register INT32 xx2;
-  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  INT32 inc0, inc1, inc2;       /* initial values for increments */
   /* This array holds the distance to the nearest-so-far color for each cell */
   INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
@@ -813,20 +813,20 @@
       dist1 = dist0;
       xx1 = inc1;
       for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
-	dist2 = dist1;
-	xx2 = inc2;
-	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
-	  if (dist2 < *bptr) {
-	    *bptr = dist2;
-	    *cptr = (JSAMPLE) icolor;
-	  }
-	  dist2 += xx2;
-	  xx2 += 2 * STEP_C2 * STEP_C2;
-	  bptr++;
-	  cptr++;
-	}
-	dist1 += xx1;
-	xx1 += 2 * STEP_C1 * STEP_C1;
+        dist2 = dist1;
+        xx2 = inc2;
+        for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+          if (dist2 < *bptr) {
+            *bptr = dist2;
+            *cptr = (JSAMPLE) icolor;
+          }
+          dist2 += xx2;
+          xx2 += 2 * STEP_C2 * STEP_C2;
+          bptr++;
+          cptr++;
+        }
+        dist1 += xx1;
+        xx1 += 2 * STEP_C1 * STEP_C1;
       }
       dist0 += xx0;
       xx0 += 2 * STEP_C0 * STEP_C0;
@@ -843,13 +843,13 @@
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   hist3d histogram = cquantize->histogram;
-  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int minc0, minc1, minc2;      /* lower left corner of update box */
   int ic0, ic1, ic2;
-  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
-  register histptr cachep;	/* pointer into main cache array */
+  register JSAMPLE * cptr;      /* pointer into bestcolor[] array */
+  register histptr cachep;      /* pointer into main cache array */
   /* This array lists the candidate colormap indexes. */
   JSAMPLE colorlist[MAXNUMCOLORS];
-  int numcolors;		/* number of candidate colors */
+  int numcolors;                /* number of candidate colors */
   /* This array holds the actually closest colormap index for each cell. */
   JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
@@ -873,10 +873,10 @@
 
   /* Determine the actually nearest colors. */
   find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
-		   bestcolor);
+                   bestcolor);
 
   /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c0 <<= BOX_C0_LOG;            /* convert ID back to base cell indexes */
   c1 <<= BOX_C1_LOG;
   c2 <<= BOX_C2_LOG;
   cptr = bestcolor;
@@ -884,7 +884,7 @@
     for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
       cachep = & histogram[c0+ic0][c1+ic1][c2];
       for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
+        *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
       }
     }
   }
@@ -897,7 +897,7 @@
 
 METHODDEF(void)
 pass2_no_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -921,7 +921,7 @@
       /* If we have not seen this color before, find nearest colormap entry */
       /* and update the cache */
       if (*cachep == 0)
-	fill_inverse_cmap(cinfo, c0,c1,c2);
+        fill_inverse_cmap(cinfo, c0,c1,c2);
       /* Now emit the colormap index for this cell */
       *outptr++ = (JSAMPLE) (*cachep - 1);
     }
@@ -931,20 +931,20 @@
 
 METHODDEF(void)
 pass2_fs_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   hist3d histogram = cquantize->histogram;
-  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */
   LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
   LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW inptr;		/* => current input pixel */
-  JSAMPROW outptr;		/* => current output pixel */
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
+  JSAMPROW inptr;               /* => current input pixel */
+  JSAMPROW outptr;              /* => current output pixel */
   histptr cachep;
-  int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int dir;                      /* +1 or -1 depending on direction */
+  int dir3;                     /* 3*dir, for advancing inptr & errorptr */
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -960,7 +960,7 @@
     outptr = output_buf[row];
     if (cquantize->on_odd_row) {
       /* work right to left in this row */
-      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      inptr += (width-1) * 3;   /* so point to rightmost pixel */
       outptr += width-1;
       dir = -1;
       dir3 = -3;
@@ -1012,14 +1012,14 @@
       /* If we have not seen this color before, find nearest colormap */
       /* entry and update the cache */
       if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+        fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
       /* Now emit the colormap index for this cell */
       { register int pixcode = *cachep - 1;
-	*outptr = (JSAMPLE) pixcode;
-	/* Compute representation error for this pixel */
-	cur0 -= GETJSAMPLE(colormap0[pixcode]);
-	cur1 -= GETJSAMPLE(colormap1[pixcode]);
-	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+        *outptr = (JSAMPLE) pixcode;
+        /* Compute representation error for this pixel */
+        cur0 -= GETJSAMPLE(colormap0[pixcode]);
+        cur1 -= GETJSAMPLE(colormap1[pixcode]);
+        cur2 -= GETJSAMPLE(colormap2[pixcode]);
       }
       /* Compute error fractions to be propagated to adjacent pixels.
        * Add these into the running sums, and simultaneously shift the
@@ -1027,29 +1027,29 @@
        */
       { register LOCFSERROR bnexterr;
 
-	bnexterr = cur0;	/* Process component 0 */
-	errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3);
-	bpreverr0 = belowerr0 + cur0 * 5;
-	belowerr0 = bnexterr;
-	cur0 *= 7;
-	bnexterr = cur1;	/* Process component 1 */
-	errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3);
-	bpreverr1 = belowerr1 + cur1 * 5;
-	belowerr1 = bnexterr;
-	cur1 *= 7;
-	bnexterr = cur2;	/* Process component 2 */
-	errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3);
-	bpreverr2 = belowerr2 + cur2 * 5;
-	belowerr2 = bnexterr;
-	cur2 *= 7;
+        bnexterr = cur0;        /* Process component 0 */
+        errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3);
+        bpreverr0 = belowerr0 + cur0 * 5;
+        belowerr0 = bnexterr;
+        cur0 *= 7;
+        bnexterr = cur1;        /* Process component 1 */
+        errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3);
+        bpreverr1 = belowerr1 + cur1 * 5;
+        belowerr1 = bnexterr;
+        cur1 *= 7;
+        bnexterr = cur2;        /* Process component 2 */
+        errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3);
+        bpreverr2 = belowerr2 + cur2 * 5;
+        belowerr2 = bnexterr;
+        cur2 *= 7;
       }
       /* At this point curN contains the 7/16 error value to be propagated
        * to the next pixel on the current line, and all the errors for the
        * next line have been shifted over.  We are therefore ready to move on.
        */
-      inptr += dir3;		/* Advance pixel pointers to next column */
+      inptr += dir3;            /* Advance pixel pointers to next column */
       outptr += dir;
-      errorptr += dir3;		/* advance errorptr to current column */
+      errorptr += dir3;         /* advance errorptr to current column */
     }
     /* Post-loop cleanup: we must unload the final error values into the
      * final fserrors[] entry.  Note we need not unload belowerrN because
@@ -1089,7 +1089,7 @@
 
   table = (int *) (*cinfo->mem->alloc_small)
     ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
-  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  table += MAXJSAMPLE;          /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
   cquantize->error_limiter = table;
 
 #define STEPSIZE ((MAXJSAMPLE+1)/16)
@@ -1172,16 +1172,16 @@
 
     if (cinfo->dither_mode == JDITHER_FS) {
       size_t arraysize = (size_t) ((cinfo->output_width + 2) *
-				   (3 * SIZEOF(FSERROR)));
+                                   (3 * SIZEOF(FSERROR)));
       /* Allocate Floyd-Steinberg workspace if we didn't already. */
       if (cquantize->fserrors == NULL)
-	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+        cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
       /* Initialize the propagated errors to zero. */
       jzero_far((void FAR *) cquantize->fserrors, arraysize);
       /* Make the error-limit table if we didn't already. */
       if (cquantize->error_limiter == NULL)
-	init_error_limit(cinfo);
+        init_error_limit(cinfo);
       cquantize->on_odd_row = FALSE;
     }
 
@@ -1190,7 +1190,7 @@
   if (cquantize->needs_zeroed) {
     for (i = 0; i < HIST_C0_ELEMS; i++) {
       jzero_far((void FAR *) histogram[i],
-		HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+                HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
     }
     cquantize->needs_zeroed = FALSE;
   }
@@ -1223,11 +1223,11 @@
 
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
+                                SIZEOF(my_cquantizer));
   cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
   cquantize->pub.start_pass = start_pass_2_quant;
   cquantize->pub.new_color_map = new_color_map_2_quant;
-  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->fserrors = NULL;   /* flag optional arrays not allocated */
   cquantize->error_limiter = NULL;
 
   /* Make sure jdmaster didn't give me a case I can't handle */
diff --git a/jsimd.h b/jsimd.h
index 3fa2c43..ae07215 100644
--- a/jsimd.h
+++ b/jsimd.h
@@ -3,7 +3,7 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright 2011 D. R. Commander
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
diff --git a/jsimd_none.c b/jsimd_none.c
index 9787902..54130db 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright 2009-2011 D. R. Commander
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
@@ -95,7 +95,7 @@
 
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
@@ -103,7 +103,7 @@
 
 GLOBAL(void)
 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
@@ -123,7 +123,7 @@
 
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
@@ -131,7 +131,7 @@
 
 GLOBAL(void)
 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
@@ -292,22 +292,22 @@
 
 GLOBAL(void)
 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
 }
 
 GLOBAL(void)
 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
 }
 
 GLOBAL(void)
 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
 }
 
diff --git a/jsimddct.h b/jsimddct.h
index a1c7440..0d8804c 100644
--- a/jsimddct.h
+++ b/jsimddct.h
@@ -2,7 +2,7 @@
  * jsimddct.h
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
diff --git a/jutils.c b/jutils.c
index d18a955..b9997a3 100644
--- a/jutils.c
+++ b/jutils.c
@@ -21,7 +21,7 @@
  * of a DCT block read in natural order (left to right, top to bottom).
  */
 
-#if 0				/* This table is not actually needed in v6a */
+#if 0                           /* This table is not actually needed in v6a */
 
 const int jpeg_zigzag_order[DCTSIZE2] = {
    0,  1,  5,  6, 14, 15, 27, 28,
@@ -96,21 +96,21 @@
  * is not all that great, because these routines aren't very heavily used.)
  */
 
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macros */
-#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
-#define FMEMZERO(target,size)	MEMZERO(target,size)
-#else				/* 80x86 case, define if we can */
+#ifndef NEED_FAR_POINTERS       /* normal case, same as regular macros */
+#define FMEMCOPY(dest,src,size) MEMCOPY(dest,src,size)
+#define FMEMZERO(target,size)   MEMZERO(target,size)
+#else                           /* 80x86 case, define if we can */
 #ifdef USE_FMEM
-#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
-#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#define FMEMCOPY(dest,src,size) _fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#define FMEMZERO(target,size)   _fmemset((void FAR *)(target), 0, (size_t)(size))
 #endif
 #endif
 
 
 GLOBAL(void)
 jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
-		   JSAMPARRAY output_array, int dest_row,
-		   int num_rows, JDIMENSION num_cols)
+                   JSAMPARRAY output_array, int dest_row,
+                   int num_rows, JDIMENSION num_cols)
 /* Copy some rows of samples from one place to another.
  * num_rows rows are copied from input_array[source_row++]
  * to output_array[dest_row++]; these areas may overlap for duplication.
@@ -135,7 +135,7 @@
     FMEMCOPY(outptr, inptr, count);
 #else
     for (count = num_cols; count > 0; count--)
-      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+      *outptr++ = *inptr++;     /* needn't bother with GETJSAMPLE() here */
 #endif
   }
 }
@@ -143,7 +143,7 @@
 
 GLOBAL(void)
 jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
-		 JDIMENSION num_blocks)
+                 JDIMENSION num_blocks)
 /* Copy a row of coefficient blocks from one place to another. */
 {
 #ifdef FMEMCOPY
diff --git a/jversion.h b/jversion.h
index 8d3a5f7..1bd4dba 100644
--- a/jversion.h
+++ b/jversion.h
@@ -2,8 +2,8 @@
  * jversion.h
  *
  * This file was part of the Independent JPEG Group's software:
- * Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
- * Modifications:
+ * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, 2012-2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -13,22 +13,22 @@
 
 #if JPEG_LIB_VERSION >= 80
 
-#define JVERSION	"8b  16-May-2010"
+#define JVERSION        "8d  15-Jan-2012"
 
 #elif JPEG_LIB_VERSION >= 70
 
-#define JVERSION	"7  27-Jun-2009"
+#define JVERSION        "7  27-Jun-2009"
 
 #else
 
-#define JVERSION	"6b  27-Mar-1998"
+#define JVERSION        "6b  27-Mar-1998"
 
 #endif
 
-#define JCOPYRIGHT	"Copyright (C) 1991-2010 Thomas G. Lane, Guido Vollbeding\n" \
-			"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-			"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
-			"Copyright (C) 2009-2014 D. R. Commander\n" \
-			"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)"
+#define JCOPYRIGHT      "Copyright (C) 1991-2012 Thomas G. Lane, Guido Vollbeding\n" \
+                        "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
+                        "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
+                        "Copyright (C) 2009-2014 D. R. Commander\n" \
+                        "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)"
 
 #define JCOPYRIGHT_SHORT "Copyright (C) 1991-2014 The libjpeg-turbo Project and many others"
diff --git a/libjpeg.map.in b/libjpeg.map.in
index 5443943..b4480d8 100644
--- a/libjpeg.map.in
+++ b/libjpeg.map.in
@@ -1,4 +1,5 @@
 LIBJPEGTURBO_@JPEG_LIB_VERSION_DECIMAL@ {
+  @MEM_SRCDST_FUNCTIONS@
   local:
     jsimd_*;
     jconst_*;
diff --git a/libjpeg.txt b/libjpeg.txt
index 6970136..255a7e1 100644
--- a/libjpeg.txt
+++ b/libjpeg.txt
@@ -1,9 +1,9 @@
 USING THE IJG JPEG LIBRARY
 
 This file was part of the Independent JPEG Group's software:
-Copyright (C) 1994-2009, Thomas G. Lane, Guido Vollbeding.
-Modifications:
-Copyright (C) 2010, D. R. Commander.
+Copyright (C) 1994-2011, Thomas G. Lane, Guido Vollbeding.
+libjpeg-turbo Modifications:
+Copyright (C) 2010, 2014, D. R. Commander.
 For conditions of distribution and use, see the accompanying README file.
 
 
@@ -27,32 +27,32 @@
 -----------------
 
 Overview:
-	Functions provided by the library
-	Outline of typical usage
+        Functions provided by the library
+        Outline of typical usage
 Basic library usage:
-	Data formats
-	Compression details
-	Decompression details
-	Mechanics of usage: include files, linking, etc
+        Data formats
+        Compression details
+        Decompression details
+        Mechanics of usage: include files, linking, etc
 Advanced features:
-	Compression parameter selection
-	Decompression parameter selection
-	Special color spaces
-	Error handling
-	Compressed data handling (source and destination managers)
-	I/O suspension
-	Progressive JPEG support
-	Buffered-image mode
-	Abbreviated datastreams and multiple images
-	Special markers
-	Raw (downsampled) image data
-	Really raw data: DCT coefficients
-	Progress monitoring
-	Memory management
-	Memory usage
-	Library compile-time options
-	Portability considerations
-	Notes for MS-DOS implementors
+        Compression parameter selection
+        Decompression parameter selection
+        Special color spaces
+        Error handling
+        Compressed data handling (source and destination managers)
+        I/O suspension
+        Progressive JPEG support
+        Buffered-image mode
+        Abbreviated datastreams and multiple images
+        Special markers
+        Raw (downsampled) image data
+        Really raw data: DCT coefficients
+        Progress monitoring
+        Memory management
+        Memory usage
+        Library compile-time options
+        Portability considerations
+        Notes for MS-DOS implementors
 
 You should read at least the overview and basic usage sections before trying
 to program with the library.  The sections on advanced features can be read
@@ -93,10 +93,10 @@
 the ISO JPEG standard; most baseline, extended-sequential, and progressive
 JPEG processes are supported.  (Our subset includes all features now in common
 use.)  Unsupported ISO options include:
-	* Hierarchical storage
-	* Lossless JPEG
-	* DNL marker
-	* Nonintegral subsampling ratios
+        * Hierarchical storage
+        * Lossless JPEG
+        * DNL marker
+        * Nonintegral subsampling ratios
 We support both 8- and 12-bit data precision, but this is a compile-time
 choice rather than a run-time choice; hence it is difficult to use both
 precisions in a single application.
@@ -113,14 +113,14 @@
 
 The rough outline of a JPEG compression operation is:
 
-	Allocate and initialize a JPEG compression object
-	Specify the destination for the compressed data (eg, a file)
-	Set parameters for compression, including image size & colorspace
-	jpeg_start_compress(...);
-	while (scan lines remain to be written)
-		jpeg_write_scanlines(...);
-	jpeg_finish_compress(...);
-	Release the JPEG compression object
+        Allocate and initialize a JPEG compression object
+        Specify the destination for the compressed data (eg, a file)
+        Set parameters for compression, including image size & colorspace
+        jpeg_start_compress(...);
+        while (scan lines remain to be written)
+                jpeg_write_scanlines(...);
+        jpeg_finish_compress(...);
+        Release the JPEG compression object
 
 A JPEG compression object holds parameters and working state for the JPEG
 library.  We make creation/destruction of the object separate from starting
@@ -139,15 +139,15 @@
 
 Similarly, the rough outline of a JPEG decompression operation is:
 
-	Allocate and initialize a JPEG decompression object
-	Specify the source of the compressed data (eg, a file)
-	Call jpeg_read_header() to obtain image info
-	Set parameters for decompression
-	jpeg_start_decompress(...);
-	while (scan lines remain to be read)
-		jpeg_read_scanlines(...);
-	jpeg_finish_decompress(...);
-	Release the JPEG decompression object
+        Allocate and initialize a JPEG decompression object
+        Specify the source of the compressed data (eg, a file)
+        Call jpeg_read_header() to obtain image info
+        Set parameters for decompression
+        jpeg_start_decompress(...);
+        while (scan lines remain to be read)
+                jpeg_read_scanlines(...);
+        jpeg_finish_decompress(...);
+        Release the JPEG decompression object
 
 This is comparable to the compression outline except that reading the
 datastream header is a separate step.  This is helpful because information
@@ -272,11 +272,11 @@
 
 Typical code for this step, if you are using the default error handler, is
 
-	struct jpeg_compress_struct cinfo;
-	struct jpeg_error_mgr jerr;
-	...
-	cinfo.err = jpeg_std_error(&jerr);
-	jpeg_create_compress(&cinfo);
+        struct jpeg_compress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_compress(&cinfo);
 
 jpeg_create_compress allocates a small amount of memory, so it could fail
 if you are out of memory.  In that case it will exit via the error handler;
@@ -293,13 +293,13 @@
 If you use the standard destination module, you must open the target stdio
 stream beforehand.  Typical code for this step looks like:
 
-	FILE * outfile;
-	...
-	if ((outfile = fopen(filename, "wb")) == NULL) {
-	    fprintf(stderr, "can't open %s\n", filename);
-	    exit(1);
-	}
-	jpeg_stdio_dest(&cinfo, outfile);
+        FILE * outfile;
+        ...
+        if ((outfile = fopen(filename, "wb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_dest(&cinfo, outfile);
 
 where the last line invokes the standard destination module.
 
@@ -320,10 +320,10 @@
 You must supply information about the source image by setting the following
 fields in the JPEG object (cinfo structure):
 
-	image_width		Width of image, in pixels
-	image_height		Height of image, in pixels
-	input_components	Number of color channels (samples per pixel)
-	in_color_space		Color space of source image
+        image_width             Width of image, in pixels
+        image_height            Height of image, in pixels
+        input_components        Number of color channels (samples per pixel)
+        in_color_space          Color space of source image
 
 The image dimensions are, hopefully, obvious.  JPEG supports image dimensions
 of 1 to 64K pixels in either direction.  The input color space is typically
@@ -347,13 +347,13 @@
 
 Typical code for a 24-bit RGB source image is
 
-	cinfo.image_width = Width; 	/* image width and height, in pixels */
-	cinfo.image_height = Height;
-	cinfo.input_components = 3;	/* # of color components per pixel */
-	cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
+        cinfo.image_width = Width;      /* image width and height, in pixels */
+        cinfo.image_height = Height;
+        cinfo.input_components = 3;     /* # of color components per pixel */
+        cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
 
-	jpeg_set_defaults(&cinfo);
-	/* Make optional parameter settings here */
+        jpeg_set_defaults(&cinfo);
+        /* Make optional parameter settings here */
 
 
 4. jpeg_start_compress(...);
@@ -365,7 +365,7 @@
 
 Typical code:
 
-	jpeg_start_compress(&cinfo, TRUE);
+        jpeg_start_compress(&cinfo, TRUE);
 
 The "TRUE" parameter ensures that a complete JPEG interchange datastream
 will be written.  This is appropriate in most cases.  If you think you might
@@ -378,7 +378,7 @@
 
 
 5. while (scan lines remain to be written)
-	jpeg_write_scanlines(...);
+        jpeg_write_scanlines(...);
 
 Now write all the required image data by calling jpeg_write_scanlines()
 one or more times.  You can pass one or more scanlines in each call, up
@@ -403,15 +403,15 @@
 example.c shows the following code for the case of a full-size 2-D source
 array containing 3-byte RGB pixels:
 
-	JSAMPROW row_pointer[1];	/* pointer to a single row */
-	int row_stride;			/* physical row width in buffer */
+        JSAMPROW row_pointer[1];        /* pointer to a single row */
+        int row_stride;                 /* physical row width in buffer */
 
-	row_stride = image_width * 3;	/* JSAMPLEs per row in image_buffer */
+        row_stride = image_width * 3;   /* JSAMPLEs per row in image_buffer */
 
-	while (cinfo.next_scanline < cinfo.image_height) {
-	    row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride];
-	    jpeg_write_scanlines(&cinfo, row_pointer, 1);
-	}
+        while (cinfo.next_scanline < cinfo.image_height) {
+            row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride];
+            jpeg_write_scanlines(&cinfo, row_pointer, 1);
+        }
 
 jpeg_write_scanlines() returns the number of scanlines actually written.
 This will normally be equal to the number passed in, so you can usually
@@ -436,7 +436,7 @@
 
 Typical code:
 
-	jpeg_finish_compress(&cinfo);
+        jpeg_finish_compress(&cinfo);
 
 If using the stdio destination manager, don't forget to close the output
 stdio stream (if necessary) afterwards.
@@ -479,7 +479,7 @@
 
 Typical code:
 
-	jpeg_destroy_compress(&cinfo);
+        jpeg_destroy_compress(&cinfo);
 
 
 8. Aborting.
@@ -520,11 +520,11 @@
 
 Typical code:
 
-	struct jpeg_decompress_struct cinfo;
-	struct jpeg_error_mgr jerr;
-	...
-	cinfo.err = jpeg_std_error(&jerr);
-	jpeg_create_decompress(&cinfo);
+        struct jpeg_decompress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_decompress(&cinfo);
 
 (Both here and in the IJG code, we usually use variable name "cinfo" for
 both compression and decompression objects.)
@@ -540,13 +540,13 @@
 If you use the standard source module, you must open the source stdio stream
 beforehand.  Typical code for this step looks like:
 
-	FILE * infile;
-	...
-	if ((infile = fopen(filename, "rb")) == NULL) {
-	    fprintf(stderr, "can't open %s\n", filename);
-	    exit(1);
-	}
-	jpeg_stdio_src(&cinfo, infile);
+        FILE * infile;
+        ...
+        if ((infile = fopen(filename, "rb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_src(&cinfo, infile);
 
 where the last line invokes the standard source module.
 
@@ -569,7 +569,7 @@
 
 Typical code for this step is just
 
-	jpeg_read_header(&cinfo, TRUE);
+        jpeg_read_header(&cinfo, TRUE);
 
 This will read the source datastream header markers, up to the beginning
 of the compressed data proper.  On return, the image dimensions and other
@@ -617,7 +617,7 @@
 
 Typical code is just
 
-	jpeg_start_decompress(&cinfo);
+        jpeg_start_decompress(&cinfo);
 
 If you have requested a multi-pass operating mode, such as 2-pass color
 quantization, jpeg_start_decompress() will do everything needed before data
@@ -630,12 +630,12 @@
 scaling, are available in the JPEG object; so is the selected colormap, if
 colormapped output has been requested.  Useful fields include
 
-	output_width		image width and height, as scaled
-	output_height
-	out_color_components	# of color components in out_color_space
-	output_components	# of color components returned per pixel
-	colormap		the selected colormap, if any
-	actual_number_of_colors		number of entries in colormap
+        output_width            image width and height, as scaled
+        output_height
+        out_color_components    # of color components in out_color_space
+        output_components       # of color components returned per pixel
+        colormap                the selected colormap, if any
+        actual_number_of_colors         number of entries in colormap
 
 output_components is 1 (a colormap index) when quantizing colors; otherwise it
 equals out_color_components.  It is the number of JSAMPLE values that will be
@@ -654,7 +654,7 @@
 
 
 6. while (scan lines remain to be read)
-	jpeg_read_scanlines(...);
+        jpeg_read_scanlines(...);
 
 Now you can read the decompressed image data by calling jpeg_read_scanlines()
 one or more times.  At each call, you pass in the maximum number of scanlines
@@ -696,7 +696,7 @@
 
 Typical code:
 
-	jpeg_finish_decompress(&cinfo);
+        jpeg_finish_decompress(&cinfo);
 
 If using the stdio source manager, don't forget to close the source stdio
 stream if necessary.
@@ -719,7 +719,7 @@
 
 Typical code:
 
-	jpeg_destroy_decompress(&cinfo);
+        jpeg_destroy_decompress(&cinfo);
 
 
 9. Aborting.
@@ -800,220 +800,231 @@
 The helper routines are:
 
 jpeg_set_defaults (j_compress_ptr cinfo)
-	This routine sets all JPEG parameters to reasonable defaults, using
-	only the input image's color space (field in_color_space, which must
-	already be set in cinfo).  Many applications will only need to use
-	this routine and perhaps jpeg_set_quality().
+        This routine sets all JPEG parameters to reasonable defaults, using
+        only the input image's color space (field in_color_space, which must
+        already be set in cinfo).  Many applications will only need to use
+        this routine and perhaps jpeg_set_quality().
 
 jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
-	Sets the JPEG file's colorspace (field jpeg_color_space) as specified,
-	and sets other color-space-dependent parameters appropriately.  See
-	"Special color spaces", below, before using this.  A large number of
-	parameters, including all per-component parameters, are set by this
-	routine; if you want to twiddle individual parameters you should call
-	jpeg_set_colorspace() before rather than after.
+        Sets the JPEG file's colorspace (field jpeg_color_space) as specified,
+        and sets other color-space-dependent parameters appropriately.  See
+        "Special color spaces", below, before using this.  A large number of
+        parameters, including all per-component parameters, are set by this
+        routine; if you want to twiddle individual parameters you should call
+        jpeg_set_colorspace() before rather than after.
 
 jpeg_default_colorspace (j_compress_ptr cinfo)
-	Selects an appropriate JPEG colorspace based on cinfo->in_color_space,
-	and calls jpeg_set_colorspace().  This is actually a subroutine of
-	jpeg_set_defaults().  It's broken out in case you want to change
-	just the colorspace-dependent JPEG parameters.
+        Selects an appropriate JPEG colorspace based on cinfo->in_color_space,
+        and calls jpeg_set_colorspace().  This is actually a subroutine of
+        jpeg_set_defaults().  It's broken out in case you want to change
+        just the colorspace-dependent JPEG parameters.
 
 jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
-	Constructs JPEG quantization tables appropriate for the indicated
-	quality setting.  The quality value is expressed on the 0..100 scale
-	recommended by IJG (cjpeg's "-quality" switch uses this routine).
-	Note that the exact mapping from quality values to tables may change
-	in future IJG releases as more is learned about DCT quantization.
-	If the force_baseline parameter is TRUE, then the quantization table
-	entries are constrained to the range 1..255 for full JPEG baseline
-	compatibility.  In the current implementation, this only makes a
-	difference for quality settings below 25, and it effectively prevents
-	very small/low quality files from being generated.  The IJG decoder
-	is capable of reading the non-baseline files generated at low quality
-	settings when force_baseline is FALSE, but other decoders may not be.
+        Constructs JPEG quantization tables appropriate for the indicated
+        quality setting.  The quality value is expressed on the 0..100 scale
+        recommended by IJG (cjpeg's "-quality" switch uses this routine).
+        Note that the exact mapping from quality values to tables may change
+        in future IJG releases as more is learned about DCT quantization.
+        If the force_baseline parameter is TRUE, then the quantization table
+        entries are constrained to the range 1..255 for full JPEG baseline
+        compatibility.  In the current implementation, this only makes a
+        difference for quality settings below 25, and it effectively prevents
+        very small/low quality files from being generated.  The IJG decoder
+        is capable of reading the non-baseline files generated at low quality
+        settings when force_baseline is FALSE, but other decoders may not be.
 
 jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
-	Same as jpeg_set_quality() except that the generated tables are the
-	sample tables given in the JPEC spec section K.1, multiplied by the
-	specified scale factor (which is expressed as a percentage; thus
-	scale_factor = 100 reproduces the spec's tables).  Note that larger
-	scale factors give lower quality.  This entry point is useful for
-	conforming to the Adobe PostScript DCT conventions, but we do not
-	recommend linear scaling as a user-visible quality scale otherwise.
-	force_baseline again constrains the computed table entries to 1..255.
+                         boolean force_baseline)
+        Same as jpeg_set_quality() except that the generated tables are the
+        sample tables given in the JPEG spec section K.1, multiplied by the
+        specified scale factor (which is expressed as a percentage; thus
+        scale_factor = 100 reproduces the spec's tables).  Note that larger
+        scale factors give lower quality.  This entry point is useful for
+        conforming to the Adobe PostScript DCT conventions, but we do not
+        recommend linear scaling as a user-visible quality scale otherwise.
+        force_baseline again constrains the computed table entries to 1..255.
 
 int jpeg_quality_scaling (int quality)
-	Converts a value on the IJG-recommended quality scale to a linear
-	scaling percentage.  Note that this routine may change or go away
-	in future releases --- IJG may choose to adopt a scaling method that
-	can't be expressed as a simple scalar multiplier, in which case the
-	premise of this routine collapses.  Caveat user.
+        Converts a value on the IJG-recommended quality scale to a linear
+        scaling percentage.  Note that this routine may change or go away
+        in future releases --- IJG may choose to adopt a scaling method that
+        can't be expressed as a simple scalar multiplier, in which case the
+        premise of this routine collapses.  Caveat user.
 
 jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
-	[libjpeg v7+ API/ABI emulation only]
-	Set default quantization tables with linear q_scale_factor[] values
-	(see below).
+        [libjpeg v7+ API/ABI emulation only]
+        Set default quantization tables with linear q_scale_factor[] values
+        (see below).
 
 jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
-	Allows an arbitrary quantization table to be created.  which_tbl
-	indicates which table slot to fill.  basic_table points to an array
-	of 64 unsigned ints given in normal array order.  These values are
-	multiplied by scale_factor/100 and then clamped to the range 1..65535
-	(or to 1..255 if force_baseline is TRUE).
-	CAUTION: prior to library version 6a, jpeg_add_quant_table expected
-	the basic table to be given in JPEG zigzag order.  If you need to
-	write code that works with either older or newer versions of this
-	routine, you must check the library version number.  Something like
-	"#if JPEG_LIB_VERSION >= 61" is the right test.
+                      const unsigned int *basic_table,
+                      int scale_factor, boolean force_baseline)
+        Allows an arbitrary quantization table to be created.  which_tbl
+        indicates which table slot to fill.  basic_table points to an array
+        of 64 unsigned ints given in normal array order.  These values are
+        multiplied by scale_factor/100 and then clamped to the range 1..65535
+        (or to 1..255 if force_baseline is TRUE).
+        CAUTION: prior to library version 6a, jpeg_add_quant_table expected
+        the basic table to be given in JPEG zigzag order.  If you need to
+        write code that works with either older or newer versions of this
+        routine, you must check the library version number.  Something like
+        "#if JPEG_LIB_VERSION >= 61" is the right test.
 
 jpeg_simple_progression (j_compress_ptr cinfo)
-	Generates a default scan script for writing a progressive-JPEG file.
-	This is the recommended method of creating a progressive file,
-	unless you want to make a custom scan sequence.  You must ensure that
-	the JPEG color space is set correctly before calling this routine.
+        Generates a default scan script for writing a progressive-JPEG file.
+        This is the recommended method of creating a progressive file,
+        unless you want to make a custom scan sequence.  You must ensure that
+        the JPEG color space is set correctly before calling this routine.
 
 
 Compression parameters (cinfo fields) include:
 
 J_DCT_METHOD dct_method
-	Selects the algorithm used for the DCT step.  Choices are:
-		JDCT_ISLOW: slow but accurate integer algorithm
-		JDCT_IFAST: faster, less accurate integer method
-		JDCT_FLOAT: floating-point method
-		JDCT_DEFAULT: default method (normally JDCT_ISLOW)
-		JDCT_FASTEST: fastest method (normally JDCT_IFAST)
-	The FLOAT method is very slightly more accurate than the ISLOW method,
-	but may give different results on different machines due to varying
-	roundoff behavior.  The integer methods should give the same results
-	on all machines.  On machines with sufficiently fast FP hardware, the
-	floating-point method may also be the fastest.  The IFAST method is
-	considerably less accurate than the other two; its use is not
-	recommended if high quality is a concern.  JDCT_DEFAULT and
-	JDCT_FASTEST are macros configurable by each installation.
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: slow but accurate integer algorithm
+                JDCT_IFAST: faster, less accurate integer method
+                JDCT_FLOAT: floating-point method
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary
+        with other SIMD implementations, or when using libjpeg-turbo without
+        SIMD extensions).  For quality levels of 90 and below, there should be
+        little or no perceptible difference between the two algorithms.  For
+        quality levels above 90, however, the difference between JDCT_IFAST and
+        JDCT_ISLOW becomes more pronounced.  With quality=97, for instance,
+        JDCT_IFAST generally incurs about a 1-3 dB loss (in PSNR) relative to
+        JDCT_ISLOW, but this can be larger for some images.  Do not use
+        JDCT_IFAST with quality levels above 97.  The algorithm often
+        degenerates at quality=98 and above and can actually produce a more
+        lossy image than if lower quality levels had been used.  Also, in
+        libjpeg-turbo, JDCT_IFAST is not fully accelerated for quality levels
+        above 97, so it will be slower than JDCT_ISLOW.  JDCT_FLOAT is mainly a
+        legacy feature.  It does not produce significantly more accurate
+        results than the ISLOW method, and it is much slower.  The FLOAT method
+        may also give different results on different machines due to varying
+        roundoff behavior, whereas the integer methods should give the same
+        results on all machines.
 
 J_COLOR_SPACE jpeg_color_space
 int num_components
-	The JPEG color space and corresponding number of components; see
-	"Special color spaces", below, for more info.  We recommend using
-	jpeg_set_color_space() if you want to change these.
+        The JPEG color space and corresponding number of components; see
+        "Special color spaces", below, for more info.  We recommend using
+        jpeg_set_colorspace() if you want to change these.
 
 boolean optimize_coding
-	TRUE causes the compressor to compute optimal Huffman coding tables
-	for the image.  This requires an extra pass over the data and
-	therefore costs a good deal of space and time.  The default is
-	FALSE, which tells the compressor to use the supplied or default
-	Huffman tables.  In most cases optimal tables save only a few percent
-	of file size compared to the default tables.  Note that when this is
-	TRUE, you need not supply Huffman tables at all, and any you do
-	supply will be overwritten.
+        TRUE causes the compressor to compute optimal Huffman coding tables
+        for the image.  This requires an extra pass over the data and
+        therefore costs a good deal of space and time.  The default is
+        FALSE, which tells the compressor to use the supplied or default
+        Huffman tables.  In most cases optimal tables save only a few percent
+        of file size compared to the default tables.  Note that when this is
+        TRUE, you need not supply Huffman tables at all, and any you do
+        supply will be overwritten.
 
 unsigned int restart_interval
 int restart_in_rows
-	To emit restart markers in the JPEG file, set one of these nonzero.
-	Set restart_interval to specify the exact interval in MCU blocks.
-	Set restart_in_rows to specify the interval in MCU rows.  (If
-	restart_in_rows is not 0, then restart_interval is set after the
-	image width in MCUs is computed.)  Defaults are zero (no restarts).
-	One restart marker per MCU row is often a good choice.
-	NOTE: the overhead of restart markers is higher in grayscale JPEG
-	files than in color files, and MUCH higher in progressive JPEGs.
-	If you use restarts, you may want to use larger intervals in those
-	cases.
+        To emit restart markers in the JPEG file, set one of these nonzero.
+        Set restart_interval to specify the exact interval in MCU blocks.
+        Set restart_in_rows to specify the interval in MCU rows.  (If
+        restart_in_rows is not 0, then restart_interval is set after the
+        image width in MCUs is computed.)  Defaults are zero (no restarts).
+        One restart marker per MCU row is often a good choice.
+        NOTE: the overhead of restart markers is higher in grayscale JPEG
+        files than in color files, and MUCH higher in progressive JPEGs.
+        If you use restarts, you may want to use larger intervals in those
+        cases.
 
 const jpeg_scan_info * scan_info
 int num_scans
-	By default, scan_info is NULL; this causes the compressor to write a
-	single-scan sequential JPEG file.  If not NULL, scan_info points to
-	an array of scan definition records of length num_scans.  The
-	compressor will then write a JPEG file having one scan for each scan
-	definition record.  This is used to generate noninterleaved or
-	progressive JPEG files.  The library checks that the scan array
-	defines a valid JPEG scan sequence.  (jpeg_simple_progression creates
-	a suitable scan definition array for progressive JPEG.)  This is
-	discussed further under "Progressive JPEG support".
+        By default, scan_info is NULL; this causes the compressor to write a
+        single-scan sequential JPEG file.  If not NULL, scan_info points to
+        an array of scan definition records of length num_scans.  The
+        compressor will then write a JPEG file having one scan for each scan
+        definition record.  This is used to generate noninterleaved or
+        progressive JPEG files.  The library checks that the scan array
+        defines a valid JPEG scan sequence.  (jpeg_simple_progression creates
+        a suitable scan definition array for progressive JPEG.)  This is
+        discussed further under "Progressive JPEG support".
 
 int smoothing_factor
-	If non-zero, the input image is smoothed; the value should be 1 for
-	minimal smoothing to 100 for maximum smoothing.  Consult jcsample.c
-	for details of the smoothing algorithm.  The default is zero.
+        If non-zero, the input image is smoothed; the value should be 1 for
+        minimal smoothing to 100 for maximum smoothing.  Consult jcsample.c
+        for details of the smoothing algorithm.  The default is zero.
 
 boolean write_JFIF_header
-	If TRUE, a JFIF APP0 marker is emitted.  jpeg_set_defaults() and
-	jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space
-	(ie, YCbCr or grayscale) is selected, otherwise FALSE.
+        If TRUE, a JFIF APP0 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space
+        (i.e., YCbCr or grayscale) is selected, otherwise FALSE.
 
 UINT8 JFIF_major_version
 UINT8 JFIF_minor_version
-	The version number to be written into the JFIF marker.
-	jpeg_set_defaults() initializes the version to 1.01 (major=minor=1).
-	You should set it to 1.02 (major=1, minor=2) if you plan to write
-	any JFIF 1.02 extension markers.
+        The version number to be written into the JFIF marker.
+        jpeg_set_defaults() initializes the version to 1.01 (major=minor=1).
+        You should set it to 1.02 (major=1, minor=2) if you plan to write
+        any JFIF 1.02 extension markers.
 
 UINT8 density_unit
 UINT16 X_density
 UINT16 Y_density
-	The resolution information to be written into the JFIF marker;
-	not used otherwise.  density_unit may be 0 for unknown,
-	1 for dots/inch, or 2 for dots/cm.  The default values are 0,1,1
-	indicating square pixels of unknown size.
+        The resolution information to be written into the JFIF marker;
+        not used otherwise.  density_unit may be 0 for unknown,
+        1 for dots/inch, or 2 for dots/cm.  The default values are 0,1,1
+        indicating square pixels of unknown size.
 
 boolean write_Adobe_marker
-	If TRUE, an Adobe APP14 marker is emitted.  jpeg_set_defaults() and
-	jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK,
-	or YCCK is selected, otherwise FALSE.  It is generally a bad idea
-	to set both write_JFIF_header and write_Adobe_marker.  In fact,
-	you probably shouldn't change the default settings at all --- the
-	default behavior ensures that the JPEG file's color space can be
-	recognized by the decoder.
+        If TRUE, an Adobe APP14 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK,
+        or YCCK is selected, otherwise FALSE.  It is generally a bad idea
+        to set both write_JFIF_header and write_Adobe_marker.  In fact,
+        you probably shouldn't change the default settings at all --- the
+        default behavior ensures that the JPEG file's color space can be
+        recognized by the decoder.
 
 JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]
-	Pointers to coefficient quantization tables, one per table slot,
-	or NULL if no table is defined for a slot.  Usually these should
-	be set via one of the above helper routines; jpeg_add_quant_table()
-	is general enough to define any quantization table.  The other
-	routines will set up table slot 0 for luminance quality and table
-	slot 1 for chrominance.
+        Pointers to coefficient quantization tables, one per table slot,
+        or NULL if no table is defined for a slot.  Usually these should
+        be set via one of the above helper routines; jpeg_add_quant_table()
+        is general enough to define any quantization table.  The other
+        routines will set up table slot 0 for luminance quality and table
+        slot 1 for chrominance.
 
 int q_scale_factor[NUM_QUANT_TBLS]
-	[libjpeg v7+ API/ABI emulation only]
-	Linear quantization scaling factors (0-100, default 100)
-	for use with jpeg_default_qtables().
-	See rdswitch.c and cjpeg.c for an example of usage.
-	Note that the q_scale_factor[] values use "linear" scales, so JPEG
-	quality levels chosen by the user must be converted to these scales
-	using jpeg_quality_scaling().  Here is an example that corresponds to
-	cjpeg -quality 90,70:
+        [libjpeg v7+ API/ABI emulation only]
+        Linear quantization scaling factors (0-100, default 100)
+        for use with jpeg_default_qtables().
+        See rdswitch.c and cjpeg.c for an example of usage.
+        Note that the q_scale_factor[] values use "linear" scales, so JPEG
+        quality levels chosen by the user must be converted to these scales
+        using jpeg_quality_scaling().  Here is an example that corresponds to
+        cjpeg -quality 90,70:
 
-		jpeg_set_defaults(cinfo);
+                jpeg_set_defaults(cinfo);
 
-		/* Set luminance quality 90. */
-		cinfo->q_scale_factor[0] = jpeg_quality_scaling(90);
-		/* Set chrominance quality 70. */
-		cinfo->q_scale_factor[1] = jpeg_quality_scaling(70);
+                /* Set luminance quality 90. */
+                cinfo->q_scale_factor[0] = jpeg_quality_scaling(90);
+                /* Set chrominance quality 70. */
+                cinfo->q_scale_factor[1] = jpeg_quality_scaling(70);
 
-		jpeg_default_qtables(cinfo, force_baseline);
+                jpeg_default_qtables(cinfo, force_baseline);
 
-	CAUTION: Setting separate quality levels for chrominance and luminance
-	is mainly only useful if chrominance subsampling is disabled.  2x2
-	chrominance subsampling (AKA "4:2:0") is the default, but you can
-	explicitly disable subsampling as follows:
+        CAUTION: Setting separate quality levels for chrominance and luminance
+        is useful mainly if chrominance subsampling is disabled.  2x2
+        chrominance subsampling (AKA "4:2:0") is the default, but you can
+        explicitly disable subsampling as follows:
 
-		cinfo->comp_info[0].v_samp_factor = 1;
-		cinfo->comp_info[0].h_samp_factor = 1;
+                cinfo->comp_info[0].v_samp_factor = 1;
+                cinfo->comp_info[0].h_samp_factor = 1;
 
 JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]
 JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]
-	Pointers to Huffman coding tables, one per table slot, or NULL if
-	no table is defined for a slot.  Slots 0 and 1 are filled with the
-	JPEG sample tables by jpeg_set_defaults().  If you need to allocate
-	more table structures, jpeg_alloc_huff_table() may be used.
-	Note that optimal Huffman tables can be computed for an image
-	by setting optimize_coding, as discussed above; there's seldom
-	any need to mess with providing your own Huffman tables.
+        Pointers to Huffman coding tables, one per table slot, or NULL if
+        no table is defined for a slot.  Slots 0 and 1 are filled with the
+        JPEG sample tables by jpeg_set_defaults().  If you need to allocate
+        more table structures, jpeg_alloc_huff_table() may be used.
+        Note that optimal Huffman tables can be computed for an image
+        by setting optimize_coding, as discussed above; there's seldom
+        any need to mess with providing your own Huffman tables.
 
 
 [libjpeg v7+ API/ABI emulation only]
@@ -1021,9 +1032,10 @@
 given by the following fields.  These are computed from the input image
 dimensions and the compression parameters by jpeg_start_compress().  You can
 also call jpeg_calc_jpeg_dimensions() to obtain the values that will result
-from the current parameter settings.
+from the current parameter settings.  This can be useful if you are trying
+to pick a scaling ratio that will get close to a desired target size.
 
-JDIMENSION jpeg_width		Actual dimensions of output image.
+JDIMENSION jpeg_width           Actual dimensions of output image.
 JDIMENSION jpeg_height
 
 
@@ -1034,32 +1046,32 @@
 to use that routine, it's up to you to allocate the array.
 
 int component_id
-	The one-byte identifier code to be recorded in the JPEG file for
-	this component.  For the standard color spaces, we recommend you
-	leave the default values alone.
+        The one-byte identifier code to be recorded in the JPEG file for
+        this component.  For the standard color spaces, we recommend you
+        leave the default values alone.
 
 int h_samp_factor
 int v_samp_factor
-	Horizontal and vertical sampling factors for the component; must
-	be 1..4 according to the JPEG standard.  Note that larger sampling
-	factors indicate a higher-resolution component; many people find
-	this behavior quite unintuitive.  The default values are 2,2 for
-	luminance components and 1,1 for chrominance components, except
-	for grayscale where 1,1 is used.
+        Horizontal and vertical sampling factors for the component; must
+        be 1..4 according to the JPEG standard.  Note that larger sampling
+        factors indicate a higher-resolution component; many people find
+        this behavior quite unintuitive.  The default values are 2,2 for
+        luminance components and 1,1 for chrominance components, except
+        for grayscale where 1,1 is used.
 
 int quant_tbl_no
-	Quantization table number for component.  The default value is
-	0 for luminance components and 1 for chrominance components.
+        Quantization table number for component.  The default value is
+        0 for luminance components and 1 for chrominance components.
 
 int dc_tbl_no
 int ac_tbl_no
-	DC and AC entropy coding table numbers.  The default values are
-	0 for luminance components and 1 for chrominance components.
+        DC and AC entropy coding table numbers.  The default values are
+        0 for luminance components and 1 for chrominance components.
 
 int component_index
-	Must equal the component's index in comp_info[].  (Beginning in
-	release v6, the compressor library will fill this in automatically;
-	you don't have to.)
+        Must equal the component's index in comp_info[].  (Beginning in
+        release v6, the compressor library will fill this in automatically;
+        you don't have to.)
 
 
 Decompression parameter selection
@@ -1079,18 +1091,18 @@
 The following fields in the JPEG object are set by jpeg_read_header() and
 may be useful to the application in choosing decompression parameters:
 
-JDIMENSION image_width			Width and height of image
+JDIMENSION image_width                  Width and height of image
 JDIMENSION image_height
-int num_components			Number of color components
-J_COLOR_SPACE jpeg_color_space		Colorspace of image
-boolean saw_JFIF_marker			TRUE if a JFIF APP0 marker was seen
-  UINT8 JFIF_major_version		Version information from JFIF marker
+int num_components                      Number of color components
+J_COLOR_SPACE jpeg_color_space          Colorspace of image
+boolean saw_JFIF_marker                 TRUE if a JFIF APP0 marker was seen
+  UINT8 JFIF_major_version              Version information from JFIF marker
   UINT8 JFIF_minor_version
-  UINT8 density_unit			Resolution data from JFIF marker
+  UINT8 density_unit                    Resolution data from JFIF marker
   UINT16 X_density
   UINT16 Y_density
-boolean saw_Adobe_marker		TRUE if an Adobe APP14 marker was seen
-  UINT8 Adobe_transform			Color transform code from Adobe marker
+boolean saw_Adobe_marker                TRUE if an Adobe APP14 marker was seen
+  UINT8 Adobe_transform                 Color transform code from Adobe marker
 
 The JPEG color space, unfortunately, is something of a guess since the JPEG
 standard proper does not provide a way to record it.  In practice most files
@@ -1102,50 +1114,51 @@
 returned image are:
 
 J_COLOR_SPACE out_color_space
-	Output color space.  jpeg_read_header() sets an appropriate default
-	based on jpeg_color_space; typically it will be RGB or grayscale.
-	The application can change this field to request output in a different
-	colorspace.  For example, set it to JCS_GRAYSCALE to get grayscale
-	output from a color file.  (This is useful for previewing: grayscale
-	output is faster than full color since the color components need not
-	be processed.)  Note that not all possible color space transforms are
-	currently implemented; you may need to extend jdcolor.c if you want an
-	unusual conversion.
+        Output color space.  jpeg_read_header() sets an appropriate default
+        based on jpeg_color_space; typically it will be RGB or grayscale.
+        The application can change this field to request output in a different
+        colorspace.  For example, set it to JCS_GRAYSCALE to get grayscale
+        output from a color file.  (This is useful for previewing: grayscale
+        output is faster than full color since the color components need not
+        be processed.)  Note that not all possible color space transforms are
+        currently implemented; you may need to extend jdcolor.c if you want an
+        unusual conversion.
 
 unsigned int scale_num, scale_denom
-	Scale the image by the fraction scale_num/scale_denom.  Default is
-	1/1, or no scaling.  Currently, the only supported scaling ratios
-	are 1/1, 1/2, 1/4, and 1/8.  (The library design allows for arbitrary
-	scaling ratios but this is not likely to be implemented any time soon.)
-	Smaller scaling ratios permit significantly faster decoding since
-	fewer pixels need be processed and a simpler IDCT method can be used.
+        Scale the image by the fraction scale_num/scale_denom.  Default is
+        1/1, or no scaling.  Currently, the only supported scaling ratios
+        are M/8 with all M from 1 to 16, or any reduced fraction thereof (such
+        as 1/2, 3/4, etc.)  (The library design allows for arbitrary
+        scaling ratios but this is not likely to be implemented any time soon.)
+        Smaller scaling ratios permit significantly faster decoding since
+        fewer pixels need be processed and a simpler IDCT method can be used.
 
 boolean quantize_colors
-	If set TRUE, colormapped output will be delivered.  Default is FALSE,
-	meaning that full-color output will be delivered.
+        If set TRUE, colormapped output will be delivered.  Default is FALSE,
+        meaning that full-color output will be delivered.
 
 The next three parameters are relevant only if quantize_colors is TRUE.
 
 int desired_number_of_colors
-	Maximum number of colors to use in generating a library-supplied color
-	map (the actual number of colors is returned in a different field).
-	Default 256.  Ignored when the application supplies its own color map.
+        Maximum number of colors to use in generating a library-supplied color
+        map (the actual number of colors is returned in a different field).
+        Default 256.  Ignored when the application supplies its own color map.
 
 boolean two_pass_quantize
-	If TRUE, an extra pass over the image is made to select a custom color
-	map for the image.  This usually looks a lot better than the one-size-
-	fits-all colormap that is used otherwise.  Default is TRUE.  Ignored
-	when the application supplies its own color map.
+        If TRUE, an extra pass over the image is made to select a custom color
+        map for the image.  This usually looks a lot better than the one-size-
+        fits-all colormap that is used otherwise.  Default is TRUE.  Ignored
+        when the application supplies its own color map.
 
 J_DITHER_MODE dither_mode
-	Selects color dithering method.  Supported values are:
-		JDITHER_NONE	no dithering: fast, very low quality
-		JDITHER_ORDERED	ordered dither: moderate speed and quality
-		JDITHER_FS	Floyd-Steinberg dither: slow, high quality
-	Default is JDITHER_FS.  (At present, ordered dither is implemented
-	only in the single-pass, standard-colormap case.  If you ask for
-	ordered dither when two_pass_quantize is TRUE or when you supply
-	an external color map, you'll get F-S dithering.)
+        Selects color dithering method.  Supported values are:
+                JDITHER_NONE    no dithering: fast, very low quality
+                JDITHER_ORDERED ordered dither: moderate speed and quality
+                JDITHER_FS      Floyd-Steinberg dither: slow, high quality
+        Default is JDITHER_FS.  (At present, ordered dither is implemented
+        only in the single-pass, standard-colormap case.  If you ask for
+        ordered dither when two_pass_quantize is TRUE or when you supply
+        an external color map, you'll get F-S dithering.)
 
 When quantize_colors is TRUE, the target color map is described by the next
 two fields.  colormap is set to NULL by jpeg_read_header().  The application
@@ -1156,39 +1169,63 @@
 only accepted for 3-component output color spaces.]
 
 JSAMPARRAY colormap
-	The color map, represented as a 2-D pixel array of out_color_components
-	rows and actual_number_of_colors columns.  Ignored if not quantizing.
-	CAUTION: if the JPEG library creates its own colormap, the storage
-	pointed to by this field is released by jpeg_finish_decompress().
-	Copy the colormap somewhere else first, if you want to save it.
+        The color map, represented as a 2-D pixel array of out_color_components
+        rows and actual_number_of_colors columns.  Ignored if not quantizing.
+        CAUTION: if the JPEG library creates its own colormap, the storage
+        pointed to by this field is released by jpeg_finish_decompress().
+        Copy the colormap somewhere else first, if you want to save it.
 
 int actual_number_of_colors
-	The number of colors in the color map.
+        The number of colors in the color map.
 
 Additional decompression parameters that the application may set include:
 
 J_DCT_METHOD dct_method
-	Selects the algorithm used for the DCT step.  Choices are the same
-	as described above for compression.
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: slow but accurate integer algorithm
+                JDCT_IFAST: faster, less accurate integer method
+                JDCT_FLOAT: floating-point method
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary
+        with other SIMD implementations, or when using libjpeg-turbo without
+        SIMD extensions).  If the JPEG image was compressed using a quality
+        level of 85 or below, then there should be little or no perceptible
+        difference between the two algorithms.  When decompressing images that
+        were compressed using quality levels above 85, however, the difference
+        between JDCT_IFAST and JDCT_ISLOW becomes more pronounced.  With images
+        compressed using quality=97, for instance, JDCT_IFAST generally incurs
+        about a 4-6 dB loss (in PSNR) relative to JDCT_ISLOW, but this can be
+        larger for some images.  If you can avoid it, do not use JDCT_IFAST
+        when decompressing images that were compressed using quality levels
+        above 97.  The algorithm often degenerates for such images and can
+        actually produce a more lossy output image than if the JPEG image had
+        been compressed using lower quality levels.  JDCT_FLOAT is mainly a
+        legacy feature.  It does not produce significantly more accurate
+        results than the ISLOW method, and it is much slower.  The FLOAT method
+        may also give different results on different machines due to varying
+        roundoff behavior, whereas the integer methods should give the same
+        results on all machines.
 
 boolean do_fancy_upsampling
-	If TRUE, do careful upsampling of chroma components.  If FALSE,
-	a faster but sloppier method is used.  Default is TRUE.  The visual
-	impact of the sloppier method is often very small.
+        If TRUE, do careful upsampling of chroma components.  If FALSE,
+        a faster but sloppier method is used.  Default is TRUE.  The visual
+        impact of the sloppier method is often very small.
 
 boolean do_block_smoothing
-	If TRUE, interblock smoothing is applied in early stages of decoding
-	progressive JPEG files; if FALSE, not.  Default is TRUE.  Early
-	progression stages look "fuzzy" with smoothing, "blocky" without.
-	In any case, block smoothing ceases to be applied after the first few
-	AC coefficients are known to full accuracy, so it is relevant only
-	when using buffered-image mode for progressive images.
+        If TRUE, interblock smoothing is applied in early stages of decoding
+        progressive JPEG files; if FALSE, not.  Default is TRUE.  Early
+        progression stages look "fuzzy" with smoothing, "blocky" without.
+        In any case, block smoothing ceases to be applied after the first few
+        AC coefficients are known to full accuracy, so it is relevant only
+        when using buffered-image mode for progressive images.
 
 boolean enable_1pass_quant
 boolean enable_external_quant
 boolean enable_2pass_quant
-	These are significant only in buffered-image mode, which is
-	described in its own section below.
+        These are significant only in buffered-image mode, which is
+        described in its own section below.
 
 
 The output image dimensions are given by the following fields.  These are
@@ -1200,11 +1237,11 @@
 JPEG library's memory manager to allocate output buffer space, because you
 are supposed to request such buffers *before* jpeg_start_decompress().
 
-JDIMENSION output_width		Actual dimensions of output image.
+JDIMENSION output_width         Actual dimensions of output image.
 JDIMENSION output_height
-int out_color_components	Number of color components in out_color_space.
-int output_components		Number of color components returned.
-int rec_outbuf_height		Recommended height of scanline buffer.
+int out_color_components        Number of color components in out_color_space.
+int output_components           Number of color components returned.
+int rec_outbuf_height           Recommended height of scanline buffer.
 
 When quantizing colors, output_components is 1, indicating a single color map
 index per pixel.  Otherwise it equals out_color_components.  The output arrays
@@ -1244,10 +1281,10 @@
 space depending on in_color_space, but you can override this by calling
 jpeg_set_colorspace().  Of course you must select a supported transformation.
 jccolor.c currently supports the following transformations:
-	RGB => YCbCr
-	RGB => GRAYSCALE
-	YCbCr => GRAYSCALE
-	CMYK => YCCK
+        RGB => YCbCr
+        RGB => GRAYSCALE
+        YCbCr => GRAYSCALE
+        CMYK => YCCK
 plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB,
 YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN.
 
@@ -1277,10 +1314,11 @@
 selects a default output color space based on (its guess of) jpeg_color_space;
 set out_color_space to override this.  Again, you must select a supported
 transformation.  jdcolor.c currently supports
-	YCbCr => GRAYSCALE
-	YCbCr => RGB
-	GRAYSCALE => RGB
-	YCCK => CMYK
+        YCbCr => RGB
+        YCbCr => GRAYSCALE
+        RGB => GRAYSCALE
+        GRAYSCALE => RGB
+        YCCK => CMYK
 as well as the null transforms.  (Since GRAYSCALE=>RGB is provided, an
 application can force grayscale JPEGs to look like color JPEGs if it only
 wants to handle one case.)
@@ -1350,31 +1388,31 @@
 The individual methods that you might wish to override are:
 
 error_exit (j_common_ptr cinfo)
-	Receives control for a fatal error.  Information sufficient to
-	generate the error message has been stored in cinfo->err; call
-	output_message to display it.  Control must NOT return to the caller;
-	generally this routine will exit() or longjmp() somewhere.
-	Typically you would override this routine to get rid of the exit()
-	default behavior.  Note that if you continue processing, you should
-	clean up the JPEG object with jpeg_abort() or jpeg_destroy().
+        Receives control for a fatal error.  Information sufficient to
+        generate the error message has been stored in cinfo->err; call
+        output_message to display it.  Control must NOT return to the caller;
+        generally this routine will exit() or longjmp() somewhere.
+        Typically you would override this routine to get rid of the exit()
+        default behavior.  Note that if you continue processing, you should
+        clean up the JPEG object with jpeg_abort() or jpeg_destroy().
 
 output_message (j_common_ptr cinfo)
-	Actual output of any JPEG message.  Override this to send messages
-	somewhere other than stderr.  Note that this method does not know
-	how to generate a message, only where to send it.
+        Actual output of any JPEG message.  Override this to send messages
+        somewhere other than stderr.  Note that this method does not know
+        how to generate a message, only where to send it.
 
 format_message (j_common_ptr cinfo, char * buffer)
-	Constructs a readable error message string based on the error info
-	stored in cinfo->err.  This method is called by output_message.  Few
-	applications should need to override this method.  One possible
-	reason for doing so is to implement dynamic switching of error message
-	language.
+        Constructs a readable error message string based on the error info
+        stored in cinfo->err.  This method is called by output_message.  Few
+        applications should need to override this method.  One possible
+        reason for doing so is to implement dynamic switching of error message
+        language.
 
 emit_message (j_common_ptr cinfo, int msg_level)
-	Decide whether or not to emit a warning or trace message; if so,
-	calls output_message.  The main reason for overriding this method
-	would be to abort on warnings.  msg_level is -1 for warnings,
-	0 and up for trace messages.
+        Decide whether or not to emit a warning or trace message; if so,
+        calls output_message.  The main reason for overriding this method
+        would be to abort on warnings.  msg_level is -1 for warnings,
+        0 and up for trace messages.
 
 Only error_exit() and emit_message() are called from the rest of the JPEG
 library; the other two are internal to the error handler.
@@ -1397,9 +1435,9 @@
 addon messages (the addon messages are defined in cderror.h).
 
 Actual invocation of the error handler is done via macros defined in jerror.h:
-	ERREXITn(...)	for fatal errors
-	WARNMSn(...)	for corrupt-data warnings
-	TRACEMSn(...)	for trace and informational messages.
+        ERREXITn(...)   for fatal errors
+        WARNMSn(...)    for corrupt-data warnings
+        TRACEMSn(...)   for trace and informational messages.
 These macros store the message code and any additional parameters into the
 error handler struct, then invoke the error_exit() or emit_message() method.
 The variants of each macro are for varying numbers of additional parameters.
@@ -1440,8 +1478,8 @@
 A data destination manager struct contains a pointer and count defining the
 next byte to write in the work buffer and the remaining free space:
 
-	JOCTET * next_output_byte;  /* => next byte to write in buffer */
-	size_t free_in_buffer;      /* # of byte spaces remaining in buffer */
+        JOCTET * next_output_byte;  /* => next byte to write in buffer */
+        size_t free_in_buffer;      /* # of byte spaces remaining in buffer */
 
 The library increments the pointer and decrements the count until the buffer
 is filled.  The manager's empty_output_buffer method must reset the pointer
@@ -1451,27 +1489,27 @@
 A data destination manager provides three methods:
 
 init_destination (j_compress_ptr cinfo)
-	Initialize destination.  This is called by jpeg_start_compress()
-	before any data is actually written.  It must initialize
-	next_output_byte and free_in_buffer.  free_in_buffer must be
-	initialized to a positive value.
+        Initialize destination.  This is called by jpeg_start_compress()
+        before any data is actually written.  It must initialize
+        next_output_byte and free_in_buffer.  free_in_buffer must be
+        initialized to a positive value.
 
 empty_output_buffer (j_compress_ptr cinfo)
-	This is called whenever the buffer has filled (free_in_buffer
-	reaches zero).  In typical applications, it should write out the
-	*entire* buffer (use the saved start address and buffer length;
-	ignore the current state of next_output_byte and free_in_buffer).
-	Then reset the pointer & count to the start of the buffer, and
-	return TRUE indicating that the buffer has been dumped.
-	free_in_buffer must be set to a positive value when TRUE is
-	returned.  A FALSE return should only be used when I/O suspension is
-	desired (this operating mode is discussed in the next section).
+        This is called whenever the buffer has filled (free_in_buffer
+        reaches zero).  In typical applications, it should write out the
+        *entire* buffer (use the saved start address and buffer length;
+        ignore the current state of next_output_byte and free_in_buffer).
+        Then reset the pointer & count to the start of the buffer, and
+        return TRUE indicating that the buffer has been dumped.
+        free_in_buffer must be set to a positive value when TRUE is
+        returned.  A FALSE return should only be used when I/O suspension is
+        desired (this operating mode is discussed in the next section).
 
 term_destination (j_compress_ptr cinfo)
-	Terminate destination --- called by jpeg_finish_compress() after all
-	data has been written.  In most applications, this must flush any
-	data remaining in the buffer.  Use either next_output_byte or
-	free_in_buffer to determine how much data is in the buffer.
+        Terminate destination --- called by jpeg_finish_compress() after all
+        data has been written.  In most applications, this must flush any
+        data remaining in the buffer.  Use either next_output_byte or
+        free_in_buffer to determine how much data is in the buffer.
 
 term_destination() is NOT called by jpeg_abort() or jpeg_destroy().  If you
 want the destination manager to be cleaned up during an abort, you must do it
@@ -1489,8 +1527,8 @@
 defining the next byte to read from the work buffer and the number of bytes
 remaining:
 
-	const JOCTET * next_input_byte; /* => next byte to read from buffer */
-	size_t bytes_in_buffer;         /* # of bytes remaining in buffer */
+        const JOCTET * next_input_byte; /* => next byte to read from buffer */
+        size_t bytes_in_buffer;         /* # of bytes remaining in buffer */
 
 The library increments the pointer and decrements the count until the buffer
 is emptied.  The manager's fill_input_buffer method must reset the pointer and
@@ -1500,47 +1538,47 @@
 A data source manager provides five methods:
 
 init_source (j_decompress_ptr cinfo)
-	Initialize source.  This is called by jpeg_read_header() before any
-	data is actually read.  Unlike init_destination(), it may leave
-	bytes_in_buffer set to 0 (in which case a fill_input_buffer() call
-	will occur immediately).
+        Initialize source.  This is called by jpeg_read_header() before any
+        data is actually read.  Unlike init_destination(), it may leave
+        bytes_in_buffer set to 0 (in which case a fill_input_buffer() call
+        will occur immediately).
 
 fill_input_buffer (j_decompress_ptr cinfo)
-	This is called whenever bytes_in_buffer has reached zero and more
-	data is wanted.  In typical applications, it should read fresh data
-	into the buffer (ignoring the current state of next_input_byte and
-	bytes_in_buffer), reset the pointer & count to the start of the
-	buffer, and return TRUE indicating that the buffer has been reloaded.
-	It is not necessary to fill the buffer entirely, only to obtain at
-	least one more byte.  bytes_in_buffer MUST be set to a positive value
-	if TRUE is returned.  A FALSE return should only be used when I/O
-	suspension is desired (this mode is discussed in the next section).
+        This is called whenever bytes_in_buffer has reached zero and more
+        data is wanted.  In typical applications, it should read fresh data
+        into the buffer (ignoring the current state of next_input_byte and
+        bytes_in_buffer), reset the pointer & count to the start of the
+        buffer, and return TRUE indicating that the buffer has been reloaded.
+        It is not necessary to fill the buffer entirely, only to obtain at
+        least one more byte.  bytes_in_buffer MUST be set to a positive value
+        if TRUE is returned.  A FALSE return should only be used when I/O
+        suspension is desired (this mode is discussed in the next section).
 
 skip_input_data (j_decompress_ptr cinfo, long num_bytes)
-	Skip num_bytes worth of data.  The buffer pointer and count should
-	be advanced over num_bytes input bytes, refilling the buffer as
-	needed.  This is used to skip over a potentially large amount of
-	uninteresting data (such as an APPn marker).  In some applications
-	it may be possible to optimize away the reading of the skipped data,
-	but it's not clear that being smart is worth much trouble; large
-	skips are uncommon.  bytes_in_buffer may be zero on return.
-	A zero or negative skip count should be treated as a no-op.
+        Skip num_bytes worth of data.  The buffer pointer and count should
+        be advanced over num_bytes input bytes, refilling the buffer as
+        needed.  This is used to skip over a potentially large amount of
+        uninteresting data (such as an APPn marker).  In some applications
+        it may be possible to optimize away the reading of the skipped data,
+        but it's not clear that being smart is worth much trouble; large
+        skips are uncommon.  bytes_in_buffer may be zero on return.
+        A zero or negative skip count should be treated as a no-op.
 
 resync_to_restart (j_decompress_ptr cinfo, int desired)
-	This routine is called only when the decompressor has failed to find
-	a restart (RSTn) marker where one is expected.  Its mission is to
-	find a suitable point for resuming decompression.  For most
-	applications, we recommend that you just use the default resync
-	procedure, jpeg_resync_to_restart().  However, if you are able to back
-	up in the input data stream, or if you have a-priori knowledge about
-	the likely location of restart markers, you may be able to do better.
-	Read the read_restart_marker() and jpeg_resync_to_restart() routines
-	in jdmarker.c if you think you'd like to implement your own resync
-	procedure.
+        This routine is called only when the decompressor has failed to find
+        a restart (RSTn) marker where one is expected.  Its mission is to
+        find a suitable point for resuming decompression.  For most
+        applications, we recommend that you just use the default resync
+        procedure, jpeg_resync_to_restart().  However, if you are able to back
+        up in the input data stream, or if you have a-priori knowledge about
+        the likely location of restart markers, you may be able to do better.
+        Read the read_restart_marker() and jpeg_resync_to_restart() routines
+        in jdmarker.c if you think you'd like to implement your own resync
+        procedure.
 
 term_source (j_decompress_ptr cinfo)
-	Terminate source --- called by jpeg_finish_decompress() after all
-	data has been read.  Often a no-op.
+        Terminate source --- called by jpeg_finish_decompress() after all
+        data has been read.  Often a no-op.
 
 For both fill_input_buffer() and skip_input_data(), there is no such thing
 as an EOF return.  If the end of the file has been reached, the routine has
@@ -1648,7 +1686,7 @@
   * jpeg_read_header(): will return JPEG_SUSPENDED.
   * jpeg_start_decompress(): will return FALSE, rather than its usual TRUE.
   * jpeg_read_scanlines(): will return the number of scanlines already
-	completed (possibly 0).
+        completed (possibly 0).
   * jpeg_finish_decompress(): will return FALSE, rather than its usual TRUE.
 The surrounding application must recognize these cases, load more data into
 the input buffer, and repeat the call.  In the case of jpeg_read_scanlines(),
@@ -1826,23 +1864,23 @@
 
 The basic control flow for buffered-image decoding is
 
-	jpeg_create_decompress()
-	set data source
-	jpeg_read_header()
-	set overall decompression parameters
-	cinfo.buffered_image = TRUE;	/* select buffered-image mode */
-	jpeg_start_decompress()
-	for (each output pass) {
-	    adjust output decompression parameters if required
-	    jpeg_start_output()		/* start a new output pass */
-	    for (all scanlines in image) {
-	        jpeg_read_scanlines()
-	        display scanlines
-	    }
-	    jpeg_finish_output()	/* terminate output pass */
-	}
-	jpeg_finish_decompress()
-	jpeg_destroy_decompress()
+        jpeg_create_decompress()
+        set data source
+        jpeg_read_header()
+        set overall decompression parameters
+        cinfo.buffered_image = TRUE;    /* select buffered-image mode */
+        jpeg_start_decompress()
+        for (each output pass) {
+            adjust output decompression parameters if required
+            jpeg_start_output()         /* start a new output pass */
+            for (all scanlines in image) {
+                jpeg_read_scanlines()
+                display scanlines
+            }
+            jpeg_finish_output()        /* terminate output pass */
+        }
+        jpeg_finish_decompress()
+        jpeg_destroy_decompress()
 
 This differs from ordinary unbuffered decoding in that there is an additional
 level of looping.  The application can choose how many output passes to make
@@ -1851,9 +1889,9 @@
 The simplest approach to displaying progressive images is to do one display
 pass for each scan appearing in the input file.  In this case the outer loop
 condition is typically
-	while (! jpeg_input_complete(&cinfo))
+        while (! jpeg_input_complete(&cinfo))
 and the start-output call should read
-	jpeg_start_output(&cinfo, cinfo.input_scan_number);
+        jpeg_start_output(&cinfo, cinfo.input_scan_number);
 The second parameter to jpeg_start_output() indicates which scan of the input
 file is to be displayed; the scans are numbered starting at 1 for this
 purpose.  (You can use a loop counter starting at 1 if you like, but using
@@ -1884,11 +1922,11 @@
 cause the library to decode input data in advance of what's needed to produce
 output.  This is done by calling the routine jpeg_consume_input().
 The return value is one of the following:
-	JPEG_REACHED_SOS:    reached an SOS marker (the start of a new scan)
-	JPEG_REACHED_EOI:    reached the EOI marker (end of image)
-	JPEG_ROW_COMPLETED:  completed reading one MCU row of compressed data
-	JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan
-	JPEG_SUSPENDED:      suspended before completing any of the above
+        JPEG_REACHED_SOS:    reached an SOS marker (the start of a new scan)
+        JPEG_REACHED_EOI:    reached the EOI marker (end of image)
+        JPEG_ROW_COMPLETED:  completed reading one MCU row of compressed data
+        JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan
+        JPEG_SUSPENDED:      suspended before completing any of the above
 (JPEG_SUSPENDED can occur only if a suspending data source is used.)  This
 routine can be called at any time after initializing the JPEG object.  It
 reads some additional data and returns when one of the indicated significant
@@ -1965,27 +2003,27 @@
 output pass after receiving all the data; otherwise your last display may not
 be full quality across the whole screen.  So the right outer loop logic is
 something like this:
-	do {
-	    absorb any waiting input by calling jpeg_consume_input()
-	    final_pass = jpeg_input_complete(&cinfo);
-	    adjust output decompression parameters if required
-	    jpeg_start_output(&cinfo, cinfo.input_scan_number);
-	    ...
-	    jpeg_finish_output()
-	} while (! final_pass);
+        do {
+            absorb any waiting input by calling jpeg_consume_input()
+            final_pass = jpeg_input_complete(&cinfo);
+            adjust output decompression parameters if required
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+        } while (! final_pass);
 rather than quitting as soon as jpeg_input_complete() returns TRUE.  This
 arrangement makes it simple to use higher-quality decoding parameters
 for the final pass.  But if you don't want to use special parameters for
 the final pass, the right loop logic is like this:
-	for (;;) {
-	    absorb any waiting input by calling jpeg_consume_input()
-	    jpeg_start_output(&cinfo, cinfo.input_scan_number);
-	    ...
-	    jpeg_finish_output()
-	    if (jpeg_input_complete(&cinfo) &&
-	        cinfo.input_scan_number == cinfo.output_scan_number)
-	      break;
-	}
+        for (;;) {
+            absorb any waiting input by calling jpeg_consume_input()
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+            if (jpeg_input_complete(&cinfo) &&
+                cinfo.input_scan_number == cinfo.output_scan_number)
+              break;
+        }
 In this case you don't need to know in advance whether an output pass is to
 be the last one, so it's not necessary to have reached EOF before starting
 the final output pass; rather, what you want to test is whether the output
@@ -2094,9 +2132,9 @@
 one(s) you intend to use before you call jpeg_start_decompress().  (If we did
 not require this, the max_memory_to_use setting would be a complete fiction.)
 You do this by setting one or more of these three cinfo fields to TRUE:
-	enable_1pass_quant		Fixed color cube colormap
-	enable_external_quant		Externally-supplied colormap
-	enable_2pass_quant		Two-pass custom colormap
+        enable_1pass_quant              Fixed color cube colormap
+        enable_external_quant           Externally-supplied colormap
+        enable_2pass_quant              Two-pass custom colormap
 All three are initialized FALSE by jpeg_read_header().  But
 jpeg_start_decompress() automatically sets TRUE the one selected by the
 current two_pass_quantize and colormap settings, so you only need to set the
@@ -2247,14 +2285,14 @@
 A sure-fire way to create matching tables-only and abbreviated image files
 is to proceed as follows:
 
-	create JPEG compression object
-	set JPEG parameters
-	set destination to tables-only file
-	jpeg_write_tables(&cinfo);
-	set destination to image file
-	jpeg_start_compress(&cinfo, FALSE);
-	write data...
-	jpeg_finish_compress(&cinfo);
+        create JPEG compression object
+        set JPEG parameters
+        set destination to tables-only file
+        jpeg_write_tables(&cinfo);
+        set destination to image file
+        jpeg_start_compress(&cinfo, FALSE);
+        write data...
+        jpeg_finish_compress(&cinfo);
 
 Since the JPEG parameters are not altered between writing the table file and
 the abbreviated image file, the same tables are sure to be used.  Of course,
@@ -2282,7 +2320,7 @@
 
     if (cinfo.quant_tbl_ptrs[n] == NULL)
       cinfo.quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) &cinfo);
-    quant_ptr = cinfo.quant_tbl_ptrs[n];	/* quant_ptr is JQUANT_TBL* */
+    quant_ptr = cinfo.quant_tbl_ptrs[n];        /* quant_ptr is JQUANT_TBL* */
     for (i = 0; i < 64; i++) {
       /* Qtable[] is desired quantization table, in natural array order */
       quant_ptr->quantval[i] = Qtable[i];
@@ -2292,7 +2330,7 @@
 
     if (cinfo.ac_huff_tbl_ptrs[n] == NULL)
       cinfo.ac_huff_tbl_ptrs[n] = jpeg_alloc_huff_table((j_common_ptr) &cinfo);
-    huff_ptr = cinfo.ac_huff_tbl_ptrs[n];	/* huff_ptr is JHUFF_TBL* */
+    huff_ptr = cinfo.ac_huff_tbl_ptrs[n];       /* huff_ptr is JHUFF_TBL* */
     for (i = 1; i <= 16; i++) {
       /* counts[i] is number of Huffman codes of length i bits, i=1..16 */
       huff_ptr->bits[i] = counts[i];
@@ -2314,15 +2352,15 @@
 FALSE to indicate that you do not require an image to be present.  Thus, the
 typical scenario is
 
-	create JPEG decompression object
-	set source to tables-only file
-	jpeg_read_header(&cinfo, FALSE);
-	set source to abbreviated image file
-	jpeg_read_header(&cinfo, TRUE);
-	set decompression parameters
-	jpeg_start_decompress(&cinfo);
-	read data...
-	jpeg_finish_decompress(&cinfo);
+        create JPEG decompression object
+        set source to tables-only file
+        jpeg_read_header(&cinfo, FALSE);
+        set source to abbreviated image file
+        jpeg_read_header(&cinfo, TRUE);
+        set decompression parameters
+        jpeg_start_decompress(&cinfo);
+        read data...
+        jpeg_finish_decompress(&cinfo);
 
 In some cases, you may want to read a file without knowing whether it contains
 an image or just tables.  In that case, pass FALSE and check the return value
@@ -2395,7 +2433,7 @@
 "JPEG_APP0 + n" for APPn.  (Actually, jpeg_write_marker will let you write
 any marker type, but we don't recommend writing any other kinds of marker.)
 For example, to write a user comment string pointed to by comment_text:
-	jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text));
+        jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text));
 
 If it's not convenient to store all the marker data in memory at once,
 you can instead call jpeg_write_m_header() followed by multiple calls to
@@ -2441,7 +2479,7 @@
 
 
 To save the contents of special markers in memory, call
-	jpeg_save_markers(cinfo, marker_code, length_limit)
+        jpeg_save_markers(cinfo, marker_code, length_limit)
 where marker_code is the marker type to save, JPEG_COM or JPEG_APP0+n.
 (To arrange to save all the special marker types, you need to call this
 routine 17 times, for COM and APP0-APP15.)  If the incoming marker is longer
@@ -2486,7 +2524,7 @@
 If you want to supply your own marker-reading routine, you do it by calling
 jpeg_set_marker_processor().  A marker processor routine must have the
 signature
-	boolean jpeg_marker_parser_method (j_decompress_ptr cinfo)
+        boolean jpeg_marker_parser_method (j_decompress_ptr cinfo)
 Although the marker code is not explicitly passed, the routine can find it
 in cinfo->unread_marker.  At the time of call, the marker proper has been
 read from the data source module.  The processor routine is responsible for
@@ -2573,8 +2611,8 @@
 
 The required dimensions of the supplied data can be computed for each
 component as
-	cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
-	cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
+        cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
+        cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
 after jpeg_start_compress() has initialized those fields.  If the valid data
 is smaller than this, it must be padded appropriately.  For some sampling
 factors and image sizes, additional dummy DCT blocks are inserted to make
@@ -2582,12 +2620,12 @@
 blocks itself; it does not read them from your supplied data.  Therefore you
 need never pad by more than DCTSIZE samples.  An example may help here.
 Assume 2h2v downsampling of YCbCr data, that is
-	cinfo->comp_info[0].h_samp_factor = 2		for Y
-	cinfo->comp_info[0].v_samp_factor = 2
-	cinfo->comp_info[1].h_samp_factor = 1		for Cb
-	cinfo->comp_info[1].v_samp_factor = 1
-	cinfo->comp_info[2].h_samp_factor = 1		for Cr
-	cinfo->comp_info[2].v_samp_factor = 1
+        cinfo->comp_info[0].h_samp_factor = 2           for Y
+        cinfo->comp_info[0].v_samp_factor = 2
+        cinfo->comp_info[1].h_samp_factor = 1           for Cb
+        cinfo->comp_info[1].v_samp_factor = 1
+        cinfo->comp_info[2].h_samp_factor = 1           for Cr
+        cinfo->comp_info[2].v_samp_factor = 1
 and suppose that the nominal image dimensions (cinfo->image_width and
 cinfo->image_height) are 101x101 pixels.  Then jpeg_start_compress() will
 compute downsampled_width = 101 and width_in_blocks = 13 for Y,
@@ -2758,18 +2796,18 @@
 can use the same callback routine for both compression and decompression.
 
 The jpeg_progress_mgr struct contains four fields which are set by the library:
-	long pass_counter;	/* work units completed in this pass */
-	long pass_limit;	/* total number of work units in this pass */
-	int completed_passes;	/* passes completed so far */
-	int total_passes;	/* total number of passes expected */
+        long pass_counter;      /* work units completed in this pass */
+        long pass_limit;        /* total number of work units in this pass */
+        int completed_passes;   /* passes completed so far */
+        int total_passes;       /* total number of passes expected */
 During any one pass, pass_counter increases from 0 up to (not including)
 pass_limit; the step size is usually but not necessarily 1.  The pass_limit
 value may change from one pass to another.  The expected total number of
 passes is in total_passes, and the number of passes already completed is in
 completed_passes.  Thus the fraction of work completed may be estimated as
-		completed_passes + (pass_counter/pass_limit)
-		--------------------------------------------
-				total_passes
+                completed_passes + (pass_counter/pass_limit)
+                --------------------------------------------
+                                total_passes
 ignoring the fact that the passes may not be equal amounts of work.
 
 When decompressing, pass_limit can even change within a pass, because it
@@ -2942,7 +2980,7 @@
 
 You can also save a few K by not having text error messages in the library;
 the standard error message table occupies about 5Kb.  This is particularly
-reasonable for embedded applications where there's no good way to display 
+reasonable for embedded applications where there's no good way to display
 a message anyway.  To do this, remove the creation of the message table
 (jpeg_std_message_table[]) from jerror.c, and alter format_message to do
 something reasonable without it.  You could output the numeric value of the
@@ -2966,10 +3004,10 @@
 
 The code is not dependent on the exact sizes of the C data types.  As
 distributed, we make the assumptions that
-	char	is at least 8 bits wide
-	short	is at least 16 bits wide
-	int	is at least 16 bits wide
-	long	is at least 32 bits wide
+        char    is at least 8 bits wide
+        short   is at least 16 bits wide
+        int     is at least 16 bits wide
+        long    is at least 32 bits wide
 (These are the minimum requirements of the ANSI C standard.)  Wider types will
 work fine, although memory may be used inefficiently if char is much larger
 than 8 bits or short is much bigger than 16 bits.  The code should work
diff --git a/md5/Makefile.am b/md5/Makefile.am
new file mode 100644
index 0000000..b36f019
--- /dev/null
+++ b/md5/Makefile.am
@@ -0,0 +1,4 @@
+noinst_PROGRAMS = md5cmp
+
+md5cmp_SOURCES = md5cmp.c md5.c md5hl.c md5.h
+md5cmp_CFLAGS = -I$(srcdir)
diff --git a/md5/md5.c b/md5/md5.c
new file mode 100644
index 0000000..b30df97
--- /dev/null
+++ b/md5/md5.c
@@ -0,0 +1,331 @@
+/*
+ * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
+ *
+ * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ * rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * This code is the same as the code published by RSA Inc.  It has been
+ * edited for clarity and style only.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "./md5.h"
+
+static void MD5Transform(unsigned int [4], const unsigned char [64]);
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define Encode memcpy
+#define Decode memcpy
+#else 
+
+/*
+ * OS X doesn't have le32toh() or htole32()
+ */
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#endif
+
+/*
+ * Encodes input (unsigned int) into output (unsigned char). Assumes len is
+ * a multiple of 4.
+ */
+
+static void
+Encode (unsigned char *output, unsigned int *input, unsigned int len)
+{
+	unsigned int i;
+	unsigned int *op = (unsigned int *)output;
+
+	for (i = 0; i < len / 4; i++)
+		op[i] = htole32(input[i]);
+}
+
+/*
+ * Decodes input (unsigned char) into output (unsigned int). Assumes len is
+ * a multiple of 4.
+ */
+
+static void
+Decode (unsigned int *output, const unsigned char *input, unsigned int len)
+{
+	unsigned int i;
+	const unsigned int *ip = (const unsigned int *)input;
+
+	for (i = 0; i < len / 4; i++)
+		output[i] = le32toh(ip[i]);
+}
+#endif
+
+static const unsigned char PADDING[64] = {
+  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* F, G, H and I are basic MD5 functions. */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
+
+/* ROTATE_LEFT rotates x left n bits. */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+/*
+ * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+ * Rotation is separate from addition to prevent recomputation.
+ */
+#define FF(a, b, c, d, x, s, ac) { \
+	(a) += F ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	}
+#define GG(a, b, c, d, x, s, ac) { \
+	(a) += G ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	}
+#define HH(a, b, c, d, x, s, ac) { \
+	(a) += H ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	}
+#define II(a, b, c, d, x, s, ac) { \
+	(a) += I ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	}
+
+/* MD5 initialization. Begins an MD5 operation, writing a new context. */
+
+void
+MD5Init (context)
+	MD5_CTX *context;
+{
+
+	context->count[0] = context->count[1] = 0;
+
+	/* Load magic initialization constants.  */
+	context->state[0] = 0x67452301;
+	context->state[1] = 0xefcdab89;
+	context->state[2] = 0x98badcfe;
+	context->state[3] = 0x10325476;
+}
+
+/* 
+ * MD5 block update operation. Continues an MD5 message-digest
+ * operation, processing another message block, and updating the
+ * context.
+ */
+
+void
+MD5Update (context, in, inputLen)
+	MD5_CTX *context;
+	const void *in;
+	unsigned int inputLen;
+{
+	unsigned int i, idx, partLen;
+	const unsigned char *input = in;
+
+	/* Compute number of bytes mod 64 */
+	idx = (unsigned int)((context->count[0] >> 3) & 0x3F);
+
+	/* Update number of bits */
+	if ((context->count[0] += ((unsigned int)inputLen << 3))
+	    < ((unsigned int)inputLen << 3))
+		context->count[1]++;
+	context->count[1] += ((unsigned int)inputLen >> 29);
+
+	partLen = 64 - idx;
+
+	/* Transform as many times as possible. */
+	if (inputLen >= partLen) {
+		memcpy((void *)&context->buffer[idx], (const void *)input,
+		    partLen);
+		MD5Transform (context->state, context->buffer);
+
+		for (i = partLen; i + 63 < inputLen; i += 64)
+			MD5Transform (context->state, &input[i]);
+
+		idx = 0;
+	}
+	else
+		i = 0;
+
+	/* Buffer remaining input */
+	memcpy ((void *)&context->buffer[idx], (const void *)&input[i],
+	    inputLen-i);
+}
+
+/*
+ * MD5 padding. Adds padding followed by original length.
+ */
+
+void
+MD5Pad (context)
+	MD5_CTX *context;
+{
+	unsigned char bits[8];
+	unsigned int idx, padLen;
+
+	/* Save number of bits */
+	Encode (bits, context->count, 8);
+
+	/* Pad out to 56 mod 64. */
+	idx = (unsigned int)((context->count[0] >> 3) & 0x3f);
+	padLen = (idx < 56) ? (56 - idx) : (120 - idx);
+	MD5Update (context, PADDING, padLen);
+
+	/* Append length (before padding) */
+	MD5Update (context, bits, 8);
+}
+
+/*
+ * MD5 finalization. Ends an MD5 message-digest operation, writing the
+ * message digest and zeroizing the context.
+ */
+
+void
+MD5Final (digest, context)
+	unsigned char digest[16];
+	MD5_CTX *context;
+{
+	/* Do padding. */
+	MD5Pad (context);
+
+	/* Store state in digest */
+	Encode (digest, context->state, 16);
+
+	/* Zeroize sensitive information. */
+	memset ((void *)context, 0, sizeof (*context));
+}
+
+/* MD5 basic transformation. Transforms state based on block. */
+
+static void
+MD5Transform (state, block)
+	unsigned int state[4];
+	const unsigned char block[64];
+{
+	unsigned int a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+	Decode (x, block, 64);
+
+	/* Round 1 */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
+	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
+	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
+	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
+	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
+	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
+	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
+	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
+	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
+	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
+	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+	/* Round 2 */
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
+	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
+	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
+	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
+	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
+	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
+	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
+	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
+	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
+	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
+	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
+	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+	/* Round 3 */
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
+	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
+	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
+	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
+	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
+	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
+	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
+	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
+	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
+	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
+
+	/* Round 4 */
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
+	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
+	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
+	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
+	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
+	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
+	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
+	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
+	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
+	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
+
+	state[0] += a;
+	state[1] += b;
+	state[2] += c;
+	state[3] += d;
+
+	/* Zeroize sensitive information. */
+	memset ((void *)x, 0, sizeof (x));
+}
diff --git a/md5/md5.h b/md5/md5.h
new file mode 100644
index 0000000..551e252
--- /dev/null
+++ b/md5/md5.h
@@ -0,0 +1,49 @@
+/* MD5.H - header file for MD5C.C
+ * $FreeBSD$
+ */
+
+/*-
+ Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+rights reserved.
+
+License to copy and use this software is granted provided that it
+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+Algorithm" in all material mentioning or referencing this software
+or this function.
+
+License is also granted to make and use derivative works provided
+that such works are identified as "derived from the RSA Data
+Security, Inc. MD5 Message-Digest Algorithm" in all material
+mentioning or referencing the derived work.
+
+RSA Data Security, Inc. makes no representations concerning either
+the merchantability of this software or the suitability of this
+software for any particular purpose. It is provided "as is"
+without express or implied warranty of any kind.
+
+These notices must be retained in any copies of any part of this
+documentation and/or software.
+ */
+
+#ifndef _SYS_MD5_H_
+#define _SYS_MD5_H_
+
+#define MD5_BLOCK_LENGTH		64
+#define MD5_DIGEST_LENGTH		16
+#define MD5_DIGEST_STRING_LENGTH	(MD5_DIGEST_LENGTH * 2 + 1)
+
+/* MD5 context. */
+typedef struct MD5Context {
+  unsigned int state[4];	/* state (ABCD) */
+  unsigned int count[2];	/* number of bits, modulo 2^64 (lsb first) */
+  unsigned char buffer[64];	/* input buffer */
+} MD5_CTX;
+
+void   MD5Init (MD5_CTX *);
+void   MD5Update (MD5_CTX *, const void *, unsigned int);
+void   MD5Final (unsigned char [16], MD5_CTX *);
+char * MD5End(MD5_CTX *, char *);
+char * MD5File(const char *, char *);
+char * MD5FileChunk(const char *, char *, off_t, off_t);
+char * MD5Data(const void *, unsigned int, char *);
+#endif /* _SYS_MD5_H_ */
diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java b/md5/md5cmp.c
similarity index 68%
rename from java/org/libjpegturbo/turbojpeg/TJLoader.java
rename to md5/md5cmp.c
index db77bba..07acda4 100644
--- a/java/org/libjpegturbo/turbojpeg/TJLoader.java
+++ b/md5/md5cmp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -26,10 +26,34 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-package org.libjpegturbo.turbojpeg;
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include "./md5.h"
 
-final class TJLoader {
-  static void load() {
-    System.loadLibrary("turbojpeg");
-  }
-};
+int main(int argc, char *argv[])
+{
+	char *md5sum = NULL, buf[65];
+
+	if (argc < 3) {
+		fprintf(stderr, "USAGE: %s <correct MD5 sum> <file>\n", argv[0]);
+		return -1;
+	}
+
+	if (strlen(argv[1]) != 32)
+		fprintf(stderr, "WARNING: MD5 hash size is wrong.\n");
+
+	md5sum = MD5File(argv[2], buf);
+	if (!md5sum) {
+		perror("Could not obtain MD5 sum");
+		return -1;
+	}
+
+	if (!strcasecmp(md5sum, argv[1])) {
+		fprintf(stderr, "%s: OK\n", argv[2]);
+		return 0;
+	} else {
+		fprintf(stderr, "%s: FAILED.  Checksum is %s\n", argv[2], md5sum);
+		return -1;
+	}
+}
diff --git a/md5/md5hl.c b/md5/md5hl.c
new file mode 100644
index 0000000..eaa41e2
--- /dev/null
+++ b/md5/md5hl.c
@@ -0,0 +1,97 @@
+/* mdXhl.c * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define LENGTH 16
+
+#include "./md5.h"
+
+char *
+MD5End(MD5_CTX *ctx, char *buf)
+{
+	int i;
+	unsigned char digest[LENGTH];
+	static const char hex[]="0123456789abcdef";
+
+	if (!buf)
+		buf = malloc(2*LENGTH + 1);
+	if (!buf)
+		return 0;
+	MD5Final(digest, ctx);
+	for (i = 0; i < LENGTH; i++) {
+		buf[i+i] = hex[digest[i] >> 4];
+		buf[i+i+1] = hex[digest[i] & 0x0f];
+	}
+	buf[i+i] = '\0';
+	return buf;
+}
+
+char *
+MD5File(const char *filename, char *buf)
+{
+	return (MD5FileChunk(filename, buf, 0, 0));
+}
+
+/* Hash up to len bytes (0 = to EOF) of filename from offset ofs; returns the
+ * hex digest via MD5End(), or 0 with errno kept if a system call fails. */
+char *
+MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
+{
+	unsigned char buffer[BUFSIZ];
+	MD5_CTX ctx;
+	struct stat stbuf;
+	int f, i = -1, e;	/* i < 0 at done: means "failed" */
+	off_t n;
+
+	MD5Init(&ctx);
+	f = open(filename, O_RDONLY);
+	if (f < 0)
+		return 0;
+	if (fstat(f, &stbuf) < 0)
+		goto done;	/* still close f: no descriptor leak */
+	if (ofs > stbuf.st_size)
+		ofs = stbuf.st_size;
+	if ((len == 0) || (len > stbuf.st_size - ofs))
+		len = stbuf.st_size - ofs;
+	if (lseek(f, ofs, SEEK_SET) < 0)
+		goto done;	/* still close f: no descriptor leak */
+	for (n = len, i = 0; n > 0; n -= i) {
+		if (n > sizeof(buffer))
+			i = read(f, buffer, sizeof(buffer));
+		else
+			i = read(f, buffer, n);
+		if (i <= 0)	/* error, or early EOF (would loop forever) */
+			break;
+		MD5Update(&ctx, buffer, i);
+	}
+done:
+	e = errno;		/* keep the failing call's errno across close() */
+	close(f);
+	errno = e;
+	if (i < 0)
+		return 0;
+	return (MD5End(&ctx, buf));
+}
+
+char *
+MD5Data (const void *data, unsigned int len, char *buf)
+{
+	MD5_CTX ctx;
+
+	MD5Init(&ctx);
+	MD5Update(&ctx,data,len);
+	return (MD5End(&ctx, buf));
+}
diff --git a/rdbmp.c b/rdbmp.c
index c053074..0b2351d 100644
--- a/rdbmp.c
+++ b/rdbmp.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Modified 2009-2010 by Guido Vollbeding.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Modified 2011 by Siarhei Siamashka.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -24,7 +24,7 @@
  * This code contributed by James Arthur Boucher.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef BMP_SUPPORTED
 
@@ -33,19 +33,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /* Private version of data source object */
@@ -55,15 +55,15 @@
 typedef struct _bmp_source_struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  j_compress_ptr cinfo;		/* back link saves passing separate parm */
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
 
-  JSAMPARRAY colormap;		/* BMP colormap (converted to my format) */
+  JSAMPARRAY colormap;          /* BMP colormap (converted to my format) */
 
-  jvirt_sarray_ptr whole_image;	/* Needed to reverse row order */
-  JDIMENSION source_row;	/* Current source row number */
-  JDIMENSION row_width;		/* Physical width of scanlines in file */
+  jvirt_sarray_ptr whole_image; /* Needed to reverse row order */
+  JDIMENSION source_row;        /* Current source row number */
+  JDIMENSION row_width;         /* Physical width of scanlines in file */
 
-  int bits_per_pixel;		/* remembers 8- or 24-bit format */
+  int bits_per_pixel;           /* remembers 8- or 24-bit format */
 } bmp_source_struct;
 
 
@@ -140,7 +140,7 @@
   outptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     t = GETJSAMPLE(*inptr++);
-    *outptr++ = colormap[0][t];	/* can omit GETJSAMPLE() safely */
+    *outptr++ = colormap[0][t]; /* can omit GETJSAMPLE() safely */
     *outptr++ = colormap[1][t];
     *outptr++ = colormap[2][t];
   }
@@ -170,7 +170,7 @@
   inptr = image_ptr[0];
   outptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
-    outptr[2] = *inptr++;	/* can omit GETJSAMPLE() safely */
+    outptr[2] = *inptr++;       /* can omit GETJSAMPLE() safely */
     outptr[1] = *inptr++;
     outptr[0] = *inptr++;
     outptr += 3;
@@ -200,10 +200,10 @@
   inptr = image_ptr[0];
   outptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
-    outptr[2] = *inptr++;	/* can omit GETJSAMPLE() safely */
+    outptr[2] = *inptr++;       /* can omit GETJSAMPLE() safely */
     outptr[1] = *inptr++;
     outptr[0] = *inptr++;
-    inptr++;			/* skip the 4th byte (Alpha channel) */
+    inptr++;                    /* skip the 4th byte (Alpha channel) */
     outptr += 3;
   }
 
@@ -280,11 +280,11 @@
   U_CHAR bmpfileheader[14];
   U_CHAR bmpinfoheader[64];
 #define GET_2B(array,offset)  ((unsigned int) UCH(array[offset]) + \
-			       (((unsigned int) UCH(array[offset+1])) << 8))
+                               (((unsigned int) UCH(array[offset+1])) << 8))
 #define GET_4B(array,offset)  ((INT32) UCH(array[offset]) + \
-			       (((INT32) UCH(array[offset+1])) << 8) + \
-			       (((INT32) UCH(array[offset+2])) << 16) + \
-			       (((INT32) UCH(array[offset+3])) << 24))
+                               (((INT32) UCH(array[offset+1])) << 8) + \
+                               (((INT32) UCH(array[offset+2])) << 16) + \
+                               (((INT32) UCH(array[offset+3])) << 24))
   INT32 bfOffBits;
   INT32 headerSize;
   INT32 biWidth;
@@ -293,7 +293,7 @@
   INT32 biCompression;
   INT32 biXPelsPerMeter,biYPelsPerMeter;
   INT32 biClrUsed = 0;
-  int mapentrysize = 0;		/* 0 indicates no colormap */
+  int mapentrysize = 0;         /* 0 indicates no colormap */
   INT32 bPad;
   JDIMENSION row_width;
 
@@ -325,11 +325,11 @@
     source->bits_per_pixel = (int) GET_2B(bmpinfoheader,10);
 
     switch (source->bits_per_pixel) {
-    case 8:			/* colormapped image */
-      mapentrysize = 3;		/* OS/2 uses RGBTRIPLE colormap */
+    case 8:                     /* colormapped image */
+      mapentrysize = 3;         /* OS/2 uses RGBTRIPLE colormap */
       TRACEMS2(cinfo, 1, JTRC_BMP_OS2_MAPPED, (int) biWidth, (int) biHeight);
       break;
-    case 24:			/* RGB image */
+    case 24:                    /* RGB image */
       TRACEMS2(cinfo, 1, JTRC_BMP_OS2, (int) biWidth, (int) biHeight);
       break;
     default:
@@ -352,14 +352,14 @@
     /* biSizeImage, biClrImportant fields are ignored */
 
     switch (source->bits_per_pixel) {
-    case 8:			/* colormapped image */
-      mapentrysize = 4;		/* Windows uses RGBQUAD colormap */
+    case 8:                     /* colormapped image */
+      mapentrysize = 4;         /* Windows uses RGBQUAD colormap */
       TRACEMS2(cinfo, 1, JTRC_BMP_MAPPED, (int) biWidth, (int) biHeight);
       break;
-    case 24:			/* RGB image */
+    case 24:                    /* RGB image */
       TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
       break;
-    case 32:			/* RGB image + Alpha channel */
+    case 32:                    /* RGB image + Alpha channel */
       TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
       break;
     default:
@@ -373,7 +373,7 @@
       /* Set JFIF density parameters from the BMP data */
       cinfo->X_density = (UINT16) (biXPelsPerMeter/100); /* 100 cm per meter */
       cinfo->Y_density = (UINT16) (biYPelsPerMeter/100);
-      cinfo->density_unit = 2;	/* dots/cm */
+      cinfo->density_unit = 2;  /* dots/cm */
     }
     break;
   default:
@@ -392,7 +392,7 @@
   /* Read the colormap, if any */
   if (mapentrysize > 0) {
     if (biClrUsed <= 0)
-      biClrUsed = 256;		/* assume it's 256 */
+      biClrUsed = 256;          /* assume it's 256 */
     else if (biClrUsed > 256)
       ERREXIT(cinfo, JERR_BMP_BADCMAP);
     /* Allocate space to store the colormap */
@@ -406,7 +406,7 @@
   }
 
   /* Skip any remaining pad bytes */
-  if (bPad < 0)			/* incorrect bfOffBits value? */
+  if (bPad < 0)                 /* incorrect bfOffBits value? */
     ERREXIT(cinfo, JERR_BMP_BADHEADER);
   while (--bPad >= 0) {
     (void) read_byte(source);
@@ -469,8 +469,8 @@
   /* Create module interface object */
   source = (bmp_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(bmp_source_struct));
-  source->cinfo = cinfo;	/* make back link for subroutines */
+                                  SIZEOF(bmp_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_bmp;
   source->pub.finish_input = finish_input_bmp;
diff --git a/rdcolmap.c b/rdcolmap.c
index 42b3437..ac6f50e 100644
--- a/rdcolmap.c
+++ b/rdcolmap.c
@@ -21,9 +21,9 @@
  * currently implemented.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
-#ifdef QUANT_2PASS_SUPPORTED	/* otherwise can't quantize to supplied map */
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
 
 /* Portions of this code are based on the PBMPLUS library, which is:
 **
@@ -54,9 +54,9 @@
   /* Check for duplicate color. */
   for (index = 0; index < ncolors; index++) {
     if (GETJSAMPLE(colormap0[index]) == R &&
-	GETJSAMPLE(colormap1[index]) == G &&
-	GETJSAMPLE(colormap2[index]) == B)
-      return;			/* color is already in map */
+        GETJSAMPLE(colormap1[index]) == G &&
+        GETJSAMPLE(colormap2[index]) == B)
+      return;                   /* color is already in map */
   }
 
   /* Check for map overflow. */
@@ -107,9 +107,9 @@
     if (R == EOF || G == EOF || B == EOF)
       ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
     add_map_entry(cinfo,
-		  R << (BITS_IN_JSAMPLE-8),
-		  G << (BITS_IN_JSAMPLE-8),
-		  B << (BITS_IN_JSAMPLE-8));
+                  R << (BITS_IN_JSAMPLE-8),
+                  G << (BITS_IN_JSAMPLE-8),
+                  B << (BITS_IN_JSAMPLE-8));
   }
 }
 
@@ -123,7 +123,7 @@
 /* A comment/newline sequence is returned as a newline */
 {
   register int ch;
-  
+
   ch = getc(infile);
   if (ch == '#') {
     do {
@@ -143,17 +143,17 @@
 {
   register int ch;
   register unsigned int val;
-  
+
   /* Skip any leading whitespace */
   do {
     ch = pbm_getc(infile);
     if (ch == EOF)
       ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
   } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
-  
+
   if (ch < '0' || ch > '9')
     ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
-  
+
   val = ch - '0';
   while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') {
     val *= 10;
@@ -175,7 +175,7 @@
   int R, G, B;
 
   /* Initial 'P' has already been read by read_color_map */
-  c = getc(infile);		/* save format discriminator for a sec */
+  c = getc(infile);             /* save format discriminator for a sec */
 
   /* while we fetch the remaining header info */
   w = read_pbm_integer(cinfo, infile);
@@ -190,26 +190,26 @@
     ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
 
   switch (c) {
-  case '3':			/* it's a text-format PPM file */
+  case '3':                     /* it's a text-format PPM file */
     for (row = 0; row < h; row++) {
       for (col = 0; col < w; col++) {
-	R = read_pbm_integer(cinfo, infile);
-	G = read_pbm_integer(cinfo, infile);
-	B = read_pbm_integer(cinfo, infile);
-	add_map_entry(cinfo, R, G, B);
+        R = read_pbm_integer(cinfo, infile);
+        G = read_pbm_integer(cinfo, infile);
+        B = read_pbm_integer(cinfo, infile);
+        add_map_entry(cinfo, R, G, B);
       }
     }
     break;
 
-  case '6':			/* it's a raw-format PPM file */
+  case '6':                     /* it's a raw-format PPM file */
     for (row = 0; row < h; row++) {
       for (col = 0; col < w; col++) {
-	R = getc(infile);
-	G = getc(infile);
-	B = getc(infile);
-	if (R == EOF || G == EOF || B == EOF)
-	  ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
-	add_map_entry(cinfo, R, G, B);
+        R = getc(infile);
+        G = getc(infile);
+        B = getc(infile);
+        if (R == EOF || G == EOF || B == EOF)
+          ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+        add_map_entry(cinfo, R, G, B);
       }
     }
     break;
diff --git a/rdgif.c b/rdgif.c
index b27c167..5caad8a 100644
--- a/rdgif.c
+++ b/rdgif.c
@@ -19,7 +19,7 @@
  *    CompuServe Incorporated."
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef GIF_SUPPORTED
 
@@ -32,7 +32,7 @@
 {
   fprintf(stderr, "GIF input is unsupported for legal reasons.  Sorry.\n");
   exit(EXIT_FAILURE);
-  return NULL;			/* keep compiler happy */
+  return NULL;                  /* keep compiler happy */
 }
 
 #endif /* GIF_SUPPORTED */
diff --git a/rdjpgcom.c b/rdjpgcom.c
index 3719154..02ce90f 100644
--- a/rdjpgcom.c
+++ b/rdjpgcom.c
@@ -12,45 +12,45 @@
  * JPEG markers.
  */
 
-#define JPEG_CJPEG_DJPEG	/* to get the command-line config symbols */
-#include "jinclude.h"		/* get auto-config symbols, <stdio.h> */
+#define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
+#include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
 #ifdef HAVE_LOCALE_H
-#include <locale.h>		/* Bill Allombert: use locale for isprint */
+#include <locale.h>             /* Bill Allombert: use locale for isprint */
 #endif
-#include <ctype.h>		/* to declare isupper(), tolower() */
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
+#include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
+#ifdef VMS                      /* VMS is very nonstandard */
+#define READ_BINARY     "rb", "ctx=stm"
+#else                           /* standard ANSI-compliant case */
+#define READ_BINARY     "rb"
 #endif
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
 #ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
+#define EXIT_SUCCESS  1         /* VMS is very nonstandard */
 #else
 #define EXIT_SUCCESS  0
 #endif
@@ -62,7 +62,7 @@
  * To reuse this code in another application, you might need to change these.
  */
 
-static FILE * infile;		/* input JPEG file */
+static FILE * infile;           /* input JPEG file */
 
 /* Return next input byte, or EOF if no more */
 #define NEXTBYTE()  getc(infile)
@@ -107,11 +107,11 @@
  * in this program.  (See jdmarker.c for a more complete list.)
  */
 
-#define M_SOF0  0xC0		/* Start Of Frame N */
-#define M_SOF1  0xC1		/* N indicates which compression process */
-#define M_SOF2  0xC2		/* Only SOF0-SOF2 are now in common use */
+#define M_SOF0  0xC0            /* Start Of Frame N */
+#define M_SOF1  0xC1            /* N indicates which compression process */
+#define M_SOF2  0xC2            /* Only SOF0-SOF2 are now in common use */
 #define M_SOF3  0xC3
-#define M_SOF5  0xC5		/* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF5  0xC5            /* NB: codes C4 and CC are NOT SOF markers */
 #define M_SOF6  0xC6
 #define M_SOF7  0xC7
 #define M_SOF9  0xC9
@@ -120,12 +120,12 @@
 #define M_SOF13 0xCD
 #define M_SOF14 0xCE
 #define M_SOF15 0xCF
-#define M_SOI   0xD8		/* Start Of Image (beginning of datastream) */
-#define M_EOI   0xD9		/* End Of Image (end of datastream) */
-#define M_SOS   0xDA		/* Start Of Scan (begins compressed data) */
-#define M_APP0	0xE0		/* Application-specific marker, type N */
-#define M_APP12	0xEC		/* (we don't bother to list all 16 APPn's) */
-#define M_COM   0xFE		/* COMment */
+#define M_SOI   0xD8            /* Start Of Image (beginning of datastream) */
+#define M_EOI   0xD9            /* End Of Image (end of datastream) */
+#define M_SOS   0xDA            /* Start Of Scan (begins compressed data) */
+#define M_APP0  0xE0            /* Application-specific marker, type N */
+#define M_APP12 0xEC            /* (we don't bother to list all 16 APPn's) */
+#define M_COM   0xFE            /* COMment */
 
 
 /*
@@ -253,7 +253,7 @@
       printf("\n");
     } else if (ch == '\n') {
       if (lastch != '\r')
-	printf("\n");
+        printf("\n");
     } else if (ch == '\\') {
       printf("\\\\");
     } else if (isprint(ch)) {
@@ -287,7 +287,7 @@
   const char * process;
   int ci;
 
-  length = read_2_bytes();	/* usual parameter length count */
+  length = read_2_bytes();      /* usual parameter length count */
 
   data_precision = read_1_byte();
   image_height = read_2_bytes();
@@ -295,33 +295,33 @@
   num_components = read_1_byte();
 
   switch (marker) {
-  case M_SOF0:	process = "Baseline";  break;
-  case M_SOF1:	process = "Extended sequential";  break;
-  case M_SOF2:	process = "Progressive";  break;
-  case M_SOF3:	process = "Lossless";  break;
-  case M_SOF5:	process = "Differential sequential";  break;
-  case M_SOF6:	process = "Differential progressive";  break;
-  case M_SOF7:	process = "Differential lossless";  break;
-  case M_SOF9:	process = "Extended sequential, arithmetic coding";  break;
-  case M_SOF10:	process = "Progressive, arithmetic coding";  break;
-  case M_SOF11:	process = "Lossless, arithmetic coding";  break;
-  case M_SOF13:	process = "Differential sequential, arithmetic coding";  break;
-  case M_SOF14:	process = "Differential progressive, arithmetic coding"; break;
-  case M_SOF15:	process = "Differential lossless, arithmetic coding";  break;
-  default:	process = "Unknown";  break;
+  case M_SOF0:  process = "Baseline";  break;
+  case M_SOF1:  process = "Extended sequential";  break;
+  case M_SOF2:  process = "Progressive";  break;
+  case M_SOF3:  process = "Lossless";  break;
+  case M_SOF5:  process = "Differential sequential";  break;
+  case M_SOF6:  process = "Differential progressive";  break;
+  case M_SOF7:  process = "Differential lossless";  break;
+  case M_SOF9:  process = "Extended sequential, arithmetic coding";  break;
+  case M_SOF10: process = "Progressive, arithmetic coding";  break;
+  case M_SOF11: process = "Lossless, arithmetic coding";  break;
+  case M_SOF13: process = "Differential sequential, arithmetic coding";  break;
+  case M_SOF14: process = "Differential progressive, arithmetic coding"; break;
+  case M_SOF15: process = "Differential lossless, arithmetic coding";  break;
+  default:      process = "Unknown";  break;
   }
 
   printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n",
-	 image_width, image_height, num_components, data_precision);
+         image_width, image_height, num_components, data_precision);
   printf("JPEG process: %s\n", process);
 
   if (length != (unsigned int) (8 + num_components * 3))
     ERREXIT("Bogus SOF marker length");
 
   for (ci = 0; ci < num_components; ci++) {
-    (void) read_1_byte();	/* Component ID code */
-    (void) read_1_byte();	/* H, V sampling factors */
-    (void) read_1_byte();	/* Quantization table number */
+    (void) read_1_byte();       /* Component ID code */
+    (void) read_1_byte();       /* H, V sampling factors */
+    (void) read_1_byte();       /* Quantization table number */
   }
 }
 
@@ -352,29 +352,29 @@
       /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
        * treated as SOFn.  C4 in particular is actually DHT.
        */
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
-    case M_SOF2:		/* Progressive, Huffman */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_SOF9:		/* Extended sequential, arithmetic */
-    case M_SOF10:		/* Progressive, arithmetic */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_SOF9:                /* Extended sequential, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       if (verbose)
-	process_SOFn(marker);
+        process_SOFn(marker);
       else
-	skip_variable();
+        skip_variable();
       break;
 
-    case M_SOS:			/* stop before hitting compressed data */
+    case M_SOS:                 /* stop before hitting compressed data */
       return marker;
 
-    case M_EOI:			/* in case it's a tables-only JPEG stream */
+    case M_EOI:                 /* in case it's a tables-only JPEG stream */
       return marker;
 
     case M_COM:
@@ -386,14 +386,14 @@
        * APP12 markers, so we print those out too when in -verbose mode.
        */
       if (verbose) {
-	printf("APP12 contains:\n");
-	process_COM(raw);
+        printf("APP12 contains:\n");
+        process_COM(raw);
       } else
-	skip_variable();
+        skip_variable();
       break;
 
-    default:			/* Anything else just gets skipped */
-      skip_variable();		/* we assume it has a parameter count... */
+    default:                    /* Anything else just gets skipped */
+      skip_variable();          /* we assume it has a parameter count... */
       break;
     }
   } /* end loop */
@@ -402,7 +402,7 @@
 
 /* Command line parsing code */
 
-static const char * progname;	/* program name for error messages */
+static const char * progname;   /* program name for error messages */
 
 
 static void
@@ -432,17 +432,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return 0;			/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return 0;                 /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return 0;			/* no good */
-    nmatched++;			/* count matched characters */
+      return 0;                 /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return 0;
-  return 1;			/* A-OK */
+  return 1;                     /* A-OK */
 }
 
 
@@ -464,14 +464,14 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "rdjpgcom";	/* in case C library doesn't provide it */
+    progname = "rdjpgcom";      /* in case C library doesn't provide it */
 
   /* Parse switches, if any */
   for (argn = 1; argn < argc; argn++) {
     arg = argv[argn];
     if (arg[0] != '-')
-      break;			/* not switch, must be file name */
-    arg++;			/* advance over '-' */
+      break;                    /* not switch, must be file name */
+    arg++;                      /* advance over '-' */
     if (keymatch(arg, "verbose", 1)) {
       verbose++;
     } else if (keymatch(arg, "raw", 1)) {
@@ -493,10 +493,10 @@
     }
   } else {
     /* default input file is stdin */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
     setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
     if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
       fprintf(stderr, "%s: can't open stdin\n", progname);
       exit(EXIT_FAILURE);
@@ -511,5 +511,5 @@
 
   /* All done. */
   exit(EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/rdppm.c b/rdppm.c
index a757022..c55ab2b 100644
--- a/rdppm.c
+++ b/rdppm.c
@@ -19,7 +19,7 @@
  * the file is indeed PPM format).
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef PPM_SUPPORTED
 
@@ -41,19 +41,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /*
@@ -72,10 +72,10 @@
 typedef struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  U_CHAR *iobuffer;		/* non-FAR pointer to I/O buffer */
-  JSAMPROW pixrow;		/* FAR pointer to same */
-  size_t buffer_width;		/* width of I/O buffer */
-  JSAMPLE *rescale;		/* => maxval-remapping array, or NULL */
+  U_CHAR *iobuffer;             /* non-FAR pointer to I/O buffer */
+  JSAMPROW pixrow;              /* FAR pointer to same */
+  size_t buffer_width;          /* width of I/O buffer */
+  JSAMPLE *rescale;             /* => maxval-remapping array, or NULL */
 } ppm_source_struct;
 
 typedef ppm_source_struct * ppm_source_ptr;
@@ -308,10 +308,10 @@
 
   /* detect unsupported variants (ie, PBM) before trying to read header */
   switch (c) {
-  case '2':			/* it's a text-format PGM file */
-  case '3':			/* it's a text-format PPM file */
-  case '5':			/* it's a raw-format PGM file */
-  case '6':			/* it's a raw-format PPM file */
+  case '2':                     /* it's a text-format PGM file */
+  case '3':                     /* it's a text-format PPM file */
+  case '5':                     /* it's a raw-format PGM file */
+  case '6':                     /* it's a raw-format PPM file */
     break;
   default:
     ERREXIT(cinfo, JERR_PPM_NOT);
@@ -331,12 +331,12 @@
   cinfo->image_height = (JDIMENSION) h;
 
   /* initialize flags to most common settings */
-  need_iobuffer = TRUE;		/* do we need an I/O buffer? */
-  use_raw_buffer = FALSE;	/* do we map input buffer onto I/O buffer? */
-  need_rescale = TRUE;		/* do we need a rescale array? */
+  need_iobuffer = TRUE;         /* do we need an I/O buffer? */
+  use_raw_buffer = FALSE;       /* do we map input buffer onto I/O buffer? */
+  need_rescale = TRUE;          /* do we need a rescale array? */
 
   switch (c) {
-  case '2':			/* it's a text-format PGM file */
+  case '2':                     /* it's a text-format PGM file */
     cinfo->input_components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h);
@@ -344,7 +344,7 @@
     need_iobuffer = FALSE;
     break;
 
-  case '3':			/* it's a text-format PPM file */
+  case '3':                     /* it's a text-format PPM file */
     cinfo->input_components = 3;
     cinfo->in_color_space = JCS_RGB;
     TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h);
@@ -352,7 +352,7 @@
     need_iobuffer = FALSE;
     break;
 
-  case '5':			/* it's a raw-format PGM file */
+  case '5':                     /* it's a raw-format PGM file */
     cinfo->input_components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM, w, h);
@@ -367,7 +367,7 @@
     }
     break;
 
-  case '6':			/* it's a raw-format PPM file */
+  case '6':                     /* it's a raw-format PPM file */
     cinfo->input_components = 3;
     cinfo->in_color_space = JCS_RGB;
     TRACEMS2(cinfo, 1, JTRC_PPM, w, h);
@@ -389,7 +389,7 @@
       ((maxval<=255) ? SIZEOF(U_CHAR) : (2*SIZEOF(U_CHAR)));
     source->iobuffer = (U_CHAR *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  source->buffer_width);
+                                  source->buffer_width);
   }
 
   /* Create compressor input buffer. */
@@ -415,7 +415,7 @@
     /* On 16-bit-int machines we have to be careful of maxval = 65535 */
     source->rescale = (JSAMPLE *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  (size_t) (((long) maxval + 1L) * SIZEOF(JSAMPLE)));
+                                  (size_t) (((long) maxval + 1L) * SIZEOF(JSAMPLE)));
     half_maxval = maxval / 2;
     for (val = 0; val <= (INT32) maxval; val++) {
       /* The multiplication here must be done in 32 bits to avoid overflow */
@@ -448,7 +448,7 @@
   /* Create module interface object */
   source = (ppm_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(ppm_source_struct));
+                                  SIZEOF(ppm_source_struct));
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_ppm;
   source->pub.finish_input = finish_input_ppm;
diff --git a/rdrle.c b/rdrle.c
index 542bc37..f8b3587 100644
--- a/rdrle.c
+++ b/rdrle.c
@@ -19,7 +19,7 @@
  * with updates from Robert Hutchinson.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef RLE_SUPPORTED
 
@@ -38,7 +38,7 @@
 
 /*
  * We support the following types of RLE files:
- *   
+ *
  *   GRAYSCALE   - 8 bits, no colormap
  *   MAPPEDGRAY  - 8 bits, 1 channel colomap
  *   PSEUDOCOLOR - 8 bits, 3 channel colormap
@@ -66,7 +66,7 @@
 
   rle_kind visual;              /* actual type of input file */
   jvirt_sarray_ptr image;       /* virtual array to hold the image */
-  JDIMENSION row;		/* current row # in the virtual array */
+  JDIMENSION row;               /* current row # in the virtual array */
   rle_hdr header;               /* Input file information */
   rle_pixel** rle_row;          /* holds a row returned by rle_getrow() */
 
@@ -111,10 +111,10 @@
   }
 
   /* Figure out what we have, set private vars and return values accordingly */
-  
+
   width  = source->header.xmax - source->header.xmin + 1;
   height = source->header.ymax - source->header.ymin + 1;
-  source->header.xmin = 0;		/* realign horizontally */
+  source->header.xmin = 0;              /* realign horizontally */
   source->header.xmax = width-1;
 
   cinfo->image_width      = width;
@@ -131,17 +131,17 @@
   } else if (source->header.ncolors == 1 && source->header.ncmap == 3) {
     source->visual     = PSEUDOCOLOR;
     TRACEMS3(cinfo, 1, JTRC_RLE_MAPPED, width, height,
-	     1 << source->header.cmaplen);
+             1 << source->header.cmaplen);
   } else if (source->header.ncolors == 3 && source->header.ncmap == 3) {
     source->visual     = TRUECOLOR;
     TRACEMS3(cinfo, 1, JTRC_RLE_FULLMAP, width, height,
-	     1 << source->header.cmaplen);
+             1 << source->header.cmaplen);
   } else if (source->header.ncolors == 3 && source->header.ncmap == 0) {
     source->visual     = DIRECTCOLOR;
     TRACEMS2(cinfo, 1, JTRC_RLE, width, height);
   } else
     ERREXIT(cinfo, JERR_RLE_UNSUPPORTED);
-  
+
   if (source->visual == GRAYSCALE || source->visual == MAPPEDGRAY) {
     cinfo->in_color_space   = JCS_GRAYSCALE;
     cinfo->input_components = 1;
@@ -348,7 +348,7 @@
   source->row = cinfo->image_height;
 
   /* And fetch the topmost (bottommost) row */
-  return (*source->pub.get_pixel_rows) (cinfo, sinfo);   
+  return (*source->pub.get_pixel_rows) (cinfo, sinfo);
 }
 
 
diff --git a/rdswitch.c b/rdswitch.c
index 7f3c576..6549974 100644
--- a/rdswitch.c
+++ b/rdswitch.c
@@ -3,21 +3,21 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modifications:
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to process some of cjpeg's more complicated
  * command-line switches.  Switches processed here are:
- *	-qtables file		Read quantization tables from text file
- *	-scans file		Read scan script from text file
- *	-quality N[,N,...]	Set quality ratings
- *	-qslots N[,N,...]	Set component quantization table selectors
- *	-sample HxV[,HxV,...]	Set component sampling factors
+ *      -qtables file           Read quantization tables from text file
+ *      -scans file             Read scan script from text file
+ *      -quality N[,N,...]      Set quality ratings
+ *      -qslots N[,N,...]       Set component quantization table selectors
+ *      -sample HxV[,HxV,...]   Set component sampling factors
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include <ctype.h>		/* to declare isdigit(), isspace() */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include <ctype.h>              /* to declare isdigit(), isspace() */
 
 
 LOCAL(int)
@@ -26,7 +26,7 @@
 /* A comment/newline sequence is returned as a newline */
 {
   register int ch;
-  
+
   ch = getc(file);
   if (ch == '#') {
     do {
@@ -44,7 +44,7 @@
 {
   register int ch;
   register long val;
-  
+
   /* Skip any leading whitespace, detect EOF */
   do {
     ch = text_getc(file);
@@ -53,7 +53,7 @@
       return FALSE;
     }
   } while (isspace(ch));
-  
+
   if (! isdigit(ch)) {
     *termchar = ch;
     return FALSE;
@@ -108,15 +108,15 @@
     table[0] = (unsigned int) val;
     for (i = 1; i < DCTSIZE2; i++) {
       if (! read_text_integer(fp, &val, &termchar)) {
-	fprintf(stderr, "Invalid table data in file %s\n", filename);
-	fclose(fp);
-	return FALSE;
+        fprintf(stderr, "Invalid table data in file %s\n", filename);
+        fclose(fp);
+        return FALSE;
       }
       table[i] = (unsigned int) val;
     }
 #if JPEG_LIB_VERSION >= 70
     jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno],
-			 force_baseline);
+                         force_baseline);
 #else
     jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno],
                          force_baseline);
@@ -150,7 +150,7 @@
   ch = *termchar;
   while (ch != EOF && isspace(ch))
     ch = text_getc(file);
-  if (isdigit(ch)) {		/* oops, put it back */
+  if (isdigit(ch)) {            /* oops, put it back */
     if (ungetc(ch, file) == EOF)
       return FALSE;
     ch = ' ';
@@ -188,7 +188,7 @@
   int scanno, ncomps, termchar;
   long val;
   jpeg_scan_info * scanptr;
-#define MAX_SCANS  100		/* quite arbitrary limit */
+#define MAX_SCANS  100          /* quite arbitrary limit */
   jpeg_scan_info scans[MAX_SCANS];
 
   if ((fp = fopen(filename, "r")) == NULL) {
@@ -208,29 +208,29 @@
     ncomps = 1;
     while (termchar == ' ') {
       if (ncomps >= MAX_COMPS_IN_SCAN) {
-	fprintf(stderr, "Too many components in one scan in file %s\n",
-		filename);
-	fclose(fp);
-	return FALSE;
+        fprintf(stderr, "Too many components in one scan in file %s\n",
+                filename);
+        fclose(fp);
+        return FALSE;
       }
       if (! read_scan_integer(fp, &val, &termchar))
-	goto bogus;
+        goto bogus;
       scanptr->component_index[ncomps] = (int) val;
       ncomps++;
     }
     scanptr->comps_in_scan = ncomps;
     if (termchar == ':') {
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Ss = (int) val;
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Se = (int) val;
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Ah = (int) val;
       if (! read_scan_integer(fp, &val, &termchar))
-	goto bogus;
+        goto bogus;
       scanptr->Al = (int) val;
     } else {
       /* set non-progressive parameters */
@@ -261,7 +261,7 @@
      */
     scanptr = (jpeg_scan_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  scanno * SIZEOF(jpeg_scan_info));
+                                  scanno * SIZEOF(jpeg_scan_info));
     MEMCOPY(scanptr, scans, scanno * SIZEOF(jpeg_scan_info));
     cinfo->scan_info = scanptr;
     cinfo->num_scans = scanno;
@@ -305,9 +305,9 @@
 jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
 {
   jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       q_scale_factor[0], force_baseline);
+                       q_scale_factor[0], force_baseline);
   jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       q_scale_factor[1], force_baseline);
+                       q_scale_factor[1], force_baseline);
 }
 #endif
 
@@ -319,17 +319,17 @@
  * If there are more q-table slots than parameters, the last value is replicated.
  */
 {
-  int val = 75;			/* default value */
+  int val = 75;                 /* default value */
   int tblno;
   char ch;
 
   for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
     if (*arg) {
-      ch = ',';			/* if not set by sscanf, will be ',' */
+      ch = ',';                 /* if not set by sscanf, will be ',' */
       if (sscanf(arg, "%d%c", &val, &ch) < 1)
-	return FALSE;
-      if (ch != ',')		/* syntax check */
-	return FALSE;
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
       /* Convert user 0-100 rating to percentage scaling */
 #if JPEG_LIB_VERSION >= 70
       cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
@@ -337,7 +337,7 @@
       q_scale_factor[tblno] = jpeg_quality_scaling(val);
 #endif
       while (*arg && *arg++ != ',') /* advance to next segment of arg string */
-	;
+        ;
     } else {
       /* reached end of parameter, set remaining factors to last value */
 #if JPEG_LIB_VERSION >= 70
@@ -359,25 +359,25 @@
  * If there are more components than parameters, the last value is replicated.
  */
 {
-  int val = 0;			/* default table # */
+  int val = 0;                  /* default table # */
   int ci;
   char ch;
 
   for (ci = 0; ci < MAX_COMPONENTS; ci++) {
     if (*arg) {
-      ch = ',';			/* if not set by sscanf, will be ',' */
+      ch = ',';                 /* if not set by sscanf, will be ',' */
       if (sscanf(arg, "%d%c", &val, &ch) < 1)
-	return FALSE;
-      if (ch != ',')		/* syntax check */
-	return FALSE;
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
       if (val < 0 || val >= NUM_QUANT_TBLS) {
-	fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n",
-		NUM_QUANT_TBLS-1);
-	return FALSE;
+        fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n",
+                NUM_QUANT_TBLS-1);
+        return FALSE;
       }
       cinfo->comp_info[ci].quant_tbl_no = val;
       while (*arg && *arg++ != ',') /* advance to next segment of arg string */
-	;
+        ;
     } else {
       /* reached end of parameter, set remaining components to last table */
       cinfo->comp_info[ci].quant_tbl_no = val;
@@ -399,19 +399,19 @@
 
   for (ci = 0; ci < MAX_COMPONENTS; ci++) {
     if (*arg) {
-      ch2 = ',';		/* if not set by sscanf, will be ',' */
+      ch2 = ',';                /* if not set by sscanf, will be ',' */
       if (sscanf(arg, "%d%c%d%c", &val1, &ch1, &val2, &ch2) < 3)
-	return FALSE;
+        return FALSE;
       if ((ch1 != 'x' && ch1 != 'X') || ch2 != ',') /* syntax check */
-	return FALSE;
+        return FALSE;
       if (val1 <= 0 || val1 > 4 || val2 <= 0 || val2 > 4) {
-	fprintf(stderr, "JPEG sampling factors must be 1..4\n");
-	return FALSE;
+        fprintf(stderr, "JPEG sampling factors must be 1..4\n");
+        return FALSE;
       }
       cinfo->comp_info[ci].h_samp_factor = val1;
       cinfo->comp_info[ci].v_samp_factor = val2;
       while (*arg && *arg++ != ',') /* advance to next segment of arg string */
-	;
+        ;
     } else {
       /* reached end of parameter, set remaining components to 1x1 sampling */
       cinfo->comp_info[ci].h_samp_factor = 1;
diff --git a/rdtarga.c b/rdtarga.c
index 4c2cd26..e8bbaf6 100644
--- a/rdtarga.c
+++ b/rdtarga.c
@@ -17,7 +17,7 @@
  * Based on code contributed by Lee Daniel Crocker.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef TARGA_SUPPORTED
 
@@ -26,19 +26,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /* Private version of data source object */
@@ -48,12 +48,12 @@
 typedef struct _tga_source_struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  j_compress_ptr cinfo;		/* back link saves passing separate parm */
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
 
-  JSAMPARRAY colormap;		/* Targa colormap (converted to my format) */
+  JSAMPARRAY colormap;          /* Targa colormap (converted to my format) */
 
-  jvirt_sarray_ptr whole_image;	/* Needed if funny input row order */
-  JDIMENSION current_row;	/* Current logical row number to read */
+  jvirt_sarray_ptr whole_image; /* Needed if funny input row order */
+  JDIMENSION current_row;       /* Current logical row number to read */
 
   /* Pointer to routine to extract next Targa pixel from input file */
   JMETHOD(void, read_pixel, (tga_source_ptr sinfo));
@@ -61,15 +61,15 @@
   /* Result of read_pixel is delivered here: */
   U_CHAR tga_pixel[4];
 
-  int pixel_size;		/* Bytes per Targa pixel (1 to 4) */
+  int pixel_size;               /* Bytes per Targa pixel (1 to 4) */
 
   /* State info for reading RLE-coded pixels; both counts must be init to 0 */
-  int block_count;		/* # of pixels remaining in RLE block */
-  int dup_pixel_count;		/* # of times to duplicate previous pixel */
+  int block_count;              /* # of pixels remaining in RLE block */
+  int dup_pixel_count;          /* # of times to duplicate previous pixel */
 
   /* This saves the correct pixel-row-expansion method for preload_image */
   JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
+                                       cjpeg_source_ptr sinfo));
 } tga_source_struct;
 
 
@@ -148,9 +148,9 @@
   /* Time to read RLE block header? */
   if (--sinfo->block_count < 0) { /* decrement pixels remaining in block */
     i = read_byte(sinfo);
-    if (i & 0x80) {		/* Start of duplicate-pixel block? */
+    if (i & 0x80) {             /* Start of duplicate-pixel block? */
       sinfo->dup_pixel_count = i & 0x7F; /* number of dups after this one */
-      sinfo->block_count = 0;	/* then read new block header */
+      sinfo->block_count = 0;   /* then read new block header */
     } else {
       sinfo->block_count = i & 0x7F; /* number of pixels after this one */
     }
@@ -177,7 +177,7 @@
   tga_source_ptr source = (tga_source_ptr) sinfo;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -215,7 +215,7 @@
   register int t;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -242,7 +242,7 @@
   tga_source_ptr source = (tga_source_ptr) sinfo;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -338,8 +338,8 @@
   unsigned int width, height, maplen;
   boolean is_bottom_up;
 
-#define GET_2B(offset)	((unsigned int) UCH(targaheader[offset]) + \
-			 (((unsigned int) UCH(targaheader[offset+1])) << 8))
+#define GET_2B(offset)  ((unsigned int) UCH(targaheader[offset]) + \
+                         (((unsigned int) UCH(targaheader[offset+1])) << 8))
 
   if (! ReadOK(source->pub.input_file, targaheader, 18))
     ERREXIT(cinfo, JERR_INPUT_EOF);
@@ -355,17 +355,17 @@
   width = GET_2B(12);
   height = GET_2B(14);
   source->pixel_size = UCH(targaheader[16]) >> 3;
-  flags = UCH(targaheader[17]);	/* Image Descriptor byte */
+  flags = UCH(targaheader[17]); /* Image Descriptor byte */
 
-  is_bottom_up = ((flags & 0x20) == 0);	/* bit 5 set => top-down */
-  interlace_type = flags >> 6;	/* bits 6/7 are interlace code */
+  is_bottom_up = ((flags & 0x20) == 0); /* bit 5 set => top-down */
+  interlace_type = flags >> 6;  /* bits 6/7 are interlace code */
 
-  if (cmaptype > 1 ||		/* cmaptype must be 0 or 1 */
+  if (cmaptype > 1 ||           /* cmaptype must be 0 or 1 */
       source->pixel_size < 1 || source->pixel_size > 4 ||
       (UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */
-      interlace_type != 0)	/* currently don't allow interlaced image */
+      interlace_type != 0)      /* currently don't allow interlaced image */
     ERREXIT(cinfo, JERR_TGA_BADPARMS);
-  
+
   if (subtype > 8) {
     /* It's an RLE-coded file */
     source->read_pixel = read_rle_pixel;
@@ -377,18 +377,18 @@
   }
 
   /* Now should have subtype 1, 2, or 3 */
-  components = 3;		/* until proven different */
+  components = 3;               /* until proven different */
   cinfo->in_color_space = JCS_RGB;
 
   switch (subtype) {
-  case 1:			/* Colormapped image */
+  case 1:                       /* Colormapped image */
     if (source->pixel_size == 1 && cmaptype == 1)
       source->get_pixel_rows = get_8bit_row;
     else
       ERREXIT(cinfo, JERR_TGA_BADPARMS);
     TRACEMS2(cinfo, 1, JTRC_TGA_MAPPED, width, height);
     break;
-  case 2:			/* RGB image */
+  case 2:                       /* RGB image */
     switch (source->pixel_size) {
     case 2:
       source->get_pixel_rows = get_16bit_row;
@@ -405,7 +405,7 @@
     }
     TRACEMS2(cinfo, 1, JTRC_TGA, width, height);
     break;
-  case 3:			/* Grayscale image */
+  case 3:                       /* Grayscale image */
     components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     if (source->pixel_size == 1)
@@ -440,8 +440,8 @@
     source->pub.buffer_height = 1;
     source->pub.get_pixel_rows = source->get_pixel_rows;
   }
-  
-  while (idlen--)		/* Throw away ID field */
+
+  while (idlen--)               /* Throw away ID field */
     (void) read_byte(source);
 
   if (maplen > 0) {
@@ -453,7 +453,7 @@
     /* and read it from the file */
     read_colormap(source, (int) maplen, UCH(targaheader[7]));
   } else {
-    if (cmaptype)		/* but you promised a cmap! */
+    if (cmaptype)               /* but you promised a cmap! */
       ERREXIT(cinfo, JERR_TGA_BADPARMS);
     source->colormap = NULL;
   }
@@ -488,8 +488,8 @@
   /* Create module interface object */
   source = (tga_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(tga_source_struct));
-  source->cinfo = cinfo;	/* make back link for subroutines */
+                                  SIZEOF(tga_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_tga;
   source->pub.finish_input = finish_input_tga;
diff --git a/release/ReadMe.rtf b/release/ReadMe.rtf
index 06c94ab..2ae4878 100644
--- a/release/ReadMe.rtf
+++ b/release/ReadMe.rtf
@@ -1,13 +1,13 @@
 {\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf360
 {\fonttbl\f0\fswiss\fcharset0 Helvetica;}
 {\colortbl;\red255\green255\blue255;}
-\margl1440\margr1440\vieww15200\viewh9600\viewkind0
+\margl1440\margr1440\vieww26380\viewh15840\viewkind0
 \deftab720
 \pard\pardeftab720\ql\qnatural
 
-\f0\fs24 \cf0 libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as the unmodified version of libjpeg, all else being equal.\
+\f0\fs24 \cf0 libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines.  In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.\
 \
-libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for 32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman encoding/decoding, and various bug fixes.  The goal was to produce a fully open-source codec that could replace the partially closed-source TurboJPEG/IPP codec used by VirtualGL and TurboVNC.  libjpeg-turbo generally achieves 80-120% of the performance of TurboJPEG/IPP.  It is faster in some areas but slower in others.\
+libjpeg-turbo implements both the traditional libjpeg API and the less powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface.\
 \
-In early 2010, libjpeg-turbo spun off into its own independent project, with the goal of making high-speed JPEG compression/decompression technology available to a broader range of users and developers.\
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, and in early 2010, libjpeg-turbo spun off into an independent project, with the goal of making high-speed JPEG compression/decompression technology available to a broader range of users and developers.\
 }
\ No newline at end of file
diff --git a/release/Welcome.rtf b/release/Welcome.rtf
index 9082f65..a570c5b 100755
--- a/release/Welcome.rtf
+++ b/release/Welcome.rtf
@@ -1,4 +1,4 @@
-{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf350
+{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf360
 {\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\fmodern\fcharset0 CourierNewPSMT;}
 {\colortbl;\red255\green255\blue255;}
 \margl1440\margr1440\vieww9000\viewh8400\viewkind0
@@ -14,6 +14,4 @@
 
 \f0 \cf0 \
 from the command line.\
-\
-If TurboJPEG/IPP is currently installed, then it must be removed prior to installing libjpeg-turbo.\
 }
\ No newline at end of file
diff --git a/release/copyright b/release/copyright
deleted file mode 100755
index 125388d..0000000
--- a/release/copyright
+++ /dev/null
@@ -1 +0,0 @@
-libjpeg-turbo is released under a BSD-style license (see README and README-turbo.txt)
diff --git a/release/deb-control.tmpl b/release/deb-control.tmpl
index 9cf7c46..510b1d6 100644
--- a/release/deb-control.tmpl
+++ b/release/deb-control.tmpl
@@ -4,23 +4,28 @@
 Priority: optional
 Architecture: {__ARCH}
 Essential: no
-Maintainer: The libjpeg-turbo Project [http://www.libjpeg-turbo.org]
+Maintainer: The libjpeg-turbo Project <information@libjpeg-turbo.org>
+Homepage: http://www.libjpeg-turbo.org
+Installed-Size: {__SIZE}
 Description: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
- libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX,
- SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86,
+ libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+ NEON) to accelerate baseline JPEG compression and decompression on x86,
  x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as
- fast as the unmodified version of libjpeg, all else being equal.
+ fast as libjpeg, all else being equal.  On other types of systems,
+ libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue
+ of its highly-optimized Huffman coding routines.  In many cases, the
+ performance of libjpeg-turbo rivals that of proprietary high-speed JPEG
+ codecs.
  .
- libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but
- the TigerVNC and VirtualGL projects made numerous enhancements to the codec
- in 2009, including improved support for Mac OS X, 64-bit support, support
- for 32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated
- Huffman encoding/decoding, and various bug fixes.  The goal was to produce a
- fully open-source codec that could replace the partially closed-source
- TurboJPEG/IPP codec used by VirtualGL and TurboVNC.  libjpeg-turbo generally
- achieves 80-120% of the performance of TurboJPEG/IPP.  It is faster in some
- areas but slower in others.
+ libjpeg-turbo implements both the traditional libjpeg API and the less
+ powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+ colorspace extensions that allow it to compress from/decompress to 32-bit and
+ big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+ interface.
  .
- In early 2010, libjpeg-turbo spun off into its own independent project, with
- the goal of making high-speed JPEG compression/decompression technology
- available to a broader range of users and developers.
+ libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+ derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+ VirtualGL projects made numerous enhancements to the codec in 2009, and in
+ early 2010, libjpeg-turbo spun off into an independent project, with the goal
+ of making high-speed JPEG compression/decompression technology available to a
+ broader range of users and developers.
diff --git a/release/libjpeg-turbo.nsi.in b/release/libjpeg-turbo.nsi.in
index f4643fd..4f65303 100755
--- a/release/libjpeg-turbo.nsi.in
+++ b/release/libjpeg-turbo.nsi.in
@@ -1,7 +1,7 @@
 !include x64.nsh
 Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@"
 OutFile "@CMAKE_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe"
-InstallDir c:\@INST_DIR@
+InstallDir "@INST_DIR@"
 
 SetCompressor bzip2
 
@@ -41,6 +41,11 @@
 !endif
 	SetOutPath $INSTDIR\bin
 !ifdef GCC
+	File "@CMAKE_BINARY_DIR@\libturbojpeg.dll"
+!else
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.dll"
+!endif
+!ifdef GCC
 	File "/oname=libjpeg-@DLL_VERSION@.dll" "@CMAKE_BINARY_DIR@\sharedlib\libjpeg-*.dll" 
 !else
 	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpeg@DLL_VERSION@.dll"
@@ -82,12 +87,12 @@
 	File "@CMAKE_SOURCE_DIR@\usage.txt"
 	File "@CMAKE_SOURCE_DIR@\wizard.txt"
 
-	WriteRegStr HKLM "SOFTWARE\@INST_DIR@ @VERSION@" "Install_Dir" "$INSTDIR"
+	WriteRegStr HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" "Install_Dir" "$INSTDIR"
 
-	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_DIR@ @VERSION@" "DisplayName" "@CMAKE_PROJECT_NAME@ SDK v@VERSION@ for @INST_PLATFORM@"
-	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_DIR@ @VERSION@" "UninstallString" '"$INSTDIR\uninstall_@VERSION@.exe"'
-	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_DIR@ @VERSION@" "NoModify" 1
-	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_DIR@ @VERSION@" "NoRepair" 1
+	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "DisplayName" "@CMAKE_PROJECT_NAME@ SDK v@VERSION@ for @INST_PLATFORM@"
+	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "UninstallString" '"$INSTDIR\uninstall_@VERSION@.exe"'
+	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoModify" 1
+	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoRepair" 1
 	WriteUninstaller "uninstall_@VERSION@.exe"
 SectionEnd
 
@@ -100,19 +105,21 @@
 
 	SetShellVarContext all
 
-	DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_DIR@ @VERSION@"
-	DeleteRegKey HKLM "SOFTWARE\@INST_DIR@ @VERSION@"
+	DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@"
+	DeleteRegKey HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@"
 
 !ifdef GCC
 	Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll
 	Delete $SYSDIR\libturbojpeg.dll
-	Delete $INSTDIR\lib\libturbojpeg.dll.a"
-	Delete $INSTDIR\lib\libturbojpeg.a"
-	Delete $INSTDIR\lib\libjpeg.dll.a"
-	Delete $INSTDIR\lib\libjpeg.a"
+	Delete $INSTDIR\bin\libturbojpeg.dll
+	Delete $INSTDIR\lib\libturbojpeg.dll.a
+	Delete $INSTDIR\lib\libturbojpeg.a
+	Delete $INSTDIR\lib\libjpeg.dll.a
+	Delete $INSTDIR\lib\libjpeg.a
 !else
 	Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll
 	Delete $SYSDIR\turbojpeg.dll
+	Delete $INSTDIR\bin\turbojpeg.dll
 	Delete $INSTDIR\lib\jpeg.lib
 	Delete $INSTDIR\lib\jpeg-static.lib
 	Delete $INSTDIR\lib\turbojpeg.lib
@@ -127,11 +134,11 @@
 	Delete $INSTDIR\bin\tjbench.exe
 	Delete $INSTDIR\bin\rdjpgcom.exe
 	Delete $INSTDIR\bin\wrjpgcom.exe
-	Delete $INSTDIR\include\jconfig.h"
-	Delete $INSTDIR\include\jerror.h"
-	Delete $INSTDIR\include\jmorecfg.h"
-	Delete $INSTDIR\include\jpeglib.h"
-	Delete $INSTDIR\include\turbojpeg.h"
+	Delete $INSTDIR\include\jconfig.h
+	Delete $INSTDIR\include\jerror.h
+	Delete $INSTDIR\include\jmorecfg.h
+	Delete $INSTDIR\include\jpeglib.h
+	Delete $INSTDIR\include\turbojpeg.h
 	Delete $INSTDIR\uninstall_@VERSION@.exe
 	Delete $INSTDIR\doc\README
 	Delete $INSTDIR\doc\README-turbo.txt
diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in
index 9e0ff88..207d638 100644
--- a/release/libjpeg-turbo.spec.in
+++ b/release/libjpeg-turbo.spec.in
@@ -1,11 +1,32 @@
+# Path under which libjpeg-turbo should be installed
+%define _prefix %{__prefix}
+
+# Path under which executables should be installed
+%define _bindir %{__bindir}
+
+# Path under which Java classes and man pages should be installed
+%define _datadir %{__datadir}
+
+# Path under which docs should be installed
+%define _docdir /usr/share/doc/%{name}-%{version}
+
+# Path under which headers should be installed
+%define _includedir %{__includedir}
+
+# _libdir is set to %{_prefix}/%{_lib} by default
 %ifarch x86_64
-%define __lib lib64
+%define _lib lib64
 %else
-%define __lib lib
+%if "%{_prefix}" == "/opt/libjpeg-turbo"
+%define _lib lib32
+%endif
 %endif
 
+# Path under which man pages should be installed
+%define _mandir %{__mandir}
+
 Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
-Name: @PACKAGE_NAME@
+Name: @PKGNAME@
 Version: @VERSION@
 Vendor: The libjpeg-turbo Project
 URL: http://www.libjpeg-turbo.org
@@ -15,51 +36,66 @@
 License: BSD-style
 BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release}
 Prereq: /sbin/ldconfig
-Provides: %{name} = %{version}-%{release}, turbojpeg = 2.00
-Obsoletes: turbojpeg
+%ifarch x86_64
+Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit)
+%else
+Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so
+%endif
 
 %description
-libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX,
-SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86,
-x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as
-fast as the unmodified version of libjpeg, all else being equal.
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64,
+and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as
+libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can
+still outperform libjpeg by a significant amount, by virtue of its
+highly-optimized Huffman coding routines.  In many cases, the performance of
+libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
 
-libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but
-the TigerVNC and VirtualGL projects made numerous enhancements to the codec in
-2009, including improved support for Mac OS X, 64-bit support, support for
-32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman
-encoding/decoding, and various bug fixes.  The goal was to produce a fully
-open-source codec that could replace the partially closed-source TurboJPEG/IPP
-codec used by VirtualGL and TurboVNC.  libjpeg-turbo generally achieves 80-120%
-of the performance of TurboJPEG/IPP.  It is faster in some areas but slower in
-others.
+libjpeg-turbo implements both the traditional libjpeg API and the less
+powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+colorspace extensions that allow it to compress from/decompress to 32-bit and
+big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+interface.
 
-In early 2010, libjpeg-turbo spun off into its own independent project, with
-the goal of making high-speed JPEG compression/decompression technology
-available to a broader range of users and developers.
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+VirtualGL projects made numerous enhancements to the codec in 2009, and in
+early 2010, libjpeg-turbo spun off into an independent project, with the goal
+of making high-speed JPEG compression/decompression technology available to a
+broader range of users and developers.
 
 #-->%prep
-#-->%setup -q
+#-->%setup -q -n libjpeg-turbo-%{version}
 
 #-->%build
-#-->./configure libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ --with-pic @RPM_CONFIG_ARGS@
-#-->make DESTDIR=$RPM_BUILD_ROOT libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man
+#-->./configure prefix=%{_prefix} bindir=%{_bindir} datadir=%{_datadir} \
+#-->	docdir=%{_docdir} includedir=%{_includedir} libdir=%{_libdir} \
+#-->	mandir=%{_mandir} JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ \
+#-->	SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ \
+#-->	--with-pic @RPM_CONFIG_ARGS@
+#-->make DESTDIR=$RPM_BUILD_ROOT
 
 %install
 
 rm -rf $RPM_BUILD_ROOT
-make install DESTDIR=$RPM_BUILD_ROOT libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man docdir=/opt/%{name}/doc exampledir=/opt/%{name}/doc
-rm -f $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/*.la
-mkdir -p $RPM_BUILD_ROOT/usr/%{__lib}
-mv $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/libturbojpeg.* $RPM_BUILD_ROOT/usr/%{__lib}
-/sbin/ldconfig -n $RPM_BUILD_ROOT/opt/%{name}/%{__lib}
-/sbin/ldconfig -n $RPM_BUILD_ROOT/usr/%{__lib}
-mkdir -p $RPM_BUILD_ROOT/usr/include
-mv $RPM_BUILD_ROOT/opt/%{name}/include/turbojpeg.h $RPM_BUILD_ROOT/usr/include
-ln -fs /usr/include/turbojpeg.h $RPM_BUILD_ROOT/opt/%{name}/include/
-ln -fs /usr/%{__lib}/libturbojpeg.a $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/
-mkdir -p $RPM_BUILD_ROOT%{_defaultdocdir}
-mv $RPM_BUILD_ROOT/opt/%{name}/doc $RPM_BUILD_ROOT%{_defaultdocdir}/%{name}-%{version}
+make install DESTDIR=$RPM_BUILD_ROOT docdir=%{_docdir} exampledir=%{_docdir}
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
+/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir}
+
+#-->%if 0
+
+LJT_LIBDIR=%{__libdir}
+if [ ! "$LJT_LIBDIR" = "%{_libdir}" ]; then
+	echo ERROR: libjpeg-turbo must be configured with libdir=%{_prefix}/%{_lib} when generating an in-tree RPM for this architecture.
+	exit 1
+fi
+
+#-->%endif
+
+LJT_DOCDIR=%{__docdir}
+if [ "%{_prefix}" = "/opt/libjpeg-turbo" -a "$LJT_DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs %{_docdir} $RPM_BUILD_ROOT/$LJT_DOCDIR
+fi
 
 %post -p /sbin/ldconfig
 
@@ -70,42 +106,44 @@
 
 %files
 %defattr(-,root,root)
-%dir %{_defaultdocdir}/%{name}-%{version}
-%doc %{_defaultdocdir}/%{name}-%{version}/*
-%dir /opt/%{name}
-%dir /opt/%{name}/bin
-/opt/%{name}/bin/cjpeg
-/opt/%{name}/bin/djpeg
-/opt/%{name}/bin/jpegtran
-/opt/%{name}/bin/tjbench
-/opt/%{name}/bin/rdjpgcom
-/opt/%{name}/bin/wrjpgcom
-%dir /opt/%{name}/%{__lib}
-%ifarch x86_64
-%else
-/opt/%{name}/lib32
+%dir %{_docdir}
+%doc %{_docdir}/*
+%dir %{_prefix}
+%if "%{_prefix}" == "/opt/libjpeg-turbo" && "%{_docdir}" != "%{_prefix}/doc"
+ %{_prefix}/doc
 %endif
-/opt/%{name}/%{__lib}/libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@
-/opt/%{name}/%{__lib}/libjpeg.so.@SO_MAJOR_VERSION@
-/opt/%{name}/%{__lib}/libjpeg.so
-/opt/%{name}/%{__lib}/libjpeg.a
-/opt/%{name}/%{__lib}/libturbojpeg.a
-/usr/%{__lib}/libturbojpeg.so
-/usr/%{__lib}/libturbojpeg.a
-/usr/include/turbojpeg.h
-%dir /opt/%{name}/include
-/opt/%{name}/include/jconfig.h
-/opt/%{name}/include/jerror.h
-/opt/%{name}/include/jmorecfg.h
-/opt/%{name}/include/jpeglib.h
-/opt/%{name}/include/turbojpeg.h
-%dir /opt/%{name}/man
-%dir /opt/%{name}/man/man1
-/opt/%{name}/man/man1/cjpeg.1*
-/opt/%{name}/man/man1/djpeg.1*
-/opt/%{name}/man/man1/jpegtran.1*
-/opt/%{name}/man/man1/rdjpgcom.1*
-/opt/%{name}/man/man1/wrjpgcom.1*
+%dir %{_bindir}
+%{_bindir}/cjpeg
+%{_bindir}/djpeg
+%{_bindir}/jpegtran
+%{_bindir}/tjbench
+%{_bindir}/rdjpgcom
+%{_bindir}/wrjpgcom
+%dir %{_libdir}
+%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
+%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
+%{_libdir}/libjpeg.so
+%{_libdir}/libjpeg.a
+%{_libdir}/libturbojpeg.so.0.0.0
+%{_libdir}/libturbojpeg.so.0
+%{_libdir}/libturbojpeg.so
+%{_libdir}/libturbojpeg.a
+%dir %{_includedir}
+%{_includedir}/jconfig.h
+%{_includedir}/jerror.h
+%{_includedir}/jmorecfg.h
+%{_includedir}/jpeglib.h
+%{_includedir}/turbojpeg.h
+%dir %{_mandir}
+%dir %{_mandir}/man1
+%{_mandir}/man1/cjpeg.1*
+%{_mandir}/man1/djpeg.1*
+%{_mandir}/man1/jpegtran.1*
+%{_mandir}/man1/rdjpgcom.1*
+%{_mandir}/man1/wrjpgcom.1*
+%if "%{_prefix}" != "%{_datadir}"
+ %dir %{_datadir}
+%endif
 @JAVA_RPM_CONTENTS_1@
 @JAVA_RPM_CONTENTS_2@
 
diff --git a/release/makecygwinpkg.in b/release/makecygwinpkg.in
index 32cecdd..f303546 100755
--- a/release/makecygwinpkg.in
+++ b/release/makecygwinpkg.in
@@ -15,21 +15,27 @@
 	fi
 }
 
-PACKAGE_NAME=@PACKAGE_NAME@
+PACKAGE_NAME=@PKGNAME@
 VERSION=@VERSION@
+BUILD=@BUILD@
 SRCDIR=@abs_top_srcdir@
 
+PREFIX=%{__prefix}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
 umask 022
-rm -f $PACKAGE_NAME-$VERSION-cygwin.tar.bz2
+rm -f $PACKAGE_NAME-$VERSION-$BUILD.tar.bz2
 TMPDIR=`mktemp -d /tmp/ljtbuild.XXXXXX`
 __PWD=`pwd`
-make install DESTDIR=$TMPDIR/pkg mandir=/opt/$PACKAGE_NAME/man \
-	docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \
+make install DESTDIR=$TMPDIR/pkg docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \
 	exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION
-rm $TMPDIR/pkg/opt/$PACKAGE_NAME/lib/*.la
-ln -fs lib $TMPDIR/pkg/opt/$PACKAGE_NAME/lib32
+rm $TMPDIR/pkg$LIBDIR/*.la
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs /usr/share/doc/$PACKAGE_NAME-$VERSION $TMPDIR/pkg$DOCDIR
+fi
 cd $TMPDIR/pkg
-tar cfj ../$PACKAGE_NAME-$VERSION-cygwin.tar.bz2 *
+tar cfj ../$PACKAGE_NAME-$VERSION-$BUILD.tar.bz2 *
 cd $__PWD
 mv $TMPDIR/*.tar.bz2 .
 
diff --git a/release/makedpkg.in b/release/makedpkg.in
index 628e0a6..80cc89b 100644
--- a/release/makedpkg.in
+++ b/release/makedpkg.in
@@ -7,14 +7,20 @@
 trap onexit EXIT
 
 TMPDIR=
+SUDO=
 
 onexit()
 {
 	if [ ! "$TMPDIR" = "" ]; then
-		sudo rm -rf $TMPDIR
+		$SUDO rm -rf $TMPDIR
 	fi
 }
 
+uid()
+{
+	id | cut -f2 -d = | cut -f1 -d \(;
+}
+
 makedeb()
 {
 	SUPPLEMENT=$1
@@ -23,65 +29,52 @@
 	if [ $SUPPLEMENT = 1 ]; then
 		PACKAGE_NAME=$PACKAGE_NAME\32
 		DEBARCH=amd64
-		__LIB=lib32
-	else
-		__LIB=lib
 	fi
 
 	umask 022
 	rm -f $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb
 	TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX`
 	mkdir $TMPDIR/DEBIAN
-	(cat $SRCDIR/release/deb-control.tmpl | sed s/{__PKGNAME}/$PACKAGE_NAME/g \
-		| sed s/{__VERSION}/$VERSION/g | sed s/{__BUILD}/$BUILD/g \
-		| sed s/{__ARCH}/$DEBARCH/g > $TMPDIR/DEBIAN/control)
-
-	make install prefix=$TMPDIR/opt/$DIRNAME libdir=$TMPDIR/opt/$DIRNAME/$__LIB \
-		mandir=$TMPDIR/opt/$DIRNAME/man \
-		docdir=$TMPDIR/usr/share/doc/$DIRNAME-$VERSION \
-		exampledir=$TMPDIR/usr/share/doc/$DIRNAME-$VERSION
-	rm -f $TMPDIR/opt/$DIRNAME/$__LIB/*.la
 
 	if [ $SUPPLEMENT = 1 ]; then
-		rm -rf $TMPDIR/opt/$DIRNAME/include
-		rm -rf $TMPDIR/opt/$DIRNAME/man
-		rm -rf $TMPDIR/opt/$DIRNAME/bin
-		rm -rf $TMPDIR/opt/$DIRNAME/classes
-		rm -rf $TMPDIR/usr
-	fi
-
-	mkdir -p $TMPDIR/usr/$__LIB
-	mv $TMPDIR/opt/$DIRNAME/$__LIB/libturbojpeg.* $TMPDIR/usr/$__LIB
-	ln -fs /usr/$__LIB/libturbojpeg.a $TMPDIR/opt/$DIRNAME/$__LIB/
-	/sbin/ldconfig -n $TMPDIR/opt/$DIRNAME/$__LIB
-	/sbin/ldconfig -n $TMPDIR/usr/$__LIB
-
-	if [ ! $SUPPLEMENT = 1 ]; then
-		if [ "$DEBARCH" = "amd64" ]; then
-			ln -fs lib $TMPDIR/opt/$DIRNAME/lib64
-		else
-			if [ ! "$__LIB" = "lib32" ]; then
-				ln -fs lib $TMPDIR/opt/$DIRNAME/lib32
-			fi
-		fi
-		mkdir -p $TMPDIR/usr/include
-		mv $TMPDIR/opt/$DIRNAME/include/turbojpeg.h $TMPDIR/usr/include
-		ln -fs /usr/include/turbojpeg.h $TMPDIR/opt/$DIRNAME/include/
+		make install DESTDIR=$TMPDIR bindir=/dummy/bin datadir=/dummy/data \
+			docdir=/dummy/doc includedir=/dummy/include mandir=/dummy/man
+		rm -f $TMPDIR$LIBDIR/*.la
+		rm -rf $TMPDIR/dummy
 	else
-		mkdir -p $TMPDIR/usr/lib/i386-linux-gnu
-		ln -fs /usr/lib32/libturbojpeg.so $TMPDIR/usr/lib/i386-linux-gnu/libturbojpeg.so
-		ln -fs /usr/lib32/libturbojpeg.a $TMPDIR/usr/lib/i386-linux-gnu/libturbojpeg.a
+		make install DESTDIR=$TMPDIR docdir=/usr/share/doc/$DIRNAME-$VERSION \
+			exampledir=/usr/share/doc/$DIRNAME-$VERSION
+		rm -f $TMPDIR$LIBDIR/*.la
+		if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+			ln -fs /usr/share/doc/$DIRNAME-$VERSION $TMPDIR$DOCDIR
+		fi
 	fi
 
-	sudo chown -Rh root:root $TMPDIR/*
+	SIZE=`du -s $TMPDIR | cut -f1`
+	(cat $SRCDIR/release/deb-control.tmpl | sed s/{__PKGNAME}/$PACKAGE_NAME/g \
+		| sed s/{__VERSION}/$VERSION/g | sed s/{__BUILD}/$BUILD/g \
+		| sed s/{__ARCH}/$DEBARCH/g | sed s/{__SIZE}/$SIZE/g \
+		> $TMPDIR/DEBIAN/control)
+
+
+	/sbin/ldconfig -n $TMPDIR$LIBDIR
+
+	$SUDO chown -Rh root:root $TMPDIR/*
 	dpkg -b $TMPDIR $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb
 }
 
-PACKAGE_NAME=@PACKAGE_NAME@
+PACKAGE_NAME=@PKGNAME@
 VERSION=@VERSION@
 BUILD=@BUILD@
 DEBARCH=@DEBARCH@
 SRCDIR=@abs_top_srcdir@
+PREFIX=%{__prefix}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
+if [ ! `uid` -eq 0 ]; then
+	SUDO=sudo
+fi
 
 makedeb 0
 if [ "$DEBARCH" = "i386" ]; then makedeb 1; fi
diff --git a/release/makemacpkg.in b/release/makemacpkg.in
index 7b43aa3..65abdef 100644
--- a/release/makemacpkg.in
+++ b/release/makemacpkg.in
@@ -17,11 +17,11 @@
 
 usage()
 {
-	echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARM v6 build dir]] [-buildarmv7 [ARM v7 build dir]]"
+	echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARM v6 build dir]] [-buildarmv7 [ARM v7 build dir]] [-buildarmv7s [ARM v7s build dir]]"
 	exit 1
 }
 
-PACKAGE_NAME=@PACKAGE_NAME@
+PACKAGE_NAME=@PKGNAME@
 VERSION=@VERSION@
 BUILD=@BUILD@
 SRCDIR=@abs_top_srcdir@
@@ -31,7 +31,15 @@
 BUILDARMV6=0
 BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7
 BUILDARMV7=0
+BUILDDIRARMV7S=@abs_top_srcdir@/iosarmv7s
+BUILDARMV7S=0
 WITH_JAVA=@WITH_JAVA@
+
+PREFIX=%{__prefix}
+BINDIR=%{__bindir}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
 while [ $# -gt 0 ]; do
 	case $1 in
 	-h*)             usage 0                   ;;
@@ -59,6 +67,14 @@
 			fi
 		fi
 		;;
+	-buildarmv7s)
+		BUILDARMV7S=1
+		if [ $# -gt 1 ]; then
+			if [[ ! "$2" =~ -.* ]]; then
+				BUILDDIRARMV7S=$2;  shift
+			fi
+		fi
+		;;
 	esac
 	shift
 done
@@ -72,15 +88,13 @@
 TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX`
 PKGROOT=$TMPDIR/pkg/Package_Root
 mkdir -p $PKGROOT
-mkdir -p $PKGROOT/opt/$PACKAGE_NAME/bin
-make install DESTDIR=$PKGROOT mandir=/opt/$PACKAGE_NAME/man \
-	docdir=/Library/Documentation/$PACKAGE_NAME \
+make install DESTDIR=$PKGROOT docdir=/Library/Documentation/$PACKAGE_NAME \
 	exampledir=/Library/Documentation/$PACKAGE_NAME
-rm -f $PKGROOT/opt/$PACKAGE_NAME/lib/*.la
-mkdir -p $PKGROOT/usr/lib
-mv $PKGROOT/opt/$PACKAGE_NAME/lib/libturbojpeg.* $PKGROOT/usr/lib
-mkdir -p $PKGROOT/usr/include
-mv $PKGROOT/opt/$PACKAGE_NAME/include/turbojpeg.h $PKGROOT/usr/include
+rm -f $PKGROOT$LIBDIR/*.la
+
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs /Library/Documentation/$PACKAGE_NAME $PKGROOT$DOCDIR
+fi
 
 if [ $BUILD32 = 1 ]; then
 	if [ ! -d $BUILDDIR32 ]; then
@@ -95,55 +109,55 @@
 	pushd $BUILDDIR32
 	make install DESTDIR=$TMPDIR/dist.x86
 	popd
-	if [ ! -h $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
-		! -h $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
+	if [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
+		! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
 		lipo -create \
-			-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib \
-			-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib \
-			-output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib
-	elif [ ! -h $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
-		! -h $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
+			-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
+			-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
+			-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
+	elif [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
+		! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
 		lipo -create \
-			-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
-			-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
-			-output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
+			-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
+			-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
+			-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
 	fi
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.a \
+		-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libturbojpeg.dylib \
-		-arch x86_64 $PKGROOT/usr/lib/libturbojpeg.dylib \
-		-output $PKGROOT/usr/lib/libturbojpeg.dylib
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.0.dylib \
+		-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/lib/libturbojpeg.a \
-		-arch x86_64 $PKGROOT/usr/lib/libturbojpeg.a \
-		-output $PKGROOT/usr/lib/libturbojpeg.a
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.a \
+		-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/cjpeg \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/cjpeg \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/cjpeg
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/cjpeg \
+		-arch x86_64 $PKGROOT/$BINDIR/cjpeg \
+		-output $PKGROOT/$BINDIR/cjpeg
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/djpeg \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/djpeg \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/djpeg
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/djpeg \
+		-arch x86_64 $PKGROOT/$BINDIR/djpeg \
+		-output $PKGROOT/$BINDIR/djpeg
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/jpegtran \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/jpegtran \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/jpegtran
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/jpegtran \
+		-arch x86_64 $PKGROOT/$BINDIR/jpegtran \
+		-output $PKGROOT/$BINDIR/jpegtran
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/tjbench \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/tjbench \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/tjbench
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/tjbench \
+		-arch x86_64 $PKGROOT/$BINDIR/tjbench \
+		-output $PKGROOT/$BINDIR/tjbench
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/rdjpgcom \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/rdjpgcom \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/rdjpgcom
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/rdjpgcom \
+		-arch x86_64 $PKGROOT/$BINDIR/rdjpgcom \
+		-output $PKGROOT/$BINDIR/rdjpgcom
 	lipo -create \
-		-arch i386 $TMPDIR/dist.x86/opt/$PACKAGE_NAME/bin/wrjpgcom \
-		-arch x86_64 $PKGROOT/opt/$PACKAGE_NAME/bin/wrjpgcom \
-		-output $PKGROOT/opt/$PACKAGE_NAME/bin/wrjpgcom
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/wrjpgcom \
+		-arch x86_64 $PKGROOT/$BINDIR/wrjpgcom \
+		-output $PKGROOT/$BINDIR/wrjpgcom
 
 fi
 
@@ -161,13 +175,13 @@
 	make install DESTDIR=$TMPDIR/dist.armv6
 	popd
 	lipo -create \
-		$PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-arch arm $TMPDIR/dist.armv6/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
 	lipo -create \
-		$PKGROOT/usr/lib/libturbojpeg.a \
-		-arch arm $TMPDIR/dist.armv6/opt/$PACKAGE_NAME/lib/libturbojpeg.a \
-		-output $PKGROOT/usr/lib/libturbojpeg.a
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
 fi
 
 if [ $BUILDARMV7 = 1 ]; then
@@ -184,28 +198,51 @@
 	make install DESTDIR=$TMPDIR/dist.armv7
 	popd
 	lipo -create \
-		$PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-arch arm $TMPDIR/dist.armv7/opt/$PACKAGE_NAME/lib/libjpeg.a \
-		-output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
 	lipo -create \
-		$PKGROOT/usr/lib/libturbojpeg.a \
-		-arch arm $TMPDIR/dist.armv7/opt/$PACKAGE_NAME/lib/libturbojpeg.a \
-		-output $PKGROOT/usr/lib/libturbojpeg.a
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
 fi
 
-install_name_tool -id /opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib
-install_name_tool -id libturbojpeg.dylib $PKGROOT/usr/lib/libturbojpeg.dylib
+if [ $BUILDARMV7S = 1 ]; then
+	if [ ! -d $BUILDDIRARMV7S ]; then
+		echo ERROR: ARM v7s build directory $BUILDDIRARMV7S does not exist
+		exit 1
+	fi
+	if [ ! -f $BUILDDIRARMV7S/Makefile ]; then
+		echo ERROR: ARM v7s build directory $BUILDDIRARMV7S is not configured
+		exit 1
+	fi
+	mkdir -p $TMPDIR/dist.armv7s
+	pushd $BUILDDIRARMV7S
+	make install DESTDIR=$TMPDIR/dist.armv7s
+	popd
+	lipo -create \
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
+	lipo -create \
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
+fi
 
-ln -fs /usr/include/turbojpeg.h $PKGROOT/opt/$PACKAGE_NAME/include/
-ln -fs /usr/lib/libturbojpeg.a $PKGROOT/opt/$PACKAGE_NAME/lib/
+install_name_tool -id $LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
+install_name_tool -id $LIBDIR/libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
+
 if [ $WITH_JAVA = 1 ]; then
-	ln -fs libturbojpeg.dylib $PKGROOT/usr/lib/libturbojpeg.jnilib
+	ln -fs libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.jnilib
 fi
-if [ ! -h $PKGROOT/opt/$PACKAGE_NAME/lib32 ]; then
-	ln -fs lib $PKGROOT/opt/$PACKAGE_NAME/lib32
-fi
-if [ ! -h $PKGROOT/opt/$PACKAGE_NAME/lib64 ]; then
-	ln -fs lib $PKGROOT/opt/$PACKAGE_NAME/lib64
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then
+	if [ ! -h $PKGROOT/$PREFIX/lib32 ]; then
+		ln -fs lib $PKGROOT/$PREFIX/lib32
+	fi
+	if [ ! -h $PKGROOT/$PREFIX/lib64 ]; then
+		ln -fs lib $PKGROOT/$PREFIX/lib64
+	fi
 fi
 
 chmod 1775 $PKGROOT/Library
@@ -214,10 +251,9 @@
 
 cp pkgscripts/Description.plist $TMPDIR/pkg/
 cp pkgscripts/Info.plist $TMPDIR/pkg/
-install -m 755 pkgscripts/uninstall $PKGROOT/opt/$PACKAGE_NAME/bin/
+install -m 755 pkgscripts/uninstall $PKGROOT/$BINDIR/
 
-sudo chown -R root:admin $PKGROOT 
-sudo chown -R root:0 $PKGROOT/usr 
+sudo chown -R root:admin $PKGROOT
 cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadMe.rtf $TMPDIR/pkg/Resources/ 
 
 mkdir $TMPDIR/dmg
diff --git a/release/makesunpkg.in b/release/makesunpkg.in
deleted file mode 100644
index 7cf36a8..0000000
--- a/release/makesunpkg.in
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/bin/sh
-
-set -u
-set -e
-trap onexit INT
-trap onexit TERM
-trap onexit EXIT
-
-TMPDIR=
-
-onexit()
-{
-	if [ ! "$TMPDIR" = "" ]; then
-		rm -rf $TMPDIR
-	fi
-}
-
-usage()
-{
-	echo "$0 [combined [32-bit build dir.]]"
-	exit 1
-}
-
-COMBINED=0
-
-PACKAGE_NAME=@PACKAGE_NAME@
-VERSION=@VERSION@
-BUILD=@BUILD@
-PKGARCH=@DEBARCH@
-SRCDIR=@abs_top_srcdir@
-BUILDDIR32=@abs_top_srcdir@/solx86
-WITH_JAVA=@WITH_JAVA@
-if [ $# -gt 0 ]; then
-	if [ "$1" = "combined" ]; then
-		COMBINED=1
-		if [ $# -gt 1 ]; then	BUILDDIR32=$2; fi
-	fi
-fi
-
-umask 022
-TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX`
-rm -f $PACKAGE_NAME.pkg.bz2
-cp $SRCDIR/release/copyright $TMPDIR
-touch $TMPDIR/depend
-cp pkgscripts/pkginfo $TMPDIR/pkginfo 
-
-if [ "$PKGARCH" = "i386" ]; then
-	__LIB=lib
-else
-	__LIB=lib/$PKGARCH
-fi
-
-if [ $COMBINED = 1 ]; then
-	if [ ! -d $BUILDDIR32 ]; then
-		echo ERROR: 32-bit build directory $BUILDDIR32 does not exist
-		exit 1
-	fi
-	if [ ! -f $BUILDDIR32/Makefile ]; then
-		echo ERROR: 32-bit build directory $BUILDDIR32 is not configured
-		exit 1
-	fi
-	PWD=`pwd`
-	cd $BUILDDIR32
-	make install DESTDIR=$TMPDIR mandir=/opt/$PACKAGE_NAME/man AM_MAKEFLAGS="mandir=/opt/$PACKAGE_NAME/man"
-	cd $PWD
-fi
-# This mess is to work around a bug in /usr/ccs/bin/make
-make install DESTDIR=$TMPDIR libdir=/opt/$PACKAGE_NAME/$__LIB \
-	mandir=/opt/$PACKAGE_NAME/man docdir=/opt/$PACKAGE_NAME/doc \
-	exampledir=/opt/$PACKAGE_NAME/doc \
-	AM_MAKEFLAGS="libdir=/opt/$PACKAGE_NAME/$__LIB mandir=/opt/$PACKAGE_NAME/man docdir=/opt/$PACKAGE_NAME/doc exampledir=/opt/$PACKAGE_NAME/doc"
-rm -f $TMPDIR/opt/$PACKAGE_NAME/$__LIB/*.la
-
-cat >$TMPDIR/proto <<EOF
-i copyright
-i depend
-i pkginfo
-d none $PACKAGE_NAME 0755 root bin
-d none $PACKAGE_NAME/bin 0755 root bin
-f none $PACKAGE_NAME/bin/cjpeg 0755 root bin
-f none $PACKAGE_NAME/bin/djpeg 0755 root bin
-f none $PACKAGE_NAME/bin/jpegtran 0755 root bin
-f none $PACKAGE_NAME/bin/tjbench 0755 root bin
-f none $PACKAGE_NAME/bin/rdjpgcom 0755 root bin
-f none $PACKAGE_NAME/bin/wrjpgcom 0755 root bin
-d none $PACKAGE_NAME/lib 0755 root bin
-EOF
-if [ $COMBINED = 1 ]; then
-cat >>$TMPDIR/proto <<EOF
-f none $PACKAGE_NAME/lib/libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@ 0755 root bin
-s none $PACKAGE_NAME/lib/libjpeg.so.@SO_MAJOR_VERSION@=libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@
-s none $PACKAGE_NAME/lib/libjpeg.so=libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@
-f none $PACKAGE_NAME/lib/libjpeg.a 0644 root bin
-f none $PACKAGE_NAME/lib/libturbojpeg.so 0755 root bin
-f none $PACKAGE_NAME/lib/libturbojpeg.a 0644 root bin
-EOF
-fi
-if [ "${__LIB}" != "lib" ]; then
-echo d none $PACKAGE_NAME/${__LIB} 0755 root bin >>$TMPDIR/proto
-echo s none $PACKAGE_NAME/lib64=${__LIB} >>$TMPDIR/proto
-echo s none $PACKAGE_NAME/lib/64=$PKGARCH >>$TMPDIR/proto
-fi
-cat >>$TMPDIR/proto <<EOF
-f none $PACKAGE_NAME/${__LIB}/libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@ 0755 root bin
-s none $PACKAGE_NAME/${__LIB}/libjpeg.so.@SO_MAJOR_VERSION@=libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@
-s none $PACKAGE_NAME/${__LIB}/libjpeg.so=libjpeg.so.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@
-f none $PACKAGE_NAME/${__LIB}/libjpeg.a 0644 root bin
-f none $PACKAGE_NAME/${__LIB}/libturbojpeg.so 0755 root bin
-f none $PACKAGE_NAME/${__LIB}/libturbojpeg.a 0644 root bin
-s none $PACKAGE_NAME/lib32=lib
-d none $PACKAGE_NAME/man 0755 root bin
-d none $PACKAGE_NAME/man/man1 0755 root bin
-d none $PACKAGE_NAME/man/man1/cjpeg.1 0644 root bin
-d none $PACKAGE_NAME/man/man1/djpeg.1 0644 root bin
-d none $PACKAGE_NAME/man/man1/jpegtran.1 0644 root bin
-d none $PACKAGE_NAME/man/man1/rdjpgcom.1 0644 root bin
-d none $PACKAGE_NAME/man/man1/wrjpgcom.1 0644 root bin
-d none $PACKAGE_NAME/include 0755 root bin
-f none $PACKAGE_NAME/include/jconfig.h 0644 root bin
-f none $PACKAGE_NAME/include/jerror.h 0644 root bin
-f none $PACKAGE_NAME/include/jmorecfg.h 0644 root bin
-f none $PACKAGE_NAME/include/jpeglib.h 0644 root bin
-f none $PACKAGE_NAME/include/turbojpeg.h 0644 root bin
-d none $PACKAGE_NAME/doc 0755 root bin
-f none $PACKAGE_NAME/doc/README 0644 root bin
-f none $PACKAGE_NAME/doc/README-turbo.txt 0644 root bin
-f none $PACKAGE_NAME/doc/example.c 0644 root bin
-f none $PACKAGE_NAME/doc/libjpeg.txt 0644 root bin
-f none $PACKAGE_NAME/doc/structure.txt 0644 root bin
-f none $PACKAGE_NAME/doc/usage.txt 0644 root bin
-f none $PACKAGE_NAME/doc/wizard.txt 0644 root bin
-EOF
-if [ $WITH_JAVA = 1 ]; then
-echo d none $PACKAGE_NAME/classes 0755 root bin >>$TMPDIR/proto
-echo f none $PACKAGE_NAME/classes/turbojpeg.jar 0644 root bin >>$TMPDIR/proto
-fi
-
-pkgmk -o -r $TMPDIR/opt -d $TMPDIR -a i386 -f $TMPDIR/proto
-pkgtrans -s $TMPDIR $TMPDIR/$PACKAGE_NAME-$VERSION.pkg $PACKAGE_NAME
-bzip2 $TMPDIR/$PACKAGE_NAME-$VERSION.pkg
-cp $TMPDIR/$PACKAGE_NAME-$VERSION.pkg.bz2 . 
-
-exit
diff --git a/release/pkginfo.in b/release/pkginfo.in
deleted file mode 100644
index 772ad0f..0000000
--- a/release/pkginfo.in
+++ /dev/null
@@ -1,16 +0,0 @@
-ARCH=i386
-PKG=@PACKAGE_NAME@
-NAME=@PACKAGE_NAME@ SDK and run time libraries
-VERSION=@VERSION@,REV=@BUILD@
-SUNW_PKGVERS=1.0
-DESC=A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
-VENDOR=The libjpeg-turbo Project
-HOTLINE=http://www.libjpeg-turbo.org
-EMAIL=information@libjpeg-turbo.org
-MAXINST=1
-CATEGORY=application
-BASEDIR=/opt
-CLASSES=none
-SUNW_PKG_ALLZONES=true
-SUNW_PKG_HOLLOW=false
-SUNW_PKG_THISZONE=false
diff --git a/release/uninstall.in b/release/uninstall.in
index 2e4598c..f167bbd 100644
--- a/release/uninstall.in
+++ b/release/uninstall.in
@@ -1,4 +1,4 @@
-# Copyright (C)2009-2011 D. R. Commander.  All Rights Reserved.
+# Copyright (C)2009-2011, 2013 D. R. Commander.  All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -31,7 +31,7 @@
 	exit -1
 fi
 
-PACKAGE=@PACKAGE_NAME@
+PACKAGE=@PKGNAME@
 MACPACKAGE=com.$PACKAGE.$PACKAGE
 RECEIPT=/Library/Receipts/$PACKAGE.pkg
 
@@ -56,27 +56,46 @@
 popd
 
 echo Removing package directories ...
-if [ -d /opt/$PACKAGE/bin ]; then
-	rmdir /opt/$PACKAGE/bin 2>&1 || EXITSTATUS=-1
+PREFIX=%{__prefix}
+BINDIR=%{__bindir}
+DATADIR=%{__datadir}
+INCLUDEDIR=%{__includedir}
+LIBDIR=%{__libdir}
+MANDIR=%{__mandir}
+
+if [ -d $BINDIR ]; then
+	rmdir $BINDIR 2>&1 || EXITSTATUS=-1
 fi
-if [ -d /opt/$PACKAGE/lib ]; then
-	rmdir /opt/$PACKAGE/lib 2>&1 || EXITSTATUS=-1
+if [ -d $LIBDIR ]; then
+	rmdir $LIBDIR 2>&1 || EXITSTATUS=-1
 fi
-if [ -d /opt/$PACKAGE/include ]; then
-	rmdir /opt/$PACKAGE/include 2>&1 || EXITSTATUS=-1
+if [ -d $INCLUDEDIR ]; then
+	rmdir $INCLUDEDIR 2>&1 || EXITSTATUS=-1
 fi
-rm /opt/$PACKAGE/lib32 2>&1 || EXITSTATUS=-1
-rm /opt/$PACKAGE/lib64 2>&1 || EXITSTATUS=-1
-if [ -d /opt/$PACKAGE/man/man1 ]; then
-	rmdir /opt/$PACKAGE/man/man1 2>&1 || EXITSTATUS=-1
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then
+	if [ -h $LIBDIR\32 ]; then
+		rm $LIBDIR\32 2>&1 || EXITSTATUS=-1
+	fi
+	if [ -h $LIBDIR\64 ]; then
+		rm $LIBDIR\64 2>&1 || EXITSTATUS=-1
+	fi
 fi
-if [ -d /opt/$PACKAGE/man ]; then
-	rmdir /opt/$PACKAGE/man 2>&1 || EXITSTATUS=-1
+if [ -d $MANDIR/man1 ]; then
+	rmdir $MANDIR/man1 2>&1 || EXITSTATUS=-1
 fi
-if [ -d /opt/$PACKAGE/classes ]; then
-	rmdir /opt/$PACKAGE/classes 2>&1 || EXITSTATUS=-1
+if [ -d $MANDIR ]; then
+	rmdir $MANDIR 2>&1 || EXITSTATUS=-1
 fi
-rmdir /opt/$PACKAGE 2>&1 || EXITSTATUS=-1
+if [ -d $DATADIR/classes ]; then
+	rmdir $DATADIR/classes 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $DATADIR -a "$DATADIR" != "$PREFIX" ]; then
+	rmdir $DATADIR 2>&1 || EXITSTATUS=-1
+fi
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a -h "$PREFIX/doc" ]; then
+	rm $PREFIX/doc 2>&1 || EXITSTATUS=-1
+fi
+rmdir $PREFIX 2>&1 || EXITSTATUS=-1
 rmdir /Library/Documentation/$PACKAGE 2>&1 || EXITSTATUS=-1
 
 if [ -d $RECEIPT ]; then
diff --git a/sharedlib/CMakeLists.txt b/sharedlib/CMakeLists.txt
index 25ddbdf..cd3f268 100755
--- a/sharedlib/CMakeLists.txt
+++ b/sharedlib/CMakeLists.txt
@@ -24,8 +24,13 @@
   set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
 endif()
 
-add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS}
-  ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}.def)
+if(WITH_MEM_SRCDST AND NOT WITH_JPEG8)
+  add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS}
+    ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}-memsrcdst.def)
+else()
+  add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS}
+    ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}.def)
+endif()
 set_target_properties(jpeg PROPERTIES SOVERSION ${DLL_VERSION}
   VERSION ${FULLVERSION})
 if(MSVC)
diff --git a/simd/Makefile.am b/simd/Makefile.am
index a12ff6e..a983432 100644
--- a/simd/Makefile.am
+++ b/simd/Makefile.am
@@ -58,7 +58,7 @@
 
 endif
 
-AM_CPPFLAGS = -I$(top_srcdir) 
+AM_CPPFLAGS = -I$(top_srcdir)
 
 .asm.lo:
 	$(LIBTOOL) --mode=compile --tag NASM $(srcdir)/nasm_lt.sh $(NASM) $(NAFLAGS) -I$(srcdir) -I. $< -o $@
diff --git a/simd/jcclrmmx.asm b/simd/jcclrmmx.asm
index e095253..f34104f 100644
--- a/simd/jcclrmmx.asm
+++ b/simd/jcclrmmx.asm
@@ -28,450 +28,450 @@
 ;                           JDIMENSION output_row, int num_rows);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION img_width
-%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
-%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
-%define output_row(b)	(b)+20		; JDIMENSION output_row
-%define num_rows(b)	(b)+24		; int num_rows
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		8
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          8
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_rgb_ycc_convert_mmx)
+        align   16
+        global  EXTN(jsimd_rgb_ycc_convert_mmx)
 
 EXTN(jsimd_rgb_ycc_convert_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [img_width(eax)]	; num_cols
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [img_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	esi, JSAMPIMAGE [output_buf(eax)]
-	mov	ecx, JDIMENSION [output_row(eax)]
-	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
-	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
-	lea	ebx, [ebx+ecx*SIZEOF_JSAMPROW]
-	lea	edx, [edx+ecx*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	esi, JSAMPARRAY [input_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	pushpic	eax
-	push	edx
-	push	ebx
-	push	edi
-	push	esi
-	push	ecx			; col
+        pushpic eax
+        push    edx
+        push    ebx
+        push    edi
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr0
-	mov	ebx, JSAMPROW [ebx]	; outptr1
-	mov	edx, JSAMPROW [edx]	; outptr2
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        mov     ebx, JSAMPROW [ebx]     ; outptr1
+        mov     edx, JSAMPROW [edx]     ; outptr2
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jae	short .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     short .columnloop
+        alignx  16,7
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	eax
-	push	edx
-	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_BYTE
-	xor	eax,eax
-	mov	al, BYTE [esi+ecx]
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        xor     eax,eax
+        mov     al, BYTE [esi+ecx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_WORD
-	xor	edx,edx
-	mov	dx, WORD [esi+ecx]
-	shl	eax, WORD_BIT
-	or	eax,edx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        xor     edx,edx
+        mov     dx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
 .column_ld4:
-	movd	mmA,eax
-	pop	edx
-	pop	eax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_DWORD
-	movd	mmG, DWORD [esi+ecx]
-	psllq	mmA, DWORD_BIT
-	por	mmA,mmG
+        movd    mmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    mmG, DWORD [esi+ecx]
+        psllq   mmA, DWORD_BIT
+        por     mmA,mmG
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	movq	mmG,mmA
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	mov	ecx, SIZEOF_MMWORD
-	jmp	short .rgb_ycc_cnv
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        movq    mmG,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        mov     ecx, SIZEOF_MMWORD
+        jmp     short .rgb_ycc_cnv
 .column_ld16:
-	test	cl, 2*SIZEOF_MMWORD
-	mov	ecx, SIZEOF_MMWORD
-	jz	short .rgb_ycc_cnv
-	movq	mmF,mmA
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
-	jmp	short .rgb_ycc_cnv
-	alignx	16,7
+        test    cl, 2*SIZEOF_MMWORD
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_ycc_cnv
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
 
 .columnloop:
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+2*SIZEOF_MMWORD]
 
 .rgb_ycc_cnv:
-	; mmA=(00 10 20 01 11 21 02 12)
-	; mmG=(22 03 13 23 04 14 24 05)
-	; mmF=(15 25 06 16 26 07 17 27)
+        ; mmA=(00 10 20 01 11 21 02 12)
+        ; mmG=(22 03 13 23 04 14 24 05)
+        ; mmF=(15 25 06 16 26 07 17 27)
 
-	movq      mmD,mmA
-	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 10 20 01)
-	psrlq     mmD,4*BYTE_BIT	; mmD=(11 21 02 12 -- -- -- --)
+        movq      mmD,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 10 20 01)
+        psrlq     mmD,4*BYTE_BIT        ; mmD=(11 21 02 12 -- -- -- --)
 
-	punpckhbw mmA,mmG		; mmA=(00 04 10 14 20 24 01 05)
-	psllq     mmG,4*BYTE_BIT	; mmG=(-- -- -- -- 22 03 13 23)
+        punpckhbw mmA,mmG               ; mmA=(00 04 10 14 20 24 01 05)
+        psllq     mmG,4*BYTE_BIT        ; mmG=(-- -- -- -- 22 03 13 23)
 
-	punpcklbw mmD,mmF		; mmD=(11 15 21 25 02 06 12 16)
-	punpckhbw mmG,mmF		; mmG=(22 26 03 07 13 17 23 27)
+        punpcklbw mmD,mmF               ; mmD=(11 15 21 25 02 06 12 16)
+        punpckhbw mmG,mmF               ; mmG=(22 26 03 07 13 17 23 27)
 
-	movq      mmE,mmA
-	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 04 10 14)
-	psrlq     mmE,4*BYTE_BIT	; mmE=(20 24 01 05 -- -- -- --)
+        movq      mmE,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 04 10 14)
+        psrlq     mmE,4*BYTE_BIT        ; mmE=(20 24 01 05 -- -- -- --)
 
-	punpckhbw mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
-	psllq     mmD,4*BYTE_BIT	; mmD=(-- -- -- -- 11 15 21 25)
+        punpckhbw mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        psllq     mmD,4*BYTE_BIT        ; mmD=(-- -- -- -- 11 15 21 25)
 
-	punpcklbw mmE,mmG		; mmE=(20 22 24 26 01 03 05 07)
-	punpckhbw mmD,mmG		; mmD=(11 13 15 17 21 23 25 27)
+        punpcklbw mmE,mmG               ; mmE=(20 22 24 26 01 03 05 07)
+        punpckhbw mmD,mmG               ; mmD=(11 13 15 17 21 23 25 27)
 
-	pxor      mmH,mmH
+        pxor      mmH,mmH
 
-	movq      mmC,mmA
-	punpcklbw mmA,mmH		; mmA=(00 02 04 06)
-	punpckhbw mmC,mmH		; mmC=(10 12 14 16)
+        movq      mmC,mmA
+        punpcklbw mmA,mmH               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmH               ; mmC=(10 12 14 16)
 
-	movq      mmB,mmE
-	punpcklbw mmE,mmH		; mmE=(20 22 24 26)
-	punpckhbw mmB,mmH		; mmB=(01 03 05 07)
+        movq      mmB,mmE
+        punpcklbw mmE,mmH               ; mmE=(20 22 24 26)
+        punpckhbw mmB,mmH               ; mmB=(01 03 05 07)
 
-	movq      mmF,mmD
-	punpcklbw mmD,mmH		; mmD=(11 13 15 17)
-	punpckhbw mmF,mmH		; mmF=(21 23 25 27)
+        movq      mmF,mmD
+        punpcklbw mmD,mmH               ; mmD=(11 13 15 17)
+        punpckhbw mmF,mmH               ; mmF=(21 23 25 27)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_MMWORD/8
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_MMWORD/8
-	movd	mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_MMWORD/8
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_MMWORD/8
+        movd    mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_MMWORD/4
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_MMWORD/4
-	movq	mmF,mmA
-	movq	mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_MMWORD/4
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_MMWORD/4
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld4:
-	test	cl, SIZEOF_MMWORD/2
-	mov	ecx, SIZEOF_MMWORD
-	jz	short .rgb_ycc_cnv
-	movq	mmD,mmA
-	movq	mmC,mmF
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
-	jmp	short .rgb_ycc_cnv
-	alignx	16,7
+        test    cl, SIZEOF_MMWORD/2
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_ycc_cnv
+        movq    mmD,mmA
+        movq    mmC,mmF
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
 
 .columnloop:
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
-	movq	mmD, MMWORD [esi+2*SIZEOF_MMWORD]
-	movq	mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmC, MMWORD [esi+3*SIZEOF_MMWORD]
 
 .rgb_ycc_cnv:
-	; mmA=(00 10 20 30 01 11 21 31)
-	; mmF=(02 12 22 32 03 13 23 33)
-	; mmD=(04 14 24 34 05 15 25 35)
-	; mmC=(06 16 26 36 07 17 27 37)
+        ; mmA=(00 10 20 30 01 11 21 31)
+        ; mmF=(02 12 22 32 03 13 23 33)
+        ; mmD=(04 14 24 34 05 15 25 35)
+        ; mmC=(06 16 26 36 07 17 27 37)
 
-	movq      mmB,mmA
-	punpcklbw mmA,mmF		; mmA=(00 02 10 12 20 22 30 32)
-	punpckhbw mmB,mmF		; mmB=(01 03 11 13 21 23 31 33)
+        movq      mmB,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 10 12 20 22 30 32)
+        punpckhbw mmB,mmF               ; mmB=(01 03 11 13 21 23 31 33)
 
-	movq      mmG,mmD
-	punpcklbw mmD,mmC		; mmD=(04 06 14 16 24 26 34 36)
-	punpckhbw mmG,mmC		; mmG=(05 07 15 17 25 27 35 37)
+        movq      mmG,mmD
+        punpcklbw mmD,mmC               ; mmD=(04 06 14 16 24 26 34 36)
+        punpckhbw mmG,mmC               ; mmG=(05 07 15 17 25 27 35 37)
 
-	movq      mmE,mmA
-	punpcklwd mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
-	punpckhwd mmE,mmD		; mmE=(20 22 24 26 30 32 34 36)
+        movq      mmE,mmA
+        punpcklwd mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        punpckhwd mmE,mmD               ; mmE=(20 22 24 26 30 32 34 36)
 
-	movq      mmH,mmB
-	punpcklwd mmB,mmG		; mmB=(01 03 05 07 11 13 15 17)
-	punpckhwd mmH,mmG		; mmH=(21 23 25 27 31 33 35 37)
+        movq      mmH,mmB
+        punpcklwd mmB,mmG               ; mmB=(01 03 05 07 11 13 15 17)
+        punpckhwd mmH,mmG               ; mmH=(21 23 25 27 31 33 35 37)
 
-	pxor      mmF,mmF
+        pxor      mmF,mmF
 
-	movq      mmC,mmA
-	punpcklbw mmA,mmF		; mmA=(00 02 04 06)
-	punpckhbw mmC,mmF		; mmC=(10 12 14 16)
+        movq      mmC,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmF               ; mmC=(10 12 14 16)
 
-	movq      mmD,mmB
-	punpcklbw mmB,mmF		; mmB=(01 03 05 07)
-	punpckhbw mmD,mmF		; mmD=(11 13 15 17)
+        movq      mmD,mmB
+        punpcklbw mmB,mmF               ; mmB=(01 03 05 07)
+        punpckhbw mmD,mmF               ; mmD=(11 13 15 17)
 
-	movq      mmG,mmE
-	punpcklbw mmE,mmF		; mmE=(20 22 24 26)
-	punpckhbw mmG,mmF		; mmG=(30 32 34 36)
+        movq      mmG,mmE
+        punpcklbw mmE,mmF               ; mmE=(20 22 24 26)
+        punpckhbw mmG,mmF               ; mmG=(30 32 34 36)
 
-	punpcklbw mmF,mmH
-	punpckhbw mmH,mmH
-	psrlw     mmF,BYTE_BIT		; mmF=(21 23 25 27)
-	psrlw     mmH,BYTE_BIT		; mmH=(31 33 35 37)
+        punpcklbw mmF,mmH
+        punpckhbw mmH,mmH
+        psrlw     mmF,BYTE_BIT          ; mmF=(21 23 25 27)
+        psrlw     mmH,BYTE_BIT          ; mmH=(31 33 35 37)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
-	; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+        ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+        ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
 
-	movq      MMWORD [wk(0)], mm0	; wk(0)=RE
-	movq      MMWORD [wk(1)], mm1	; wk(1)=RO
-	movq      MMWORD [wk(2)], mm4	; wk(2)=BE
-	movq      MMWORD [wk(3)], mm5	; wk(3)=BO
+        movq      MMWORD [wk(0)], mm0   ; wk(0)=RE
+        movq      MMWORD [wk(1)], mm1   ; wk(1)=RO
+        movq      MMWORD [wk(2)], mm4   ; wk(2)=BE
+        movq      MMWORD [wk(3)], mm5   ; wk(3)=BO
 
-	movq      mm6,mm1
-	punpcklwd mm1,mm3
-	punpckhwd mm6,mm3
-	movq      mm7,mm1
-	movq      mm4,mm6
-	pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
-	pmaddwd   mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
-	pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+        movq      mm6,mm1
+        punpcklwd mm1,mm3
+        punpckhwd mm6,mm3
+        movq      mm7,mm1
+        movq      mm4,mm6
+        pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
 
-	movq      MMWORD [wk(4)], mm1	; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
-	movq      MMWORD [wk(5)], mm6	; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movq      MMWORD [wk(4)], mm1   ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+        movq      MMWORD [wk(5)], mm6   ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	pxor      mm1,mm1
-	pxor      mm6,mm6
-	punpcklwd mm1,mm5		; mm1=BOL
-	punpckhwd mm6,mm5		; mm6=BOH
-	psrld     mm1,1			; mm1=BOL*FIX(0.500)
-	psrld     mm6,1			; mm6=BOH*FIX(0.500)
+        pxor      mm1,mm1
+        pxor      mm6,mm6
+        punpcklwd mm1,mm5               ; mm1=BOL
+        punpckhwd mm6,mm5               ; mm6=BOH
+        psrld     mm1,1                 ; mm1=BOL*FIX(0.500)
+        psrld     mm6,1                 ; mm6=BOH*FIX(0.500)
 
-	movq      mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
+        movq      mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
 
-	paddd     mm7,mm1
-	paddd     mm4,mm6
-	paddd     mm7,mm5
-	paddd     mm4,mm5
-	psrld     mm7,SCALEBITS		; mm7=CbOL
-	psrld     mm4,SCALEBITS		; mm4=CbOH
-	packssdw  mm7,mm4		; mm7=CbO
+        paddd     mm7,mm1
+        paddd     mm4,mm6
+        paddd     mm7,mm5
+        paddd     mm4,mm5
+        psrld     mm7,SCALEBITS         ; mm7=CbOL
+        psrld     mm4,SCALEBITS         ; mm4=CbOH
+        packssdw  mm7,mm4               ; mm7=CbO
 
-	movq      mm1, MMWORD [wk(2)]	; mm1=BE
+        movq      mm1, MMWORD [wk(2)]   ; mm1=BE
 
-	movq      mm6,mm0
-	punpcklwd mm0,mm2
-	punpckhwd mm6,mm2
-	movq      mm5,mm0
-	movq      mm4,mm6
-	pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
-	pmaddwd   mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
-	pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+        movq      mm6,mm0
+        punpcklwd mm0,mm2
+        punpckhwd mm6,mm2
+        movq      mm5,mm0
+        movq      mm4,mm6
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
 
-	movq      MMWORD [wk(6)], mm0	; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movq      MMWORD [wk(7)], mm6	; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movq      MMWORD [wk(6)], mm0   ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movq      MMWORD [wk(7)], mm6   ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	pxor      mm0,mm0
-	pxor      mm6,mm6
-	punpcklwd mm0,mm1		; mm0=BEL
-	punpckhwd mm6,mm1		; mm6=BEH
-	psrld     mm0,1			; mm0=BEL*FIX(0.500)
-	psrld     mm6,1			; mm6=BEH*FIX(0.500)
+        pxor      mm0,mm0
+        pxor      mm6,mm6
+        punpcklwd mm0,mm1               ; mm0=BEL
+        punpckhwd mm6,mm1               ; mm6=BEH
+        psrld     mm0,1                 ; mm0=BEL*FIX(0.500)
+        psrld     mm6,1                 ; mm6=BEH*FIX(0.500)
 
-	movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+        movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
 
-	paddd     mm5,mm0
-	paddd     mm4,mm6
-	paddd     mm5,mm1
-	paddd     mm4,mm1
-	psrld     mm5,SCALEBITS		; mm5=CbEL
-	psrld     mm4,SCALEBITS		; mm4=CbEH
-	packssdw  mm5,mm4		; mm5=CbE
+        paddd     mm5,mm0
+        paddd     mm4,mm6
+        paddd     mm5,mm1
+        paddd     mm4,mm1
+        psrld     mm5,SCALEBITS         ; mm5=CbEL
+        psrld     mm4,SCALEBITS         ; mm4=CbEH
+        packssdw  mm5,mm4               ; mm5=CbE
 
-	psllw     mm7,BYTE_BIT
-	por       mm5,mm7		; mm5=Cb
-	movq      MMWORD [ebx], mm5	; Save Cb
+        psllw     mm7,BYTE_BIT
+        por       mm5,mm7               ; mm5=Cb
+        movq      MMWORD [ebx], mm5     ; Save Cb
 
-	movq      mm0, MMWORD [wk(3)]	; mm0=BO
-	movq      mm6, MMWORD [wk(2)]	; mm6=BE
-	movq      mm1, MMWORD [wk(1)]	; mm1=RO
+        movq      mm0, MMWORD [wk(3)]   ; mm0=BO
+        movq      mm6, MMWORD [wk(2)]   ; mm6=BE
+        movq      mm1, MMWORD [wk(1)]   ; mm1=RO
 
-	movq      mm4,mm0
-	punpcklwd mm0,mm3
-	punpckhwd mm4,mm3
-	movq      mm7,mm0
-	movq      mm5,mm4
-	pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
-	pmaddwd   mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
-	pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+        movq      mm4,mm0
+        punpcklwd mm0,mm3
+        punpckhwd mm4,mm3
+        movq      mm7,mm0
+        movq      mm5,mm4
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
 
-	movq      mm3,[GOTOFF(eax,PD_ONEHALF)]	; mm3=[PD_ONEHALF]
+        movq      mm3,[GOTOFF(eax,PD_ONEHALF)]  ; mm3=[PD_ONEHALF]
 
-	paddd     mm0, MMWORD [wk(4)]
-	paddd     mm4, MMWORD [wk(5)]
-	paddd     mm0,mm3
-	paddd     mm4,mm3
-	psrld     mm0,SCALEBITS		; mm0=YOL
-	psrld     mm4,SCALEBITS		; mm4=YOH
-	packssdw  mm0,mm4		; mm0=YO
+        paddd     mm0, MMWORD [wk(4)]
+        paddd     mm4, MMWORD [wk(5)]
+        paddd     mm0,mm3
+        paddd     mm4,mm3
+        psrld     mm0,SCALEBITS         ; mm0=YOL
+        psrld     mm4,SCALEBITS         ; mm4=YOH
+        packssdw  mm0,mm4               ; mm0=YO
 
-	pxor      mm3,mm3
-	pxor      mm4,mm4
-	punpcklwd mm3,mm1		; mm3=ROL
-	punpckhwd mm4,mm1		; mm4=ROH
-	psrld     mm3,1			; mm3=ROL*FIX(0.500)
-	psrld     mm4,1			; mm4=ROH*FIX(0.500)
+        pxor      mm3,mm3
+        pxor      mm4,mm4
+        punpcklwd mm3,mm1               ; mm3=ROL
+        punpckhwd mm4,mm1               ; mm4=ROH
+        psrld     mm3,1                 ; mm3=ROL*FIX(0.500)
+        psrld     mm4,1                 ; mm4=ROH*FIX(0.500)
 
-	movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+        movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
 
-	paddd     mm7,mm3
-	paddd     mm5,mm4
-	paddd     mm7,mm1
-	paddd     mm5,mm1
-	psrld     mm7,SCALEBITS		; mm7=CrOL
-	psrld     mm5,SCALEBITS		; mm5=CrOH
-	packssdw  mm7,mm5		; mm7=CrO
+        paddd     mm7,mm3
+        paddd     mm5,mm4
+        paddd     mm7,mm1
+        paddd     mm5,mm1
+        psrld     mm7,SCALEBITS         ; mm7=CrOL
+        psrld     mm5,SCALEBITS         ; mm5=CrOH
+        packssdw  mm7,mm5               ; mm7=CrO
 
-	movq      mm3, MMWORD [wk(0)]	; mm3=RE
+        movq      mm3, MMWORD [wk(0)]   ; mm3=RE
 
-	movq      mm4,mm6
-	punpcklwd mm6,mm2
-	punpckhwd mm4,mm2
-	movq      mm1,mm6
-	movq      mm5,mm4
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
-	pmaddwd   mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
-	pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+        movq      mm4,mm6
+        punpcklwd mm6,mm2
+        punpckhwd mm4,mm2
+        movq      mm1,mm6
+        movq      mm5,mm4
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
 
-	movq      mm2,[GOTOFF(eax,PD_ONEHALF)]	; mm2=[PD_ONEHALF]
+        movq      mm2,[GOTOFF(eax,PD_ONEHALF)]  ; mm2=[PD_ONEHALF]
 
-	paddd     mm6, MMWORD [wk(6)]
-	paddd     mm4, MMWORD [wk(7)]
-	paddd     mm6,mm2
-	paddd     mm4,mm2
-	psrld     mm6,SCALEBITS		; mm6=YEL
-	psrld     mm4,SCALEBITS		; mm4=YEH
-	packssdw  mm6,mm4		; mm6=YE
+        paddd     mm6, MMWORD [wk(6)]
+        paddd     mm4, MMWORD [wk(7)]
+        paddd     mm6,mm2
+        paddd     mm4,mm2
+        psrld     mm6,SCALEBITS         ; mm6=YEL
+        psrld     mm4,SCALEBITS         ; mm4=YEH
+        packssdw  mm6,mm4               ; mm6=YE
 
-	psllw     mm0,BYTE_BIT
-	por       mm6,mm0		; mm6=Y
-	movq      MMWORD [edi], mm6	; Save Y
+        psllw     mm0,BYTE_BIT
+        por       mm6,mm0               ; mm6=Y
+        movq      MMWORD [edi], mm6     ; Save Y
 
-	pxor      mm2,mm2
-	pxor      mm4,mm4
-	punpcklwd mm2,mm3		; mm2=REL
-	punpckhwd mm4,mm3		; mm4=REH
-	psrld     mm2,1			; mm2=REL*FIX(0.500)
-	psrld     mm4,1			; mm4=REH*FIX(0.500)
+        pxor      mm2,mm2
+        pxor      mm4,mm4
+        punpcklwd mm2,mm3               ; mm2=REL
+        punpckhwd mm4,mm3               ; mm4=REH
+        psrld     mm2,1                 ; mm2=REL*FIX(0.500)
+        psrld     mm4,1                 ; mm4=REH*FIX(0.500)
 
-	movq      mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
+        movq      mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
 
-	paddd     mm1,mm2
-	paddd     mm5,mm4
-	paddd     mm1,mm0
-	paddd     mm5,mm0
-	psrld     mm1,SCALEBITS		; mm1=CrEL
-	psrld     mm5,SCALEBITS		; mm5=CrEH
-	packssdw  mm1,mm5		; mm1=CrE
+        paddd     mm1,mm2
+        paddd     mm5,mm4
+        paddd     mm1,mm0
+        paddd     mm5,mm0
+        psrld     mm1,SCALEBITS         ; mm1=CrEL
+        psrld     mm5,SCALEBITS         ; mm5=CrEH
+        packssdw  mm1,mm5               ; mm1=CrE
 
-	psllw     mm7,BYTE_BIT
-	por       mm1,mm7		; mm1=Cr
-	movq      MMWORD [edx], mm1	; Save Cr
+        psllw     mm7,BYTE_BIT
+        por       mm1,mm7               ; mm1=Cr
+        movq      MMWORD [edx], mm1     ; Save Cr
 
-	sub	ecx, byte SIZEOF_MMWORD
-	add	esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; inptr
-	add	edi, byte SIZEOF_MMWORD			; outptr0
-	add	ebx, byte SIZEOF_MMWORD			; outptr1
-	add	edx, byte SIZEOF_MMWORD			; outptr2
-	cmp	ecx, byte SIZEOF_MMWORD
-	jae	near .columnloop
-	test	ecx,ecx
-	jnz	near .column_ld1
+        sub     ecx, byte SIZEOF_MMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; inptr
+        add     edi, byte SIZEOF_MMWORD                 ; outptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; outptr1
+        add     edx, byte SIZEOF_MMWORD                 ; outptr2
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
 
-	pop	ecx			; col
-	pop	esi
-	pop	edi
-	pop	ebx
-	pop	edx
-	poppic	eax
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        pop     ebx
+        pop     edx
+        poppic  eax
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_buf
-	add	edi, byte SIZEOF_JSAMPROW
-	add	ebx, byte SIZEOF_JSAMPROW
-	add	edx, byte SIZEOF_JSAMPROW
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcclrss2-64.asm b/simd/jcclrss2-64.asm
index f5d6bed..1cdae27 100644
--- a/simd/jcclrss2-64.asm
+++ b/simd/jcclrss2-64.asm
@@ -32,454 +32,454 @@
 ; r13 = JDIMENSION output_row
 ; r14 = int num_rows
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		8
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          8
 
-	align	16
+        align   16
 
-	global	EXTN(jsimd_rgb_ycc_convert_sse2)
+        global  EXTN(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = original rsp (points at saved rbp)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
 
-	mov	rcx, r10
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10
+        test    rcx,rcx
+        jz      near .return
 
-	push	rcx
+        push    rcx
 
-	mov rsi, r12
-	mov rcx, r13
-	mov	rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
-	mov	rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
-	mov	rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
-	lea	rdi, [rdi+rcx*SIZEOF_JSAMPROW]
-	lea	rbx, [rbx+rcx*SIZEOF_JSAMPROW]
-	lea	rdx, [rdx+rcx*SIZEOF_JSAMPROW]
+        mov rsi, r12
+        mov rcx, r13
+        mov     rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
+        lea     rdi, [rdi+rcx*SIZEOF_JSAMPROW]
+        lea     rbx, [rbx+rcx*SIZEOF_JSAMPROW]
+        lea     rdx, [rdx+rcx*SIZEOF_JSAMPROW]
 
-	pop	rcx
+        pop     rcx
 
-	mov rsi, r11
-	mov	eax, r14d
-	test	rax,rax
-	jle	near .return
+        mov rsi, r11
+        mov     eax, r14d
+        test    rax,rax
+        jle     near .return
 .rowloop:
-	push	rdx
-	push	rbx
-	push	rdi
-	push	rsi
-	push	rcx			; col
+        push    rdx
+        push    rbx
+        push    rdi
+        push    rsi
+        push    rcx                     ; col
 
-	mov	rsi, JSAMPROW [rsi]	; inptr
-	mov	rdi, JSAMPROW [rdi]	; outptr0
-	mov	rbx, JSAMPROW [rbx]	; outptr1
-	mov	rdx, JSAMPROW [rdx]	; outptr2
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr0
+        mov     rbx, JSAMPROW [rbx]     ; outptr1
+        mov     rdx, JSAMPROW [rdx]     ; outptr2
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	rax
-	push	rdx
-	lea	rcx,[rcx+rcx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	rcx, byte SIZEOF_BYTE
-	movzx	rax, BYTE [rsi+rcx]
+        push    rax
+        push    rdx
+        lea     rcx,[rcx+rcx*2]         ; imul rcx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_BYTE
+        movzx   rax, BYTE [rsi+rcx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	rcx, byte SIZEOF_WORD
-	movzx	rdx, WORD [rsi+rcx]
-	shl	rax, WORD_BIT
-	or	rax,rdx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_WORD
+        movzx   rdx, WORD [rsi+rcx]
+        shl     rax, WORD_BIT
+        or      rax,rdx
 .column_ld4:
-	movd	xmmA,eax
-	pop	rdx
-	pop	rax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	rcx, byte SIZEOF_DWORD
-	movd	xmmF, XMM_DWORD [rsi+rcx]
-	pslldq	xmmA, SIZEOF_DWORD
-	por	xmmA,xmmF
+        movd    xmmA,eax
+        pop     rdx
+        pop     rax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	sub	rcx, byte SIZEOF_MMWORD
-	movq	xmmB, XMM_MMWORD [rsi+rcx]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmB
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     rcx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
 .column_ld16:
-	test	cl, SIZEOF_XMMWORD
-	jz	short .column_ld32
-	movdqa	xmmF,xmmA
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	mov	rcx, SIZEOF_XMMWORD
-	jmp	short .rgb_ycc_cnv
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        mov     rcx, SIZEOF_XMMWORD
+        jmp     short .rgb_ycc_cnv
 .column_ld32:
-	test	cl, 2*SIZEOF_XMMWORD
-	mov	rcx, SIZEOF_XMMWORD
-	jz	short .rgb_ycc_cnv
-	movdqa	xmmB,xmmA
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_ycc_cnv
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     rcx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	movdqu	xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
 
 .rgb_ycc_cnv:
-	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	movdqa    xmmG,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
-	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
-	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
 
-	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
-	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
 
-	movdqa    xmmD,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
-	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
-	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
 
-	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
-	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
 
-	movdqa    xmmE,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
-	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
 
-	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
 
-	pxor      xmmH,xmmH
+        pxor      xmmH,xmmH
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmB,xmmE
-	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
 
-	movdqa    xmmF,xmmD
-	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_XMMWORD/16
-	jz	short .column_ld2
-	sub	rcx, byte SIZEOF_XMMWORD/16
-	movd	xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_XMMWORD/8
-	jz	short .column_ld4
-	sub	rcx, byte SIZEOF_XMMWORD/8
-	movq	xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmE
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
 .column_ld4:
-	test	cl, SIZEOF_XMMWORD/4
-	jz	short .column_ld8
-	sub	rcx, byte SIZEOF_XMMWORD/4
-	movdqa	xmmE,xmmA
-	movdqu	xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
 .column_ld8:
-	test	cl, SIZEOF_XMMWORD/2
-	mov	rcx, SIZEOF_XMMWORD
-	jz	short .rgb_ycc_cnv
-	movdqa	xmmF,xmmA
-	movdqa	xmmH,xmmE
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_ycc_cnv
+        test    cl, SIZEOF_XMMWORD/2
+        mov     rcx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
-	movdqu	xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
 
 .rgb_ycc_cnv:
-	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
-	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
 
-	movdqa    xmmC,xmmF
-	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
-	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
 
-	movdqa    xmmB,xmmA
-	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
-	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
 
-	movdqa    xmmG,xmmD
-	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
-	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
 
-	movdqa    xmmE,xmmA
-	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
 
-	movdqa    xmmH,xmmB
-	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
 
-	pxor      xmmF,xmmF
+        pxor      xmmF,xmmF
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmD,xmmB
-	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
 
-	movdqa    xmmG,xmmE
-	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
 
-	punpcklbw xmmF,xmmH
-	punpckhbw xmmH,xmmH
-	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
-	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
-	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
 
-	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=RE
-	movdqa    XMMWORD [wk(1)], xmm1	; wk(1)=RO
-	movdqa    XMMWORD [wk(2)], xmm4	; wk(2)=BE
-	movdqa    XMMWORD [wk(3)], xmm5	; wk(3)=BO
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=RE
+        movdqa    XMMWORD [wk(1)], xmm1 ; wk(1)=RO
+        movdqa    XMMWORD [wk(2)], xmm4 ; wk(2)=BE
+        movdqa    XMMWORD [wk(3)], xmm5 ; wk(3)=BO
 
-	movdqa    xmm6,xmm1
-	punpcklwd xmm1,xmm3
-	punpckhwd xmm6,xmm3
-	movdqa    xmm7,xmm1
-	movdqa    xmm4,xmm6
-	pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
-	pmaddwd   xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
-	pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        movdqa    xmm7,xmm1
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
 
-	movdqa    XMMWORD [wk(4)], xmm1	; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
-	movdqa    XMMWORD [wk(5)], xmm6	; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+        movdqa    XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	pxor      xmm1,xmm1
-	pxor      xmm6,xmm6
-	punpcklwd xmm1,xmm5		; xmm1=BOL
-	punpckhwd xmm6,xmm5		; xmm6=BOH
-	psrld     xmm1,1		; xmm1=BOL*FIX(0.500)
-	psrld     xmm6,1		; xmm6=BOH*FIX(0.500)
+        pxor      xmm1,xmm1
+        pxor      xmm6,xmm6
+        punpcklwd xmm1,xmm5             ; xmm1=BOL
+        punpckhwd xmm6,xmm5             ; xmm6=BOH
+        psrld     xmm1,1                ; xmm1=BOL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BOH*FIX(0.500)
 
-	movdqa    xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
+        movdqa    xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm7,xmm1
-	paddd     xmm4,xmm6
-	paddd     xmm7,xmm5
-	paddd     xmm4,xmm5
-	psrld     xmm7,SCALEBITS	; xmm7=CbOL
-	psrld     xmm4,SCALEBITS	; xmm4=CbOH
-	packssdw  xmm7,xmm4		; xmm7=CbO
+        paddd     xmm7,xmm1
+        paddd     xmm4,xmm6
+        paddd     xmm7,xmm5
+        paddd     xmm4,xmm5
+        psrld     xmm7,SCALEBITS        ; xmm7=CbOL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbOH
+        packssdw  xmm7,xmm4             ; xmm7=CbO
 
-	movdqa    xmm1, XMMWORD [wk(2)]	; xmm1=BE
+        movdqa    xmm1, XMMWORD [wk(2)] ; xmm1=BE
 
-	movdqa    xmm6,xmm0
-	punpcklwd xmm0,xmm2
-	punpckhwd xmm6,xmm2
-	movdqa    xmm5,xmm0
-	movdqa    xmm4,xmm6
-	pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
-	pmaddwd   xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
-	pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
 
-	movdqa    XMMWORD [wk(6)], xmm0	; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movdqa    XMMWORD [wk(7)], xmm6	; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	pxor      xmm0,xmm0
-	pxor      xmm6,xmm6
-	punpcklwd xmm0,xmm1		; xmm0=BEL
-	punpckhwd xmm6,xmm1		; xmm6=BEH
-	psrld     xmm0,1		; xmm0=BEL*FIX(0.500)
-	psrld     xmm6,1		; xmm6=BEH*FIX(0.500)
+        pxor      xmm0,xmm0
+        pxor      xmm6,xmm6
+        punpcklwd xmm0,xmm1             ; xmm0=BEL
+        punpckhwd xmm6,xmm1             ; xmm6=BEH
+        psrld     xmm0,1                ; xmm0=BEL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BEH*FIX(0.500)
 
-	movdqa    xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
+        movdqa    xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm5,xmm0
-	paddd     xmm4,xmm6
-	paddd     xmm5,xmm1
-	paddd     xmm4,xmm1
-	psrld     xmm5,SCALEBITS	; xmm5=CbEL
-	psrld     xmm4,SCALEBITS	; xmm4=CbEH
-	packssdw  xmm5,xmm4		; xmm5=CbE
+        paddd     xmm5,xmm0
+        paddd     xmm4,xmm6
+        paddd     xmm5,xmm1
+        paddd     xmm4,xmm1
+        psrld     xmm5,SCALEBITS        ; xmm5=CbEL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbEH
+        packssdw  xmm5,xmm4             ; xmm5=CbE
 
-	psllw     xmm7,BYTE_BIT
-	por       xmm5,xmm7		; xmm5=Cb
-	movdqa    XMMWORD [rbx], xmm5	; Save Cb
+        psllw     xmm7,BYTE_BIT
+        por       xmm5,xmm7             ; xmm5=Cb
+        movdqa    XMMWORD [rbx], xmm5   ; Save Cb
 
-	movdqa    xmm0, XMMWORD [wk(3)]	; xmm0=BO
-	movdqa    xmm6, XMMWORD [wk(2)]	; xmm6=BE
-	movdqa    xmm1, XMMWORD [wk(1)]	; xmm1=RO
+        movdqa    xmm0, XMMWORD [wk(3)] ; xmm0=BO
+        movdqa    xmm6, XMMWORD [wk(2)] ; xmm6=BE
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=RO
 
-	movdqa    xmm4,xmm0
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm4,xmm3
-	movdqa    xmm7,xmm0
-	movdqa    xmm5,xmm4
-	pmaddwd   xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
-	pmaddwd   xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
-	pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        movdqa    xmm7,xmm0
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
 
-	movdqa    xmm3,[rel PD_ONEHALF]	; xmm3=[PD_ONEHALF]
+        movdqa    xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
 
-	paddd     xmm0, XMMWORD [wk(4)]
-	paddd     xmm4, XMMWORD [wk(5)]
-	paddd     xmm0,xmm3
-	paddd     xmm4,xmm3
-	psrld     xmm0,SCALEBITS	; xmm0=YOL
-	psrld     xmm4,SCALEBITS	; xmm4=YOH
-	packssdw  xmm0,xmm4		; xmm0=YO
+        paddd     xmm0, XMMWORD [wk(4)]
+        paddd     xmm4, XMMWORD [wk(5)]
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
 
-	pxor      xmm3,xmm3
-	pxor      xmm4,xmm4
-	punpcklwd xmm3,xmm1		; xmm3=ROL
-	punpckhwd xmm4,xmm1		; xmm4=ROH
-	psrld     xmm3,1		; xmm3=ROL*FIX(0.500)
-	psrld     xmm4,1		; xmm4=ROH*FIX(0.500)
+        pxor      xmm3,xmm3
+        pxor      xmm4,xmm4
+        punpcklwd xmm3,xmm1             ; xmm3=ROL
+        punpckhwd xmm4,xmm1             ; xmm4=ROH
+        psrld     xmm3,1                ; xmm3=ROL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=ROH*FIX(0.500)
 
-	movdqa    xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
+        movdqa    xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm7,xmm3
-	paddd     xmm5,xmm4
-	paddd     xmm7,xmm1
-	paddd     xmm5,xmm1
-	psrld     xmm7,SCALEBITS	; xmm7=CrOL
-	psrld     xmm5,SCALEBITS	; xmm5=CrOH
-	packssdw  xmm7,xmm5		; xmm7=CrO
+        paddd     xmm7,xmm3
+        paddd     xmm5,xmm4
+        paddd     xmm7,xmm1
+        paddd     xmm5,xmm1
+        psrld     xmm7,SCALEBITS        ; xmm7=CrOL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrOH
+        packssdw  xmm7,xmm5             ; xmm7=CrO
 
-	movdqa    xmm3, XMMWORD [wk(0)]	; xmm3=RE
+        movdqa    xmm3, XMMWORD [wk(0)] ; xmm3=RE
 
-	movdqa    xmm4,xmm6
-	punpcklwd xmm6,xmm2
-	punpckhwd xmm4,xmm2
-	movdqa    xmm1,xmm6
-	movdqa    xmm5,xmm4
-	pmaddwd   xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
-	pmaddwd   xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
-	pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
 
-	movdqa    xmm2,[rel PD_ONEHALF]	; xmm2=[PD_ONEHALF]
+        movdqa    xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
 
-	paddd     xmm6, XMMWORD [wk(6)]
-	paddd     xmm4, XMMWORD [wk(7)]
-	paddd     xmm6,xmm2
-	paddd     xmm4,xmm2
-	psrld     xmm6,SCALEBITS	; xmm6=YEL
-	psrld     xmm4,SCALEBITS	; xmm4=YEH
-	packssdw  xmm6,xmm4		; xmm6=YE
+        paddd     xmm6, XMMWORD [wk(6)]
+        paddd     xmm4, XMMWORD [wk(7)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
 
-	psllw     xmm0,BYTE_BIT
-	por       xmm6,xmm0		; xmm6=Y
-	movdqa    XMMWORD [rdi], xmm6	; Save Y
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [rdi], xmm6   ; Save Y
 
-	pxor      xmm2,xmm2
-	pxor      xmm4,xmm4
-	punpcklwd xmm2,xmm3		; xmm2=REL
-	punpckhwd xmm4,xmm3		; xmm4=REH
-	psrld     xmm2,1		; xmm2=REL*FIX(0.500)
-	psrld     xmm4,1		; xmm4=REH*FIX(0.500)
+        pxor      xmm2,xmm2
+        pxor      xmm4,xmm4
+        punpcklwd xmm2,xmm3             ; xmm2=REL
+        punpckhwd xmm4,xmm3             ; xmm4=REH
+        psrld     xmm2,1                ; xmm2=REL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=REH*FIX(0.500)
 
-	movdqa    xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
+        movdqa    xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm1,xmm2
-	paddd     xmm5,xmm4
-	paddd     xmm1,xmm0
-	paddd     xmm5,xmm0
-	psrld     xmm1,SCALEBITS	; xmm1=CrEL
-	psrld     xmm5,SCALEBITS	; xmm5=CrEH
-	packssdw  xmm1,xmm5		; xmm1=CrE
+        paddd     xmm1,xmm2
+        paddd     xmm5,xmm4
+        paddd     xmm1,xmm0
+        paddd     xmm5,xmm0
+        psrld     xmm1,SCALEBITS        ; xmm1=CrEL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrEH
+        packssdw  xmm1,xmm5             ; xmm1=CrE
 
-	psllw     xmm7,BYTE_BIT
-	por       xmm1,xmm7		; xmm1=Cr
-	movdqa    XMMWORD [rdx], xmm1	; Save Cr
+        psllw     xmm7,BYTE_BIT
+        por       xmm1,xmm7             ; xmm1=Cr
+        movdqa    XMMWORD [rdx], xmm1   ; Save Cr
 
-	sub	rcx, byte SIZEOF_XMMWORD
-	add	rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
-	add	rdi, byte SIZEOF_XMMWORD		; outptr0
-	add	rbx, byte SIZEOF_XMMWORD		; outptr1
-	add	rdx, byte SIZEOF_XMMWORD		; outptr2
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	rcx,rcx
-	jnz	near .column_ld1
+        sub     rcx, byte SIZEOF_XMMWORD
+        add     rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     rdi, byte SIZEOF_XMMWORD                ; outptr0
+        add     rbx, byte SIZEOF_XMMWORD                ; outptr1
+        add     rdx, byte SIZEOF_XMMWORD                ; outptr2
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    rcx,rcx
+        jnz     near .column_ld1
 
-	pop	rcx			; col
-	pop	rsi
-	pop	rdi
-	pop	rbx
-	pop	rdx
+        pop     rcx                     ; col
+        pop     rsi
+        pop     rdi
+        pop     rbx
+        pop     rdx
 
-	add	rsi, byte SIZEOF_JSAMPROW	; input_buf
-	add	rdi, byte SIZEOF_JSAMPROW
-	add	rbx, byte SIZEOF_JSAMPROW
-	add	rdx, byte SIZEOF_JSAMPROW
-	dec	rax				; num_rows
-	jg	near .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     rdi, byte SIZEOF_JSAMPROW
+        add     rbx, byte SIZEOF_JSAMPROW
+        add     rdx, byte SIZEOF_JSAMPROW
+        dec     rax                             ; num_rows
+        jg      near .rowloop
 
 .return:
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcclrss2.asm b/simd/jcclrss2.asm
index 517b705..220d016 100644
--- a/simd/jcclrss2.asm
+++ b/simd/jcclrss2.asm
@@ -25,479 +25,479 @@
 ;                             JDIMENSION output_row, int num_rows);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION img_width
-%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
-%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
-%define output_row(b)	(b)+20		; JDIMENSION output_row
-%define num_rows(b)	(b)+24		; int num_rows
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		8
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          8
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
+        align   16
 
-	global	EXTN(jsimd_rgb_ycc_convert_sse2)
+        global  EXTN(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [img_width(eax)]
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [img_width(eax)]
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	esi, JSAMPIMAGE [output_buf(eax)]
-	mov	ecx, JDIMENSION [output_row(eax)]
-	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
-	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
-	lea	ebx, [ebx+ecx*SIZEOF_JSAMPROW]
-	lea	edx, [edx+ecx*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	esi, JSAMPARRAY [input_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	pushpic	eax
-	push	edx
-	push	ebx
-	push	edi
-	push	esi
-	push	ecx			; col
+        pushpic eax
+        push    edx
+        push    ebx
+        push    edi
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr0
-	mov	ebx, JSAMPROW [ebx]	; outptr1
-	mov	edx, JSAMPROW [edx]	; outptr2
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        mov     ebx, JSAMPROW [ebx]     ; outptr1
+        mov     edx, JSAMPROW [edx]     ; outptr2
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        alignx  16,7
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	eax
-	push	edx
-	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_BYTE
-	movzx	eax, BYTE [esi+ecx]
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        movzx   eax, BYTE [esi+ecx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_WORD
-	movzx	edx, WORD [esi+ecx]
-	shl	eax, WORD_BIT
-	or	eax,edx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        movzx   edx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
 .column_ld4:
-	movd	xmmA,eax
-	pop	edx
-	pop	eax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_DWORD
-	movd	xmmF, XMM_DWORD [esi+ecx]
-	pslldq	xmmA, SIZEOF_DWORD
-	por	xmmA,xmmF
+        movd    xmmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	sub	ecx, byte SIZEOF_MMWORD
-	movq	xmmB, XMM_MMWORD [esi+ecx]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmB
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     ecx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
 .column_ld16:
-	test	cl, SIZEOF_XMMWORD
-	jz	short .column_ld32
-	movdqa	xmmF,xmmA
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	mov	ecx, SIZEOF_XMMWORD
-	jmp	short .rgb_ycc_cnv
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .rgb_ycc_cnv
 .column_ld32:
-	test	cl, 2*SIZEOF_XMMWORD
-	mov	ecx, SIZEOF_XMMWORD
-	jz	short .rgb_ycc_cnv
-	movdqa	xmmB,xmmA
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_ycc_cnv
-	alignx	16,7
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	movdqu	xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
 
 .rgb_ycc_cnv:
-	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	movdqa    xmmG,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
-	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
-	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
 
-	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
-	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
 
-	movdqa    xmmD,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
-	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
-	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
 
-	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
-	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
 
-	movdqa    xmmE,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
-	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
 
-	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
 
-	pxor      xmmH,xmmH
+        pxor      xmmH,xmmH
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmB,xmmE
-	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
 
-	movdqa    xmmF,xmmD
-	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_XMMWORD/16
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_XMMWORD/16
-	movd	xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_XMMWORD/8
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_XMMWORD/8
-	movq	xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmE
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
 .column_ld4:
-	test	cl, SIZEOF_XMMWORD/4
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_XMMWORD/4
-	movdqa	xmmE,xmmA
-	movdqu	xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld8:
-	test	cl, SIZEOF_XMMWORD/2
-	mov	ecx, SIZEOF_XMMWORD
-	jz	short .rgb_ycc_cnv
-	movdqa	xmmF,xmmA
-	movdqa	xmmH,xmmE
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_ycc_cnv
-	alignx	16,7
+        test    cl, SIZEOF_XMMWORD/2
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
-	movdqu	xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
 
 .rgb_ycc_cnv:
-	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
-	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
 
-	movdqa    xmmC,xmmF
-	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
-	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
 
-	movdqa    xmmB,xmmA
-	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
-	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
 
-	movdqa    xmmG,xmmD
-	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
-	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
 
-	movdqa    xmmE,xmmA
-	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
 
-	movdqa    xmmH,xmmB
-	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
 
-	pxor      xmmF,xmmF
+        pxor      xmmF,xmmF
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmD,xmmB
-	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
 
-	movdqa    xmmG,xmmE
-	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
 
-	punpcklbw xmmF,xmmH
-	punpckhbw xmmH,xmmH
-	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
-	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
-	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
-	; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
-	; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
 
-	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=RE
-	movdqa    XMMWORD [wk(1)], xmm1	; wk(1)=RO
-	movdqa    XMMWORD [wk(2)], xmm4	; wk(2)=BE
-	movdqa    XMMWORD [wk(3)], xmm5	; wk(3)=BO
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=RE
+        movdqa    XMMWORD [wk(1)], xmm1 ; wk(1)=RO
+        movdqa    XMMWORD [wk(2)], xmm4 ; wk(2)=BE
+        movdqa    XMMWORD [wk(3)], xmm5 ; wk(3)=BO
 
-	movdqa    xmm6,xmm1
-	punpcklwd xmm1,xmm3
-	punpckhwd xmm6,xmm3
-	movdqa    xmm7,xmm1
-	movdqa    xmm4,xmm6
-	pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
-	pmaddwd   xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        movdqa    xmm7,xmm1
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
 
-	movdqa    XMMWORD [wk(4)], xmm1	; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
-	movdqa    XMMWORD [wk(5)], xmm6	; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+        movdqa    XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	pxor      xmm1,xmm1
-	pxor      xmm6,xmm6
-	punpcklwd xmm1,xmm5		; xmm1=BOL
-	punpckhwd xmm6,xmm5		; xmm6=BOH
-	psrld     xmm1,1		; xmm1=BOL*FIX(0.500)
-	psrld     xmm6,1		; xmm6=BOH*FIX(0.500)
+        pxor      xmm1,xmm1
+        pxor      xmm6,xmm6
+        punpcklwd xmm1,xmm5             ; xmm1=BOL
+        punpckhwd xmm6,xmm5             ; xmm6=BOH
+        psrld     xmm1,1                ; xmm1=BOL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BOH*FIX(0.500)
 
-	movdqa    xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
+        movdqa    xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm7,xmm1
-	paddd     xmm4,xmm6
-	paddd     xmm7,xmm5
-	paddd     xmm4,xmm5
-	psrld     xmm7,SCALEBITS	; xmm7=CbOL
-	psrld     xmm4,SCALEBITS	; xmm4=CbOH
-	packssdw  xmm7,xmm4		; xmm7=CbO
+        paddd     xmm7,xmm1
+        paddd     xmm4,xmm6
+        paddd     xmm7,xmm5
+        paddd     xmm4,xmm5
+        psrld     xmm7,SCALEBITS        ; xmm7=CbOL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbOH
+        packssdw  xmm7,xmm4             ; xmm7=CbO
 
-	movdqa    xmm1, XMMWORD [wk(2)]	; xmm1=BE
+        movdqa    xmm1, XMMWORD [wk(2)] ; xmm1=BE
 
-	movdqa    xmm6,xmm0
-	punpcklwd xmm0,xmm2
-	punpckhwd xmm6,xmm2
-	movdqa    xmm5,xmm0
-	movdqa    xmm4,xmm6
-	pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
-	pmaddwd   xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
 
-	movdqa    XMMWORD [wk(6)], xmm0	; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movdqa    XMMWORD [wk(7)], xmm6	; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	pxor      xmm0,xmm0
-	pxor      xmm6,xmm6
-	punpcklwd xmm0,xmm1		; xmm0=BEL
-	punpckhwd xmm6,xmm1		; xmm6=BEH
-	psrld     xmm0,1		; xmm0=BEL*FIX(0.500)
-	psrld     xmm6,1		; xmm6=BEH*FIX(0.500)
+        pxor      xmm0,xmm0
+        pxor      xmm6,xmm6
+        punpcklwd xmm0,xmm1             ; xmm0=BEL
+        punpckhwd xmm6,xmm1             ; xmm6=BEH
+        psrld     xmm0,1                ; xmm0=BEL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BEH*FIX(0.500)
 
-	movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+        movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm5,xmm0
-	paddd     xmm4,xmm6
-	paddd     xmm5,xmm1
-	paddd     xmm4,xmm1
-	psrld     xmm5,SCALEBITS	; xmm5=CbEL
-	psrld     xmm4,SCALEBITS	; xmm4=CbEH
-	packssdw  xmm5,xmm4		; xmm5=CbE
+        paddd     xmm5,xmm0
+        paddd     xmm4,xmm6
+        paddd     xmm5,xmm1
+        paddd     xmm4,xmm1
+        psrld     xmm5,SCALEBITS        ; xmm5=CbEL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbEH
+        packssdw  xmm5,xmm4             ; xmm5=CbE
 
-	psllw     xmm7,BYTE_BIT
-	por       xmm5,xmm7		; xmm5=Cb
-	movdqa    XMMWORD [ebx], xmm5	; Save Cb
+        psllw     xmm7,BYTE_BIT
+        por       xmm5,xmm7             ; xmm5=Cb
+        movdqa    XMMWORD [ebx], xmm5   ; Save Cb
 
-	movdqa    xmm0, XMMWORD [wk(3)]	; xmm0=BO
-	movdqa    xmm6, XMMWORD [wk(2)]	; xmm6=BE
-	movdqa    xmm1, XMMWORD [wk(1)]	; xmm1=RO
+        movdqa    xmm0, XMMWORD [wk(3)] ; xmm0=BO
+        movdqa    xmm6, XMMWORD [wk(2)] ; xmm6=BE
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=RO
 
-	movdqa    xmm4,xmm0
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm4,xmm3
-	movdqa    xmm7,xmm0
-	movdqa    xmm5,xmm4
-	pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
-	pmaddwd   xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
-	pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        movdqa    xmm7,xmm0
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
 
-	movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)]	; xmm3=[PD_ONEHALF]
+        movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
 
-	paddd     xmm0, XMMWORD [wk(4)]
-	paddd     xmm4, XMMWORD [wk(5)]
-	paddd     xmm0,xmm3
-	paddd     xmm4,xmm3
-	psrld     xmm0,SCALEBITS	; xmm0=YOL
-	psrld     xmm4,SCALEBITS	; xmm4=YOH
-	packssdw  xmm0,xmm4		; xmm0=YO
+        paddd     xmm0, XMMWORD [wk(4)]
+        paddd     xmm4, XMMWORD [wk(5)]
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
 
-	pxor      xmm3,xmm3
-	pxor      xmm4,xmm4
-	punpcklwd xmm3,xmm1		; xmm3=ROL
-	punpckhwd xmm4,xmm1		; xmm4=ROH
-	psrld     xmm3,1		; xmm3=ROL*FIX(0.500)
-	psrld     xmm4,1		; xmm4=ROH*FIX(0.500)
+        pxor      xmm3,xmm3
+        pxor      xmm4,xmm4
+        punpcklwd xmm3,xmm1             ; xmm3=ROL
+        punpckhwd xmm4,xmm1             ; xmm4=ROH
+        psrld     xmm3,1                ; xmm3=ROL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=ROH*FIX(0.500)
 
-	movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+        movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm7,xmm3
-	paddd     xmm5,xmm4
-	paddd     xmm7,xmm1
-	paddd     xmm5,xmm1
-	psrld     xmm7,SCALEBITS	; xmm7=CrOL
-	psrld     xmm5,SCALEBITS	; xmm5=CrOH
-	packssdw  xmm7,xmm5		; xmm7=CrO
+        paddd     xmm7,xmm3
+        paddd     xmm5,xmm4
+        paddd     xmm7,xmm1
+        paddd     xmm5,xmm1
+        psrld     xmm7,SCALEBITS        ; xmm7=CrOL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrOH
+        packssdw  xmm7,xmm5             ; xmm7=CrO
 
-	movdqa    xmm3, XMMWORD [wk(0)]	; xmm3=RE
+        movdqa    xmm3, XMMWORD [wk(0)] ; xmm3=RE
 
-	movdqa    xmm4,xmm6
-	punpcklwd xmm6,xmm2
-	punpckhwd xmm4,xmm2
-	movdqa    xmm1,xmm6
-	movdqa    xmm5,xmm4
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
-	pmaddwd   xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
-	pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
 
-	movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)]	; xmm2=[PD_ONEHALF]
+        movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
 
-	paddd     xmm6, XMMWORD [wk(6)]
-	paddd     xmm4, XMMWORD [wk(7)]
-	paddd     xmm6,xmm2
-	paddd     xmm4,xmm2
-	psrld     xmm6,SCALEBITS	; xmm6=YEL
-	psrld     xmm4,SCALEBITS	; xmm4=YEH
-	packssdw  xmm6,xmm4		; xmm6=YE
+        paddd     xmm6, XMMWORD [wk(6)]
+        paddd     xmm4, XMMWORD [wk(7)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
 
-	psllw     xmm0,BYTE_BIT
-	por       xmm6,xmm0		; xmm6=Y
-	movdqa    XMMWORD [edi], xmm6	; Save Y
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [edi], xmm6   ; Save Y
 
-	pxor      xmm2,xmm2
-	pxor      xmm4,xmm4
-	punpcklwd xmm2,xmm3		; xmm2=REL
-	punpckhwd xmm4,xmm3		; xmm4=REH
-	psrld     xmm2,1		; xmm2=REL*FIX(0.500)
-	psrld     xmm4,1		; xmm4=REH*FIX(0.500)
+        pxor      xmm2,xmm2
+        pxor      xmm4,xmm4
+        punpcklwd xmm2,xmm3             ; xmm2=REL
+        punpckhwd xmm4,xmm3             ; xmm4=REH
+        psrld     xmm2,1                ; xmm2=REL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=REH*FIX(0.500)
 
-	movdqa    xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
+        movdqa    xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
 
-	paddd     xmm1,xmm2
-	paddd     xmm5,xmm4
-	paddd     xmm1,xmm0
-	paddd     xmm5,xmm0
-	psrld     xmm1,SCALEBITS	; xmm1=CrEL
-	psrld     xmm5,SCALEBITS	; xmm5=CrEH
-	packssdw  xmm1,xmm5		; xmm1=CrE
+        paddd     xmm1,xmm2
+        paddd     xmm5,xmm4
+        paddd     xmm1,xmm0
+        paddd     xmm5,xmm0
+        psrld     xmm1,SCALEBITS        ; xmm1=CrEL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrEH
+        packssdw  xmm1,xmm5             ; xmm1=CrE
 
-	psllw     xmm7,BYTE_BIT
-	por       xmm1,xmm7		; xmm1=Cr
-	movdqa    XMMWORD [edx], xmm1	; Save Cr
+        psllw     xmm7,BYTE_BIT
+        por       xmm1,xmm7             ; xmm1=Cr
+        movdqa    XMMWORD [edx], xmm1   ; Save Cr
 
-	sub	ecx, byte SIZEOF_XMMWORD
-	add	esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
-	add	edi, byte SIZEOF_XMMWORD		; outptr0
-	add	ebx, byte SIZEOF_XMMWORD		; outptr1
-	add	edx, byte SIZEOF_XMMWORD		; outptr2
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	ecx,ecx
-	jnz	near .column_ld1
+        sub     ecx, byte SIZEOF_XMMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     edi, byte SIZEOF_XMMWORD                ; outptr0
+        add     ebx, byte SIZEOF_XMMWORD                ; outptr1
+        add     edx, byte SIZEOF_XMMWORD                ; outptr2
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
 
-	pop	ecx			; col
-	pop	esi
-	pop	edi
-	pop	ebx
-	pop	edx
-	poppic	eax
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        pop     ebx
+        pop     edx
+        poppic  eax
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_buf
-	add	edi, byte SIZEOF_JSAMPROW
-	add	ebx, byte SIZEOF_JSAMPROW
-	add	edx, byte SIZEOF_JSAMPROW
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jccolmmx.asm b/simd/jccolmmx.asm
index 9650e47..87058ee 100644
--- a/simd/jccolmmx.asm
+++ b/simd/jccolmmx.asm
@@ -21,38 +21,38 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_081	equ	 5329			; FIX(0.08131)
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_168	equ	11059			; FIX(0.16874)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_331	equ	21709			; FIX(0.33126)
-F_0_418	equ	27439			; FIX(0.41869)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_ycc_convert_mmx)
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_mmx)
 
 EXTN(jconst_rgb_ycc_convert_mmx):
 
-PW_F0299_F0337	times 2 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 2 dw  F_0_114, F_0_250
-PW_MF016_MF033	times 2 dw -F_0_168,-F_0_331
-PW_MF008_MF041	times 2 dw -F_0_081,-F_0_418
-PD_ONEHALFM1_CJ	times 2 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
-PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 2 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 2 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 2 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 2 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 2 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jcclrmmx.asm"
 
diff --git a/simd/jccolss2-64.asm b/simd/jccolss2-64.asm
index ae60148..5b1ee78 100644
--- a/simd/jccolss2-64.asm
+++ b/simd/jccolss2-64.asm
@@ -18,38 +18,38 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_081	equ	 5329			; FIX(0.08131)
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_168	equ	11059			; FIX(0.16874)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_331	equ	21709			; FIX(0.33126)
-F_0_418	equ	27439			; FIX(0.41869)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_ycc_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
 
-PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
-PW_MF016_MF033	times 4 dw -F_0_168,-F_0_331
-PW_MF008_MF041	times 4 dw -F_0_081,-F_0_418
-PD_ONEHALFM1_CJ	times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
-PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 4 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 4 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 
 %include "jcclrss2-64.asm"
 
diff --git a/simd/jccolss2.asm b/simd/jccolss2.asm
index ac001d1..2b8faef 100644
--- a/simd/jccolss2.asm
+++ b/simd/jccolss2.asm
@@ -18,38 +18,38 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_081	equ	 5329			; FIX(0.08131)
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_168	equ	11059			; FIX(0.16874)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_331	equ	21709			; FIX(0.33126)
-F_0_418	equ	27439			; FIX(0.41869)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_ycc_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
 
-PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
-PW_MF016_MF033	times 4 dw -F_0_168,-F_0_331
-PW_MF008_MF041	times 4 dw -F_0_081,-F_0_418
-PD_ONEHALFM1_CJ	times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
-PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 4 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 4 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jcclrss2.asm"
 
diff --git a/simd/jcgrammx.asm b/simd/jcgrammx.asm
index b8b8dd3..43ffd0f 100644
--- a/simd/jcgrammx.asm
+++ b/simd/jcgrammx.asm
@@ -21,31 +21,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_gray_convert_mmx)
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_mmx)
 
 EXTN(jconst_rgb_gray_convert_mmx):
 
-PW_F0299_F0337	times 2 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 2 dw  F_0_114, F_0_250
-PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 2 dw  F_0_114, F_0_250
+PD_ONEHALF      times 2 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jcgrymmx.asm"
 
diff --git a/simd/jcgrass2-64.asm b/simd/jcgrass2-64.asm
index ba28cc3..39236ff 100644
--- a/simd/jcgrass2-64.asm
+++ b/simd/jcgrass2-64.asm
@@ -18,31 +18,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_gray_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
 
-PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
-PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 
 %include "jcgryss2-64.asm"
 
diff --git a/simd/jcgrass2.asm b/simd/jcgrass2.asm
index 998968e..f5bd93d 100644
--- a/simd/jcgrass2.asm
+++ b/simd/jcgrass2.asm
@@ -18,31 +18,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_114	equ	 7471			; FIX(0.11400)
-F_0_250	equ	16384			; FIX(0.25000)
-F_0_299	equ	19595			; FIX(0.29900)
-F_0_587	equ	38470			; FIX(0.58700)
-F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_rgb_gray_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
 
-PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
-PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
-PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jcgryss2.asm"
 
diff --git a/simd/jcgrymmx.asm b/simd/jcgrymmx.asm
index bbeea09..cbe5622 100644
--- a/simd/jcgrymmx.asm
+++ b/simd/jcgrymmx.asm
@@ -29,329 +29,329 @@
 ;                             JDIMENSION output_row, int num_rows);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION img_width
-%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
-%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
-%define output_row(b)	(b)+20		; JDIMENSION output_row
-%define num_rows(b)	(b)+24		; int num_rows
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_rgb_gray_convert_mmx)
+        align   16
+        global  EXTN(jsimd_rgb_gray_convert_mmx)
 
 EXTN(jsimd_rgb_gray_convert_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = frame base (address of saved ebp)
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make room for the GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [img_width(eax)]	; num_cols
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [img_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	esi, JSAMPIMAGE [output_buf(eax)]
-	mov	ecx, JDIMENSION [output_row(eax)]
-	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
-	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	esi, JSAMPARRAY [input_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	pushpic	eax
-	push	edi
-	push	esi
-	push	ecx			; col
+        pushpic eax
+        push    edi
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr0
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jae	short .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     short .columnloop
+        alignx  16,7
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	eax
-	push	edx
-	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_BYTE
-	xor	eax,eax
-	mov	al, BYTE [esi+ecx]
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        xor     eax,eax
+        mov     al, BYTE [esi+ecx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_WORD
-	xor	edx,edx
-	mov	dx, WORD [esi+ecx]
-	shl	eax, WORD_BIT
-	or	eax,edx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        xor     edx,edx
+        mov     dx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
 .column_ld4:
-	movd	mmA,eax
-	pop	edx
-	pop	eax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_DWORD
-	movd	mmG, DWORD [esi+ecx]
-	psllq	mmA, DWORD_BIT
-	por	mmA,mmG
+        movd    mmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    mmG, DWORD [esi+ecx]
+        psllq   mmA, DWORD_BIT
+        por     mmA,mmG
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	movq	mmG,mmA
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	mov	ecx, SIZEOF_MMWORD
-	jmp	short .rgb_gray_cnv
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        movq    mmG,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        mov     ecx, SIZEOF_MMWORD
+        jmp     short .rgb_gray_cnv
 .column_ld16:
-	test	cl, 2*SIZEOF_MMWORD
-	mov	ecx, SIZEOF_MMWORD
-	jz	short .rgb_gray_cnv
-	movq	mmF,mmA
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
-	jmp	short .rgb_gray_cnv
-	alignx	16,7
+        test    cl, 2*SIZEOF_MMWORD
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_gray_cnv
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
 
 .columnloop:
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+2*SIZEOF_MMWORD]
 
 .rgb_gray_cnv:
-	; mmA=(00 10 20 01 11 21 02 12)
-	; mmG=(22 03 13 23 04 14 24 05)
-	; mmF=(15 25 06 16 26 07 17 27)
+        ; mmA=(00 10 20 01 11 21 02 12)
+        ; mmG=(22 03 13 23 04 14 24 05)
+        ; mmF=(15 25 06 16 26 07 17 27)
 
-	movq      mmD,mmA
-	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 10 20 01)
-	psrlq     mmD,4*BYTE_BIT	; mmD=(11 21 02 12 -- -- -- --)
+        movq      mmD,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 10 20 01)
+        psrlq     mmD,4*BYTE_BIT        ; mmD=(11 21 02 12 -- -- -- --)
 
-	punpckhbw mmA,mmG		; mmA=(00 04 10 14 20 24 01 05)
-	psllq     mmG,4*BYTE_BIT	; mmG=(-- -- -- -- 22 03 13 23)
+        punpckhbw mmA,mmG               ; mmA=(00 04 10 14 20 24 01 05)
+        psllq     mmG,4*BYTE_BIT        ; mmG=(-- -- -- -- 22 03 13 23)
 
-	punpcklbw mmD,mmF		; mmD=(11 15 21 25 02 06 12 16)
-	punpckhbw mmG,mmF		; mmG=(22 26 03 07 13 17 23 27)
+        punpcklbw mmD,mmF               ; mmD=(11 15 21 25 02 06 12 16)
+        punpckhbw mmG,mmF               ; mmG=(22 26 03 07 13 17 23 27)
 
-	movq      mmE,mmA
-	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 04 10 14)
-	psrlq     mmE,4*BYTE_BIT	; mmE=(20 24 01 05 -- -- -- --)
+        movq      mmE,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 04 10 14)
+        psrlq     mmE,4*BYTE_BIT        ; mmE=(20 24 01 05 -- -- -- --)
 
-	punpckhbw mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
-	psllq     mmD,4*BYTE_BIT	; mmD=(-- -- -- -- 11 15 21 25)
+        punpckhbw mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        psllq     mmD,4*BYTE_BIT        ; mmD=(-- -- -- -- 11 15 21 25)
 
-	punpcklbw mmE,mmG		; mmE=(20 22 24 26 01 03 05 07)
-	punpckhbw mmD,mmG		; mmD=(11 13 15 17 21 23 25 27)
+        punpcklbw mmE,mmG               ; mmE=(20 22 24 26 01 03 05 07)
+        punpckhbw mmD,mmG               ; mmD=(11 13 15 17 21 23 25 27)
 
-	pxor      mmH,mmH
+        pxor      mmH,mmH
 
-	movq      mmC,mmA
-	punpcklbw mmA,mmH		; mmA=(00 02 04 06)
-	punpckhbw mmC,mmH		; mmC=(10 12 14 16)
+        movq      mmC,mmA
+        punpcklbw mmA,mmH               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmH               ; mmC=(10 12 14 16)
 
-	movq      mmB,mmE
-	punpcklbw mmE,mmH		; mmE=(20 22 24 26)
-	punpckhbw mmB,mmH		; mmB=(01 03 05 07)
+        movq      mmB,mmE
+        punpcklbw mmE,mmH               ; mmE=(20 22 24 26)
+        punpckhbw mmB,mmH               ; mmB=(01 03 05 07)
 
-	movq      mmF,mmD
-	punpcklbw mmD,mmH		; mmD=(11 13 15 17)
-	punpckhbw mmF,mmH		; mmF=(21 23 25 27)
+        movq      mmF,mmD
+        punpcklbw mmD,mmH               ; mmD=(11 13 15 17)
+        punpckhbw mmF,mmH               ; mmF=(21 23 25 27)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_MMWORD/8
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_MMWORD/8
-	movd	mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_MMWORD/8
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_MMWORD/8
+        movd    mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_MMWORD/4
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_MMWORD/4
-	movq	mmF,mmA
-	movq	mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_MMWORD/4
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_MMWORD/4
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld4:
-	test	cl, SIZEOF_MMWORD/2
-	mov	ecx, SIZEOF_MMWORD
-	jz	short .rgb_gray_cnv
-	movq	mmD,mmA
-	movq	mmC,mmF
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
-	jmp	short .rgb_gray_cnv
-	alignx	16,7
+        test    cl, SIZEOF_MMWORD/2
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_gray_cnv
+        movq    mmD,mmA
+        movq    mmC,mmF
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
 
 .columnloop:
-	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
-	movq	mmD, MMWORD [esi+2*SIZEOF_MMWORD]
-	movq	mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmC, MMWORD [esi+3*SIZEOF_MMWORD]
 
 .rgb_gray_cnv:
-	; mmA=(00 10 20 30 01 11 21 31)
-	; mmF=(02 12 22 32 03 13 23 33)
-	; mmD=(04 14 24 34 05 15 25 35)
-	; mmC=(06 16 26 36 07 17 27 37)
+        ; mmA=(00 10 20 30 01 11 21 31)
+        ; mmF=(02 12 22 32 03 13 23 33)
+        ; mmD=(04 14 24 34 05 15 25 35)
+        ; mmC=(06 16 26 36 07 17 27 37)
 
-	movq      mmB,mmA
-	punpcklbw mmA,mmF		; mmA=(00 02 10 12 20 22 30 32)
-	punpckhbw mmB,mmF		; mmB=(01 03 11 13 21 23 31 33)
+        movq      mmB,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 10 12 20 22 30 32)
+        punpckhbw mmB,mmF               ; mmB=(01 03 11 13 21 23 31 33)
 
-	movq      mmG,mmD
-	punpcklbw mmD,mmC		; mmD=(04 06 14 16 24 26 34 36)
-	punpckhbw mmG,mmC		; mmG=(05 07 15 17 25 27 35 37)
+        movq      mmG,mmD
+        punpcklbw mmD,mmC               ; mmD=(04 06 14 16 24 26 34 36)
+        punpckhbw mmG,mmC               ; mmG=(05 07 15 17 25 27 35 37)
 
-	movq      mmE,mmA
-	punpcklwd mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
-	punpckhwd mmE,mmD		; mmE=(20 22 24 26 30 32 34 36)
+        movq      mmE,mmA
+        punpcklwd mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        punpckhwd mmE,mmD               ; mmE=(20 22 24 26 30 32 34 36)
 
-	movq      mmH,mmB
-	punpcklwd mmB,mmG		; mmB=(01 03 05 07 11 13 15 17)
-	punpckhwd mmH,mmG		; mmH=(21 23 25 27 31 33 35 37)
+        movq      mmH,mmB
+        punpcklwd mmB,mmG               ; mmB=(01 03 05 07 11 13 15 17)
+        punpckhwd mmH,mmG               ; mmH=(21 23 25 27 31 33 35 37)
 
-	pxor      mmF,mmF
+        pxor      mmF,mmF
 
-	movq      mmC,mmA
-	punpcklbw mmA,mmF		; mmA=(00 02 04 06)
-	punpckhbw mmC,mmF		; mmC=(10 12 14 16)
+        movq      mmC,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmF               ; mmC=(10 12 14 16)
 
-	movq      mmD,mmB
-	punpcklbw mmB,mmF		; mmB=(01 03 05 07)
-	punpckhbw mmD,mmF		; mmD=(11 13 15 17)
+        movq      mmD,mmB
+        punpcklbw mmB,mmF               ; mmB=(01 03 05 07)
+        punpckhbw mmD,mmF               ; mmD=(11 13 15 17)
 
-	movq      mmG,mmE
-	punpcklbw mmE,mmF		; mmE=(20 22 24 26)
-	punpckhbw mmG,mmF		; mmG=(30 32 34 36)
+        movq      mmG,mmE
+        punpcklbw mmE,mmF               ; mmE=(20 22 24 26)
+        punpckhbw mmG,mmF               ; mmG=(30 32 34 36)
 
-	punpcklbw mmF,mmH
-	punpckhbw mmH,mmH
-	psrlw     mmF,BYTE_BIT		; mmF=(21 23 25 27)
-	psrlw     mmH,BYTE_BIT		; mmH=(31 33 35 37)
+        punpcklbw mmF,mmH
+        punpckhbw mmH,mmH
+        psrlw     mmF,BYTE_BIT          ; mmF=(21 23 25 27)
+        psrlw     mmH,BYTE_BIT          ; mmH=(31 33 35 37)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
-	; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+        ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+        ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
 
-	movq      mm6,mm1
-	punpcklwd mm1,mm3
-	punpckhwd mm6,mm3
-	pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movq      mm6,mm1
+        punpcklwd mm1,mm3
+        punpckhwd mm6,mm3
+        pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movq      mm7, mm6	; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movq      mm7, mm6      ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movq      mm6,mm0
-	punpcklwd mm0,mm2
-	punpckhwd mm6,mm2
-	pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        movq      mm6,mm0
+        punpcklwd mm0,mm2
+        punpckhwd mm6,mm2
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movq      MMWORD [wk(0)], mm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movq      MMWORD [wk(1)], mm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movq      MMWORD [wk(0)], mm0   ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movq      MMWORD [wk(1)], mm6   ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movq      mm0, mm5	; mm0=BO
-	movq      mm6, mm4	; mm6=BE
+        movq      mm0, mm5      ; mm0=BO
+        movq      mm6, mm4      ; mm6=BE
 
-	movq      mm4,mm0
-	punpcklwd mm0,mm3
-	punpckhwd mm4,mm3
-	pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        movq      mm4,mm0
+        punpcklwd mm0,mm3
+        punpckhwd mm4,mm3
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
 
-	movq      mm3,[GOTOFF(eax,PD_ONEHALF)]	; mm3=[PD_ONEHALF]
+        movq      mm3,[GOTOFF(eax,PD_ONEHALF)]  ; mm3=[PD_ONEHALF]
 
-	paddd     mm0, mm1
-	paddd     mm4, mm7
-	paddd     mm0,mm3
-	paddd     mm4,mm3
-	psrld     mm0,SCALEBITS		; mm0=YOL
-	psrld     mm4,SCALEBITS		; mm4=YOH
-	packssdw  mm0,mm4		; mm0=YO
+        paddd     mm0, mm1
+        paddd     mm4, mm7
+        paddd     mm0,mm3
+        paddd     mm4,mm3
+        psrld     mm0,SCALEBITS         ; mm0=YOL
+        psrld     mm4,SCALEBITS         ; mm4=YOH
+        packssdw  mm0,mm4               ; mm0=YO
 
-	movq      mm4,mm6
-	punpcklwd mm6,mm2
-	punpckhwd mm4,mm2
-	pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        movq      mm4,mm6
+        punpcklwd mm6,mm2
+        punpckhwd mm4,mm2
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
 
-	movq      mm2,[GOTOFF(eax,PD_ONEHALF)]	; mm2=[PD_ONEHALF]
+        movq      mm2,[GOTOFF(eax,PD_ONEHALF)]  ; mm2=[PD_ONEHALF]
 
-	paddd     mm6, MMWORD [wk(0)]
-	paddd     mm4, MMWORD [wk(1)]
-	paddd     mm6,mm2
-	paddd     mm4,mm2
-	psrld     mm6,SCALEBITS		; mm6=YEL
-	psrld     mm4,SCALEBITS		; mm4=YEH
-	packssdw  mm6,mm4		; mm6=YE
+        paddd     mm6, MMWORD [wk(0)]
+        paddd     mm4, MMWORD [wk(1)]
+        paddd     mm6,mm2
+        paddd     mm4,mm2
+        psrld     mm6,SCALEBITS         ; mm6=YEL
+        psrld     mm4,SCALEBITS         ; mm4=YEH
+        packssdw  mm6,mm4               ; mm6=YE
 
-	psllw     mm0,BYTE_BIT
-	por       mm6,mm0		; mm6=Y
-	movq      MMWORD [edi], mm6	; Save Y
+        psllw     mm0,BYTE_BIT
+        por       mm6,mm0               ; mm6=Y
+        movq      MMWORD [edi], mm6     ; Save Y
 
-	sub	ecx, byte SIZEOF_MMWORD
-	add	esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; inptr
-	add	edi, byte SIZEOF_MMWORD			; outptr0
-	cmp	ecx, byte SIZEOF_MMWORD
-	jae	near .columnloop
-	test	ecx,ecx
-	jnz	near .column_ld1
+        sub     ecx, byte SIZEOF_MMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; inptr
+        add     edi, byte SIZEOF_MMWORD                 ; outptr0
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
 
-	pop	ecx			; col
-	pop	esi
-	pop	edi
-	poppic	eax
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        poppic  eax
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_buf
-	add	edi, byte SIZEOF_JSAMPROW
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcgryss2-64.asm b/simd/jcgryss2-64.asm
index 23ae8af..2308129 100644
--- a/simd/jcgryss2-64.asm
+++ b/simd/jcgryss2-64.asm
@@ -32,333 +32,333 @@
 ; r13 = JDIMENSION output_row
 ; r14 = int num_rows
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
+        align   16
 
-	global	EXTN(jsimd_rgb_gray_convert_sse2)
+        global  EXTN(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
 
-	mov	rcx, r10
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10
+        test    rcx,rcx
+        jz      near .return
 
-	push	rcx
+        push    rcx
 
-	mov rsi, r12
-	mov rcx, r13
-	mov	rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
-	lea	rdi, [rdi+rcx*SIZEOF_JSAMPROW]
+        mov rsi, r12
+        mov rcx, r13
+        mov     rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+        lea     rdi, [rdi+rcx*SIZEOF_JSAMPROW]
 
-	pop	rcx
+        pop     rcx
 
-	mov rsi, r11
-	mov	eax, r14d
-	test	rax,rax
-	jle	near .return
+        mov rsi, r11
+        mov     eax, r14d
+        test    rax,rax
+        jle     near .return
 .rowloop:
-	push	rdi
-	push	rsi
-	push	rcx			; col
+        push    rdi
+        push    rsi
+        push    rcx                     ; col
 
-	mov	rsi, JSAMPROW [rsi]	; inptr
-	mov	rdi, JSAMPROW [rdi]	; outptr0
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr0
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	rax
-	push	rdx
-	lea	rcx,[rcx+rcx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	rcx, byte SIZEOF_BYTE
-	movzx	rax, BYTE [rsi+rcx]
+        push    rax
+        push    rdx
+        lea     rcx,[rcx+rcx*2]         ; imul rcx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_BYTE
+        movzx   rax, BYTE [rsi+rcx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	rcx, byte SIZEOF_WORD
-	movzx	rdx, WORD [rsi+rcx]
-	shl	rax, WORD_BIT
-	or	rax,rdx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_WORD
+        movzx   rdx, WORD [rsi+rcx]
+        shl     rax, WORD_BIT
+        or      rax,rdx
 .column_ld4:
-	movd	xmmA,eax
-	pop	rdx
-	pop	rax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	rcx, byte SIZEOF_DWORD
-	movd	xmmF, XMM_DWORD [rsi+rcx]
-	pslldq	xmmA, SIZEOF_DWORD
-	por	xmmA,xmmF
+        movd    xmmA,eax
+        pop     rdx
+        pop     rax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	sub	rcx, byte SIZEOF_MMWORD
-	movq	xmmB, XMM_MMWORD [rsi+rcx]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmB
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     rcx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
 .column_ld16:
-	test	cl, SIZEOF_XMMWORD
-	jz	short .column_ld32
-	movdqa	xmmF,xmmA
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	mov	rcx, SIZEOF_XMMWORD
-	jmp	short .rgb_gray_cnv
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        mov     rcx, SIZEOF_XMMWORD
+        jmp     short .rgb_gray_cnv
 .column_ld32:
-	test	cl, 2*SIZEOF_XMMWORD
-	mov	rcx, SIZEOF_XMMWORD
-	jz	short .rgb_gray_cnv
-	movdqa	xmmB,xmmA
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_gray_cnv
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     rcx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	movdqu	xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
 
 .rgb_gray_cnv:
-	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	movdqa    xmmG,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
-	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
-	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
 
-	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
-	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
 
-	movdqa    xmmD,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
-	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
-	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
 
-	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
-	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
 
-	movdqa    xmmE,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
-	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
 
-	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
 
-	pxor      xmmH,xmmH
+        pxor      xmmH,xmmH
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmB,xmmE
-	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
 
-	movdqa    xmmF,xmmD
-	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_XMMWORD/16
-	jz	short .column_ld2
-	sub	rcx, byte SIZEOF_XMMWORD/16
-	movd	xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_XMMWORD/8
-	jz	short .column_ld4
-	sub	rcx, byte SIZEOF_XMMWORD/8
-	movq	xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmE
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
 .column_ld4:
-	test	cl, SIZEOF_XMMWORD/4
-	jz	short .column_ld8
-	sub	rcx, byte SIZEOF_XMMWORD/4
-	movdqa	xmmE,xmmA
-	movdqu	xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
 .column_ld8:
-	test	cl, SIZEOF_XMMWORD/2
-	mov	rcx, SIZEOF_XMMWORD
-	jz	short .rgb_gray_cnv
-	movdqa	xmmF,xmmA
-	movdqa	xmmH,xmmE
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_gray_cnv
+        test    cl, SIZEOF_XMMWORD/2
+        mov     rcx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
-	movdqu	xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
 
 .rgb_gray_cnv:
-	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
-	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
 
-	movdqa    xmmC,xmmF
-	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
-	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
 
-	movdqa    xmmB,xmmA
-	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
-	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
 
-	movdqa    xmmG,xmmD
-	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
-	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
 
-	movdqa    xmmE,xmmA
-	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
 
-	movdqa    xmmH,xmmB
-	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
 
-	pxor      xmmF,xmmF
+        pxor      xmmF,xmmF
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmD,xmmB
-	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
 
-	movdqa    xmmG,xmmE
-	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
 
-	punpcklbw xmmF,xmmH
-	punpckhbw xmmH,xmmH
-	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
-	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
-	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
 
-	movdqa    xmm6,xmm1
-	punpcklwd xmm1,xmm3
-	punpckhwd xmm6,xmm3
-	pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movdqa    xmm7, xmm6	; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    xmm7, xmm6    ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movdqa    xmm6,xmm0
-	punpcklwd xmm0,xmm2
-	punpckhwd xmm6,xmm2
-	pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movdqa    XMMWORD [wk(1)], xmm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movdqa    xmm0, xmm5	; xmm0=BO
-	movdqa    xmm6, xmm4	; xmm6=BE
+        movdqa    xmm0, xmm5    ; xmm0=BO
+        movdqa    xmm6, xmm4    ; xmm6=BE
 
-	movdqa    xmm4,xmm0
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm4,xmm3
-	pmaddwd   xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        pmaddwd   xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
 
-	movdqa    xmm3,[rel PD_ONEHALF]	; xmm3=[PD_ONEHALF]
+        movdqa    xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
 
-	paddd     xmm0, xmm1
-	paddd     xmm4, xmm7
-	paddd     xmm0,xmm3
-	paddd     xmm4,xmm3
-	psrld     xmm0,SCALEBITS	; xmm0=YOL
-	psrld     xmm4,SCALEBITS	; xmm4=YOH
-	packssdw  xmm0,xmm4		; xmm0=YO
+        paddd     xmm0, xmm1
+        paddd     xmm4, xmm7
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
 
-	movdqa    xmm4,xmm6
-	punpcklwd xmm6,xmm2
-	punpckhwd xmm4,xmm2
-	pmaddwd   xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        pmaddwd   xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
 
-	movdqa    xmm2,[rel PD_ONEHALF]	; xmm2=[PD_ONEHALF]
+        movdqa    xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
 
-	paddd     xmm6, XMMWORD [wk(0)]
-	paddd     xmm4, XMMWORD [wk(1)]
-	paddd     xmm6,xmm2
-	paddd     xmm4,xmm2
-	psrld     xmm6,SCALEBITS	; xmm6=YEL
-	psrld     xmm4,SCALEBITS	; xmm4=YEH
-	packssdw  xmm6,xmm4		; xmm6=YE
+        paddd     xmm6, XMMWORD [wk(0)]
+        paddd     xmm4, XMMWORD [wk(1)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
 
-	psllw     xmm0,BYTE_BIT
-	por       xmm6,xmm0		; xmm6=Y
-	movdqa    XMMWORD [rdi], xmm6	; Save Y
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [rdi], xmm6   ; Save Y
 
-	sub	rcx, byte SIZEOF_XMMWORD
-	add	rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
-	add	rdi, byte SIZEOF_XMMWORD		; outptr0
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	rcx,rcx
-	jnz	near .column_ld1
+        sub     rcx, byte SIZEOF_XMMWORD
+        add     rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     rdi, byte SIZEOF_XMMWORD                ; outptr0
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    rcx,rcx
+        jnz     near .column_ld1
 
-	pop	rcx			; col
-	pop	rsi
-	pop	rdi
+        pop     rcx                     ; col
+        pop     rsi
+        pop     rdi
 
-	add	rsi, byte SIZEOF_JSAMPROW	; input_buf
-	add	rdi, byte SIZEOF_JSAMPROW
-	dec	rax				; num_rows
-	jg	near .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     rdi, byte SIZEOF_JSAMPROW
+        dec     rax                             ; num_rows
+        jg      near .rowloop
 
 .return:
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcgryss2.asm b/simd/jcgryss2.asm
index c294287..b5125c1 100644
--- a/simd/jcgryss2.asm
+++ b/simd/jcgryss2.asm
@@ -26,358 +26,358 @@
 ;                              JDIMENSION output_row, int num_rows);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION img_width
-%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
-%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
-%define output_row(b)	(b)+20		; JDIMENSION output_row
-%define num_rows(b)	(b)+24		; int num_rows
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
+        align   16
 
-	global	EXTN(jsimd_rgb_gray_convert_sse2)
+        global  EXTN(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room to store eax once aligned
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; [original_ebp] = eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; esp = &wk[0] (wk[] sits below ebp)
+        pushpic eax             ; make room for the GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [img_width(eax)]
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [img_width(eax)]
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	esi, JSAMPIMAGE [output_buf(eax)]
-	mov	ecx, JDIMENSION [output_row(eax)]
-	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
-	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	esi, JSAMPARRAY [input_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	pushpic	eax
-	push	edi
-	push	esi
-	push	ecx			; col
+        pushpic eax
+        push    edi
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr0
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        alignx  16,7
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
 .column_ld1:
-	push	eax
-	push	edx
-	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
-	test	cl, SIZEOF_BYTE
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_BYTE
-	movzx	eax, BYTE [esi+ecx]
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        movzx   eax, BYTE [esi+ecx]
 .column_ld2:
-	test	cl, SIZEOF_WORD
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_WORD
-	movzx	edx, WORD [esi+ecx]
-	shl	eax, WORD_BIT
-	or	eax,edx
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        movzx   edx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
 .column_ld4:
-	movd	xmmA,eax
-	pop	edx
-	pop	eax
-	test	cl, SIZEOF_DWORD
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_DWORD
-	movd	xmmF, XMM_DWORD [esi+ecx]
-	pslldq	xmmA, SIZEOF_DWORD
-	por	xmmA,xmmF
+        movd    xmmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
 .column_ld8:
-	test	cl, SIZEOF_MMWORD
-	jz	short .column_ld16
-	sub	ecx, byte SIZEOF_MMWORD
-	movq	xmmB, XMM_MMWORD [esi+ecx]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmB
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     ecx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
 .column_ld16:
-	test	cl, SIZEOF_XMMWORD
-	jz	short .column_ld32
-	movdqa	xmmF,xmmA
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	mov	ecx, SIZEOF_XMMWORD
-	jmp	short .rgb_gray_cnv
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .rgb_gray_cnv
 .column_ld32:
-	test	cl, 2*SIZEOF_XMMWORD
-	mov	ecx, SIZEOF_XMMWORD
-	jz	short .rgb_gray_cnv
-	movdqa	xmmB,xmmA
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_gray_cnv
-	alignx	16,7
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	movdqu	xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
 
 .rgb_gray_cnv:
-	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	movdqa    xmmG,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
-	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
-	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
 
-	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
-	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
 
-	movdqa    xmmD,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
-	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
-	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
 
-	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
-	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
 
-	movdqa    xmmE,xmmA
-	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
-	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
 
-	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
 
-	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
 
-	pxor      xmmH,xmmH
+        pxor      xmmH,xmmH
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmB,xmmE
-	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
 
-	movdqa    xmmF,xmmD
-	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 .column_ld1:
-	test	cl, SIZEOF_XMMWORD/16
-	jz	short .column_ld2
-	sub	ecx, byte SIZEOF_XMMWORD/16
-	movd	xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld2:
-	test	cl, SIZEOF_XMMWORD/8
-	jz	short .column_ld4
-	sub	ecx, byte SIZEOF_XMMWORD/8
-	movq	xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
-	pslldq	xmmA, SIZEOF_MMWORD
-	por	xmmA,xmmE
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
 .column_ld4:
-	test	cl, SIZEOF_XMMWORD/4
-	jz	short .column_ld8
-	sub	ecx, byte SIZEOF_XMMWORD/4
-	movdqa	xmmE,xmmA
-	movdqu	xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
 .column_ld8:
-	test	cl, SIZEOF_XMMWORD/2
-	mov	ecx, SIZEOF_XMMWORD
-	jz	short .rgb_gray_cnv
-	movdqa	xmmF,xmmA
-	movdqa	xmmH,xmmE
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	jmp	short .rgb_gray_cnv
-	alignx	16,7
+        test    cl, SIZEOF_XMMWORD/2
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
 
 .columnloop:
-	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	movdqu	xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
-	movdqu	xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
 
 .rgb_gray_cnv:
-	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
-	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
 
-	movdqa    xmmC,xmmF
-	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
-	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
 
-	movdqa    xmmB,xmmA
-	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
-	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
 
-	movdqa    xmmG,xmmD
-	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
-	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
 
-	movdqa    xmmE,xmmA
-	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
-	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
 
-	movdqa    xmmH,xmmB
-	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
-	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
 
-	pxor      xmmF,xmmF
+        pxor      xmmF,xmmF
 
-	movdqa    xmmC,xmmA
-	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
-	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
 
-	movdqa    xmmD,xmmB
-	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
-	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
 
-	movdqa    xmmG,xmmE
-	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
-	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
 
-	punpcklbw xmmF,xmmH
-	punpckhbw xmmH,xmmH
-	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
-	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
-	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
 
-	; (Original)
-	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
-	;
-	; (This implementation)
-	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
 
-	movdqa    xmm6,xmm1
-	punpcklwd xmm1,xmm3
-	punpckhwd xmm6,xmm3
-	pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movdqa    xmm7, xmm6	; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+        movdqa    xmm7, xmm6    ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
 
-	movdqa    xmm6,xmm0
-	punpcklwd xmm0,xmm2
-	punpckhwd xmm6,xmm2
-	pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
-	movdqa    XMMWORD [wk(1)], xmm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
 
-	movdqa    xmm0, xmm5	; xmm0=BO
-	movdqa    xmm6, xmm4	; xmm6=BE
+        movdqa    xmm0, xmm5    ; xmm0=BO
+        movdqa    xmm6, xmm4    ; xmm6=BE
 
-	movdqa    xmm4,xmm0
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm4,xmm3
-	pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
 
-	movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)]	; xmm3=[PD_ONEHALF]
+        movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
 
-	paddd     xmm0, xmm1
-	paddd     xmm4, xmm7
-	paddd     xmm0,xmm3
-	paddd     xmm4,xmm3
-	psrld     xmm0,SCALEBITS	; xmm0=YOL
-	psrld     xmm4,SCALEBITS	; xmm4=YOH
-	packssdw  xmm0,xmm4		; xmm0=YO
+        paddd     xmm0, xmm1
+        paddd     xmm4, xmm7
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
 
-	movdqa    xmm4,xmm6
-	punpcklwd xmm6,xmm2
-	punpckhwd xmm4,xmm2
-	pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
-	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
 
-	movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)]	; xmm2=[PD_ONEHALF]
+        movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
 
-	paddd     xmm6, XMMWORD [wk(0)]
-	paddd     xmm4, XMMWORD [wk(1)]
-	paddd     xmm6,xmm2
-	paddd     xmm4,xmm2
-	psrld     xmm6,SCALEBITS	; xmm6=YEL
-	psrld     xmm4,SCALEBITS	; xmm4=YEH
-	packssdw  xmm6,xmm4		; xmm6=YE
+        paddd     xmm6, XMMWORD [wk(0)]
+        paddd     xmm4, XMMWORD [wk(1)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
 
-	psllw     xmm0,BYTE_BIT
-	por       xmm6,xmm0		; xmm6=Y
-	movdqa    XMMWORD [edi], xmm6	; Save Y
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [edi], xmm6   ; Save Y
 
-	sub	ecx, byte SIZEOF_XMMWORD
-	add	esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
-	add	edi, byte SIZEOF_XMMWORD		; outptr0
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	ecx,ecx
-	jnz	near .column_ld1
+        sub     ecx, byte SIZEOF_XMMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     edi, byte SIZEOF_XMMWORD                ; outptr0
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
 
-	pop	ecx			; col
-	pop	esi
-	pop	edi
-	poppic	eax
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        poppic  eax
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_buf
-	add	edi, byte SIZEOF_JSAMPROW
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqnt3dn.asm b/simd/jcqnt3dn.asm
index 182c869..0b8ec17 100644
--- a/simd/jcqnt3dn.asm
+++ b/simd/jcqnt3dn.asm
@@ -20,8 +20,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -30,98 +30,98 @@
 ;                             FAST_FLOAT * workspace);
 ;
 
-%define sample_data	ebp+8		; JSAMPARRAY sample_data
-%define start_col	ebp+12		; JDIMENSION start_col
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_float_3dnow)
+        align   16
+        global  EXTN(jsimd_convsamp_float_3dnow)
 
 EXTN(jsimd_convsamp_float_3dnow):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	pcmpeqw  mm7,mm7
-	psllw    mm7,7
-	packsswb mm7,mm7		; mm7 = PB_CENTERJSAMPLE (0x808080..)
+        pcmpeqw  mm7,mm7
+        psllw    mm7,7
+        packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
 
-	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [start_col]
-	mov	edi, POINTER [workspace]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/2
-	alignx	16,7
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop count: 2 rows per pass
+        alignx  16,7
 .convloop:
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
-	movq	mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
 
-	psubb	mm0,mm7				; mm0=(01234567)
-	psubb	mm1,mm7				; mm1=(89ABCDEF)
+        psubb   mm0,mm7                         ; mm0=(01234567)
+        psubb   mm1,mm7                         ; mm1=(89ABCDEF)
 
-	punpcklbw mm2,mm0			; mm2=(*0*1*2*3)
-	punpckhbw mm0,mm0			; mm0=(*4*5*6*7)
-	punpcklbw mm3,mm1			; mm3=(*8*9*A*B)
-	punpckhbw mm1,mm1			; mm1=(*C*D*E*F)
+        punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3)
+        punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+        punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+        punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
 
-	punpcklwd mm4,mm2			; mm4=(***0***1)
-	punpckhwd mm2,mm2			; mm2=(***2***3)
-	punpcklwd mm5,mm0			; mm5=(***4***5)
-	punpckhwd mm0,mm0			; mm0=(***6***7)
+        punpcklwd mm4,mm2                       ; mm4=(***0***1)
+        punpckhwd mm2,mm2                       ; mm2=(***2***3)
+        punpcklwd mm5,mm0                       ; mm5=(***4***5)
+        punpckhwd mm0,mm0                       ; mm0=(***6***7)
 
-	psrad	mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(01)
-	psrad	mm2,(DWORD_BIT-BYTE_BIT)	; mm2=(23)
-	pi2fd	mm4,mm4
-	pi2fd	mm2,mm2
-	psrad	mm5,(DWORD_BIT-BYTE_BIT)	; mm5=(45)
-	psrad	mm0,(DWORD_BIT-BYTE_BIT)	; mm0=(67)
-	pi2fd	mm5,mm5
-	pi2fd	mm0,mm0
+        psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(01)
+        psrad   mm2,(DWORD_BIT-BYTE_BIT)        ; mm2=(23)
+        pi2fd   mm4,mm4
+        pi2fd   mm2,mm2
+        psrad   mm5,(DWORD_BIT-BYTE_BIT)        ; mm5=(45)
+        psrad   mm0,(DWORD_BIT-BYTE_BIT)        ; mm0=(67)
+        pi2fd   mm5,mm5
+        pi2fd   mm0,mm0
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2
-	movq	MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
-	movq	MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
 
-	punpcklwd mm6,mm3			; mm6=(***8***9)
-	punpckhwd mm3,mm3			; mm3=(***A***B)
-	punpcklwd mm4,mm1			; mm4=(***C***D)
-	punpckhwd mm1,mm1			; mm1=(***E***F)
+        punpcklwd mm6,mm3                       ; mm6=(***8***9)
+        punpckhwd mm3,mm3                       ; mm3=(***A***B)
+        punpcklwd mm4,mm1                       ; mm4=(***C***D)
+        punpckhwd mm1,mm1                       ; mm1=(***E***F)
 
-	psrad	mm6,(DWORD_BIT-BYTE_BIT)	; mm6=(89)
-	psrad	mm3,(DWORD_BIT-BYTE_BIT)	; mm3=(AB)
-	pi2fd	mm6,mm6
-	pi2fd	mm3,mm3
-	psrad	mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(CD)
-	psrad	mm1,(DWORD_BIT-BYTE_BIT)	; mm1=(EF)
-	pi2fd	mm4,mm4
-	pi2fd	mm1,mm1
+        psrad   mm6,(DWORD_BIT-BYTE_BIT)        ; mm6=(89)
+        psrad   mm3,(DWORD_BIT-BYTE_BIT)        ; mm3=(AB)
+        pi2fd   mm6,mm6
+        pi2fd   mm3,mm3
+        psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(CD)
+        psrad   mm1,(DWORD_BIT-BYTE_BIT)        ; mm1=(EF)
+        pi2fd   mm4,mm4
+        pi2fd   mm1,mm1
 
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3
-	movq	MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
 
-	add	esi, byte 2*SIZEOF_JSAMPROW
-	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .convloop
+        add     esi, byte 2*SIZEOF_JSAMPROW
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .convloop
 
-	femms		; empty MMX/3DNow! state
+        femms           ; empty MMX/3DNow! state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -133,101 +133,101 @@
 ;                             FAST_FLOAT * workspace);
 ;
 
-%define coef_block	ebp+8		; JCOEFPTR coef_block
-%define divisors	ebp+12		; FAST_FLOAT * divisors
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_float_3dnow)
+        align   16
+        global  EXTN(jsimd_quantize_float_3dnow)
 
 EXTN(jsimd_quantize_float_3dnow):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov       eax, 0x4B400000	; (float)0x00C00000 (rndint_magic)
-	movd      mm7,eax
-	punpckldq mm7,mm7		; mm7={12582912.0F 12582912.0F}
+        mov       eax, 0x4B400000       ; (float)0x00C00000 (rndint_magic)
+        movd      mm7,eax
+        punpckldq mm7,mm7               ; mm7={12582912.0F 12582912.0F}
 
-	mov	esi, POINTER [workspace]
-	mov	edx, POINTER [divisors]
-	mov	edi, JCOEFPTR [coef_block]
-	mov	eax, DCTSIZE2/16
-	alignx	16,7
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/16
+        alignx  16,7
 .quantloop:
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
-	pfmul	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	pfmul	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
-	pfmul	mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
-	pfmul	mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
 
-	pfadd	mm0,mm7			; mm0=(00 ** 01 **)
-	pfadd	mm1,mm7			; mm1=(02 ** 03 **)
-	pfadd	mm2,mm7			; mm0=(04 ** 05 **)
-	pfadd	mm3,mm7			; mm1=(06 ** 07 **)
+        pfadd   mm0,mm7                 ; mm0=(00 ** 01 **)
+        pfadd   mm1,mm7                 ; mm1=(02 ** 03 **)
+        pfadd   mm2,mm7                 ; mm2=(04 ** 05 **)
+        pfadd   mm3,mm7                 ; mm3=(06 ** 07 **)
 
-	movq      mm4,mm0
-	punpcklwd mm0,mm1		; mm0=(00 02 ** **)
-	punpckhwd mm4,mm1		; mm4=(01 03 ** **)
-	movq      mm5,mm2
-	punpcklwd mm2,mm3		; mm2=(04 06 ** **)
-	punpckhwd mm5,mm3		; mm5=(05 07 ** **)
+        movq      mm4,mm0
+        punpcklwd mm0,mm1               ; mm0=(00 02 ** **)
+        punpckhwd mm4,mm1               ; mm4=(01 03 ** **)
+        movq      mm5,mm2
+        punpcklwd mm2,mm3               ; mm2=(04 06 ** **)
+        punpckhwd mm5,mm3               ; mm5=(05 07 ** **)
 
-	punpcklwd mm0,mm4		; mm0=(00 01 02 03)
-	punpcklwd mm2,mm5		; mm2=(04 05 06 07)
+        punpcklwd mm0,mm4               ; mm0=(00 01 02 03)
+        punpcklwd mm2,mm5               ; mm2=(04 05 06 07)
 
-	movq	mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
-	pfmul	mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	pfmul	mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
-	pfmul	mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
-	pfmul	mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
 
-	pfadd	mm6,mm7			; mm0=(10 ** 11 **)
-	pfadd	mm1,mm7			; mm4=(12 ** 13 **)
-	pfadd	mm3,mm7			; mm0=(14 ** 15 **)
-	pfadd	mm4,mm7			; mm4=(16 ** 17 **)
+        pfadd   mm6,mm7                 ; mm6=(10 ** 11 **)
+        pfadd   mm1,mm7                 ; mm1=(12 ** 13 **)
+        pfadd   mm3,mm7                 ; mm3=(14 ** 15 **)
+        pfadd   mm4,mm7                 ; mm4=(16 ** 17 **)
 
-	movq      mm5,mm6
-	punpcklwd mm6,mm1		; mm6=(10 12 ** **)
-	punpckhwd mm5,mm1		; mm5=(11 13 ** **)
-	movq      mm1,mm3
-	punpcklwd mm3,mm4		; mm3=(14 16 ** **)
-	punpckhwd mm1,mm4		; mm1=(15 17 ** **)
+        movq      mm5,mm6
+        punpcklwd mm6,mm1               ; mm6=(10 12 ** **)
+        punpckhwd mm5,mm1               ; mm5=(11 13 ** **)
+        movq      mm1,mm3
+        punpcklwd mm3,mm4               ; mm3=(14 16 ** **)
+        punpckhwd mm1,mm4               ; mm1=(15 17 ** **)
 
-	punpcklwd mm6,mm5		; mm6=(10 11 12 13)
-	punpcklwd mm3,mm1		; mm3=(14 15 16 17)
+        punpcklwd mm6,mm5               ; mm6=(10 11 12 13)
+        punpcklwd mm3,mm1               ; mm3=(14 15 16 17)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
 
-	add	esi, byte 16*SIZEOF_FAST_FLOAT
-	add	edx, byte 16*SIZEOF_FAST_FLOAT
-	add	edi, byte 16*SIZEOF_JCOEF
-	dec	eax
-	jnz	near .quantloop
+        add     esi, byte 16*SIZEOF_FAST_FLOAT
+        add     edx, byte 16*SIZEOF_FAST_FLOAT
+        add     edi, byte 16*SIZEOF_JCOEF
+        dec     eax
+        jnz     near .quantloop
 
-	femms		; empty MMX/3DNow! state
+        femms           ; empty MMX/3DNow! state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqntmmx.asm b/simd/jcqntmmx.asm
index 08b08b7..ae837d2 100644
--- a/simd/jcqntmmx.asm
+++ b/simd/jcqntmmx.asm
@@ -20,8 +20,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -30,92 +30,92 @@
 ;                     DCTELEM * workspace);
 ;
 
-%define sample_data	ebp+8		; JSAMPARRAY sample_data
-%define start_col	ebp+12		; JDIMENSION start_col
-%define workspace	ebp+16		; DCTELEM * workspace
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_mmx)
+        align   16
+        global  EXTN(jsimd_convsamp_mmx)
 
 EXTN(jsimd_convsamp_mmx):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	pxor	mm6,mm6			; mm6=(all 0's)
-	pcmpeqw	mm7,mm7
-	psllw	mm7,7			; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+        pxor    mm6,mm6                 ; mm6=(all 0's)
+        pcmpeqw mm7,mm7
+        psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
 
-	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [start_col]
-	mov	edi, POINTER [workspace]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .convloop:
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; mm0=(01234567)
-	movq	mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]	; mm1=(89ABCDEF)
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm0=(01234567)
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm1=(89ABCDEF)
 
-	mov	ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; mm2=(GHIJKLMN)
-	movq	mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE]	; mm3=(OPQRSTUV)
+        movq    mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm2=(GHIJKLMN)
+        movq    mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm3=(OPQRSTUV)
 
-	movq      mm4,mm0
-	punpcklbw mm0,mm6		; mm0=(0123)
-	punpckhbw mm4,mm6		; mm4=(4567)
-	movq      mm5,mm1
-	punpcklbw mm1,mm6		; mm1=(89AB)
-	punpckhbw mm5,mm6		; mm5=(CDEF)
+        movq      mm4,mm0
+        punpcklbw mm0,mm6               ; mm0=(0123)
+        punpckhbw mm4,mm6               ; mm4=(4567)
+        movq      mm5,mm1
+        punpcklbw mm1,mm6               ; mm1=(89AB)
+        punpckhbw mm5,mm6               ; mm5=(CDEF)
 
-	paddw	mm0,mm7
-	paddw	mm4,mm7
-	paddw	mm1,mm7
-	paddw	mm5,mm7
+        paddw   mm0,mm7
+        paddw   mm4,mm7
+        paddw   mm1,mm7
+        paddw   mm5,mm7
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
 
-	movq      mm0,mm2
-	punpcklbw mm2,mm6		; mm2=(GHIJ)
-	punpckhbw mm0,mm6		; mm0=(KLMN)
-	movq      mm4,mm3
-	punpcklbw mm3,mm6		; mm3=(OPQR)
-	punpckhbw mm4,mm6		; mm4=(STUV)
+        movq      mm0,mm2
+        punpcklbw mm2,mm6               ; mm2=(GHIJ)
+        punpckhbw mm0,mm6               ; mm0=(KLMN)
+        movq      mm4,mm3
+        punpcklbw mm3,mm6               ; mm3=(OPQR)
+        punpckhbw mm4,mm6               ; mm4=(STUV)
 
-	paddw	mm2,mm7
-	paddw	mm0,mm7
-	paddw	mm3,mm7
-	paddw	mm4,mm7
+        paddw   mm2,mm7
+        paddw   mm0,mm7
+        paddw   mm3,mm7
+        paddw   mm4,mm7
 
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
-	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
-	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
 
-	add	esi, byte 4*SIZEOF_JSAMPROW
-	add	edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	short .convloop
+        add     esi, byte 4*SIZEOF_JSAMPROW
+        add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     short .convloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -135,140 +135,140 @@
 %define SCALE(m,n,b)      MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
 %define SHIFT(m,n,b)      MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM)
 
-%define coef_block	ebp+8		; JCOEFPTR coef_block
-%define divisors	ebp+12		; DCTELEM * divisors
-%define workspace	ebp+16		; DCTELEM * workspace
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; DCTELEM * divisors
+%define workspace       ebp+16          ; DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_mmx)
+        align   16
+        global  EXTN(jsimd_quantize_mmx)
 
 EXTN(jsimd_quantize_mmx):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	esi, POINTER [workspace]
-	mov	edx, POINTER [divisors]
-	mov	edi, JCOEFPTR [coef_block]
-	mov	ah, 2
-	alignx	16,7
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     ah, 2
+        alignx  16,7
 .quantloop1:
-	mov	al, DCTSIZE2/8/2
-	alignx	16,7
+        mov     al, DCTSIZE2/8/2
+        alignx  16,7
 .quantloop2:
-	movq	mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
 
-	movq	mm0,mm2
-	movq	mm1,mm3
+        movq    mm0,mm2
+        movq    mm1,mm3
 
-	psraw	mm2,(WORD_BIT-1)  ; -1 if value < 0, 0 otherwise
-	psraw	mm3,(WORD_BIT-1)
+        psraw   mm2,(WORD_BIT-1)  ; -1 if value < 0, 0 otherwise
+        psraw   mm3,(WORD_BIT-1)
 
-	pxor	mm0,mm2   ; val = -val
-	pxor	mm1,mm3
-	psubw	mm0,mm2
-	psubw	mm1,mm3
+        pxor    mm0,mm2   ; val = abs(val) (xor/sub with the sign mask)
+        pxor    mm1,mm3
+        psubw   mm0,mm2
+        psubw   mm1,mm3
 
-	;
-	; MMX is an annoyingly crappy instruction set. It has two
-	; misfeatures that are causing problems here:
-	;
-	; - All multiplications are signed.
-	;
-	; - The second operand for the shifts is not treated as packed.
-	;
-	;
-	; We work around the first problem by implementing this algorithm:
-	;
-	; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
-	; {
-	;   enum { SHORT_BIT = 16 };
-	;   signed short sx = (signed short) x;
-	;   signed short sy = (signed short) y;
-	;   signed long sz;
-	; 
-	;   sz = (long) sx * (long) sy;     /* signed multiply */
-	; 
-	;   if (sx < 0) sz += (long) sy << SHORT_BIT;
-	;   if (sy < 0) sz += (long) sx << SHORT_BIT;
-	; 
-	;   return (unsigned long) sz;
-	; }
-	;
-	; (note that a negative sx adds _sy_ and vice versa)
-	;
-	; For the second problem, we replace the shift by a multiplication.
-	; Unfortunately that means we have to deal with the signed issue again.
-	;
+        ;
+        ; MMX is an annoyingly crappy instruction set. It has two
+        ; misfeatures that are causing problems here:
+        ;
+        ; - All multiplications are signed.
+        ;
+        ; - The second operand for the shifts is not treated as packed.
+        ;
+        ;
+        ; We work around the first problem by implementing this algorithm:
+        ;
+        ; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
+        ; {
+        ;   enum { SHORT_BIT = 16 };
+        ;   signed short sx = (signed short) x;
+        ;   signed short sy = (signed short) y;
+        ;   signed long sz;
+        ;
+        ;   sz = (long) sx * (long) sy;     /* signed multiply */
+        ;
+        ;   if (sx < 0) sz += (long) sy << SHORT_BIT;
+        ;   if (sy < 0) sz += (long) sx << SHORT_BIT;
+        ;
+        ;   return (unsigned long) sz;
+        ; }
+        ;
+        ; (note that a negative sx adds _sy_ and vice versa)
+        ;
+        ; For the second problem, we replace the shift by a multiplication.
+        ; Unfortunately that means we have to deal with the signed issue again.
+        ;
 
-	paddw	mm0, MMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
-	paddw	mm1, MMWORD [CORRECTION(0,1,edx)]
+        paddw   mm0, MMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
+        paddw   mm1, MMWORD [CORRECTION(0,1,edx)]
 
-	movq	mm4,mm0   ; store current value for later
-	movq	mm5,mm1
-	pmulhw	mm0, MMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
-	pmulhw	mm1, MMWORD [RECIPROCAL(0,1,edx)]
-	paddw	mm0,mm4		; reciprocal is always negative (MSB=1),
-	paddw	mm1,mm5   ; so we always need to add the initial value
-	                ; (input value is never negative as we
-	                ; inverted it at the start of this routine)
+        movq    mm4,mm0   ; store current value for later
+        movq    mm5,mm1
+        pmulhw  mm0, MMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
+        pmulhw  mm1, MMWORD [RECIPROCAL(0,1,edx)]
+        paddw   mm0,mm4         ; reciprocal is always negative (MSB=1),
+        paddw   mm1,mm5   ; so we always need to add the initial value
+                        ; (input value is never negative as we
+                        ; inverted it at the start of this routine)
 
-	; here it gets a bit tricky as both scale
-	; and mm0/mm1 can be negative
-	movq	mm6, MMWORD [SCALE(0,0,edx)]	; scale
-	movq	mm7, MMWORD [SCALE(0,1,edx)]
-	movq	mm4,mm0
-	movq	mm5,mm1
-	pmulhw	mm0,mm6
-	pmulhw	mm1,mm7
+        ; here it gets a bit tricky as both scale
+        ; and mm0/mm1 can be negative
+        movq    mm6, MMWORD [SCALE(0,0,edx)]    ; scale
+        movq    mm7, MMWORD [SCALE(0,1,edx)]
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pmulhw  mm0,mm6
+        pmulhw  mm1,mm7
 
-	psraw	mm6,(WORD_BIT-1)    ; determine if scale is negative
-	psraw	mm7,(WORD_BIT-1)
+        psraw   mm6,(WORD_BIT-1)    ; determine if scale is negative
+        psraw   mm7,(WORD_BIT-1)
 
-	pand	mm6,mm4             ; and add input if it is
-	pand	mm7,mm5
-	paddw	mm0,mm6
-	paddw	mm1,mm7
+        pand    mm6,mm4             ; and add input if it is
+        pand    mm7,mm5
+        paddw   mm0,mm6
+        paddw   mm1,mm7
 
-	psraw	mm4,(WORD_BIT-1)    ; then check if negative input 
-	psraw	mm5,(WORD_BIT-1)
+        psraw   mm4,(WORD_BIT-1)    ; then check if negative input
+        psraw   mm5,(WORD_BIT-1)
 
-	pand	mm4, MMWORD [SCALE(0,0,edx)]	; and add scale if it is
-	pand	mm5, MMWORD [SCALE(0,1,edx)]
-	paddw	mm0,mm4
-	paddw	mm1,mm5
+        pand    mm4, MMWORD [SCALE(0,0,edx)]    ; and add scale if it is
+        pand    mm5, MMWORD [SCALE(0,1,edx)]
+        paddw   mm0,mm4
+        paddw   mm1,mm5
 
-	pxor	mm0,mm2   ; val = -val
-	pxor	mm1,mm3
-	psubw	mm0,mm2
-	psubw	mm1,mm3
+        pxor    mm0,mm2   ; negate the result if the input was negative
+        pxor    mm1,mm3
+        psubw   mm0,mm2
+        psubw   mm1,mm3
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
 
-	add	esi, byte 8*SIZEOF_DCTELEM
-	add	edx, byte 8*SIZEOF_DCTELEM
-	add	edi, byte 8*SIZEOF_JCOEF
-	dec	al
-	jnz	near .quantloop2
-	dec	ah
-	jnz	near .quantloop1	; to avoid branch misprediction
+        add     esi, byte 8*SIZEOF_DCTELEM
+        add     edx, byte 8*SIZEOF_DCTELEM
+        add     edi, byte 8*SIZEOF_JCOEF
+        dec     al
+        jnz     near .quantloop2
+        dec     ah
+        jnz     near .quantloop1        ; to avoid branch misprediction
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqnts2f-64.asm b/simd/jcqnts2f-64.asm
index d0efa1b..0752542 100644
--- a/simd/jcqnts2f-64.asm
+++ b/simd/jcqnts2f-64.asm
@@ -21,8 +21,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -35,65 +35,65 @@
 ; r11 = JDIMENSION start_col
 ; r12 = FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_float_sse2)
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
+        push    rbx
 
-	pcmpeqw  xmm7,xmm7
-	psllw    xmm7,7
-	packsswb xmm7,xmm7		; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+        pcmpeqw  xmm7,xmm7
+        psllw    xmm7,7
+        packsswb xmm7,xmm7              ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
 
-	mov rsi, r10
-	mov	rax, r11
-	mov rdi, r12
-	mov	rcx, DCTSIZE/2
+        mov rsi, r10
+        mov     rax, r11
+        mov rdi, r12
+        mov     rcx, DCTSIZE/2
 .convloop:
-	mov	rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]       ; (JSAMPLE *)
 
-	movq	xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
-	movq	xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]
+        movq    xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
+        movq    xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]
 
-	psubb	xmm0,xmm7			; xmm0=(01234567)
-	psubb	xmm1,xmm7			; xmm1=(89ABCDEF)
+        psubb   xmm0,xmm7                       ; xmm0=(01234567)
+        psubb   xmm1,xmm7                       ; xmm1=(89ABCDEF)
 
-	punpcklbw xmm0,xmm0			; xmm0=(*0*1*2*3*4*5*6*7)
-	punpcklbw xmm1,xmm1			; xmm1=(*8*9*A*B*C*D*E*F)
+        punpcklbw xmm0,xmm0                     ; xmm0=(*0*1*2*3*4*5*6*7)
+        punpcklbw xmm1,xmm1                     ; xmm1=(*8*9*A*B*C*D*E*F)
 
-	punpcklwd xmm2,xmm0			; xmm2=(***0***1***2***3)
-	punpckhwd xmm0,xmm0			; xmm0=(***4***5***6***7)
-	punpcklwd xmm3,xmm1			; xmm3=(***8***9***A***B)
-	punpckhwd xmm1,xmm1			; xmm1=(***C***D***E***F)
+        punpcklwd xmm2,xmm0                     ; xmm2=(***0***1***2***3)
+        punpckhwd xmm0,xmm0                     ; xmm0=(***4***5***6***7)
+        punpcklwd xmm3,xmm1                     ; xmm3=(***8***9***A***B)
+        punpckhwd xmm1,xmm1                     ; xmm1=(***C***D***E***F)
 
-	psrad     xmm2,(DWORD_BIT-BYTE_BIT)	; xmm2=(0123)
-	psrad     xmm0,(DWORD_BIT-BYTE_BIT)	; xmm0=(4567)
-	cvtdq2ps  xmm2,xmm2			; xmm2=(0123)
-	cvtdq2ps  xmm0,xmm0			; xmm0=(4567)
-	psrad     xmm3,(DWORD_BIT-BYTE_BIT)	; xmm3=(89AB)
-	psrad     xmm1,(DWORD_BIT-BYTE_BIT)	; xmm1=(CDEF)
-	cvtdq2ps  xmm3,xmm3			; xmm3=(89AB)
-	cvtdq2ps  xmm1,xmm1			; xmm1=(CDEF)
+        psrad     xmm2,(DWORD_BIT-BYTE_BIT)     ; xmm2=(0123)
+        psrad     xmm0,(DWORD_BIT-BYTE_BIT)     ; xmm0=(4567)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=(0123)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=(4567)
+        psrad     xmm3,(DWORD_BIT-BYTE_BIT)     ; xmm3=(89AB)
+        psrad     xmm1,(DWORD_BIT-BYTE_BIT)     ; xmm1=(CDEF)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=(89AB)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=(CDEF)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
 
-	add	rsi, byte 2*SIZEOF_JSAMPROW
-	add	rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	rcx
-	jnz	short .convloop
+        add     rsi, byte 2*SIZEOF_JSAMPROW
+        add     rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     rcx
+        jnz     short .convloop
 
-	pop	rbx
-	uncollect_args
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -109,50 +109,50 @@
 ; r11 = FAST_FLOAT * divisors
 ; r12 = FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_float_sse2)
+        align   16
+        global  EXTN(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov rsi, r12
-	mov rdx, r11
-	mov rdi, r10
-	mov	rax, DCTSIZE2/16
+        mov rsi, r12
+        mov rdx, r11
+        mov rdi, r10
+        mov     rax, DCTSIZE2/16
 .quantloop:
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
 
-	cvtps2dq xmm0,xmm0
-	cvtps2dq xmm1,xmm1
-	cvtps2dq xmm2,xmm2
-	cvtps2dq xmm3,xmm3
+        cvtps2dq xmm0,xmm0
+        cvtps2dq xmm1,xmm1
+        cvtps2dq xmm2,xmm2
+        cvtps2dq xmm3,xmm3
 
-	packssdw xmm0,xmm1
-	packssdw xmm2,xmm3
+        packssdw xmm0,xmm1
+        packssdw xmm2,xmm3
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2
 
-	add	rsi, byte 16*SIZEOF_FAST_FLOAT
-	add	rdx, byte 16*SIZEOF_FAST_FLOAT
-	add	rdi, byte 16*SIZEOF_JCOEF
-	dec	rax
-	jnz	short .quantloop
+        add     rsi, byte 16*SIZEOF_FAST_FLOAT
+        add     rdx, byte 16*SIZEOF_FAST_FLOAT
+        add     rdi, byte 16*SIZEOF_JCOEF
+        dec     rax
+        jnz     short .quantloop
 
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqnts2f.asm b/simd/jcqnts2f.asm
index d80ae5d..0df2df7 100644
--- a/simd/jcqnts2f.asm
+++ b/simd/jcqnts2f.asm
@@ -20,8 +20,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -30,75 +30,75 @@
 ;                            FAST_FLOAT * workspace);
 ;
 
-%define sample_data	ebp+8		; JSAMPARRAY sample_data
-%define start_col	ebp+12		; JDIMENSION start_col
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_float_sse2)
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	pcmpeqw  xmm7,xmm7
-	psllw    xmm7,7
-	packsswb xmm7,xmm7		; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+        pcmpeqw  xmm7,xmm7
+        psllw    xmm7,7
+        packsswb xmm7,xmm7              ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
 
-	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [start_col]
-	mov	edi, POINTER [workspace]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/2
-	alignx	16,7
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2
+        alignx  16,7
 .convloop:
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]
-	movq	xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]
+        movq    xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+        movq    xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]
 
-	psubb	xmm0,xmm7			; xmm0=(01234567)
-	psubb	xmm1,xmm7			; xmm1=(89ABCDEF)
+        psubb   xmm0,xmm7                       ; xmm0=(01234567)
+        psubb   xmm1,xmm7                       ; xmm1=(89ABCDEF)
 
-	punpcklbw xmm0,xmm0			; xmm0=(*0*1*2*3*4*5*6*7)
-	punpcklbw xmm1,xmm1			; xmm1=(*8*9*A*B*C*D*E*F)
+        punpcklbw xmm0,xmm0                     ; xmm0=(*0*1*2*3*4*5*6*7)
+        punpcklbw xmm1,xmm1                     ; xmm1=(*8*9*A*B*C*D*E*F)
 
-	punpcklwd xmm2,xmm0			; xmm2=(***0***1***2***3)
-	punpckhwd xmm0,xmm0			; xmm0=(***4***5***6***7)
-	punpcklwd xmm3,xmm1			; xmm3=(***8***9***A***B)
-	punpckhwd xmm1,xmm1			; xmm1=(***C***D***E***F)
+        punpcklwd xmm2,xmm0                     ; xmm2=(***0***1***2***3)
+        punpckhwd xmm0,xmm0                     ; xmm0=(***4***5***6***7)
+        punpcklwd xmm3,xmm1                     ; xmm3=(***8***9***A***B)
+        punpckhwd xmm1,xmm1                     ; xmm1=(***C***D***E***F)
 
-	psrad     xmm2,(DWORD_BIT-BYTE_BIT)	; xmm2=(0123)
-	psrad     xmm0,(DWORD_BIT-BYTE_BIT)	; xmm0=(4567)
-	cvtdq2ps  xmm2,xmm2			; xmm2=(0123)
-	cvtdq2ps  xmm0,xmm0			; xmm0=(4567)
-	psrad     xmm3,(DWORD_BIT-BYTE_BIT)	; xmm3=(89AB)
-	psrad     xmm1,(DWORD_BIT-BYTE_BIT)	; xmm1=(CDEF)
-	cvtdq2ps  xmm3,xmm3			; xmm3=(89AB)
-	cvtdq2ps  xmm1,xmm1			; xmm1=(CDEF)
+        psrad     xmm2,(DWORD_BIT-BYTE_BIT)     ; xmm2=(0123)
+        psrad     xmm0,(DWORD_BIT-BYTE_BIT)     ; xmm0=(4567)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=(0123)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=(4567)
+        psrad     xmm3,(DWORD_BIT-BYTE_BIT)     ; xmm3=(89AB)
+        psrad     xmm1,(DWORD_BIT-BYTE_BIT)     ; xmm1=(CDEF)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=(89AB)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=(CDEF)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
 
-	add	esi, byte 2*SIZEOF_JSAMPROW
-	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	short .convloop
+        add     esi, byte 2*SIZEOF_JSAMPROW
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     short .convloop
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -110,62 +110,62 @@
 ;                         FAST_FLOAT * workspace);
 ;
 
-%define coef_block	ebp+8		; JCOEFPTR coef_block
-%define divisors	ebp+12		; FAST_FLOAT * divisors
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_float_sse2)
+        align   16
+        global  EXTN(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	esi, POINTER [workspace]
-	mov	edx, POINTER [divisors]
-	mov	edi, JCOEFPTR [coef_block]
-	mov	eax, DCTSIZE2/16
-	alignx	16,7
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/16
+        alignx  16,7
 .quantloop:
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
 
-	cvtps2dq xmm0,xmm0
-	cvtps2dq xmm1,xmm1
-	cvtps2dq xmm2,xmm2
-	cvtps2dq xmm3,xmm3
+        cvtps2dq xmm0,xmm0
+        cvtps2dq xmm1,xmm1
+        cvtps2dq xmm2,xmm2
+        cvtps2dq xmm3,xmm3
 
-	packssdw xmm0,xmm1
-	packssdw xmm2,xmm3
+        packssdw xmm0,xmm1
+        packssdw xmm2,xmm3
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2
 
-	add	esi, byte 16*SIZEOF_FAST_FLOAT
-	add	edx, byte 16*SIZEOF_FAST_FLOAT
-	add	edi, byte 16*SIZEOF_JCOEF
-	dec	eax
-	jnz	short .quantloop
+        add     esi, byte 16*SIZEOF_FAST_FLOAT
+        add     edx, byte 16*SIZEOF_FAST_FLOAT
+        add     edi, byte 16*SIZEOF_JCOEF
+        dec     eax
+        jnz     short .quantloop
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqnts2i-64.asm b/simd/jcqnts2i-64.asm
index cc33d59..5de8062 100644
--- a/simd/jcqnts2i-64.asm
+++ b/simd/jcqnts2i-64.asm
@@ -21,8 +21,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -35,60 +35,60 @@
 ; r11 = JDIMENSION start_col
 ; r12 = DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_sse2)
+        align   16
+        global  EXTN(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
+        push    rbx
 
-	pxor	xmm6,xmm6		; xmm6=(all 0's)
-	pcmpeqw	xmm7,xmm7
-	psllw	xmm7,7			; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        pxor    xmm6,xmm6               ; xmm6=(all 0's)
+        pcmpeqw xmm7,xmm7
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
 
-	mov rsi, r10
-	mov rax, r11
-	mov rdi, r12
-	mov	rcx, DCTSIZE/4
+        mov rsi, r10
+        mov rax, r11
+        mov rdi, r12
+        mov     rcx, DCTSIZE/4
 .convloop:
-	mov	rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]       ; (JSAMPLE *)
 
-	movq	xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]	; xmm0=(01234567)
-	movq	xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]	; xmm1=(89ABCDEF)
+        movq    xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]       ; xmm0=(01234567)
+        movq    xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]       ; xmm1=(89ABCDEF)
 
-	mov	rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]	; xmm2=(GHIJKLMN)
-	movq	xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]	; xmm3=(OPQRSTUV)
+        movq    xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]       ; xmm2=(GHIJKLMN)
+        movq    xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]       ; xmm3=(OPQRSTUV)
 
-	punpcklbw xmm0,xmm6		; xmm0=(01234567)
-	punpcklbw xmm1,xmm6		; xmm1=(89ABCDEF)
-	paddw     xmm0,xmm7
-	paddw     xmm1,xmm7
-	punpcklbw xmm2,xmm6		; xmm2=(GHIJKLMN)
-	punpcklbw xmm3,xmm6		; xmm3=(OPQRSTUV)
-	paddw     xmm2,xmm7
-	paddw     xmm3,xmm7
+        punpcklbw xmm0,xmm6             ; xmm0=(01234567)
+        punpcklbw xmm1,xmm6             ; xmm1=(89ABCDEF)
+        paddw     xmm0,xmm7
+        paddw     xmm1,xmm7
+        punpcklbw xmm2,xmm6             ; xmm2=(GHIJKLMN)
+        punpcklbw xmm3,xmm6             ; xmm3=(OPQRSTUV)
+        paddw     xmm2,xmm7
+        paddw     xmm3,xmm7
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
-	movdqa	XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
 
-	add	rsi, byte 4*SIZEOF_JSAMPROW
-	add	rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM
-	dec	rcx
-	jnz	short .convloop
+        add     rsi, byte 4*SIZEOF_JSAMPROW
+        add     rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     rcx
+        jnz     short .convloop
 
-	pop	rbx
-	uncollect_args
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -111,77 +111,77 @@
 ; r11 = DCTELEM * divisors
 ; r12 = DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_sse2)
+        align   16
+        global  EXTN(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov rsi, r12
-	mov rdx, r11
-	mov rdi, r10
-	mov	rax, DCTSIZE2/32
+        mov rsi, r12
+        mov rdx, r11
+        mov rdi, r10
+        mov     rax, DCTSIZE2/32
 .quantloop:
-	movdqa	xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
-	movdqa	xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)]
-	movdqa	xmm0,xmm4
-	movdqa	xmm1,xmm5
-	movdqa	xmm2,xmm6
-	movdqa	xmm3,xmm7
-	psraw	xmm4,(WORD_BIT-1)
-	psraw	xmm5,(WORD_BIT-1)
-	psraw	xmm6,(WORD_BIT-1)
-	psraw	xmm7,(WORD_BIT-1)
-	pxor	xmm0,xmm4
-	pxor	xmm1,xmm5
-	pxor	xmm2,xmm6
-	pxor	xmm3,xmm7
-	psubw	xmm0,xmm4		; if (xmm0 < 0) xmm0 = -xmm0;
-	psubw	xmm1,xmm5		; if (xmm1 < 0) xmm1 = -xmm1;
-	psubw	xmm2,xmm6		; if (xmm2 < 0) xmm2 = -xmm2;
-	psubw	xmm3,xmm7		; if (xmm3 < 0) xmm3 = -xmm3;
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm0,xmm4
+        movdqa  xmm1,xmm5
+        movdqa  xmm2,xmm6
+        movdqa  xmm3,xmm7
+        psraw   xmm4,(WORD_BIT-1)
+        psraw   xmm5,(WORD_BIT-1)
+        psraw   xmm6,(WORD_BIT-1)
+        psraw   xmm7,(WORD_BIT-1)
+        pxor    xmm0,xmm4
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4               ; if (xmm0 < 0) xmm0 = -xmm0;
+        psubw   xmm1,xmm5               ; if (xmm1 < 0) xmm1 = -xmm1;
+        psubw   xmm2,xmm6               ; if (xmm2 < 0) xmm2 = -xmm2;
+        psubw   xmm3,xmm7               ; if (xmm3 < 0) xmm3 = -xmm3;
 
-	paddw	xmm0, XMMWORD [CORRECTION(0,0,rdx)]  ; correction + roundfactor
-	paddw	xmm1, XMMWORD [CORRECTION(1,0,rdx)]
-	paddw	xmm2, XMMWORD [CORRECTION(2,0,rdx)]
-	paddw	xmm3, XMMWORD [CORRECTION(3,0,rdx)]
-	pmulhuw	xmm0, XMMWORD [RECIPROCAL(0,0,rdx)]  ; reciprocal
-	pmulhuw	xmm1, XMMWORD [RECIPROCAL(1,0,rdx)]
-	pmulhuw	xmm2, XMMWORD [RECIPROCAL(2,0,rdx)]
-	pmulhuw	xmm3, XMMWORD [RECIPROCAL(3,0,rdx)]
-	pmulhuw	xmm0, XMMWORD [SCALE(0,0,rdx)]	; scale
-	pmulhuw	xmm1, XMMWORD [SCALE(1,0,rdx)]
-	pmulhuw	xmm2, XMMWORD [SCALE(2,0,rdx)]
-	pmulhuw	xmm3, XMMWORD [SCALE(3,0,rdx)]
+        paddw   xmm0, XMMWORD [CORRECTION(0,0,rdx)]  ; correction + roundfactor
+        paddw   xmm1, XMMWORD [CORRECTION(1,0,rdx)]
+        paddw   xmm2, XMMWORD [CORRECTION(2,0,rdx)]
+        paddw   xmm3, XMMWORD [CORRECTION(3,0,rdx)]
+        pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)]  ; reciprocal
+        pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)]
+        pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)]
+        pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)]
+        pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)]  ; scale
+        pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)]
+        pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)]
+        pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)]
 
-	pxor	xmm0,xmm4
-	pxor	xmm1,xmm5
-	pxor	xmm2,xmm6
-	pxor	xmm3,xmm7
-	psubw	xmm0,xmm4
-	psubw	xmm1,xmm5
-	psubw	xmm2,xmm6
-	psubw	xmm3,xmm7
-	movdqa	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
-	movdqa	XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
+        pxor    xmm0,xmm4
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4
+        psubw   xmm1,xmm5
+        psubw   xmm2,xmm6
+        psubw   xmm3,xmm7
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
 
-	add	rsi, byte 32*SIZEOF_DCTELEM
-	add	rdx, byte 32*SIZEOF_DCTELEM
-	add	rdi, byte 32*SIZEOF_JCOEF
-	dec	rax
-	jnz	near .quantloop
+        add     rsi, byte 32*SIZEOF_DCTELEM
+        add     rdx, byte 32*SIZEOF_DCTELEM
+        add     rdi, byte 32*SIZEOF_JCOEF
+        dec     rax
+        jnz     near .quantloop
 
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqnts2i.asm b/simd/jcqnts2i.asm
index 0864d6e..07cdc68 100644
--- a/simd/jcqnts2i.asm
+++ b/simd/jcqnts2i.asm
@@ -20,8 +20,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -30,70 +30,70 @@
 ;                      DCTELEM * workspace);
 ;
 
-%define sample_data	ebp+8		; JSAMPARRAY sample_data
-%define start_col	ebp+12		; JDIMENSION start_col
-%define workspace	ebp+16		; DCTELEM * workspace
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_sse2)
+        align   16
+        global  EXTN(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	pxor	xmm6,xmm6		; xmm6=(all 0's)
-	pcmpeqw	xmm7,xmm7
-	psllw	xmm7,7			; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        pxor    xmm6,xmm6               ; xmm6=(all 0's)
+        pcmpeqw xmm7,xmm7
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
 
-	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [start_col]
-	mov	edi, POINTER [workspace]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .convloop:
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; xmm0=(01234567)
-	movq	xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]	; xmm1=(89ABCDEF)
+        movq    xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]       ; xmm0=(01234567)
+        movq    xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]       ; xmm1=(89ABCDEF)
 
-	mov	ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; xmm2=(GHIJKLMN)
-	movq	xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]	; xmm3=(OPQRSTUV)
+        movq    xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]       ; xmm2=(GHIJKLMN)
+        movq    xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]       ; xmm3=(OPQRSTUV)
 
-	punpcklbw xmm0,xmm6		; xmm0=(01234567)
-	punpcklbw xmm1,xmm6		; xmm1=(89ABCDEF)
-	paddw     xmm0,xmm7
-	paddw     xmm1,xmm7
-	punpcklbw xmm2,xmm6		; xmm2=(GHIJKLMN)
-	punpcklbw xmm3,xmm6		; xmm3=(OPQRSTUV)
-	paddw     xmm2,xmm7
-	paddw     xmm3,xmm7
+        punpcklbw xmm0,xmm6             ; xmm0=(01234567)
+        punpcklbw xmm1,xmm6             ; xmm1=(89ABCDEF)
+        paddw     xmm0,xmm7
+        paddw     xmm1,xmm7
+        punpcklbw xmm2,xmm6             ; xmm2=(GHIJKLMN)
+        punpcklbw xmm3,xmm6             ; xmm3=(OPQRSTUV)
+        paddw     xmm2,xmm7
+        paddw     xmm3,xmm7
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
-	movdqa	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
 
-	add	esi, byte 4*SIZEOF_JSAMPROW
-	add	edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	short .convloop
+        add     esi, byte 4*SIZEOF_JSAMPROW
+        add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     short .convloop
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -112,89 +112,89 @@
 %define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
 %define SCALE(m,n,b)      XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
 
-%define coef_block	ebp+8		; JCOEFPTR coef_block
-%define divisors	ebp+12		; DCTELEM * divisors
-%define workspace	ebp+16		; DCTELEM * workspace
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; DCTELEM * divisors
+%define workspace       ebp+16          ; DCTELEM * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_sse2)
+        align   16
+        global  EXTN(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	esi, POINTER [workspace]
-	mov	edx, POINTER [divisors]
-	mov	edi, JCOEFPTR [coef_block]
-	mov	eax, DCTSIZE2/32
-	alignx	16,7
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/32
+        alignx  16,7
 .quantloop:
-	movdqa	xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
-	movdqa	xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
-	movdqa	xmm0,xmm4
-	movdqa	xmm1,xmm5
-	movdqa	xmm2,xmm6
-	movdqa	xmm3,xmm7
-	psraw	xmm4,(WORD_BIT-1)
-	psraw	xmm5,(WORD_BIT-1)
-	psraw	xmm6,(WORD_BIT-1)
-	psraw	xmm7,(WORD_BIT-1)
-	pxor	xmm0,xmm4
-	pxor	xmm1,xmm5
-	pxor	xmm2,xmm6
-	pxor	xmm3,xmm7
-	psubw	xmm0,xmm4		; if (xmm0 < 0) xmm0 = -xmm0;
-	psubw	xmm1,xmm5		; if (xmm1 < 0) xmm1 = -xmm1;
-	psubw	xmm2,xmm6		; if (xmm2 < 0) xmm2 = -xmm2;
-	psubw	xmm3,xmm7		; if (xmm3 < 0) xmm3 = -xmm3;
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm0,xmm4
+        movdqa  xmm1,xmm5
+        movdqa  xmm2,xmm6
+        movdqa  xmm3,xmm7
+        psraw   xmm4,(WORD_BIT-1)
+        psraw   xmm5,(WORD_BIT-1)
+        psraw   xmm6,(WORD_BIT-1)
+        psraw   xmm7,(WORD_BIT-1)
+        pxor    xmm0,xmm4
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4               ; if (xmm0 < 0) xmm0 = -xmm0;
+        psubw   xmm1,xmm5               ; if (xmm1 < 0) xmm1 = -xmm1;
+        psubw   xmm2,xmm6               ; if (xmm2 < 0) xmm2 = -xmm2;
+        psubw   xmm3,xmm7               ; if (xmm3 < 0) xmm3 = -xmm3;
 
-	paddw	xmm0, XMMWORD [CORRECTION(0,0,edx)]  ; correction + roundfactor
-	paddw	xmm1, XMMWORD [CORRECTION(1,0,edx)]
-	paddw	xmm2, XMMWORD [CORRECTION(2,0,edx)]
-	paddw	xmm3, XMMWORD [CORRECTION(3,0,edx)]
-	pmulhuw	xmm0, XMMWORD [RECIPROCAL(0,0,edx)]  ; reciprocal
-	pmulhuw	xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
-	pmulhuw	xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
-	pmulhuw	xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
-	pmulhuw	xmm0, XMMWORD [SCALE(0,0,edx)]	; scale
-	pmulhuw	xmm1, XMMWORD [SCALE(1,0,edx)]
-	pmulhuw	xmm2, XMMWORD [SCALE(2,0,edx)]
-	pmulhuw	xmm3, XMMWORD [SCALE(3,0,edx)]
+        paddw   xmm0, XMMWORD [CORRECTION(0,0,edx)]  ; correction + roundfactor
+        paddw   xmm1, XMMWORD [CORRECTION(1,0,edx)]
+        paddw   xmm2, XMMWORD [CORRECTION(2,0,edx)]
+        paddw   xmm3, XMMWORD [CORRECTION(3,0,edx)]
+        pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)]  ; reciprocal
+        pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
+        pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
+        pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
+        pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)]  ; scale
+        pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)]
+        pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)]
+        pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)]
 
-	pxor	xmm0,xmm4
-	pxor	xmm1,xmm5
-	pxor	xmm2,xmm6
-	pxor	xmm3,xmm7
-	psubw	xmm0,xmm4
-	psubw	xmm1,xmm5
-	psubw	xmm2,xmm6
-	psubw	xmm3,xmm7
-	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
-	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
-	movdqa	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+        pxor    xmm0,xmm4
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4
+        psubw   xmm1,xmm5
+        psubw   xmm2,xmm6
+        psubw   xmm3,xmm7
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
 
-	add	esi, byte 32*SIZEOF_DCTELEM
-	add	edx, byte 32*SIZEOF_DCTELEM
-	add	edi, byte 32*SIZEOF_JCOEF
-	dec	eax
-	jnz	near .quantloop
+        add     esi, byte 32*SIZEOF_DCTELEM
+        add     edx, byte 32*SIZEOF_DCTELEM
+        add     edi, byte 32*SIZEOF_JCOEF
+        dec     eax
+        jnz     near .quantloop
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcqntsse.asm b/simd/jcqntsse.asm
index 3065eca..2e32d0c 100644
--- a/simd/jcqntsse.asm
+++ b/simd/jcqntsse.asm
@@ -20,8 +20,8 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Load data into workspace, applying unsigned->signed conversion
 ;
@@ -30,98 +30,98 @@
 ;                           FAST_FLOAT * workspace);
 ;
 
-%define sample_data	ebp+8		; JSAMPARRAY sample_data
-%define start_col	ebp+12		; JDIMENSION start_col
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_convsamp_float_sse)
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse)
 
 EXTN(jsimd_convsamp_float_sse):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	pcmpeqw  mm7,mm7
-	psllw    mm7,7
-	packsswb mm7,mm7		; mm7 = PB_CENTERJSAMPLE (0x808080..)
+        pcmpeqw  mm7,mm7
+        psllw    mm7,7
+        packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
 
-	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [start_col]
-	mov	edi, POINTER [workspace]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/2
-	alignx	16,7
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/2
+        alignx  16,7
 .convloop:
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
-	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
 
-	movq	mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
-	movq	mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
 
-	psubb	mm0,mm7				; mm0=(01234567)
-	psubb	mm1,mm7				; mm1=(89ABCDEF)
+        psubb   mm0,mm7                         ; mm0=(01234567)
+        psubb   mm1,mm7                         ; mm1=(89ABCDEF)
 
-	punpcklbw mm2,mm0			; mm2=(*0*1*2*3)
-	punpckhbw mm0,mm0			; mm0=(*4*5*6*7)
-	punpcklbw mm3,mm1			; mm3=(*8*9*A*B)
-	punpckhbw mm1,mm1			; mm1=(*C*D*E*F)
+        punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3)
+        punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+        punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+        punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
 
-	punpcklwd mm4,mm2			; mm4=(***0***1)
-	punpckhwd mm2,mm2			; mm2=(***2***3)
-	punpcklwd mm5,mm0			; mm5=(***4***5)
-	punpckhwd mm0,mm0			; mm0=(***6***7)
+        punpcklwd mm4,mm2                       ; mm4=(***0***1)
+        punpckhwd mm2,mm2                       ; mm2=(***2***3)
+        punpcklwd mm5,mm0                       ; mm5=(***4***5)
+        punpckhwd mm0,mm0                       ; mm0=(***6***7)
 
-	psrad     mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(01)
-	psrad     mm2,(DWORD_BIT-BYTE_BIT)	; mm2=(23)
-	cvtpi2ps  xmm0,mm4			; xmm0=(01**)
-	cvtpi2ps  xmm1,mm2			; xmm1=(23**)
-	psrad     mm5,(DWORD_BIT-BYTE_BIT)	; mm5=(45)
-	psrad     mm0,(DWORD_BIT-BYTE_BIT)	; mm0=(67)
-	cvtpi2ps  xmm2,mm5			; xmm2=(45**)
-	cvtpi2ps  xmm3,mm0			; xmm3=(67**)
+        psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(01)
+        psrad     mm2,(DWORD_BIT-BYTE_BIT)      ; mm2=(23)
+        cvtpi2ps  xmm0,mm4                      ; xmm0=(01**)
+        cvtpi2ps  xmm1,mm2                      ; xmm1=(23**)
+        psrad     mm5,(DWORD_BIT-BYTE_BIT)      ; mm5=(45)
+        psrad     mm0,(DWORD_BIT-BYTE_BIT)      ; mm0=(67)
+        cvtpi2ps  xmm2,mm5                      ; xmm2=(45**)
+        cvtpi2ps  xmm3,mm0                      ; xmm3=(67**)
 
-	punpcklwd mm6,mm3			; mm6=(***8***9)
-	punpckhwd mm3,mm3			; mm3=(***A***B)
-	punpcklwd mm4,mm1			; mm4=(***C***D)
-	punpckhwd mm1,mm1			; mm1=(***E***F)
+        punpcklwd mm6,mm3                       ; mm6=(***8***9)
+        punpckhwd mm3,mm3                       ; mm3=(***A***B)
+        punpcklwd mm4,mm1                       ; mm4=(***C***D)
+        punpckhwd mm1,mm1                       ; mm1=(***E***F)
 
-	psrad     mm6,(DWORD_BIT-BYTE_BIT)	; mm6=(89)
-	psrad     mm3,(DWORD_BIT-BYTE_BIT)	; mm3=(AB)
-	cvtpi2ps  xmm4,mm6			; xmm4=(89**)
-	cvtpi2ps  xmm5,mm3			; xmm5=(AB**)
-	psrad     mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(CD)
-	psrad     mm1,(DWORD_BIT-BYTE_BIT)	; mm1=(EF)
-	cvtpi2ps  xmm6,mm4			; xmm6=(CD**)
-	cvtpi2ps  xmm7,mm1			; xmm7=(EF**)
+        psrad     mm6,(DWORD_BIT-BYTE_BIT)      ; mm6=(89)
+        psrad     mm3,(DWORD_BIT-BYTE_BIT)      ; mm3=(AB)
+        cvtpi2ps  xmm4,mm6                      ; xmm4=(89**)
+        cvtpi2ps  xmm5,mm3                      ; xmm5=(AB**)
+        psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(CD)
+        psrad     mm1,(DWORD_BIT-BYTE_BIT)      ; mm1=(EF)
+        cvtpi2ps  xmm6,mm4                      ; xmm6=(CD**)
+        cvtpi2ps  xmm7,mm1                      ; xmm7=(EF**)
 
-	movlhps   xmm0,xmm1			; xmm0=(0123)
-	movlhps   xmm2,xmm3			; xmm2=(4567)
-	movlhps   xmm4,xmm5			; xmm4=(89AB)
-	movlhps   xmm6,xmm7			; xmm6=(CDEF)
+        movlhps   xmm0,xmm1                     ; xmm0=(0123)
+        movlhps   xmm2,xmm3                     ; xmm2=(4567)
+        movlhps   xmm4,xmm5                     ; xmm4=(89AB)
+        movlhps   xmm6,xmm7                     ; xmm6=(CDEF)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
 
-	add	esi, byte 2*SIZEOF_JSAMPROW
-	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .convloop
+        add     esi, byte 2*SIZEOF_JSAMPROW
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .convloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -133,79 +133,79 @@
 ;                           FAST_FLOAT * workspace);
 ;
 
-%define coef_block	ebp+8		; JCOEFPTR coef_block
-%define divisors	ebp+12		; FAST_FLOAT * divisors
-%define workspace	ebp+16		; FAST_FLOAT * workspace
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
 
-	align	16
-	global	EXTN(jsimd_quantize_float_sse)
+        align   16
+        global  EXTN(jsimd_quantize_float_sse)
 
 EXTN(jsimd_quantize_float_sse):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	esi, POINTER [workspace]
-	mov	edx, POINTER [divisors]
-	mov	edi, JCOEFPTR [coef_block]
-	mov	eax, DCTSIZE2/16
-	alignx	16,7
+        mov     esi, POINTER [workspace]        ; esi = workspace (float input)
+        mov     edx, POINTER [divisors]         ; edx = divisors (per-element multipliers)
+        mov     edi, JCOEFPTR [coef_block]      ; edi = coef_block (output)
+        mov     eax, DCTSIZE2/16                ; loop count: 16 elements per iteration
+        alignx  16,7
 .quantloop:
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
-	mulps	xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	mulps	xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]     ; load 4 workspace floats
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]     ; multiply by matching divisors entry
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
 
-	movhlps  xmm4,xmm0
-	movhlps  xmm5,xmm1
+        movhlps  xmm4,xmm0                      ; low half of xmm4 = high 2 floats of xmm0
+        movhlps  xmm5,xmm1                      ; low half of xmm5 = high 2 floats of xmm1
 
-	cvtps2pi mm0,xmm0
-	cvtps2pi mm1,xmm1
-	cvtps2pi mm4,xmm4
-	cvtps2pi mm5,xmm5
+        cvtps2pi mm0,xmm0                       ; low 2 floats -> 2 packed dword integers
+        cvtps2pi mm1,xmm1
+        cvtps2pi mm4,xmm4                       ; (the former high 2 floats)
+        cvtps2pi mm5,xmm5
 
-	movhlps  xmm6,xmm2
-	movhlps  xmm7,xmm3
+        movhlps  xmm6,xmm2                      ; same split for the second pair of vectors
+        movhlps  xmm7,xmm3
 
-	cvtps2pi mm2,xmm2
-	cvtps2pi mm3,xmm3
-	cvtps2pi mm6,xmm6
-	cvtps2pi mm7,xmm7
+        cvtps2pi mm2,xmm2
+        cvtps2pi mm3,xmm3
+        cvtps2pi mm6,xmm6
+        cvtps2pi mm7,xmm7
 
-	packssdw mm0,mm4
-	packssdw mm1,mm5
-	packssdw mm2,mm6
-	packssdw mm3,mm7
+        packssdw mm0,mm4                        ; 4 dwords -> 4 words, signed saturation
+        packssdw mm1,mm5
+        packssdw mm2,mm6
+        packssdw mm3,mm7
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0     ; store 4 packed words to coef_block
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
 
-	add	esi, byte 16*SIZEOF_FAST_FLOAT
-	add	edx, byte 16*SIZEOF_FAST_FLOAT
-	add	edi, byte 16*SIZEOF_JCOEF
-	dec	eax
-	jnz	short .quantloop
+        add     esi, byte 16*SIZEOF_FAST_FLOAT  ; advance workspace by 16 elements
+        add     edx, byte 16*SIZEOF_FAST_FLOAT  ; advance divisors by 16 elements
+        add     edi, byte 16*SIZEOF_JCOEF       ; advance coef_block by 16 elements
+        dec     eax
+        jnz     short .quantloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcsammmx.asm b/simd/jcsammmx.asm
index 9e43b2f..9ab1518 100644
--- a/simd/jcsammmx.asm
+++ b/simd/jcsammmx.asm
@@ -19,8 +19,8 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Downsample pixel values of a single component.
 ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
@@ -32,135 +32,135 @@
 ;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION image_width
-%define max_v_samp(b)	(b)+12		; int max_v_samp_factor
-%define v_samp(b)			(b)+16		; JDIMENSION v_samp_factor
-%define width_blks(b)	(b)+20		; JDIMENSION width_blocks
-%define input_data(b)	(b)+24		; JSAMPARRAY input_data
-%define output_data(b)	(b)+28	; JSAMPARRAY output_data
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v1_downsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_mmx)
 
 EXTN(jsimd_h2v1_downsample_mmx):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	ecx, JDIMENSION [width_blks(ebp)]
-	shl	ecx,3			; imul ecx,DCTSIZE (ecx = output_cols)
-	jz	near .return
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; output_cols == 0: nothing to do
 
-	mov	edx, JDIMENSION [img_width(ebp)]
+        mov     edx, JDIMENSION [img_width(ebp)]        ; edx = image_width
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	ecx
-	shl	ecx,1				; output_cols * 2
-	sub	ecx,edx
-	jle	short .expand_end
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of columns to pad per row
+        jle     short .expand_end               ; no padding needed
 
-	mov	eax, INT [max_v_samp(ebp)]
-	test	eax,eax
-	jle	short .expand_end
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = number of rows to pad
+        test    eax,eax
+        jle     short .expand_end
 
-	cld
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	alignx	16,7
+        cld                                     ; rep stosb below must advance edi upward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
 .expandloop:
-	push	eax
-	push	ecx
+        push    eax                             ; save row counter (al is overwritten below)
+        push    ecx                             ; save pad count (rep stosb counts ecx down)
 
-	mov	edi, JSAMPROW [esi]
-	add	edi,edx
-	mov	al, JSAMPLE [edi-1]
+        mov     edi, JSAMPROW [esi]             ; edi = start of this row
+        add     edi,edx                         ; edi = row + image_width
+        mov     al, JSAMPLE [edi-1]             ; al = last sample inside image_width
 
-	rep stosb
+        rep stosb                               ; replicate al into the next ecx bytes
 
-	pop	ecx
-	pop	eax
+        pop     ecx
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	dec	eax
-	jg	short .expandloop
+        add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     eax
+        jg      short .expandloop
 
 .expand_end:
-	pop	ecx				; output_cols
+        pop     ecx                             ; output_cols
 
-	; -- h2v1_downsample
+        ; -- h2v1_downsample
 
-	mov	eax, JDIMENSION [v_samp(ebp)]	; rowctr
-	test	eax,eax
-	jle	near .return
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
 
-	mov       edx, 0x00010000	; bias pattern
-	movd      mm7,edx
-	pcmpeqw   mm6,mm6
-	punpckldq mm7,mm7		; mm7={0, 1, 0, 1}
-	psrlw     mm6,BYTE_BIT		; mm6={0xFF 0x00 0xFF 0x00 ..}
+        mov       edx, 0x00010000       ; bias pattern
+        movd      mm7,edx
+        pcmpeqw   mm6,mm6               ; mm6 = all ones
+        punpckldq mm7,mm7               ; mm7={0, 1, 0, 1}
+        psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, JSAMPARRAY [output_data(ebp)]	; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
 .rowloop:
-	push	ecx
-	push	edi
-	push	esi
+        push    ecx                             ; save output_cols; column loop counts ecx down
+        push    edi                             ; save output_data row pointer
+        push    esi                             ; save input_data row pointer
 
-	mov	esi, JSAMPROW [esi]		; inptr
-	mov	edi, JSAMPROW [edi]		; outptr
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mm1, MMWORD [esi+1*SIZEOF_MMWORD]
-	movq	mm2,mm0
-	movq	mm3,mm1
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]       ; first 8 input bytes
+        movq    mm1, MMWORD [esi+1*SIZEOF_MMWORD]       ; next 8 input bytes
+        movq    mm2,mm0
+        movq    mm3,mm1
 
-	pand	mm0,mm6
-	psrlw	mm2,BYTE_BIT
-	pand	mm1,mm6
-	psrlw	mm3,BYTE_BIT
+        pand    mm0,mm6                         ; keep the low byte of each word (even samples)
+        psrlw   mm2,BYTE_BIT                    ; keep the high byte of each word (odd samples)
+        pand    mm1,mm6
+        psrlw   mm3,BYTE_BIT
 
-	paddw	mm0,mm2
-	paddw	mm1,mm3
-	paddw	mm0,mm7
-	paddw	mm1,mm7
-	psrlw	mm0,1
-	psrlw	mm1,1
+        paddw   mm0,mm2                         ; sum each horizontal pair
+        paddw   mm1,mm3
+        paddw   mm0,mm7                         ; add the alternating 0/1 bias
+        paddw   mm1,mm7
+        psrlw   mm0,1                           ; divide by 2
+        psrlw   mm1,1
 
-	packuswb mm0,mm1
+        packuswb mm0,mm1                        ; 8 words -> 8 output bytes
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
 
-	add	esi, byte 2*SIZEOF_MMWORD	; inptr
-	add	edi, byte 1*SIZEOF_MMWORD	; outptr
-	sub	ecx, byte SIZEOF_MMWORD		; outcol
-	jnz	short .columnloop
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     short .columnloop
 
-	pop	esi
-	pop	edi
-	pop	ecx
+        pop     esi
+        pop     edi
+        pop     ecx
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	eax				; rowctr
-	jg	short .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      short .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -174,151 +174,151 @@
 ;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION image_width
-%define max_v_samp(b)	(b)+12		; int max_v_samp_factor
-%define v_samp(b)			(b)+16		; JDIMENSION v_samp_factor
-%define width_blks(b)	(b)+20		; JDIMENSION width_blocks
-%define input_data(b)	(b)+24		; JSAMPARRAY input_data
-%define output_data(b)	(b)+28	; JSAMPARRAY output_data
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v2_downsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_mmx)
 
 EXTN(jsimd_h2v2_downsample_mmx):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	ecx, JDIMENSION [width_blks(ebp)]
-	shl	ecx,3			; imul ecx,DCTSIZE (ecx = output_cols)
-	jz	near .return
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; output_cols == 0: nothing to do
 
-	mov	edx, JDIMENSION [img_width(ebp)]
+        mov     edx, JDIMENSION [img_width(ebp)]        ; edx = image_width
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	ecx
-	shl	ecx,1				; output_cols * 2
-	sub	ecx,edx
-	jle	short .expand_end
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of columns to pad per row
+        jle     short .expand_end               ; no padding needed
 
-	mov	eax, INT [max_v_samp(ebp)]
-	test	eax,eax
-	jle	short .expand_end
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = number of rows to pad
+        test    eax,eax
+        jle     short .expand_end
 
-	cld
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	alignx	16,7
+        cld                                     ; rep stosb below must advance edi upward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
 .expandloop:
-	push	eax
-	push	ecx
+        push    eax                             ; save row counter (al is overwritten below)
+        push    ecx                             ; save pad count (rep stosb counts ecx down)
 
-	mov	edi, JSAMPROW [esi]
-	add	edi,edx
-	mov	al, JSAMPLE [edi-1]
+        mov     edi, JSAMPROW [esi]             ; edi = start of this row
+        add     edi,edx                         ; edi = row + image_width
+        mov     al, JSAMPLE [edi-1]             ; al = last sample inside image_width
 
-	rep stosb
+        rep stosb                               ; replicate al into the next ecx bytes
 
-	pop	ecx
-	pop	eax
+        pop     ecx
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	dec	eax
-	jg	short .expandloop
+        add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     eax
+        jg      short .expandloop
 
 .expand_end:
-	pop	ecx				; output_cols
+        pop     ecx                             ; output_cols
 
-	; -- h2v2_downsample
+        ; -- h2v2_downsample
 
-	mov	eax, JDIMENSION [v_samp(ebp)]	; rowctr
-	test	eax,eax
-	jle	near .return
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
 
-	mov       edx, 0x00020001	; bias pattern
-	movd      mm7,edx
-	pcmpeqw   mm6,mm6
-	punpckldq mm7,mm7		; mm7={1, 2, 1, 2}
-	psrlw     mm6,BYTE_BIT		; mm6={0xFF 0x00 0xFF 0x00 ..}
+        mov       edx, 0x00020001       ; bias pattern
+        movd      mm7,edx
+        pcmpeqw   mm6,mm6               ; mm6 = all ones
+        punpckldq mm7,mm7               ; mm7={1, 2, 1, 2}
+        psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, JSAMPARRAY [output_data(ebp)]	; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
 .rowloop:
-	push	ecx
-	push	edi
-	push	esi
+        push    ecx                             ; save output_cols; column loop counts ecx down
+        push    edi                             ; save output_data row pointer
+        push    esi                             ; save input_data row pointer
 
-	mov	edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; inptr1
-	mov	edi, JSAMPROW [edi]			; outptr
-	alignx	16,7
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [edx+0*SIZEOF_MMWORD]
-	movq	mm1, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mm2, MMWORD [edx+1*SIZEOF_MMWORD]
-	movq	mm3, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mm0, MMWORD [edx+0*SIZEOF_MMWORD]       ; row 0, first 8 bytes
+        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]       ; row 1, first 8 bytes
+        movq    mm2, MMWORD [edx+1*SIZEOF_MMWORD]       ; row 0, next 8 bytes
+        movq    mm3, MMWORD [esi+1*SIZEOF_MMWORD]       ; row 1, next 8 bytes
 
-	movq	mm4,mm0
-	movq	mm5,mm1
-	pand	mm0,mm6
-	psrlw	mm4,BYTE_BIT
-	pand	mm1,mm6
-	psrlw	mm5,BYTE_BIT
-	paddw	mm0,mm4
-	paddw	mm1,mm5
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pand    mm0,mm6                         ; keep the low byte of each word (even samples)
+        psrlw   mm4,BYTE_BIT                    ; keep the high byte of each word (odd samples)
+        pand    mm1,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm0,mm4                         ; horizontal pair sums, row 0
+        paddw   mm1,mm5                         ; horizontal pair sums, row 1
 
-	movq	mm4,mm2
-	movq	mm5,mm3
-	pand	mm2,mm6
-	psrlw	mm4,BYTE_BIT
-	pand	mm3,mm6
-	psrlw	mm5,BYTE_BIT
-	paddw	mm2,mm4
-	paddw	mm3,mm5
+        movq    mm4,mm2                         ; same steps for the second 8 bytes of each row
+        movq    mm5,mm3
+        pand    mm2,mm6
+        psrlw   mm4,BYTE_BIT
+        pand    mm3,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm2,mm4
+        paddw   mm3,mm5
 
-	paddw	mm0,mm1
-	paddw	mm2,mm3
-	paddw	mm0,mm7
-	paddw	mm2,mm7
-	psrlw	mm0,2
-	psrlw	mm2,2
+        paddw   mm0,mm1                         ; add the two rows: 2x2 sums
+        paddw   mm2,mm3
+        paddw   mm0,mm7                         ; add the alternating 1/2 bias
+        paddw   mm2,mm7
+        psrlw   mm0,2                           ; divide by 4
+        psrlw   mm2,2
 
-	packuswb mm0,mm2
+        packuswb mm0,mm2                        ; 8 words -> 8 output bytes
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
 
-	add	edx, byte 2*SIZEOF_MMWORD	; inptr0
-	add	esi, byte 2*SIZEOF_MMWORD	; inptr1
-	add	edi, byte 1*SIZEOF_MMWORD	; outptr
-	sub	ecx, byte SIZEOF_MMWORD		; outcol
-	jnz	near .columnloop
+        add     edx, byte 2*SIZEOF_MMWORD       ; inptr0
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr1
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     near .columnloop
 
-	pop	esi
-	pop	edi
-	pop	ecx
+        pop     esi
+        pop     edi
+        pop     ecx
 
-	add	esi, byte 2*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 1*SIZEOF_JSAMPROW	; output_data
-	dec	eax				; rowctr
-	jg	near .rowloop
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcsamss2-64.asm b/simd/jcsamss2-64.asm
index 6a16dc5..6c50d9c 100644
--- a/simd/jcsamss2-64.asm
+++ b/simd/jcsamss2-64.asm
@@ -20,8 +20,8 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Downsample pixel values of a single component.
 ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
@@ -40,130 +40,130 @@
 ; r14 = JSAMPARRAY input_data
 ; r15 = JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v1_downsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov rcx, r13
-	shl	rcx,3			; imul rcx,DCTSIZE (rcx = output_cols)
-	jz	near .return
+        mov     ecx, r13d               ; 32-bit arg: upper half of r13 is undefined, mov to ecx zero-extends
+        shl     rcx,3                   ; imul rcx,DCTSIZE (rcx = output_cols)
+        jz      near .return
 
-	mov rdx, r10
+        mov     edx, r10d               ; image width (32-bit arg), zero-extended into rdx
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	rcx
-	shl	rcx,1				; output_cols * 2
-	sub	rcx,rdx
-	jle	short .expand_end
+        push    rcx                             ; save output_cols
+        shl     rcx,1                           ; output_cols * 2
+        sub     rcx,rdx                         ; rcx = number of columns to pad per row
+        jle     short .expand_end
 
-	mov	rax, r11
-	test	rax,rax
-	jle	short .expand_end
+        mov     eax, r11d                       ; rows to pad (32-bit int arg)
+        test    eax,eax                         ; signed 32-bit test; rax is now zero-extended
+        jle     short .expand_end
 
-	cld
-	mov	rsi, r14	; input_data
+        cld                                     ; rep stosb below must advance rdi upward
+        mov     rsi, r14        ; input_data
 .expandloop:
-	push	rax
-	push	rcx
+        push    rax                             ; save row counter (al is overwritten below)
+        push    rcx                             ; save pad count (rep stosb counts rcx down)
 
-	mov	rdi, JSAMPROW [rsi]
-	add	rdi,rdx
-	mov	al, JSAMPLE [rdi-1]
+        mov     rdi, JSAMPROW [rsi]             ; rdi = start of this row
+        add     rdi,rdx                         ; rdi = row + image width
+        mov     al, JSAMPLE [rdi-1]             ; al = last sample inside the image width
 
-	rep stosb
+        rep stosb                               ; replicate al into the next rcx bytes
 
-	pop	rcx
-	pop	rax
+        pop     rcx
+        pop     rax
 
-	add	rsi, byte SIZEOF_JSAMPROW
-	dec	rax
-	jg	short .expandloop
+        add     rsi, byte SIZEOF_JSAMPROW
+        dec     rax
+        jg      short .expandloop
 
 .expand_end:
-	pop	rcx				; output_cols
+        pop     rcx                             ; output_cols
 
-	; -- h2v1_downsample
+        ; -- h2v1_downsample
 
-	mov	rax, r12	; rowctr
-	test	eax,eax
-	jle	near .return
+        mov     eax, r12d       ; rowctr (32-bit arg; zero-extended so dec rax below is exact)
+        test    eax,eax
+        jle     near .return
 
-	mov	rdx, 0x00010000		; bias pattern
-	movd	xmm7,edx
-	pcmpeqw	xmm6,xmm6
-	pshufd	xmm7,xmm7,0x00		; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        mov     rdx, 0x00010000         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6               ; xmm6 = all ones
+        pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	rsi, r14	; input_data
-	mov	rdi, r15	; output_data
+        mov     rsi, r14        ; input_data
+        mov     rdi, r15        ; output_data
 .rowloop:
-	push	rcx
-	push	rdi
-	push	rsi
+        push    rcx
+        push    rdi
+        push    rsi
 
-	mov	rsi, JSAMPROW [rsi]		; inptr
-	mov rdi, JSAMPROW [rdi]		; outptr
+        mov     rsi, JSAMPROW [rsi]             ; inptr
+        mov     rdi, JSAMPROW [rdi]             ; outptr
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
 
 .columnloop_r8:
-	movdqa	xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	pxor	xmm1,xmm1
-	mov	rcx, SIZEOF_XMMWORD
-	jmp	short .downsample
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        pxor    xmm1,xmm1                       ; remainder case: second input vector is zero
+        mov     rcx, SIZEOF_XMMWORD             ; so the sub below leaves rcx = 0 and the loop ends
+        jmp     short .downsample
 
 .columnloop:
-	movdqa	xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
 
 .downsample:
-	movdqa	xmm2,xmm0
-	movdqa	xmm3,xmm1
+        movdqa  xmm2,xmm0
+        movdqa  xmm3,xmm1
 
-	pand	xmm0,xmm6
-	psrlw	xmm2,BYTE_BIT
-	pand	xmm1,xmm6
-	psrlw	xmm3,BYTE_BIT
+        pand    xmm0,xmm6                       ; keep the low byte of each word (even samples)
+        psrlw   xmm2,BYTE_BIT                   ; keep the high byte of each word (odd samples)
+        pand    xmm1,xmm6
+        psrlw   xmm3,BYTE_BIT
 
-	paddw	xmm0,xmm2
-	paddw	xmm1,xmm3
-	paddw	xmm0,xmm7
-	paddw	xmm1,xmm7
-	psrlw	xmm0,1
-	psrlw	xmm1,1
+        paddw   xmm0,xmm2                       ; sum each horizontal pair
+        paddw   xmm1,xmm3
+        paddw   xmm0,xmm7                       ; add the alternating 0/1 bias
+        paddw   xmm1,xmm7
+        psrlw   xmm0,1                          ; divide by 2
+        psrlw   xmm1,1
 
-	packuswb xmm0,xmm1
+        packuswb xmm0,xmm1                      ; 16 words -> 16 output bytes
 
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
 
-	sub	rcx, byte SIZEOF_XMMWORD	; outcol
-	add	rsi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	rdi, byte 1*SIZEOF_XMMWORD	; outptr
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
-	test	rcx,rcx
-	jnz	short .columnloop_r8
+        sub     rcx, byte SIZEOF_XMMWORD        ; outcol
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        test    rcx,rcx
+        jnz     short .columnloop_r8
 
-	pop	rsi
-	pop	rdi
-	pop	rcx
+        pop     rsi
+        pop     rdi
+        pop     rcx
 
-	add	rsi, byte SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte SIZEOF_JSAMPROW	; output_data
-	dec	rax				; rowctr
-	jg	near .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rax                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -184,147 +184,147 @@
 ; r14 = JSAMPARRAY input_data
 ; r15 = JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v2_downsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov	rcx, r13
-	shl	rcx,3			; imul rcx,DCTSIZE (rcx = output_cols)
-	jz	near .return
+        mov     ecx, r13d               ; 32-bit arg: upper half of r13 is undefined, mov to ecx zero-extends
+        shl     rcx,3                   ; imul rcx,DCTSIZE (rcx = output_cols)
+        jz      near .return
 
-	mov	rdx, r10
+        mov     edx, r10d               ; image width (32-bit arg), zero-extended into rdx
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	rcx
-	shl	rcx,1				; output_cols * 2
-	sub	rcx,rdx
-	jle	short .expand_end
+        push    rcx                             ; save output_cols
+        shl     rcx,1                           ; output_cols * 2
+        sub     rcx,rdx                         ; rcx = number of columns to pad per row
+        jle     short .expand_end
 
-	mov	rax, r11
-	test	rax,rax
-	jle	short .expand_end
+        mov     eax, r11d                       ; rows to pad (32-bit int arg)
+        test    eax,eax                         ; signed 32-bit test; rax is now zero-extended
+        jle     short .expand_end
 
-	cld
-	mov	rsi, r14	; input_data
+        cld                                     ; rep stosb below must advance rdi upward
+        mov     rsi, r14        ; input_data
 .expandloop:
-	push	rax
-	push	rcx
+        push    rax                             ; save row counter (al is overwritten below)
+        push    rcx                             ; save pad count (rep stosb counts rcx down)
 
-	mov	rdi, JSAMPROW [rsi]
-	add	rdi,rdx
-	mov	al, JSAMPLE [rdi-1]
+        mov     rdi, JSAMPROW [rsi]             ; rdi = start of this row
+        add     rdi,rdx                         ; rdi = row + image width
+        mov     al, JSAMPLE [rdi-1]             ; al = last sample inside the image width
 
-	rep stosb
+        rep stosb                               ; replicate al into the next rcx bytes
 
-	pop	rcx
-	pop	rax
+        pop     rcx
+        pop     rax
 
-	add	rsi, byte SIZEOF_JSAMPROW
-	dec	rax
-	jg	short .expandloop
+        add     rsi, byte SIZEOF_JSAMPROW
+        dec     rax
+        jg      short .expandloop
 
 .expand_end:
-	pop	rcx				; output_cols
+        pop     rcx                             ; output_cols
 
-	; -- h2v2_downsample
+        ; -- h2v2_downsample
 
-	mov	rax, r12	; rowctr
-	test	rax,rax
-	jle	near .return
+        mov     eax, r12d       ; rowctr (32-bit arg; zero-extended so dec rax below is exact)
+        test    eax,eax
+        jle     near .return
 
-	mov	rdx, 0x00020001		; bias pattern
-	movd	xmm7,edx
-	pcmpeqw	xmm6,xmm6
-	pshufd	xmm7,xmm7,0x00		; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        mov     rdx, 0x00020001         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6               ; xmm6 = all ones
+        pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	rsi, r14	; input_data
-	mov	rdi, r15	; output_data
+        mov     rsi, r14        ; input_data
+        mov     rdi, r15        ; output_data
 .rowloop:
-	push	rcx
-	push	rdi
-	push	rsi
+        push    rcx
+        push    rdi
+        push    rsi
 
-	mov	rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]	; inptr1
-	mov	rdi, JSAMPROW [rdi]			; outptr
+        mov     rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     rdi, JSAMPROW [rdi]                     ; outptr
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
 
 .columnloop_r8:
-	movdqa	xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	pxor	xmm2,xmm2
-	pxor	xmm3,xmm3
-	mov	rcx, SIZEOF_XMMWORD
-	jmp	short .downsample
+        movdqa  xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        pxor    xmm2,xmm2                       ; remainder case: second input vectors are zero
+        pxor    xmm3,xmm3
+        mov     rcx, SIZEOF_XMMWORD             ; so the sub below leaves rcx = 0 and the loop ends
+        jmp     short .downsample
 
 .columnloop:
-	movdqa	xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqa	xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
-	movdqa	xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
 
 .downsample:
-	movdqa	xmm4,xmm0
-	movdqa	xmm5,xmm1
-	pand	xmm0,xmm6
-	psrlw	xmm4,BYTE_BIT
-	pand	xmm1,xmm6
-	psrlw	xmm5,BYTE_BIT
-	paddw	xmm0,xmm4
-	paddw	xmm1,xmm5
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        pand    xmm0,xmm6                       ; keep the low byte of each word (even samples)
+        psrlw   xmm4,BYTE_BIT                   ; keep the high byte of each word (odd samples)
+        pand    xmm1,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm0,xmm4                       ; horizontal pair sums, row 0
+        paddw   xmm1,xmm5                       ; horizontal pair sums, row 1
 
-	movdqa	xmm4,xmm2
-	movdqa	xmm5,xmm3
-	pand	xmm2,xmm6
-	psrlw	xmm4,BYTE_BIT
-	pand	xmm3,xmm6
-	psrlw	xmm5,BYTE_BIT
-	paddw	xmm2,xmm4
-	paddw	xmm3,xmm5
+        movdqa  xmm4,xmm2
+        movdqa  xmm5,xmm3
+        pand    xmm2,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm3,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm2,xmm4
+        paddw   xmm3,xmm5
 
-	paddw	xmm0,xmm1
-	paddw	xmm2,xmm3
-	paddw	xmm0,xmm7
-	paddw	xmm2,xmm7
-	psrlw	xmm0,2
-	psrlw	xmm2,2
+        paddw   xmm0,xmm1                       ; add the two rows: 2x2 sums
+        paddw   xmm2,xmm3
+        paddw   xmm0,xmm7                       ; add the alternating 1/2 bias
+        paddw   xmm2,xmm7
+        psrlw   xmm0,2                          ; divide by 4
+        psrlw   xmm2,2
 
-	packuswb xmm0,xmm2
+        packuswb xmm0,xmm2                      ; 16 words -> 16 output bytes
 
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
 
-	sub	rcx, byte SIZEOF_XMMWORD	; outcol
-	add	rdx, byte 2*SIZEOF_XMMWORD	; inptr0
-	add	rsi, byte 2*SIZEOF_XMMWORD	; inptr1
-	add	rdi, byte 1*SIZEOF_XMMWORD	; outptr
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	rcx,rcx
-	jnz	near .columnloop_r8
+        sub     rcx, byte SIZEOF_XMMWORD        ; outcol
+        add     rdx, byte 2*SIZEOF_XMMWORD      ; inptr0
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr1
+        add     rdi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    rcx,rcx
+        jnz     near .columnloop_r8
 
-	pop	rsi
-	pop	rdi
-	pop	rcx
+        pop     rsi
+        pop     rdi
+        pop     rcx
 
-	add	rsi, byte 2*SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte 1*SIZEOF_JSAMPROW	; output_data
-	dec	rax				; rowctr
-	jg	near .rowloop
+        add     rsi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     rax                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jcsamss2.asm b/simd/jcsamss2.asm
index 818e911..7361843 100644
--- a/simd/jcsamss2.asm
+++ b/simd/jcsamss2.asm
@@ -19,8 +19,8 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Downsample pixel values of a single component.
 ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
@@ -32,148 +32,148 @@
 ;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION image_width
-%define max_v_samp(b)	(b)+12		; int max_v_samp_factor
-%define v_samp(b)			(b)+16		; JDIMENSION v_samp_factor
-%define width_blks(b)	(b)+20		; JDIMENSION width_blocks
-%define input_data(b)	(b)+24		; JSAMPARRAY input_data
-%define output_data(b)	(b)+28		; JSAMPARRAY output_data
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v1_downsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	ecx, JDIMENSION [width_blks(ebp)]
-	shl	ecx,3			; imul ecx,DCTSIZE (ecx = output_cols)
-	jz	near .return
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return
 
-	mov	edx, JDIMENSION [img_width(ebp)]
+        mov     edx, JDIMENSION [img_width(ebp)]
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	ecx
-	shl	ecx,1				; output_cols * 2
-	sub	ecx,edx
-	jle	short .expand_end
+        push    ecx
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx
+        jle     short .expand_end
 
-	mov	eax, INT [max_v_samp(ebp)]
-	test	eax,eax
-	jle	short .expand_end
+        mov     eax, INT [max_v_samp(ebp)]
+        test    eax,eax
+        jle     short .expand_end
 
-	cld
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	alignx	16,7
+        cld
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
 .expandloop:
-	push	eax
-	push	ecx
+        push    eax
+        push    ecx
 
-	mov	edi, JSAMPROW [esi]
-	add	edi,edx
-	mov	al, JSAMPLE [edi-1]
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx
+        mov     al, JSAMPLE [edi-1]
 
-	rep stosb
+        rep stosb
 
-	pop	ecx
-	pop	eax
+        pop     ecx
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	dec	eax
-	jg	short .expandloop
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
 
 .expand_end:
-	pop	ecx				; output_cols
+        pop     ecx                             ; output_cols
 
-	; -- h2v1_downsample
+        ; -- h2v1_downsample
 
-	mov	eax, JDIMENSION [v_samp(ebp)]	; rowctr
-	test	eax,eax
-	jle	near .return
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
 
-	mov	edx, 0x00010000		; bias pattern
-	movd	xmm7,edx
-	pcmpeqw	xmm6,xmm6
-	pshufd	xmm7,xmm7,0x00		; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        mov     edx, 0x00010000         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, JSAMPARRAY [output_data(ebp)]	; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
 .rowloop:
-	push	ecx
-	push	edi
-	push	esi
+        push    ecx
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]		; inptr
-	mov	edi, JSAMPROW [edi]		; outptr
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        alignx  16,7
 
 .columnloop_r8:
-	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	pxor	xmm1,xmm1
-	mov	ecx, SIZEOF_XMMWORD
-	jmp	short .downsample
-	alignx	16,7
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm1,xmm1
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .downsample
+        alignx  16,7
 
 .columnloop:
-	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
 
 .downsample:
-	movdqa	xmm2,xmm0
-	movdqa	xmm3,xmm1
+        movdqa  xmm2,xmm0
+        movdqa  xmm3,xmm1
 
-	pand	xmm0,xmm6
-	psrlw	xmm2,BYTE_BIT
-	pand	xmm1,xmm6
-	psrlw	xmm3,BYTE_BIT
+        pand    xmm0,xmm6
+        psrlw   xmm2,BYTE_BIT
+        pand    xmm1,xmm6
+        psrlw   xmm3,BYTE_BIT
 
-	paddw	xmm0,xmm2
-	paddw	xmm1,xmm3
-	paddw	xmm0,xmm7
-	paddw	xmm1,xmm7
-	psrlw	xmm0,1
-	psrlw	xmm1,1
+        paddw   xmm0,xmm2
+        paddw   xmm1,xmm3
+        paddw   xmm0,xmm7
+        paddw   xmm1,xmm7
+        psrlw   xmm0,1
+        psrlw   xmm1,1
 
-	packuswb xmm0,xmm1
+        packuswb xmm0,xmm1
 
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
 
-	sub	ecx, byte SIZEOF_XMMWORD	; outcol
-	add	esi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	edi, byte 1*SIZEOF_XMMWORD	; outptr
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
-	test	ecx,ecx
-	jnz	short .columnloop_r8
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        test    ecx,ecx
+        jnz     short .columnloop_r8
 
-	pop	esi
-	pop	edi
-	pop	ecx
+        pop     esi
+        pop     edi
+        pop     ecx
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	eax				; rowctr
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -187,165 +187,165 @@
 ;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
 ;
 
-%define img_width(b)	(b)+8			; JDIMENSION image_width
-%define max_v_samp(b)	(b)+12		; int max_v_samp_factor
-%define v_samp(b)			(b)+16		; JDIMENSION v_samp_factor
-%define width_blks(b)	(b)+20		; JDIMENSION width_blocks
-%define input_data(b)	(b)+24		; JSAMPARRAY input_data
-%define output_data(b)	(b)+28	; JSAMPARRAY output_data
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
 
-	align	16
-	global	EXTN(jsimd_h2v2_downsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	ecx, JDIMENSION [width_blks(ebp)]
-	shl	ecx,3			; imul ecx,DCTSIZE (ecx = output_cols)
-	jz	near .return
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return
 
-	mov	edx, JDIMENSION [img_width(ebp)]
+        mov     edx, JDIMENSION [img_width(ebp)]
 
-	; -- expand_right_edge
+        ; -- expand_right_edge
 
-	push	ecx
-	shl	ecx,1				; output_cols * 2
-	sub	ecx,edx
-	jle	short .expand_end
+        push    ecx
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx
+        jle     short .expand_end
 
-	mov	eax, INT [max_v_samp(ebp)]
-	test	eax,eax
-	jle	short .expand_end
+        mov     eax, INT [max_v_samp(ebp)]
+        test    eax,eax
+        jle     short .expand_end
 
-	cld
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	alignx	16,7
+        cld
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
 .expandloop:
-	push	eax
-	push	ecx
+        push    eax
+        push    ecx
 
-	mov	edi, JSAMPROW [esi]
-	add	edi,edx
-	mov	al, JSAMPLE [edi-1]
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx
+        mov     al, JSAMPLE [edi-1]
 
-	rep stosb
+        rep stosb
 
-	pop	ecx
-	pop	eax
+        pop     ecx
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	dec	eax
-	jg	short .expandloop
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
 
 .expand_end:
-	pop	ecx				; output_cols
+        pop     ecx                             ; output_cols
 
-	; -- h2v2_downsample
+        ; -- h2v2_downsample
 
-	mov	eax, JDIMENSION [v_samp(ebp)]	; rowctr
-	test	eax,eax
-	jle	near .return
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
 
-	mov	edx, 0x00020001		; bias pattern
-	movd	xmm7,edx
-	pcmpeqw	xmm6,xmm6
-	pshufd	xmm7,xmm7,0x00		; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        mov     edx, 0x00020001         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, JSAMPARRAY [output_data(ebp)]	; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
 .rowloop:
-	push	ecx
-	push	edi
-	push	esi
+        push    ecx
+        push    edi
+        push    esi
 
-	mov	edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; inptr1
-	mov	edi, JSAMPROW [edi]			; outptr
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	short .columnloop
-	alignx	16,7
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        alignx  16,7
 
 .columnloop_r8:
-	movdqa	xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	pxor	xmm2,xmm2
-	pxor	xmm3,xmm3
-	mov	ecx, SIZEOF_XMMWORD
-	jmp	short .downsample
-	alignx	16,7
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .downsample
+        alignx  16,7
 
 .columnloop:
-	movdqa	xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
-	movdqa	xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqa	xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
-	movdqa	xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]
 
 .downsample:
-	movdqa	xmm4,xmm0
-	movdqa	xmm5,xmm1
-	pand	xmm0,xmm6
-	psrlw	xmm4,BYTE_BIT
-	pand	xmm1,xmm6
-	psrlw	xmm5,BYTE_BIT
-	paddw	xmm0,xmm4
-	paddw	xmm1,xmm5
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        pand    xmm0,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm1,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm0,xmm4
+        paddw   xmm1,xmm5
 
-	movdqa	xmm4,xmm2
-	movdqa	xmm5,xmm3
-	pand	xmm2,xmm6
-	psrlw	xmm4,BYTE_BIT
-	pand	xmm3,xmm6
-	psrlw	xmm5,BYTE_BIT
-	paddw	xmm2,xmm4
-	paddw	xmm3,xmm5
+        movdqa  xmm4,xmm2
+        movdqa  xmm5,xmm3
+        pand    xmm2,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm3,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm2,xmm4
+        paddw   xmm3,xmm5
 
-	paddw	xmm0,xmm1
-	paddw	xmm2,xmm3
-	paddw	xmm0,xmm7
-	paddw	xmm2,xmm7
-	psrlw	xmm0,2
-	psrlw	xmm2,2
+        paddw   xmm0,xmm1
+        paddw   xmm2,xmm3
+        paddw   xmm0,xmm7
+        paddw   xmm2,xmm7
+        psrlw   xmm0,2
+        psrlw   xmm2,2
 
-	packuswb xmm0,xmm2
+        packuswb xmm0,xmm2
 
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
 
-	sub	ecx, byte SIZEOF_XMMWORD	; outcol
-	add	edx, byte 2*SIZEOF_XMMWORD	; inptr0
-	add	esi, byte 2*SIZEOF_XMMWORD	; inptr1
-	add	edi, byte 1*SIZEOF_XMMWORD	; outptr
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jae	near .columnloop
-	test	ecx,ecx
-	jnz	near .columnloop_r8
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     edx, byte 2*SIZEOF_XMMWORD      ; inptr0
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr1
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .columnloop_r8
 
-	pop	esi
-	pop	edi
-	pop	ecx
+        pop     esi
+        pop     edi
+        pop     ecx
 
-	add	esi, byte 2*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 1*SIZEOF_JSAMPROW	; output_data
-	dec	eax				; rowctr
-	jg	near .rowloop
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdclrmmx.asm b/simd/jdclrmmx.asm
index 1c255e8..bb1d2fa 100644
--- a/simd/jdclrmmx.asm
+++ b/simd/jdclrmmx.asm
@@ -28,378 +28,378 @@
 ;                            JSAMPARRAY output_buf, int num_rows)
 ;
 
-%define out_width(b)	(b)+8			; JDIMENSION out_width
-%define input_buf(b)	(b)+12		; JSAMPIMAGE input_buf
-%define input_row(b)	(b)+16		; JDIMENSION input_row
-%define output_buf(b)	(b)+20		; JSAMPARRAY output_buf
-%define num_rows(b)	(b)+24		; int num_rows
+%define out_width(b)    (b)+8           ; JDIMENSION out_width
+%define input_buf(b)    (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)    (b)+16          ; JDIMENSION input_row
+%define output_buf(b)   (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_ycc_rgb_convert_mmx)
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_mmx)
 
 EXTN(jsimd_ycc_rgb_convert_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [out_width(eax)]	; num_cols
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [out_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	edi, JSAMPIMAGE [input_buf(eax)]
-	mov	ecx, JDIMENSION [input_row(eax)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	lea	esi, [esi+ecx*SIZEOF_JSAMPROW]
-	lea	ebx, [ebx+ecx*SIZEOF_JSAMPROW]
-	lea	edx, [edx+ecx*SIZEOF_JSAMPROW]
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [input_row(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	edi, JSAMPARRAY [output_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	push	eax
-	push	edi
-	push	edx
-	push	ebx
-	push	esi
-	push	ecx			; col
+        push    eax
+        push    edi
+        push    edx
+        push    ebx
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr0
-	mov	ebx, JSAMPROW [ebx]	; inptr1
-	mov	edx, JSAMPROW [edx]	; inptr2
-	mov	edi, JSAMPROW [edi]	; outptr
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]     ; inptr0
+        mov     ebx, JSAMPROW [ebx]     ; inptr1
+        mov     edx, JSAMPROW [edx]     ; inptr2
+        mov     edi, JSAMPROW [edi]     ; outptr
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+        alignx  16,7
 .columnloop:
 
-	movq	mm5, MMWORD [ebx]	; mm5=Cb(01234567)
-	movq	mm1, MMWORD [edx]	; mm1=Cr(01234567)
+        movq    mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
+        movq    mm1, MMWORD [edx]       ; mm1=Cr(01234567)
 
-	pcmpeqw	mm4,mm4
-	pcmpeqw	mm7,mm7
-	psrlw	mm4,BYTE_BIT
-	psllw	mm7,7			; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
-	movq	mm0,mm4			; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
+        pcmpeqw mm4,mm4
+        pcmpeqw mm7,mm7
+        psrlw   mm4,BYTE_BIT
+        psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+        movq    mm0,mm4                 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
 
-	pand	mm4,mm5			; mm4=Cb(0246)=CbE
-	psrlw	mm5,BYTE_BIT		; mm5=Cb(1357)=CbO
-	pand	mm0,mm1			; mm0=Cr(0246)=CrE
-	psrlw	mm1,BYTE_BIT		; mm1=Cr(1357)=CrO
+        pand    mm4,mm5                 ; mm4=Cb(0246)=CbE
+        psrlw   mm5,BYTE_BIT            ; mm5=Cb(1357)=CbO
+        pand    mm0,mm1                 ; mm0=Cr(0246)=CrE
+        psrlw   mm1,BYTE_BIT            ; mm1=Cr(1357)=CrO
 
-	paddw	mm4,mm7
-	paddw	mm5,mm7
-	paddw	mm0,mm7
-	paddw	mm1,mm7
+        paddw   mm4,mm7
+        paddw   mm5,mm7
+        paddw   mm0,mm7
+        paddw   mm1,mm7
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movq	mm2,mm4			; mm2=CbE
-	movq	mm3,mm5			; mm3=CbO
-	paddw	mm4,mm4			; mm4=2*CbE
-	paddw	mm5,mm5			; mm5=2*CbO
-	movq	mm6,mm0			; mm6=CrE
-	movq	mm7,mm1			; mm7=CrO
-	paddw	mm0,mm0			; mm0=2*CrE
-	paddw	mm1,mm1			; mm1=2*CrO
+        movq    mm2,mm4                 ; mm2=CbE
+        movq    mm3,mm5                 ; mm3=CbO
+        paddw   mm4,mm4                 ; mm4=2*CbE
+        paddw   mm5,mm5                 ; mm5=2*CbO
+        movq    mm6,mm0                 ; mm6=CrE
+        movq    mm7,mm1                 ; mm7=CrO
+        paddw   mm0,mm0                 ; mm0=2*CrE
+        paddw   mm1,mm1                 ; mm1=2*CrO
 
-	pmulhw	mm4,[GOTOFF(eax,PW_MF0228)]	; mm4=(2*CbE * -FIX(0.22800))
-	pmulhw	mm5,[GOTOFF(eax,PW_MF0228)]	; mm5=(2*CbO * -FIX(0.22800))
-	pmulhw	mm0,[GOTOFF(eax,PW_F0402)]	; mm0=(2*CrE * FIX(0.40200))
-	pmulhw	mm1,[GOTOFF(eax,PW_F0402)]	; mm1=(2*CrO * FIX(0.40200))
+        pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbE * -FIX(0.22800))
+        pmulhw  mm5,[GOTOFF(eax,PW_MF0228)]     ; mm5=(2*CbO * -FIX(0.22800))
+        pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrE * FIX(0.40200))
+        pmulhw  mm1,[GOTOFF(eax,PW_F0402)]      ; mm1=(2*CrO * FIX(0.40200))
 
-	paddw	mm4,[GOTOFF(eax,PW_ONE)]
-	paddw	mm5,[GOTOFF(eax,PW_ONE)]
-	psraw	mm4,1			; mm4=(CbE * -FIX(0.22800))
-	psraw	mm5,1			; mm5=(CbO * -FIX(0.22800))
-	paddw	mm0,[GOTOFF(eax,PW_ONE)]
-	paddw	mm1,[GOTOFF(eax,PW_ONE)]
-	psraw	mm0,1			; mm0=(CrE * FIX(0.40200))
-	psraw	mm1,1			; mm1=(CrO * FIX(0.40200))
+        paddw   mm4,[GOTOFF(eax,PW_ONE)]
+        paddw   mm5,[GOTOFF(eax,PW_ONE)]
+        psraw   mm4,1                   ; mm4=(CbE * -FIX(0.22800))
+        psraw   mm5,1                   ; mm5=(CbO * -FIX(0.22800))
+        paddw   mm0,[GOTOFF(eax,PW_ONE)]
+        paddw   mm1,[GOTOFF(eax,PW_ONE)]
+        psraw   mm0,1                   ; mm0=(CrE * FIX(0.40200))
+        psraw   mm1,1                   ; mm1=(CrO * FIX(0.40200))
 
-	paddw	mm4,mm2
-	paddw	mm5,mm3
-	paddw	mm4,mm2			; mm4=(CbE * FIX(1.77200))=(B-Y)E
-	paddw	mm5,mm3			; mm5=(CbO * FIX(1.77200))=(B-Y)O
-	paddw	mm0,mm6			; mm0=(CrE * FIX(1.40200))=(R-Y)E
-	paddw	mm1,mm7			; mm1=(CrO * FIX(1.40200))=(R-Y)O
+        paddw   mm4,mm2
+        paddw   mm5,mm3
+        paddw   mm4,mm2                 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   mm5,mm3                 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   mm0,mm6                 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   mm1,mm7                 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=(B-Y)E
-	movq	MMWORD [wk(1)], mm5	; wk(1)=(B-Y)O
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(B-Y)E
+        movq    MMWORD [wk(1)], mm5     ; wk(1)=(B-Y)O
 
-	movq      mm4,mm2
-	movq      mm5,mm3
-	punpcklwd mm2,mm6
-	punpckhwd mm4,mm6
-	pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   mm4,[GOTOFF(eax,PW_MF0344_F0285)]
-	punpcklwd mm3,mm7
-	punpckhwd mm5,mm7
-	pmaddwd   mm3,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        movq      mm4,mm2
+        movq      mm5,mm3
+        punpcklwd mm2,mm6
+        punpckhwd mm4,mm6
+        pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd mm3,mm7
+        punpckhwd mm5,mm7
+        pmaddwd   mm3,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
 
-	paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     mm4,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     mm2,SCALEBITS
-	psrad     mm4,SCALEBITS
-	paddd     mm3,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     mm3,SCALEBITS
-	psrad     mm5,SCALEBITS
+        paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm4,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm2,SCALEBITS
+        psrad     mm4,SCALEBITS
+        paddd     mm3,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm3,SCALEBITS
+        psrad     mm5,SCALEBITS
 
-	packssdw  mm2,mm4	; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
-	packssdw  mm3,mm5	; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
-	psubw     mm2,mm6	; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
-	psubw     mm3,mm7	; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+        packssdw  mm2,mm4       ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  mm3,mm5       ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     mm2,mm6       ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     mm3,mm7       ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
 
-	movq      mm5, MMWORD [esi]	; mm5=Y(01234567)
+        movq      mm5, MMWORD [esi]     ; mm5=Y(01234567)
 
-	pcmpeqw   mm4,mm4
-	psrlw     mm4,BYTE_BIT		; mm4={0xFF 0x00 0xFF 0x00 ..}
-	pand      mm4,mm5		; mm4=Y(0246)=YE
-	psrlw     mm5,BYTE_BIT		; mm5=Y(1357)=YO
+        pcmpeqw   mm4,mm4
+        psrlw     mm4,BYTE_BIT          ; mm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      mm4,mm5               ; mm4=Y(0246)=YE
+        psrlw     mm5,BYTE_BIT          ; mm5=Y(1357)=YO
 
-	paddw     mm0,mm4		; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
-	paddw     mm1,mm5		; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
-	packuswb  mm0,mm0		; mm0=(R0 R2 R4 R6 ** ** ** **)
-	packuswb  mm1,mm1		; mm1=(R1 R3 R5 R7 ** ** ** **)
+        paddw     mm0,mm4               ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
+        paddw     mm1,mm5               ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
+        packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+        packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
 
-	paddw     mm2,mm4		; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
-	paddw     mm3,mm5		; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
-	packuswb  mm2,mm2		; mm2=(G0 G2 G4 G6 ** ** ** **)
-	packuswb  mm3,mm3		; mm3=(G1 G3 G5 G7 ** ** ** **)
+        paddw     mm2,mm4               ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
+        paddw     mm3,mm5               ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
+        packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+        packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
 
-	paddw     mm4, MMWORD [wk(0)]	; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
-	paddw     mm5, MMWORD [wk(1)]	; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
-	packuswb  mm4,mm4		; mm4=(B0 B2 B4 B6 ** ** ** **)
-	packuswb  mm5,mm5		; mm5=(B1 B3 B5 B7 ** ** ** **)
+        paddw     mm4, MMWORD [wk(0)]   ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
+        paddw     mm5, MMWORD [wk(1)]   ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
+        packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+        packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
-	; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
-	; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
-	; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
 
-	punpcklbw mmA,mmC		; mmA=(00 10 02 12 04 14 06 16)
-	punpcklbw mmE,mmB		; mmE=(20 01 22 03 24 05 26 07)
-	punpcklbw mmD,mmF		; mmD=(11 21 13 23 15 25 17 27)
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+        punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
 
-	movq      mmG,mmA
-	movq      mmH,mmA
-	punpcklwd mmA,mmE		; mmA=(00 10 20 01 02 12 22 03)
-	punpckhwd mmG,mmE		; mmG=(04 14 24 05 06 16 26 07)
+        movq      mmG,mmA
+        movq      mmH,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+        punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
 
-	psrlq     mmH,2*BYTE_BIT	; mmH=(02 12 04 14 06 16 -- --)
-	psrlq     mmE,2*BYTE_BIT	; mmE=(22 03 24 05 26 07 -- --)
+        psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+        psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
 
-	movq      mmC,mmD
-	movq      mmB,mmD
-	punpcklwd mmD,mmH		; mmD=(11 21 02 12 13 23 04 14)
-	punpckhwd mmC,mmH		; mmC=(15 25 06 16 17 27 -- --)
+        movq      mmC,mmD
+        movq      mmB,mmD
+        punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+        punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
 
-	psrlq     mmB,2*BYTE_BIT	; mmB=(13 23 15 25 17 27 -- --)
+        psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
 
-	movq      mmF,mmE
-	punpcklwd mmE,mmB		; mmE=(22 03 13 23 24 05 15 25)
-	punpckhwd mmF,mmB		; mmF=(26 07 17 27 -- -- -- --)
+        movq      mmF,mmE
+        punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+        punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
 
-	punpckldq mmA,mmD		; mmA=(00 10 20 01 11 21 02 12)
-	punpckldq mmE,mmG		; mmE=(22 03 13 23 04 14 24 05)
-	punpckldq mmC,mmF		; mmC=(15 25 06 16 26 07 17 27)
+        punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+        punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+        punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st16
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmE
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
 
-	sub	ecx, byte SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .nextrow
 
-	add	esi, byte SIZEOF_MMWORD			; inptr0
-	add	ebx, byte SIZEOF_MMWORD			; inptr1
-	add	edx, byte SIZEOF_MMWORD			; inptr2
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; outptr
-	jmp	near .columnloop
-	alignx	16,7
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st16:
-	lea	ecx, [ecx+ecx*2]	; imul ecx, RGB_PIXELSIZE
-	cmp	ecx, byte 2*SIZEOF_MMWORD
-	jb	short .column_st8
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmE
-	movq	mmA,mmC
-	sub	ecx, byte 2*SIZEOF_MMWORD
-	add	edi, byte 2*SIZEOF_MMWORD
-	jmp	short .column_st4
+        lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_MMWORD
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    mmA,mmC
+        sub     ecx, byte 2*SIZEOF_MMWORD
+        add     edi, byte 2*SIZEOF_MMWORD
+        jmp     short .column_st4
 .column_st8:
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st4
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	mmA,mmE
-	sub	ecx, byte SIZEOF_MMWORD
-	add	edi, byte SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmE
+        sub     ecx, byte SIZEOF_MMWORD
+        add     edi, byte SIZEOF_MMWORD
 .column_st4:
-	movd	eax,mmA
-	cmp	ecx, byte SIZEOF_DWORD
-	jb	short .column_st2
-	mov	DWORD [edi+0*SIZEOF_DWORD], eax
-	psrlq	mmA,DWORD_BIT
-	movd	eax,mmA
-	sub	ecx, byte SIZEOF_DWORD
-	add	edi, byte SIZEOF_DWORD
+        movd    eax,mmA
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st2
+        mov     DWORD [edi+0*SIZEOF_DWORD], eax
+        psrlq   mmA,DWORD_BIT
+        movd    eax,mmA
+        sub     ecx, byte SIZEOF_DWORD
+        add     edi, byte SIZEOF_DWORD
 .column_st2:
-	cmp	ecx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [edi+0*SIZEOF_WORD], ax
-	shr	eax,WORD_BIT
-	sub	ecx, byte SIZEOF_WORD
-	add	edi, byte SIZEOF_WORD
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi+0*SIZEOF_WORD], ax
+        shr     eax,WORD_BIT
+        sub     ecx, byte SIZEOF_WORD
+        add     edi, byte SIZEOF_WORD
 .column_st1:
-	cmp	ecx, byte SIZEOF_BYTE
-	jb	short .nextrow
-	mov	BYTE [edi+0*SIZEOF_BYTE], al
+        cmp     ecx, byte SIZEOF_BYTE
+        jb      short .nextrow
+        mov     BYTE [edi+0*SIZEOF_BYTE], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   mm6,mm6		; mm6=(X0 X2 X4 X6 ** ** ** **)
-	pcmpeqb   mm7,mm7		; mm7=(X1 X3 X5 X7 ** ** ** **)
+        pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
 %else
-	pxor      mm6,mm6		; mm6=(X0 X2 X4 X6 ** ** ** **)
-	pxor      mm7,mm7		; mm7=(X1 X3 X5 X7 ** ** ** **)
+        pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
 %endif
-	; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
-	; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
-	; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
-	; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
 
-	punpcklbw mmA,mmC		; mmA=(00 10 02 12 04 14 06 16)
-	punpcklbw mmE,mmG		; mmE=(20 30 22 32 24 34 26 36)
-	punpcklbw mmB,mmD		; mmB=(01 11 03 13 05 15 07 17)
-	punpcklbw mmF,mmH		; mmF=(21 31 23 33 25 35 27 37)
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+        punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+        punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
 
-	movq      mmC,mmA
-	punpcklwd mmA,mmE		; mmA=(00 10 20 30 02 12 22 32)
-	punpckhwd mmC,mmE		; mmC=(04 14 24 34 06 16 26 36)
-	movq      mmG,mmB
-	punpcklwd mmB,mmF		; mmB=(01 11 21 31 03 13 23 33)
-	punpckhwd mmG,mmF		; mmG=(05 15 25 35 07 17 27 37)
+        movq      mmC,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+        punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+        movq      mmG,mmB
+        punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+        punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
 
-	movq      mmD,mmA
-	punpckldq mmA,mmB		; mmA=(00 10 20 30 01 11 21 31)
-	punpckhdq mmD,mmB		; mmD=(02 12 22 32 03 13 23 33)
-	movq      mmH,mmC
-	punpckldq mmC,mmG		; mmC=(04 14 24 34 05 15 25 35)
-	punpckhdq mmH,mmG		; mmH=(06 16 26 36 07 17 27 37)
+        movq      mmD,mmA
+        punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+        punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+        movq      mmH,mmC
+        punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+        punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st16
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmD
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mmC
-	movq	MMWORD [edi+3*SIZEOF_MMWORD], mmH
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
 
-	sub	ecx, byte SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .nextrow
 
-	add	esi, byte SIZEOF_MMWORD			; inptr0
-	add	ebx, byte SIZEOF_MMWORD			; inptr1
-	add	edx, byte SIZEOF_MMWORD			; inptr2
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; outptr
-	jmp	near .columnloop
-	alignx	16,7
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st16:
-	cmp	ecx, byte SIZEOF_MMWORD/2
-	jb	short .column_st8
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmD
-	movq	mmA,mmC
-	movq	mmD,mmH
-	sub	ecx, byte SIZEOF_MMWORD/2
-	add	edi, byte 2*SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD/2
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    mmA,mmC
+        movq    mmD,mmH
+        sub     ecx, byte SIZEOF_MMWORD/2
+        add     edi, byte 2*SIZEOF_MMWORD
 .column_st8:
-	cmp	ecx, byte SIZEOF_MMWORD/4
-	jb	short .column_st4
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	mmA,mmD
-	sub	ecx, byte SIZEOF_MMWORD/4
-	add	edi, byte 1*SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD/4
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmD
+        sub     ecx, byte SIZEOF_MMWORD/4
+        add     edi, byte 1*SIZEOF_MMWORD
 .column_st4:
-	cmp	ecx, byte SIZEOF_MMWORD/8
-	jb	short .nextrow
-	movd	DWORD [edi+0*SIZEOF_DWORD], mmA
+        cmp     ecx, byte SIZEOF_MMWORD/8
+        jb      short .nextrow
+        movd    DWORD [edi+0*SIZEOF_DWORD], mmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	alignx	16,7
+        alignx  16,7
 
 .nextrow:
-	pop	ecx
-	pop	esi
-	pop	ebx
-	pop	edx
-	pop	edi
-	pop	eax
+        pop     ecx
+        pop     esi
+        pop     ebx
+        pop     edx
+        pop     edi
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	add	ebx, byte SIZEOF_JSAMPROW
-	add	edx, byte SIZEOF_JSAMPROW
-	add	edi, byte SIZEOF_JSAMPROW	; output_buf
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm
index 7d17c52..88a9f1e 100644
--- a/simd/jdclrss2-64.asm
+++ b/simd/jdclrss2-64.asm
@@ -18,7 +18,7 @@
 ; [TAB8]
 
 %include "jcolsamp.inc"
-				
+
 ; --------------------------------------------------------------------------
 ;
 ; Convert some rows of samples to the output colorspace.
@@ -35,407 +35,407 @@
 ; r13 = JSAMPARRAY output_buf
 ; r14 = int num_rows
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_ycc_rgb_convert_sse2)
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
 
-	mov	rcx, r10	; num_cols
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10        ; num_cols
+        test    rcx,rcx
+        jz      near .return
 
-	push	rcx
+        push    rcx
 
-	mov	rdi, r11
-	mov	rcx, r12
-	mov	rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
-	mov	rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
-	mov	rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
-	lea	rsi, [rsi+rcx*SIZEOF_JSAMPROW]
-	lea	rbx, [rbx+rcx*SIZEOF_JSAMPROW]
-	lea	rdx, [rdx+rcx*SIZEOF_JSAMPROW]
+        mov     rdi, r11
+        mov     rcx, r12
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+        lea     rsi, [rsi+rcx*SIZEOF_JSAMPROW]
+        lea     rbx, [rbx+rcx*SIZEOF_JSAMPROW]
+        lea     rdx, [rdx+rcx*SIZEOF_JSAMPROW]
 
-	pop	rcx
+        pop     rcx
 
-	mov	rdi, r13
-	mov	eax, r14d
-	test	rax,rax
-	jle	near .return
+        mov     rdi, r13
+        mov     eax, r14d
+        test    rax,rax
+        jle     near .return
 .rowloop:
-	push	rax
-	push	rdi
-	push	rdx
-	push	rbx
-	push	rsi
-	push	rcx			; col
+        push    rax
+        push    rdi
+        push    rdx
+        push    rbx
+        push    rsi
+        push    rcx                     ; col
 
-	mov	rsi, JSAMPROW [rsi]	; inptr0
-	mov	rbx, JSAMPROW [rbx]	; inptr1
-	mov	rdx, JSAMPROW [rdx]	; inptr2
-	mov	rdi, JSAMPROW [rdi]	; outptr
+        mov     rsi, JSAMPROW [rsi]     ; inptr0
+        mov     rbx, JSAMPROW [rbx]     ; inptr1
+        mov     rdx, JSAMPROW [rdx]     ; inptr2
+        mov     rdi, JSAMPROW [rdi]     ; outptr
 .columnloop:
 
-	movdqa	xmm5, XMMWORD [rbx]	; xmm5=Cb(0123456789ABCDEF)
-	movdqa	xmm1, XMMWORD [rdx]	; xmm1=Cr(0123456789ABCDEF)
+        movdqa  xmm5, XMMWORD [rbx]     ; xmm5=Cb(0123456789ABCDEF)
+        movdqa  xmm1, XMMWORD [rdx]     ; xmm1=Cr(0123456789ABCDEF)
 
-	pcmpeqw	xmm4,xmm4
-	pcmpeqw	xmm7,xmm7
-	psrlw	xmm4,BYTE_BIT
-	psllw	xmm7,7			; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
-	movdqa	xmm0,xmm4		; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pcmpeqw xmm4,xmm4
+        pcmpeqw xmm7,xmm7
+        psrlw   xmm4,BYTE_BIT
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        movdqa  xmm0,xmm4               ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
 
-	pand	xmm4,xmm5		; xmm4=Cb(02468ACE)=CbE
-	psrlw	xmm5,BYTE_BIT		; xmm5=Cb(13579BDF)=CbO
-	pand	xmm0,xmm1		; xmm0=Cr(02468ACE)=CrE
-	psrlw	xmm1,BYTE_BIT		; xmm1=Cr(13579BDF)=CrO
+        pand    xmm4,xmm5               ; xmm4=Cb(02468ACE)=CbE
+        psrlw   xmm5,BYTE_BIT           ; xmm5=Cb(13579BDF)=CbO
+        pand    xmm0,xmm1               ; xmm0=Cr(02468ACE)=CrE
+        psrlw   xmm1,BYTE_BIT           ; xmm1=Cr(13579BDF)=CrO
 
-	paddw	xmm4,xmm7
-	paddw	xmm5,xmm7
-	paddw	xmm0,xmm7
-	paddw	xmm1,xmm7
+        paddw   xmm4,xmm7
+        paddw   xmm5,xmm7
+        paddw   xmm0,xmm7
+        paddw   xmm1,xmm7
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movdqa	xmm2,xmm4		; xmm2=CbE
-	movdqa	xmm3,xmm5		; xmm3=CbO
-	paddw	xmm4,xmm4		; xmm4=2*CbE
-	paddw	xmm5,xmm5		; xmm5=2*CbO
-	movdqa	xmm6,xmm0		; xmm6=CrE
-	movdqa	xmm7,xmm1		; xmm7=CrO
-	paddw	xmm0,xmm0		; xmm0=2*CrE
-	paddw	xmm1,xmm1		; xmm1=2*CrO
+        movdqa  xmm2,xmm4               ; xmm2=CbE
+        movdqa  xmm3,xmm5               ; xmm3=CbO
+        paddw   xmm4,xmm4               ; xmm4=2*CbE
+        paddw   xmm5,xmm5               ; xmm5=2*CbO
+        movdqa  xmm6,xmm0               ; xmm6=CrE
+        movdqa  xmm7,xmm1               ; xmm7=CrO
+        paddw   xmm0,xmm0               ; xmm0=2*CrE
+        paddw   xmm1,xmm1               ; xmm1=2*CrO
 
-	pmulhw	xmm4,[rel PW_MF0228]	; xmm4=(2*CbE * -FIX(0.22800))
-	pmulhw	xmm5,[rel PW_MF0228]	; xmm5=(2*CbO * -FIX(0.22800))
-	pmulhw	xmm0,[rel PW_F0402]	; xmm0=(2*CrE * FIX(0.40200))
-	pmulhw	xmm1,[rel PW_F0402]	; xmm1=(2*CrO * FIX(0.40200))
+        pmulhw  xmm4,[rel PW_MF0228]    ; xmm4=(2*CbE * -FIX(0.22800))
+        pmulhw  xmm5,[rel PW_MF0228]    ; xmm5=(2*CbO * -FIX(0.22800))
+        pmulhw  xmm0,[rel PW_F0402]     ; xmm0=(2*CrE * FIX(0.40200))
+        pmulhw  xmm1,[rel PW_F0402]     ; xmm1=(2*CrO * FIX(0.40200))
 
-	paddw	xmm4,[rel PW_ONE]
-	paddw	xmm5,[rel PW_ONE]
-	psraw	xmm4,1			; xmm4=(CbE * -FIX(0.22800))
-	psraw	xmm5,1			; xmm5=(CbO * -FIX(0.22800))
-	paddw	xmm0,[rel PW_ONE]
-	paddw	xmm1,[rel PW_ONE]
-	psraw	xmm0,1			; xmm0=(CrE * FIX(0.40200))
-	psraw	xmm1,1			; xmm1=(CrO * FIX(0.40200))
+        paddw   xmm4,[rel PW_ONE]
+        paddw   xmm5,[rel PW_ONE]
+        psraw   xmm4,1                  ; xmm4=(CbE * -FIX(0.22800))
+        psraw   xmm5,1                  ; xmm5=(CbO * -FIX(0.22800))
+        paddw   xmm0,[rel PW_ONE]
+        paddw   xmm1,[rel PW_ONE]
+        psraw   xmm0,1                  ; xmm0=(CrE * FIX(0.40200))
+        psraw   xmm1,1                  ; xmm1=(CrO * FIX(0.40200))
 
-	paddw	xmm4,xmm2
-	paddw	xmm5,xmm3
-	paddw	xmm4,xmm2		; xmm4=(CbE * FIX(1.77200))=(B-Y)E
-	paddw	xmm5,xmm3		; xmm5=(CbO * FIX(1.77200))=(B-Y)O
-	paddw	xmm0,xmm6		; xmm0=(CrE * FIX(1.40200))=(R-Y)E
-	paddw	xmm1,xmm7		; xmm1=(CrO * FIX(1.40200))=(R-Y)O
+        paddw   xmm4,xmm2
+        paddw   xmm5,xmm3
+        paddw   xmm4,xmm2               ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   xmm5,xmm3               ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   xmm0,xmm6               ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   xmm1,xmm7               ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=(B-Y)E
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(B-Y)O
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=(B-Y)E
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(B-Y)O
 
-	movdqa    xmm4,xmm2
-	movdqa    xmm5,xmm3
-	punpcklwd xmm2,xmm6
-	punpckhwd xmm4,xmm6
-	pmaddwd   xmm2,[rel PW_MF0344_F0285]
-	pmaddwd   xmm4,[rel PW_MF0344_F0285]
-	punpcklwd xmm3,xmm7
-	punpckhwd xmm5,xmm7
-	pmaddwd   xmm3,[rel PW_MF0344_F0285]
-	pmaddwd   xmm5,[rel PW_MF0344_F0285]
+        movdqa    xmm4,xmm2
+        movdqa    xmm5,xmm3
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm4,xmm6
+        pmaddwd   xmm2,[rel PW_MF0344_F0285]
+        pmaddwd   xmm4,[rel PW_MF0344_F0285]
+        punpcklwd xmm3,xmm7
+        punpckhwd xmm5,xmm7
+        pmaddwd   xmm3,[rel PW_MF0344_F0285]
+        pmaddwd   xmm5,[rel PW_MF0344_F0285]
 
-	paddd     xmm2,[rel PD_ONEHALF]
-	paddd     xmm4,[rel PD_ONEHALF]
-	psrad     xmm2,SCALEBITS
-	psrad     xmm4,SCALEBITS
-	paddd     xmm3,[rel PD_ONEHALF]
-	paddd     xmm5,[rel PD_ONEHALF]
-	psrad     xmm3,SCALEBITS
-	psrad     xmm5,SCALEBITS
+        paddd     xmm2,[rel PD_ONEHALF]
+        paddd     xmm4,[rel PD_ONEHALF]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm4,SCALEBITS
+        paddd     xmm3,[rel PD_ONEHALF]
+        paddd     xmm5,[rel PD_ONEHALF]
+        psrad     xmm3,SCALEBITS
+        psrad     xmm5,SCALEBITS
 
-	packssdw  xmm2,xmm4	; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
-	packssdw  xmm3,xmm5	; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
-	psubw     xmm2,xmm6	; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
-	psubw     xmm3,xmm7	; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+        packssdw  xmm2,xmm4     ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  xmm3,xmm5     ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     xmm2,xmm6     ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     xmm3,xmm7     ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
 
-	movdqa    xmm5, XMMWORD [rsi]	; xmm5=Y(0123456789ABCDEF)
+        movdqa    xmm5, XMMWORD [rsi]   ; xmm5=Y(0123456789ABCDEF)
 
-	pcmpeqw   xmm4,xmm4
-	psrlw     xmm4,BYTE_BIT		; xmm4={0xFF 0x00 0xFF 0x00 ..}
-	pand      xmm4,xmm5		; xmm4=Y(02468ACE)=YE
-	psrlw     xmm5,BYTE_BIT		; xmm5=Y(13579BDF)=YO
+        pcmpeqw   xmm4,xmm4
+        psrlw     xmm4,BYTE_BIT         ; xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      xmm4,xmm5             ; xmm4=Y(02468ACE)=YE
+        psrlw     xmm5,BYTE_BIT         ; xmm5=Y(13579BDF)=YO
 
-	paddw     xmm0,xmm4		; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
-	paddw     xmm1,xmm5		; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
-	packuswb  xmm0,xmm0		; xmm0=R(02468ACE********)
-	packuswb  xmm1,xmm1		; xmm1=R(13579BDF********)
+        paddw     xmm0,xmm4             ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm5             ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
 
-	paddw     xmm2,xmm4		; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
-	paddw     xmm3,xmm5		; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
-	packuswb  xmm2,xmm2		; xmm2=G(02468ACE********)
-	packuswb  xmm3,xmm3		; xmm3=G(13579BDF********)
+        paddw     xmm2,xmm4             ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm5             ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
 
-	paddw     xmm4, XMMWORD [wk(0)]	; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
-	paddw     xmm5, XMMWORD [wk(1)]	; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
-	packuswb  xmm4,xmm4		; xmm4=B(02468ACE********)
-	packuswb  xmm5,xmm5		; xmm5=B(13579BDF********)
+        paddw     xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
+        paddw     xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmB	; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
-	punpcklbw xmmD,xmmF	; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
 
-	movdqa    xmmG,xmmA
-	movdqa    xmmH,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
-	punpckhwd xmmG,xmmE	; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
 
-	psrldq    xmmH,2	; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
-	psrldq    xmmE,2	; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
 
-	movdqa    xmmC,xmmD
-	movdqa    xmmB,xmmD
-	punpcklwd xmmD,xmmH	; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
-	punpckhwd xmmC,xmmH	; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
 
-	psrldq    xmmB,2	; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
 
-	movdqa    xmmF,xmmE
-	punpcklwd xmmE,xmmB	; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
-	punpckhwd xmmF,xmmB	; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
 
-	pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
-	movdqa    xmmB,xmmE
-	punpckldq xmmA,xmmD	; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
-	punpckldq xmmE,xmmH	; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
-	punpckhdq xmmD,xmmB	; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
 
-	pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
-	movdqa    xmmB,xmmF
-	punpckldq xmmG,xmmC	; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
-	punpckldq xmmF,xmmH	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
-	punpckhdq xmmC,xmmB	; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
 
-	punpcklqdq xmmA,xmmE	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	punpcklqdq xmmD,xmmG	; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	punpcklqdq xmmF,xmmC	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	rdi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
 .out0:
-	add	rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	rcx, byte SIZEOF_XMMWORD
-	jz	near .nextrow
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
 
-	add	rsi, byte SIZEOF_XMMWORD	; inptr0
-	add	rbx, byte SIZEOF_XMMWORD	; inptr1
-	add	rdx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
 
 .column_st32:
-	lea	rcx, [rcx+rcx*2]		; imul ecx, RGB_PIXELSIZE
-	cmp	rcx, byte 2*SIZEOF_XMMWORD
-	jb	short .column_st16
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmF
-	sub	rcx, byte 2*SIZEOF_XMMWORD
-	jmp	short .column_st15
+        lea     rcx, [rcx+rcx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     rcx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     rcx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
 .column_st16:
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st15
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	add	rdi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	rcx, byte SIZEOF_XMMWORD
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD
 .column_st15:
-	; Store the lower 8 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_MMWORD
-	jb	short .column_st7
-	movq	XMM_MMWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_MMWORD
-	sub	rcx, byte SIZEOF_MMWORD
-	psrldq	xmmA, SIZEOF_MMWORD
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_MMWORD
+        sub     rcx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
 .column_st7:
-	; Store the lower 4 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_DWORD
-	jb	short .column_st3
-	movd	XMM_DWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_DWORD
-	sub	rcx, byte SIZEOF_DWORD
-	psrldq	xmmA, SIZEOF_DWORD
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_DWORD
+        sub     rcx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
 .column_st3:
-	; Store the lower 2 bytes of rax to the output when it has enough
-	; space.
-	movd	eax, xmmA
-	cmp	rcx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [rdi], ax
-	add	rdi, byte SIZEOF_WORD
-	sub	rcx, byte SIZEOF_WORD
-	shr	rax, 16
+        ; Store the lower 2 bytes of rax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     rcx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [rdi], ax
+        add     rdi, byte SIZEOF_WORD
+        sub     rcx, byte SIZEOF_WORD
+        shr     rax, 16
 .column_st1:
-	; Store the lower 1 byte of rax to the output when it has enough
-	; space.
-	test	rcx, rcx
-	jz	short .nextrow
-	mov	BYTE [rdi], al
+        ; Store the lower 1 byte of rax to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .nextrow
+        mov     BYTE [rdi], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pcmpeqb   xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %else
-	pxor      xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pxor      xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %endif
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmG	; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
-	punpcklbw xmmB,xmmD	; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
-	punpcklbw xmmF,xmmH	; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
 
-	movdqa    xmmC,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
-	punpckhwd xmmC,xmmE	; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
-	movdqa    xmmG,xmmB
-	punpcklwd xmmB,xmmF	; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
-	punpckhwd xmmG,xmmF	; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpckldq xmmA,xmmB	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	punpckhdq xmmD,xmmB	; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	movdqa    xmmH,xmmC
-	punpckldq xmmC,xmmG	; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	punpckhdq xmmH,xmmG	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	rdi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
-	movntdq	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
-	movdqu	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
 .out0:
-	add	rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	rcx, byte SIZEOF_XMMWORD
-	jz	near .nextrow
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
 
-	add	rsi, byte SIZEOF_XMMWORD	; inptr0
-	add	rbx, byte SIZEOF_XMMWORD	; inptr1
-	add	rdx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
 
 .column_st32:
-	cmp	rcx, byte SIZEOF_XMMWORD/2
-	jb	short .column_st16
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmC
-	movdqa	xmmD,xmmH
-	sub	rcx, byte SIZEOF_XMMWORD/2
+        cmp     rcx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     rcx, byte SIZEOF_XMMWORD/2
 .column_st16:
-	cmp	rcx, byte SIZEOF_XMMWORD/4
-	jb	short .column_st15
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	add	rdi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	rcx, byte SIZEOF_XMMWORD/4
+        cmp     rcx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD/4
 .column_st15:
-	; Store two pixels (8 bytes) of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_XMMWORD/8
-	jb	short .column_st7
-	movq	MMWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_XMMWORD/8*4
-	sub	rcx, byte SIZEOF_XMMWORD/8
-	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_XMMWORD/8*4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
 .column_st7:
-	; Store one pixel (4 bytes) of xmmA to the output when it has enough
-	; space.
-	test	rcx, rcx
-	jz	short .nextrow
-	movd	XMM_DWORD [rdi], xmmA
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .nextrow
+        movd    XMM_DWORD [rdi], xmmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
 .nextrow:
-	pop	rcx
-	pop	rsi
-	pop	rbx
-	pop	rdx
-	pop	rdi
-	pop	rax
+        pop     rcx
+        pop     rsi
+        pop     rbx
+        pop     rdx
+        pop     rdi
+        pop     rax
 
-	add	rsi, byte SIZEOF_JSAMPROW
-	add	rbx, byte SIZEOF_JSAMPROW
-	add	rdx, byte SIZEOF_JSAMPROW
-	add	rdi, byte SIZEOF_JSAMPROW	; output_buf
-	dec	rax				; num_rows
-	jg	near .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW
+        add     rbx, byte SIZEOF_JSAMPROW
+        add     rdx, byte SIZEOF_JSAMPROW
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     rax                             ; num_rows
+        jg      near .rowloop
 
-	sfence		; flush the write buffer
+        sfence          ; flush the write buffer
 
 .return:
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdclrss2.asm b/simd/jdclrss2.asm
index 97754cb..07b4fcf 100644
--- a/simd/jdclrss2.asm
+++ b/simd/jdclrss2.asm
@@ -18,7 +18,7 @@
 ; [TAB8]
 
 %include "jcolsamp.inc"
-				
+
 ; --------------------------------------------------------------------------
 ;
 ; Convert some rows of samples to the output colorspace.
@@ -29,432 +29,432 @@
 ;                             JSAMPARRAY output_buf, int num_rows)
 ;
 
-%define out_width(b)	(b)+8			; JDIMENSION out_width
-%define input_buf(b)	(b)+12		; JSAMPIMAGE input_buf
-%define input_row(b)	(b)+16		; JDIMENSION input_row
-%define output_buf(b)	(b)+20		; JSAMPARRAY output_buf
-%define num_rows(b)	(b)+24		; int num_rows
+%define out_width(b)    (b)+8           ; JDIMENSION out_width
+%define input_buf(b)    (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)    (b)+16          ; JDIMENSION input_row
+%define output_buf(b)   (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)     (b)+24          ; int num_rows
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_ycc_rgb_convert_sse2)
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [out_width(eax)]	; num_cols
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [out_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	edi, JSAMPIMAGE [input_buf(eax)]
-	mov	ecx, JDIMENSION [input_row(eax)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	lea	esi, [esi+ecx*SIZEOF_JSAMPROW]
-	lea	ebx, [ebx+ecx*SIZEOF_JSAMPROW]
-	lea	edx, [edx+ecx*SIZEOF_JSAMPROW]
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [input_row(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
 
-	pop	ecx
+        pop     ecx
 
-	mov	edi, JSAMPARRAY [output_buf(eax)]
-	mov	eax, INT [num_rows(eax)]
-	test	eax,eax
-	jle	near .return
-	alignx	16,7
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
 .rowloop:
-	push	eax
-	push	edi
-	push	edx
-	push	ebx
-	push	esi
-	push	ecx			; col
+        push    eax
+        push    edi
+        push    edx
+        push    ebx
+        push    esi
+        push    ecx                     ; col
 
-	mov	esi, JSAMPROW [esi]	; inptr0
-	mov	ebx, JSAMPROW [ebx]	; inptr1
-	mov	edx, JSAMPROW [edx]	; inptr2
-	mov	edi, JSAMPROW [edi]	; outptr
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]     ; inptr0
+        mov     ebx, JSAMPROW [ebx]     ; inptr1
+        mov     edx, JSAMPROW [edx]     ; inptr2
+        mov     edi, JSAMPROW [edi]     ; outptr
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+        alignx  16,7
 .columnloop:
 
-	movdqa	xmm5, XMMWORD [ebx]	; xmm5=Cb(0123456789ABCDEF)
-	movdqa	xmm1, XMMWORD [edx]	; xmm1=Cr(0123456789ABCDEF)
+        movdqa  xmm5, XMMWORD [ebx]     ; xmm5=Cb(0123456789ABCDEF)
+        movdqa  xmm1, XMMWORD [edx]     ; xmm1=Cr(0123456789ABCDEF)
 
-	pcmpeqw	xmm4,xmm4
-	pcmpeqw	xmm7,xmm7
-	psrlw	xmm4,BYTE_BIT
-	psllw	xmm7,7			; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
-	movdqa	xmm0,xmm4		; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pcmpeqw xmm4,xmm4
+        pcmpeqw xmm7,xmm7
+        psrlw   xmm4,BYTE_BIT
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        movdqa  xmm0,xmm4               ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
 
-	pand	xmm4,xmm5		; xmm4=Cb(02468ACE)=CbE
-	psrlw	xmm5,BYTE_BIT		; xmm5=Cb(13579BDF)=CbO
-	pand	xmm0,xmm1		; xmm0=Cr(02468ACE)=CrE
-	psrlw	xmm1,BYTE_BIT		; xmm1=Cr(13579BDF)=CrO
+        pand    xmm4,xmm5               ; xmm4=Cb(02468ACE)=CbE
+        psrlw   xmm5,BYTE_BIT           ; xmm5=Cb(13579BDF)=CbO
+        pand    xmm0,xmm1               ; xmm0=Cr(02468ACE)=CrE
+        psrlw   xmm1,BYTE_BIT           ; xmm1=Cr(13579BDF)=CrO
 
-	paddw	xmm4,xmm7
-	paddw	xmm5,xmm7
-	paddw	xmm0,xmm7
-	paddw	xmm1,xmm7
+        paddw   xmm4,xmm7
+        paddw   xmm5,xmm7
+        paddw   xmm0,xmm7
+        paddw   xmm1,xmm7
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movdqa	xmm2,xmm4		; xmm2=CbE
-	movdqa	xmm3,xmm5		; xmm3=CbO
-	paddw	xmm4,xmm4		; xmm4=2*CbE
-	paddw	xmm5,xmm5		; xmm5=2*CbO
-	movdqa	xmm6,xmm0		; xmm6=CrE
-	movdqa	xmm7,xmm1		; xmm7=CrO
-	paddw	xmm0,xmm0		; xmm0=2*CrE
-	paddw	xmm1,xmm1		; xmm1=2*CrO
+        movdqa  xmm2,xmm4               ; xmm2=CbE
+        movdqa  xmm3,xmm5               ; xmm3=CbO
+        paddw   xmm4,xmm4               ; xmm4=2*CbE
+        paddw   xmm5,xmm5               ; xmm5=2*CbO
+        movdqa  xmm6,xmm0               ; xmm6=CrE
+        movdqa  xmm7,xmm1               ; xmm7=CrO
+        paddw   xmm0,xmm0               ; xmm0=2*CrE
+        paddw   xmm1,xmm1               ; xmm1=2*CrO
 
-	pmulhw	xmm4,[GOTOFF(eax,PW_MF0228)]	; xmm4=(2*CbE * -FIX(0.22800))
-	pmulhw	xmm5,[GOTOFF(eax,PW_MF0228)]	; xmm5=(2*CbO * -FIX(0.22800))
-	pmulhw	xmm0,[GOTOFF(eax,PW_F0402)]	; xmm0=(2*CrE * FIX(0.40200))
-	pmulhw	xmm1,[GOTOFF(eax,PW_F0402)]	; xmm1=(2*CrO * FIX(0.40200))
+        pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbE * -FIX(0.22800))
+        pmulhw  xmm5,[GOTOFF(eax,PW_MF0228)]    ; xmm5=(2*CbO * -FIX(0.22800))
+        pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrE * FIX(0.40200))
+        pmulhw  xmm1,[GOTOFF(eax,PW_F0402)]     ; xmm1=(2*CrO * FIX(0.40200))
 
-	paddw	xmm4,[GOTOFF(eax,PW_ONE)]
-	paddw	xmm5,[GOTOFF(eax,PW_ONE)]
-	psraw	xmm4,1			; xmm4=(CbE * -FIX(0.22800))
-	psraw	xmm5,1			; xmm5=(CbO * -FIX(0.22800))
-	paddw	xmm0,[GOTOFF(eax,PW_ONE)]
-	paddw	xmm1,[GOTOFF(eax,PW_ONE)]
-	psraw	xmm0,1			; xmm0=(CrE * FIX(0.40200))
-	psraw	xmm1,1			; xmm1=(CrO * FIX(0.40200))
+        paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm5,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm4,1                  ; xmm4=(CbE * -FIX(0.22800))
+        psraw   xmm5,1                  ; xmm5=(CbO * -FIX(0.22800))
+        paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm1,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm0,1                  ; xmm0=(CrE * FIX(0.40200))
+        psraw   xmm1,1                  ; xmm1=(CrO * FIX(0.40200))
 
-	paddw	xmm4,xmm2
-	paddw	xmm5,xmm3
-	paddw	xmm4,xmm2		; xmm4=(CbE * FIX(1.77200))=(B-Y)E
-	paddw	xmm5,xmm3		; xmm5=(CbO * FIX(1.77200))=(B-Y)O
-	paddw	xmm0,xmm6		; xmm0=(CrE * FIX(1.40200))=(R-Y)E
-	paddw	xmm1,xmm7		; xmm1=(CrO * FIX(1.40200))=(R-Y)O
+        paddw   xmm4,xmm2
+        paddw   xmm5,xmm3
+        paddw   xmm4,xmm2               ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   xmm5,xmm3               ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   xmm0,xmm6               ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   xmm1,xmm7               ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=(B-Y)E
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(B-Y)O
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=(B-Y)E
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(B-Y)O
 
-	movdqa    xmm4,xmm2
-	movdqa    xmm5,xmm3
-	punpcklwd xmm2,xmm6
-	punpckhwd xmm4,xmm6
-	pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
-	punpcklwd xmm3,xmm7
-	punpckhwd xmm5,xmm7
-	pmaddwd   xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        movdqa    xmm4,xmm2
+        movdqa    xmm5,xmm3
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm4,xmm6
+        pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd xmm3,xmm7
+        punpckhwd xmm5,xmm7
+        pmaddwd   xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
 
-	paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     xmm4,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     xmm2,SCALEBITS
-	psrad     xmm4,SCALEBITS
-	paddd     xmm3,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     xmm3,SCALEBITS
-	psrad     xmm5,SCALEBITS
+        paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm4,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm4,SCALEBITS
+        paddd     xmm3,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm3,SCALEBITS
+        psrad     xmm5,SCALEBITS
 
-	packssdw  xmm2,xmm4	; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
-	packssdw  xmm3,xmm5	; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
-	psubw     xmm2,xmm6	; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
-	psubw     xmm3,xmm7	; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+        packssdw  xmm2,xmm4     ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  xmm3,xmm5     ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     xmm2,xmm6     ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     xmm3,xmm7     ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
 
-	movdqa    xmm5, XMMWORD [esi]	; xmm5=Y(0123456789ABCDEF)
+        movdqa    xmm5, XMMWORD [esi]   ; xmm5=Y(0123456789ABCDEF)
 
-	pcmpeqw   xmm4,xmm4
-	psrlw     xmm4,BYTE_BIT		; xmm4={0xFF 0x00 0xFF 0x00 ..}
-	pand      xmm4,xmm5		; xmm4=Y(02468ACE)=YE
-	psrlw     xmm5,BYTE_BIT		; xmm5=Y(13579BDF)=YO
+        pcmpeqw   xmm4,xmm4
+        psrlw     xmm4,BYTE_BIT         ; xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      xmm4,xmm5             ; xmm4=Y(02468ACE)=YE
+        psrlw     xmm5,BYTE_BIT         ; xmm5=Y(13579BDF)=YO
 
-	paddw     xmm0,xmm4		; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
-	paddw     xmm1,xmm5		; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
-	packuswb  xmm0,xmm0		; xmm0=R(02468ACE********)
-	packuswb  xmm1,xmm1		; xmm1=R(13579BDF********)
+        paddw     xmm0,xmm4             ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm5             ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
 
-	paddw     xmm2,xmm4		; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
-	paddw     xmm3,xmm5		; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
-	packuswb  xmm2,xmm2		; xmm2=G(02468ACE********)
-	packuswb  xmm3,xmm3		; xmm3=G(13579BDF********)
+        paddw     xmm2,xmm4             ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm5             ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
 
-	paddw     xmm4, XMMWORD [wk(0)]	; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
-	paddw     xmm5, XMMWORD [wk(1)]	; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
-	packuswb  xmm4,xmm4		; xmm4=B(02468ACE********)
-	packuswb  xmm5,xmm5		; xmm5=B(13579BDF********)
+        paddw     xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
+        paddw     xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmB	; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
-	punpcklbw xmmD,xmmF	; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
 
-	movdqa    xmmG,xmmA
-	movdqa    xmmH,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
-	punpckhwd xmmG,xmmE	; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
 
-	psrldq    xmmH,2	; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
-	psrldq    xmmE,2	; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
 
-	movdqa    xmmC,xmmD
-	movdqa    xmmB,xmmD
-	punpcklwd xmmD,xmmH	; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
-	punpckhwd xmmC,xmmH	; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
 
-	psrldq    xmmB,2	; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
 
-	movdqa    xmmF,xmmE
-	punpcklwd xmmE,xmmB	; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
-	punpckhwd xmmF,xmmB	; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
 
-	pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
-	movdqa    xmmB,xmmE
-	punpckldq xmmA,xmmD	; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
-	punpckldq xmmE,xmmH	; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
-	punpckhdq xmmD,xmmB	; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
 
-	pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
-	movdqa    xmmB,xmmF
-	punpckldq xmmG,xmmC	; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
-	punpckldq xmmF,xmmH	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
-	punpckhdq xmmC,xmmB	; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
 
-	punpcklqdq xmmA,xmmE	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	punpcklqdq xmmD,xmmG	; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	punpcklqdq xmmF,xmmC	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	edi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
 .out0:
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	ecx, byte SIZEOF_XMMWORD
-	jz	near .nextrow
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
 
-	add	esi, byte SIZEOF_XMMWORD	; inptr0
-	add	ebx, byte SIZEOF_XMMWORD	; inptr1
-	add	edx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st32:
-	lea	ecx, [ecx+ecx*2]		; imul ecx, RGB_PIXELSIZE
-	cmp	ecx, byte 2*SIZEOF_XMMWORD
-	jb	short .column_st16
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmF
-	sub	ecx, byte 2*SIZEOF_XMMWORD
-	jmp	short .column_st15
+        lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     ecx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
 .column_st16:
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st15
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	add	edi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	ecx, byte SIZEOF_XMMWORD
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD
 .column_st15:
-	; Store the lower 8 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st7
-	movq	XMM_MMWORD [edi], xmmA
-	add	edi, byte SIZEOF_MMWORD
-	sub	ecx, byte SIZEOF_MMWORD
-	psrldq	xmmA, SIZEOF_MMWORD
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_MMWORD
+        sub     ecx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
 .column_st7:
-	; Store the lower 4 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_DWORD
-	jb	short .column_st3
-	movd	XMM_DWORD [edi], xmmA
-	add	edi, byte SIZEOF_DWORD
-	sub	ecx, byte SIZEOF_DWORD
-	psrldq	xmmA, SIZEOF_DWORD
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [edi], xmmA
+        add     edi, byte SIZEOF_DWORD
+        sub     ecx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
 .column_st3:
-	; Store the lower 2 bytes of eax to the output when it has enough
-	; space.
-	movd	eax, xmmA
-	cmp	ecx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [edi], ax
-	add	edi, byte SIZEOF_WORD
-	sub	ecx, byte SIZEOF_WORD
-	shr	eax, 16
+        ; Store the lower 2 bytes of eax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi], ax
+        add     edi, byte SIZEOF_WORD
+        sub     ecx, byte SIZEOF_WORD
+        shr     eax, 16
 .column_st1:
-	; Store the lower 1 byte of eax to the output when it has enough
-	; space.
-	test	ecx, ecx
-	jz	short .nextrow
-	mov	BYTE [edi], al
+        ; Store the lower 1 byte of eax to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .nextrow
+        mov     BYTE [edi], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pcmpeqb   xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %else
-	pxor      xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pxor      xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %endif
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmG	; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
-	punpcklbw xmmB,xmmD	; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
-	punpcklbw xmmF,xmmH	; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
 
-	movdqa    xmmC,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
-	punpckhwd xmmC,xmmE	; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
-	movdqa    xmmG,xmmB
-	punpcklwd xmmB,xmmF	; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
-	punpckhwd xmmG,xmmF	; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpckldq xmmA,xmmB	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	punpckhdq xmmD,xmmB	; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	movdqa    xmmH,xmmC
-	punpckldq xmmC,xmmG	; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	punpckhdq xmmH,xmmG	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	edi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
-	movntdq	XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
-	movdqu	XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
 .out0:
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	ecx, byte SIZEOF_XMMWORD
-	jz	near .nextrow
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
 
-	add	esi, byte SIZEOF_XMMWORD	; inptr0
-	add	ebx, byte SIZEOF_XMMWORD	; inptr1
-	add	edx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st32:
-	cmp	ecx, byte SIZEOF_XMMWORD/2
-	jb	short .column_st16
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmC
-	movdqa	xmmD,xmmH
-	sub	ecx, byte SIZEOF_XMMWORD/2
+        cmp     ecx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     ecx, byte SIZEOF_XMMWORD/2
 .column_st16:
-	cmp	ecx, byte SIZEOF_XMMWORD/4
-	jb	short .column_st15
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	add	edi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	ecx, byte SIZEOF_XMMWORD/4
+        cmp     ecx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD/4
 .column_st15:
-	; Store two pixels (8 bytes) of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_XMMWORD/8
-	jb	short .column_st7
-	movq	XMM_MMWORD [edi], xmmA
-	add	edi, byte SIZEOF_XMMWORD/8*4
-	sub	ecx, byte SIZEOF_XMMWORD/8
-	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_XMMWORD/8*4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
 .column_st7:
-	; Store one pixel (4 bytes) of xmmA to the output when it has enough
-	; space.
-	test	ecx, ecx
-	jz	short .nextrow
-	movd	XMM_DWORD [edi], xmmA
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .nextrow
+        movd    XMM_DWORD [edi], xmmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
-	alignx	16,7
+        alignx  16,7
 
 .nextrow:
-	pop	ecx
-	pop	esi
-	pop	ebx
-	pop	edx
-	pop	edi
-	pop	eax
+        pop     ecx
+        pop     esi
+        pop     ebx
+        pop     edx
+        pop     edi
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW
-	add	ebx, byte SIZEOF_JSAMPROW
-	add	edx, byte SIZEOF_JSAMPROW
-	add	edi, byte SIZEOF_JSAMPROW	; output_buf
-	dec	eax				; num_rows
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     eax                             ; num_rows
+        jg      near .rowloop
 
-	sfence		; flush the write buffer
+        sfence          ; flush the write buffer
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdcolmmx.asm b/simd/jdcolmmx.asm
index 5e4e47d..d2966fe 100644
--- a/simd/jdcolmmx.asm
+++ b/simd/jdcolmmx.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_ycc_rgb_convert_mmx)
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_mmx)
 
 EXTN(jconst_ycc_rgb_convert_mmx):
 
-PW_F0402	times 4 dw  F_0_402
-PW_MF0228	times 4 dw -F_0_228
-PW_MF0344_F0285	times 2 dw -F_0_344, F_0_285
-PW_ONE		times 4 dw  1
-PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
+PW_F0402        times 4 dw  F_0_402
+PW_MF0228       times 4 dw -F_0_228
+PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
+PW_ONE          times 4 dw  1
+PD_ONEHALF      times 2 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jdclrmmx.asm"
 
diff --git a/simd/jdcolss2-64.asm b/simd/jdcolss2-64.asm
index 01b3dce..b484618 100644
--- a/simd/jdcolss2-64.asm
+++ b/simd/jdcolss2-64.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_ycc_rgb_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
 
-PW_F0402	times 8 dw  F_0_402
-PW_MF0228	times 8 dw -F_0_228
-PW_MF0344_F0285	times 4 dw -F_0_344, F_0_285
-PW_ONE		times 8 dw  1
-PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
+PW_F0402        times 8 dw  F_0_402
+PW_MF0228       times 8 dw -F_0_228
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
+PW_ONE          times 8 dw  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 
 %include "jdclrss2-64.asm"
 
diff --git a/simd/jdcolss2.asm b/simd/jdcolss2.asm
index 1912d92..38ed416 100644
--- a/simd/jdcolss2.asm
+++ b/simd/jdcolss2.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_ycc_rgb_convert_sse2)
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
 
-PW_F0402	times 8 dw  F_0_402
-PW_MF0228	times 8 dw -F_0_228
-PW_MF0344_F0285	times 4 dw -F_0_344, F_0_285
-PW_ONE		times 8 dw  1
-PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
+PW_F0402        times 8 dw  F_0_402
+PW_MF0228       times 8 dw -F_0_228
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
+PW_ONE          times 8 dw  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jdclrss2.asm"
 
diff --git a/simd/jdct.inc b/simd/jdct.inc
index cc62704..ad5890c 100644
--- a/simd/jdct.inc
+++ b/simd/jdct.inc
@@ -18,11 +18,11 @@
 ;
 %define RANGE_MASK  (MAXJSAMPLE * 4 + 3)  ; 2 bits wider than legal samples
 
-%define ROW(n,b,s)		((b)+(n)*(s))
-%define COL(n,b,s)		((b)+(n)*(s)*DCTSIZE)
+%define ROW(n,b,s)              ((b)+(n)*(s))
+%define COL(n,b,s)              ((b)+(n)*(s)*DCTSIZE)
 
-%define DWBLOCK(m,n,b,s)	((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
-%define MMBLOCK(m,n,b,s)	((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
-%define XMMBLOCK(m,n,b,s)	((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
+%define DWBLOCK(m,n,b,s)        ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
+%define MMBLOCK(m,n,b,s)        ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
+%define XMMBLOCK(m,n,b,s)       ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
 
 ; --------------------------------------------------------------------------
diff --git a/simd/jdmermmx.asm b/simd/jdmermmx.asm
index 7b86c74..c2093c2 100644
--- a/simd/jdmermmx.asm
+++ b/simd/jdmermmx.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_merged_upsample_mmx)
+        alignz  16
+        global  EXTN(jconst_merged_upsample_mmx)
 
 EXTN(jconst_merged_upsample_mmx):
 
-PW_F0402	times 4 dw  F_0_402
-PW_MF0228	times 4 dw -F_0_228
-PW_MF0344_F0285	times 2 dw -F_0_344, F_0_285
-PW_ONE		times 4 dw  1
-PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
+PW_F0402        times 4 dw  F_0_402
+PW_MF0228       times 4 dw -F_0_228
+PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
+PW_ONE          times 4 dw  1
+PD_ONEHALF      times 2 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jdmrgmmx.asm"
 
diff --git a/simd/jdmerss2-64.asm b/simd/jdmerss2-64.asm
index a184ea6..1f0b339 100644
--- a/simd/jdmerss2-64.asm
+++ b/simd/jdmerss2-64.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_merged_upsample_sse2)
+        alignz  16
+        global  EXTN(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
 
-PW_F0402	times 8 dw  F_0_402
-PW_MF0228	times 8 dw -F_0_228
-PW_MF0344_F0285	times 4 dw -F_0_344, F_0_285
-PW_ONE		times 8 dw  1
-PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
+PW_F0402        times 8 dw  F_0_402
+PW_MF0228       times 8 dw -F_0_228
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
+PW_ONE          times 8 dw  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 
 %include "jdmrgss2-64.asm"
 
diff --git a/simd/jdmerss2.asm b/simd/jdmerss2.asm
index e536c80..60d0ebb 100644
--- a/simd/jdmerss2.asm
+++ b/simd/jdmerss2.asm
@@ -21,35 +21,35 @@
 
 ; --------------------------------------------------------------------------
 
-%define SCALEBITS	16
+%define SCALEBITS       16
 
-F_0_344	equ	 22554			; FIX(0.34414)
-F_0_714	equ	 46802			; FIX(0.71414)
-F_1_402	equ	 91881			; FIX(1.40200)
-F_1_772	equ	116130			; FIX(1.77200)
-F_0_402	equ	(F_1_402 - 65536)	; FIX(1.40200) - FIX(1)
-F_0_285	equ	( 65536 - F_0_714)	; FIX(1) - FIX(0.71414)
-F_0_228	equ	(131072 - F_1_772)	; FIX(2) - FIX(1.77200)
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_merged_upsample_sse2)
+        alignz  16
+        global  EXTN(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
 
-PW_F0402	times 8 dw  F_0_402
-PW_MF0228	times 8 dw -F_0_228
-PW_MF0344_F0285	times 4 dw -F_0_344, F_0_285
-PW_ONE		times 8 dw  1
-PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
+PW_F0402        times 8 dw  F_0_402
+PW_MF0228       times 8 dw -F_0_228
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
+PW_ONE          times 8 dw  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 
 %include "jdmrgss2.asm"
 
diff --git a/simd/jdmrgmmx.asm b/simd/jdmrgmmx.asm
index d0800a7..136f12c 100644
--- a/simd/jdmrgmmx.asm
+++ b/simd/jdmrgmmx.asm
@@ -29,368 +29,368 @@
 ;                                 JSAMPARRAY output_buf);
 ;
 
-%define output_width(b)	(b)+8			; JDIMENSION output_width
-%define input_buf(b)		(b)+12		; JSAMPIMAGE input_buf
-%define in_row_group_ctr(b)	(b)+16		; JDIMENSION in_row_group_ctr
-%define output_buf(b)		(b)+20		; JSAMPARRAY output_buf
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		3
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          3
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_merged_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_mmx)
 
 EXTN(jsimd_h2v1_merged_upsample_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make room for the GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [output_width(eax)]	; col
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [output_width(eax)]     ; col
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	edi, JSAMPIMAGE [input_buf(eax)]
-	mov	ecx, JDIMENSION [in_row_group_ctr(eax)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	mov	edi, JSAMPARRAY [output_buf(eax)]
-	mov	esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]		; inptr0
-	mov	ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]		; inptr1
-	mov	edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]		; inptr2
-	mov	edi, JSAMPROW [edi]				; outptr
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     edi, JSAMPROW [edi]                             ; outptr
 
-	pop	ecx			; col
+        pop     ecx                     ; col
 
-	alignx	16,7
+        alignx  16,7
 .columnloop:
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	movq      mm6, MMWORD [ebx]	; mm6=Cb(01234567)
-	movq      mm7, MMWORD [edx]	; mm7=Cr(01234567)
+        movq      mm6, MMWORD [ebx]     ; mm6=Cb(01234567)
+        movq      mm7, MMWORD [edx]     ; mm7=Cr(01234567)
 
-	pxor      mm1,mm1		; mm1=(all 0's)
-	pcmpeqw   mm3,mm3
-	psllw     mm3,7			; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
+        pxor      mm1,mm1               ; mm1=(all 0's)
+        pcmpeqw   mm3,mm3
+        psllw     mm3,7                 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
 
-	movq      mm4,mm6
-	punpckhbw mm6,mm1		; mm6=Cb(4567)=CbH
-	punpcklbw mm4,mm1		; mm4=Cb(0123)=CbL
-	movq      mm0,mm7
-	punpckhbw mm7,mm1		; mm7=Cr(4567)=CrH
-	punpcklbw mm0,mm1		; mm0=Cr(0123)=CrL
+        movq      mm4,mm6
+        punpckhbw mm6,mm1               ; mm6=Cb(4567)=CbH
+        punpcklbw mm4,mm1               ; mm4=Cb(0123)=CbL
+        movq      mm0,mm7
+        punpckhbw mm7,mm1               ; mm7=Cr(4567)=CrH
+        punpcklbw mm0,mm1               ; mm0=Cr(0123)=CrL
 
-	paddw     mm6,mm3
-	paddw     mm4,mm3
-	paddw     mm7,mm3
-	paddw     mm0,mm3
+        paddw     mm6,mm3
+        paddw     mm4,mm3
+        paddw     mm7,mm3
+        paddw     mm0,mm3
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movq	mm5,mm6			; mm5=CbH
-	movq	mm2,mm4			; mm2=CbL
-	paddw	mm6,mm6			; mm6=2*CbH
-	paddw	mm4,mm4			; mm4=2*CbL
-	movq	mm1,mm7			; mm1=CrH
-	movq	mm3,mm0			; mm3=CrL
-	paddw	mm7,mm7			; mm7=2*CrH
-	paddw	mm0,mm0			; mm0=2*CrL
+        movq    mm5,mm6                 ; mm5=CbH
+        movq    mm2,mm4                 ; mm2=CbL
+        paddw   mm6,mm6                 ; mm6=2*CbH
+        paddw   mm4,mm4                 ; mm4=2*CbL
+        movq    mm1,mm7                 ; mm1=CrH
+        movq    mm3,mm0                 ; mm3=CrL
+        paddw   mm7,mm7                 ; mm7=2*CrH
+        paddw   mm0,mm0                 ; mm0=2*CrL
 
-	pmulhw	mm6,[GOTOFF(eax,PW_MF0228)]	; mm6=(2*CbH * -FIX(0.22800))
-	pmulhw	mm4,[GOTOFF(eax,PW_MF0228)]	; mm4=(2*CbL * -FIX(0.22800))
-	pmulhw	mm7,[GOTOFF(eax,PW_F0402)]	; mm7=(2*CrH * FIX(0.40200))
-	pmulhw	mm0,[GOTOFF(eax,PW_F0402)]	; mm0=(2*CrL * FIX(0.40200))
+        pmulhw  mm6,[GOTOFF(eax,PW_MF0228)]     ; mm6=(2*CbH * -FIX(0.22800))
+        pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbL * -FIX(0.22800))
+        pmulhw  mm7,[GOTOFF(eax,PW_F0402)]      ; mm7=(2*CrH * FIX(0.40200))
+        pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrL * FIX(0.40200))
 
-	paddw	mm6,[GOTOFF(eax,PW_ONE)]
-	paddw	mm4,[GOTOFF(eax,PW_ONE)]
-	psraw	mm6,1			; mm6=(CbH * -FIX(0.22800))
-	psraw	mm4,1			; mm4=(CbL * -FIX(0.22800))
-	paddw	mm7,[GOTOFF(eax,PW_ONE)]
-	paddw	mm0,[GOTOFF(eax,PW_ONE)]
-	psraw	mm7,1			; mm7=(CrH * FIX(0.40200))
-	psraw	mm0,1			; mm0=(CrL * FIX(0.40200))
+        paddw   mm6,[GOTOFF(eax,PW_ONE)]
+        paddw   mm4,[GOTOFF(eax,PW_ONE)]
+        psraw   mm6,1                   ; mm6=(CbH * -FIX(0.22800))
+        psraw   mm4,1                   ; mm4=(CbL * -FIX(0.22800))
+        paddw   mm7,[GOTOFF(eax,PW_ONE)]
+        paddw   mm0,[GOTOFF(eax,PW_ONE)]
+        psraw   mm7,1                   ; mm7=(CrH * FIX(0.40200))
+        psraw   mm0,1                   ; mm0=(CrL * FIX(0.40200))
 
-	paddw	mm6,mm5
-	paddw	mm4,mm2
-	paddw	mm6,mm5			; mm6=(CbH * FIX(1.77200))=(B-Y)H
-	paddw	mm4,mm2			; mm4=(CbL * FIX(1.77200))=(B-Y)L
-	paddw	mm7,mm1			; mm7=(CrH * FIX(1.40200))=(R-Y)H
-	paddw	mm0,mm3			; mm0=(CrL * FIX(1.40200))=(R-Y)L
+        paddw   mm6,mm5
+        paddw   mm4,mm2
+        paddw   mm6,mm5                 ; mm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   mm4,mm2                 ; mm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   mm7,mm1                 ; mm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   mm0,mm3                 ; mm0=(CrL * FIX(1.40200))=(R-Y)L
 
-	movq	MMWORD [wk(0)], mm6	; wk(0)=(B-Y)H
-	movq	MMWORD [wk(1)], mm7	; wk(1)=(R-Y)H
+        movq    MMWORD [wk(0)], mm6     ; wk(0)=(B-Y)H
+        movq    MMWORD [wk(1)], mm7     ; wk(1)=(R-Y)H
 
-	movq      mm6,mm5
-	movq      mm7,mm2
-	punpcklwd mm5,mm1
-	punpckhwd mm6,mm1
-	pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   mm6,[GOTOFF(eax,PW_MF0344_F0285)]
-	punpcklwd mm2,mm3
-	punpckhwd mm7,mm3
-	pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   mm7,[GOTOFF(eax,PW_MF0344_F0285)]
+        movq      mm6,mm5
+        movq      mm7,mm2
+        punpcklwd mm5,mm1
+        punpckhwd mm6,mm1
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm6,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd mm2,mm3
+        punpckhwd mm7,mm3
+        pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF0344_F0285)]
 
-	paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     mm6,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     mm5,SCALEBITS
-	psrad     mm6,SCALEBITS
-	paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     mm7,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     mm2,SCALEBITS
-	psrad     mm7,SCALEBITS
+        paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm6,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm5,SCALEBITS
+        psrad     mm6,SCALEBITS
+        paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm7,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm2,SCALEBITS
+        psrad     mm7,SCALEBITS
 
-	packssdw  mm5,mm6	; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
-	packssdw  mm2,mm7	; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
-	psubw     mm5,mm1	; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
-	psubw     mm2,mm3	; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+        packssdw  mm5,mm6       ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  mm2,mm7       ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     mm5,mm1       ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     mm2,mm3       ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
 
-	movq	MMWORD [wk(2)], mm5	; wk(2)=(G-Y)H
+        movq    MMWORD [wk(2)], mm5     ; wk(2)=(G-Y)H
 
-	mov	al,2			; Yctr
-	jmp	short .Yloop_1st
-	alignx	16,7
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st
+        alignx  16,7
 
 .Yloop_2nd:
-	movq	mm0, MMWORD [wk(1)]	; mm0=(R-Y)H
-	movq	mm2, MMWORD [wk(2)]	; mm2=(G-Y)H
-	movq	mm4, MMWORD [wk(0)]	; mm4=(B-Y)H
-	alignx	16,7
+        movq    mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
+        movq    mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
+        movq    mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
+        alignx  16,7
 
 .Yloop_1st:
-	movq	mm7, MMWORD [esi]	; mm7=Y(01234567)
+        movq    mm7, MMWORD [esi]       ; mm7=Y(01234567)
 
-	pcmpeqw	mm6,mm6
-	psrlw	mm6,BYTE_BIT		; mm6={0xFF 0x00 0xFF 0x00 ..}
-	pand	mm6,mm7			; mm6=Y(0246)=YE
-	psrlw	mm7,BYTE_BIT		; mm7=Y(1357)=YO
+        pcmpeqw mm6,mm6
+        psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    mm6,mm7                 ; mm6=Y(0246)=YE
+        psrlw   mm7,BYTE_BIT            ; mm7=Y(1357)=YO
 
-	movq	mm1,mm0			; mm1=mm0=(R-Y)(L/H)
-	movq	mm3,mm2			; mm3=mm2=(G-Y)(L/H)
-	movq	mm5,mm4			; mm5=mm4=(B-Y)(L/H)
+        movq    mm1,mm0                 ; mm1=mm0=(R-Y)(L/H)
+        movq    mm3,mm2                 ; mm3=mm2=(G-Y)(L/H)
+        movq    mm5,mm4                 ; mm5=mm4=(B-Y)(L/H)
 
-	paddw     mm0,mm6		; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
-	paddw     mm1,mm7		; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
-	packuswb  mm0,mm0		; mm0=(R0 R2 R4 R6 ** ** ** **)
-	packuswb  mm1,mm1		; mm1=(R1 R3 R5 R7 ** ** ** **)
+        paddw     mm0,mm6               ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
+        paddw     mm1,mm7               ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
+        packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+        packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
 
-	paddw     mm2,mm6		; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
-	paddw     mm3,mm7		; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
-	packuswb  mm2,mm2		; mm2=(G0 G2 G4 G6 ** ** ** **)
-	packuswb  mm3,mm3		; mm3=(G1 G3 G5 G7 ** ** ** **)
+        paddw     mm2,mm6               ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
+        paddw     mm3,mm7               ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
+        packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+        packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
 
-	paddw     mm4,mm6		; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
-	paddw     mm5,mm7		; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
-	packuswb  mm4,mm4		; mm4=(B0 B2 B4 B6 ** ** ** **)
-	packuswb  mm5,mm5		; mm5=(B1 B3 B5 B7 ** ** ** **)
+        paddw     mm4,mm6               ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
+        paddw     mm5,mm7               ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
+        packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+        packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
-	; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
-	; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
-	; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
 
-	punpcklbw mmA,mmC		; mmA=(00 10 02 12 04 14 06 16)
-	punpcklbw mmE,mmB		; mmE=(20 01 22 03 24 05 26 07)
-	punpcklbw mmD,mmF		; mmD=(11 21 13 23 15 25 17 27)
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+        punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
 
-	movq      mmG,mmA
-	movq      mmH,mmA
-	punpcklwd mmA,mmE		; mmA=(00 10 20 01 02 12 22 03)
-	punpckhwd mmG,mmE		; mmG=(04 14 24 05 06 16 26 07)
+        movq      mmG,mmA
+        movq      mmH,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+        punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
 
-	psrlq     mmH,2*BYTE_BIT	; mmH=(02 12 04 14 06 16 -- --)
-	psrlq     mmE,2*BYTE_BIT	; mmE=(22 03 24 05 26 07 -- --)
+        psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+        psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
 
-	movq      mmC,mmD
-	movq      mmB,mmD
-	punpcklwd mmD,mmH		; mmD=(11 21 02 12 13 23 04 14)
-	punpckhwd mmC,mmH		; mmC=(15 25 06 16 17 27 -- --)
+        movq      mmC,mmD
+        movq      mmB,mmD
+        punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+        punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
 
-	psrlq     mmB,2*BYTE_BIT	; mmB=(13 23 15 25 17 27 -- --)
+        psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
 
-	movq      mmF,mmE
-	punpcklwd mmE,mmB		; mmE=(22 03 13 23 24 05 15 25)
-	punpckhwd mmF,mmB		; mmF=(26 07 17 27 -- -- -- --)
+        movq      mmF,mmE
+        punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+        punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
 
-	punpckldq mmA,mmD		; mmA=(00 10 20 01 11 21 02 12)
-	punpckldq mmE,mmG		; mmE=(22 03 13 23 04 14 24 05)
-	punpckldq mmC,mmF		; mmC=(15 25 06 16 26 07 17 27)
+        punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+        punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+        punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st16
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmE
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
 
-	sub	ecx, byte SIZEOF_MMWORD
-	jz	near .endcolumn
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      near .endcolumn
 
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; outptr
-	add	esi, byte SIZEOF_MMWORD			; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	ebx, byte SIZEOF_MMWORD			; inptr1
-	add	edx, byte SIZEOF_MMWORD			; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st16:
-	lea	ecx, [ecx+ecx*2]	; imul ecx, RGB_PIXELSIZE
-	cmp	ecx, byte 2*SIZEOF_MMWORD
-	jb	short .column_st8
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmE
-	movq	mmA,mmC
-	sub	ecx, byte 2*SIZEOF_MMWORD
-	add	edi, byte 2*SIZEOF_MMWORD
-	jmp	short .column_st4
+        lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_MMWORD
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    mmA,mmC
+        sub     ecx, byte 2*SIZEOF_MMWORD
+        add     edi, byte 2*SIZEOF_MMWORD
+        jmp     short .column_st4
 .column_st8:
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st4
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	mmA,mmE
-	sub	ecx, byte SIZEOF_MMWORD
-	add	edi, byte SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmE
+        sub     ecx, byte SIZEOF_MMWORD
+        add     edi, byte SIZEOF_MMWORD
 .column_st4:
-	movd	eax,mmA
-	cmp	ecx, byte SIZEOF_DWORD
-	jb	short .column_st2
-	mov	DWORD [edi+0*SIZEOF_DWORD], eax
-	psrlq	mmA,DWORD_BIT
-	movd	eax,mmA
-	sub	ecx, byte SIZEOF_DWORD
-	add	edi, byte SIZEOF_DWORD
+        movd    eax,mmA
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st2
+        mov     DWORD [edi+0*SIZEOF_DWORD], eax
+        psrlq   mmA,DWORD_BIT
+        movd    eax,mmA
+        sub     ecx, byte SIZEOF_DWORD
+        add     edi, byte SIZEOF_DWORD
 .column_st2:
-	cmp	ecx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [edi+0*SIZEOF_WORD], ax
-	shr	eax,WORD_BIT
-	sub	ecx, byte SIZEOF_WORD
-	add	edi, byte SIZEOF_WORD
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi+0*SIZEOF_WORD], ax
+        shr     eax,WORD_BIT
+        sub     ecx, byte SIZEOF_WORD
+        add     edi, byte SIZEOF_WORD
 .column_st1:
-	cmp	ecx, byte SIZEOF_BYTE
-	jb	short .endcolumn
-	mov	BYTE [edi+0*SIZEOF_BYTE], al
+        cmp     ecx, byte SIZEOF_BYTE
+        jb      short .endcolumn
+        mov     BYTE [edi+0*SIZEOF_BYTE], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   mm6,mm6		; mm6=(X0 X2 X4 X6 ** ** ** **)
-	pcmpeqb   mm7,mm7		; mm7=(X1 X3 X5 X7 ** ** ** **)
+        pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
 %else
-	pxor      mm6,mm6		; mm6=(X0 X2 X4 X6 ** ** ** **)
-	pxor      mm7,mm7		; mm7=(X1 X3 X5 X7 ** ** ** **)
+        pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
 %endif
-	; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
-	; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
-	; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
-	; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
 
-	punpcklbw mmA,mmC		; mmA=(00 10 02 12 04 14 06 16)
-	punpcklbw mmE,mmG		; mmE=(20 30 22 32 24 34 26 36)
-	punpcklbw mmB,mmD		; mmB=(01 11 03 13 05 15 07 17)
-	punpcklbw mmF,mmH		; mmF=(21 31 23 33 25 35 27 37)
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+        punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+        punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
 
-	movq      mmC,mmA
-	punpcklwd mmA,mmE		; mmA=(00 10 20 30 02 12 22 32)
-	punpckhwd mmC,mmE		; mmC=(04 14 24 34 06 16 26 36)
-	movq      mmG,mmB
-	punpcklwd mmB,mmF		; mmB=(01 11 21 31 03 13 23 33)
-	punpckhwd mmG,mmF		; mmG=(05 15 25 35 07 17 27 37)
+        movq      mmC,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+        punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+        movq      mmG,mmB
+        punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+        punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
 
-	movq      mmD,mmA
-	punpckldq mmA,mmB		; mmA=(00 10 20 30 01 11 21 31)
-	punpckhdq mmD,mmB		; mmD=(02 12 22 32 03 13 23 33)
-	movq      mmH,mmC
-	punpckldq mmC,mmG		; mmC=(04 14 24 34 05 15 25 35)
-	punpckhdq mmH,mmG		; mmH=(06 16 26 36 07 17 27 37)
+        movq      mmD,mmA
+        punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+        punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+        movq      mmH,mmC
+        punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+        punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
 
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st16
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmD
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mmC
-	movq	MMWORD [edi+3*SIZEOF_MMWORD], mmH
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
 
-	sub	ecx, byte SIZEOF_MMWORD
-	jz	short .endcolumn
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .endcolumn
 
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; outptr
-	add	esi, byte SIZEOF_MMWORD			; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	ebx, byte SIZEOF_MMWORD			; inptr1
-	add	edx, byte SIZEOF_MMWORD			; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st16:
-	cmp	ecx, byte SIZEOF_MMWORD/2
-	jb	short .column_st8
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mmD
-	movq	mmA,mmC
-	movq	mmD,mmH
-	sub	ecx, byte SIZEOF_MMWORD/2
-	add	edi, byte 2*SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD/2
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    mmA,mmC
+        movq    mmD,mmH
+        sub     ecx, byte SIZEOF_MMWORD/2
+        add     edi, byte 2*SIZEOF_MMWORD
 .column_st8:
-	cmp	ecx, byte SIZEOF_MMWORD/4
-	jb	short .column_st4
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mmA
-	movq	mmA,mmD
-	sub	ecx, byte SIZEOF_MMWORD/4
-	add	edi, byte 1*SIZEOF_MMWORD
+        cmp     ecx, byte SIZEOF_MMWORD/4
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmD
+        sub     ecx, byte SIZEOF_MMWORD/4
+        add     edi, byte 1*SIZEOF_MMWORD
 .column_st4:
-	cmp	ecx, byte SIZEOF_MMWORD/8
-	jb	short .endcolumn
-	movd	DWORD [edi+0*SIZEOF_DWORD], mmA
+        cmp     ecx, byte SIZEOF_MMWORD/8
+        jb      short .endcolumn
+        movd    DWORD [edi+0*SIZEOF_DWORD], mmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
 .endcolumn:
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -403,62 +403,62 @@
 ;                                 JSAMPARRAY output_buf);
 ;
 
-%define output_width(b)	(b)+8			; JDIMENSION output_width
-%define input_buf(b)		(b)+12		; JSAMPIMAGE input_buf
-%define in_row_group_ctr(b)	(b)+16		; JDIMENSION in_row_group_ctr
-%define output_buf(b)		(b)+20		; JSAMPARRAY output_buf
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
 
-	align	16
-	global	EXTN(jsimd_h2v2_merged_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_mmx)
 
 EXTN(jsimd_h2v2_merged_upsample_mmx):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp                 ; arguments are addressed relative to ebp below
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	eax, JDIMENSION [output_width(ebp)]
+        mov     eax, JDIMENSION [output_width(ebp)]     ; eax = output_width
 
-	mov	edi, JSAMPIMAGE [input_buf(ebp)]
-	mov	ecx, JDIMENSION [in_row_group_ctr(ebp)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	mov	edi, JSAMPARRAY [output_buf(ebp)]
-	lea	esi, [esi+ecx*SIZEOF_JSAMPROW]
+        mov     edi, JSAMPIMAGE [input_buf(ebp)]        ; edi = input_buf
+        mov     ecx, JDIMENSION [in_row_group_ctr(ebp)] ; ecx = in_row_group_ctr
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; esi = input_buf[0]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; ebx = input_buf[1]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; edx = input_buf[2]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]       ; edi = output_buf
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; esi = &input_buf[0][in_row_group_ctr]
 
-	push	edx			; inptr2
-	push	ebx			; inptr1
-	push	esi			; inptr00
-	mov	ebx,esp
+        push    edx                     ; inptr2
+        push    ebx                     ; inptr1
+        push    esi                     ; inptr00
+        mov     ebx,esp                 ; ebx = address of the 3 pointers just pushed (temporary input_buf)
 
-	push	edi			; output_buf (outptr0)
-	push	ecx			; in_row_group_ctr
-	push	ebx			; input_buf
-	push	eax			; output_width
+        push    edi                     ; output_buf (outptr0)
+        push    ecx                     ; in_row_group_ctr
+        push    ebx                     ; input_buf
+        push    eax                     ; output_width
 
-	call	near EXTN(jsimd_h2v1_merged_upsample_mmx)
+        call    near EXTN(jsimd_h2v1_merged_upsample_mmx)       ; 1st call, with the 4 arguments pushed above
 
-	add	esi, byte SIZEOF_JSAMPROW	; inptr01
-	add	edi, byte SIZEOF_JSAMPROW	; outptr1
-	mov	POINTER [ebx+0*SIZEOF_POINTER], esi
-	mov	POINTER [ebx-1*SIZEOF_POINTER], edi
+        add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+        add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+        mov     POINTER [ebx+0*SIZEOF_POINTER], esi     ; slot 0 of the temporary input_buf <- inptr01
+        mov     POINTER [ebx-1*SIZEOF_POINTER], edi     ; pushed output_buf argument slot <- outptr1
 
-	call	near EXTN(jsimd_h2v1_merged_upsample_mmx)
+        call    near EXTN(jsimd_h2v1_merged_upsample_mmx)       ; 2nd call reuses the same argument slots; NOTE(review): assumes the callee preserves esi, edi, ebx and leaves its stack arguments unmodified
 
-	add	esp, byte 7*SIZEOF_DWORD
+        add     esp, byte 7*SIZEOF_DWORD        ; discard the 4 call arguments + the 3 pushed row pointers
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm
index ffbf6b2..d0e1ea7 100644
--- a/simd/jdmrgss2-64.asm
+++ b/simd/jdmrgss2-64.asm
@@ -18,7 +18,7 @@
 ; [TAB8]
 
 %include "jcolsamp.inc"
-				
+
 ; --------------------------------------------------------------------------
 ;
 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
@@ -35,399 +35,399 @@
 ; r12 = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		3
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          3
 
-	align	16
-	global	EXTN(jsimd_h2v1_merged_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
 
-	mov	rcx, r10	; col
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10        ; col
+        test    rcx,rcx
+        jz      near .return
 
-	push	rcx
+        push    rcx
 
-	mov	rdi, r11
-	mov	rcx, r12
-	mov	rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
-	mov	rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
-	mov	rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
-	mov	rdi, r13
-	mov	rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW]		; inptr0
-	mov	rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW]		; inptr1
-	mov	rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW]		; inptr2
-	mov	rdi, JSAMPROW [rdi]				; outptr
+        mov     rdi, r11
+        mov     rcx, r12
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+        mov     rdi, r13
+        mov     rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     rdi, JSAMPROW [rdi]                             ; outptr
 
-	pop	rcx			; col
+        pop     rcx                     ; col
 
 .columnloop:
 
-	movdqa    xmm6, XMMWORD [rbx]	; xmm6=Cb(0123456789ABCDEF)
-	movdqa    xmm7, XMMWORD [rdx]	; xmm7=Cr(0123456789ABCDEF)
+        movdqa    xmm6, XMMWORD [rbx]   ; xmm6=Cb(0123456789ABCDEF)
+        movdqa    xmm7, XMMWORD [rdx]   ; xmm7=Cr(0123456789ABCDEF)
 
-	pxor      xmm1,xmm1		; xmm1=(all 0's)
-	pcmpeqw   xmm3,xmm3
-	psllw     xmm3,7		; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        pxor      xmm1,xmm1             ; xmm1=(all 0's)
+        pcmpeqw   xmm3,xmm3
+        psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
 
-	movdqa    xmm4,xmm6
-	punpckhbw xmm6,xmm1		; xmm6=Cb(89ABCDEF)=CbH
-	punpcklbw xmm4,xmm1		; xmm4=Cb(01234567)=CbL
-	movdqa    xmm0,xmm7
-	punpckhbw xmm7,xmm1		; xmm7=Cr(89ABCDEF)=CrH
-	punpcklbw xmm0,xmm1		; xmm0=Cr(01234567)=CrL
+        movdqa    xmm4,xmm6
+        punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+        punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+        movdqa    xmm0,xmm7
+        punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+        punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
 
-	paddw     xmm6,xmm3
-	paddw     xmm4,xmm3
-	paddw     xmm7,xmm3
-	paddw     xmm0,xmm3
+        paddw     xmm6,xmm3
+        paddw     xmm4,xmm3
+        paddw     xmm7,xmm3
+        paddw     xmm0,xmm3
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movdqa	xmm5,xmm6		; xmm5=CbH
-	movdqa	xmm2,xmm4		; xmm2=CbL
-	paddw	xmm6,xmm6		; xmm6=2*CbH
-	paddw	xmm4,xmm4		; xmm4=2*CbL
-	movdqa	xmm1,xmm7		; xmm1=CrH
-	movdqa	xmm3,xmm0		; xmm3=CrL
-	paddw	xmm7,xmm7		; xmm7=2*CrH
-	paddw	xmm0,xmm0		; xmm0=2*CrL
+        movdqa  xmm5,xmm6               ; xmm5=CbH
+        movdqa  xmm2,xmm4               ; xmm2=CbL
+        paddw   xmm6,xmm6               ; xmm6=2*CbH
+        paddw   xmm4,xmm4               ; xmm4=2*CbL
+        movdqa  xmm1,xmm7               ; xmm1=CrH
+        movdqa  xmm3,xmm0               ; xmm3=CrL
+        paddw   xmm7,xmm7               ; xmm7=2*CrH
+        paddw   xmm0,xmm0               ; xmm0=2*CrL
 
-	pmulhw	xmm6,[rel PW_MF0228]	; xmm6=(2*CbH * -FIX(0.22800))
-	pmulhw	xmm4,[rel PW_MF0228]	; xmm4=(2*CbL * -FIX(0.22800))
-	pmulhw	xmm7,[rel PW_F0402]	; xmm7=(2*CrH * FIX(0.40200))
-	pmulhw	xmm0,[rel PW_F0402]	; xmm0=(2*CrL * FIX(0.40200))
+        pmulhw  xmm6,[rel PW_MF0228]    ; xmm6=(2*CbH * -FIX(0.22800))
+        pmulhw  xmm4,[rel PW_MF0228]    ; xmm4=(2*CbL * -FIX(0.22800))
+        pmulhw  xmm7,[rel PW_F0402]     ; xmm7=(2*CrH * FIX(0.40200))
+        pmulhw  xmm0,[rel PW_F0402]     ; xmm0=(2*CrL * FIX(0.40200))
 
-	paddw	xmm6,[rel PW_ONE]
-	paddw	xmm4,[rel PW_ONE]
-	psraw	xmm6,1			; xmm6=(CbH * -FIX(0.22800))
-	psraw	xmm4,1			; xmm4=(CbL * -FIX(0.22800))
-	paddw	xmm7,[rel PW_ONE]
-	paddw	xmm0,[rel PW_ONE]
-	psraw	xmm7,1			; xmm7=(CrH * FIX(0.40200))
-	psraw	xmm0,1			; xmm0=(CrL * FIX(0.40200))
+        paddw   xmm6,[rel PW_ONE]
+        paddw   xmm4,[rel PW_ONE]
+        psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+        psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+        paddw   xmm7,[rel PW_ONE]
+        paddw   xmm0,[rel PW_ONE]
+        psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+        psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
 
-	paddw	xmm6,xmm5
-	paddw	xmm4,xmm2
-	paddw	xmm6,xmm5		; xmm6=(CbH * FIX(1.77200))=(B-Y)H
-	paddw	xmm4,xmm2		; xmm4=(CbL * FIX(1.77200))=(B-Y)L
-	paddw	xmm7,xmm1		; xmm7=(CrH * FIX(1.40200))=(R-Y)H
-	paddw	xmm0,xmm3		; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+        paddw   xmm6,xmm5
+        paddw   xmm4,xmm2
+        paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
 
-	movdqa	XMMWORD [wk(0)], xmm6	; wk(0)=(B-Y)H
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=(R-Y)H
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
 
-	movdqa    xmm6,xmm5
-	movdqa    xmm7,xmm2
-	punpcklwd xmm5,xmm1
-	punpckhwd xmm6,xmm1
-	pmaddwd   xmm5,[rel PW_MF0344_F0285]
-	pmaddwd   xmm6,[rel PW_MF0344_F0285]
-	punpcklwd xmm2,xmm3
-	punpckhwd xmm7,xmm3
-	pmaddwd   xmm2,[rel PW_MF0344_F0285]
-	pmaddwd   xmm7,[rel PW_MF0344_F0285]
+        movdqa    xmm6,xmm5
+        movdqa    xmm7,xmm2
+        punpcklwd xmm5,xmm1
+        punpckhwd xmm6,xmm1
+        pmaddwd   xmm5,[rel PW_MF0344_F0285]
+        pmaddwd   xmm6,[rel PW_MF0344_F0285]
+        punpcklwd xmm2,xmm3
+        punpckhwd xmm7,xmm3
+        pmaddwd   xmm2,[rel PW_MF0344_F0285]
+        pmaddwd   xmm7,[rel PW_MF0344_F0285]
 
-	paddd     xmm5,[rel PD_ONEHALF]
-	paddd     xmm6,[rel PD_ONEHALF]
-	psrad     xmm5,SCALEBITS
-	psrad     xmm6,SCALEBITS
-	paddd     xmm2,[rel PD_ONEHALF]
-	paddd     xmm7,[rel PD_ONEHALF]
-	psrad     xmm2,SCALEBITS
-	psrad     xmm7,SCALEBITS
+        paddd     xmm5,[rel PD_ONEHALF]
+        paddd     xmm6,[rel PD_ONEHALF]
+        psrad     xmm5,SCALEBITS
+        psrad     xmm6,SCALEBITS
+        paddd     xmm2,[rel PD_ONEHALF]
+        paddd     xmm7,[rel PD_ONEHALF]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm7,SCALEBITS
 
-	packssdw  xmm5,xmm6	; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
-	packssdw  xmm2,xmm7	; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
-	psubw     xmm5,xmm1	; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
-	psubw     xmm2,xmm3	; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+        packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
 
-	movdqa	XMMWORD [wk(2)], xmm5	; wk(2)=(G-Y)H
+        movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
 
-	mov	al,2			; Yctr
-	jmp	short .Yloop_1st
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st
 
 .Yloop_2nd:
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(R-Y)H
-	movdqa	xmm2, XMMWORD [wk(2)]	; xmm2=(G-Y)H
-	movdqa	xmm4, XMMWORD [wk(0)]	; xmm4=(B-Y)H
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
 
 .Yloop_1st:
-	movdqa	xmm7, XMMWORD [rsi]	; xmm7=Y(0123456789ABCDEF)
+        movdqa  xmm7, XMMWORD [rsi]     ; xmm7=Y(0123456789ABCDEF)
 
-	pcmpeqw	xmm6,xmm6
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
-	pand	xmm6,xmm7		; xmm6=Y(02468ACE)=YE
-	psrlw	xmm7,BYTE_BIT		; xmm7=Y(13579BDF)=YO
+        pcmpeqw xmm6,xmm6
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+        psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
 
-	movdqa	xmm1,xmm0		; xmm1=xmm0=(R-Y)(L/H)
-	movdqa	xmm3,xmm2		; xmm3=xmm2=(G-Y)(L/H)
-	movdqa	xmm5,xmm4		; xmm5=xmm4=(B-Y)(L/H)
+        movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+        movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+        movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
 
-	paddw     xmm0,xmm6		; xmm0=((R-Y)+YE)=RE=R(02468ACE)
-	paddw     xmm1,xmm7		; xmm1=((R-Y)+YO)=RO=R(13579BDF)
-	packuswb  xmm0,xmm0		; xmm0=R(02468ACE********)
-	packuswb  xmm1,xmm1		; xmm1=R(13579BDF********)
+        paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
 
-	paddw     xmm2,xmm6		; xmm2=((G-Y)+YE)=GE=G(02468ACE)
-	paddw     xmm3,xmm7		; xmm3=((G-Y)+YO)=GO=G(13579BDF)
-	packuswb  xmm2,xmm2		; xmm2=G(02468ACE********)
-	packuswb  xmm3,xmm3		; xmm3=G(13579BDF********)
+        paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
 
-	paddw     xmm4,xmm6		; xmm4=((B-Y)+YE)=BE=B(02468ACE)
-	paddw     xmm5,xmm7		; xmm5=((B-Y)+YO)=BO=B(13579BDF)
-	packuswb  xmm4,xmm4		; xmm4=B(02468ACE********)
-	packuswb  xmm5,xmm5		; xmm5=B(13579BDF********)
+        paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+        paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmB	; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
-	punpcklbw xmmD,xmmF	; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
 
-	movdqa    xmmG,xmmA
-	movdqa    xmmH,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
-	punpckhwd xmmG,xmmE	; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
 
-	psrldq    xmmH,2	; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
-	psrldq    xmmE,2	; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
 
-	movdqa    xmmC,xmmD
-	movdqa    xmmB,xmmD
-	punpcklwd xmmD,xmmH	; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
-	punpckhwd xmmC,xmmH	; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
 
-	psrldq    xmmB,2	; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
 
-	movdqa    xmmF,xmmE
-	punpcklwd xmmE,xmmB	; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
-	punpckhwd xmmF,xmmB	; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
 
-	pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
-	movdqa    xmmB,xmmE
-	punpckldq xmmA,xmmD	; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
-	punpckldq xmmE,xmmH	; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
-	punpckhdq xmmD,xmmB	; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
 
-	pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
-	movdqa    xmmB,xmmF
-	punpckldq xmmG,xmmC	; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
-	punpckldq xmmF,xmmH	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
-	punpckhdq xmmC,xmmB	; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
 
-	punpcklqdq xmmA,xmmE	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	punpcklqdq xmmD,xmmG	; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	punpcklqdq xmmF,xmmC	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	rdi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
 .out0:
-	add	rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	rcx, byte SIZEOF_XMMWORD
-	jz	near .endcolumn
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
 
-	add	rsi, byte SIZEOF_XMMWORD	; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	rbx, byte SIZEOF_XMMWORD	; inptr1
-	add	rdx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
 
 .column_st32:
-	lea	rcx, [rcx+rcx*2]		; imul ecx, RGB_PIXELSIZE
-	cmp	rcx, byte 2*SIZEOF_XMMWORD
-	jb	short .column_st16
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmF
-	sub	rcx, byte 2*SIZEOF_XMMWORD
-	jmp	short .column_st15
+        lea     rcx, [rcx+rcx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     rcx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     rcx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
 .column_st16:
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st15
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	add	rdi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	rcx, byte SIZEOF_XMMWORD
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD
 .column_st15:
-	; Store the lower 8 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_MMWORD
-	jb	short .column_st7
-	movq	XMM_MMWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_MMWORD
-	sub	rcx, byte SIZEOF_MMWORD
-	psrldq	xmmA, SIZEOF_MMWORD
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_MMWORD
+        sub     rcx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
 .column_st7:
-	; Store the lower 4 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_DWORD
-	jb	short .column_st3
-	movd	XMM_DWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_DWORD
-	sub	rcx, byte SIZEOF_DWORD
-	psrldq	xmmA, SIZEOF_DWORD
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_DWORD
+        sub     rcx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
 .column_st3:
-	; Store the lower 2 bytes of rax to the output when it has enough
-	; space.
-	movd	eax, xmmA
-	cmp	rcx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [rdi], ax
-	add	rdi, byte SIZEOF_WORD
-	sub	rcx, byte SIZEOF_WORD
-	shr	rax, 16
+        ; Store the lower 2 bytes of rax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     rcx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [rdi], ax
+        add     rdi, byte SIZEOF_WORD
+        sub     rcx, byte SIZEOF_WORD
+        shr     rax, 16
 .column_st1:
-	; Store the lower 1 byte of rax to the output when it has enough
-	; space.
-	test	rcx, rcx
-	jz	short .endcolumn
-	mov	BYTE [rdi], al
+        ; Store the lower 1 byte of rax to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .endcolumn
+        mov     BYTE [rdi], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pcmpeqb   xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %else
-	pxor      xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pxor      xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %endif
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmG	; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
-	punpcklbw xmmB,xmmD	; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
-	punpcklbw xmmF,xmmH	; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
 
-	movdqa    xmmC,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
-	punpckhwd xmmC,xmmE	; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
-	movdqa    xmmG,xmmB
-	punpcklwd xmmB,xmmF	; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
-	punpckhwd xmmG,xmmF	; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpckldq xmmA,xmmB	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	punpckhdq xmmD,xmmB	; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	movdqa    xmmH,xmmC
-	punpckldq xmmC,xmmG	; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	punpckhdq xmmH,xmmG	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	cmp	rcx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	rdi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
-	movntdq	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
-	movdqu	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
 .out0:
-	add	rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	rcx, byte SIZEOF_XMMWORD
-	jz	near .endcolumn
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
 
-	add	rsi, byte SIZEOF_XMMWORD	; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	rbx, byte SIZEOF_XMMWORD	; inptr1
-	add	rdx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
 
 .column_st32:
-	cmp	rcx, byte SIZEOF_XMMWORD/2
-	jb	short .column_st16
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmC
-	movdqa	xmmD,xmmH
-	sub	rcx, byte SIZEOF_XMMWORD/2
+        cmp     rcx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     rcx, byte SIZEOF_XMMWORD/2
 .column_st16:
-	cmp	rcx, byte SIZEOF_XMMWORD/4
-	jb	short .column_st15
-	movdqu	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
-	add	rdi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	rcx, byte SIZEOF_XMMWORD/4
+        cmp     rcx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD/4
 .column_st15:
-	; Store two pixels (8 bytes) of xmmA to the output when it has enough
-	; space.
-	cmp	rcx, byte SIZEOF_XMMWORD/8
-	jb	short .column_st7
-	movq	XMM_MMWORD [rdi], xmmA
-	add	rdi, byte SIZEOF_XMMWORD/8*4
-	sub	rcx, byte SIZEOF_XMMWORD/8
-	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_XMMWORD/8*4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
 .column_st7:
-	; Store one pixel (4 bytes) of xmmA to the output when it has enough
-	; space.
-	test	rcx, rcx
-	jz	short .endcolumn
-	movd	XMM_DWORD [rdi], xmmA
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .endcolumn
+        movd    XMM_DWORD [rdi], xmmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
 .endcolumn:
-	sfence		; flush the write buffer
+        sfence          ; flush the write buffer
 
 .return:
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -445,94 +445,94 @@
 ; r12 = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-	align	16
-	global	EXTN(jsimd_h2v2_merged_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
-	push	rbx
+        push    rbp                     ; save the caller's rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args                    ; macro defined elsewhere; presumably loads the C arguments into r10-r13
+        push    rbx                     ; rbx is overwritten below and restored just before ret
 
-	mov	rax, r10
+        mov     rax, r10                ; rax = r10; forwarded below as the callee's 1st argument
 
-	mov	rdi, r11
-	mov	rcx, r12
-	mov	rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
-	mov	rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
-	mov	rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
-	mov	rdi, r13
-	lea	rsi, [rsi+rcx*SIZEOF_JSAMPROW]
+        mov     rdi, r11                ; rdi = r11 (presumably input_buf; its three entries are read next)
+        mov     rcx, r12                ; rcx = in_row_group_ctr
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]       ; rsi = 1st JSAMPARRAY
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]       ; rbx = 2nd JSAMPARRAY
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]       ; rdx = 3rd JSAMPARRAY
+        mov     rdi, r13                ; rdi = output_buf
+        lea     rsi, [rsi+rcx*SIZEOF_JSAMPROW]  ; advance rsi by in_row_group_ctr row-pointer slots
 
-	push	rdx			; inptr2
-	push	rbx			; inptr1
-	push	rsi			; inptr00
-	mov	rbx,rsp
+        push    rdx                     ; inptr2
+        push    rbx                     ; inptr1
+        push    rsi                     ; inptr00
+        mov     rbx,rsp                 ; rbx -> the three pointers just pushed (inptr00, inptr1, inptr2 by ascending address)
 
-	push	rdi
-	push	rcx
-	push	rax
+        push    rdi                     ; keep rdi/rcx/rax across the call (popped afterwards)
+        push    rcx
+        push    rax
 
-	%ifdef WIN64
-	mov r8, rcx
-	mov r9, rdi
-	mov rcx, rax
-	mov rdx, rbx
-	%else
-	mov rdx, rcx
-	mov rcx, rdi
-	mov	rdi, rax
-	mov rsi, rbx
-	%endif
+        %ifdef WIN64
+        mov r8, rcx                     ; 3rd arg = in_row_group_ctr (rcx is read before it is overwritten)
+        mov r9, rdi                     ; 4th arg = output_buf
+        mov rcx, rax                    ; 1st arg = value taken from r10
+        mov rdx, rbx                    ; 2nd arg = the on-stack pointer array
+        %else
+        mov rdx, rcx                    ; 3rd arg = in_row_group_ctr (rcx is read before it is overwritten)
+        mov rcx, rdi                    ; 4th arg = output_buf (rdi is read before it is overwritten)
+        mov     rdi, rax                ; 1st arg = value taken from r10
+        mov rsi, rbx                    ; 2nd arg = the on-stack pointer array
+        %endif
 
-	call	EXTN(jsimd_h2v1_merged_upsample_sse2)
+        call    EXTN(jsimd_h2v1_merged_upsample_sse2)   ; first of two calls to the h2v1 routine
 
-	pop rax
-	pop rcx
-	pop rdi
-	pop rsi
-	pop rbx
-	pop rdx
+        pop rax                         ; restore in reverse push order
+        pop rcx
+        pop rdi
+        pop rsi                         ; unwind the pointer array, reloading inptr00/inptr1/inptr2
+        pop rbx
+        pop rdx
 
-	add	rdi, byte SIZEOF_JSAMPROW	; outptr1
-	add	rsi, byte SIZEOF_JSAMPROW	; inptr01
+        add     rdi, byte SIZEOF_JSAMPROW       ; outptr1
+        add     rsi, byte SIZEOF_JSAMPROW       ; inptr01
 
-	push	rdx			; inptr2
-	push	rbx			; inptr1
-	push	rsi			; inptr00
-	mov	rbx,rsp
+        push    rdx                     ; inptr2
+        push    rbx                     ; inptr1
+        push    rsi                     ; inptr01
+        mov     rbx,rsp                 ; rbx -> rebuilt pointer array (inptr01, inptr1, inptr2 by ascending address)
 
-	push	rdi
-	push	rcx
-	push	rax
+        push    rdi                     ; keep rdi/rcx/rax across the call (popped afterwards)
+        push    rcx
+        push    rax
 
-	%ifdef WIN64
-	mov r8, rcx
-	mov r9, rdi
-	mov rcx, rax
-	mov rdx, rbx
-	%else
-	mov rdx, rcx
-	mov rcx, rdi
-	mov	rdi, rax
-	mov rsi, rbx
-	%endif
+        %ifdef WIN64
+        mov r8, rcx                     ; 3rd arg = in_row_group_ctr (rcx is read before it is overwritten)
+        mov r9, rdi                     ; 4th arg = output_buf advanced by one JSAMPROW slot (outptr1)
+        mov rcx, rax                    ; 1st arg = value taken from r10
+        mov rdx, rbx                    ; 2nd arg = the on-stack pointer array
+        %else
+        mov rdx, rcx                    ; 3rd arg = in_row_group_ctr (rcx is read before it is overwritten)
+        mov rcx, rdi                    ; 4th arg = output_buf advanced by one JSAMPROW slot (outptr1)
+        mov     rdi, rax                ; 1st arg = value taken from r10
+        mov rsi, rbx                    ; 2nd arg = the on-stack pointer array
+        %endif
 
-	call	EXTN(jsimd_h2v1_merged_upsample_sse2)
+        call    EXTN(jsimd_h2v1_merged_upsample_sse2)   ; second call, using outptr1 / inptr01
 
-	pop rax
-	pop rcx
-	pop rdi
-	pop rsi
-	pop rbx
-	pop rdx
+        pop rax                         ; restore in reverse push order
+        pop rcx
+        pop rdi
+        pop rsi                         ; unwind the pointer array
+        pop rbx
+        pop rdx
 
-	pop	rbx
-	uncollect_args
-	pop	rbp
-	ret
+        pop     rbx                     ; restore the rbx saved at entry
+        uncollect_args                  ; macro defined elsewhere; presumably undoes collect_args
+        pop     rbp                     ; restore the caller's rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm
index 6494340..0206f62 100644
--- a/simd/jdmrgss2.asm
+++ b/simd/jdmrgss2.asm
@@ -18,7 +18,7 @@
 ; [TAB8]
 
 %include "jcolsamp.inc"
-				
+
 ; --------------------------------------------------------------------------
 ;
 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
@@ -30,422 +30,422 @@
 ;                                  JSAMPARRAY output_buf);
 ;
 
-%define output_width(b)	(b)+8			; JDIMENSION output_width
-%define input_buf(b)		(b)+12		; JSAMPIMAGE input_buf
-%define in_row_group_ctr(b)	(b)+16		; JDIMENSION in_row_group_ctr
-%define output_buf(b)		(b)+20		; JSAMPARRAY output_buf
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		3
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          3
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_merged_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	ecx, JDIMENSION [output_width(eax)]	; col
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, JDIMENSION [output_width(eax)]     ; col
+        test    ecx,ecx
+        jz      near .return
 
-	push	ecx
+        push    ecx
 
-	mov	edi, JSAMPIMAGE [input_buf(eax)]
-	mov	ecx, JDIMENSION [in_row_group_ctr(eax)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	mov	edi, JSAMPARRAY [output_buf(eax)]
-	mov	esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]		; inptr0
-	mov	ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]		; inptr1
-	mov	edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]		; inptr2
-	mov	edi, JSAMPROW [edi]				; outptr
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     edi, JSAMPROW [edi]                             ; outptr
 
-	pop	ecx			; col
+        pop     ecx                     ; col
 
-	alignx	16,7
+        alignx  16,7
 .columnloop:
-	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
 
-	movdqa    xmm6, XMMWORD [ebx]	; xmm6=Cb(0123456789ABCDEF)
-	movdqa    xmm7, XMMWORD [edx]	; xmm7=Cr(0123456789ABCDEF)
+        movdqa    xmm6, XMMWORD [ebx]   ; xmm6=Cb(0123456789ABCDEF)
+        movdqa    xmm7, XMMWORD [edx]   ; xmm7=Cr(0123456789ABCDEF)
 
-	pxor      xmm1,xmm1		; xmm1=(all 0's)
-	pcmpeqw   xmm3,xmm3
-	psllw     xmm3,7		; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        pxor      xmm1,xmm1             ; xmm1=(all 0's)
+        pcmpeqw   xmm3,xmm3
+        psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
 
-	movdqa    xmm4,xmm6
-	punpckhbw xmm6,xmm1		; xmm6=Cb(89ABCDEF)=CbH
-	punpcklbw xmm4,xmm1		; xmm4=Cb(01234567)=CbL
-	movdqa    xmm0,xmm7
-	punpckhbw xmm7,xmm1		; xmm7=Cr(89ABCDEF)=CrH
-	punpcklbw xmm0,xmm1		; xmm0=Cr(01234567)=CrL
+        movdqa    xmm4,xmm6
+        punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+        punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+        movdqa    xmm0,xmm7
+        punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+        punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
 
-	paddw     xmm6,xmm3
-	paddw     xmm4,xmm3
-	paddw     xmm7,xmm3
-	paddw     xmm0,xmm3
+        paddw     xmm6,xmm3
+        paddw     xmm4,xmm3
+        paddw     xmm7,xmm3
+        paddw     xmm0,xmm3
 
-	; (Original)
-	; R = Y                + 1.40200 * Cr
-	; G = Y - 0.34414 * Cb - 0.71414 * Cr
-	; B = Y + 1.77200 * Cb
-	;
-	; (This implementation)
-	; R = Y                + 0.40200 * Cr + Cr
-	; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
-	; B = Y - 0.22800 * Cb + Cb + Cb
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
 
-	movdqa	xmm5,xmm6		; xmm5=CbH
-	movdqa	xmm2,xmm4		; xmm2=CbL
-	paddw	xmm6,xmm6		; xmm6=2*CbH
-	paddw	xmm4,xmm4		; xmm4=2*CbL
-	movdqa	xmm1,xmm7		; xmm1=CrH
-	movdqa	xmm3,xmm0		; xmm3=CrL
-	paddw	xmm7,xmm7		; xmm7=2*CrH
-	paddw	xmm0,xmm0		; xmm0=2*CrL
+        movdqa  xmm5,xmm6               ; xmm5=CbH
+        movdqa  xmm2,xmm4               ; xmm2=CbL
+        paddw   xmm6,xmm6               ; xmm6=2*CbH
+        paddw   xmm4,xmm4               ; xmm4=2*CbL
+        movdqa  xmm1,xmm7               ; xmm1=CrH
+        movdqa  xmm3,xmm0               ; xmm3=CrL
+        paddw   xmm7,xmm7               ; xmm7=2*CrH
+        paddw   xmm0,xmm0               ; xmm0=2*CrL
 
-	pmulhw	xmm6,[GOTOFF(eax,PW_MF0228)]	; xmm6=(2*CbH * -FIX(0.22800))
-	pmulhw	xmm4,[GOTOFF(eax,PW_MF0228)]	; xmm4=(2*CbL * -FIX(0.22800))
-	pmulhw	xmm7,[GOTOFF(eax,PW_F0402)]	; xmm7=(2*CrH * FIX(0.40200))
-	pmulhw	xmm0,[GOTOFF(eax,PW_F0402)]	; xmm0=(2*CrL * FIX(0.40200))
+        pmulhw  xmm6,[GOTOFF(eax,PW_MF0228)]    ; xmm6=(2*CbH * -FIX(0.22800))
+        pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbL * -FIX(0.22800))
+        pmulhw  xmm7,[GOTOFF(eax,PW_F0402)]     ; xmm7=(2*CrH * FIX(0.40200))
+        pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrL * FIX(0.40200))
 
-	paddw	xmm6,[GOTOFF(eax,PW_ONE)]
-	paddw	xmm4,[GOTOFF(eax,PW_ONE)]
-	psraw	xmm6,1			; xmm6=(CbH * -FIX(0.22800))
-	psraw	xmm4,1			; xmm4=(CbL * -FIX(0.22800))
-	paddw	xmm7,[GOTOFF(eax,PW_ONE)]
-	paddw	xmm0,[GOTOFF(eax,PW_ONE)]
-	psraw	xmm7,1			; xmm7=(CrH * FIX(0.40200))
-	psraw	xmm0,1			; xmm0=(CrL * FIX(0.40200))
+        paddw   xmm6,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+        psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+        paddw   xmm7,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+        psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
 
-	paddw	xmm6,xmm5
-	paddw	xmm4,xmm2
-	paddw	xmm6,xmm5		; xmm6=(CbH * FIX(1.77200))=(B-Y)H
-	paddw	xmm4,xmm2		; xmm4=(CbL * FIX(1.77200))=(B-Y)L
-	paddw	xmm7,xmm1		; xmm7=(CrH * FIX(1.40200))=(R-Y)H
-	paddw	xmm0,xmm3		; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+        paddw   xmm6,xmm5
+        paddw   xmm4,xmm2
+        paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
 
-	movdqa	XMMWORD [wk(0)], xmm6	; wk(0)=(B-Y)H
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=(R-Y)H
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
 
-	movdqa    xmm6,xmm5
-	movdqa    xmm7,xmm2
-	punpcklwd xmm5,xmm1
-	punpckhwd xmm6,xmm1
-	pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   xmm6,[GOTOFF(eax,PW_MF0344_F0285)]
-	punpcklwd xmm2,xmm3
-	punpckhwd xmm7,xmm3
-	pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
-	pmaddwd   xmm7,[GOTOFF(eax,PW_MF0344_F0285)]
+        movdqa    xmm6,xmm5
+        movdqa    xmm7,xmm2
+        punpcklwd xmm5,xmm1
+        punpckhwd xmm6,xmm1
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm6,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd xmm2,xmm3
+        punpckhwd xmm7,xmm3
+        pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF0344_F0285)]
 
-	paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     xmm6,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     xmm5,SCALEBITS
-	psrad     xmm6,SCALEBITS
-	paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
-	paddd     xmm7,[GOTOFF(eax,PD_ONEHALF)]
-	psrad     xmm2,SCALEBITS
-	psrad     xmm7,SCALEBITS
+        paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm6,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm5,SCALEBITS
+        psrad     xmm6,SCALEBITS
+        paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm7,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm7,SCALEBITS
 
-	packssdw  xmm5,xmm6	; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
-	packssdw  xmm2,xmm7	; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
-	psubw     xmm5,xmm1	; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
-	psubw     xmm2,xmm3	; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+        packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
 
-	movdqa	XMMWORD [wk(2)], xmm5	; wk(2)=(G-Y)H
+        movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
 
-	mov	al,2			; Yctr
-	jmp	short .Yloop_1st
-	alignx	16,7
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st
+        alignx  16,7
 
 .Yloop_2nd:
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(R-Y)H
-	movdqa	xmm2, XMMWORD [wk(2)]	; xmm2=(G-Y)H
-	movdqa	xmm4, XMMWORD [wk(0)]	; xmm4=(B-Y)H
-	alignx	16,7
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
+        alignx  16,7
 
 .Yloop_1st:
-	movdqa	xmm7, XMMWORD [esi]	; xmm7=Y(0123456789ABCDEF)
+        movdqa  xmm7, XMMWORD [esi]     ; xmm7=Y(0123456789ABCDEF)
 
-	pcmpeqw	xmm6,xmm6
-	psrlw	xmm6,BYTE_BIT		; xmm6={0xFF 0x00 0xFF 0x00 ..}
-	pand	xmm6,xmm7		; xmm6=Y(02468ACE)=YE
-	psrlw	xmm7,BYTE_BIT		; xmm7=Y(13579BDF)=YO
+        pcmpeqw xmm6,xmm6
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+        psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
 
-	movdqa	xmm1,xmm0		; xmm1=xmm0=(R-Y)(L/H)
-	movdqa	xmm3,xmm2		; xmm3=xmm2=(G-Y)(L/H)
-	movdqa	xmm5,xmm4		; xmm5=xmm4=(B-Y)(L/H)
+        movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+        movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+        movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
 
-	paddw     xmm0,xmm6		; xmm0=((R-Y)+YE)=RE=R(02468ACE)
-	paddw     xmm1,xmm7		; xmm1=((R-Y)+YO)=RO=R(13579BDF)
-	packuswb  xmm0,xmm0		; xmm0=R(02468ACE********)
-	packuswb  xmm1,xmm1		; xmm1=R(13579BDF********)
+        paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
 
-	paddw     xmm2,xmm6		; xmm2=((G-Y)+YE)=GE=G(02468ACE)
-	paddw     xmm3,xmm7		; xmm3=((G-Y)+YO)=GO=G(13579BDF)
-	packuswb  xmm2,xmm2		; xmm2=G(02468ACE********)
-	packuswb  xmm3,xmm3		; xmm3=G(13579BDF********)
+        paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
 
-	paddw     xmm4,xmm6		; xmm4=((B-Y)+YE)=BE=B(02468ACE)
-	paddw     xmm5,xmm7		; xmm5=((B-Y)+YO)=BO=B(13579BDF)
-	packuswb  xmm4,xmm4		; xmm4=B(02468ACE********)
-	packuswb  xmm5,xmm5		; xmm5=B(13579BDF********)
+        paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+        paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
 
 %if RGB_PIXELSIZE == 3 ; ---------------
 
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmB	; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
-	punpcklbw xmmD,xmmF	; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
 
-	movdqa    xmmG,xmmA
-	movdqa    xmmH,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
-	punpckhwd xmmG,xmmE	; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
 
-	psrldq    xmmH,2	; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
-	psrldq    xmmE,2	; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
 
-	movdqa    xmmC,xmmD
-	movdqa    xmmB,xmmD
-	punpcklwd xmmD,xmmH	; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
-	punpckhwd xmmC,xmmH	; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
 
-	psrldq    xmmB,2	; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
 
-	movdqa    xmmF,xmmE
-	punpcklwd xmmE,xmmB	; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
-	punpckhwd xmmF,xmmB	; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
 
-	pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
-	movdqa    xmmB,xmmE
-	punpckldq xmmA,xmmD	; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
-	punpckldq xmmE,xmmH	; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
-	punpckhdq xmmD,xmmB	; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
 
-	pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
-	movdqa    xmmB,xmmF
-	punpckldq xmmG,xmmC	; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
-	punpckldq xmmF,xmmH	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
-	punpckhdq xmmC,xmmB	; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
 
-	punpcklqdq xmmA,xmmE	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
-	punpcklqdq xmmD,xmmG	; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
-	punpcklqdq xmmF,xmmC	; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	edi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
 .out0:
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	ecx, byte SIZEOF_XMMWORD
-	jz	near .endcolumn
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
 
-	add	esi, byte SIZEOF_XMMWORD	; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	ebx, byte SIZEOF_XMMWORD	; inptr1
-	add	edx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st32:
-	lea	ecx, [ecx+ecx*2]		; imul ecx, RGB_PIXELSIZE
-	cmp	ecx, byte 2*SIZEOF_XMMWORD
-	jb	short .column_st16
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmF
-	sub	ecx, byte 2*SIZEOF_XMMWORD
-	jmp	short .column_st15
+        lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     ecx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
 .column_st16:
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st15
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	add	edi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	ecx, byte SIZEOF_XMMWORD
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD
 .column_st15:
-	; Store the lower 8 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_MMWORD
-	jb	short .column_st7
-	movq	XMM_MMWORD [edi], xmmA
-	add	edi, byte SIZEOF_MMWORD
-	sub	ecx, byte SIZEOF_MMWORD
-	psrldq	xmmA, SIZEOF_MMWORD
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_MMWORD
+        sub     ecx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
 .column_st7:
-	; Store the lower 4 bytes of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_DWORD
-	jb	short .column_st3
-	movd	XMM_DWORD [edi], xmmA
-	add	edi, byte SIZEOF_DWORD
-	sub	ecx, byte SIZEOF_DWORD
-	psrldq	xmmA, SIZEOF_DWORD
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [edi], xmmA
+        add     edi, byte SIZEOF_DWORD
+        sub     ecx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
 .column_st3:
-	; Store the lower 2 bytes of eax to the output when it has enough
-	; space.
-	movd	eax, xmmA
-	cmp	ecx, byte SIZEOF_WORD
-	jb	short .column_st1
-	mov	WORD [edi], ax
-	add	edi, byte SIZEOF_WORD
-	sub	ecx, byte SIZEOF_WORD
-	shr	eax, 16
+        ; Store the lower 2 bytes of eax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi], ax
+        add     edi, byte SIZEOF_WORD
+        sub     ecx, byte SIZEOF_WORD
+        shr     eax, 16
 .column_st1:
-	; Store the lower 1 byte of eax to the output when it has enough
-	; space.
-	test	ecx, ecx
-	jz	short .endcolumn
-	mov	BYTE [edi], al
+        ; Store the lower 1 byte of eax to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .endcolumn
+        mov     BYTE [edi], al
 
 %else ; RGB_PIXELSIZE == 4 ; -----------
 
 %ifdef RGBX_FILLER_0XFF
-	pcmpeqb   xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pcmpeqb   xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %else
-	pxor      xmm6,xmm6		; xmm6=XE=X(02468ACE********)
-	pxor      xmm7,xmm7		; xmm7=XO=X(13579BDF********)
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
 %endif
-	; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
-	; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
-	; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
-	; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
 
-	punpcklbw xmmA,xmmC	; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
-	punpcklbw xmmE,xmmG	; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
-	punpcklbw xmmB,xmmD	; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
-	punpcklbw xmmF,xmmH	; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
 
-	movdqa    xmmC,xmmA
-	punpcklwd xmmA,xmmE	; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
-	punpckhwd xmmC,xmmE	; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
-	movdqa    xmmG,xmmB
-	punpcklwd xmmB,xmmF	; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
-	punpckhwd xmmG,xmmF	; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
 
-	movdqa    xmmD,xmmA
-	punpckldq xmmA,xmmB	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
-	punpckhdq xmmD,xmmB	; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
-	movdqa    xmmH,xmmC
-	punpckldq xmmC,xmmG	; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
-	punpckhdq xmmH,xmmG	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
 
-	cmp	ecx, byte SIZEOF_XMMWORD
-	jb	short .column_st32
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
 
-	test	edi, SIZEOF_XMMWORD-1
-	jnz	short .out1
-	; --(aligned)-------------------
-	movntdq	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movntdq	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movntdq	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
-	movntdq	XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
-	jmp	short .out0
-.out1:	; --(unaligned)-----------------
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	movdqu	XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
-	movdqu	XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
 .out0:
-	add	edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; outptr
-	sub	ecx, byte SIZEOF_XMMWORD
-	jz	near .endcolumn
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
 
-	add	esi, byte SIZEOF_XMMWORD	; inptr0
-	dec	al			; Yctr
-	jnz	near .Yloop_2nd
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
 
-	add	ebx, byte SIZEOF_XMMWORD	; inptr1
-	add	edx, byte SIZEOF_XMMWORD	; inptr2
-	jmp	near .columnloop
-	alignx	16,7
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
 
 .column_st32:
-	cmp	ecx, byte SIZEOF_XMMWORD/2
-	jb	short .column_st16
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	movdqu	XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmC
-	movdqa	xmmD,xmmH
-	sub	ecx, byte SIZEOF_XMMWORD/2
+        cmp     ecx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     ecx, byte SIZEOF_XMMWORD/2
 .column_st16:
-	cmp	ecx, byte SIZEOF_XMMWORD/4
-	jb	short .column_st15
-	movdqu	XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
-	add	edi, byte SIZEOF_XMMWORD	; outptr
-	movdqa	xmmA,xmmD
-	sub	ecx, byte SIZEOF_XMMWORD/4
+        cmp     ecx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD/4
 .column_st15:
-	; Store two pixels (8 bytes) of xmmA to the output when it has enough
-	; space.
-	cmp	ecx, byte SIZEOF_XMMWORD/8
-	jb	short .column_st7
-	movq	XMM_MMWORD [edi], xmmA
-	add	edi, byte SIZEOF_XMMWORD/8*4
-	sub	ecx, byte SIZEOF_XMMWORD/8
-	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_XMMWORD/8*4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
 .column_st7:
-	; Store one pixel (4 bytes) of xmmA to the output when it has enough
-	; space.
-	test	ecx, ecx
-	jz	short .endcolumn
-	movd	XMM_DWORD [edi], xmmA
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .endcolumn
+        movd    XMM_DWORD [edi], xmmA
 
 %endif ; RGB_PIXELSIZE ; ---------------
 
 .endcolumn:
-	sfence		; flush the write buffer
+        sfence          ; flush the write buffer
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -458,62 +458,62 @@
 ;                                  JSAMPARRAY output_buf);
 ;
 
-%define output_width(b)	(b)+8			; JDIMENSION output_width
-%define input_buf(b)		(b)+12		; JSAMPIMAGE input_buf
-%define in_row_group_ctr(b)	(b)+16		; JDIMENSION in_row_group_ctr
-%define output_buf(b)		(b)+20		; JSAMPARRAY output_buf
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
 
-	align	16
-	global	EXTN(jsimd_h2v2_merged_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	eax, POINTER [output_width(ebp)]
+        mov     eax, POINTER [output_width(ebp)]
 
-	mov	edi, JSAMPIMAGE [input_buf(ebp)]
-	mov	ecx, JDIMENSION [in_row_group_ctr(ebp)]
-	mov	esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
-	mov	ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
-	mov	edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
-	mov	edi, JSAMPARRAY [output_buf(ebp)]
-	lea	esi, [esi+ecx*SIZEOF_JSAMPROW]
+        mov     edi, JSAMPIMAGE [input_buf(ebp)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(ebp)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]
 
-	push	edx			; inptr2
-	push	ebx			; inptr1
-	push	esi			; inptr00
-	mov	ebx,esp
+        push    edx                     ; inptr2
+        push    ebx                     ; inptr1
+        push    esi                     ; inptr00
+        mov     ebx,esp
 
-	push	edi			; output_buf (outptr0)
-	push	ecx			; in_row_group_ctr
-	push	ebx			; input_buf
-	push	eax			; output_width
+        push    edi                     ; output_buf (outptr0)
+        push    ecx                     ; in_row_group_ctr
+        push    ebx                     ; input_buf
+        push    eax                     ; output_width
 
-	call	near EXTN(jsimd_h2v1_merged_upsample_sse2)
+        call    near EXTN(jsimd_h2v1_merged_upsample_sse2)
 
-	add	esi, byte SIZEOF_JSAMPROW	; inptr01
-	add	edi, byte SIZEOF_JSAMPROW	; outptr1
-	mov	POINTER [ebx+0*SIZEOF_POINTER], esi
-	mov	POINTER [ebx-1*SIZEOF_POINTER], edi
+        add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+        add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+        mov     POINTER [ebx+0*SIZEOF_POINTER], esi
+        mov     POINTER [ebx-1*SIZEOF_POINTER], edi
 
-	call	near EXTN(jsimd_h2v1_merged_upsample_sse2)
+        call    near EXTN(jsimd_h2v1_merged_upsample_sse2)
 
-	add	esp, byte 7*SIZEOF_DWORD
+        add     esp, byte 7*SIZEOF_DWORD
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdsammmx.asm b/simd/jdsammmx.asm
index c09e5b9..823fe19 100644
--- a/simd/jdsammmx.asm
+++ b/simd/jdsammmx.asm
@@ -19,24 +19,24 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fancy_upsample_mmx)
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_mmx)
 
 EXTN(jconst_fancy_upsample_mmx):
 
-PW_ONE		times 4 dw  1
-PW_TWO		times 4 dw  2
-PW_THREE	times 4 dw  3
-PW_SEVEN	times 4 dw  7
-PW_EIGHT	times 4 dw  8
+PW_ONE          times 4 dw  1
+PW_TWO          times 4 dw  2
+PW_THREE        times 4 dw  3
+PW_SEVEN        times 4 dw  7
+PW_EIGHT        times 4 dw  8
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
 ;
@@ -52,146 +52,146 @@
 ;                                JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define downsamp_width(b)	(b)+12	; JDIMENSION downsampled_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_fancy_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_mmx)
 
 EXTN(jsimd_h2v1_fancy_upsample_mmx):
-	push	ebp
-	mov	ebp,esp
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	mov	eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
-	test	eax,eax
-	jz	near .return
+        mov     eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
+        test    eax,eax
+        jz      near .return
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	eax			; colctr
-	push	edi
-	push	esi
+        push    eax                     ; colctr
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr
 
-	test	eax, SIZEOF_MMWORD-1
-	jz	short .skip
-	mov	dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
+        test    eax, SIZEOF_MMWORD-1
+        jz      short .skip
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
 .skip:
-	pxor	mm0,mm0			; mm0=(all 0's)
-	pcmpeqb	mm7,mm7
-	psrlq	mm7,(SIZEOF_MMWORD-1)*BYTE_BIT
-	pand	mm7, MMWORD [esi+0*SIZEOF_MMWORD]
+        pxor    mm0,mm0                 ; mm0=(all 0's)
+        pcmpeqb mm7,mm7
+        psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT
+        pand    mm7, MMWORD [esi+0*SIZEOF_MMWORD]
 
-	add	eax, byte SIZEOF_MMWORD-1
-	and	eax, byte -SIZEOF_MMWORD
-	cmp	eax, byte SIZEOF_MMWORD
-	ja	short .columnloop
-	alignx	16,7
+        add     eax, byte SIZEOF_MMWORD-1
+        and     eax, byte -SIZEOF_MMWORD
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      short .columnloop
+        alignx  16,7
 
 .columnloop_last:
-	pcmpeqb	mm6,mm6
-	psllq	mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
-	pand	mm6, MMWORD [esi+0*SIZEOF_MMWORD]
-	jmp	short .upsample
-	alignx	16,7
+        pcmpeqb mm6,mm6
+        psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
+        pand    mm6, MMWORD [esi+0*SIZEOF_MMWORD]
+        jmp     short .upsample
+        alignx  16,7
 
 .columnloop:
-	movq	mm6, MMWORD [esi+1*SIZEOF_MMWORD]
-	psllq	mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
+        movq    mm6, MMWORD [esi+1*SIZEOF_MMWORD]
+        psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
 
 .upsample:
-	movq	mm1, MMWORD [esi+0*SIZEOF_MMWORD]
-	movq	mm2,mm1
-	movq	mm3,mm1			; mm1=( 0 1 2 3 4 5 6 7)
-	psllq	mm2,BYTE_BIT		; mm2=( - 0 1 2 3 4 5 6)
-	psrlq	mm3,BYTE_BIT		; mm3=( 1 2 3 4 5 6 7 -)
+        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm2,mm1
+        movq    mm3,mm1                 ; mm1=( 0 1 2 3 4 5 6 7)
+        psllq   mm2,BYTE_BIT            ; mm2=( - 0 1 2 3 4 5 6)
+        psrlq   mm3,BYTE_BIT            ; mm3=( 1 2 3 4 5 6 7 -)
 
-	por	mm2,mm7			; mm2=(-1 0 1 2 3 4 5 6)
-	por	mm3,mm6			; mm3=( 1 2 3 4 5 6 7 8)
+        por     mm2,mm7                 ; mm2=(-1 0 1 2 3 4 5 6)
+        por     mm3,mm6                 ; mm3=( 1 2 3 4 5 6 7 8)
 
-	movq	mm7,mm1
-	psrlq	mm7,(SIZEOF_MMWORD-1)*BYTE_BIT	; mm7=( 7 - - - - - - -)
+        movq    mm7,mm1
+        psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm7=( 7 - - - - - - -)
 
-	movq      mm4,mm1
-	punpcklbw mm1,mm0		; mm1=( 0 1 2 3)
-	punpckhbw mm4,mm0		; mm4=( 4 5 6 7)
-	movq      mm5,mm2
-	punpcklbw mm2,mm0		; mm2=(-1 0 1 2)
-	punpckhbw mm5,mm0		; mm5=( 3 4 5 6)
-	movq      mm6,mm3
-	punpcklbw mm3,mm0		; mm3=( 1 2 3 4)
-	punpckhbw mm6,mm0		; mm6=( 5 6 7 8)
+        movq      mm4,mm1
+        punpcklbw mm1,mm0               ; mm1=( 0 1 2 3)
+        punpckhbw mm4,mm0               ; mm4=( 4 5 6 7)
+        movq      mm5,mm2
+        punpcklbw mm2,mm0               ; mm2=(-1 0 1 2)
+        punpckhbw mm5,mm0               ; mm5=( 3 4 5 6)
+        movq      mm6,mm3
+        punpcklbw mm3,mm0               ; mm3=( 1 2 3 4)
+        punpckhbw mm6,mm0               ; mm6=( 5 6 7 8)
 
-	pmullw	mm1,[GOTOFF(ebx,PW_THREE)]
-	pmullw	mm4,[GOTOFF(ebx,PW_THREE)]
-	paddw	mm2,[GOTOFF(ebx,PW_ONE)]
-	paddw	mm5,[GOTOFF(ebx,PW_ONE)]
-	paddw	mm3,[GOTOFF(ebx,PW_TWO)]
-	paddw	mm6,[GOTOFF(ebx,PW_TWO)]
+        pmullw  mm1,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]
+        paddw   mm2,[GOTOFF(ebx,PW_ONE)]
+        paddw   mm5,[GOTOFF(ebx,PW_ONE)]
+        paddw   mm3,[GOTOFF(ebx,PW_TWO)]
+        paddw   mm6,[GOTOFF(ebx,PW_TWO)]
 
-	paddw	mm2,mm1
-	paddw	mm5,mm4
-	psrlw	mm2,2			; mm2=OutLE=( 0  2  4  6)
-	psrlw	mm5,2			; mm5=OutHE=( 8 10 12 14)
-	paddw	mm3,mm1
-	paddw	mm6,mm4
-	psrlw	mm3,2			; mm3=OutLO=( 1  3  5  7)
-	psrlw	mm6,2			; mm6=OutHO=( 9 11 13 15)
+        paddw   mm2,mm1
+        paddw   mm5,mm4
+        psrlw   mm2,2                   ; mm2=OutLE=( 0  2  4  6)
+        psrlw   mm5,2                   ; mm5=OutHE=( 8 10 12 14)
+        paddw   mm3,mm1
+        paddw   mm6,mm4
+        psrlw   mm3,2                   ; mm3=OutLO=( 1  3  5  7)
+        psrlw   mm6,2                   ; mm6=OutHO=( 9 11 13 15)
 
-	psllw	mm3,BYTE_BIT
-	psllw	mm6,BYTE_BIT
-	por	mm2,mm3			; mm2=OutL=( 0  1  2  3  4  5  6  7)
-	por	mm5,mm6			; mm5=OutH=( 8  9 10 11 12 13 14 15)
+        psllw   mm3,BYTE_BIT
+        psllw   mm6,BYTE_BIT
+        por     mm2,mm3                 ; mm2=OutL=( 0  1  2  3  4  5  6  7)
+        por     mm5,mm6                 ; mm5=OutH=( 8  9 10 11 12 13 14 15)
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm2
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mm5
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm5
 
-	sub	eax, byte SIZEOF_MMWORD
-	add	esi, byte 1*SIZEOF_MMWORD	; inptr
-	add	edi, byte 2*SIZEOF_MMWORD	; outptr
-	cmp	eax, byte SIZEOF_MMWORD
-	ja	near .columnloop
-	test	eax,eax
-	jnz	near .columnloop_last
+        sub     eax, byte SIZEOF_MMWORD
+        add     esi, byte 1*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 2*SIZEOF_MMWORD       ; outptr
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      near .columnloop
+        test    eax,eax
+        jnz     near .columnloop_last
 
-	pop	esi
-	pop	edi
-	pop	eax
+        pop     esi
+        pop     edi
+        pop     eax
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	ecx				; rowctr
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -205,324 +205,324 @@
 ;                                JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define downsamp_width(b)	(b)+12	; JDIMENSION downsampled_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		4
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          4
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_h2v2_fancy_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_mmx)
 
 EXTN(jsimd_h2v2_fancy_upsample_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original (unaligned) ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	edx,eax				; edx = original ebp
-	mov	eax, JDIMENSION [downsamp_width(edx)]  ; colctr
-	test	eax,eax
-	jz	near .return
+        mov     edx,eax                         ; edx = original ebp
+        mov     eax, JDIMENSION [downsamp_width(edx)]  ; colctr
+        test    eax,eax
+        jz      near .return
 
-	mov	ecx, INT [max_v_samp(edx)]	; rowctr
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, INT [max_v_samp(edx)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return
 
-	mov	esi, JSAMPARRAY [input_data(edx)]	; input_data
-	mov	edi, POINTER [output_data_ptr(edx)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(edx)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	eax					; colctr
-	push	ecx
-	push	edi
-	push	esi
+        push    eax                                     ; colctr
+        push    ecx
+        push    edi
+        push    esi
 
-	mov	ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]	; inptr1(above)
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; inptr1(below)
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]	; outptr1
+        mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
 
-	test	eax, SIZEOF_MMWORD-1
-	jz	short .skip
-	push	edx
-	mov	dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
-	pop	edx
+        test    eax, SIZEOF_MMWORD-1
+        jz      short .skip
+        push    edx
+        mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     edx
 .skip:
-	; -- process the first column block
+        ; -- process the first column block
 
-	movq	mm0, MMWORD [ebx+0*SIZEOF_MMWORD]	; mm0=row[ 0][0]
-	movq	mm1, MMWORD [ecx+0*SIZEOF_MMWORD]	; mm1=row[-1][0]
-	movq	mm2, MMWORD [esi+0*SIZEOF_MMWORD]	; mm2=row[+1][0]
+        movq    mm0, MMWORD [ebx+0*SIZEOF_MMWORD]       ; mm0=row[ 0][0]
+        movq    mm1, MMWORD [ecx+0*SIZEOF_MMWORD]       ; mm1=row[-1][0]
+        movq    mm2, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm2=row[+1][0]
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pxor      mm3,mm3		; mm3=(all 0's)
-	movq      mm4,mm0
-	punpcklbw mm0,mm3		; mm0=row[ 0][0]( 0 1 2 3)
-	punpckhbw mm4,mm3		; mm4=row[ 0][0]( 4 5 6 7)
-	movq      mm5,mm1
-	punpcklbw mm1,mm3		; mm1=row[-1][0]( 0 1 2 3)
-	punpckhbw mm5,mm3		; mm5=row[-1][0]( 4 5 6 7)
-	movq      mm6,mm2
-	punpcklbw mm2,mm3		; mm2=row[+1][0]( 0 1 2 3)
-	punpckhbw mm6,mm3		; mm6=row[+1][0]( 4 5 6 7)
+        pxor      mm3,mm3               ; mm3=(all 0's)
+        movq      mm4,mm0
+        punpcklbw mm0,mm3               ; mm0=row[ 0][0]( 0 1 2 3)
+        punpckhbw mm4,mm3               ; mm4=row[ 0][0]( 4 5 6 7)
+        movq      mm5,mm1
+        punpcklbw mm1,mm3               ; mm1=row[-1][0]( 0 1 2 3)
+        punpckhbw mm5,mm3               ; mm5=row[-1][0]( 4 5 6 7)
+        movq      mm6,mm2
+        punpcklbw mm2,mm3               ; mm2=row[+1][0]( 0 1 2 3)
+        punpckhbw mm6,mm3               ; mm6=row[+1][0]( 4 5 6 7)
 
-	pmullw	mm0,[GOTOFF(ebx,PW_THREE)]
-	pmullw	mm4,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm0,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]
 
-	pcmpeqb	mm7,mm7
-	psrlq	mm7,(SIZEOF_MMWORD-2)*BYTE_BIT
+        pcmpeqb mm7,mm7
+        psrlq   mm7,(SIZEOF_MMWORD-2)*BYTE_BIT
 
-	paddw	mm1,mm0			; mm1=Int0L=( 0 1 2 3)
-	paddw	mm5,mm4			; mm5=Int0H=( 4 5 6 7)
-	paddw	mm2,mm0			; mm2=Int1L=( 0 1 2 3)
-	paddw	mm6,mm4			; mm6=Int1H=( 4 5 6 7)
+        paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+        paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+        paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+        paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
 
-	movq	MMWORD [edx+0*SIZEOF_MMWORD], mm1	; temporarily save
-	movq	MMWORD [edx+1*SIZEOF_MMWORD], mm5	; the intermediate data
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm2
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mm6
+        movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1       ; temporarily save
+        movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5       ; the intermediate data
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm6
 
-	pand	mm1,mm7			; mm1=( 0 - - -)
-	pand	mm2,mm7			; mm2=( 0 - - -)
+        pand    mm1,mm7                 ; mm1=( 0 - - -)
+        pand    mm2,mm7                 ; mm2=( 0 - - -)
 
-	movq	MMWORD [wk(0)], mm1
-	movq	MMWORD [wk(1)], mm2
+        movq    MMWORD [wk(0)], mm1
+        movq    MMWORD [wk(1)], mm2
 
-	poppic	ebx
+        poppic  ebx
 
-	add	eax, byte SIZEOF_MMWORD-1
-	and	eax, byte -SIZEOF_MMWORD
-	cmp	eax, byte SIZEOF_MMWORD
-	ja	short .columnloop
-	alignx	16,7
+        add     eax, byte SIZEOF_MMWORD-1
+        and     eax, byte -SIZEOF_MMWORD
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      short .columnloop
+        alignx  16,7
 
 .columnloop_last:
-	; -- process the last column block
+        ; -- process the last column block
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pcmpeqb	mm1,mm1
-	psllq	mm1,(SIZEOF_MMWORD-2)*BYTE_BIT
-	movq	mm2,mm1
+        pcmpeqb mm1,mm1
+        psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT
+        movq    mm2,mm1
 
-	pand	mm1, MMWORD [edx+1*SIZEOF_MMWORD]	; mm1=( - - - 7)
-	pand	mm2, MMWORD [edi+1*SIZEOF_MMWORD]	; mm2=( - - - 7)
+        pand    mm1, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm1=( - - - 7)
+        pand    mm2, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm2=( - - - 7)
 
-	movq	MMWORD [wk(2)], mm1
-	movq	MMWORD [wk(3)], mm2
+        movq    MMWORD [wk(2)], mm1
+        movq    MMWORD [wk(3)], mm2
 
-	jmp	short .upsample
-	alignx	16,7
+        jmp     short .upsample
+        alignx  16,7
 
 .columnloop:
-	; -- process the next column block
+        ; -- process the next column block
 
-	movq	mm0, MMWORD [ebx+1*SIZEOF_MMWORD]	; mm0=row[ 0][1]
-	movq	mm1, MMWORD [ecx+1*SIZEOF_MMWORD]	; mm1=row[-1][1]
-	movq	mm2, MMWORD [esi+1*SIZEOF_MMWORD]	; mm2=row[+1][1]
+        movq    mm0, MMWORD [ebx+1*SIZEOF_MMWORD]       ; mm0=row[ 0][1]
+        movq    mm1, MMWORD [ecx+1*SIZEOF_MMWORD]       ; mm1=row[-1][1]
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]       ; mm2=row[+1][1]
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pxor      mm3,mm3		; mm3=(all 0's)
-	movq      mm4,mm0
-	punpcklbw mm0,mm3		; mm0=row[ 0][1]( 0 1 2 3)
-	punpckhbw mm4,mm3		; mm4=row[ 0][1]( 4 5 6 7)
-	movq      mm5,mm1
-	punpcklbw mm1,mm3		; mm1=row[-1][1]( 0 1 2 3)
-	punpckhbw mm5,mm3		; mm5=row[-1][1]( 4 5 6 7)
-	movq      mm6,mm2
-	punpcklbw mm2,mm3		; mm2=row[+1][1]( 0 1 2 3)
-	punpckhbw mm6,mm3		; mm6=row[+1][1]( 4 5 6 7)
+        pxor      mm3,mm3               ; mm3=(all 0's)
+        movq      mm4,mm0
+        punpcklbw mm0,mm3               ; mm0=row[ 0][1]( 0 1 2 3)
+        punpckhbw mm4,mm3               ; mm4=row[ 0][1]( 4 5 6 7)
+        movq      mm5,mm1
+        punpcklbw mm1,mm3               ; mm1=row[-1][1]( 0 1 2 3)
+        punpckhbw mm5,mm3               ; mm5=row[-1][1]( 4 5 6 7)
+        movq      mm6,mm2
+        punpcklbw mm2,mm3               ; mm2=row[+1][1]( 0 1 2 3)
+        punpckhbw mm6,mm3               ; mm6=row[+1][1]( 4 5 6 7)
 
-	pmullw	mm0,[GOTOFF(ebx,PW_THREE)]
-	pmullw	mm4,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm0,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]
 
-	paddw	mm1,mm0			; mm1=Int0L=( 0 1 2 3)
-	paddw	mm5,mm4			; mm5=Int0H=( 4 5 6 7)
-	paddw	mm2,mm0			; mm2=Int1L=( 0 1 2 3)
-	paddw	mm6,mm4			; mm6=Int1H=( 4 5 6 7)
+        paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+        paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+        paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+        paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
 
-	movq	MMWORD [edx+2*SIZEOF_MMWORD], mm1	; temporarily save
-	movq	MMWORD [edx+3*SIZEOF_MMWORD], mm5	; the intermediate data
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mm2
-	movq	MMWORD [edi+3*SIZEOF_MMWORD], mm6
+        movq    MMWORD [edx+2*SIZEOF_MMWORD], mm1       ; temporarily save
+        movq    MMWORD [edx+3*SIZEOF_MMWORD], mm5       ; the intermediate data
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm6
 
-	psllq	mm1,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm1=( - - - 0)
-	psllq	mm2,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm2=( - - - 0)
+        psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm1=( - - - 0)
+        psllq   mm2,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm2=( - - - 0)
 
-	movq	MMWORD [wk(2)], mm1
-	movq	MMWORD [wk(3)], mm2
+        movq    MMWORD [wk(2)], mm1
+        movq    MMWORD [wk(3)], mm2
 
 .upsample:
-	; -- process the upper row
+        ; -- process the upper row
 
-	movq	mm7, MMWORD [edx+0*SIZEOF_MMWORD]	; mm7=Int0L=( 0 1 2 3)
-	movq	mm3, MMWORD [edx+1*SIZEOF_MMWORD]	; mm3=Int0H=( 4 5 6 7)
+        movq    mm7, MMWORD [edx+0*SIZEOF_MMWORD]       ; mm7=Int0L=( 0 1 2 3)
+        movq    mm3, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm3=Int0H=( 4 5 6 7)
 
-	movq	mm0,mm7
-	movq	mm4,mm3
-	psrlq	mm0,2*BYTE_BIT			; mm0=( 1 2 3 -)
-	psllq	mm4,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm4=( - - - 4)
-	movq	mm5,mm7
-	movq	mm6,mm3
-	psrlq	mm5,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm5=( 3 - - -)
-	psllq	mm6,2*BYTE_BIT			; mm6=( - 4 5 6)
+        movq    mm0,mm7
+        movq    mm4,mm3
+        psrlq   mm0,2*BYTE_BIT                  ; mm0=( 1 2 3 -)
+        psllq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( - - - 4)
+        movq    mm5,mm7
+        movq    mm6,mm3
+        psrlq   mm5,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm5=( 3 - - -)
+        psllq   mm6,2*BYTE_BIT                  ; mm6=( - 4 5 6)
 
-	por	mm0,mm4				; mm0=( 1 2 3 4)
-	por	mm5,mm6				; mm5=( 3 4 5 6)
+        por     mm0,mm4                         ; mm0=( 1 2 3 4)
+        por     mm5,mm6                         ; mm5=( 3 4 5 6)
 
-	movq	mm1,mm7
-	movq	mm2,mm3
-	psllq	mm1,2*BYTE_BIT			; mm1=( - 0 1 2)
-	psrlq	mm2,2*BYTE_BIT			; mm2=( 5 6 7 -)
-	movq	mm4,mm3
-	psrlq	mm4,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm4=( 7 - - -)
+        movq    mm1,mm7
+        movq    mm2,mm3
+        psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+        psrlq   mm2,2*BYTE_BIT                  ; mm2=( 5 6 7 -)
+        movq    mm4,mm3
+        psrlq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( 7 - - -)
 
-	por	mm1, MMWORD [wk(0)]		; mm1=(-1 0 1 2)
-	por	mm2, MMWORD [wk(2)]		; mm2=( 5 6 7 8)
+        por     mm1, MMWORD [wk(0)]             ; mm1=(-1 0 1 2)
+        por     mm2, MMWORD [wk(2)]             ; mm2=( 5 6 7 8)
 
-	movq	MMWORD [wk(0)], mm4
+        movq    MMWORD [wk(0)], mm4
 
-	pmullw	mm7,[GOTOFF(ebx,PW_THREE)]
-	pmullw	mm3,[GOTOFF(ebx,PW_THREE)]
-	paddw	mm1,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	mm5,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	mm0,[GOTOFF(ebx,PW_SEVEN)]
-	paddw	mm2,[GOTOFF(ebx,PW_SEVEN)]
+        pmullw  mm7,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm3,[GOTOFF(ebx,PW_THREE)]
+        paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm5,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm0,[GOTOFF(ebx,PW_SEVEN)]
+        paddw   mm2,[GOTOFF(ebx,PW_SEVEN)]
 
-	paddw	mm1,mm7
-	paddw	mm5,mm3
-	psrlw	mm1,4			; mm1=Out0LE=( 0  2  4  6)
-	psrlw	mm5,4			; mm5=Out0HE=( 8 10 12 14)
-	paddw	mm0,mm7
-	paddw	mm2,mm3
-	psrlw	mm0,4			; mm0=Out0LO=( 1  3  5  7)
-	psrlw	mm2,4			; mm2=Out0HO=( 9 11 13 15)
+        paddw   mm1,mm7
+        paddw   mm5,mm3
+        psrlw   mm1,4                   ; mm1=Out0LE=( 0  2  4  6)
+        psrlw   mm5,4                   ; mm5=Out0HE=( 8 10 12 14)
+        paddw   mm0,mm7
+        paddw   mm2,mm3
+        psrlw   mm0,4                   ; mm0=Out0LO=( 1  3  5  7)
+        psrlw   mm2,4                   ; mm2=Out0HO=( 9 11 13 15)
 
-	psllw	mm0,BYTE_BIT
-	psllw	mm2,BYTE_BIT
-	por	mm1,mm0			; mm1=Out0L=( 0  1  2  3  4  5  6  7)
-	por	mm5,mm2			; mm5=Out0H=( 8  9 10 11 12 13 14 15)
+        psllw   mm0,BYTE_BIT
+        psllw   mm2,BYTE_BIT
+        por     mm1,mm0                 ; mm1=Out0L=( 0  1  2  3  4  5  6  7)
+        por     mm5,mm2                 ; mm5=Out0H=( 8  9 10 11 12 13 14 15)
 
-	movq	MMWORD [edx+0*SIZEOF_MMWORD], mm1
-	movq	MMWORD [edx+1*SIZEOF_MMWORD], mm5
+        movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1
+        movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5
 
-	; -- process the lower row
+        ; -- process the lower row
 
-	movq	mm6, MMWORD [edi+0*SIZEOF_MMWORD]	; mm6=Int1L=( 0 1 2 3)
-	movq	mm4, MMWORD [edi+1*SIZEOF_MMWORD]	; mm4=Int1H=( 4 5 6 7)
+        movq    mm6, MMWORD [edi+0*SIZEOF_MMWORD]       ; mm6=Int1L=( 0 1 2 3)
+        movq    mm4, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm4=Int1H=( 4 5 6 7)
 
-	movq	mm7,mm6
-	movq	mm3,mm4
-	psrlq	mm7,2*BYTE_BIT			; mm7=( 1 2 3 -)
-	psllq	mm3,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm3=( - - - 4)
-	movq	mm0,mm6
-	movq	mm2,mm4
-	psrlq	mm0,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm0=( 3 - - -)
-	psllq	mm2,2*BYTE_BIT			; mm2=( - 4 5 6)
+        movq    mm7,mm6
+        movq    mm3,mm4
+        psrlq   mm7,2*BYTE_BIT                  ; mm7=( 1 2 3 -)
+        psllq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( - - - 4)
+        movq    mm0,mm6
+        movq    mm2,mm4
+        psrlq   mm0,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm0=( 3 - - -)
+        psllq   mm2,2*BYTE_BIT                  ; mm2=( - 4 5 6)
 
-	por	mm7,mm3				; mm7=( 1 2 3 4)
-	por	mm0,mm2				; mm0=( 3 4 5 6)
+        por     mm7,mm3                         ; mm7=( 1 2 3 4)
+        por     mm0,mm2                         ; mm0=( 3 4 5 6)
 
-	movq	mm1,mm6
-	movq	mm5,mm4
-	psllq	mm1,2*BYTE_BIT			; mm1=( - 0 1 2)
-	psrlq	mm5,2*BYTE_BIT			; mm5=( 5 6 7 -)
-	movq	mm3,mm4
-	psrlq	mm3,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm3=( 7 - - -)
+        movq    mm1,mm6
+        movq    mm5,mm4
+        psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+        psrlq   mm5,2*BYTE_BIT                  ; mm5=( 5 6 7 -)
+        movq    mm3,mm4
+        psrlq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( 7 - - -)
 
-	por	mm1, MMWORD [wk(1)]		; mm1=(-1 0 1 2)
-	por	mm5, MMWORD [wk(3)]		; mm5=( 5 6 7 8)
+        por     mm1, MMWORD [wk(1)]             ; mm1=(-1 0 1 2)
+        por     mm5, MMWORD [wk(3)]             ; mm5=( 5 6 7 8)
 
-	movq	MMWORD [wk(1)], mm3
+        movq    MMWORD [wk(1)], mm3
 
-	pmullw	mm6,[GOTOFF(ebx,PW_THREE)]
-	pmullw	mm4,[GOTOFF(ebx,PW_THREE)]
-	paddw	mm1,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	mm0,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	mm7,[GOTOFF(ebx,PW_SEVEN)]
-	paddw	mm5,[GOTOFF(ebx,PW_SEVEN)]
+        pmullw  mm6,[GOTOFF(ebx,PW_THREE)]
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]
+        paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm0,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm7,[GOTOFF(ebx,PW_SEVEN)]
+        paddw   mm5,[GOTOFF(ebx,PW_SEVEN)]
 
-	paddw	mm1,mm6
-	paddw	mm0,mm4
-	psrlw	mm1,4			; mm1=Out1LE=( 0  2  4  6)
-	psrlw	mm0,4			; mm0=Out1HE=( 8 10 12 14)
-	paddw	mm7,mm6
-	paddw	mm5,mm4
-	psrlw	mm7,4			; mm7=Out1LO=( 1  3  5  7)
-	psrlw	mm5,4			; mm5=Out1HO=( 9 11 13 15)
+        paddw   mm1,mm6
+        paddw   mm0,mm4
+        psrlw   mm1,4                   ; mm1=Out1LE=( 0  2  4  6)
+        psrlw   mm0,4                   ; mm0=Out1HE=( 8 10 12 14)
+        paddw   mm7,mm6
+        paddw   mm5,mm4
+        psrlw   mm7,4                   ; mm7=Out1LO=( 1  3  5  7)
+        psrlw   mm5,4                   ; mm5=Out1HO=( 9 11 13 15)
 
-	psllw	mm7,BYTE_BIT
-	psllw	mm5,BYTE_BIT
-	por	mm1,mm7			; mm1=Out1L=( 0  1  2  3  4  5  6  7)
-	por	mm0,mm5			; mm0=Out1H=( 8  9 10 11 12 13 14 15)
+        psllw   mm7,BYTE_BIT
+        psllw   mm5,BYTE_BIT
+        por     mm1,mm7                 ; mm1=Out1L=( 0  1  2  3  4  5  6  7)
+        por     mm0,mm5                 ; mm0=Out1H=( 8  9 10 11 12 13 14 15)
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm1
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm1
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm0
 
-	poppic	ebx
+        poppic  ebx
 
-	sub	eax, byte SIZEOF_MMWORD
-	add	ecx, byte 1*SIZEOF_MMWORD	; inptr1(above)
-	add	ebx, byte 1*SIZEOF_MMWORD	; inptr0
-	add	esi, byte 1*SIZEOF_MMWORD	; inptr1(below)
-	add	edx, byte 2*SIZEOF_MMWORD	; outptr0
-	add	edi, byte 2*SIZEOF_MMWORD	; outptr1
-	cmp	eax, byte SIZEOF_MMWORD
-	ja	near .columnloop
-	test	eax,eax
-	jnz	near .columnloop_last
+        sub     eax, byte SIZEOF_MMWORD
+        add     ecx, byte 1*SIZEOF_MMWORD       ; inptr1(above)
+        add     ebx, byte 1*SIZEOF_MMWORD       ; inptr0
+        add     esi, byte 1*SIZEOF_MMWORD       ; inptr1(below)
+        add     edx, byte 2*SIZEOF_MMWORD       ; outptr0
+        add     edi, byte 2*SIZEOF_MMWORD       ; outptr1
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      near .columnloop
+        test    eax,eax
+        jnz     near .columnloop_last
 
-	pop	esi
-	pop	edi
-	pop	ecx
-	pop	eax
+        pop     esi
+        pop     edi
+        pop     ecx
+        pop     eax
 
-	add	esi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	ecx, byte 2			; rowctr
-	jg	near .rowloop
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -536,94 +536,94 @@
 ;                          JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define output_width(b)	(b)+12		; JDIMENSION output_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_mmx)
 
 EXTN(jsimd_h2v1_upsample_mmx):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	edx, JDIMENSION [output_width(ebp)]
-	add	edx, byte (2*SIZEOF_MMWORD)-1
-	and	edx, byte -(2*SIZEOF_MMWORD)
-	jz	short .return
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_MMWORD)-1
+        and     edx, byte -(2*SIZEOF_MMWORD)
+        jz      short .return
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	short .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	edi
-	push	esi
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]		; inptr
-	mov	edi, JSAMPROW [edi]		; outptr
-	mov	eax,edx				; colctr
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        mov     eax,edx                         ; colctr
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
 
-	movq      mm1,mm0
-	punpcklbw mm0,mm0
-	punpckhbw mm1,mm1
+        movq      mm1,mm0
+        punpcklbw mm0,mm0
+        punpckhbw mm1,mm1
 
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm0
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mm1
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
 
-	sub	eax, byte 2*SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
 
-	movq	mm2, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]
 
-	movq      mm3,mm2
-	punpcklbw mm2,mm2
-	punpckhbw mm3,mm3
+        movq      mm3,mm2
+        punpcklbw mm2,mm2
+        punpckhbw mm3,mm3
 
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mm2
-	movq	MMWORD [edi+3*SIZEOF_MMWORD], mm3
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
 
-	sub	eax, byte 2*SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
 
-	add	esi, byte 2*SIZEOF_MMWORD	; inptr
-	add	edi, byte 4*SIZEOF_MMWORD	; outptr
-	jmp	short .columnloop
-	alignx	16,7
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 4*SIZEOF_MMWORD       ; outptr
+        jmp     short .columnloop
+        alignx  16,7
 
 .nextrow:
-	pop	esi
-	pop	edi
+        pop     esi
+        pop     edi
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	ecx				; rowctr
-	jg	short .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      short .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -637,101 +637,101 @@
 ;                          JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define output_width(b)	(b)+12		; JDIMENSION output_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v2_upsample_mmx)
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_mmx)
 
 EXTN(jsimd_h2v2_upsample_mmx):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	edx, JDIMENSION [output_width(ebp)]
-	add	edx, byte (2*SIZEOF_MMWORD)-1
-	and	edx, byte -(2*SIZEOF_MMWORD)
-	jz	near .return
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_MMWORD)-1
+        and     edx, byte -(2*SIZEOF_MMWORD)
+        jz      near .return
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	short .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	edi
-	push	esi
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]			; inptr
-	mov	ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]	; outptr1
-	mov	eax,edx					; colctr
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]                     ; inptr
+        mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     eax,edx                                 ; colctr
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
 
-	movq      mm1,mm0
-	punpcklbw mm0,mm0
-	punpckhbw mm1,mm1
+        movq      mm1,mm0
+        punpcklbw mm0,mm0
+        punpckhbw mm1,mm1
 
-	movq	MMWORD [ebx+0*SIZEOF_MMWORD], mm0
-	movq	MMWORD [ebx+1*SIZEOF_MMWORD], mm1
-	movq	MMWORD [edi+0*SIZEOF_MMWORD], mm0
-	movq	MMWORD [edi+1*SIZEOF_MMWORD], mm1
+        movq    MMWORD [ebx+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [ebx+1*SIZEOF_MMWORD], mm1
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
 
-	sub	eax, byte 2*SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
 
-	movq	mm2, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]
 
-	movq      mm3,mm2
-	punpcklbw mm2,mm2
-	punpckhbw mm3,mm3
+        movq      mm3,mm2
+        punpcklbw mm2,mm2
+        punpckhbw mm3,mm3
 
-	movq	MMWORD [ebx+2*SIZEOF_MMWORD], mm2
-	movq	MMWORD [ebx+3*SIZEOF_MMWORD], mm3
-	movq	MMWORD [edi+2*SIZEOF_MMWORD], mm2
-	movq	MMWORD [edi+3*SIZEOF_MMWORD], mm3
+        movq    MMWORD [ebx+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [ebx+3*SIZEOF_MMWORD], mm3
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
 
-	sub	eax, byte 2*SIZEOF_MMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
 
-	add	esi, byte 2*SIZEOF_MMWORD	; inptr
-	add	ebx, byte 4*SIZEOF_MMWORD	; outptr0
-	add	edi, byte 4*SIZEOF_MMWORD	; outptr1
-	jmp	short .columnloop
-	alignx	16,7
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     ebx, byte 4*SIZEOF_MMWORD       ; outptr0
+        add     edi, byte 4*SIZEOF_MMWORD       ; outptr1
+        jmp     short .columnloop
+        alignx  16,7
 
 .nextrow:
-	pop	esi
-	pop	edi
+        pop     esi
+        pop     edi
 
-	add	esi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	ecx, byte 2			; rowctr
-	jg	short .rowloop
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      short .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdsamss2-64.asm b/simd/jdsamss2-64.asm
index f36c156..a41d059 100644
--- a/simd/jdsamss2-64.asm
+++ b/simd/jdsamss2-64.asm
@@ -20,24 +20,24 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fancy_upsample_sse2)
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
 
-PW_ONE		times 8 dw  1
-PW_TWO		times 8 dw  2
-PW_THREE	times 8 dw  3
-PW_SEVEN	times 8 dw  7
-PW_EIGHT	times 8 dw  8
+PW_ONE          times 8 dw  1
+PW_TWO          times 8 dw  2
+PW_THREE        times 8 dw  3
+PW_SEVEN        times 8 dw  7
+PW_EIGHT        times 8 dw  8
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
 ;
@@ -58,127 +58,127 @@
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_fancy_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov	rax, r11  ; colctr
-	test	rax,rax
-	jz	near .return
+        mov     rax, r11  ; colctr
+        test    rax,rax
+        jz      near .return
 
-	mov	rcx, r10	; rowctr
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10        ; rowctr
+        test    rcx,rcx
+        jz      near .return
 
-	mov	rsi, r12	; input_data
-	mov	rdi, r13
-	mov	rdi, JSAMPARRAY [rdi]			; output_data
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
 .rowloop:
-	push	rax			; colctr
-	push	rdi
-	push	rsi
+        push    rax                     ; colctr
+        push    rdi
+        push    rsi
 
-	mov	rsi, JSAMPROW [rsi]	; inptr
-	mov	rdi, JSAMPROW [rdi]	; outptr
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr
 
-	test	rax, SIZEOF_XMMWORD-1
-	jz	short .skip
-	mov	dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
+        test    rax, SIZEOF_XMMWORD-1
+        jz      short .skip
+        mov     dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
 .skip:
-	pxor	xmm0,xmm0		; xmm0=(all 0's)
-	pcmpeqb	xmm7,xmm7
-	psrldq	xmm7,(SIZEOF_XMMWORD-1)
-	pand	xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        pxor    xmm0,xmm0               ; xmm0=(all 0's)
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-1)
+        pand    xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD]
 
-	add	rax, byte SIZEOF_XMMWORD-1
-	and	rax, byte -SIZEOF_XMMWORD
-	cmp	rax, byte SIZEOF_XMMWORD
-	ja	short .columnloop
+        add     rax, byte SIZEOF_XMMWORD-1
+        and     rax, byte -SIZEOF_XMMWORD
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
 
 .columnloop_last:
-	pcmpeqb	xmm6,xmm6
-	pslldq	xmm6,(SIZEOF_XMMWORD-1)
-	pand	xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	jmp	short .upsample
+        pcmpeqb xmm6,xmm6
+        pslldq  xmm6,(SIZEOF_XMMWORD-1)
+        pand    xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        jmp     short .upsample
 
 .columnloop:
-	movdqa	xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD]
-	pslldq	xmm6,(SIZEOF_XMMWORD-1)
+        movdqa  xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        pslldq  xmm6,(SIZEOF_XMMWORD-1)
 
 .upsample:
-	movdqa	xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
-	movdqa	xmm2,xmm1
-	movdqa	xmm3,xmm1		; xmm1=( 0  1  2 ... 13 14 15)
-	pslldq	xmm2,1			; xmm2=(--  0  1 ... 12 13 14)
-	psrldq	xmm3,1			; xmm3=( 1  2  3 ... 14 15 --)
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2,xmm1
+        movdqa  xmm3,xmm1               ; xmm1=( 0  1  2 ... 13 14 15)
+        pslldq  xmm2,1                  ; xmm2=(--  0  1 ... 12 13 14)
+        psrldq  xmm3,1                  ; xmm3=( 1  2  3 ... 14 15 --)
 
-	por	xmm2,xmm7		; xmm2=(-1  0  1 ... 12 13 14)
-	por	xmm3,xmm6		; xmm3=( 1  2  3 ... 14 15 16)
+        por     xmm2,xmm7               ; xmm2=(-1  0  1 ... 12 13 14)
+        por     xmm3,xmm6               ; xmm3=( 1  2  3 ... 14 15 16)
 
-	movdqa	xmm7,xmm1
-	psrldq	xmm7,(SIZEOF_XMMWORD-1)	; xmm7=(15 -- -- ... -- -- --)
+        movdqa  xmm7,xmm1
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
 
-	movdqa    xmm4,xmm1
-	punpcklbw xmm1,xmm0		; xmm1=( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm0		; xmm4=( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm2
-	punpcklbw xmm2,xmm0		; xmm2=(-1  0  1  2  3  4  5  6)
-	punpckhbw xmm5,xmm0		; xmm5=( 7  8  9 10 11 12 13 14)
-	movdqa    xmm6,xmm3
-	punpcklbw xmm3,xmm0		; xmm3=( 1  2  3  4  5  6  7  8)
-	punpckhbw xmm6,xmm0		; xmm6=( 9 10 11 12 13 14 15 16)
+        movdqa    xmm4,xmm1
+        punpcklbw xmm1,xmm0             ; xmm1=( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm0             ; xmm4=( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm2
+        punpcklbw xmm2,xmm0             ; xmm2=(-1  0  1  2  3  4  5  6)
+        punpckhbw xmm5,xmm0             ; xmm5=( 7  8  9 10 11 12 13 14)
+        movdqa    xmm6,xmm3
+        punpcklbw xmm3,xmm0             ; xmm3=( 1  2  3  4  5  6  7  8)
+        punpckhbw xmm6,xmm0             ; xmm6=( 9 10 11 12 13 14 15 16)
 
-	pmullw	xmm1,[rel PW_THREE]
-	pmullw	xmm4,[rel PW_THREE]
-	paddw	xmm2,[rel PW_ONE]
-	paddw	xmm5,[rel PW_ONE]
-	paddw	xmm3,[rel PW_TWO]
-	paddw	xmm6,[rel PW_TWO]
+        pmullw  xmm1,[rel PW_THREE]
+        pmullw  xmm4,[rel PW_THREE]
+        paddw   xmm2,[rel PW_ONE]
+        paddw   xmm5,[rel PW_ONE]
+        paddw   xmm3,[rel PW_TWO]
+        paddw   xmm6,[rel PW_TWO]
 
-	paddw	xmm2,xmm1
-	paddw	xmm5,xmm4
-	psrlw	xmm2,2			; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm5,2			; xmm5=OutHE=(16 18 20 22 24 26 28 30)
-	paddw	xmm3,xmm1
-	paddw	xmm6,xmm4
-	psrlw	xmm3,2			; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm6,2			; xmm6=OutHO=(17 19 21 23 25 27 29 31)
+        paddw   xmm2,xmm1
+        paddw   xmm5,xmm4
+        psrlw   xmm2,2                  ; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,2                  ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
+        paddw   xmm3,xmm1
+        paddw   xmm6,xmm4
+        psrlw   xmm3,2                  ; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm6,2                  ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm3,BYTE_BIT
-	psllw	xmm6,BYTE_BIT
-	por	xmm2,xmm3		; xmm2=OutL=( 0  1  2 ... 13 14 15)
-	por	xmm5,xmm6		; xmm5=OutH=(16 17 18 ... 29 30 31)
+        psllw   xmm3,BYTE_BIT
+        psllw   xmm6,BYTE_BIT
+        por     xmm2,xmm3               ; xmm2=OutL=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm6               ; xmm5=OutH=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5
 
-	sub	rax, byte SIZEOF_XMMWORD
-	add	rsi, byte 1*SIZEOF_XMMWORD	; inptr
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr
-	cmp	rax, byte SIZEOF_XMMWORD
-	ja	near .columnloop
-	test	eax,eax
-	jnz	near .columnloop_last
+        sub     rax, byte SIZEOF_XMMWORD
+        add     rsi, byte 1*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      near .columnloop
+        test    eax,eax
+        jnz     near .columnloop_last
 
-	pop	rsi
-	pop	rdi
-	pop	rax
+        pop     rsi
+        pop     rdi
+        pop     rax
 
-	add	rsi, byte SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte SIZEOF_JSAMPROW	; output_data
-	dec	rcx				; rowctr
-	jg	near .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rcx                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -197,288 +197,288 @@
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY * output_data_ptr
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		4
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          4
 
-	align	16
-	global	EXTN(jsimd_h2v2_fancy_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = rsp before alignment (-> saved rbp)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
 
-	mov	rax, r11  ; colctr
-	test	rax,rax
-	jz	near .return
+        mov     rax, r11  ; colctr
+        test    rax,rax
+        jz      near .return
 
-	mov	rcx, r10	; rowctr
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10        ; rowctr
+        test    rcx,rcx
+        jz      near .return
 
-	mov	rsi, r12	; input_data
-	mov	rdi, r13
-	mov	rdi, JSAMPARRAY [rdi]			; output_data
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
 .rowloop:
-	push	rax					; colctr
-	push	rcx
-	push	rdi
-	push	rsi
+        push    rax                                     ; colctr
+        push    rcx
+        push    rdi
+        push    rsi
 
-	mov	rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW]	; inptr1(above)
-	mov	rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]	; inptr1(below)
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]	; outptr1
+        mov     rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]   ; outptr1
 
-	test	rax, SIZEOF_XMMWORD-1
-	jz	short .skip
-	push	rdx
-	mov	dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
-	pop	rdx
+        test    rax, SIZEOF_XMMWORD-1
+        jz      short .skip
+        push    rdx
+        mov     dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     rdx
 .skip:
-	; -- process the first column block
+        ; -- process the first column block
 
-	movdqa	xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD]	; xmm0=row[ 0][0]
-	movdqa	xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD]	; xmm1=row[-1][0]
-	movdqa	xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD]	; xmm2=row[+1][0]
+        movdqa  xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD]    ; xmm0=row[ 0][0]
+        movdqa  xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD]    ; xmm1=row[-1][0]
+        movdqa  xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; xmm2=row[+1][0]
 
-	pxor      xmm3,xmm3		; xmm3=(all 0's)
-	movdqa    xmm4,xmm0
-	punpcklbw xmm0,xmm3		; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm3		; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm1
-	punpcklbw xmm1,xmm3		; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm5,xmm3		; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm6,xmm2
-	punpcklbw xmm2,xmm3		; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm6,xmm3		; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
 
-	pmullw	xmm0,[rel PW_THREE]
-	pmullw	xmm4,[rel PW_THREE]
+        pmullw  xmm0,[rel PW_THREE]
+        pmullw  xmm4,[rel PW_THREE]
 
-	pcmpeqb	xmm7,xmm7
-	psrldq	xmm7,(SIZEOF_XMMWORD-2)
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-2)
 
-	paddw	xmm1,xmm0		; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm5,xmm4		; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
-	paddw	xmm2,xmm0		; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm6,xmm4		; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
 
-	movdqa	XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1	; temporarily save
-	movdqa	XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5	; the intermediate data
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6
+        movdqa  XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6
 
-	pand	xmm1,xmm7		; xmm1=( 0 -- -- -- -- -- -- --)
-	pand	xmm2,xmm7		; xmm2=( 0 -- -- -- -- -- -- --)
+        pand    xmm1,xmm7               ; xmm1=( 0 -- -- -- -- -- -- --)
+        pand    xmm2,xmm7               ; xmm2=( 0 -- -- -- -- -- -- --)
 
-	movdqa	XMMWORD [wk(0)], xmm1
-	movdqa	XMMWORD [wk(1)], xmm2
+        movdqa  XMMWORD [wk(0)], xmm1
+        movdqa  XMMWORD [wk(1)], xmm2
 
-	add	rax, byte SIZEOF_XMMWORD-1
-	and	rax, byte -SIZEOF_XMMWORD
-	cmp	rax, byte SIZEOF_XMMWORD
-	ja	short .columnloop
+        add     rax, byte SIZEOF_XMMWORD-1
+        and     rax, byte -SIZEOF_XMMWORD
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
 
 .columnloop_last:
-	; -- process the last column block
+        ; -- process the last column block
 
-	pcmpeqb	xmm1,xmm1
-	pslldq	xmm1,(SIZEOF_XMMWORD-2)
-	movdqa	xmm2,xmm1
+        pcmpeqb xmm1,xmm1
+        pslldq  xmm1,(SIZEOF_XMMWORD-2)
+        movdqa  xmm2,xmm1
 
-	pand	xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD]
-	pand	xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD]
+        pand    xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+        pand    xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD]
 
-	movdqa	XMMWORD [wk(2)], xmm1	; xmm1=(-- -- -- -- -- -- -- 15)
-	movdqa	XMMWORD [wk(3)], xmm2	; xmm2=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(2)], xmm1   ; xmm1=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
 
-	jmp	near .upsample
+        jmp     near .upsample
 
 .columnloop:
-	; -- process the next column block
+        ; -- process the next column block
 
-	movdqa	xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD]	; xmm0=row[ 0][1]
-	movdqa	xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD]	; xmm1=row[-1][1]
-	movdqa	xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]	; xmm2=row[+1][1]
+        movdqa  xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD]    ; xmm0=row[ 0][1]
+        movdqa  xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD]    ; xmm1=row[-1][1]
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]    ; xmm2=row[+1][1]
 
-	pxor      xmm3,xmm3		; xmm3=(all 0's)
-	movdqa    xmm4,xmm0
-	punpcklbw xmm0,xmm3		; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm3		; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm1
-	punpcklbw xmm1,xmm3		; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm5,xmm3		; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm6,xmm2
-	punpcklbw xmm2,xmm3		; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm6,xmm3		; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
 
-	pmullw	xmm0,[rel PW_THREE]
-	pmullw	xmm4,[rel PW_THREE]
+        pmullw  xmm0,[rel PW_THREE]
+        pmullw  xmm4,[rel PW_THREE]
 
-	paddw	xmm1,xmm0		; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm5,xmm4		; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
-	paddw	xmm2,xmm0		; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm6,xmm4		; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
 
-	movdqa	XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1	; temporarily save
-	movdqa	XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5	; the intermediate data
-	movdqa	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6
+        movdqa  XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6
 
-	pslldq	xmm1,(SIZEOF_XMMWORD-2)	; xmm1=(-- -- -- -- -- -- --  0)
-	pslldq	xmm2,(SIZEOF_XMMWORD-2)	; xmm2=(-- -- -- -- -- -- --  0)
+        pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- --  0)
+        pslldq  xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- --  0)
 
-	movdqa	XMMWORD [wk(2)], xmm1
-	movdqa	XMMWORD [wk(3)], xmm2
+        movdqa  XMMWORD [wk(2)], xmm1
+        movdqa  XMMWORD [wk(3)], xmm2
 
 .upsample:
-	; -- process the upper row
+        ; -- process the upper row
 
-	movdqa	xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD]
-	movdqa	xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+        movdqa  xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD]
 
-	movdqa	xmm0,xmm7		; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
-	movdqa	xmm4,xmm3		; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
-	psrldq	xmm0,2			; xmm0=( 1  2  3  4  5  6  7 --)
-	pslldq	xmm4,(SIZEOF_XMMWORD-2)	; xmm4=(-- -- -- -- -- -- --  8)
-	movdqa	xmm5,xmm7
-	movdqa	xmm6,xmm3
-	psrldq	xmm5,(SIZEOF_XMMWORD-2)	; xmm5=( 7 -- -- -- -- -- -- --)
-	pslldq	xmm6,2			; xmm6=(--  8  9 10 11 12 13 14)
+        movdqa  xmm0,xmm7               ; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm4,xmm3               ; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm0,2                  ; xmm0=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- --  8)
+        movdqa  xmm5,xmm7
+        movdqa  xmm6,xmm3
+        psrldq  xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm6,2                  ; xmm6=(--  8  9 10 11 12 13 14)
 
-	por	xmm0,xmm4		; xmm0=( 1  2  3  4  5  6  7  8)
-	por	xmm5,xmm6		; xmm5=( 7  8  9 10 11 12 13 14)
+        por     xmm0,xmm4               ; xmm0=( 1  2  3  4  5  6  7  8)
+        por     xmm5,xmm6               ; xmm5=( 7  8  9 10 11 12 13 14)
 
-	movdqa	xmm1,xmm7
-	movdqa	xmm2,xmm3
-	pslldq	xmm1,2			; xmm1=(--  0  1  2  3  4  5  6)
-	psrldq	xmm2,2			; xmm2=( 9 10 11 12 13 14 15 --)
-	movdqa	xmm4,xmm3
-	psrldq	xmm4,(SIZEOF_XMMWORD-2)	; xmm4=(15 -- -- -- -- -- -- --)
+        movdqa  xmm1,xmm7
+        movdqa  xmm2,xmm3
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm2,2                  ; xmm2=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm4,xmm3
+        psrldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
 
-	por	xmm1, XMMWORD [wk(0)]	; xmm1=(-1  0  1  2  3  4  5  6)
-	por	xmm2, XMMWORD [wk(2)]	; xmm2=( 9 10 11 12 13 14 15 16)
+        por     xmm1, XMMWORD [wk(0)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm2, XMMWORD [wk(2)]   ; xmm2=( 9 10 11 12 13 14 15 16)
 
-	movdqa	XMMWORD [wk(0)], xmm4
+        movdqa  XMMWORD [wk(0)], xmm4
 
-	pmullw	xmm7,[rel PW_THREE]
-	pmullw	xmm3,[rel PW_THREE]
-	paddw	xmm1,[rel PW_EIGHT]
-	paddw	xmm5,[rel PW_EIGHT]
-	paddw	xmm0,[rel PW_SEVEN]
-	paddw	xmm2,[rel PW_SEVEN]
+        pmullw  xmm7,[rel PW_THREE]
+        pmullw  xmm3,[rel PW_THREE]
+        paddw   xmm1,[rel PW_EIGHT]
+        paddw   xmm5,[rel PW_EIGHT]
+        paddw   xmm0,[rel PW_SEVEN]
+        paddw   xmm2,[rel PW_SEVEN]
 
-	paddw	xmm1,xmm7
-	paddw	xmm5,xmm3
-	psrlw	xmm1,4			; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm5,4			; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
-	paddw	xmm0,xmm7
-	paddw	xmm2,xmm3
-	psrlw	xmm0,4			; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm2,4			; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
+        paddw   xmm1,xmm7
+        paddw   xmm5,xmm3
+        psrlw   xmm1,4                  ; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,4                  ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm0,xmm7
+        paddw   xmm2,xmm3
+        psrlw   xmm0,4                  ; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm2,4                  ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm0,BYTE_BIT
-	psllw	xmm2,BYTE_BIT
-	por	xmm1,xmm0		; xmm1=Out0L=( 0  1  2 ... 13 14 15)
-	por	xmm5,xmm2		; xmm5=Out0H=(16 17 18 ... 29 30 31)
+        psllw   xmm0,BYTE_BIT
+        psllw   xmm2,BYTE_BIT
+        por     xmm1,xmm0               ; xmm1=Out0L=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm2               ; xmm5=Out0H=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5
+        movdqa  XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5
 
-	; -- process the lower row
+        ; -- process the lower row
 
-	movdqa	xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD]
-	movdqa	xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD]
+        movdqa  xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD]
+        movdqa  xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD]
 
-	movdqa	xmm7,xmm6		; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
-	movdqa	xmm3,xmm4		; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
-	psrldq	xmm7,2			; xmm7=( 1  2  3  4  5  6  7 --)
-	pslldq	xmm3,(SIZEOF_XMMWORD-2)	; xmm3=(-- -- -- -- -- -- --  8)
-	movdqa	xmm0,xmm6
-	movdqa	xmm2,xmm4
-	psrldq	xmm0,(SIZEOF_XMMWORD-2)	; xmm0=( 7 -- -- -- -- -- -- --)
-	pslldq	xmm2,2			; xmm2=(--  8  9 10 11 12 13 14)
+        movdqa  xmm7,xmm6               ; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm3,xmm4               ; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm7,2                  ; xmm7=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- --  8)
+        movdqa  xmm0,xmm6
+        movdqa  xmm2,xmm4
+        psrldq  xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm2,2                  ; xmm2=(--  8  9 10 11 12 13 14)
 
-	por	xmm7,xmm3		; xmm7=( 1  2  3  4  5  6  7  8)
-	por	xmm0,xmm2		; xmm0=( 7  8  9 10 11 12 13 14)
+        por     xmm7,xmm3               ; xmm7=( 1  2  3  4  5  6  7  8)
+        por     xmm0,xmm2               ; xmm0=( 7  8  9 10 11 12 13 14)
 
-	movdqa	xmm1,xmm6
-	movdqa	xmm5,xmm4
-	pslldq	xmm1,2			; xmm1=(--  0  1  2  3  4  5  6)
-	psrldq	xmm5,2			; xmm5=( 9 10 11 12 13 14 15 --)
-	movdqa	xmm3,xmm4
-	psrldq	xmm3,(SIZEOF_XMMWORD-2)	; xmm3=(15 -- -- -- -- -- -- --)
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm4
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm5,2                  ; xmm5=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm3,xmm4
+        psrldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
 
-	por	xmm1, XMMWORD [wk(1)]	; xmm1=(-1  0  1  2  3  4  5  6)
-	por	xmm5, XMMWORD [wk(3)]	; xmm5=( 9 10 11 12 13 14 15 16)
+        por     xmm1, XMMWORD [wk(1)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm5, XMMWORD [wk(3)]   ; xmm5=( 9 10 11 12 13 14 15 16)
 
-	movdqa	XMMWORD [wk(1)], xmm3
+        movdqa  XMMWORD [wk(1)], xmm3
 
-	pmullw	xmm6,[rel PW_THREE]
-	pmullw	xmm4,[rel PW_THREE]
-	paddw	xmm1,[rel PW_EIGHT]
-	paddw	xmm0,[rel PW_EIGHT]
-	paddw	xmm7,[rel PW_SEVEN]
-	paddw	xmm5,[rel PW_SEVEN]
+        pmullw  xmm6,[rel PW_THREE]
+        pmullw  xmm4,[rel PW_THREE]
+        paddw   xmm1,[rel PW_EIGHT]
+        paddw   xmm0,[rel PW_EIGHT]
+        paddw   xmm7,[rel PW_SEVEN]
+        paddw   xmm5,[rel PW_SEVEN]
 
-	paddw	xmm1,xmm6
-	paddw	xmm0,xmm4
-	psrlw	xmm1,4			; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm0,4			; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
-	paddw	xmm7,xmm6
-	paddw	xmm5,xmm4
-	psrlw	xmm7,4			; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm5,4			; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
+        paddw   xmm1,xmm6
+        paddw   xmm0,xmm4
+        psrlw   xmm1,4                  ; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm0,4                  ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm7,xmm6
+        paddw   xmm5,xmm4
+        psrlw   xmm7,4                  ; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm5,4                  ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm7,BYTE_BIT
-	psllw	xmm5,BYTE_BIT
-	por	xmm1,xmm7		; xmm1=Out1L=( 0  1  2 ... 13 14 15)
-	por	xmm0,xmm5		; xmm0=Out1H=(16 17 18 ... 29 30 31)
+        psllw   xmm7,BYTE_BIT
+        psllw   xmm5,BYTE_BIT
+        por     xmm1,xmm7               ; xmm1=Out1L=( 0  1  2 ... 13 14 15)
+        por     xmm0,xmm5               ; xmm0=Out1H=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0
 
-	sub	rax, byte SIZEOF_XMMWORD
-	add	rcx, byte 1*SIZEOF_XMMWORD	; inptr1(above)
-	add	rbx, byte 1*SIZEOF_XMMWORD	; inptr0
-	add	rsi, byte 1*SIZEOF_XMMWORD	; inptr1(below)
-	add	rdx, byte 2*SIZEOF_XMMWORD	; outptr0
-	add	rdi, byte 2*SIZEOF_XMMWORD	; outptr1
-	cmp	rax, byte SIZEOF_XMMWORD
-	ja	near .columnloop
-	test	rax,rax
-	jnz	near .columnloop_last
+        sub     rax, byte SIZEOF_XMMWORD
+        add     rcx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+        add     rbx, byte 1*SIZEOF_XMMWORD      ; inptr0
+        add     rsi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+        add     rdx, byte 2*SIZEOF_XMMWORD      ; outptr0
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr1
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      near .columnloop
+        test    rax,rax
+        jnz     near .columnloop_last
 
-	pop	rsi
-	pop	rdi
-	pop	rcx
-	pop	rax
+        pop     rsi
+        pop     rdi
+        pop     rcx
+        pop     rax
 
-	add	rsi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	rcx, byte 2			; rowctr
-	jg	near .rowloop
+        add     rsi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     rcx, byte 2                     ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- rsp before alignment
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -497,77 +497,77 @@
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
 
-	mov	rdx, r11
-	add	rdx, byte (2*SIZEOF_XMMWORD)-1
-	and	rdx, byte -(2*SIZEOF_XMMWORD)
-	jz	near .return
+        mov     rdx, r11
+        add     rdx, byte (2*SIZEOF_XMMWORD)-1
+        and     rdx, byte -(2*SIZEOF_XMMWORD)
+        jz      near .return
 
-	mov	rcx, r10	; rowctr
-	test	rcx,rcx
-	jz	short .return
+        mov     rcx, r10        ; rowctr
+        test    rcx,rcx
+        jz      short .return
 
-	mov	rsi, r12 ; input_data
-	mov	rdi, r13
-	mov	rdi, JSAMPARRAY [rdi]			; output_data
+        mov     rsi, r12 ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
 .rowloop:
-	push	rdi
-	push	rsi
+        push    rdi
+        push    rsi
 
-	mov	rsi, JSAMPROW [rsi]		; inptr
-	mov	rdi, JSAMPROW [rdi]		; outptr
-	mov	rax,rdx				; colctr
+        mov     rsi, JSAMPROW [rsi]             ; inptr
+        mov     rdi, JSAMPROW [rdi]             ; outptr
+        mov     rax,rdx                         ; colctr
 .columnloop:
 
-	movdqa	xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
 
-	movdqa    xmm1,xmm0
-	punpcklbw xmm0,xmm0
-	punpckhbw xmm1,xmm1
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0
+        punpckhbw xmm1,xmm1
 
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
 
-	sub	rax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	movdqa	xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]
 
-	movdqa    xmm3,xmm2
-	punpcklbw xmm2,xmm2
-	punpckhbw xmm3,xmm3
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2
+        punpckhbw xmm3,xmm3
 
-	movdqa	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
 
-	sub	rax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	add	rsi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	rdi, byte 4*SIZEOF_XMMWORD	; outptr
-	jmp	short .columnloop
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 4*SIZEOF_XMMWORD      ; outptr
+        jmp     short .columnloop
 
 .nextrow:
-	pop	rsi
-	pop	rdi
+        pop     rsi
+        pop     rdi
 
-	add	rsi, byte SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte SIZEOF_JSAMPROW	; output_data
-	dec	rcx				; rowctr
-	jg	short .rowloop
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rcx                             ; rowctr
+        jg      short .rowloop
 
 .return:
-	uncollect_args
-	pop	rbp
-	ret
+        uncollect_args
+        pop     rbp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -586,86 +586,86 @@
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v2_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
+        push    rbx                     ; rbx is used as outptr0 below; popped at .return
 
-	mov	rdx, r11
-	add	rdx, byte (2*SIZEOF_XMMWORD)-1
-	and	rdx, byte -(2*SIZEOF_XMMWORD)
-	jz	near .return
+        mov     rdx, r11
+        add     rdx, byte (2*SIZEOF_XMMWORD)-1  ; round rdx up to a multiple
+        and     rdx, byte -(2*SIZEOF_XMMWORD)   ; of 2*SIZEOF_XMMWORD
+        jz      near .return                    ; rounded count is zero: nothing to do
 
-	mov	rcx, r10	; rowctr
-	test	rcx,rcx
-	jz	near .return
+        mov     rcx, r10        ; rowctr
+        test    rcx,rcx
+        jz      near .return    ; no rows: nothing to do
 
-	mov	rsi, r12	; input_data
-	mov	rdi, r13
-	mov	rdi, JSAMPARRAY [rdi]			; output_data
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
 .rowloop:
-	push	rdi
-	push	rsi
+        push    rdi             ; save output_data position (restored at .nextrow)
+        push    rsi             ; save input_data position (restored at .nextrow)
 
-	mov	rsi, JSAMPROW [rsi]			; inptr
-	mov	rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]	; outptr1
-	mov	rax,rdx					; colctr
+        mov     rsi, JSAMPROW [rsi]                     ; inptr
+        mov     rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     rax,rdx                                 ; colctr
 .columnloop:
 
-	movdqa	xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; first input block
 
-	movdqa    xmm1,xmm0
-	punpcklbw xmm0,xmm0
-	punpckhbw xmm1,xmm1
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0     ; low 8 bytes, each byte duplicated
+        punpckhbw xmm1,xmm1     ; high 8 bytes, each byte duplicated
 
-	movdqa	XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0    ; same data is stored
+        movdqa  XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1    ; to outptr0 ...
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0    ; ... and to outptr1
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
 
-	sub	rax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     rax, byte 2*SIZEOF_XMMWORD      ; colctr -= 2*SIZEOF_XMMWORD
+        jz      short .nextrow                  ; row finished
 
-	movdqa	xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]    ; second input block
 
-	movdqa    xmm3,xmm2
-	punpcklbw xmm2,xmm2
-	punpckhbw xmm3,xmm3
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2     ; low 8 bytes, each byte duplicated
+        punpckhbw xmm3,xmm3     ; high 8 bytes, each byte duplicated
 
-	movdqa	XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3
-	movdqa	XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2    ; same data is stored
+        movdqa  XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3    ; to outptr0 ...
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2    ; ... and to outptr1
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
 
-	sub	rax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     rax, byte 2*SIZEOF_XMMWORD      ; colctr -= 2*SIZEOF_XMMWORD
+        jz      short .nextrow                  ; row finished
 
-	add	rsi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	rbx, byte 4*SIZEOF_XMMWORD	; outptr0
-	add	rdi, byte 4*SIZEOF_XMMWORD	; outptr1
-	jmp	short .columnloop
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rbx, byte 4*SIZEOF_XMMWORD      ; outptr0
+        add     rdi, byte 4*SIZEOF_XMMWORD      ; outptr1
+        jmp     short .columnloop
 
 .nextrow:
-	pop	rsi
-	pop	rdi
+        pop     rsi             ; restore input_data position
+        pop     rdi             ; restore output_data position
 
-	add	rsi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	rdi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	rcx, byte 2			; rowctr
-	jg	near .rowloop
+        add     rsi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     rcx, byte 2                     ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	rbx
-	uncollect_args
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jdsamss2.asm b/simd/jdsamss2.asm
index b5c863b..2259026 100644
--- a/simd/jdsamss2.asm
+++ b/simd/jdsamss2.asm
@@ -19,24 +19,24 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fancy_upsample_sse2)
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
 
-PW_ONE		times 8 dw  1
-PW_TWO		times 8 dw  2
-PW_THREE	times 8 dw  3
-PW_SEVEN	times 8 dw  7
-PW_EIGHT	times 8 dw  8
+PW_ONE          times 8 dw  1   ; 8 words of 1: added before the >>2 (h2v1, even outputs)
+PW_TWO          times 8 dw  2   ; 8 words of 2: added before the >>2 (h2v1, odd outputs)
+PW_THREE        times 8 dw  3   ; 8 words of 3: pmullw multiplier in both routines
+PW_SEVEN        times 8 dw  7   ; 8 words of 7: added before the >>4 (h2v2, odd outputs)
+PW_EIGHT        times 8 dw  8   ; 8 words of 8: added before the >>4 (h2v2, even outputs)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32              ; the routines below use 32-bit registers
 ;
 ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
 ;
@@ -52,144 +52,144 @@
 ;                                 JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define downsamp_width(b)	(b)+12	; JDIMENSION downsampled_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_fancy_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
-	push	ebp
-	mov	ebp,esp
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	mov	eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
-	test	eax,eax
-	jz	near .return
+        mov     eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
+        test    eax,eax
+        jz      near .return    ; zero width: nothing to do
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return    ; no rows: nothing to do
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	eax			; colctr
-	push	edi
-	push	esi
+        push    eax                     ; colctr
+        push    edi                     ; save output_data position
+        push    esi                     ; save input_data position
 
-	mov	esi, JSAMPROW [esi]	; inptr
-	mov	edi, JSAMPROW [edi]	; outptr
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr
 
-	test	eax, SIZEOF_XMMWORD-1
-	jz	short .skip
-	mov	dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
+        test    eax, SIZEOF_XMMWORD-1   ; is colctr a multiple of SIZEOF_XMMWORD?
+        jz      short .skip             ; yes: no dummy sample is written
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]        ; dl = last sample
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
 .skip:
-	pxor	xmm0,xmm0		; xmm0=(all 0's)
-	pcmpeqb	xmm7,xmm7
-	psrldq	xmm7,(SIZEOF_XMMWORD-1)
-	pand	xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm0,xmm0               ; xmm0=(all 0's)
+        pcmpeqb xmm7,xmm7               ; xmm7=(all 1's)
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; mask: only the lowest byte set
+        pand    xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm7=( 0 -- -- ... -- -- --)
 
-	add	eax, byte SIZEOF_XMMWORD-1
-	and	eax, byte -SIZEOF_XMMWORD
-	cmp	eax, byte SIZEOF_XMMWORD
-	ja	short .columnloop
-	alignx	16,7
+        add     eax, byte SIZEOF_XMMWORD-1      ; round colctr up to a multiple
+        and     eax, byte -SIZEOF_XMMWORD       ; of SIZEOF_XMMWORD
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      short .columnloop               ; more than one block in this row
+        alignx  16,7
 
 .columnloop_last:
-	pcmpeqb	xmm6,xmm6
-	pslldq	xmm6,(SIZEOF_XMMWORD-1)
-	pand	xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	jmp	short .upsample
-	alignx	16,7
+        pcmpeqb xmm6,xmm6               ; xmm6=(all 1's)
+        pslldq  xmm6,(SIZEOF_XMMWORD-1) ; mask: only the highest byte set
+        pand    xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm6=(-- -- -- ... -- -- 15)
+        jmp     short .upsample
+        alignx  16,7
 
 .columnloop:
-	movdqa	xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
-	pslldq	xmm6,(SIZEOF_XMMWORD-1)
+        movdqa  xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; xmm6=next block (16 17 ...)
+        pslldq  xmm6,(SIZEOF_XMMWORD-1)                 ; xmm6=(-- -- -- ... -- -- 16)
 
 .upsample:
-	movdqa	xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
-	movdqa	xmm2,xmm1
-	movdqa	xmm3,xmm1		; xmm1=( 0  1  2 ... 13 14 15)
-	pslldq	xmm2,1			; xmm2=(--  0  1 ... 12 13 14)
-	psrldq	xmm3,1			; xmm3=( 1  2  3 ... 14 15 --)
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2,xmm1
+        movdqa  xmm3,xmm1               ; xmm1=( 0  1  2 ... 13 14 15)
+        pslldq  xmm2,1                  ; xmm2=(--  0  1 ... 12 13 14)
+        psrldq  xmm3,1                  ; xmm3=( 1  2  3 ... 14 15 --)
 
-	por	xmm2,xmm7		; xmm2=(-1  0  1 ... 12 13 14)
-	por	xmm3,xmm6		; xmm3=( 1  2  3 ... 14 15 16)
+        por     xmm2,xmm7               ; xmm2=(-1  0  1 ... 12 13 14)
+        por     xmm3,xmm6               ; xmm3=( 1  2  3 ... 14 15 16)
 
-	movdqa	xmm7,xmm1
-	psrldq	xmm7,(SIZEOF_XMMWORD-1)	; xmm7=(15 -- -- ... -- -- --)
+        movdqa  xmm7,xmm1
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
 
-	movdqa    xmm4,xmm1
-	punpcklbw xmm1,xmm0		; xmm1=( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm0		; xmm4=( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm2
-	punpcklbw xmm2,xmm0		; xmm2=(-1  0  1  2  3  4  5  6)
-	punpckhbw xmm5,xmm0		; xmm5=( 7  8  9 10 11 12 13 14)
-	movdqa    xmm6,xmm3
-	punpcklbw xmm3,xmm0		; xmm3=( 1  2  3  4  5  6  7  8)
-	punpckhbw xmm6,xmm0		; xmm6=( 9 10 11 12 13 14 15 16)
+        movdqa    xmm4,xmm1
+        punpcklbw xmm1,xmm0             ; xmm1=( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm0             ; xmm4=( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm2
+        punpcklbw xmm2,xmm0             ; xmm2=(-1  0  1  2  3  4  5  6)
+        punpckhbw xmm5,xmm0             ; xmm5=( 7  8  9 10 11 12 13 14)
+        movdqa    xmm6,xmm3
+        punpcklbw xmm3,xmm0             ; xmm3=( 1  2  3  4  5  6  7  8)
+        punpckhbw xmm6,xmm0             ; xmm6=( 9 10 11 12 13 14 15 16)
 
-	pmullw	xmm1,[GOTOFF(ebx,PW_THREE)]
-	pmullw	xmm4,[GOTOFF(ebx,PW_THREE)]
-	paddw	xmm2,[GOTOFF(ebx,PW_ONE)]
-	paddw	xmm5,[GOTOFF(ebx,PW_ONE)]
-	paddw	xmm3,[GOTOFF(ebx,PW_TWO)]
-	paddw	xmm6,[GOTOFF(ebx,PW_TWO)]
+        pmullw  xmm1,[GOTOFF(ebx,PW_THREE)]     ; 3 * current sample
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+        paddw   xmm2,[GOTOFF(ebx,PW_ONE)]       ; left neighbor + 1
+        paddw   xmm5,[GOTOFF(ebx,PW_ONE)]
+        paddw   xmm3,[GOTOFF(ebx,PW_TWO)]       ; right neighbor + 2
+        paddw   xmm6,[GOTOFF(ebx,PW_TWO)]
 
-	paddw	xmm2,xmm1
-	paddw	xmm5,xmm4
-	psrlw	xmm2,2			; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm5,2			; xmm5=OutHE=(16 18 20 22 24 26 28 30)
-	paddw	xmm3,xmm1
-	paddw	xmm6,xmm4
-	psrlw	xmm3,2			; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm6,2			; xmm6=OutHO=(17 19 21 23 25 27 29 31)
+        paddw   xmm2,xmm1               ; 3*cur + left + 1
+        paddw   xmm5,xmm4
+        psrlw   xmm2,2                  ; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,2                  ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
+        paddw   xmm3,xmm1               ; 3*cur + right + 2
+        paddw   xmm6,xmm4
+        psrlw   xmm3,2                  ; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm6,2                  ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm3,BYTE_BIT
-	psllw	xmm6,BYTE_BIT
-	por	xmm2,xmm3		; xmm2=OutL=( 0  1  2 ... 13 14 15)
-	por	xmm5,xmm6		; xmm5=OutH=(16 17 18 ... 29 30 31)
+        psllw   xmm3,BYTE_BIT           ; shift odd outputs up by BYTE_BIT within each word
+        psllw   xmm6,BYTE_BIT
+        por     xmm2,xmm3               ; xmm2=OutL=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm6               ; xmm5=OutH=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5
 
-	sub	eax, byte SIZEOF_XMMWORD
-	add	esi, byte 1*SIZEOF_XMMWORD	; inptr
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr
-	cmp	eax, byte SIZEOF_XMMWORD
-	ja	near .columnloop
-	test	eax,eax
-	jnz	near .columnloop_last
+        sub     eax, byte SIZEOF_XMMWORD        ; colctr -= one block
+        add     esi, byte 1*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      near .columnloop                ; more than one block left
+        test    eax,eax
+        jnz     near .columnloop_last           ; exactly one block left
 
-	pop	esi
-	pop	edi
-	pop	eax
+        pop     esi                     ; restore input_data position
+        pop     edi                     ; restore output_data position
+        pop     eax                     ; restore colctr
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	ecx				; rowctr
-	jg	near .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -203,322 +203,322 @@
 ;                                 JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define downsamp_width(b)	(b)+12	; JDIMENSION downsampled_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		4
-%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          4
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
 
-	align	16
-	global	EXTN(jsimd_h2v2_fancy_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	eax		; make a room for GOT address
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx			; get GOT address
-	movpic	POINTER [gotptr], ebx	; save GOT address
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
 
-	mov	edx,eax				; edx = original ebp
-	mov	eax, JDIMENSION [downsamp_width(edx)]  ; colctr
-	test	eax,eax
-	jz	near .return
+        mov     edx,eax                         ; edx = original ebp
+        mov     eax, JDIMENSION [downsamp_width(edx)]  ; colctr
+        test    eax,eax
+        jz      near .return
 
-	mov	ecx, INT [max_v_samp(edx)]	; rowctr
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, INT [max_v_samp(edx)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return
 
-	mov	esi, JSAMPARRAY [input_data(edx)]	; input_data
-	mov	edi, POINTER [output_data_ptr(edx)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(edx)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	eax					; colctr
-	push	ecx
-	push	edi
-	push	esi
+        push    eax                                     ; colctr
+        push    ecx
+        push    edi
+        push    esi
 
-	mov	ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]	; inptr1(above)
-	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; inptr0
-	mov	esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; inptr1(below)
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]	; outptr1
+        mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
 
-	test	eax, SIZEOF_XMMWORD-1
-	jz	short .skip
-	push	edx
-	mov	dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
-	mov	dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
-	mov	JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl	; insert a dummy sample
-	pop	edx
+        test    eax, SIZEOF_XMMWORD-1
+        jz      short .skip
+        push    edx
+        mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     edx
 .skip:
-	; -- process the first column block
+        ; -- process the first column block
 
-	movdqa	xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD]	; xmm0=row[ 0][0]
-	movdqa	xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]	; xmm1=row[-1][0]
-	movdqa	xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]	; xmm2=row[+1][0]
+        movdqa  xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD]    ; xmm0=row[ 0][0]
+        movdqa  xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]    ; xmm1=row[-1][0]
+        movdqa  xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm2=row[+1][0]
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pxor      xmm3,xmm3		; xmm3=(all 0's)
-	movdqa    xmm4,xmm0
-	punpcklbw xmm0,xmm3		; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm3		; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm1
-	punpcklbw xmm1,xmm3		; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm5,xmm3		; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm6,xmm2
-	punpcklbw xmm2,xmm3		; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm6,xmm3		; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
 
-	pmullw	xmm0,[GOTOFF(ebx,PW_THREE)]
-	pmullw	xmm4,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
 
-	pcmpeqb	xmm7,xmm7
-	psrldq	xmm7,(SIZEOF_XMMWORD-2)
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-2)
 
-	paddw	xmm1,xmm0		; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm5,xmm4		; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
-	paddw	xmm2,xmm0		; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm6,xmm4		; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
 
-	movdqa	XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1	; temporarily save
-	movdqa	XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5	; the intermediate data
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6
+        movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6
 
-	pand	xmm1,xmm7		; xmm1=( 0 -- -- -- -- -- -- --)
-	pand	xmm2,xmm7		; xmm2=( 0 -- -- -- -- -- -- --)
+        pand    xmm1,xmm7               ; xmm1=( 0 -- -- -- -- -- -- --)
+        pand    xmm2,xmm7               ; xmm2=( 0 -- -- -- -- -- -- --)
 
-	movdqa	XMMWORD [wk(0)], xmm1
-	movdqa	XMMWORD [wk(1)], xmm2
+        movdqa  XMMWORD [wk(0)], xmm1
+        movdqa  XMMWORD [wk(1)], xmm2
 
-	poppic	ebx
+        poppic  ebx
 
-	add	eax, byte SIZEOF_XMMWORD-1
-	and	eax, byte -SIZEOF_XMMWORD
-	cmp	eax, byte SIZEOF_XMMWORD
-	ja	short .columnloop
-	alignx	16,7
+        add     eax, byte SIZEOF_XMMWORD-1
+        and     eax, byte -SIZEOF_XMMWORD
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
+        alignx  16,7
 
 .columnloop_last:
-	; -- process the last column block
+        ; -- process the last column block
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pcmpeqb	xmm1,xmm1
-	pslldq	xmm1,(SIZEOF_XMMWORD-2)
-	movdqa	xmm2,xmm1
+        pcmpeqb xmm1,xmm1
+        pslldq  xmm1,(SIZEOF_XMMWORD-2)
+        movdqa  xmm2,xmm1
 
-	pand	xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD]
-	pand	xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD]
+        pand    xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD]
+        pand    xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD]
 
-	movdqa	XMMWORD [wk(2)], xmm1	; xmm1=(-- -- -- -- -- -- -- 15)
-	movdqa	XMMWORD [wk(3)], xmm2	; xmm2=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(2)], xmm1   ; xmm1=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
 
-	jmp	near .upsample
-	alignx	16,7
+        jmp     near .upsample
+        alignx  16,7
 
 .columnloop:
-	; -- process the next column block
+        ; -- process the next column block
 
-	movdqa	xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD]	; xmm0=row[ 0][1]
-	movdqa	xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]	; xmm1=row[-1][1]
-	movdqa	xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]	; xmm2=row[+1][1]
+        movdqa  xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD]    ; xmm0=row[ 0][1]
+        movdqa  xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]    ; xmm1=row[-1][1]
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; xmm2=row[+1][1]
 
-	pushpic	ebx
-	movpic	ebx, POINTER [gotptr]	; load GOT address
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
 
-	pxor      xmm3,xmm3		; xmm3=(all 0's)
-	movdqa    xmm4,xmm0
-	punpcklbw xmm0,xmm3		; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm4,xmm3		; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm5,xmm1
-	punpcklbw xmm1,xmm3		; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm5,xmm3		; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
-	movdqa    xmm6,xmm2
-	punpcklbw xmm2,xmm3		; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
-	punpckhbw xmm6,xmm3		; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
 
-	pmullw	xmm0,[GOTOFF(ebx,PW_THREE)]
-	pmullw	xmm4,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
 
-	paddw	xmm1,xmm0		; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm5,xmm4		; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
-	paddw	xmm2,xmm0		; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
-	paddw	xmm6,xmm4		; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
 
-	movdqa	XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1	; temporarily save
-	movdqa	XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5	; the intermediate data
-	movdqa	XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6
+        movdqa  XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6
 
-	pslldq	xmm1,(SIZEOF_XMMWORD-2)	; xmm1=(-- -- -- -- -- -- --  0)
-	pslldq	xmm2,(SIZEOF_XMMWORD-2)	; xmm2=(-- -- -- -- -- -- --  0)
+        pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- --  0)
+        pslldq  xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- --  0)
 
-	movdqa	XMMWORD [wk(2)], xmm1
-	movdqa	XMMWORD [wk(3)], xmm2
+        movdqa  XMMWORD [wk(2)], xmm1
+        movdqa  XMMWORD [wk(3)], xmm2
 
 .upsample:
-	; -- process the upper row
+        ; -- process the upper row
 
-	movdqa	xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD]
-	movdqa	xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD]
+        movdqa  xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD]
 
-	movdqa	xmm0,xmm7		; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
-	movdqa	xmm4,xmm3		; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
-	psrldq	xmm0,2			; xmm0=( 1  2  3  4  5  6  7 --)
-	pslldq	xmm4,(SIZEOF_XMMWORD-2)	; xmm4=(-- -- -- -- -- -- --  8)
-	movdqa	xmm5,xmm7
-	movdqa	xmm6,xmm3
-	psrldq	xmm5,(SIZEOF_XMMWORD-2)	; xmm5=( 7 -- -- -- -- -- -- --)
-	pslldq	xmm6,2			; xmm6=(--  8  9 10 11 12 13 14)
+        movdqa  xmm0,xmm7               ; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm4,xmm3               ; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm0,2                  ; xmm0=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- --  8)
+        movdqa  xmm5,xmm7
+        movdqa  xmm6,xmm3
+        psrldq  xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm6,2                  ; xmm6=(--  8  9 10 11 12 13 14)
 
-	por	xmm0,xmm4		; xmm0=( 1  2  3  4  5  6  7  8)
-	por	xmm5,xmm6		; xmm5=( 7  8  9 10 11 12 13 14)
+        por     xmm0,xmm4               ; xmm0=( 1  2  3  4  5  6  7  8)
+        por     xmm5,xmm6               ; xmm5=( 7  8  9 10 11 12 13 14)
 
-	movdqa	xmm1,xmm7
-	movdqa	xmm2,xmm3
-	pslldq	xmm1,2			; xmm1=(--  0  1  2  3  4  5  6)
-	psrldq	xmm2,2			; xmm2=( 9 10 11 12 13 14 15 --)
-	movdqa	xmm4,xmm3
-	psrldq	xmm4,(SIZEOF_XMMWORD-2)	; xmm4=(15 -- -- -- -- -- -- --)
+        movdqa  xmm1,xmm7
+        movdqa  xmm2,xmm3
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm2,2                  ; xmm2=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm4,xmm3
+        psrldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
 
-	por	xmm1, XMMWORD [wk(0)]	; xmm1=(-1  0  1  2  3  4  5  6)
-	por	xmm2, XMMWORD [wk(2)]	; xmm2=( 9 10 11 12 13 14 15 16)
+        por     xmm1, XMMWORD [wk(0)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm2, XMMWORD [wk(2)]   ; xmm2=( 9 10 11 12 13 14 15 16)
 
-	movdqa	XMMWORD [wk(0)], xmm4
+        movdqa  XMMWORD [wk(0)], xmm4
 
-	pmullw	xmm7,[GOTOFF(ebx,PW_THREE)]
-	pmullw	xmm3,[GOTOFF(ebx,PW_THREE)]
-	paddw	xmm1,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	xmm5,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	xmm0,[GOTOFF(ebx,PW_SEVEN)]
-	paddw	xmm2,[GOTOFF(ebx,PW_SEVEN)]
+        pmullw  xmm7,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm3,[GOTOFF(ebx,PW_THREE)]
+        paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm5,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm0,[GOTOFF(ebx,PW_SEVEN)]
+        paddw   xmm2,[GOTOFF(ebx,PW_SEVEN)]
 
-	paddw	xmm1,xmm7
-	paddw	xmm5,xmm3
-	psrlw	xmm1,4			; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm5,4			; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
-	paddw	xmm0,xmm7
-	paddw	xmm2,xmm3
-	psrlw	xmm0,4			; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm2,4			; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
+        paddw   xmm1,xmm7
+        paddw   xmm5,xmm3
+        psrlw   xmm1,4                  ; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,4                  ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm0,xmm7
+        paddw   xmm2,xmm3
+        psrlw   xmm0,4                  ; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm2,4                  ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm0,BYTE_BIT
-	psllw	xmm2,BYTE_BIT
-	por	xmm1,xmm0		; xmm1=Out0L=( 0  1  2 ... 13 14 15)
-	por	xmm5,xmm2		; xmm5=Out0H=(16 17 18 ... 29 30 31)
+        psllw   xmm0,BYTE_BIT
+        psllw   xmm2,BYTE_BIT
+        por     xmm1,xmm0               ; xmm1=Out0L=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm2               ; xmm5=Out0H=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5
+        movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5
 
-	; -- process the lower row
+        ; -- process the lower row
 
-	movdqa	xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD]
-	movdqa	xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD]
+        movdqa  xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD]
+        movdqa  xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD]
 
-	movdqa	xmm7,xmm6		; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
-	movdqa	xmm3,xmm4		; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
-	psrldq	xmm7,2			; xmm7=( 1  2  3  4  5  6  7 --)
-	pslldq	xmm3,(SIZEOF_XMMWORD-2)	; xmm3=(-- -- -- -- -- -- --  8)
-	movdqa	xmm0,xmm6
-	movdqa	xmm2,xmm4
-	psrldq	xmm0,(SIZEOF_XMMWORD-2)	; xmm0=( 7 -- -- -- -- -- -- --)
-	pslldq	xmm2,2			; xmm2=(--  8  9 10 11 12 13 14)
+        movdqa  xmm7,xmm6               ; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm3,xmm4               ; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm7,2                  ; xmm7=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- --  8)
+        movdqa  xmm0,xmm6
+        movdqa  xmm2,xmm4
+        psrldq  xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm2,2                  ; xmm2=(--  8  9 10 11 12 13 14)
 
-	por	xmm7,xmm3		; xmm7=( 1  2  3  4  5  6  7  8)
-	por	xmm0,xmm2		; xmm0=( 7  8  9 10 11 12 13 14)
+        por     xmm7,xmm3               ; xmm7=( 1  2  3  4  5  6  7  8)
+        por     xmm0,xmm2               ; xmm0=( 7  8  9 10 11 12 13 14)
 
-	movdqa	xmm1,xmm6
-	movdqa	xmm5,xmm4
-	pslldq	xmm1,2			; xmm1=(--  0  1  2  3  4  5  6)
-	psrldq	xmm5,2			; xmm5=( 9 10 11 12 13 14 15 --)
-	movdqa	xmm3,xmm4
-	psrldq	xmm3,(SIZEOF_XMMWORD-2)	; xmm3=(15 -- -- -- -- -- -- --)
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm4
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm5,2                  ; xmm5=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm3,xmm4
+        psrldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
 
-	por	xmm1, XMMWORD [wk(1)]	; xmm1=(-1  0  1  2  3  4  5  6)
-	por	xmm5, XMMWORD [wk(3)]	; xmm5=( 9 10 11 12 13 14 15 16)
+        por     xmm1, XMMWORD [wk(1)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm5, XMMWORD [wk(3)]   ; xmm5=( 9 10 11 12 13 14 15 16)
 
-	movdqa	XMMWORD [wk(1)], xmm3
+        movdqa  XMMWORD [wk(1)], xmm3
 
-	pmullw	xmm6,[GOTOFF(ebx,PW_THREE)]
-	pmullw	xmm4,[GOTOFF(ebx,PW_THREE)]
-	paddw	xmm1,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	xmm0,[GOTOFF(ebx,PW_EIGHT)]
-	paddw	xmm7,[GOTOFF(ebx,PW_SEVEN)]
-	paddw	xmm5,[GOTOFF(ebx,PW_SEVEN)]
+        pmullw  xmm6,[GOTOFF(ebx,PW_THREE)]
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+        paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm0,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm7,[GOTOFF(ebx,PW_SEVEN)]
+        paddw   xmm5,[GOTOFF(ebx,PW_SEVEN)]
 
-	paddw	xmm1,xmm6
-	paddw	xmm0,xmm4
-	psrlw	xmm1,4			; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
-	psrlw	xmm0,4			; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
-	paddw	xmm7,xmm6
-	paddw	xmm5,xmm4
-	psrlw	xmm7,4			; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
-	psrlw	xmm5,4			; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
+        paddw   xmm1,xmm6
+        paddw   xmm0,xmm4
+        psrlw   xmm1,4                  ; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm0,4                  ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm7,xmm6
+        paddw   xmm5,xmm4
+        psrlw   xmm7,4                  ; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm5,4                  ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
 
-	psllw	xmm7,BYTE_BIT
-	psllw	xmm5,BYTE_BIT
-	por	xmm1,xmm7		; xmm1=Out1L=( 0  1  2 ... 13 14 15)
-	por	xmm0,xmm5		; xmm0=Out1H=(16 17 18 ... 29 30 31)
+        psllw   xmm7,BYTE_BIT
+        psllw   xmm5,BYTE_BIT
+        por     xmm1,xmm7               ; xmm1=Out1L=( 0  1  2 ... 13 14 15)
+        por     xmm0,xmm5               ; xmm0=Out1H=(16 17 18 ... 29 30 31)
 
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
 
-	poppic	ebx
+        poppic  ebx
 
-	sub	eax, byte SIZEOF_XMMWORD
-	add	ecx, byte 1*SIZEOF_XMMWORD	; inptr1(above)
-	add	ebx, byte 1*SIZEOF_XMMWORD	; inptr0
-	add	esi, byte 1*SIZEOF_XMMWORD	; inptr1(below)
-	add	edx, byte 2*SIZEOF_XMMWORD	; outptr0
-	add	edi, byte 2*SIZEOF_XMMWORD	; outptr1
-	cmp	eax, byte SIZEOF_XMMWORD
-	ja	near .columnloop
-	test	eax,eax
-	jnz	near .columnloop_last
+        sub     eax, byte SIZEOF_XMMWORD
+        add     ecx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+        add     ebx, byte 1*SIZEOF_XMMWORD      ; inptr0
+        add     esi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+        add     edx, byte 2*SIZEOF_XMMWORD      ; outptr0
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr1
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      near .columnloop
+        test    eax,eax
+        jnz     near .columnloop_last
 
-	pop	esi
-	pop	edi
-	pop	ecx
-	pop	eax
+        pop     esi
+        pop     edi
+        pop     ecx
+        pop     eax
 
-	add	esi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	ecx, byte 2			; rowctr
-	jg	near .rowloop
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      near .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -532,92 +532,92 @@
 ;                           JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define output_width(b)	(b)+12		; JDIMENSION output_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v1_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
-	push	ebp
-	mov	ebp,esp
-;	push	ebx		; unused
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	edx, JDIMENSION [output_width(ebp)]
-	add	edx, byte (2*SIZEOF_XMMWORD)-1
-	and	edx, byte -(2*SIZEOF_XMMWORD)
-	jz	short .return
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_XMMWORD)-1
+        and     edx, byte -(2*SIZEOF_XMMWORD)
+        jz      short .return
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	short .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	edi
-	push	esi
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]		; inptr
-	mov	edi, JSAMPROW [edi]		; outptr
-	mov	eax,edx				; colctr
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        mov     eax,edx                         ; colctr
+        alignx  16,7
 .columnloop:
 
-	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
 
-	movdqa    xmm1,xmm0
-	punpcklbw xmm0,xmm0
-	punpckhbw xmm1,xmm1
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0
+        punpckhbw xmm1,xmm1
 
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
 
-	sub	eax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	movdqa	xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
 
-	movdqa    xmm3,xmm2
-	punpcklbw xmm2,xmm2
-	punpckhbw xmm3,xmm3
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2
+        punpckhbw xmm3,xmm3
 
-	movdqa	XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
 
-	sub	eax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	add	esi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	edi, byte 4*SIZEOF_XMMWORD	; outptr
-	jmp	short .columnloop
-	alignx	16,7
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 4*SIZEOF_XMMWORD      ; outptr
+        jmp     short .columnloop
+        alignx  16,7
 
 .nextrow:
-	pop	esi
-	pop	edi
+        pop     esi
+        pop     edi
 
-	add	esi, byte SIZEOF_JSAMPROW	; input_data
-	add	edi, byte SIZEOF_JSAMPROW	; output_data
-	dec	ecx				; rowctr
-	jg	short .rowloop
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      short .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-;	pop	ebx		; unused
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
 
 ; --------------------------------------------------------------------------
 ;
@@ -631,99 +631,99 @@
 ;                           JSAMPARRAY * output_data_ptr);
 ;
 
-%define max_v_samp(b)		(b)+8			; int max_v_samp_factor
-%define output_width(b)	(b)+12		; JDIMENSION output_width
-%define input_data(b)		(b)+16		; JSAMPARRAY input_data
-%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
 
-	align	16
-	global	EXTN(jsimd_h2v2_upsample_sse2)
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	mov	edx, JDIMENSION [output_width(ebp)]
-	add	edx, byte (2*SIZEOF_XMMWORD)-1
-	and	edx, byte -(2*SIZEOF_XMMWORD)
-	jz	near .return
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_XMMWORD)-1
+        and     edx, byte -(2*SIZEOF_XMMWORD)
+        jz      near .return
 
-	mov	ecx, INT [max_v_samp(ebp)]	; rowctr
-	test	ecx,ecx
-	jz	near .return
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return
 
-	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
-	mov	edi, POINTER [output_data_ptr(ebp)]
-	mov	edi, JSAMPARRAY [edi]			; output_data
-	alignx	16,7
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
 .rowloop:
-	push	edi
-	push	esi
+        push    edi
+        push    esi
 
-	mov	esi, JSAMPROW [esi]			; inptr
-	mov	ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]	; outptr0
-	mov	edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]	; outptr1
-	mov	eax,edx					; colctr
-	alignx	16,7
+        mov     esi, JSAMPROW [esi]                     ; inptr
+        mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     eax,edx                                 ; colctr
+        alignx  16,7
 .columnloop:
 
-	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
 
-	movdqa    xmm1,xmm0
-	punpcklbw xmm0,xmm0
-	punpckhbw xmm1,xmm1
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0
+        punpckhbw xmm1,xmm1
 
-	movdqa	XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
-	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
-	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
 
-	sub	eax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	movdqa	xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
 
-	movdqa    xmm3,xmm2
-	punpcklbw xmm2,xmm2
-	punpckhbw xmm3,xmm3
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2
+        punpckhbw xmm3,xmm3
 
-	movdqa	XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3
-	movdqa	XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
-	movdqa	XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
 
-	sub	eax, byte 2*SIZEOF_XMMWORD
-	jz	short .nextrow
+        sub     eax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
 
-	add	esi, byte 2*SIZEOF_XMMWORD	; inptr
-	add	ebx, byte 4*SIZEOF_XMMWORD	; outptr0
-	add	edi, byte 4*SIZEOF_XMMWORD	; outptr1
-	jmp	short .columnloop
-	alignx	16,7
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     ebx, byte 4*SIZEOF_XMMWORD      ; outptr0
+        add     edi, byte 4*SIZEOF_XMMWORD      ; outptr1
+        jmp     short .columnloop
+        alignx  16,7
 
 .nextrow:
-	pop	esi
-	pop	edi
+        pop     esi
+        pop     edi
 
-	add	esi, byte 1*SIZEOF_JSAMPROW	; input_data
-	add	edi, byte 2*SIZEOF_JSAMPROW	; output_data
-	sub	ecx, byte 2			; rowctr
-	jg	short .rowloop
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      short .rowloop
 
 .return:
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jf3dnflt.asm b/simd/jf3dnflt.asm
index 542672d..06712e8 100644
--- a/simd/jf3dnflt.asm
+++ b/simd/jf3dnflt.asm
@@ -24,23 +24,23 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_float_3dnow)
+        alignz  16
+        global  EXTN(jconst_fdct_float_3dnow)
 
 EXTN(jconst_fdct_float_3dnow):
 
-PD_0_382	times 2 dd  0.382683432365089771728460
-PD_0_707	times 2 dd  0.707106781186547524400844
-PD_0_541	times 2 dd  0.541196100146196984399723
-PD_1_306	times 2 dd  1.306562964876376527856643
+PD_0_382        times 2 dd  0.382683432365089771728460
+PD_0_707        times 2 dd  0.707106781186547524400844
+PD_0_541        times 2 dd  0.541196100146196984399723
+PD_1_306        times 2 dd  1.306562964876376527856643
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -48,273 +48,273 @@
 ; jsimd_fdct_float_3dnow (FAST_FLOAT * data)
 ;
 
-%define data(b)		(b)+8		; FAST_FLOAT * data
+%define data(b)         (b)+8           ; FAST_FLOAT * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_float_3dnow)
+        align   16
+        global  EXTN(jsimd_fdct_float_3dnow)
 
 EXTN(jsimd_fdct_float_3dnow):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
-	mov	ecx, DCTSIZE/2
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2
+        alignx  16,7
 .rowloop:
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
 
-	; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
+        ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
 
-	movq      mm4,mm0		; transpose coefficients
-	punpckldq mm0,mm1		; mm0=(00 10)=data0
-	punpckhdq mm4,mm1		; mm4=(01 11)=data1
-	movq      mm5,mm2		; transpose coefficients
-	punpckldq mm2,mm3		; mm2=(06 16)=data6
-	punpckhdq mm5,mm3		; mm5=(07 17)=data7
+        movq      mm4,mm0               ; transpose coefficients
+        punpckldq mm0,mm1               ; mm0=(00 10)=data0
+        punpckhdq mm4,mm1               ; mm4=(01 11)=data1
+        movq      mm5,mm2               ; transpose coefficients
+        punpckldq mm2,mm3               ; mm2=(06 16)=data6
+        punpckhdq mm5,mm3               ; mm5=(07 17)=data7
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	pfsub	mm4,mm2			; mm4=data1-data6=tmp6
-	pfsub	mm0,mm5			; mm0=data0-data7=tmp7
-	pfadd	mm6,mm2			; mm6=data1+data6=tmp1
-	pfadd	mm7,mm5			; mm7=data0+data7=tmp0
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+        pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+        pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+        pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
 
-	movq	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
 
-	; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15)
+        ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm0	; wk(1)=tmp7
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
 
-	movq      mm4,mm1		; transpose coefficients
-	punpckldq mm1,mm3		; mm1=(02 12)=data2
-	punpckhdq mm4,mm3		; mm4=(03 13)=data3
-	movq      mm0,mm2		; transpose coefficients
-	punpckldq mm2,mm5		; mm2=(04 14)=data4
-	punpckhdq mm0,mm5		; mm0=(05 15)=data5
+        movq      mm4,mm1               ; transpose coefficients
+        punpckldq mm1,mm3               ; mm1=(02 12)=data2
+        punpckhdq mm4,mm3               ; mm4=(03 13)=data3
+        movq      mm0,mm2               ; transpose coefficients
+        punpckldq mm2,mm5               ; mm2=(04 14)=data4
+        punpckhdq mm0,mm5               ; mm0=(05 15)=data5
 
-	movq	mm3,mm4
-	movq	mm5,mm1
-	pfadd	mm4,mm2			; mm4=data3+data4=tmp3
-	pfadd	mm1,mm0			; mm1=data2+data5=tmp2
-	pfsub	mm3,mm2			; mm3=data3-data4=tmp4
-	pfsub	mm5,mm0			; mm5=data2-data5=tmp5
+        movq    mm3,mm4
+        movq    mm5,mm1
+        pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+        pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+        pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+        pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm2,mm7
-	movq	mm0,mm6
-	pfsub	mm7,mm4			; mm7=tmp13
-	pfsub	mm6,mm1			; mm6=tmp12
-	pfadd	mm2,mm4			; mm2=tmp10
-	pfadd	mm0,mm1			; mm0=tmp11
+        movq    mm2,mm7
+        movq    mm0,mm6
+        pfsub   mm7,mm4                 ; mm7=tmp13
+        pfsub   mm6,mm1                 ; mm6=tmp12
+        pfadd   mm2,mm4                 ; mm2=tmp10
+        pfadd   mm0,mm1                 ; mm0=tmp11
 
-	pfadd	mm6,mm7
-	pfmul	mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+        pfadd   mm6,mm7
+        pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
 
-	movq	mm4,mm2
-	movq	mm1,mm7
-	pfsub	mm2,mm0			; mm2=data4
-	pfsub	mm7,mm6			; mm7=data6
-	pfadd	mm4,mm0			; mm4=data0
-	pfadd	mm1,mm6			; mm1=data2
+        movq    mm4,mm2
+        movq    mm1,mm7
+        pfsub   mm2,mm0                 ; mm2=data4
+        pfsub   mm7,mm6                 ; mm7=data6
+        pfadd   mm4,mm0                 ; mm4=data0
+        pfadd   mm1,mm6                 ; mm1=data2
 
-	movq	MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2
-	movq	MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm0, MMWORD [wk(0)]	; mm0=tmp6
-	movq	mm6, MMWORD [wk(1)]	; mm6=tmp7
+        movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
 
-	pfadd	mm3,mm5			; mm3=tmp10
-	pfadd	mm5,mm0			; mm5=tmp11
-	pfadd	mm0,mm6			; mm0=tmp12, mm6=tmp7
+        pfadd   mm3,mm5                 ; mm3=tmp10
+        pfadd   mm5,mm0                 ; mm5=tmp11
+        pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
 
-	pfmul	mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+        pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
 
-	movq	mm2,mm3			; mm2=tmp10
-	pfsub	mm3,mm0
-	pfmul	mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
-	pfmul	mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
-	pfmul	mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
-	pfadd	mm2,mm3			; mm2=z2
-	pfadd	mm0,mm3			; mm0=z4
+        movq    mm2,mm3                 ; mm2=tmp10
+        pfsub   mm3,mm0
+        pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+        pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+        pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+        pfadd   mm2,mm3                 ; mm2=z2
+        pfadd   mm0,mm3                 ; mm0=z4
 
-	movq	mm7,mm6
-	pfsub	mm6,mm5			; mm6=z13
-	pfadd	mm7,mm5			; mm7=z11
+        movq    mm7,mm6
+        pfsub   mm6,mm5                 ; mm6=z13
+        pfadd   mm7,mm5                 ; mm7=z11
 
-	movq	mm4,mm6
-	movq	mm1,mm7
-	pfsub	mm6,mm2			; mm6=data3
-	pfsub	mm7,mm0			; mm7=data7
-	pfadd	mm4,mm2			; mm4=data5
-	pfadd	mm1,mm0			; mm1=data1
+        movq    mm4,mm6
+        movq    mm1,mm7
+        pfsub   mm6,mm2                 ; mm6=data3
+        pfsub   mm7,mm0                 ; mm7=data7
+        pfadd   mm4,mm2                 ; mm4=data5
+        pfadd   mm1,mm0                 ; mm1=data1
 
-	movq	MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6
-	movq	MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7
-	movq	MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
 
-	add	edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .rowloop
+        add     edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .rowloop
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
-	mov	ecx, DCTSIZE/2
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
 
-	; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
+        ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
 
-	movq      mm4,mm0		; transpose coefficients
-	punpckldq mm0,mm1		; mm0=(00 01)=data0
-	punpckhdq mm4,mm1		; mm4=(10 11)=data1
-	movq      mm5,mm2		; transpose coefficients
-	punpckldq mm2,mm3		; mm2=(60 61)=data6
-	punpckhdq mm5,mm3		; mm5=(70 71)=data7
+        movq      mm4,mm0               ; transpose coefficients
+        punpckldq mm0,mm1               ; mm0=(00 01)=data0
+        punpckhdq mm4,mm1               ; mm4=(10 11)=data1
+        movq      mm5,mm2               ; transpose coefficients
+        punpckldq mm2,mm3               ; mm2=(60 61)=data6
+        punpckhdq mm5,mm3               ; mm5=(70 71)=data7
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	pfsub	mm4,mm2			; mm4=data1-data6=tmp6
-	pfsub	mm0,mm5			; mm0=data0-data7=tmp7
-	pfadd	mm6,mm2			; mm6=data1+data6=tmp1
-	pfadd	mm7,mm5			; mm7=data0+data7=tmp0
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+        pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+        pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+        pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
 
-	movq	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
-	movq	mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
 
-	; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51)
+        ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm0	; wk(1)=tmp7
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
 
-	movq      mm4,mm1		; transpose coefficients
-	punpckldq mm1,mm3		; mm1=(20 21)=data2
-	punpckhdq mm4,mm3		; mm4=(30 31)=data3
-	movq      mm0,mm2		; transpose coefficients
-	punpckldq mm2,mm5		; mm2=(40 41)=data4
-	punpckhdq mm0,mm5		; mm0=(50 51)=data5
+        movq      mm4,mm1               ; transpose coefficients
+        punpckldq mm1,mm3               ; mm1=(20 21)=data2
+        punpckhdq mm4,mm3               ; mm4=(30 31)=data3
+        movq      mm0,mm2               ; transpose coefficients
+        punpckldq mm2,mm5               ; mm2=(40 41)=data4
+        punpckhdq mm0,mm5               ; mm0=(50 51)=data5
 
-	movq	mm3,mm4
-	movq	mm5,mm1
-	pfadd	mm4,mm2			; mm4=data3+data4=tmp3
-	pfadd	mm1,mm0			; mm1=data2+data5=tmp2
-	pfsub	mm3,mm2			; mm3=data3-data4=tmp4
-	pfsub	mm5,mm0			; mm5=data2-data5=tmp5
+        movq    mm3,mm4
+        movq    mm5,mm1
+        pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+        pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+        pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+        pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm2,mm7
-	movq	mm0,mm6
-	pfsub	mm7,mm4			; mm7=tmp13
-	pfsub	mm6,mm1			; mm6=tmp12
-	pfadd	mm2,mm4			; mm2=tmp10
-	pfadd	mm0,mm1			; mm0=tmp11
+        movq    mm2,mm7
+        movq    mm0,mm6
+        pfsub   mm7,mm4                 ; mm7=tmp13
+        pfsub   mm6,mm1                 ; mm6=tmp12
+        pfadd   mm2,mm4                 ; mm2=tmp10
+        pfadd   mm0,mm1                 ; mm0=tmp11
 
-	pfadd	mm6,mm7
-	pfmul	mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+        pfadd   mm6,mm7
+        pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
 
-	movq	mm4,mm2
-	movq	mm1,mm7
-	pfsub	mm2,mm0			; mm2=data4
-	pfsub	mm7,mm6			; mm7=data6
-	pfadd	mm4,mm0			; mm4=data0
-	pfadd	mm1,mm6			; mm1=data2
+        movq    mm4,mm2
+        movq    mm1,mm7
+        pfsub   mm2,mm0                 ; mm2=data4
+        pfsub   mm7,mm6                 ; mm7=data6
+        pfadd   mm4,mm0                 ; mm4=data0
+        pfadd   mm1,mm6                 ; mm1=data2
 
-	movq	MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2
-	movq	MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm0, MMWORD [wk(0)]	; mm0=tmp6
-	movq	mm6, MMWORD [wk(1)]	; mm6=tmp7
+        movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
 
-	pfadd	mm3,mm5			; mm3=tmp10
-	pfadd	mm5,mm0			; mm5=tmp11
-	pfadd	mm0,mm6			; mm0=tmp12, mm6=tmp7
+        pfadd   mm3,mm5                 ; mm3=tmp10
+        pfadd   mm5,mm0                 ; mm5=tmp11
+        pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
 
-	pfmul	mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+        pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
 
-	movq	mm2,mm3			; mm2=tmp10
-	pfsub	mm3,mm0
-	pfmul	mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
-	pfmul	mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
-	pfmul	mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
-	pfadd	mm2,mm3			; mm2=z2
-	pfadd	mm0,mm3			; mm0=z4
+        movq    mm2,mm3                 ; mm2=tmp10
+        pfsub   mm3,mm0
+        pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+        pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+        pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+        pfadd   mm2,mm3                 ; mm2=z2
+        pfadd   mm0,mm3                 ; mm0=z4
 
-	movq	mm7,mm6
-	pfsub	mm6,mm5			; mm6=z13
-	pfadd	mm7,mm5			; mm7=z11
+        movq    mm7,mm6
+        pfsub   mm6,mm5                 ; mm6=z13
+        pfadd   mm7,mm5                 ; mm7=z11
 
-	movq	mm4,mm6
-	movq	mm1,mm7
-	pfsub	mm6,mm2			; mm6=data3
-	pfsub	mm7,mm0			; mm7=data7
-	pfadd	mm4,mm2			; mm4=data5
-	pfadd	mm1,mm0			; mm1=data1
+        movq    mm4,mm6
+        movq    mm1,mm7
+        pfsub   mm6,mm2                 ; mm6=data3
+        pfsub   mm7,mm0                 ; mm7=data7
+        pfadd   mm4,mm2                 ; mm4=data5
+        pfadd   mm1,mm0                 ; mm1=data1
 
-	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6
-	movq	MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7
-	movq	MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
 
-	add	edx, byte 2*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .columnloop
+        add     edx, byte 2*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .columnloop
 
-	femms		; empty MMX/3DNow! state
+        femms           ; empty MMX/3DNow! state
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfmmxfst.asm b/simd/jfmmxfst.asm
index 0647242..d8cd4b9 100644
--- a/simd/jfmmxfst.asm
+++ b/simd/jfmmxfst.asm
@@ -26,24 +26,24 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
+%define CONST_BITS      8       ; 14 is also OK.
 
 %if CONST_BITS == 8
-F_0_382	equ	 98		; FIX(0.382683433)
-F_0_541	equ	139		; FIX(0.541196100)
-F_0_707	equ	181		; FIX(0.707106781)
-F_1_306	equ	334		; FIX(1.306562965)
+F_0_382 equ      98             ; FIX(0.382683433)
+F_0_541 equ     139             ; FIX(0.541196100)
+F_0_707 equ     181             ; FIX(0.707106781)
+F_1_306 equ     334             ; FIX(1.306562965)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_382	equ	DESCALE( 410903207,30-CONST_BITS)	; FIX(0.382683433)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_707	equ	DESCALE( 759250124,30-CONST_BITS)	; FIX(0.707106781)
-F_1_306	equ	DESCALE(1402911301,30-CONST_BITS)	; FIX(1.306562965)
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -51,21 +51,21 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_fdct_ifast_mmx)
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_mmx)
 
 EXTN(jconst_fdct_ifast_mmx):
 
-PW_F0707	times 4 dw  F_0_707 << CONST_SHIFT
-PW_F0382	times 4 dw  F_0_382 << CONST_SHIFT
-PW_F0541	times 4 dw  F_0_541 << CONST_SHIFT
-PW_F1306	times 4 dw  F_1_306 << CONST_SHIFT
+PW_F0707        times 4 dw  F_0_707 << CONST_SHIFT
+PW_F0382        times 4 dw  F_0_382 << CONST_SHIFT
+PW_F0541        times 4 dw  F_0_541 << CONST_SHIFT
+PW_F1306        times 4 dw  F_1_306 << CONST_SHIFT
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -73,325 +73,325 @@
 ; jsimd_fdct_ifast_mmx (DCTELEM * data)
 ;
 
-%define data(b)		(b)+8		; DCTELEM * data
+%define data(b)         (b)+8           ; DCTELEM * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_ifast_mmx)
+        align   16
+        global  EXTN(jsimd_fdct_ifast_mmx)
 
 EXTN(jsimd_fdct_ifast_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .rowloop:
 
-	movq	mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
-	movq	mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
 
-	; mm0=(20 21 22 23), mm2=(24 25 26 27)
-	; mm1=(30 31 32 33), mm3=(34 35 36 37)
+        ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+        ; mm1=(30 31 32 33), mm3=(34 35 36 37)
 
-	movq      mm4,mm0		; transpose coefficients(phase 1)
-	punpcklwd mm0,mm1		; mm0=(20 30 21 31)
-	punpckhwd mm4,mm1		; mm4=(22 32 23 33)
-	movq      mm5,mm2		; transpose coefficients(phase 1)
-	punpcklwd mm2,mm3		; mm2=(24 34 25 35)
-	punpckhwd mm5,mm3		; mm5=(26 36 27 37)
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+        punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
 
-	movq	mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movq	mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
 
-	; mm6=(00 01 02 03), mm1=(04 05 06 07)
-	; mm7=(10 11 12 13), mm3=(14 15 16 17)
+        ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+        ; mm7=(10 11 12 13), mm3=(14 15 16 17)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=(22 32 23 33)
-	movq	MMWORD [wk(1)], mm2	; wk(1)=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
 
-	movq      mm4,mm6		; transpose coefficients(phase 1)
-	punpcklwd mm6,mm7		; mm6=(00 10 01 11)
-	punpckhwd mm4,mm7		; mm4=(02 12 03 13)
-	movq      mm2,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm3		; mm1=(04 14 05 15)
-	punpckhwd mm2,mm3		; mm2=(06 16 07 17)
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+        punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
 
-	movq      mm7,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm0		; mm6=(00 10 20 30)=data0
-	punpckhdq mm7,mm0		; mm7=(01 11 21 31)=data1
-	movq      mm3,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm5		; mm2=(06 16 26 36)=data6
-	punpckhdq mm3,mm5		; mm3=(07 17 27 37)=data7
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+        punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+        punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
 
-	movq	mm0,mm7
-	movq	mm5,mm6
-	psubw	mm7,mm2			; mm7=data1-data6=tmp6
-	psubw	mm6,mm3			; mm6=data0-data7=tmp7
-	paddw	mm0,mm2			; mm0=data1+data6=tmp1
-	paddw	mm5,mm3			; mm5=data0+data7=tmp0
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
 
-	movq	mm2, MMWORD [wk(0)]	; mm2=(22 32 23 33)
-	movq	mm3, MMWORD [wk(1)]	; mm3=(24 34 25 35)
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm6	; wk(1)=tmp7
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
 
-	movq      mm7,mm4		; transpose coefficients(phase 2)
-	punpckldq mm4,mm2		; mm4=(02 12 22 32)=data2
-	punpckhdq mm7,mm2		; mm7=(03 13 23 33)=data3
-	movq      mm6,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm3		; mm1=(04 14 24 34)=data4
-	punpckhdq mm6,mm3		; mm6=(05 15 25 35)=data5
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+        punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+        punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
 
-	movq	mm2,mm7
-	movq	mm3,mm4
-	paddw	mm7,mm1			; mm7=data3+data4=tmp3
-	paddw	mm4,mm6			; mm4=data2+data5=tmp2
-	psubw	mm2,mm1			; mm2=data3-data4=tmp4
-	psubw	mm3,mm6			; mm3=data2-data5=tmp5
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm1,mm5
-	movq	mm6,mm0
-	psubw	mm5,mm7			; mm5=tmp13
-	psubw	mm0,mm4			; mm0=tmp12
-	paddw	mm1,mm7			; mm1=tmp10
-	paddw	mm6,mm4			; mm6=tmp11
+        movq    mm1,mm5
+        movq    mm6,mm0
+        psubw   mm5,mm7                 ; mm5=tmp13
+        psubw   mm0,mm4                 ; mm0=tmp12
+        paddw   mm1,mm7                 ; mm1=tmp10
+        paddw   mm6,mm4                 ; mm6=tmp11
 
-	paddw	mm0,mm5
-	psllw	mm0,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+        paddw   mm0,mm5
+        psllw   mm0,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
 
-	movq	mm7,mm1
-	movq	mm4,mm5
-	psubw	mm1,mm6			; mm1=data4
-	psubw	mm5,mm0			; mm5=data6
-	paddw	mm7,mm6			; mm7=data0
-	paddw	mm4,mm0			; mm4=data2
+        movq    mm7,mm1
+        movq    mm4,mm5
+        psubw   mm1,mm6                 ; mm1=data4
+        psubw   mm5,mm0                 ; mm5=data6
+        paddw   mm7,mm6                 ; mm7=data0
+        paddw   mm4,mm0                 ; mm4=data2
 
-	movq	MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1
-	movq	MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
-	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm6, MMWORD [wk(0)]	; mm6=tmp6
-	movq	mm0, MMWORD [wk(1)]	; mm0=tmp7
+        movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
 
-	paddw	mm2,mm3			; mm2=tmp10
-	paddw	mm3,mm6			; mm3=tmp11
-	paddw	mm6,mm0			; mm6=tmp12, mm0=tmp7
+        paddw   mm2,mm3                 ; mm2=tmp10
+        paddw   mm3,mm6                 ; mm3=tmp11
+        paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
 
-	psllw	mm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	mm6,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm6,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	mm3,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+        psllw   mm3,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
 
-	movq	mm1,mm2			; mm1=tmp10
-	psubw	mm2,mm6
-	pmulhw	mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
-	pmulhw	mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
-	pmulhw	mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
-	paddw	mm1,mm2			; mm1=z2
-	paddw	mm6,mm2			; mm6=z4
+        movq    mm1,mm2                 ; mm1=tmp10
+        psubw   mm2,mm6
+        pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+        pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+        pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+        paddw   mm1,mm2                 ; mm1=z2
+        paddw   mm6,mm2                 ; mm6=z4
 
-	movq	mm5,mm0
-	psubw	mm0,mm3			; mm0=z13
-	paddw	mm5,mm3			; mm5=z11
+        movq    mm5,mm0
+        psubw   mm0,mm3                 ; mm0=z13
+        paddw   mm5,mm3                 ; mm5=z11
 
-	movq	mm7,mm0
-	movq	mm4,mm5
-	psubw	mm0,mm1			; mm0=data3
-	psubw	mm5,mm6			; mm5=data7
-	paddw	mm7,mm1			; mm7=data5
-	paddw	mm4,mm6			; mm4=data1
+        movq    mm7,mm0
+        movq    mm4,mm5
+        psubw   mm0,mm1                 ; mm0=data3
+        psubw   mm5,mm6                 ; mm5=data7
+        paddw   mm7,mm1                 ; mm7=data5
+        paddw   mm4,mm6                 ; mm4=data1
 
-	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
-	movq	MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
 
-	add	edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	near .rowloop
+        add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .rowloop
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
-	movq	mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
 
-	; mm0=(02 12 22 32), mm2=(42 52 62 72)
-	; mm1=(03 13 23 33), mm3=(43 53 63 73)
+        ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+        ; mm1=(03 13 23 33), mm3=(43 53 63 73)
 
-	movq      mm4,mm0		; transpose coefficients(phase 1)
-	punpcklwd mm0,mm1		; mm0=(02 03 12 13)
-	punpckhwd mm4,mm1		; mm4=(22 23 32 33)
-	movq      mm5,mm2		; transpose coefficients(phase 1)
-	punpcklwd mm2,mm3		; mm2=(42 43 52 53)
-	punpckhwd mm5,mm3		; mm5=(62 63 72 73)
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+        punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+        punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
 
-	movq	mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movq	mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
 
-	; mm6=(00 10 20 30), mm1=(40 50 60 70)
-	; mm7=(01 11 21 31), mm3=(41 51 61 71)
+        ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+        ; mm7=(01 11 21 31), mm3=(41 51 61 71)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=(22 23 32 33)
-	movq	MMWORD [wk(1)], mm2	; wk(1)=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
 
-	movq      mm4,mm6		; transpose coefficients(phase 1)
-	punpcklwd mm6,mm7		; mm6=(00 01 10 11)
-	punpckhwd mm4,mm7		; mm4=(20 21 30 31)
-	movq      mm2,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm3		; mm1=(40 41 50 51)
-	punpckhwd mm2,mm3		; mm2=(60 61 70 71)
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+        punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+        punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
 
-	movq      mm7,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm0		; mm6=(00 01 02 03)=data0
-	punpckhdq mm7,mm0		; mm7=(10 11 12 13)=data1
-	movq      mm3,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm5		; mm2=(60 61 62 63)=data6
-	punpckhdq mm3,mm5		; mm3=(70 71 72 73)=data7
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+        punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
 
-	movq	mm0,mm7
-	movq	mm5,mm6
-	psubw	mm7,mm2			; mm7=data1-data6=tmp6
-	psubw	mm6,mm3			; mm6=data0-data7=tmp7
-	paddw	mm0,mm2			; mm0=data1+data6=tmp1
-	paddw	mm5,mm3			; mm5=data0+data7=tmp0
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
 
-	movq	mm2, MMWORD [wk(0)]	; mm2=(22 23 32 33)
-	movq	mm3, MMWORD [wk(1)]	; mm3=(42 43 52 53)
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm6	; wk(1)=tmp7
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
 
-	movq      mm7,mm4		; transpose coefficients(phase 2)
-	punpckldq mm4,mm2		; mm4=(20 21 22 23)=data2
-	punpckhdq mm7,mm2		; mm7=(30 31 32 33)=data3
-	movq      mm6,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm3		; mm1=(40 41 42 43)=data4
-	punpckhdq mm6,mm3		; mm6=(50 51 52 53)=data5
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+        punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+        punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
 
-	movq	mm2,mm7
-	movq	mm3,mm4
-	paddw	mm7,mm1			; mm7=data3+data4=tmp3
-	paddw	mm4,mm6			; mm4=data2+data5=tmp2
-	psubw	mm2,mm1			; mm2=data3-data4=tmp4
-	psubw	mm3,mm6			; mm3=data2-data5=tmp5
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm1,mm5
-	movq	mm6,mm0
-	psubw	mm5,mm7			; mm5=tmp13
-	psubw	mm0,mm4			; mm0=tmp12
-	paddw	mm1,mm7			; mm1=tmp10
-	paddw	mm6,mm4			; mm6=tmp11
+        movq    mm1,mm5
+        movq    mm6,mm0
+        psubw   mm5,mm7                 ; mm5=tmp13
+        psubw   mm0,mm4                 ; mm0=tmp12
+        paddw   mm1,mm7                 ; mm1=tmp10
+        paddw   mm6,mm4                 ; mm6=tmp11
 
-	paddw	mm0,mm5
-	psllw	mm0,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+        paddw   mm0,mm5
+        psllw   mm0,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
 
-	movq	mm7,mm1
-	movq	mm4,mm5
-	psubw	mm1,mm6			; mm1=data4
-	psubw	mm5,mm0			; mm5=data6
-	paddw	mm7,mm6			; mm7=data0
-	paddw	mm4,mm0			; mm4=data2
+        movq    mm7,mm1
+        movq    mm4,mm5
+        psubw   mm1,mm6                 ; mm1=data4
+        psubw   mm5,mm0                 ; mm5=data6
+        paddw   mm7,mm6                 ; mm7=data0
+        paddw   mm4,mm0                 ; mm4=data2
 
-	movq	MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1
-	movq	MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
-	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm6, MMWORD [wk(0)]	; mm6=tmp6
-	movq	mm0, MMWORD [wk(1)]	; mm0=tmp7
+        movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
 
-	paddw	mm2,mm3			; mm2=tmp10
-	paddw	mm3,mm6			; mm3=tmp11
-	paddw	mm6,mm0			; mm6=tmp12, mm0=tmp7
+        paddw   mm2,mm3                 ; mm2=tmp10
+        paddw   mm3,mm6                 ; mm3=tmp11
+        paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
 
-	psllw	mm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	mm6,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm6,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	mm3,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+        psllw   mm3,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
 
-	movq	mm1,mm2			; mm1=tmp10
-	psubw	mm2,mm6
-	pmulhw	mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
-	pmulhw	mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
-	pmulhw	mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
-	paddw	mm1,mm2			; mm1=z2
-	paddw	mm6,mm2			; mm6=z4
+        movq    mm1,mm2                 ; mm1=tmp10
+        psubw   mm2,mm6
+        pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+        pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+        pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+        paddw   mm1,mm2                 ; mm1=z2
+        paddw   mm6,mm2                 ; mm6=z4
 
-	movq	mm5,mm0
-	psubw	mm0,mm3			; mm0=z13
-	paddw	mm5,mm3			; mm5=z11
+        movq    mm5,mm0
+        psubw   mm0,mm3                 ; mm0=z13
+        paddw   mm5,mm3                 ; mm5=z11
 
-	movq	mm7,mm0
-	movq	mm4,mm5
-	psubw	mm0,mm1			; mm0=data3
-	psubw	mm5,mm6			; mm5=data7
-	paddw	mm7,mm1			; mm7=data5
-	paddw	mm4,mm6			; mm4=data1
+        movq    mm7,mm0
+        movq    mm4,mm5
+        psubw   mm0,mm1                 ; mm0=data3
+        psubw   mm5,mm6                 ; mm5=data7
+        paddw   mm7,mm1                 ; mm7=data5
+        paddw   mm4,mm6                 ; mm4=data1
 
-	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
-	movq	MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
 
-	add	edx, byte 4*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	near .columnloop
+        add     edx, byte 4*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .columnloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfmmxint.asm b/simd/jfmmxint.asm
index a7e73f7..c16f262 100644
--- a/simd/jfmmxint.asm
+++ b/simd/jfmmxint.asm
@@ -26,67 +26,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13
+%define PASS1_BITS      2
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS)
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_islow_mmx)
+        alignz  16
+        global  EXTN(jconst_fdct_islow_mmx)
 
 EXTN(jconst_fdct_islow_mmx):
 
-PW_F130_F054	times 2 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 2 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 2 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 2 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 2 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 2 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 2 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 2 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 2 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 2 dd  1 << (DESCALE_P2-1)
-PW_DESCALE_P2X	times 4 dw  1 << (PASS1_BITS-1)
+PW_F130_F054    times 2 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130   times 2 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117   times 2 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078    times 2 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089  times 2 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060   times 2 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256  times 2 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050   times 2 dw -F_2_562, (F_3_072-F_2_562)
+PD_DESCALE_P1   times 2 dd  1 << (DESCALE_P1-1)
+PD_DESCALE_P2   times 2 dd  1 << (DESCALE_P2-1)
+PW_DESCALE_P2X  times 4 dw  1 << (PASS1_BITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -94,529 +94,529 @@
 ; jsimd_fdct_islow_mmx (DCTELEM * data)
 ;
 
-%define data(b)		(b)+8		; DCTELEM * data
+%define data(b)         (b)+8           ; DCTELEM * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_islow_mmx)
+        align   16
+        global  EXTN(jsimd_fdct_islow_mmx)
 
 EXTN(jsimd_fdct_islow_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .rowloop:
 
-	movq	mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
-	movq	mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
 
-	; mm0=(20 21 22 23), mm2=(24 25 26 27)
-	; mm1=(30 31 32 33), mm3=(34 35 36 37)
+        ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+        ; mm1=(30 31 32 33), mm3=(34 35 36 37)
 
-	movq      mm4,mm0		; transpose coefficients(phase 1)
-	punpcklwd mm0,mm1		; mm0=(20 30 21 31)
-	punpckhwd mm4,mm1		; mm4=(22 32 23 33)
-	movq      mm5,mm2		; transpose coefficients(phase 1)
-	punpcklwd mm2,mm3		; mm2=(24 34 25 35)
-	punpckhwd mm5,mm3		; mm5=(26 36 27 37)
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+        punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
 
-	movq	mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movq	mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
 
-	; mm6=(00 01 02 03), mm1=(04 05 06 07)
-	; mm7=(10 11 12 13), mm3=(14 15 16 17)
+        ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+        ; mm7=(10 11 12 13), mm3=(14 15 16 17)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=(22 32 23 33)
-	movq	MMWORD [wk(1)], mm2	; wk(1)=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
 
-	movq      mm4,mm6		; transpose coefficients(phase 1)
-	punpcklwd mm6,mm7		; mm6=(00 10 01 11)
-	punpckhwd mm4,mm7		; mm4=(02 12 03 13)
-	movq      mm2,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm3		; mm1=(04 14 05 15)
-	punpckhwd mm2,mm3		; mm2=(06 16 07 17)
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+        punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
 
-	movq      mm7,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm0		; mm6=(00 10 20 30)=data0
-	punpckhdq mm7,mm0		; mm7=(01 11 21 31)=data1
-	movq      mm3,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm5		; mm2=(06 16 26 36)=data6
-	punpckhdq mm3,mm5		; mm3=(07 17 27 37)=data7
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+        punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+        punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
 
-	movq	mm0,mm7
-	movq	mm5,mm6
-	psubw	mm7,mm2			; mm7=data1-data6=tmp6
-	psubw	mm6,mm3			; mm6=data0-data7=tmp7
-	paddw	mm0,mm2			; mm0=data1+data6=tmp1
-	paddw	mm5,mm3			; mm5=data0+data7=tmp0
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
 
-	movq	mm2, MMWORD [wk(0)]	; mm2=(22 32 23 33)
-	movq	mm3, MMWORD [wk(1)]	; mm3=(24 34 25 35)
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm6	; wk(1)=tmp7
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
 
-	movq      mm7,mm4		; transpose coefficients(phase 2)
-	punpckldq mm4,mm2		; mm4=(02 12 22 32)=data2
-	punpckhdq mm7,mm2		; mm7=(03 13 23 33)=data3
-	movq      mm6,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm3		; mm1=(04 14 24 34)=data4
-	punpckhdq mm6,mm3		; mm6=(05 15 25 35)=data5
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+        punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+        punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
 
-	movq	mm2,mm7
-	movq	mm3,mm4
-	paddw	mm7,mm1			; mm7=data3+data4=tmp3
-	paddw	mm4,mm6			; mm4=data2+data5=tmp2
-	psubw	mm2,mm1			; mm2=data3-data4=tmp4
-	psubw	mm3,mm6			; mm3=data2-data5=tmp5
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm1,mm5
-	movq	mm6,mm0
-	paddw	mm5,mm7			; mm5=tmp10
-	paddw	mm0,mm4			; mm0=tmp11
-	psubw	mm1,mm7			; mm1=tmp13
-	psubw	mm6,mm4			; mm6=tmp12
+        movq    mm1,mm5
+        movq    mm6,mm0
+        paddw   mm5,mm7                 ; mm5=tmp10
+        paddw   mm0,mm4                 ; mm0=tmp11
+        psubw   mm1,mm7                 ; mm1=tmp13
+        psubw   mm6,mm4                 ; mm6=tmp12
 
-	movq	mm7,mm5
-	paddw	mm5,mm0			; mm5=tmp10+tmp11
-	psubw	mm7,mm0			; mm7=tmp10-tmp11
+        movq    mm7,mm5
+        paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+        psubw   mm7,mm0                 ; mm7=tmp10-tmp11
 
-	psllw	mm5,PASS1_BITS		; mm5=data0
-	psllw	mm7,PASS1_BITS		; mm7=data4
+        psllw   mm5,PASS1_BITS          ; mm5=data0
+        psllw   mm7,PASS1_BITS          ; mm7=data4
 
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movq      mm4,mm1		; mm1=tmp13
-	movq      mm0,mm1
-	punpcklwd mm4,mm6		; mm6=tmp12
-	punpckhwd mm0,mm6
-	movq      mm1,mm4
-	movq      mm6,mm0
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]	; mm4=data2L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]	; mm0=data2H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]	; mm1=data6L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]	; mm6=data6H
+        movq      mm4,mm1               ; mm1=tmp13
+        movq      mm0,mm1
+        punpcklwd mm4,mm6               ; mm6=tmp12
+        punpckhwd mm0,mm6
+        movq      mm1,mm4
+        movq      mm6,mm0
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
 
-	paddd	mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm0,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm4,DESCALE_P1
-	psrad	mm0,DESCALE_P1
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm6,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm1,DESCALE_P1
-	psrad	mm6,DESCALE_P1
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm4,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm1,DESCALE_P1
+        psrad   mm6,DESCALE_P1
 
-	packssdw  mm4,mm0		; mm4=data2
-	packssdw  mm1,mm6		; mm1=data6
+        packssdw  mm4,mm0               ; mm4=data2
+        packssdw  mm1,mm6               ; mm1=data6
 
-	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
-	movq	MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm5, MMWORD [wk(0)]	; mm5=tmp6
-	movq	mm7, MMWORD [wk(1)]	; mm7=tmp7
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
 
-	movq	mm0,mm2			; mm2=tmp4
-	movq	mm6,mm3			; mm3=tmp5
-	paddw	mm0,mm5			; mm0=z3
-	paddw	mm6,mm7			; mm6=z4
+        movq    mm0,mm2                 ; mm2=tmp4
+        movq    mm6,mm3                 ; mm3=tmp5
+        paddw   mm0,mm5                 ; mm0=z3
+        paddw   mm6,mm7                 ; mm6=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movq      mm4,mm0
-	movq      mm1,mm0
-	punpcklwd mm4,mm6
-	punpckhwd mm1,mm6
-	movq      mm0,mm4
-	movq      mm6,mm1
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]	; mm4=z3L
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]	; mm1=z3H
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]	; mm0=z4L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]	; mm6=z4H
+        movq      mm4,mm0
+        movq      mm1,mm0
+        punpcklwd mm4,mm6
+        punpckhwd mm1,mm6
+        movq      mm0,mm4
+        movq      mm6,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=z3L
-	movq	MMWORD [wk(1)], mm1	; wk(1)=z3H
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+        movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movq      mm4,mm2
-	movq      mm1,mm2
-	punpcklwd mm4,mm7
-	punpckhwd mm1,mm7
-	movq      mm2,mm4
-	movq      mm7,mm1
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]	; mm4=tmp4L
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]	; mm1=tmp4H
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]	; mm2=tmp7L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]	; mm7=tmp7H
+        movq      mm4,mm2
+        movq      mm1,mm2
+        punpcklwd mm4,mm7
+        punpckhwd mm1,mm7
+        movq      mm2,mm4
+        movq      mm7,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
 
-	paddd	mm4, MMWORD [wk(0)]	; mm4=data7L
-	paddd	mm1, MMWORD [wk(1)]	; mm1=data7H
-	paddd	mm2,mm0			; mm2=data1L
-	paddd	mm7,mm6			; mm7=data1H
+        paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+        paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+        paddd   mm2,mm0                 ; mm2=data1L
+        paddd   mm7,mm6                 ; mm7=data1H
 
-	paddd	mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm4,DESCALE_P1
-	psrad	mm1,DESCALE_P1
-	paddd	mm2,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm2,DESCALE_P1
-	psrad	mm7,DESCALE_P1
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm4,DESCALE_P1
+        psrad   mm1,DESCALE_P1
+        paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm2,DESCALE_P1
+        psrad   mm7,DESCALE_P1
 
-	packssdw  mm4,mm1		; mm4=data7
-	packssdw  mm2,mm7		; mm2=data1
+        packssdw  mm4,mm1               ; mm4=data7
+        packssdw  mm2,mm7               ; mm2=data1
 
-	movq	MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+        movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
 
-	movq      mm1,mm3
-	movq      mm7,mm3
-	punpcklwd mm1,mm5
-	punpckhwd mm7,mm5
-	movq      mm3,mm1
-	movq      mm5,mm7
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]	; mm1=tmp5L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]	; mm7=tmp5H
-	pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]	; mm3=tmp6L
-	pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]	; mm5=tmp6H
+        movq      mm1,mm3
+        movq      mm7,mm3
+        punpcklwd mm1,mm5
+        punpckhwd mm7,mm5
+        movq      mm3,mm1
+        movq      mm5,mm7
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
 
-	paddd	mm1,mm0			; mm1=data5L
-	paddd	mm7,mm6			; mm7=data5H
-	paddd	mm3, MMWORD [wk(0)]	; mm3=data3L
-	paddd	mm5, MMWORD [wk(1)]	; mm5=data3H
+        paddd   mm1,mm0                 ; mm1=data5L
+        paddd   mm7,mm6                 ; mm7=data5H
+        paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+        paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
 
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm1,DESCALE_P1
-	psrad	mm7,DESCALE_P1
-	paddd	mm3,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	mm5,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	mm3,DESCALE_P1
-	psrad	mm5,DESCALE_P1
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm1,DESCALE_P1
+        psrad   mm7,DESCALE_P1
+        paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm3,DESCALE_P1
+        psrad   mm5,DESCALE_P1
 
-	packssdw  mm1,mm7		; mm1=data5
-	packssdw  mm3,mm5		; mm3=data3
+        packssdw  mm1,mm7               ; mm1=data5
+        packssdw  mm3,mm5               ; mm3=data3
 
-	movq	MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1
-	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
 
-	add	edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	near .rowloop
+        add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .rowloop
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .columnloop:
 
-	movq	mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
-	movq	mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
 
-	; mm0=(02 12 22 32), mm2=(42 52 62 72)
-	; mm1=(03 13 23 33), mm3=(43 53 63 73)
+        ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+        ; mm1=(03 13 23 33), mm3=(43 53 63 73)
 
-	movq      mm4,mm0		; transpose coefficients(phase 1)
-	punpcklwd mm0,mm1		; mm0=(02 03 12 13)
-	punpckhwd mm4,mm1		; mm4=(22 23 32 33)
-	movq      mm5,mm2		; transpose coefficients(phase 1)
-	punpcklwd mm2,mm3		; mm2=(42 43 52 53)
-	punpckhwd mm5,mm3		; mm5=(62 63 72 73)
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+        punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+        punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
 
-	movq	mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movq	mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movq	mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
-	movq	mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
 
-	; mm6=(00 10 20 30), mm1=(40 50 60 70)
-	; mm7=(01 11 21 31), mm3=(41 51 61 71)
+        ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+        ; mm7=(01 11 21 31), mm3=(41 51 61 71)
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=(22 23 32 33)
-	movq	MMWORD [wk(1)], mm2	; wk(1)=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
 
-	movq      mm4,mm6		; transpose coefficients(phase 1)
-	punpcklwd mm6,mm7		; mm6=(00 01 10 11)
-	punpckhwd mm4,mm7		; mm4=(20 21 30 31)
-	movq      mm2,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm3		; mm1=(40 41 50 51)
-	punpckhwd mm2,mm3		; mm2=(60 61 70 71)
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+        punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+        punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
 
-	movq      mm7,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm0		; mm6=(00 01 02 03)=data0
-	punpckhdq mm7,mm0		; mm7=(10 11 12 13)=data1
-	movq      mm3,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm5		; mm2=(60 61 62 63)=data6
-	punpckhdq mm3,mm5		; mm3=(70 71 72 73)=data7
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+        punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
 
-	movq	mm0,mm7
-	movq	mm5,mm6
-	psubw	mm7,mm2			; mm7=data1-data6=tmp6
-	psubw	mm6,mm3			; mm6=data0-data7=tmp7
-	paddw	mm0,mm2			; mm0=data1+data6=tmp1
-	paddw	mm5,mm3			; mm5=data0+data7=tmp0
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
 
-	movq	mm2, MMWORD [wk(0)]	; mm2=(22 23 32 33)
-	movq	mm3, MMWORD [wk(1)]	; mm3=(42 43 52 53)
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp6
-	movq	MMWORD [wk(1)], mm6	; wk(1)=tmp7
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
 
-	movq      mm7,mm4		; transpose coefficients(phase 2)
-	punpckldq mm4,mm2		; mm4=(20 21 22 23)=data2
-	punpckhdq mm7,mm2		; mm7=(30 31 32 33)=data3
-	movq      mm6,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm3		; mm1=(40 41 42 43)=data4
-	punpckhdq mm6,mm3		; mm6=(50 51 52 53)=data5
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+        punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+        punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
 
-	movq	mm2,mm7
-	movq	mm3,mm4
-	paddw	mm7,mm1			; mm7=data3+data4=tmp3
-	paddw	mm4,mm6			; mm4=data2+data5=tmp2
-	psubw	mm2,mm1			; mm2=data3-data4=tmp4
-	psubw	mm3,mm6			; mm3=data2-data5=tmp5
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm1,mm5
-	movq	mm6,mm0
-	paddw	mm5,mm7			; mm5=tmp10
-	paddw	mm0,mm4			; mm0=tmp11
-	psubw	mm1,mm7			; mm1=tmp13
-	psubw	mm6,mm4			; mm6=tmp12
+        movq    mm1,mm5
+        movq    mm6,mm0
+        paddw   mm5,mm7                 ; mm5=tmp10
+        paddw   mm0,mm4                 ; mm0=tmp11
+        psubw   mm1,mm7                 ; mm1=tmp13
+        psubw   mm6,mm4                 ; mm6=tmp12
 
-	movq	mm7,mm5
-	paddw	mm5,mm0			; mm5=tmp10+tmp11
-	psubw	mm7,mm0			; mm7=tmp10-tmp11
+        movq    mm7,mm5
+        paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+        psubw   mm7,mm0                 ; mm7=tmp10-tmp11
 
-	paddw	mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
-	paddw	mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
-	psraw	mm5,PASS1_BITS		; mm5=data0
-	psraw	mm7,PASS1_BITS		; mm7=data4
+        paddw   mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        paddw   mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        psraw   mm5,PASS1_BITS          ; mm5=data0
+        psraw   mm7,PASS1_BITS          ; mm7=data4
 
-	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
-	movq	MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movq      mm4,mm1		; mm1=tmp13
-	movq      mm0,mm1
-	punpcklwd mm4,mm6		; mm6=tmp12
-	punpckhwd mm0,mm6
-	movq      mm1,mm4
-	movq      mm6,mm0
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]	; mm4=data2L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]	; mm0=data2H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]	; mm1=data6L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]	; mm6=data6H
+        movq      mm4,mm1               ; mm1=tmp13
+        movq      mm0,mm1
+        punpcklwd mm4,mm6               ; mm6=tmp12
+        punpckhwd mm0,mm6
+        movq      mm1,mm4
+        movq      mm6,mm0
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
 
-	paddd	mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm0,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm4,DESCALE_P2
-	psrad	mm0,DESCALE_P2
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm6,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm1,DESCALE_P2
-	psrad	mm6,DESCALE_P2
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm4,DESCALE_P2
+        psrad   mm0,DESCALE_P2
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm1,DESCALE_P2
+        psrad   mm6,DESCALE_P2
 
-	packssdw  mm4,mm0		; mm4=data2
-	packssdw  mm1,mm6		; mm1=data6
+        packssdw  mm4,mm0               ; mm4=data2
+        packssdw  mm1,mm6               ; mm1=data6
 
-	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
-	movq	MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm5, MMWORD [wk(0)]	; mm5=tmp6
-	movq	mm7, MMWORD [wk(1)]	; mm7=tmp7
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
 
-	movq	mm0,mm2			; mm2=tmp4
-	movq	mm6,mm3			; mm3=tmp5
-	paddw	mm0,mm5			; mm0=z3
-	paddw	mm6,mm7			; mm6=z4
+        movq    mm0,mm2                 ; mm2=tmp4
+        movq    mm6,mm3                 ; mm3=tmp5
+        paddw   mm0,mm5                 ; mm0=z3
+        paddw   mm6,mm7                 ; mm6=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movq      mm4,mm0
-	movq      mm1,mm0
-	punpcklwd mm4,mm6
-	punpckhwd mm1,mm6
-	movq      mm0,mm4
-	movq      mm6,mm1
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]	; mm4=z3L
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]	; mm1=z3H
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]	; mm0=z4L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]	; mm6=z4H
+        movq      mm4,mm0
+        movq      mm1,mm0
+        punpcklwd mm4,mm6
+        punpckhwd mm1,mm6
+        movq      mm0,mm4
+        movq      mm6,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
 
-	movq	MMWORD [wk(0)], mm4	; wk(0)=z3L
-	movq	MMWORD [wk(1)], mm1	; wk(1)=z3H
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+        movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movq      mm4,mm2
-	movq      mm1,mm2
-	punpcklwd mm4,mm7
-	punpckhwd mm1,mm7
-	movq      mm2,mm4
-	movq      mm7,mm1
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]	; mm4=tmp4L
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]	; mm1=tmp4H
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]	; mm2=tmp7L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]	; mm7=tmp7H
+        movq      mm4,mm2
+        movq      mm1,mm2
+        punpcklwd mm4,mm7
+        punpckhwd mm1,mm7
+        movq      mm2,mm4
+        movq      mm7,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
 
-	paddd	mm4, MMWORD [wk(0)]	; mm4=data7L
-	paddd	mm1, MMWORD [wk(1)]	; mm1=data7H
-	paddd	mm2,mm0			; mm2=data1L
-	paddd	mm7,mm6			; mm7=data1H
+        paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+        paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+        paddd   mm2,mm0                 ; mm2=data1L
+        paddd   mm7,mm6                 ; mm7=data1H
 
-	paddd	mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm4,DESCALE_P2
-	psrad	mm1,DESCALE_P2
-	paddd	mm2,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm2,DESCALE_P2
-	psrad	mm7,DESCALE_P2
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm4,DESCALE_P2
+        psrad   mm1,DESCALE_P2
+        paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm2,DESCALE_P2
+        psrad   mm7,DESCALE_P2
 
-	packssdw  mm4,mm1		; mm4=data7
-	packssdw  mm2,mm7		; mm2=data1
+        packssdw  mm4,mm1               ; mm4=data7
+        packssdw  mm2,mm7               ; mm2=data1
 
-	movq	MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4
-	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
 
-	movq      mm1,mm3
-	movq      mm7,mm3
-	punpcklwd mm1,mm5
-	punpckhwd mm7,mm5
-	movq      mm3,mm1
-	movq      mm5,mm7
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]	; mm1=tmp5L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]	; mm7=tmp5H
-	pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]	; mm3=tmp6L
-	pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]	; mm5=tmp6H
+        movq      mm1,mm3
+        movq      mm7,mm3
+        punpcklwd mm1,mm5
+        punpckhwd mm7,mm5
+        movq      mm3,mm1
+        movq      mm5,mm7
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
 
-	paddd	mm1,mm0			; mm1=data5L
-	paddd	mm7,mm6			; mm7=data5H
-	paddd	mm3, MMWORD [wk(0)]	; mm3=data3L
-	paddd	mm5, MMWORD [wk(1)]	; mm5=data3H
+        paddd   mm1,mm0                 ; mm1=data5L
+        paddd   mm7,mm6                 ; mm7=data5H
+        paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+        paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
 
-	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm1,DESCALE_P2
-	psrad	mm7,DESCALE_P2
-	paddd	mm3,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	mm5,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	mm3,DESCALE_P2
-	psrad	mm5,DESCALE_P2
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm1,DESCALE_P2
+        psrad   mm7,DESCALE_P2
+        paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm3,DESCALE_P2
+        psrad   mm5,DESCALE_P2
 
-	packssdw  mm1,mm7		; mm1=data5
-	packssdw  mm3,mm5		; mm3=data3
+        packssdw  mm1,mm7               ; mm1=data5
+        packssdw  mm3,mm5               ; mm3=data3
 
-	movq	MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1
-	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
 
-	add	edx, byte 4*SIZEOF_DCTELEM
-	dec	ecx
-	jnz	near .columnloop
+        add     edx, byte 4*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .columnloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfss2fst-64.asm b/simd/jfss2fst-64.asm
index 6953caf..41483bf 100644
--- a/simd/jfss2fst-64.asm
+++ b/simd/jfss2fst-64.asm
@@ -27,24 +27,24 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
+%define CONST_BITS      8       ; 14 is also OK.
 
 %if CONST_BITS == 8
-F_0_382	equ	 98		; FIX(0.382683433)
-F_0_541	equ	139		; FIX(0.541196100)
-F_0_707	equ	181		; FIX(0.707106781)
-F_1_306	equ	334		; FIX(1.306562965)
+F_0_382 equ      98             ; FIX(0.382683433)
+F_0_541 equ     139             ; FIX(0.541196100)
+F_0_707 equ     181             ; FIX(0.707106781)
+F_1_306 equ     334             ; FIX(1.306562965)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_382	equ	DESCALE( 410903207,30-CONST_BITS)	; FIX(0.382683433)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_707	equ	DESCALE( 759250124,30-CONST_BITS)	; FIX(0.707106781)
-F_1_306	equ	DESCALE(1402911301,30-CONST_BITS)	; FIX(1.306562965)
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -52,21 +52,21 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_fdct_ifast_sse2)
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
 
-PW_F0707	times 8 dw  F_0_707 << CONST_SHIFT
-PW_F0382	times 8 dw  F_0_382 << CONST_SHIFT
-PW_F0541	times 8 dw  F_0_541 << CONST_SHIFT
-PW_F1306	times 8 dw  F_1_306 << CONST_SHIFT
+PW_F0707        times 8 dw  F_0_707 << CONST_SHIFT
+PW_F0382        times 8 dw  F_0_382 << CONST_SHIFT
+PW_F0541        times 8 dw  F_0_541 << CONST_SHIFT
+PW_F1306        times 8 dw  F_1_306 << CONST_SHIFT
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -76,317 +76,317 @@
 
 ; r10 = DCTELEM * data
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_ifast_sse2)
+        align   16
+        global  EXTN(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp
+        mov     rax,rsp                         ; rax = rsp after push (-> saved rbp)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; save unaligned rsp at frame base
+        mov     rbp,rsp                         ; rbp = aligned frame base
+        lea     rsp, [wk(0)]                    ; rsp = &wk[0] (lowest wk slot)
+        collect_args
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	rdx, r10	; (DCTELEM *)
+        mov     rdx, r10        ; (DCTELEM *)
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
 
-	; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
-	; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm1		; xmm0=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm4,xmm1		; xmm4=(04 14 05 15 06 16 07 17)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm3		; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm5,xmm3		; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
 
-	; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
-	; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
+        ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+        ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(20 30 21 31 22 32 23 33)
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm2,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm7		; xmm2=(44 54 45 55 46 56 47 57)
-	movdqa    xmm5,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm3		; xmm1=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm5,xmm3		; xmm5=(64 74 65 75 66 76 67 77)
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
 
-	movdqa    xmm7,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm1		; xmm6=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm7,xmm1		; xmm7=(42 52 62 72 43 53 63 73)
-	movdqa    xmm3,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm3,xmm5		; xmm3=(46 56 66 76 47 57 67 77)
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(20 30 21 31 22 32 23 33)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(24 34 25 35 26 36 27 37)
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=(42 52 62 72 43 53 63 73)
-	movdqa	XMMWORD [wk(1)], xmm2	; wk(1)=(44 54 64 74 45 55 65 75)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=(44 54 64 74 45 55 65 75)
 
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm1		; xmm0=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm7,xmm1		; xmm7=(02 12 22 32 03 13 23 33)
-	movdqa    xmm2,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm5		; xmm4=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm2,xmm5		; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa     xmm1,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=(00 10 20 30 40 50 60 70)=data0
-	punpckhqdq xmm1,xmm6		; xmm1=(01 11 21 31 41 51 61 71)=data1
-	movdqa     xmm5,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm3		; xmm2=(06 16 26 36 46 56 66 76)=data6
-	punpckhqdq xmm5,xmm3		; xmm5=(07 17 27 37 47 57 67 77)=data7
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
 
-	movdqa	xmm6,xmm1
-	movdqa	xmm3,xmm0
-	psubw	xmm1,xmm2		; xmm1=data1-data6=tmp6
-	psubw	xmm0,xmm5		; xmm0=data0-data7=tmp7
-	paddw	xmm6,xmm2		; xmm6=data1+data6=tmp1
-	paddw	xmm3,xmm5		; xmm3=data0+data7=tmp0
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm2, XMMWORD [wk(0)]	; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(44 54 64 74 45 55 65 75)
-	movdqa	XMMWORD [wk(0)], xmm1	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp7
+        movdqa  xmm2, XMMWORD [wk(0)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
 
-	movdqa     xmm1,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm2		; xmm7=(02 12 22 32 42 52 62 72)=data2
-	punpckhqdq xmm1,xmm2		; xmm1=(03 13 23 33 43 53 63 73)=data3
-	movdqa     xmm0,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm5		; xmm4=(04 14 24 34 44 54 64 74)=data4
-	punpckhqdq xmm0,xmm5		; xmm0=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
 
-	movdqa	xmm2,xmm1
-	movdqa	xmm5,xmm7
-	paddw	xmm1,xmm4		; xmm1=data3+data4=tmp3
-	paddw	xmm7,xmm0		; xmm7=data2+data5=tmp2
-	psubw	xmm2,xmm4		; xmm2=data3-data4=tmp4
-	psubw	xmm5,xmm0		; xmm5=data2-data5=tmp5
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm0,xmm6
-	psubw	xmm3,xmm1		; xmm3=tmp13
-	psubw	xmm6,xmm7		; xmm6=tmp12
-	paddw	xmm4,xmm1		; xmm4=tmp10
-	paddw	xmm0,xmm7		; xmm0=tmp11
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        psubw   xmm3,xmm1               ; xmm3=tmp13
+        psubw   xmm6,xmm7               ; xmm6=tmp12
+        paddw   xmm4,xmm1               ; xmm4=tmp10
+        paddw   xmm0,xmm7               ; xmm0=tmp11
 
-	paddw	xmm6,xmm3
-	psllw	xmm6,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm6,[rel PW_F0707] ; xmm6=z1
+        paddw   xmm6,xmm3
+        psllw   xmm6,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm6,[rel PW_F0707] ; xmm6=z1
 
-	movdqa	xmm1,xmm4
-	movdqa	xmm7,xmm3
-	psubw	xmm4,xmm0		; xmm4=data4
-	psubw	xmm3,xmm6		; xmm3=data6
-	paddw	xmm1,xmm0		; xmm1=data0
-	paddw	xmm7,xmm6		; xmm7=data2
+        movdqa  xmm1,xmm4
+        movdqa  xmm7,xmm3
+        psubw   xmm4,xmm0               ; xmm4=data4
+        psubw   xmm3,xmm6               ; xmm3=data6
+        paddw   xmm1,xmm0               ; xmm1=data0
+        paddw   xmm7,xmm6               ; xmm7=data2
 
-	movdqa	xmm0, XMMWORD [wk(0)]	; xmm0=tmp6
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=tmp7
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=data4
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=data6
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=tmp6
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp7
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=data4
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=data6
 
-	; -- Odd part
+        ; -- Odd part
 
-	paddw	xmm2,xmm5		; xmm2=tmp10
-	paddw	xmm5,xmm0		; xmm5=tmp11
-	paddw	xmm0,xmm6		; xmm0=tmp12, xmm6=tmp7
+        paddw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm5,xmm0               ; xmm5=tmp11
+        paddw   xmm0,xmm6               ; xmm0=tmp12, xmm6=tmp7
 
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[rel PW_F0707] ; xmm5=z3
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[rel PW_F0707] ; xmm5=z3
 
-	movdqa	xmm4,xmm2		; xmm4=tmp10
-	psubw	xmm2,xmm0
-	pmulhw	xmm2,[rel PW_F0382] ; xmm2=z5
-	pmulhw	xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
-	pmulhw	xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
-	paddw	xmm4,xmm2		; xmm4=z2
-	paddw	xmm0,xmm2		; xmm0=z4
+        movdqa  xmm4,xmm2               ; xmm4=tmp10
+        psubw   xmm2,xmm0
+        pmulhw  xmm2,[rel PW_F0382] ; xmm2=z5
+        pmulhw  xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm2               ; xmm4=z2
+        paddw   xmm0,xmm2               ; xmm0=z4
 
-	movdqa	xmm3,xmm6
-	psubw	xmm6,xmm5		; xmm6=z13
-	paddw	xmm3,xmm5		; xmm3=z11
+        movdqa  xmm3,xmm6
+        psubw   xmm6,xmm5               ; xmm6=z13
+        paddw   xmm3,xmm5               ; xmm3=z11
 
-	movdqa	xmm2,xmm6
-	movdqa	xmm5,xmm3
-	psubw	xmm6,xmm4		; xmm6=data3
-	psubw	xmm3,xmm0		; xmm3=data7
-	paddw	xmm2,xmm4		; xmm2=data5
-	paddw	xmm5,xmm0		; xmm5=data1
+        movdqa  xmm2,xmm6
+        movdqa  xmm5,xmm3
+        psubw   xmm6,xmm4               ; xmm6=data3
+        psubw   xmm3,xmm0               ; xmm3=data7
+        paddw   xmm2,xmm4               ; xmm2=data5
+        paddw   xmm5,xmm0               ; xmm5=data1
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
-	; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
+        ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
+        ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
 
-	movdqa    xmm4,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm5		; xmm1=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm4,xmm5		; xmm4=(40 41 50 51 60 61 70 71)
-	movdqa    xmm0,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm6		; xmm7=(02 03 12 13 22 23 32 33)
-	punpckhwd xmm0,xmm6		; xmm0=(42 43 52 53 62 63 72 73)
+        movdqa    xmm4,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm4,xmm5             ; xmm4=(40 41 50 51 60 61 70 71)
+        movdqa    xmm0,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm6             ; xmm7=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm0,xmm6             ; xmm0=(42 43 52 53 62 63 72 73)
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=col4
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=col6
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=col4
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=col6
 
-	; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
-	; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
+        ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
+        ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=(02 03 12 13 22 23 32 33)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(42 43 52 53 62 63 72 73)
 
-	movdqa    xmm7,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm2		; xmm5=(04 05 14 15 24 25 34 35)
-	punpckhwd xmm7,xmm2		; xmm7=(44 45 54 55 64 65 74 75)
-	movdqa    xmm0,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm3		; xmm6=(06 07 16 17 26 27 36 37)
-	punpckhwd xmm0,xmm3		; xmm0=(46 47 56 57 66 67 76 77)
+        movdqa    xmm7,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm2             ; xmm5=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm7,xmm2             ; xmm7=(44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm3             ; xmm6=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm0,xmm3             ; xmm0=(46 47 56 57 66 67 76 77)
 
-	movdqa    xmm2,xmm5		; transpose coefficients(phase 2)
-	punpckldq xmm5,xmm6		; xmm5=(04 05 06 07 14 15 16 17)
-	punpckhdq xmm2,xmm6		; xmm2=(24 25 26 27 34 35 36 37)
-	movdqa    xmm3,xmm7		; transpose coefficients(phase 2)
-	punpckldq xmm7,xmm0		; xmm7=(44 45 46 47 54 55 56 57)
-	punpckhdq xmm3,xmm0		; xmm3=(64 65 66 67 74 75 76 77)
+        movdqa    xmm2,xmm5             ; transpose coefficients(phase 2)
+        punpckldq xmm5,xmm6             ; xmm5=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm2,xmm6             ; xmm2=(24 25 26 27 34 35 36 37)
+        movdqa    xmm3,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm0             ; xmm7=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm3,xmm0             ; xmm3=(64 65 66 67 74 75 76 77)
 
-	movdqa	xmm6, XMMWORD [wk(0)]	; xmm6=(02 03 12 13 22 23 32 33)
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(42 43 52 53 62 63 72 73)
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(24 25 26 27 34 35 36 37)
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=(44 45 46 47 54 55 56 57)
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=(02 03 12 13 22 23 32 33)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(44 45 46 47 54 55 56 57)
 
-	movdqa    xmm2,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm6		; xmm1=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm2,xmm6		; xmm2=(20 21 22 23 30 31 32 33)
-	movdqa    xmm7,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm0		; xmm4=(40 41 42 43 50 51 52 53)
-	punpckhdq xmm7,xmm0		; xmm7=(60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm6             ; xmm1=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm2,xmm6             ; xmm2=(20 21 22 23 30 31 32 33)
+        movdqa    xmm7,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm7,xmm0             ; xmm7=(60 61 62 63 70 71 72 73)
 
-	movdqa     xmm6,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm5		; xmm1=(00 01 02 03 04 05 06 07)=data0
-	punpckhqdq xmm6,xmm5		; xmm6=(10 11 12 13 14 15 16 17)=data1
-	movdqa     xmm0,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm3		; xmm7=(60 61 62 63 64 65 66 67)=data6
-	punpckhqdq xmm0,xmm3		; xmm0=(70 71 72 73 74 75 76 77)=data7
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm5            ; xmm1=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm6,xmm5            ; xmm6=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm0,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm3            ; xmm7=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm0,xmm3            ; xmm0=(70 71 72 73 74 75 76 77)=data7
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm3,xmm1
-	psubw	xmm6,xmm7		; xmm6=data1-data6=tmp6
-	psubw	xmm1,xmm0		; xmm1=data0-data7=tmp7
-	paddw	xmm5,xmm7		; xmm5=data1+data6=tmp1
-	paddw	xmm3,xmm0		; xmm3=data0+data7=tmp0
+        movdqa  xmm5,xmm6
+        movdqa  xmm3,xmm1
+        psubw   xmm6,xmm7               ; xmm6=data1-data6=tmp6
+        psubw   xmm1,xmm0               ; xmm1=data0-data7=tmp7
+        paddw   xmm5,xmm7               ; xmm5=data1+data6=tmp1
+        paddw   xmm3,xmm0               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=(24 25 26 27 34 35 36 37)
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(44 45 46 47 54 55 56 57)
-	movdqa	XMMWORD [wk(0)], xmm6	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm1	; wk(1)=tmp7
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(24 25 26 27 34 35 36 37)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=tmp7
 
-	movdqa     xmm6,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm7		; xmm2=(20 21 22 23 24 25 26 27)=data2
-	punpckhqdq xmm6,xmm7		; xmm6=(30 31 32 33 34 35 36 37)=data3
-	movdqa     xmm1,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm0		; xmm4=(40 41 42 43 44 45 46 47)=data4
-	punpckhqdq xmm1,xmm0		; xmm1=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm6,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm7            ; xmm2=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm6,xmm7            ; xmm6=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm1,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm1,xmm0            ; xmm1=(50 51 52 53 54 55 56 57)=data5
 
-	movdqa	xmm7,xmm6
-	movdqa	xmm0,xmm2
-	paddw	xmm6,xmm4		; xmm6=data3+data4=tmp3
-	paddw	xmm2,xmm1		; xmm2=data2+data5=tmp2
-	psubw	xmm7,xmm4		; xmm7=data3-data4=tmp4
-	psubw	xmm0,xmm1		; xmm0=data2-data5=tmp5
+        movdqa  xmm7,xmm6
+        movdqa  xmm0,xmm2
+        paddw   xmm6,xmm4               ; xmm6=data3+data4=tmp3
+        paddw   xmm2,xmm1               ; xmm2=data2+data5=tmp2
+        psubw   xmm7,xmm4               ; xmm7=data3-data4=tmp4
+        psubw   xmm0,xmm1               ; xmm0=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm1,xmm5
-	psubw	xmm3,xmm6		; xmm3=tmp13
-	psubw	xmm5,xmm2		; xmm5=tmp12
-	paddw	xmm4,xmm6		; xmm4=tmp10
-	paddw	xmm1,xmm2		; xmm1=tmp11
+        movdqa  xmm4,xmm3
+        movdqa  xmm1,xmm5
+        psubw   xmm3,xmm6               ; xmm3=tmp13
+        psubw   xmm5,xmm2               ; xmm5=tmp12
+        paddw   xmm4,xmm6               ; xmm4=tmp10
+        paddw   xmm1,xmm2               ; xmm1=tmp11
 
-	paddw	xmm5,xmm3
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[rel PW_F0707] ; xmm5=z1
+        paddw   xmm5,xmm3
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[rel PW_F0707] ; xmm5=z1
 
-	movdqa	xmm6,xmm4
-	movdqa	xmm2,xmm3
-	psubw	xmm4,xmm1		; xmm4=data4
-	psubw	xmm3,xmm5		; xmm3=data6
-	paddw	xmm6,xmm1		; xmm6=data0
-	paddw	xmm2,xmm5		; xmm2=data2
+        movdqa  xmm6,xmm4
+        movdqa  xmm2,xmm3
+        psubw   xmm4,xmm1               ; xmm4=data4
+        psubw   xmm3,xmm5               ; xmm3=data6
+        paddw   xmm6,xmm1               ; xmm6=data0
+        paddw   xmm2,xmm5               ; xmm2=data2
 
-	movdqa	XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3
-	movdqa	XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6
-	movdqa	XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=tmp6
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp7
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
 
-	paddw	xmm7,xmm0		; xmm7=tmp10
-	paddw	xmm0,xmm1		; xmm0=tmp11
-	paddw	xmm1,xmm5		; xmm1=tmp12, xmm5=tmp7
+        paddw   xmm7,xmm0               ; xmm7=tmp10
+        paddw   xmm0,xmm1               ; xmm0=tmp11
+        paddw   xmm1,xmm5               ; xmm1=tmp12, xmm5=tmp7
 
-	psllw	xmm7,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm1,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm7,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm0,[rel PW_F0707] ; xmm0=z3
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm0,[rel PW_F0707] ; xmm0=z3
 
-	movdqa	xmm4,xmm7		; xmm4=tmp10
-	psubw	xmm7,xmm1
-	pmulhw	xmm7,[rel PW_F0382] ; xmm7=z5
-	pmulhw	xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
-	pmulhw	xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
-	paddw	xmm4,xmm7		; xmm4=z2
-	paddw	xmm1,xmm7		; xmm1=z4
+        movdqa  xmm4,xmm7               ; xmm4=tmp10
+        psubw   xmm7,xmm1
+        pmulhw  xmm7,[rel PW_F0382] ; xmm7=z5
+        pmulhw  xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm7               ; xmm4=z2
+        paddw   xmm1,xmm7               ; xmm1=z4
 
-	movdqa	xmm3,xmm5
-	psubw	xmm5,xmm0		; xmm5=z13
-	paddw	xmm3,xmm0		; xmm3=z11
+        movdqa  xmm3,xmm5
+        psubw   xmm5,xmm0               ; xmm5=z13
+        paddw   xmm3,xmm0               ; xmm3=z11
 
-	movdqa	xmm6,xmm5
-	movdqa	xmm2,xmm3
-	psubw	xmm5,xmm4		; xmm5=data3
-	psubw	xmm3,xmm1		; xmm3=data7
-	paddw	xmm6,xmm4		; xmm6=data5
-	paddw	xmm2,xmm1		; xmm2=data1
+        movdqa  xmm6,xmm5
+        movdqa  xmm2,xmm3
+        psubw   xmm5,xmm4               ; xmm5=data3
+        psubw   xmm3,xmm1               ; xmm3=data7
+        paddw   xmm6,xmm4               ; xmm6=data5
+        paddw   xmm2,xmm1               ; xmm2=data1
 
-	movdqa	XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5
-	movdqa	XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3
-	movdqa	XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
-	movdqa	XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned frame base
+        pop     rsp             ; rsp <- saved unaligned rsp (-> saved rbp)
+        pop     rbp             ; restore caller's rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfss2fst.asm b/simd/jfss2fst.asm
index 73fc9e5..8bc05f4 100644
--- a/simd/jfss2fst.asm
+++ b/simd/jfss2fst.asm
@@ -26,24 +26,24 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
+%define CONST_BITS      8       ; 14 is also OK.
 
 %if CONST_BITS == 8
-F_0_382	equ	 98		; FIX(0.382683433)
-F_0_541	equ	139		; FIX(0.541196100)
-F_0_707	equ	181		; FIX(0.707106781)
-F_1_306	equ	334		; FIX(1.306562965)
+F_0_382 equ      98             ; FIX(0.382683433)
+F_0_541 equ     139             ; FIX(0.541196100)
+F_0_707 equ     181             ; FIX(0.707106781)
+F_1_306 equ     334             ; FIX(1.306562965)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_382	equ	DESCALE( 410903207,30-CONST_BITS)	; FIX(0.382683433)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_707	equ	DESCALE( 759250124,30-CONST_BITS)	; FIX(0.707106781)
-F_1_306	equ	DESCALE(1402911301,30-CONST_BITS)	; FIX(1.306562965)
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -51,21 +51,21 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_fdct_ifast_sse2)
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
 
-PW_F0707	times 8 dw  F_0_707 << CONST_SHIFT
-PW_F0382	times 8 dw  F_0_382 << CONST_SHIFT
-PW_F0541	times 8 dw  F_0_541 << CONST_SHIFT
-PW_F1306	times 8 dw  F_1_306 << CONST_SHIFT
+PW_F0707        times 8 dw  F_0_707 << CONST_SHIFT
+PW_F0382        times 8 dw  F_0_382 << CONST_SHIFT
+PW_F0541        times 8 dw  F_0_541 << CONST_SHIFT
+PW_F1306        times 8 dw  F_1_306 << CONST_SHIFT
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -73,332 +73,332 @@
 ; jsimd_fdct_ifast_sse2 (DCTELEM * data)
 ;
 
-%define data(b)		(b)+8		; DCTELEM * data
+%define data(b)         (b)+8           ; DCTELEM * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_ifast_sse2)
+        align   16
+        global  EXTN(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
 
-	; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
-	; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm1		; xmm0=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm4,xmm1		; xmm4=(04 14 05 15 06 16 07 17)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm3		; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm5,xmm3		; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
 
-	; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
-	; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
+        ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
+        ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(20 30 21 31 22 32 23 33)
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm2,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm7		; xmm2=(44 54 45 55 46 56 47 57)
-	movdqa    xmm5,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm3		; xmm1=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm5,xmm3		; xmm5=(64 74 65 75 66 76 67 77)
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
 
-	movdqa    xmm7,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm1		; xmm6=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm7,xmm1		; xmm7=(42 52 62 72 43 53 63 73)
-	movdqa    xmm3,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm3,xmm5		; xmm3=(46 56 66 76 47 57 67 77)
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(20 30 21 31 22 32 23 33)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(24 34 25 35 26 36 27 37)
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=(42 52 62 72 43 53 63 73)
-	movdqa	XMMWORD [wk(1)], xmm2	; wk(1)=(44 54 64 74 45 55 65 75)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=(44 54 64 74 45 55 65 75)
 
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm1		; xmm0=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm7,xmm1		; xmm7=(02 12 22 32 03 13 23 33)
-	movdqa    xmm2,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm5		; xmm4=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm2,xmm5		; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa     xmm1,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=(00 10 20 30 40 50 60 70)=data0
-	punpckhqdq xmm1,xmm6		; xmm1=(01 11 21 31 41 51 61 71)=data1
-	movdqa     xmm5,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm3		; xmm2=(06 16 26 36 46 56 66 76)=data6
-	punpckhqdq xmm5,xmm3		; xmm5=(07 17 27 37 47 57 67 77)=data7
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
 
-	movdqa	xmm6,xmm1
-	movdqa	xmm3,xmm0
-	psubw	xmm1,xmm2		; xmm1=data1-data6=tmp6
-	psubw	xmm0,xmm5		; xmm0=data0-data7=tmp7
-	paddw	xmm6,xmm2		; xmm6=data1+data6=tmp1
-	paddw	xmm3,xmm5		; xmm3=data0+data7=tmp0
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm2, XMMWORD [wk(0)]	; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(44 54 64 74 45 55 65 75)
-	movdqa	XMMWORD [wk(0)], xmm1	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp7
+        movdqa  xmm2, XMMWORD [wk(0)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
 
-	movdqa     xmm1,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm2		; xmm7=(02 12 22 32 42 52 62 72)=data2
-	punpckhqdq xmm1,xmm2		; xmm1=(03 13 23 33 43 53 63 73)=data3
-	movdqa     xmm0,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm5		; xmm4=(04 14 24 34 44 54 64 74)=data4
-	punpckhqdq xmm0,xmm5		; xmm0=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
 
-	movdqa	xmm2,xmm1
-	movdqa	xmm5,xmm7
-	paddw	xmm1,xmm4		; xmm1=data3+data4=tmp3
-	paddw	xmm7,xmm0		; xmm7=data2+data5=tmp2
-	psubw	xmm2,xmm4		; xmm2=data3-data4=tmp4
-	psubw	xmm5,xmm0		; xmm5=data2-data5=tmp5
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm0,xmm6
-	psubw	xmm3,xmm1		; xmm3=tmp13
-	psubw	xmm6,xmm7		; xmm6=tmp12
-	paddw	xmm4,xmm1		; xmm4=tmp10
-	paddw	xmm0,xmm7		; xmm0=tmp11
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        psubw   xmm3,xmm1               ; xmm3=tmp13
+        psubw   xmm6,xmm7               ; xmm6=tmp12
+        paddw   xmm4,xmm1               ; xmm4=tmp10
+        paddw   xmm0,xmm7               ; xmm0=tmp11
 
-	paddw	xmm6,xmm3
-	psllw	xmm6,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1
+        paddw   xmm6,xmm3
+        psllw   xmm6,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1
 
-	movdqa	xmm1,xmm4
-	movdqa	xmm7,xmm3
-	psubw	xmm4,xmm0		; xmm4=data4
-	psubw	xmm3,xmm6		; xmm3=data6
-	paddw	xmm1,xmm0		; xmm1=data0
-	paddw	xmm7,xmm6		; xmm7=data2
+        movdqa  xmm1,xmm4
+        movdqa  xmm7,xmm3
+        psubw   xmm4,xmm0               ; xmm4=data4
+        psubw   xmm3,xmm6               ; xmm3=data6
+        paddw   xmm1,xmm0               ; xmm1=data0
+        paddw   xmm7,xmm6               ; xmm7=data2
 
-	movdqa	xmm0, XMMWORD [wk(0)]	; xmm0=tmp6
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=tmp7
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=data4
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=data6
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=tmp6
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp7
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=data4
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=data6
 
-	; -- Odd part
+        ; -- Odd part
 
-	paddw	xmm2,xmm5		; xmm2=tmp10
-	paddw	xmm5,xmm0		; xmm5=tmp11
-	paddw	xmm0,xmm6		; xmm0=tmp12, xmm6=tmp7
+        paddw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm5,xmm0               ; xmm5=tmp11
+        paddw   xmm0,xmm6               ; xmm0=tmp12, xmm6=tmp7
 
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3
 
-	movdqa	xmm4,xmm2		; xmm4=tmp10
-	psubw	xmm2,xmm0
-	pmulhw	xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5
-	pmulhw	xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
-	pmulhw	xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
-	paddw	xmm4,xmm2		; xmm4=z2
-	paddw	xmm0,xmm2		; xmm0=z4
+        movdqa  xmm4,xmm2               ; xmm4=tmp10
+        psubw   xmm2,xmm0
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm2               ; xmm4=z2
+        paddw   xmm0,xmm2               ; xmm0=z4
 
-	movdqa	xmm3,xmm6
-	psubw	xmm6,xmm5		; xmm6=z13
-	paddw	xmm3,xmm5		; xmm3=z11
+        movdqa  xmm3,xmm6
+        psubw   xmm6,xmm5               ; xmm6=z13
+        paddw   xmm3,xmm5               ; xmm3=z11
 
-	movdqa	xmm2,xmm6
-	movdqa	xmm5,xmm3
-	psubw	xmm6,xmm4		; xmm6=data3
-	psubw	xmm3,xmm0		; xmm3=data7
-	paddw	xmm2,xmm4		; xmm2=data5
-	paddw	xmm5,xmm0		; xmm5=data1
+        movdqa  xmm2,xmm6
+        movdqa  xmm5,xmm3
+        psubw   xmm6,xmm4               ; xmm6=data3
+        psubw   xmm3,xmm0               ; xmm3=data7
+        paddw   xmm2,xmm4               ; xmm2=data5
+        paddw   xmm5,xmm0               ; xmm5=data1
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-;	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
+;       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
 
-	; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
-	; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
+        ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
+        ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
 
-	movdqa    xmm4,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm5		; xmm1=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm4,xmm5		; xmm4=(40 41 50 51 60 61 70 71)
-	movdqa    xmm0,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm6		; xmm7=(02 03 12 13 22 23 32 33)
-	punpckhwd xmm0,xmm6		; xmm0=(42 43 52 53 62 63 72 73)
+        movdqa    xmm4,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm4,xmm5             ; xmm4=(40 41 50 51 60 61 70 71)
+        movdqa    xmm0,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm6             ; xmm7=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm0,xmm6             ; xmm0=(42 43 52 53 62 63 72 73)
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=col4
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=col6
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=col4
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=col6
 
-	; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
-	; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
+        ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
+        ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=(02 03 12 13 22 23 32 33)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(42 43 52 53 62 63 72 73)
 
-	movdqa    xmm7,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm2		; xmm5=(04 05 14 15 24 25 34 35)
-	punpckhwd xmm7,xmm2		; xmm7=(44 45 54 55 64 65 74 75)
-	movdqa    xmm0,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm3		; xmm6=(06 07 16 17 26 27 36 37)
-	punpckhwd xmm0,xmm3		; xmm0=(46 47 56 57 66 67 76 77)
+        movdqa    xmm7,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm2             ; xmm5=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm7,xmm2             ; xmm7=(44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm3             ; xmm6=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm0,xmm3             ; xmm0=(46 47 56 57 66 67 76 77)
 
-	movdqa    xmm2,xmm5		; transpose coefficients(phase 2)
-	punpckldq xmm5,xmm6		; xmm5=(04 05 06 07 14 15 16 17)
-	punpckhdq xmm2,xmm6		; xmm2=(24 25 26 27 34 35 36 37)
-	movdqa    xmm3,xmm7		; transpose coefficients(phase 2)
-	punpckldq xmm7,xmm0		; xmm7=(44 45 46 47 54 55 56 57)
-	punpckhdq xmm3,xmm0		; xmm3=(64 65 66 67 74 75 76 77)
+        movdqa    xmm2,xmm5             ; transpose coefficients(phase 2)
+        punpckldq xmm5,xmm6             ; xmm5=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm2,xmm6             ; xmm2=(24 25 26 27 34 35 36 37)
+        movdqa    xmm3,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm0             ; xmm7=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm3,xmm0             ; xmm3=(64 65 66 67 74 75 76 77)
 
-	movdqa	xmm6, XMMWORD [wk(0)]	; xmm6=(02 03 12 13 22 23 32 33)
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(42 43 52 53 62 63 72 73)
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(24 25 26 27 34 35 36 37)
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=(44 45 46 47 54 55 56 57)
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=(02 03 12 13 22 23 32 33)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(44 45 46 47 54 55 56 57)
 
-	movdqa    xmm2,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm6		; xmm1=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm2,xmm6		; xmm2=(20 21 22 23 30 31 32 33)
-	movdqa    xmm7,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm0		; xmm4=(40 41 42 43 50 51 52 53)
-	punpckhdq xmm7,xmm0		; xmm7=(60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm6             ; xmm1=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm2,xmm6             ; xmm2=(20 21 22 23 30 31 32 33)
+        movdqa    xmm7,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm7,xmm0             ; xmm7=(60 61 62 63 70 71 72 73)
 
-	movdqa     xmm6,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm5		; xmm1=(00 01 02 03 04 05 06 07)=data0
-	punpckhqdq xmm6,xmm5		; xmm6=(10 11 12 13 14 15 16 17)=data1
-	movdqa     xmm0,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm3		; xmm7=(60 61 62 63 64 65 66 67)=data6
-	punpckhqdq xmm0,xmm3		; xmm0=(70 71 72 73 74 75 76 77)=data7
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm5            ; xmm1=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm6,xmm5            ; xmm6=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm0,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm3            ; xmm7=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm0,xmm3            ; xmm0=(70 71 72 73 74 75 76 77)=data7
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm3,xmm1
-	psubw	xmm6,xmm7		; xmm6=data1-data6=tmp6
-	psubw	xmm1,xmm0		; xmm1=data0-data7=tmp7
-	paddw	xmm5,xmm7		; xmm5=data1+data6=tmp1
-	paddw	xmm3,xmm0		; xmm3=data0+data7=tmp0
+        movdqa  xmm5,xmm6
+        movdqa  xmm3,xmm1
+        psubw   xmm6,xmm7               ; xmm6=data1-data6=tmp6
+        psubw   xmm1,xmm0               ; xmm1=data0-data7=tmp7
+        paddw   xmm5,xmm7               ; xmm5=data1+data6=tmp1
+        paddw   xmm3,xmm0               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=(24 25 26 27 34 35 36 37)
-	movdqa	xmm0, XMMWORD [wk(1)]	; xmm0=(44 45 46 47 54 55 56 57)
-	movdqa	XMMWORD [wk(0)], xmm6	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm1	; wk(1)=tmp7
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(24 25 26 27 34 35 36 37)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=tmp7
 
-	movdqa     xmm6,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm7		; xmm2=(20 21 22 23 24 25 26 27)=data2
-	punpckhqdq xmm6,xmm7		; xmm6=(30 31 32 33 34 35 36 37)=data3
-	movdqa     xmm1,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm0		; xmm4=(40 41 42 43 44 45 46 47)=data4
-	punpckhqdq xmm1,xmm0		; xmm1=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm6,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm7            ; xmm2=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm6,xmm7            ; xmm6=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm1,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm1,xmm0            ; xmm1=(50 51 52 53 54 55 56 57)=data5
 
-	movdqa	xmm7,xmm6
-	movdqa	xmm0,xmm2
-	paddw	xmm6,xmm4		; xmm6=data3+data4=tmp3
-	paddw	xmm2,xmm1		; xmm2=data2+data5=tmp2
-	psubw	xmm7,xmm4		; xmm7=data3-data4=tmp4
-	psubw	xmm0,xmm1		; xmm0=data2-data5=tmp5
+        movdqa  xmm7,xmm6
+        movdqa  xmm0,xmm2
+        paddw   xmm6,xmm4               ; xmm6=data3+data4=tmp3
+        paddw   xmm2,xmm1               ; xmm2=data2+data5=tmp2
+        psubw   xmm7,xmm4               ; xmm7=data3-data4=tmp4
+        psubw   xmm0,xmm1               ; xmm0=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm1,xmm5
-	psubw	xmm3,xmm6		; xmm3=tmp13
-	psubw	xmm5,xmm2		; xmm5=tmp12
-	paddw	xmm4,xmm6		; xmm4=tmp10
-	paddw	xmm1,xmm2		; xmm1=tmp11
+        movdqa  xmm4,xmm3
+        movdqa  xmm1,xmm5
+        psubw   xmm3,xmm6               ; xmm3=tmp13
+        psubw   xmm5,xmm2               ; xmm5=tmp12
+        paddw   xmm4,xmm6               ; xmm4=tmp10
+        paddw   xmm1,xmm2               ; xmm1=tmp11
 
-	paddw	xmm5,xmm3
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1
+        paddw   xmm5,xmm3
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1
 
-	movdqa	xmm6,xmm4
-	movdqa	xmm2,xmm3
-	psubw	xmm4,xmm1		; xmm4=data4
-	psubw	xmm3,xmm5		; xmm3=data6
-	paddw	xmm6,xmm1		; xmm6=data0
-	paddw	xmm2,xmm5		; xmm2=data2
+        movdqa  xmm6,xmm4
+        movdqa  xmm2,xmm3
+        psubw   xmm4,xmm1               ; xmm4=data4
+        psubw   xmm3,xmm5               ; xmm3=data6
+        paddw   xmm6,xmm1               ; xmm6=data0
+        paddw   xmm2,xmm5               ; xmm2=data2
 
-	movdqa	XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3
-	movdqa	XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6
-	movdqa	XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=tmp6
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp7
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
 
-	paddw	xmm7,xmm0		; xmm7=tmp10
-	paddw	xmm0,xmm1		; xmm0=tmp11
-	paddw	xmm1,xmm5		; xmm1=tmp12, xmm5=tmp7
+        paddw   xmm7,xmm0               ; xmm7=tmp10
+        paddw   xmm0,xmm1               ; xmm0=tmp11
+        paddw   xmm1,xmm5               ; xmm1=tmp12, xmm5=tmp7
 
-	psllw	xmm7,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm1,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm7,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
 
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3
 
-	movdqa	xmm4,xmm7		; xmm4=tmp10
-	psubw	xmm7,xmm1
-	pmulhw	xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5
-	pmulhw	xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
-	pmulhw	xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
-	paddw	xmm4,xmm7		; xmm4=z2
-	paddw	xmm1,xmm7		; xmm1=z4
+        movdqa  xmm4,xmm7               ; xmm4=tmp10
+        psubw   xmm7,xmm1
+        pmulhw  xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm7               ; xmm4=z2
+        paddw   xmm1,xmm7               ; xmm1=z4
 
-	movdqa	xmm3,xmm5
-	psubw	xmm5,xmm0		; xmm5=z13
-	paddw	xmm3,xmm0		; xmm3=z11
+        movdqa  xmm3,xmm5
+        psubw   xmm5,xmm0               ; xmm5=z13
+        paddw   xmm3,xmm0               ; xmm3=z11
 
-	movdqa	xmm6,xmm5
-	movdqa	xmm2,xmm3
-	psubw	xmm5,xmm4		; xmm5=data3
-	psubw	xmm3,xmm1		; xmm3=data7
-	paddw	xmm6,xmm4		; xmm6=data5
-	paddw	xmm2,xmm1		; xmm2=data1
+        movdqa  xmm6,xmm5
+        movdqa  xmm2,xmm3
+        psubw   xmm5,xmm4               ; xmm5=data3
+        psubw   xmm3,xmm1               ; xmm3=data7
+        paddw   xmm6,xmm4               ; xmm6=data5
+        paddw   xmm2,xmm1               ; xmm2=data1
 
-	movdqa	XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5
-	movdqa	XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3
-	movdqa	XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6
-	movdqa	XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfss2int-64.asm b/simd/jfss2int-64.asm
index bd1bd45..f5ec346 100644
--- a/simd/jfss2int-64.asm
+++ b/simd/jfss2int-64.asm
@@ -27,67 +27,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13
+%define PASS1_BITS      2
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS)
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_islow_sse2)
+        alignz  16
+        global  EXTN(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
 
-PW_F130_F054	times 4 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 4 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 4 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 4 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 4 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 4 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 4 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 4 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 4 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 4 dd  1 << (DESCALE_P2-1)
-PW_DESCALE_P2X	times 8 dw  1 << (PASS1_BITS-1)
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)
+PW_DESCALE_P2X  times 8 dw  1 << (PASS1_BITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -97,526 +97,526 @@
 
 ; r10 = DCTELEM * data
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		6
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          6
 
-	align	16
-	global	EXTN(jsimd_fdct_islow_sse2)
+        align   16
+        global  EXTN(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	rdx, r10	; (DCTELEM *)
+        mov     rdx, r10        ; (DCTELEM *)
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
 
-	; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
-	; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm1		; xmm0=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm4,xmm1		; xmm4=(04 14 05 15 06 16 07 17)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm3		; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm5,xmm3		; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
 
-	; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
-	; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
+        ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+        ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(20 30 21 31 22 32 23 33)
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm2,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm7		; xmm2=(44 54 45 55 46 56 47 57)
-	movdqa    xmm5,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm3		; xmm1=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm5,xmm3		; xmm5=(64 74 65 75 66 76 67 77)
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
 
-	movdqa    xmm7,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm1		; xmm6=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm7,xmm1		; xmm7=(42 52 62 72 43 53 63 73)
-	movdqa    xmm3,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm3,xmm5		; xmm3=(46 56 66 76 47 57 67 77)
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(20 30 21 31 22 32 23 33)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(24 34 25 35 26 36 27 37)
-	movdqa	XMMWORD [wk(2)], xmm7	; wk(2)=(42 52 62 72 43 53 63 73)
-	movdqa	XMMWORD [wk(3)], xmm2	; wk(3)=(44 54 64 74 45 55 65 75)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(3)], xmm2   ; wk(3)=(44 54 64 74 45 55 65 75)
 
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm1		; xmm0=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm7,xmm1		; xmm7=(02 12 22 32 03 13 23 33)
-	movdqa    xmm2,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm5		; xmm4=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm2,xmm5		; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa     xmm1,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=(00 10 20 30 40 50 60 70)=data0
-	punpckhqdq xmm1,xmm6		; xmm1=(01 11 21 31 41 51 61 71)=data1
-	movdqa     xmm5,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm3		; xmm2=(06 16 26 36 46 56 66 76)=data6
-	punpckhqdq xmm5,xmm3		; xmm5=(07 17 27 37 47 57 67 77)=data7
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
 
-	movdqa	xmm6,xmm1
-	movdqa	xmm3,xmm0
-	psubw	xmm1,xmm2		; xmm1=data1-data6=tmp6
-	psubw	xmm0,xmm5		; xmm0=data0-data7=tmp7
-	paddw	xmm6,xmm2		; xmm6=data1+data6=tmp1
-	paddw	xmm3,xmm5		; xmm3=data0+data7=tmp0
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm2, XMMWORD [wk(2)]	; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa	xmm5, XMMWORD [wk(3)]	; xmm5=(44 54 64 74 45 55 65 75)
-	movdqa	XMMWORD [wk(0)], xmm1	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp7
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(3)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
 
-	movdqa     xmm1,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm2		; xmm7=(02 12 22 32 42 52 62 72)=data2
-	punpckhqdq xmm1,xmm2		; xmm1=(03 13 23 33 43 53 63 73)=data3
-	movdqa     xmm0,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm5		; xmm4=(04 14 24 34 44 54 64 74)=data4
-	punpckhqdq xmm0,xmm5		; xmm0=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
 
-	movdqa	xmm2,xmm1
-	movdqa	xmm5,xmm7
-	paddw	xmm1,xmm4		; xmm1=data3+data4=tmp3
-	paddw	xmm7,xmm0		; xmm7=data2+data5=tmp2
-	psubw	xmm2,xmm4		; xmm2=data3-data4=tmp4
-	psubw	xmm5,xmm0		; xmm5=data2-data5=tmp5
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm0,xmm6
-	paddw	xmm3,xmm1		; xmm3=tmp10
-	paddw	xmm6,xmm7		; xmm6=tmp11
-	psubw	xmm4,xmm1		; xmm4=tmp13
-	psubw	xmm0,xmm7		; xmm0=tmp12
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        paddw   xmm3,xmm1               ; xmm3=tmp10
+        paddw   xmm6,xmm7               ; xmm6=tmp11
+        psubw   xmm4,xmm1               ; xmm4=tmp13
+        psubw   xmm0,xmm7               ; xmm0=tmp12
 
-	movdqa	xmm1,xmm3
-	paddw	xmm3,xmm6		; xmm3=tmp10+tmp11
-	psubw	xmm1,xmm6		; xmm1=tmp10-tmp11
+        movdqa  xmm1,xmm3
+        paddw   xmm3,xmm6               ; xmm3=tmp10+tmp11
+        psubw   xmm1,xmm6               ; xmm1=tmp10-tmp11
 
-	psllw	xmm3,PASS1_BITS		; xmm3=data0
-	psllw	xmm1,PASS1_BITS		; xmm1=data4
+        psllw   xmm3,PASS1_BITS         ; xmm3=data0
+        psllw   xmm1,PASS1_BITS         ; xmm1=data4
 
-	movdqa	XMMWORD [wk(2)], xmm3	; wk(2)=data0
-	movdqa	XMMWORD [wk(3)], xmm1	; wk(3)=data4
+        movdqa  XMMWORD [wk(2)], xmm3   ; wk(2)=data0
+        movdqa  XMMWORD [wk(3)], xmm1   ; wk(3)=data4
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movdqa    xmm7,xmm4		; xmm4=tmp13
-	movdqa    xmm6,xmm4
-	punpcklwd xmm7,xmm0		; xmm0=tmp12
-	punpckhwd xmm6,xmm0
-	movdqa    xmm4,xmm7
-	movdqa    xmm0,xmm6
-	pmaddwd   xmm7,[rel PW_F130_F054]	; xmm7=data2L
-	pmaddwd   xmm6,[rel PW_F130_F054]	; xmm6=data2H
-	pmaddwd   xmm4,[rel PW_F054_MF130]	; xmm4=data6L
-	pmaddwd   xmm0,[rel PW_F054_MF130]	; xmm0=data6H
+        movdqa    xmm7,xmm4             ; xmm4=tmp13
+        movdqa    xmm6,xmm4
+        punpcklwd xmm7,xmm0             ; xmm0=tmp12
+        punpckhwd xmm6,xmm0
+        movdqa    xmm4,xmm7
+        movdqa    xmm0,xmm6
+        pmaddwd   xmm7,[rel PW_F130_F054]       ; xmm7=data2L
+        pmaddwd   xmm6,[rel PW_F130_F054]       ; xmm6=data2H
+        pmaddwd   xmm4,[rel PW_F054_MF130]      ; xmm4=data6L
+        pmaddwd   xmm0,[rel PW_F054_MF130]      ; xmm0=data6H
 
-	paddd	xmm7,[rel PD_DESCALE_P1]
-	paddd	xmm6,[rel PD_DESCALE_P1]
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
-	paddd	xmm4,[rel PD_DESCALE_P1]
-	paddd	xmm0,[rel PD_DESCALE_P1]
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm7,[rel PD_DESCALE_P1]
+        paddd   xmm6,[rel PD_DESCALE_P1]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+        paddd   xmm4,[rel PD_DESCALE_P1]
+        paddd   xmm0,[rel PD_DESCALE_P1]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm7,xmm6		; xmm7=data2
-	packssdw  xmm4,xmm0		; xmm4=data6
+        packssdw  xmm7,xmm6             ; xmm7=data2
+        packssdw  xmm4,xmm0             ; xmm4=data6
 
-	movdqa	XMMWORD [wk(4)], xmm7	; wk(4)=data2
-	movdqa	XMMWORD [wk(5)], xmm4	; wk(5)=data6
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=data2
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=data6
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm3, XMMWORD [wk(0)]	; xmm3=tmp6
-	movdqa	xmm1, XMMWORD [wk(1)]	; xmm1=tmp7
+        movdqa  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp6
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp7
 
-	movdqa	xmm6,xmm2		; xmm2=tmp4
-	movdqa	xmm0,xmm5		; xmm5=tmp5
-	paddw	xmm6,xmm3		; xmm6=z3
-	paddw	xmm0,xmm1		; xmm0=z4
+        movdqa  xmm6,xmm2               ; xmm2=tmp4
+        movdqa  xmm0,xmm5               ; xmm5=tmp5
+        paddw   xmm6,xmm3               ; xmm6=z3
+        paddw   xmm0,xmm1               ; xmm0=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm7,xmm6
-	movdqa    xmm4,xmm6
-	punpcklwd xmm7,xmm0
-	punpckhwd xmm4,xmm0
-	movdqa    xmm6,xmm7
-	movdqa    xmm0,xmm4
-	pmaddwd   xmm7,[rel PW_MF078_F117]	; xmm7=z3L
-	pmaddwd   xmm4,[rel PW_MF078_F117]	; xmm4=z3H
-	pmaddwd   xmm6,[rel PW_F117_F078]	; xmm6=z4L
-	pmaddwd   xmm0,[rel PW_F117_F078]	; xmm0=z4H
+        movdqa    xmm7,xmm6
+        movdqa    xmm4,xmm6
+        punpcklwd xmm7,xmm0
+        punpckhwd xmm4,xmm0
+        movdqa    xmm6,xmm7
+        movdqa    xmm0,xmm4
+        pmaddwd   xmm7,[rel PW_MF078_F117]      ; xmm7=z3L
+        pmaddwd   xmm4,[rel PW_MF078_F117]      ; xmm4=z3H
+        pmaddwd   xmm6,[rel PW_F117_F078]       ; xmm6=z4L
+        pmaddwd   xmm0,[rel PW_F117_F078]       ; xmm0=z4H
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=z3L
-	movdqa	XMMWORD [wk(1)], xmm4	; wk(1)=z3H
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movdqa    xmm7,xmm2
-	movdqa    xmm4,xmm2
-	punpcklwd xmm7,xmm1
-	punpckhwd xmm4,xmm1
-	movdqa    xmm2,xmm7
-	movdqa    xmm1,xmm4
-	pmaddwd   xmm7,[rel PW_MF060_MF089]	; xmm7=tmp4L
-	pmaddwd   xmm4,[rel PW_MF060_MF089]	; xmm4=tmp4H
-	pmaddwd   xmm2,[rel PW_MF089_F060]	; xmm2=tmp7L
-	pmaddwd   xmm1,[rel PW_MF089_F060]	; xmm1=tmp7H
+        movdqa    xmm7,xmm2
+        movdqa    xmm4,xmm2
+        punpcklwd xmm7,xmm1
+        punpckhwd xmm4,xmm1
+        movdqa    xmm2,xmm7
+        movdqa    xmm1,xmm4
+        pmaddwd   xmm7,[rel PW_MF060_MF089]     ; xmm7=tmp4L
+        pmaddwd   xmm4,[rel PW_MF060_MF089]     ; xmm4=tmp4H
+        pmaddwd   xmm2,[rel PW_MF089_F060]      ; xmm2=tmp7L
+        pmaddwd   xmm1,[rel PW_MF089_F060]      ; xmm1=tmp7H
 
-	paddd	xmm7, XMMWORD [wk(0)]	; xmm7=data7L
-	paddd	xmm4, XMMWORD [wk(1)]	; xmm4=data7H
-	paddd	xmm2,xmm6		; xmm2=data1L
-	paddd	xmm1,xmm0		; xmm1=data1H
+        paddd   xmm7, XMMWORD [wk(0)]   ; xmm7=data7L
+        paddd   xmm4, XMMWORD [wk(1)]   ; xmm4=data7H
+        paddd   xmm2,xmm6               ; xmm2=data1L
+        paddd   xmm1,xmm0               ; xmm1=data1H
 
-	paddd	xmm7,[rel PD_DESCALE_P1]
-	paddd	xmm4,[rel PD_DESCALE_P1]
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm4,DESCALE_P1
-	paddd	xmm2,[rel PD_DESCALE_P1]
-	paddd	xmm1,[rel PD_DESCALE_P1]
-	psrad	xmm2,DESCALE_P1
-	psrad	xmm1,DESCALE_P1
+        paddd   xmm7,[rel PD_DESCALE_P1]
+        paddd   xmm4,[rel PD_DESCALE_P1]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm2,[rel PD_DESCALE_P1]
+        paddd   xmm1,[rel PD_DESCALE_P1]
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
 
-	packssdw  xmm7,xmm4		; xmm7=data7
-	packssdw  xmm2,xmm1		; xmm2=data1
+        packssdw  xmm7,xmm4             ; xmm7=data7
+        packssdw  xmm2,xmm1             ; xmm2=data1
 
-	movdqa    xmm4,xmm5
-	movdqa    xmm1,xmm5
-	punpcklwd xmm4,xmm3
-	punpckhwd xmm1,xmm3
-	movdqa    xmm5,xmm4
-	movdqa    xmm3,xmm1
-	pmaddwd   xmm4,[rel PW_MF050_MF256]	; xmm4=tmp5L
-	pmaddwd   xmm1,[rel PW_MF050_MF256]	; xmm1=tmp5H
-	pmaddwd   xmm5,[rel PW_MF256_F050]	; xmm5=tmp6L
-	pmaddwd   xmm3,[rel PW_MF256_F050]	; xmm3=tmp6H
+        movdqa    xmm4,xmm5
+        movdqa    xmm1,xmm5
+        punpcklwd xmm4,xmm3
+        punpckhwd xmm1,xmm3
+        movdqa    xmm5,xmm4
+        movdqa    xmm3,xmm1
+        pmaddwd   xmm4,[rel PW_MF050_MF256]     ; xmm4=tmp5L
+        pmaddwd   xmm1,[rel PW_MF050_MF256]     ; xmm1=tmp5H
+        pmaddwd   xmm5,[rel PW_MF256_F050]      ; xmm5=tmp6L
+        pmaddwd   xmm3,[rel PW_MF256_F050]      ; xmm3=tmp6H
 
-	paddd	xmm4,xmm6		; xmm4=data5L
-	paddd	xmm1,xmm0		; xmm1=data5H
-	paddd	xmm5, XMMWORD [wk(0)]	; xmm5=data3L
-	paddd	xmm3, XMMWORD [wk(1)]	; xmm3=data3H
+        paddd   xmm4,xmm6               ; xmm4=data5L
+        paddd   xmm1,xmm0               ; xmm1=data5H
+        paddd   xmm5, XMMWORD [wk(0)]   ; xmm5=data3L
+        paddd   xmm3, XMMWORD [wk(1)]   ; xmm3=data3H
 
-	paddd	xmm4,[rel PD_DESCALE_P1]
-	paddd	xmm1,[rel PD_DESCALE_P1]
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm1,DESCALE_P1
-	paddd	xmm5,[rel PD_DESCALE_P1]
-	paddd	xmm3,[rel PD_DESCALE_P1]
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm3,DESCALE_P1
+        paddd   xmm4,[rel PD_DESCALE_P1]
+        paddd   xmm1,[rel PD_DESCALE_P1]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+        paddd   xmm5,[rel PD_DESCALE_P1]
+        paddd   xmm3,[rel PD_DESCALE_P1]
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
 
-	packssdw  xmm4,xmm1		; xmm4=data5
-	packssdw  xmm5,xmm3		; xmm5=data3
+        packssdw  xmm4,xmm1             ; xmm4=data5
+        packssdw  xmm5,xmm3             ; xmm5=data3
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	movdqa	xmm6, XMMWORD [wk(2)]	; xmm6=col0
-	movdqa	xmm0, XMMWORD [wk(4)]	; xmm0=col2
+        movdqa  xmm6, XMMWORD [wk(2)]   ; xmm6=col0
+        movdqa  xmm0, XMMWORD [wk(4)]   ; xmm0=col2
 
-	; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
-	; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
+        ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
+        ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
 
-	movdqa    xmm1,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm2		; xmm6=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm1,xmm2		; xmm1=(40 41 50 51 60 61 70 71)
-	movdqa    xmm3,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm5		; xmm0=(02 03 12 13 22 23 32 33)
-	punpckhwd xmm3,xmm5		; xmm3=(42 43 52 53 62 63 72 73)
+        movdqa    xmm1,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm2             ; xmm6=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm1,xmm2             ; xmm1=(40 41 50 51 60 61 70 71)
+        movdqa    xmm3,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm3,xmm5             ; xmm3=(42 43 52 53 62 63 72 73)
 
-	movdqa	xmm2, XMMWORD [wk(3)]	; xmm2=col4
-	movdqa	xmm5, XMMWORD [wk(5)]	; xmm5=col6
+        movdqa  xmm2, XMMWORD [wk(3)]   ; xmm2=col4
+        movdqa  xmm5, XMMWORD [wk(5)]   ; xmm5=col6
 
-	; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
-	; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
+        ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
+        ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm0	; wk(0)=(02 03 12 13 22 23 32 33)
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(42 43 52 53 62 63 72 73)
 
-	movdqa    xmm0,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm4		; xmm2=(04 05 14 15 24 25 34 35)
-	punpckhwd xmm0,xmm4		; xmm0=(44 45 54 55 64 65 74 75)
-	movdqa    xmm3,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm7		; xmm5=(06 07 16 17 26 27 36 37)
-	punpckhwd xmm3,xmm7		; xmm3=(46 47 56 57 66 67 76 77)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm4             ; xmm2=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm0,xmm4             ; xmm0=(44 45 54 55 64 65 74 75)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm7             ; xmm5=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm3,xmm7             ; xmm3=(46 47 56 57 66 67 76 77)
 
-	movdqa    xmm4,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(04 05 06 07 14 15 16 17)
-	punpckhdq xmm4,xmm5		; xmm4=(24 25 26 27 34 35 36 37)
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm3		; xmm0=(44 45 46 47 54 55 56 57)
-	punpckhdq xmm7,xmm3		; xmm7=(64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm4,xmm5             ; xmm4=(24 25 26 27 34 35 36 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=(02 03 12 13 22 23 32 33)
-	movdqa	xmm3, XMMWORD [wk(1)]	; xmm3=(42 43 52 53 62 63 72 73)
-	movdqa	XMMWORD [wk(2)], xmm4	; wk(2)=(24 25 26 27 34 35 36 37)
-	movdqa	XMMWORD [wk(3)], xmm0	; wk(3)=(44 45 46 47 54 55 56 57)
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=(02 03 12 13 22 23 32 33)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(2)], xmm4   ; wk(2)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(3)], xmm0   ; wk(3)=(44 45 46 47 54 55 56 57)
 
-	movdqa    xmm4,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm5		; xmm6=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm4,xmm5		; xmm4=(20 21 22 23 30 31 32 33)
-	movdqa    xmm0,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm3		; xmm1=(40 41 42 43 50 51 52 53)
-	punpckhdq xmm0,xmm3		; xmm0=(60 61 62 63 70 71 72 73)
+        movdqa    xmm4,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm5             ; xmm6=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm4,xmm5             ; xmm4=(20 21 22 23 30 31 32 33)
+        movdqa    xmm0,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm3             ; xmm1=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm0,xmm3             ; xmm0=(60 61 62 63 70 71 72 73)
 
-	movdqa     xmm5,xmm6		; transpose coefficients(phase 3)
-	punpcklqdq xmm6,xmm2		; xmm6=(00 01 02 03 04 05 06 07)=data0
-	punpckhqdq xmm5,xmm2		; xmm5=(10 11 12 13 14 15 16 17)=data1
-	movdqa     xmm3,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm7		; xmm0=(60 61 62 63 64 65 66 67)=data6
-	punpckhqdq xmm3,xmm7		; xmm3=(70 71 72 73 74 75 76 77)=data7
+        movdqa     xmm5,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm2            ; xmm6=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm5,xmm2            ; xmm5=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm7            ; xmm0=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm3,xmm7            ; xmm3=(70 71 72 73 74 75 76 77)=data7
 
-	movdqa	xmm2,xmm5
-	movdqa	xmm7,xmm6
-	psubw	xmm5,xmm0		; xmm5=data1-data6=tmp6
-	psubw	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	paddw	xmm2,xmm0		; xmm2=data1+data6=tmp1
-	paddw	xmm7,xmm3		; xmm7=data0+data7=tmp0
+        movdqa  xmm2,xmm5
+        movdqa  xmm7,xmm6
+        psubw   xmm5,xmm0               ; xmm5=data1-data6=tmp6
+        psubw   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        paddw   xmm2,xmm0               ; xmm2=data1+data6=tmp1
+        paddw   xmm7,xmm3               ; xmm7=data0+data7=tmp0
 
-	movdqa	xmm0, XMMWORD [wk(2)]	; xmm0=(24 25 26 27 34 35 36 37)
-	movdqa	xmm3, XMMWORD [wk(3)]	; xmm3=(44 45 46 47 54 55 56 57)
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movdqa  xmm0, XMMWORD [wk(2)]   ; xmm0=(24 25 26 27 34 35 36 37)
+        movdqa  xmm3, XMMWORD [wk(3)]   ; xmm3=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movdqa     xmm5,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm0		; xmm4=(20 21 22 23 24 25 26 27)=data2
-	punpckhqdq xmm5,xmm0		; xmm5=(30 31 32 33 34 35 36 37)=data3
-	movdqa     xmm6,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm3		; xmm1=(40 41 42 43 44 45 46 47)=data4
-	punpckhqdq xmm6,xmm3		; xmm6=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm5,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm5,xmm0            ; xmm5=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm3            ; xmm1=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm6,xmm3            ; xmm6=(50 51 52 53 54 55 56 57)=data5
 
-	movdqa	xmm0,xmm5
-	movdqa	xmm3,xmm4
-	paddw	xmm5,xmm1		; xmm5=data3+data4=tmp3
-	paddw	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	psubw	xmm0,xmm1		; xmm0=data3-data4=tmp4
-	psubw	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movdqa  xmm0,xmm5
+        movdqa  xmm3,xmm4
+        paddw   xmm5,xmm1               ; xmm5=data3+data4=tmp3
+        paddw   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        psubw   xmm0,xmm1               ; xmm0=data3-data4=tmp4
+        psubw   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm1,xmm7
-	movdqa	xmm6,xmm2
-	paddw	xmm7,xmm5		; xmm7=tmp10
-	paddw	xmm2,xmm4		; xmm2=tmp11
-	psubw	xmm1,xmm5		; xmm1=tmp13
-	psubw	xmm6,xmm4		; xmm6=tmp12
+        movdqa  xmm1,xmm7
+        movdqa  xmm6,xmm2
+        paddw   xmm7,xmm5               ; xmm7=tmp10
+        paddw   xmm2,xmm4               ; xmm2=tmp11
+        psubw   xmm1,xmm5               ; xmm1=tmp13
+        psubw   xmm6,xmm4               ; xmm6=tmp12
 
-	movdqa	xmm5,xmm7
-	paddw	xmm7,xmm2		; xmm7=tmp10+tmp11
-	psubw	xmm5,xmm2		; xmm5=tmp10-tmp11
+        movdqa  xmm5,xmm7
+        paddw   xmm7,xmm2               ; xmm7=tmp10+tmp11
+        psubw   xmm5,xmm2               ; xmm5=tmp10-tmp11
 
-	paddw	xmm7,[rel PW_DESCALE_P2X]
-	paddw	xmm5,[rel PW_DESCALE_P2X]
-	psraw	xmm7,PASS1_BITS		; xmm7=data0
-	psraw	xmm5,PASS1_BITS		; xmm5=data4
+        paddw   xmm7,[rel PW_DESCALE_P2X]
+        paddw   xmm5,[rel PW_DESCALE_P2X]
+        psraw   xmm7,PASS1_BITS         ; xmm7=data0
+        psraw   xmm5,PASS1_BITS         ; xmm5=data4
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7
-	movdqa	XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7
+        movdqa  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movdqa    xmm4,xmm1		; xmm1=tmp13
-	movdqa    xmm2,xmm1
-	punpcklwd xmm4,xmm6		; xmm6=tmp12
-	punpckhwd xmm2,xmm6
-	movdqa    xmm1,xmm4
-	movdqa    xmm6,xmm2
-	pmaddwd   xmm4,[rel PW_F130_F054]	; xmm4=data2L
-	pmaddwd   xmm2,[rel PW_F130_F054]	; xmm2=data2H
-	pmaddwd   xmm1,[rel PW_F054_MF130]	; xmm1=data6L
-	pmaddwd   xmm6,[rel PW_F054_MF130]	; xmm6=data6H
+        movdqa    xmm4,xmm1             ; xmm1=tmp13
+        movdqa    xmm2,xmm1
+        punpcklwd xmm4,xmm6             ; xmm6=tmp12
+        punpckhwd xmm2,xmm6
+        movdqa    xmm1,xmm4
+        movdqa    xmm6,xmm2
+        pmaddwd   xmm4,[rel PW_F130_F054]       ; xmm4=data2L
+        pmaddwd   xmm2,[rel PW_F130_F054]       ; xmm2=data2H
+        pmaddwd   xmm1,[rel PW_F054_MF130]      ; xmm1=data6L
+        pmaddwd   xmm6,[rel PW_F054_MF130]      ; xmm6=data6H
 
-	paddd	xmm4,[rel PD_DESCALE_P2]
-	paddd	xmm2,[rel PD_DESCALE_P2]
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm2,DESCALE_P2
-	paddd	xmm1,[rel PD_DESCALE_P2]
-	paddd	xmm6,[rel PD_DESCALE_P2]
-	psrad	xmm1,DESCALE_P2
-	psrad	xmm6,DESCALE_P2
+        paddd   xmm4,[rel PD_DESCALE_P2]
+        paddd   xmm2,[rel PD_DESCALE_P2]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm1,[rel PD_DESCALE_P2]
+        paddd   xmm6,[rel PD_DESCALE_P2]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm6,DESCALE_P2
 
-	packssdw  xmm4,xmm2		; xmm4=data2
-	packssdw  xmm1,xmm6		; xmm1=data6
+        packssdw  xmm4,xmm2             ; xmm4=data2
+        packssdw  xmm1,xmm6             ; xmm1=data6
 
-	movdqa	XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp6
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp7
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
 
-	movdqa	xmm2,xmm0		; xmm0=tmp4
-	movdqa	xmm6,xmm3		; xmm3=tmp5
-	paddw	xmm2,xmm7		; xmm2=z3
-	paddw	xmm6,xmm5		; xmm6=z4
+        movdqa  xmm2,xmm0               ; xmm0=tmp4
+        movdqa  xmm6,xmm3               ; xmm3=tmp5
+        paddw   xmm2,xmm7               ; xmm2=z3
+        paddw   xmm6,xmm5               ; xmm6=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm4,xmm2
-	movdqa    xmm1,xmm2
-	punpcklwd xmm4,xmm6
-	punpckhwd xmm1,xmm6
-	movdqa    xmm2,xmm4
-	movdqa    xmm6,xmm1
-	pmaddwd   xmm4,[rel PW_MF078_F117]	; xmm4=z3L
-	pmaddwd   xmm1,[rel PW_MF078_F117]	; xmm1=z3H
-	pmaddwd   xmm2,[rel PW_F117_F078]	; xmm2=z4L
-	pmaddwd   xmm6,[rel PW_F117_F078]	; xmm6=z4H
+        movdqa    xmm4,xmm2
+        movdqa    xmm1,xmm2
+        punpcklwd xmm4,xmm6
+        punpckhwd xmm1,xmm6
+        movdqa    xmm2,xmm4
+        movdqa    xmm6,xmm1
+        pmaddwd   xmm4,[rel PW_MF078_F117]      ; xmm4=z3L
+        pmaddwd   xmm1,[rel PW_MF078_F117]      ; xmm1=z3H
+        pmaddwd   xmm2,[rel PW_F117_F078]       ; xmm2=z4L
+        pmaddwd   xmm6,[rel PW_F117_F078]       ; xmm6=z4H
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=z3L
-	movdqa	XMMWORD [wk(1)], xmm1	; wk(1)=z3H
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movdqa    xmm4,xmm0
-	movdqa    xmm1,xmm0
-	punpcklwd xmm4,xmm5
-	punpckhwd xmm1,xmm5
-	movdqa    xmm0,xmm4
-	movdqa    xmm5,xmm1
-	pmaddwd   xmm4,[rel PW_MF060_MF089]	; xmm4=tmp4L
-	pmaddwd   xmm1,[rel PW_MF060_MF089]	; xmm1=tmp4H
-	pmaddwd   xmm0,[rel PW_MF089_F060]	; xmm0=tmp7L
-	pmaddwd   xmm5,[rel PW_MF089_F060]	; xmm5=tmp7H
+        movdqa    xmm4,xmm0
+        movdqa    xmm1,xmm0
+        punpcklwd xmm4,xmm5
+        punpckhwd xmm1,xmm5
+        movdqa    xmm0,xmm4
+        movdqa    xmm5,xmm1
+        pmaddwd   xmm4,[rel PW_MF060_MF089]     ; xmm4=tmp4L
+        pmaddwd   xmm1,[rel PW_MF060_MF089]     ; xmm1=tmp4H
+        pmaddwd   xmm0,[rel PW_MF089_F060]      ; xmm0=tmp7L
+        pmaddwd   xmm5,[rel PW_MF089_F060]      ; xmm5=tmp7H
 
-	paddd	xmm4, XMMWORD [wk(0)]	; xmm4=data7L
-	paddd	xmm1, XMMWORD [wk(1)]	; xmm1=data7H
-	paddd	xmm0,xmm2		; xmm0=data1L
-	paddd	xmm5,xmm6		; xmm5=data1H
+        paddd   xmm4, XMMWORD [wk(0)]   ; xmm4=data7L
+        paddd   xmm1, XMMWORD [wk(1)]   ; xmm1=data7H
+        paddd   xmm0,xmm2               ; xmm0=data1L
+        paddd   xmm5,xmm6               ; xmm5=data1H
 
-	paddd	xmm4,[rel PD_DESCALE_P2]
-	paddd	xmm1,[rel PD_DESCALE_P2]
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm0,[rel PD_DESCALE_P2]
-	paddd	xmm5,[rel PD_DESCALE_P2]
-	psrad	xmm0,DESCALE_P2
-	psrad	xmm5,DESCALE_P2
+        paddd   xmm4,[rel PD_DESCALE_P2]
+        paddd   xmm1,[rel PD_DESCALE_P2]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm0,[rel PD_DESCALE_P2]
+        paddd   xmm5,[rel PD_DESCALE_P2]
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
 
-	packssdw  xmm4,xmm1		; xmm4=data7
-	packssdw  xmm0,xmm5		; xmm0=data1
+        packssdw  xmm4,xmm1             ; xmm4=data7
+        packssdw  xmm0,xmm5             ; xmm0=data1
 
-	movdqa	XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0
 
-	movdqa    xmm1,xmm3
-	movdqa    xmm5,xmm3
-	punpcklwd xmm1,xmm7
-	punpckhwd xmm5,xmm7
-	movdqa    xmm3,xmm1
-	movdqa    xmm7,xmm5
-	pmaddwd   xmm1,[rel PW_MF050_MF256]	; xmm1=tmp5L
-	pmaddwd   xmm5,[rel PW_MF050_MF256]	; xmm5=tmp5H
-	pmaddwd   xmm3,[rel PW_MF256_F050]	; xmm3=tmp6L
-	pmaddwd   xmm7,[rel PW_MF256_F050]	; xmm7=tmp6H
+        movdqa    xmm1,xmm3
+        movdqa    xmm5,xmm3
+        punpcklwd xmm1,xmm7
+        punpckhwd xmm5,xmm7
+        movdqa    xmm3,xmm1
+        movdqa    xmm7,xmm5
+        pmaddwd   xmm1,[rel PW_MF050_MF256]     ; xmm1=tmp5L
+        pmaddwd   xmm5,[rel PW_MF050_MF256]     ; xmm5=tmp5H
+        pmaddwd   xmm3,[rel PW_MF256_F050]      ; xmm3=tmp6L
+        pmaddwd   xmm7,[rel PW_MF256_F050]      ; xmm7=tmp6H
 
-	paddd	xmm1,xmm2		; xmm1=data5L
-	paddd	xmm5,xmm6		; xmm5=data5H
-	paddd	xmm3, XMMWORD [wk(0)]	; xmm3=data3L
-	paddd	xmm7, XMMWORD [wk(1)]	; xmm7=data3H
+        paddd   xmm1,xmm2               ; xmm1=data5L
+        paddd   xmm5,xmm6               ; xmm5=data5H
+        paddd   xmm3, XMMWORD [wk(0)]   ; xmm3=data3L
+        paddd   xmm7, XMMWORD [wk(1)]   ; xmm7=data3H
 
-	paddd	xmm1,[rel PD_DESCALE_P2]
-	paddd	xmm5,[rel PD_DESCALE_P2]
-	psrad	xmm1,DESCALE_P2
-	psrad	xmm5,DESCALE_P2
-	paddd	xmm3,[rel PD_DESCALE_P2]
-	paddd	xmm7,[rel PD_DESCALE_P2]
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm1,[rel PD_DESCALE_P2]
+        paddd   xmm5,[rel PD_DESCALE_P2]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
+        paddd   xmm3,[rel PD_DESCALE_P2]
+        paddd   xmm7,[rel PD_DESCALE_P2]
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm1,xmm5		; xmm1=data5
-	packssdw  xmm3,xmm7		; xmm3=data3
+        packssdw  xmm1,xmm5             ; xmm1=data5
+        packssdw  xmm3,xmm7             ; xmm3=data3
 
-	movdqa	XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfss2int.asm b/simd/jfss2int.asm
index 5e3f2aa..d1bcb2e 100644
--- a/simd/jfss2int.asm
+++ b/simd/jfss2int.asm
@@ -26,67 +26,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13
+%define PASS1_BITS      2
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS)
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_islow_sse2)
+        alignz  16
+        global  EXTN(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
 
-PW_F130_F054	times 4 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 4 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 4 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 4 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 4 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 4 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 4 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 4 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 4 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 4 dd  1 << (DESCALE_P2-1)
-PW_DESCALE_P2X	times 8 dw  1 << (PASS1_BITS-1)
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)
+PW_DESCALE_P2X  times 8 dw  1 << (PASS1_BITS-1)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -94,541 +94,541 @@
 ; jsimd_fdct_islow_sse2 (DCTELEM * data)
 ;
 
-%define data(b)		(b)+8		; DCTELEM * data
+%define data(b)         (b)+8           ; DCTELEM * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		6
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          6
 
-	align	16
-	global	EXTN(jsimd_fdct_islow_sse2)
+        align   16
+        global  EXTN(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
 
-	; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
-	; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm1		; xmm0=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm4,xmm1		; xmm4=(04 14 05 15 06 16 07 17)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm3		; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm5,xmm3		; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
 
-	; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
-	; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
+        ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+        ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=(20 30 21 31 22 32 23 33)
-	movdqa	XMMWORD [wk(1)], xmm5	; wk(1)=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm2,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm7		; xmm2=(44 54 45 55 46 56 47 57)
-	movdqa    xmm5,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm3		; xmm1=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm5,xmm3		; xmm5=(64 74 65 75 66 76 67 77)
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
 
-	movdqa    xmm7,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm1		; xmm6=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm7,xmm1		; xmm7=(42 52 62 72 43 53 63 73)
-	movdqa    xmm3,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm3,xmm5		; xmm3=(46 56 66 76 47 57 67 77)
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(20 30 21 31 22 32 23 33)
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=(24 34 25 35 26 36 27 37)
-	movdqa	XMMWORD [wk(2)], xmm7	; wk(2)=(42 52 62 72 43 53 63 73)
-	movdqa	XMMWORD [wk(3)], xmm2	; wk(3)=(44 54 64 74 45 55 65 75)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(3)], xmm2   ; wk(3)=(44 54 64 74 45 55 65 75)
 
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm1		; xmm0=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm7,xmm1		; xmm7=(02 12 22 32 03 13 23 33)
-	movdqa    xmm2,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm5		; xmm4=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm2,xmm5		; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa     xmm1,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=(00 10 20 30 40 50 60 70)=data0
-	punpckhqdq xmm1,xmm6		; xmm1=(01 11 21 31 41 51 61 71)=data1
-	movdqa     xmm5,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm3		; xmm2=(06 16 26 36 46 56 66 76)=data6
-	punpckhqdq xmm5,xmm3		; xmm5=(07 17 27 37 47 57 67 77)=data7
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
 
-	movdqa	xmm6,xmm1
-	movdqa	xmm3,xmm0
-	psubw	xmm1,xmm2		; xmm1=data1-data6=tmp6
-	psubw	xmm0,xmm5		; xmm0=data0-data7=tmp7
-	paddw	xmm6,xmm2		; xmm6=data1+data6=tmp1
-	paddw	xmm3,xmm5		; xmm3=data0+data7=tmp0
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
 
-	movdqa	xmm2, XMMWORD [wk(2)]	; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa	xmm5, XMMWORD [wk(3)]	; xmm5=(44 54 64 74 45 55 65 75)
-	movdqa	XMMWORD [wk(0)], xmm1	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp7
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(3)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
 
-	movdqa     xmm1,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm2		; xmm7=(02 12 22 32 42 52 62 72)=data2
-	punpckhqdq xmm1,xmm2		; xmm1=(03 13 23 33 43 53 63 73)=data3
-	movdqa     xmm0,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm5		; xmm4=(04 14 24 34 44 54 64 74)=data4
-	punpckhqdq xmm0,xmm5		; xmm0=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
 
-	movdqa	xmm2,xmm1
-	movdqa	xmm5,xmm7
-	paddw	xmm1,xmm4		; xmm1=data3+data4=tmp3
-	paddw	xmm7,xmm0		; xmm7=data2+data5=tmp2
-	psubw	xmm2,xmm4		; xmm2=data3-data4=tmp4
-	psubw	xmm5,xmm0		; xmm5=data2-data5=tmp5
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm0,xmm6
-	paddw	xmm3,xmm1		; xmm3=tmp10
-	paddw	xmm6,xmm7		; xmm6=tmp11
-	psubw	xmm4,xmm1		; xmm4=tmp13
-	psubw	xmm0,xmm7		; xmm0=tmp12
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        paddw   xmm3,xmm1               ; xmm3=tmp10
+        paddw   xmm6,xmm7               ; xmm6=tmp11
+        psubw   xmm4,xmm1               ; xmm4=tmp13
+        psubw   xmm0,xmm7               ; xmm0=tmp12
 
-	movdqa	xmm1,xmm3
-	paddw	xmm3,xmm6		; xmm3=tmp10+tmp11
-	psubw	xmm1,xmm6		; xmm1=tmp10-tmp11
+        movdqa  xmm1,xmm3
+        paddw   xmm3,xmm6               ; xmm3=tmp10+tmp11
+        psubw   xmm1,xmm6               ; xmm1=tmp10-tmp11
 
-	psllw	xmm3,PASS1_BITS		; xmm3=data0
-	psllw	xmm1,PASS1_BITS		; xmm1=data4
+        psllw   xmm3,PASS1_BITS         ; xmm3=data0
+        psllw   xmm1,PASS1_BITS         ; xmm1=data4
 
-	movdqa	XMMWORD [wk(2)], xmm3	; wk(2)=data0
-	movdqa	XMMWORD [wk(3)], xmm1	; wk(3)=data4
+        movdqa  XMMWORD [wk(2)], xmm3   ; wk(2)=data0
+        movdqa  XMMWORD [wk(3)], xmm1   ; wk(3)=data4
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movdqa    xmm7,xmm4		; xmm4=tmp13
-	movdqa    xmm6,xmm4
-	punpcklwd xmm7,xmm0		; xmm0=tmp12
-	punpckhwd xmm6,xmm0
-	movdqa    xmm4,xmm7
-	movdqa    xmm0,xmm6
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_F130_F054)]	; xmm7=data2L
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]	; xmm6=data2H
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F054_MF130)]	; xmm4=data6L
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_F054_MF130)]	; xmm0=data6H
+        movdqa    xmm7,xmm4             ; xmm4=tmp13
+        movdqa    xmm6,xmm4
+        punpcklwd xmm7,xmm0             ; xmm0=tmp12
+        punpckhwd xmm6,xmm0
+        movdqa    xmm4,xmm7
+        movdqa    xmm0,xmm6
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F130_F054)]       ; xmm7=data2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=data2H
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm4=data6L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm0=data6H
 
-	paddd	xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm6,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
-	paddd	xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm0,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm7,xmm6		; xmm7=data2
-	packssdw  xmm4,xmm0		; xmm4=data6
+        packssdw  xmm7,xmm6             ; xmm7=data2
+        packssdw  xmm4,xmm0             ; xmm4=data6
 
-	movdqa	XMMWORD [wk(4)], xmm7	; wk(4)=data2
-	movdqa	XMMWORD [wk(5)], xmm4	; wk(5)=data6
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=data2
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=data6
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm3, XMMWORD [wk(0)]	; xmm3=tmp6
-	movdqa	xmm1, XMMWORD [wk(1)]	; xmm1=tmp7
+        movdqa  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp6
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp7
 
-	movdqa	xmm6,xmm2		; xmm2=tmp4
-	movdqa	xmm0,xmm5		; xmm5=tmp5
-	paddw	xmm6,xmm3		; xmm6=z3
-	paddw	xmm0,xmm1		; xmm0=z4
+        movdqa  xmm6,xmm2               ; xmm2=tmp4
+        movdqa  xmm0,xmm5               ; xmm5=tmp5
+        paddw   xmm6,xmm3               ; xmm6=z3
+        paddw   xmm0,xmm1               ; xmm0=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm7,xmm6
-	movdqa    xmm4,xmm6
-	punpcklwd xmm7,xmm0
-	punpckhwd xmm4,xmm0
-	movdqa    xmm6,xmm7
-	movdqa    xmm0,xmm4
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]	; xmm7=z3L
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]	; xmm4=z3H
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]	; xmm6=z4L
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_F117_F078)]	; xmm0=z4H
+        movdqa    xmm7,xmm6
+        movdqa    xmm4,xmm6
+        punpcklwd xmm7,xmm0
+        punpckhwd xmm4,xmm0
+        movdqa    xmm6,xmm7
+        movdqa    xmm0,xmm4
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3H
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F117_F078)]       ; xmm0=z4H
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=z3L
-	movdqa	XMMWORD [wk(1)], xmm4	; wk(1)=z3H
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movdqa    xmm7,xmm2
-	movdqa    xmm4,xmm2
-	punpcklwd xmm7,xmm1
-	punpckhwd xmm4,xmm1
-	movdqa    xmm2,xmm7
-	movdqa    xmm1,xmm4
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm7=tmp4L
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm4=tmp4H
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_MF089_F060)]	; xmm2=tmp7L
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]	; xmm1=tmp7H
+        movdqa    xmm7,xmm2
+        movdqa    xmm4,xmm2
+        punpcklwd xmm7,xmm1
+        punpckhwd xmm4,xmm1
+        movdqa    xmm2,xmm7
+        movdqa    xmm1,xmm4
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp4L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm2=tmp7L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp7H
 
-	paddd	xmm7, XMMWORD [wk(0)]	; xmm7=data7L
-	paddd	xmm4, XMMWORD [wk(1)]	; xmm4=data7H
-	paddd	xmm2,xmm6		; xmm2=data1L
-	paddd	xmm1,xmm0		; xmm1=data1H
+        paddd   xmm7, XMMWORD [wk(0)]   ; xmm7=data7L
+        paddd   xmm4, XMMWORD [wk(1)]   ; xmm4=data7H
+        paddd   xmm2,xmm6               ; xmm2=data1L
+        paddd   xmm1,xmm0               ; xmm1=data1H
 
-	paddd	xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm4,DESCALE_P1
-	paddd	xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm2,DESCALE_P1
-	psrad	xmm1,DESCALE_P1
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
 
-	packssdw  xmm7,xmm4		; xmm7=data7
-	packssdw  xmm2,xmm1		; xmm2=data1
+        packssdw  xmm7,xmm4             ; xmm7=data7
+        packssdw  xmm2,xmm1             ; xmm2=data1
 
-	movdqa    xmm4,xmm5
-	movdqa    xmm1,xmm5
-	punpcklwd xmm4,xmm3
-	punpckhwd xmm1,xmm3
-	movdqa    xmm5,xmm4
-	movdqa    xmm3,xmm1
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm4=tmp5L
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm1=tmp5H
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_MF256_F050)]	; xmm5=tmp6L
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]	; xmm3=tmp6H
+        movdqa    xmm4,xmm5
+        movdqa    xmm1,xmm5
+        punpcklwd xmm4,xmm3
+        punpckhwd xmm1,xmm3
+        movdqa    xmm5,xmm4
+        movdqa    xmm3,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm4=tmp5L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm5=tmp6L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6H
 
-	paddd	xmm4,xmm6		; xmm4=data5L
-	paddd	xmm1,xmm0		; xmm1=data5H
-	paddd	xmm5, XMMWORD [wk(0)]	; xmm5=data3L
-	paddd	xmm3, XMMWORD [wk(1)]	; xmm3=data3H
+        paddd   xmm4,xmm6               ; xmm4=data5L
+        paddd   xmm1,xmm0               ; xmm1=data5H
+        paddd   xmm5, XMMWORD [wk(0)]   ; xmm5=data3L
+        paddd   xmm3, XMMWORD [wk(1)]   ; xmm3=data3H
 
-	paddd	xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm1,DESCALE_P1
-	paddd	xmm5,[GOTOFF(ebx,PD_DESCALE_P1)]
-	paddd	xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm3,DESCALE_P1
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
 
-	packssdw  xmm4,xmm1		; xmm4=data5
-	packssdw  xmm5,xmm3		; xmm5=data3
+        packssdw  xmm4,xmm1             ; xmm4=data5
+        packssdw  xmm5,xmm3             ; xmm5=data3
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-;	mov	edx, POINTER [data(eax)]	; (DCTELEM *)
+;       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
 
-	movdqa	xmm6, XMMWORD [wk(2)]	; xmm6=col0
-	movdqa	xmm0, XMMWORD [wk(4)]	; xmm0=col2
+        movdqa  xmm6, XMMWORD [wk(2)]   ; xmm6=col0
+        movdqa  xmm0, XMMWORD [wk(4)]   ; xmm0=col2
 
-	; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
-	; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
+        ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
+        ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
 
-	movdqa    xmm1,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm2		; xmm6=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm1,xmm2		; xmm1=(40 41 50 51 60 61 70 71)
-	movdqa    xmm3,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm5		; xmm0=(02 03 12 13 22 23 32 33)
-	punpckhwd xmm3,xmm5		; xmm3=(42 43 52 53 62 63 72 73)
+        movdqa    xmm1,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm2             ; xmm6=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm1,xmm2             ; xmm1=(40 41 50 51 60 61 70 71)
+        movdqa    xmm3,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm3,xmm5             ; xmm3=(42 43 52 53 62 63 72 73)
 
-	movdqa	xmm2, XMMWORD [wk(3)]	; xmm2=col4
-	movdqa	xmm5, XMMWORD [wk(5)]	; xmm5=col6
+        movdqa  xmm2, XMMWORD [wk(3)]   ; xmm2=col4
+        movdqa  xmm5, XMMWORD [wk(5)]   ; xmm5=col6
 
-	; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
-	; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
+        ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
+        ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm0	; wk(0)=(02 03 12 13 22 23 32 33)
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(42 43 52 53 62 63 72 73)
 
-	movdqa    xmm0,xmm2		; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm4		; xmm2=(04 05 14 15 24 25 34 35)
-	punpckhwd xmm0,xmm4		; xmm0=(44 45 54 55 64 65 74 75)
-	movdqa    xmm3,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm7		; xmm5=(06 07 16 17 26 27 36 37)
-	punpckhwd xmm3,xmm7		; xmm3=(46 47 56 57 66 67 76 77)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm4             ; xmm2=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm0,xmm4             ; xmm0=(44 45 54 55 64 65 74 75)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm7             ; xmm5=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm3,xmm7             ; xmm3=(46 47 56 57 66 67 76 77)
 
-	movdqa    xmm4,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm5		; xmm2=(04 05 06 07 14 15 16 17)
-	punpckhdq xmm4,xmm5		; xmm4=(24 25 26 27 34 35 36 37)
-	movdqa    xmm7,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm3		; xmm0=(44 45 46 47 54 55 56 57)
-	punpckhdq xmm7,xmm3		; xmm7=(64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm4,xmm5             ; xmm4=(24 25 26 27 34 35 36 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=(02 03 12 13 22 23 32 33)
-	movdqa	xmm3, XMMWORD [wk(1)]	; xmm3=(42 43 52 53 62 63 72 73)
-	movdqa	XMMWORD [wk(2)], xmm4	; wk(2)=(24 25 26 27 34 35 36 37)
-	movdqa	XMMWORD [wk(3)], xmm0	; wk(3)=(44 45 46 47 54 55 56 57)
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=(02 03 12 13 22 23 32 33)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(2)], xmm4   ; wk(2)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(3)], xmm0   ; wk(3)=(44 45 46 47 54 55 56 57)
 
-	movdqa    xmm4,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm5		; xmm6=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm4,xmm5		; xmm4=(20 21 22 23 30 31 32 33)
-	movdqa    xmm0,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm3		; xmm1=(40 41 42 43 50 51 52 53)
-	punpckhdq xmm0,xmm3		; xmm0=(60 61 62 63 70 71 72 73)
+        movdqa    xmm4,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm5             ; xmm6=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm4,xmm5             ; xmm4=(20 21 22 23 30 31 32 33)
+        movdqa    xmm0,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm3             ; xmm1=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm0,xmm3             ; xmm0=(60 61 62 63 70 71 72 73)
 
-	movdqa     xmm5,xmm6		; transpose coefficients(phase 3)
-	punpcklqdq xmm6,xmm2		; xmm6=(00 01 02 03 04 05 06 07)=data0
-	punpckhqdq xmm5,xmm2		; xmm5=(10 11 12 13 14 15 16 17)=data1
-	movdqa     xmm3,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm7		; xmm0=(60 61 62 63 64 65 66 67)=data6
-	punpckhqdq xmm3,xmm7		; xmm3=(70 71 72 73 74 75 76 77)=data7
+        movdqa     xmm5,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm2            ; xmm6=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm5,xmm2            ; xmm5=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm7            ; xmm0=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm3,xmm7            ; xmm3=(70 71 72 73 74 75 76 77)=data7
 
-	movdqa	xmm2,xmm5
-	movdqa	xmm7,xmm6
-	psubw	xmm5,xmm0		; xmm5=data1-data6=tmp6
-	psubw	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	paddw	xmm2,xmm0		; xmm2=data1+data6=tmp1
-	paddw	xmm7,xmm3		; xmm7=data0+data7=tmp0
+        movdqa  xmm2,xmm5
+        movdqa  xmm7,xmm6
+        psubw   xmm5,xmm0               ; xmm5=data1-data6=tmp6
+        psubw   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        paddw   xmm2,xmm0               ; xmm2=data1+data6=tmp1
+        paddw   xmm7,xmm3               ; xmm7=data0+data7=tmp0
 
-	movdqa	xmm0, XMMWORD [wk(2)]	; xmm0=(24 25 26 27 34 35 36 37)
-	movdqa	xmm3, XMMWORD [wk(3)]	; xmm3=(44 45 46 47 54 55 56 57)
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=tmp6
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movdqa  xmm0, XMMWORD [wk(2)]   ; xmm0=(24 25 26 27 34 35 36 37)
+        movdqa  xmm3, XMMWORD [wk(3)]   ; xmm3=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movdqa     xmm5,xmm4		; transpose coefficients(phase 3)
-	punpcklqdq xmm4,xmm0		; xmm4=(20 21 22 23 24 25 26 27)=data2
-	punpckhqdq xmm5,xmm0		; xmm5=(30 31 32 33 34 35 36 37)=data3
-	movdqa     xmm6,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm3		; xmm1=(40 41 42 43 44 45 46 47)=data4
-	punpckhqdq xmm6,xmm3		; xmm6=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm5,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm5,xmm0            ; xmm5=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm3            ; xmm1=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm6,xmm3            ; xmm6=(50 51 52 53 54 55 56 57)=data5
 
-	movdqa	xmm0,xmm5
-	movdqa	xmm3,xmm4
-	paddw	xmm5,xmm1		; xmm5=data3+data4=tmp3
-	paddw	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	psubw	xmm0,xmm1		; xmm0=data3-data4=tmp4
-	psubw	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movdqa  xmm0,xmm5
+        movdqa  xmm3,xmm4
+        paddw   xmm5,xmm1               ; xmm5=data3+data4=tmp3
+        paddw   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        psubw   xmm0,xmm1               ; xmm0=data3-data4=tmp4
+        psubw   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm1,xmm7
-	movdqa	xmm6,xmm2
-	paddw	xmm7,xmm5		; xmm7=tmp10
-	paddw	xmm2,xmm4		; xmm2=tmp11
-	psubw	xmm1,xmm5		; xmm1=tmp13
-	psubw	xmm6,xmm4		; xmm6=tmp12
+        movdqa  xmm1,xmm7
+        movdqa  xmm6,xmm2
+        paddw   xmm7,xmm5               ; xmm7=tmp10
+        paddw   xmm2,xmm4               ; xmm2=tmp11
+        psubw   xmm1,xmm5               ; xmm1=tmp13
+        psubw   xmm6,xmm4               ; xmm6=tmp12
 
-	movdqa	xmm5,xmm7
-	paddw	xmm7,xmm2		; xmm7=tmp10+tmp11
-	psubw	xmm5,xmm2		; xmm5=tmp10-tmp11
+        movdqa  xmm5,xmm7
+        paddw   xmm7,xmm2               ; xmm7=tmp10+tmp11
+        psubw   xmm5,xmm2               ; xmm5=tmp10-tmp11
 
-	paddw	xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
-	paddw	xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
-	psraw	xmm7,PASS1_BITS		; xmm7=data0
-	psraw	xmm5,PASS1_BITS		; xmm5=data4
+        paddw   xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        paddw   xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        psraw   xmm7,PASS1_BITS         ; xmm7=data0
+        psraw   xmm5,PASS1_BITS         ; xmm5=data4
 
-	movdqa	XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7
-	movdqa	XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7
+        movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5
 
-	; (Original)
-	; z1 = (tmp12 + tmp13) * 0.541196100;
-	; data2 = z1 + tmp13 * 0.765366865;
-	; data6 = z1 + tmp12 * -1.847759065;
-	;
-	; (This implementation)
-	; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
-	; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
 
-	movdqa    xmm4,xmm1		; xmm1=tmp13
-	movdqa    xmm2,xmm1
-	punpcklwd xmm4,xmm6		; xmm6=tmp12
-	punpckhwd xmm2,xmm6
-	movdqa    xmm1,xmm4
-	movdqa    xmm6,xmm2
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]	; xmm4=data2L
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_F130_F054)]	; xmm2=data2H
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]	; xmm1=data6L
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_F054_MF130)]	; xmm6=data6H
+        movdqa    xmm4,xmm1             ; xmm1=tmp13
+        movdqa    xmm2,xmm1
+        punpcklwd xmm4,xmm6             ; xmm6=tmp12
+        punpckhwd xmm2,xmm6
+        movdqa    xmm1,xmm4
+        movdqa    xmm6,xmm2
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=data2L
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F130_F054)]       ; xmm2=data2H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=data6L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm6=data6H
 
-	paddd	xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm2,DESCALE_P2
-	paddd	xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm6,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm1,DESCALE_P2
-	psrad	xmm6,DESCALE_P2
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm6,DESCALE_P2
 
-	packssdw  xmm4,xmm2		; xmm4=data2
-	packssdw  xmm1,xmm6		; xmm1=data6
+        packssdw  xmm4,xmm2             ; xmm4=data2
+        packssdw  xmm1,xmm6             ; xmm1=data6
 
-	movdqa	XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp6
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp7
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
 
-	movdqa	xmm2,xmm0		; xmm0=tmp4
-	movdqa	xmm6,xmm3		; xmm3=tmp5
-	paddw	xmm2,xmm7		; xmm2=z3
-	paddw	xmm6,xmm5		; xmm6=z4
+        movdqa  xmm2,xmm0               ; xmm0=tmp4
+        movdqa  xmm6,xmm3               ; xmm3=tmp5
+        paddw   xmm2,xmm7               ; xmm2=z3
+        paddw   xmm6,xmm5               ; xmm6=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm4,xmm2
-	movdqa    xmm1,xmm2
-	punpcklwd xmm4,xmm6
-	punpckhwd xmm1,xmm6
-	movdqa    xmm2,xmm4
-	movdqa    xmm6,xmm1
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]	; xmm4=z3L
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF078_F117)]	; xmm1=z3H
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_F117_F078)]	; xmm2=z4L
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]	; xmm6=z4H
+        movdqa    xmm4,xmm2
+        movdqa    xmm1,xmm2
+        punpcklwd xmm4,xmm6
+        punpckhwd xmm1,xmm6
+        movdqa    xmm2,xmm4
+        movdqa    xmm6,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm1=z3H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F117_F078)]       ; xmm2=z4L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4H
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=z3L
-	movdqa	XMMWORD [wk(1)], xmm1	; wk(1)=z3H
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=z3H
 
-	; (Original)
-	; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
-	; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
-	; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
-	; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
-	;
-	; (This implementation)
-	; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
-	; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
-	; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
-	; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
-	; data7 = tmp4 + z3;  data5 = tmp5 + z4;
-	; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
 
-	movdqa    xmm4,xmm0
-	movdqa    xmm1,xmm0
-	punpcklwd xmm4,xmm5
-	punpckhwd xmm1,xmm5
-	movdqa    xmm0,xmm4
-	movdqa    xmm5,xmm1
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm4=tmp4L
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm1=tmp4H
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF089_F060)]	; xmm0=tmp7L
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_MF089_F060)]	; xmm5=tmp7H
+        movdqa    xmm4,xmm0
+        movdqa    xmm1,xmm0
+        punpcklwd xmm4,xmm5
+        punpckhwd xmm1,xmm5
+        movdqa    xmm0,xmm4
+        movdqa    xmm5,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm1=tmp4H
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm0=tmp7L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm5=tmp7H
 
-	paddd	xmm4, XMMWORD [wk(0)]	; xmm4=data7L
-	paddd	xmm1, XMMWORD [wk(1)]	; xmm1=data7H
-	paddd	xmm0,xmm2		; xmm0=data1L
-	paddd	xmm5,xmm6		; xmm5=data1H
+        paddd   xmm4, XMMWORD [wk(0)]   ; xmm4=data7L
+        paddd   xmm1, XMMWORD [wk(1)]   ; xmm1=data7H
+        paddd   xmm0,xmm2               ; xmm0=data1L
+        paddd   xmm5,xmm6               ; xmm5=data1H
 
-	paddd	xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm0,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm0,DESCALE_P2
-	psrad	xmm5,DESCALE_P2
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
 
-	packssdw  xmm4,xmm1		; xmm4=data7
-	packssdw  xmm0,xmm5		; xmm0=data1
+        packssdw  xmm4,xmm1             ; xmm4=data7
+        packssdw  xmm0,xmm5             ; xmm0=data1
 
-	movdqa	XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4
-	movdqa	XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0
+        movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0
 
-	movdqa    xmm1,xmm3
-	movdqa    xmm5,xmm3
-	punpcklwd xmm1,xmm7
-	punpckhwd xmm5,xmm7
-	movdqa    xmm3,xmm1
-	movdqa    xmm7,xmm5
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm1=tmp5L
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm5=tmp5H
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]	; xmm3=tmp6L
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF256_F050)]	; xmm7=tmp6H
+        movdqa    xmm1,xmm3
+        movdqa    xmm5,xmm3
+        punpcklwd xmm1,xmm7
+        punpckhwd xmm5,xmm7
+        movdqa    xmm3,xmm1
+        movdqa    xmm7,xmm5
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm5=tmp5H
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm7=tmp6H
 
-	paddd	xmm1,xmm2		; xmm1=data5L
-	paddd	xmm5,xmm6		; xmm5=data5H
-	paddd	xmm3, XMMWORD [wk(0)]	; xmm3=data3L
-	paddd	xmm7, XMMWORD [wk(1)]	; xmm7=data3H
+        paddd   xmm1,xmm2               ; xmm1=data5L
+        paddd   xmm5,xmm6               ; xmm5=data5H
+        paddd   xmm3, XMMWORD [wk(0)]   ; xmm3=data3L
+        paddd   xmm7, XMMWORD [wk(1)]   ; xmm7=data3H
 
-	paddd	xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm1,DESCALE_P2
-	psrad	xmm5,DESCALE_P2
-	paddd	xmm3,[GOTOFF(ebx,PD_DESCALE_P2)]
-	paddd	xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
+        paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm1,xmm5		; xmm1=data5
-	packssdw  xmm3,xmm7		; xmm3=data3
+        packssdw  xmm1,xmm5             ; xmm1=data5
+        packssdw  xmm3,xmm7             ; xmm3=data3
 
-	movdqa	XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1
-	movdqa	XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfsseflt-64.asm b/simd/jfsseflt-64.asm
index 07245d2..be2f577 100644
--- a/simd/jfsseflt-64.asm
+++ b/simd/jfsseflt-64.asm
@@ -26,32 +26,32 @@
 
 ; --------------------------------------------------------------------------
 
-%macro	unpcklps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
-	shufps	%1,%2,0x44
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44      ; 0x44 picks dst[0],dst[1],src[0],src[1]
 %endmacro
 
-%macro	unpckhps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
-	shufps	%1,%2,0xEE
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE      ; 0xEE picks dst[2],dst[3],src[2],src[3]
 %endmacro
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_float_sse)
+        alignz  16
+        global  EXTN(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
 
-PD_0_382	times 4 dd  0.382683432365089771728460
-PD_0_707	times 4 dd  0.707106781186547524400844
-PD_0_541	times 4 dd  0.541196100146196984399723
-PD_1_306	times 4 dd  1.306562964876376527856643
+PD_0_382        times 4 dd  0.382683432365089771728460  ; 4 x sin(pi/8)
+PD_0_707        times 4 dd  0.707106781186547524400844  ; 4 x 1/sqrt(2)
+PD_0_541        times 4 dd  0.541196100146196984399723  ; 4 x sqrt(2)*sin(pi/8)
+PD_1_306        times 4 dd  1.306562964876376527856643  ; 4 x sqrt(2)*cos(pi/8)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -61,298 +61,298 @@
 
 ; r10 = FAST_FLOAT * data
 
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_float_sse)
+        align   16
+        global  EXTN(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	rdx, r10	; (FAST_FLOAT *)
-	mov	rcx, DCTSIZE/4
+        mov     rdx, r10        ; (FAST_FLOAT *)
+        mov     rcx, DCTSIZE/4
 .rowloop:
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)]
 
-	; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
-	; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
+        ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
+        ; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
 
-	movaps   xmm4,xmm0		; transpose coefficients(phase 1)
-	unpcklps xmm0,xmm1		; xmm0=(20 30 21 31)
-	unpckhps xmm4,xmm1		; xmm4=(22 32 23 33)
-	movaps   xmm5,xmm2		; transpose coefficients(phase 1)
-	unpcklps xmm2,xmm3		; xmm2=(24 34 25 35)
-	unpckhps xmm5,xmm3		; xmm5=(26 36 27 37)
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(20 30 21 31)
+        unpckhps xmm4,xmm1              ; xmm4=(22 32 23 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(24 34 25 35)
+        unpckhps xmm5,xmm3              ; xmm5=(26 36 27 37)
 
-	movaps	xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
 
-	; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
-	; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
+        ; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
+        ; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
 
-	movaps	XMMWORD [wk(0)], xmm4	; wk(0)=(22 32 23 33)
-	movaps	XMMWORD [wk(1)], xmm2	; wk(1)=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 32 23 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(24 34 25 35)
 
-	movaps   xmm4,xmm6		; transpose coefficients(phase 1)
-	unpcklps xmm6,xmm7		; xmm6=(00 10 01 11)
-	unpckhps xmm4,xmm7		; xmm4=(02 12 03 13)
-	movaps   xmm2,xmm1		; transpose coefficients(phase 1)
-	unpcklps xmm1,xmm3		; xmm1=(04 14 05 15)
-	unpckhps xmm2,xmm3		; xmm2=(06 16 07 17)
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 10 01 11)
+        unpckhps xmm4,xmm7              ; xmm4=(02 12 03 13)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(04 14 05 15)
+        unpckhps xmm2,xmm3              ; xmm2=(06 16 07 17)
 
-	movaps    xmm7,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm0		; xmm6=(00 10 20 30)=data0
-	unpckhps2 xmm7,xmm0		; xmm7=(01 11 21 31)=data1
-	movaps    xmm3,xmm2		; transpose coefficients(phase 2)
-	unpcklps2 xmm2,xmm5		; xmm2=(06 16 26 36)=data6
-	unpckhps2 xmm3,xmm5		; xmm3=(07 17 27 37)=data7
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 10 20 30)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(01 11 21 31)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(06 16 26 36)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(07 17 27 37)=data7
 
-	movaps	xmm0,xmm7
-	movaps	xmm5,xmm6
-	subps	xmm7,xmm2		; xmm7=data1-data6=tmp6
-	subps	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	addps	xmm0,xmm2		; xmm0=data1+data6=tmp1
-	addps	xmm5,xmm3		; xmm5=data0+data7=tmp0
+        movaps  xmm0,xmm7
+        movaps  xmm5,xmm6
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
 
-	movaps	xmm2, XMMWORD [wk(0)]	; xmm2=(22 32 23 33)
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=(24 34 25 35)
-	movaps	XMMWORD [wk(0)], xmm7	; wk(0)=tmp6
-	movaps	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 32 23 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movaps    xmm7,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(02 12 22 32)=data2
-	unpckhps2 xmm7,xmm2		; xmm7=(03 13 23 33)=data3
-	movaps    xmm6,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm3		; xmm1=(04 14 24 34)=data4
-	unpckhps2 xmm6,xmm3		; xmm6=(05 15 25 35)=data5
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(02 12 22 32)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(03 13 23 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(04 14 24 34)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(05 15 25 35)=data5
 
-	movaps	xmm2,xmm7
-	movaps	xmm3,xmm4
-	addps	xmm7,xmm1		; xmm7=data3+data4=tmp3
-	addps	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	subps	xmm2,xmm1		; xmm2=data3-data4=tmp4
-	subps	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movaps  xmm2,xmm7
+        movaps  xmm3,xmm4
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm1,xmm5
-	movaps	xmm6,xmm0
-	subps	xmm5,xmm7		; xmm5=tmp13
-	subps	xmm0,xmm4		; xmm0=tmp12
-	addps	xmm1,xmm7		; xmm1=tmp10
-	addps	xmm6,xmm4		; xmm6=tmp11
+        movaps  xmm1,xmm5
+        movaps  xmm6,xmm0
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
 
-	addps	xmm0,xmm5
-	mulps	xmm0,[rel PD_0_707] ; xmm0=z1
+        addps   xmm0,xmm5
+        mulps   xmm0,[rel PD_0_707] ; xmm0=z1
 
-	movaps	xmm7,xmm1
-	movaps	xmm4,xmm5
-	subps	xmm1,xmm6		; xmm1=data4
-	subps	xmm5,xmm0		; xmm5=data6
-	addps	xmm7,xmm6		; xmm7=data0
-	addps	xmm4,xmm0		; xmm4=data2
+        movaps  xmm7,xmm1
+        movaps  xmm4,xmm5
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
 
-	movaps	XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm6, XMMWORD [wk(0)]	; xmm6=tmp6
-	movaps	xmm0, XMMWORD [wk(1)]	; xmm0=tmp7
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
 
-	addps	xmm2,xmm3		; xmm2=tmp10
-	addps	xmm3,xmm6		; xmm3=tmp11
-	addps	xmm6,xmm0		; xmm6=tmp12, xmm0=tmp7
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
 
-	mulps	xmm3,[rel PD_0_707] ; xmm3=z3
+        mulps   xmm3,[rel PD_0_707] ; xmm3=z3
 
-	movaps	xmm1,xmm2		; xmm1=tmp10
-	subps	xmm2,xmm6
-	mulps	xmm2,[rel PD_0_382] ; xmm2=z5
-	mulps	xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
-	mulps	xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
-	addps	xmm1,xmm2		; xmm1=z2
-	addps	xmm6,xmm2		; xmm6=z4
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6
+        mulps   xmm2,[rel PD_0_382] ; xmm2=z5
+        mulps   xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
 
-	movaps	xmm5,xmm0
-	subps	xmm0,xmm3		; xmm0=z13
-	addps	xmm5,xmm3		; xmm5=z11
+        movaps  xmm5,xmm0
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
 
-	movaps	xmm7,xmm0
-	movaps	xmm4,xmm5
-	subps	xmm0,xmm1		; xmm0=data3
-	subps	xmm5,xmm6		; xmm5=data7
-	addps	xmm7,xmm1		; xmm7=data5
-	addps	xmm4,xmm6		; xmm4=data1
+        movaps  xmm7,xmm0
+        movaps  xmm4,xmm5
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
 
-	movaps	XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
 
-	add	rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	rcx
-	jnz	near .rowloop
+        add     rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     rcx
+        jnz     near .rowloop
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	mov	rdx, r10	; (FAST_FLOAT *)
-	mov	rcx, DCTSIZE/4
+        mov     rdx, r10        ; (FAST_FLOAT *)
+        mov     rcx, DCTSIZE/4
 .columnloop:
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)]
 
-	; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
-	; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
+        ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
+        ; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
 
-	movaps   xmm4,xmm0		; transpose coefficients(phase 1)
-	unpcklps xmm0,xmm1		; xmm0=(02 03 12 13)
-	unpckhps xmm4,xmm1		; xmm4=(22 23 32 33)
-	movaps   xmm5,xmm2		; transpose coefficients(phase 1)
-	unpcklps xmm2,xmm3		; xmm2=(42 43 52 53)
-	unpckhps xmm5,xmm3		; xmm5=(62 63 72 73)
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(02 03 12 13)
+        unpckhps xmm4,xmm1              ; xmm4=(22 23 32 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(42 43 52 53)
+        unpckhps xmm5,xmm3              ; xmm5=(62 63 72 73)
 
-	movaps	xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)]
 
-	; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
-	; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
+        ; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
+        ; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
 
-	movaps	XMMWORD [wk(0)], xmm4	; wk(0)=(22 23 32 33)
-	movaps	XMMWORD [wk(1)], xmm2	; wk(1)=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 23 32 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(42 43 52 53)
 
-	movaps   xmm4,xmm6		; transpose coefficients(phase 1)
-	unpcklps xmm6,xmm7		; xmm6=(00 01 10 11)
-	unpckhps xmm4,xmm7		; xmm4=(20 21 30 31)
-	movaps   xmm2,xmm1		; transpose coefficients(phase 1)
-	unpcklps xmm1,xmm3		; xmm1=(40 41 50 51)
-	unpckhps xmm2,xmm3		; xmm2=(60 61 70 71)
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 01 10 11)
+        unpckhps xmm4,xmm7              ; xmm4=(20 21 30 31)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(40 41 50 51)
+        unpckhps xmm2,xmm3              ; xmm2=(60 61 70 71)
 
-	movaps    xmm7,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm0		; xmm6=(00 01 02 03)=data0
-	unpckhps2 xmm7,xmm0		; xmm7=(10 11 12 13)=data1
-	movaps    xmm3,xmm2		; transpose coefficients(phase 2)
-	unpcklps2 xmm2,xmm5		; xmm2=(60 61 62 63)=data6
-	unpckhps2 xmm3,xmm5		; xmm3=(70 71 72 73)=data7
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 01 02 03)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(10 11 12 13)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(60 61 62 63)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(70 71 72 73)=data7
 
-	movaps	xmm0,xmm7
-	movaps	xmm5,xmm6
-	subps	xmm7,xmm2		; xmm7=data1-data6=tmp6
-	subps	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	addps	xmm0,xmm2		; xmm0=data1+data6=tmp1
-	addps	xmm5,xmm3		; xmm5=data0+data7=tmp0
+        movaps  xmm0,xmm7
+        movaps  xmm5,xmm6
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
 
-	movaps	xmm2, XMMWORD [wk(0)]	; xmm2=(22 23 32 33)
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=(42 43 52 53)
-	movaps	XMMWORD [wk(0)], xmm7	; wk(0)=tmp6
-	movaps	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 23 32 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movaps    xmm7,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(20 21 22 23)=data2
-	unpckhps2 xmm7,xmm2		; xmm7=(30 31 32 33)=data3
-	movaps    xmm6,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm3		; xmm1=(40 41 42 43)=data4
-	unpckhps2 xmm6,xmm3		; xmm6=(50 51 52 53)=data5
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(20 21 22 23)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(30 31 32 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(40 41 42 43)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(50 51 52 53)=data5
 
-	movaps	xmm2,xmm7
-	movaps	xmm3,xmm4
-	addps	xmm7,xmm1		; xmm7=data3+data4=tmp3
-	addps	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	subps	xmm2,xmm1		; xmm2=data3-data4=tmp4
-	subps	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movaps  xmm2,xmm7
+        movaps  xmm3,xmm4
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm1,xmm5
-	movaps	xmm6,xmm0
-	subps	xmm5,xmm7		; xmm5=tmp13
-	subps	xmm0,xmm4		; xmm0=tmp12
-	addps	xmm1,xmm7		; xmm1=tmp10
-	addps	xmm6,xmm4		; xmm6=tmp11
+        movaps  xmm1,xmm5
+        movaps  xmm6,xmm0
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
 
-	addps	xmm0,xmm5
-	mulps	xmm0,[rel PD_0_707] ; xmm0=z1
+        addps   xmm0,xmm5
+        mulps   xmm0,[rel PD_0_707] ; xmm0=z1
 
-	movaps	xmm7,xmm1
-	movaps	xmm4,xmm5
-	subps	xmm1,xmm6		; xmm1=data4
-	subps	xmm5,xmm0		; xmm5=data6
-	addps	xmm7,xmm6		; xmm7=data0
-	addps	xmm4,xmm0		; xmm4=data2
+        movaps  xmm7,xmm1
+        movaps  xmm4,xmm5
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
 
-	movaps	XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm6, XMMWORD [wk(0)]	; xmm6=tmp6
-	movaps	xmm0, XMMWORD [wk(1)]	; xmm0=tmp7
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
 
-	addps	xmm2,xmm3		; xmm2=tmp10
-	addps	xmm3,xmm6		; xmm3=tmp11
-	addps	xmm6,xmm0		; xmm6=tmp12, xmm0=tmp7
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
 
-	mulps	xmm3,[rel PD_0_707] ; xmm3=z3
+        mulps   xmm3,[rel PD_0_707] ; xmm3=z3
 
-	movaps	xmm1,xmm2		; xmm1=tmp10
-	subps	xmm2,xmm6
-	mulps	xmm2,[rel PD_0_382] ; xmm2=z5
-	mulps	xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
-	mulps	xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
-	addps	xmm1,xmm2		; xmm1=z2
-	addps	xmm6,xmm2		; xmm6=z4
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6
+        mulps   xmm2,[rel PD_0_382] ; xmm2=z5
+        mulps   xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
 
-	movaps	xmm5,xmm0
-	subps	xmm0,xmm3		; xmm0=z13
-	addps	xmm5,xmm3		; xmm5=z11
+        movaps  xmm5,xmm0
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
 
-	movaps	xmm7,xmm0
-	movaps	xmm4,xmm5
-	subps	xmm0,xmm1		; xmm0=data3
-	subps	xmm5,xmm6		; xmm5=data7
-	addps	xmm7,xmm1		; xmm7=data5
-	addps	xmm4,xmm6		; xmm4=data1
+        movaps  xmm7,xmm0
+        movaps  xmm4,xmm5
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
 
-	movaps	XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
 
-	add	rdx, byte 4*SIZEOF_FAST_FLOAT
-	dec	rcx
-	jnz	near .columnloop
+        add     rdx, byte 4*SIZEOF_FAST_FLOAT
+        dec     rcx
+        jnz     near .columnloop
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jfsseflt.asm b/simd/jfsseflt.asm
index bc54ccc..5ece3f4 100644
--- a/simd/jfsseflt.asm
+++ b/simd/jfsseflt.asm
@@ -25,32 +25,32 @@
 
 ; --------------------------------------------------------------------------
 
-%macro	unpcklps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
-	shufps	%1,%2,0x44
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44
 %endmacro
 
-%macro	unpckhps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
-	shufps	%1,%2,0xEE
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_fdct_float_sse)
+        alignz  16
+        global  EXTN(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
 
-PD_0_382	times 4 dd  0.382683432365089771728460
-PD_0_707	times 4 dd  0.707106781186547524400844
-PD_0_541	times 4 dd  0.541196100146196984399723
-PD_1_306	times 4 dd  1.306562964876376527856643
+PD_0_382        times 4 dd  0.382683432365089771728460
+PD_0_707        times 4 dd  0.707106781186547524400844
+PD_0_541        times 4 dd  0.541196100146196984399723
+PD_1_306        times 4 dd  1.306562964876376527856643
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform the forward DCT on one block of samples.
 ;
@@ -58,313 +58,313 @@
 ; jsimd_fdct_float_sse (FAST_FLOAT * data)
 ;
 
-%define data(b)		(b)+8		; FAST_FLOAT * data
+%define data(b)         (b)+8           ; FAST_FLOAT * data
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_fdct_float_sse)
+        align   16
+        global  EXTN(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-;	push	edi		; unused
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process rows.
+        ; ---- Pass 1: process rows.
 
-	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .rowloop:
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)]
 
-	; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
-	; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
+        ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
+        ; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
 
-	movaps   xmm4,xmm0		; transpose coefficients(phase 1)
-	unpcklps xmm0,xmm1		; xmm0=(20 30 21 31)
-	unpckhps xmm4,xmm1		; xmm4=(22 32 23 33)
-	movaps   xmm5,xmm2		; transpose coefficients(phase 1)
-	unpcklps xmm2,xmm3		; xmm2=(24 34 25 35)
-	unpckhps xmm5,xmm3		; xmm5=(26 36 27 37)
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(20 30 21 31)
+        unpckhps xmm4,xmm1              ; xmm4=(22 32 23 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(24 34 25 35)
+        unpckhps xmm5,xmm3              ; xmm5=(26 36 27 37)
 
-	movaps	xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
 
-	; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
-	; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
+        ; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
+        ; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
 
-	movaps	XMMWORD [wk(0)], xmm4	; wk(0)=(22 32 23 33)
-	movaps	XMMWORD [wk(1)], xmm2	; wk(1)=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 32 23 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(24 34 25 35)
 
-	movaps   xmm4,xmm6		; transpose coefficients(phase 1)
-	unpcklps xmm6,xmm7		; xmm6=(00 10 01 11)
-	unpckhps xmm4,xmm7		; xmm4=(02 12 03 13)
-	movaps   xmm2,xmm1		; transpose coefficients(phase 1)
-	unpcklps xmm1,xmm3		; xmm1=(04 14 05 15)
-	unpckhps xmm2,xmm3		; xmm2=(06 16 07 17)
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 10 01 11)
+        unpckhps xmm4,xmm7              ; xmm4=(02 12 03 13)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(04 14 05 15)
+        unpckhps xmm2,xmm3              ; xmm2=(06 16 07 17)
 
-	movaps    xmm7,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm0		; xmm6=(00 10 20 30)=data0
-	unpckhps2 xmm7,xmm0		; xmm7=(01 11 21 31)=data1
-	movaps    xmm3,xmm2		; transpose coefficients(phase 2)
-	unpcklps2 xmm2,xmm5		; xmm2=(06 16 26 36)=data6
-	unpckhps2 xmm3,xmm5		; xmm3=(07 17 27 37)=data7
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 10 20 30)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(01 11 21 31)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(06 16 26 36)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(07 17 27 37)=data7
 
-	movaps	xmm0,xmm7
-	movaps	xmm5,xmm6
-	subps	xmm7,xmm2		; xmm7=data1-data6=tmp6
-	subps	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	addps	xmm0,xmm2		; xmm0=data1+data6=tmp1
-	addps	xmm5,xmm3		; xmm5=data0+data7=tmp0
+        movaps  xmm0,xmm7
+        movaps  xmm5,xmm6
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
 
-	movaps	xmm2, XMMWORD [wk(0)]	; xmm2=(22 32 23 33)
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=(24 34 25 35)
-	movaps	XMMWORD [wk(0)], xmm7	; wk(0)=tmp6
-	movaps	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 32 23 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movaps    xmm7,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(02 12 22 32)=data2
-	unpckhps2 xmm7,xmm2		; xmm7=(03 13 23 33)=data3
-	movaps    xmm6,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm3		; xmm1=(04 14 24 34)=data4
-	unpckhps2 xmm6,xmm3		; xmm6=(05 15 25 35)=data5
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(02 12 22 32)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(03 13 23 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(04 14 24 34)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(05 15 25 35)=data5
 
-	movaps	xmm2,xmm7
-	movaps	xmm3,xmm4
-	addps	xmm7,xmm1		; xmm7=data3+data4=tmp3
-	addps	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	subps	xmm2,xmm1		; xmm2=data3-data4=tmp4
-	subps	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movaps  xmm2,xmm7
+        movaps  xmm3,xmm4
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm1,xmm5
-	movaps	xmm6,xmm0
-	subps	xmm5,xmm7		; xmm5=tmp13
-	subps	xmm0,xmm4		; xmm0=tmp12
-	addps	xmm1,xmm7		; xmm1=tmp10
-	addps	xmm6,xmm4		; xmm6=tmp11
+        movaps  xmm1,xmm5
+        movaps  xmm6,xmm0
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
 
-	addps	xmm0,xmm5
-	mulps	xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+        addps   xmm0,xmm5
+        mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
 
-	movaps	xmm7,xmm1
-	movaps	xmm4,xmm5
-	subps	xmm1,xmm6		; xmm1=data4
-	subps	xmm5,xmm0		; xmm5=data6
-	addps	xmm7,xmm6		; xmm7=data0
-	addps	xmm4,xmm0		; xmm4=data2
+        movaps  xmm7,xmm1
+        movaps  xmm4,xmm5
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
 
-	movaps	XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm6, XMMWORD [wk(0)]	; xmm6=tmp6
-	movaps	xmm0, XMMWORD [wk(1)]	; xmm0=tmp7
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
 
-	addps	xmm2,xmm3		; xmm2=tmp10
-	addps	xmm3,xmm6		; xmm3=tmp11
-	addps	xmm6,xmm0		; xmm6=tmp12, xmm0=tmp7
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
 
-	mulps	xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+        mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
 
-	movaps	xmm1,xmm2		; xmm1=tmp10
-	subps	xmm2,xmm6
-	mulps	xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
-	mulps	xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
-	mulps	xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
-	addps	xmm1,xmm2		; xmm1=z2
-	addps	xmm6,xmm2		; xmm6=z4
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6
+        mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+        mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
 
-	movaps	xmm5,xmm0
-	subps	xmm0,xmm3		; xmm0=z13
-	addps	xmm5,xmm3		; xmm5=z11
+        movaps  xmm5,xmm0
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
 
-	movaps	xmm7,xmm0
-	movaps	xmm4,xmm5
-	subps	xmm0,xmm1		; xmm0=data3
-	subps	xmm5,xmm6		; xmm5=data7
-	addps	xmm7,xmm1		; xmm7=data5
-	addps	xmm4,xmm6		; xmm4=data1
+        movaps  xmm7,xmm0
+        movaps  xmm4,xmm5
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
 
-	movaps	XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4
 
-	add	edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .rowloop
+        add     edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .rowloop
 
-	; ---- Pass 2: process columns.
+        ; ---- Pass 2: process columns.
 
-	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
-	mov	ecx, DCTSIZE/4
-	alignx	16,7
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
 .columnloop:
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
 
-	; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
-	; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
+        ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
+        ; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
 
-	movaps   xmm4,xmm0		; transpose coefficients(phase 1)
-	unpcklps xmm0,xmm1		; xmm0=(02 03 12 13)
-	unpckhps xmm4,xmm1		; xmm4=(22 23 32 33)
-	movaps   xmm5,xmm2		; transpose coefficients(phase 1)
-	unpcklps xmm2,xmm3		; xmm2=(42 43 52 53)
-	unpckhps xmm5,xmm3		; xmm5=(62 63 72 73)
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(02 03 12 13)
+        unpckhps xmm4,xmm1              ; xmm4=(22 23 32 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(42 43 52 53)
+        unpckhps xmm5,xmm3              ; xmm5=(62 63 72 73)
 
-	movaps	xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
 
-	; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
-	; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
+        ; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
+        ; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
 
-	movaps	XMMWORD [wk(0)], xmm4	; wk(0)=(22 23 32 33)
-	movaps	XMMWORD [wk(1)], xmm2	; wk(1)=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 23 32 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(42 43 52 53)
 
-	movaps   xmm4,xmm6		; transpose coefficients(phase 1)
-	unpcklps xmm6,xmm7		; xmm6=(00 01 10 11)
-	unpckhps xmm4,xmm7		; xmm4=(20 21 30 31)
-	movaps   xmm2,xmm1		; transpose coefficients(phase 1)
-	unpcklps xmm1,xmm3		; xmm1=(40 41 50 51)
-	unpckhps xmm2,xmm3		; xmm2=(60 61 70 71)
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 01 10 11)
+        unpckhps xmm4,xmm7              ; xmm4=(20 21 30 31)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(40 41 50 51)
+        unpckhps xmm2,xmm3              ; xmm2=(60 61 70 71)
 
-	movaps    xmm7,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm0		; xmm6=(00 01 02 03)=data0
-	unpckhps2 xmm7,xmm0		; xmm7=(10 11 12 13)=data1
-	movaps    xmm3,xmm2		; transpose coefficients(phase 2)
-	unpcklps2 xmm2,xmm5		; xmm2=(60 61 62 63)=data6
-	unpckhps2 xmm3,xmm5		; xmm3=(70 71 72 73)=data7
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 01 02 03)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(10 11 12 13)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(60 61 62 63)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(70 71 72 73)=data7
 
-	movaps	xmm0,xmm7
-	movaps	xmm5,xmm6
-	subps	xmm7,xmm2		; xmm7=data1-data6=tmp6
-	subps	xmm6,xmm3		; xmm6=data0-data7=tmp7
-	addps	xmm0,xmm2		; xmm0=data1+data6=tmp1
-	addps	xmm5,xmm3		; xmm5=data0+data7=tmp0
+        movaps  xmm0,xmm7
+        movaps  xmm5,xmm6
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
 
-	movaps	xmm2, XMMWORD [wk(0)]	; xmm2=(22 23 32 33)
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=(42 43 52 53)
-	movaps	XMMWORD [wk(0)], xmm7	; wk(0)=tmp6
-	movaps	XMMWORD [wk(1)], xmm6	; wk(1)=tmp7
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 23 32 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
 
-	movaps    xmm7,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(20 21 22 23)=data2
-	unpckhps2 xmm7,xmm2		; xmm7=(30 31 32 33)=data3
-	movaps    xmm6,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm3		; xmm1=(40 41 42 43)=data4
-	unpckhps2 xmm6,xmm3		; xmm6=(50 51 52 53)=data5
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(20 21 22 23)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(30 31 32 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(40 41 42 43)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(50 51 52 53)=data5
 
-	movaps	xmm2,xmm7
-	movaps	xmm3,xmm4
-	addps	xmm7,xmm1		; xmm7=data3+data4=tmp3
-	addps	xmm4,xmm6		; xmm4=data2+data5=tmp2
-	subps	xmm2,xmm1		; xmm2=data3-data4=tmp4
-	subps	xmm3,xmm6		; xmm3=data2-data5=tmp5
+        movaps  xmm2,xmm7
+        movaps  xmm3,xmm4
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm1,xmm5
-	movaps	xmm6,xmm0
-	subps	xmm5,xmm7		; xmm5=tmp13
-	subps	xmm0,xmm4		; xmm0=tmp12
-	addps	xmm1,xmm7		; xmm1=tmp10
-	addps	xmm6,xmm4		; xmm6=tmp11
+        movaps  xmm1,xmm5
+        movaps  xmm6,xmm0
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
 
-	addps	xmm0,xmm5
-	mulps	xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+        addps   xmm0,xmm5
+        mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
 
-	movaps	xmm7,xmm1
-	movaps	xmm4,xmm5
-	subps	xmm1,xmm6		; xmm1=data4
-	subps	xmm5,xmm0		; xmm5=data6
-	addps	xmm7,xmm6		; xmm7=data0
-	addps	xmm4,xmm0		; xmm4=data2
+        movaps  xmm7,xmm1
+        movaps  xmm4,xmm5
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
 
-	movaps	XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm6, XMMWORD [wk(0)]	; xmm6=tmp6
-	movaps	xmm0, XMMWORD [wk(1)]	; xmm0=tmp7
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
 
-	addps	xmm2,xmm3		; xmm2=tmp10
-	addps	xmm3,xmm6		; xmm3=tmp11
-	addps	xmm6,xmm0		; xmm6=tmp12, xmm0=tmp7
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
 
-	mulps	xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+        mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
 
-	movaps	xmm1,xmm2		; xmm1=tmp10
-	subps	xmm2,xmm6
-	mulps	xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
-	mulps	xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
-	mulps	xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
-	addps	xmm1,xmm2		; xmm1=z2
-	addps	xmm6,xmm2		; xmm6=z4
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6
+        mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+        mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
 
-	movaps	xmm5,xmm0
-	subps	xmm0,xmm3		; xmm0=z13
-	addps	xmm5,xmm3		; xmm5=z11
+        movaps  xmm5,xmm0
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
 
-	movaps	xmm7,xmm0
-	movaps	xmm4,xmm5
-	subps	xmm0,xmm1		; xmm0=data3
-	subps	xmm5,xmm6		; xmm5=data7
-	addps	xmm7,xmm1		; xmm7=data5
-	addps	xmm4,xmm6		; xmm4=data1
+        movaps  xmm7,xmm0
+        movaps  xmm4,xmm5
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
 
-	movaps	XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7
-	movaps	XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7
+        movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4
 
-	add	edx, byte 4*SIZEOF_FAST_FLOAT
-	dec	ecx
-	jnz	near .columnloop
+        add     edx, byte 4*SIZEOF_FAST_FLOAT
+        dec     ecx
+        jnz     near .columnloop
 
-;	pop	edi		; unused
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/ji3dnflt.asm b/simd/ji3dnflt.asm
index dc2076f..76303fa 100644
--- a/simd/ji3dnflt.asm
+++ b/simd/ji3dnflt.asm
@@ -24,25 +24,25 @@
 %include "jdct.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_float_3dnow)
+        alignz  16
+        global  EXTN(jconst_idct_float_3dnow)
 
 EXTN(jconst_idct_float_3dnow):
 
-PD_1_414	times 2 dd  1.414213562373095048801689
-PD_1_847	times 2 dd  1.847759065022573512256366
-PD_1_082	times 2 dd  1.082392200292393968799446
-PD_2_613	times 2 dd  2.613125929752753055713286
-PD_RNDINT_MAGIC	times 2 dd  100663296.0	; (float)(0x00C00000 << 3)
-PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE
+PD_1_414        times 2 dd  1.414213562373095048801689  ; sqrt(2)
+PD_1_847        times 2 dd  1.847759065022573512256366  ; 2*cos(pi/8)
+PD_1_082        times 2 dd  1.082392200292393968799446  ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_2_613        times 2 dd  2.613125929752753055713286  ; 2*(cos(pi/8)+cos(3*pi/8))
+PD_RNDINT_MAGIC times 2 dd  100663296.0 ; (float)(0x00C00000 << 3) = 1.5 * 2^26
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE       ; byte value replicated 8 times
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -51,402 +51,402 @@
 ;                         JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
-					; FAST_FLOAT workspace[DCTSIZE2]
+%define original_ebp    ebp+0           ; slot holding the pre-alignment frame base
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots below ebp
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_float_3dnow)
+        align   16
+        global  EXTN(jsimd_idct_float_3dnow)
 
 EXTN(jsimd_idct_float_3dnow):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; FAST_FLOAT * wsptr
-	mov	ecx, DCTSIZE/2				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/2                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	pushpic	ebx		; save GOT address
-	mov	ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	mov	eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	or	ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	or	ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	or	eax,ebx
-	poppic	ebx		; restore GOT address
-	jnz	short .columnDCT
+        pushpic ebx             ; save GOT address
+        mov     ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        mov     eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        or      ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        or      ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        or      eax,ebx
+        poppic  ebx             ; restore GOT address
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd mm0,mm0
-	psrad     mm0,(DWORD_BIT-WORD_BIT)
-	pi2fd     mm0,mm0
+        punpcklwd mm0,mm0
+        psrad     mm0,(DWORD_BIT-WORD_BIT)
+        pi2fd     mm0,mm0
 
-	pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movq      mm1,mm0
-	punpckldq mm0,mm0
-	punpckhdq mm1,mm1
+        movq      mm1,mm0
+        punpckldq mm0,mm0
+        punpckhdq mm1,mm1
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
-	movq	MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
-	movq	MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
-	jmp	near .nextcolumn
-	alignx	16,7
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movd      mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movd      mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movd      mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movd      mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movd      mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movd      mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd mm0,mm0
-	punpcklwd mm1,mm1
-	psrad     mm0,(DWORD_BIT-WORD_BIT)
-	psrad     mm1,(DWORD_BIT-WORD_BIT)
-	pi2fd     mm0,mm0
-	pi2fd     mm1,mm1
+        punpcklwd mm0,mm0
+        punpcklwd mm1,mm1
+        psrad     mm0,(DWORD_BIT-WORD_BIT)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)
+        pi2fd     mm0,mm0
+        pi2fd     mm1,mm1
 
-	pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	pfmul     mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	punpcklwd mm2,mm2
-	punpcklwd mm3,mm3
-	psrad     mm2,(DWORD_BIT-WORD_BIT)
-	psrad     mm3,(DWORD_BIT-WORD_BIT)
-	pi2fd     mm2,mm2
-	pi2fd     mm3,mm3
+        punpcklwd mm2,mm2
+        punpcklwd mm3,mm3
+        psrad     mm2,(DWORD_BIT-WORD_BIT)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)
+        pi2fd     mm2,mm2
+        pi2fd     mm3,mm3
 
-	pfmul     mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	pfmul     mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movq	mm4,mm0
-	movq	mm5,mm1
-	pfsub	mm0,mm2			; mm0=tmp11
-	pfsub	mm1,mm3
-	pfadd	mm4,mm2			; mm4=tmp10
-	pfadd	mm5,mm3			; mm5=tmp13
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pfsub   mm0,mm2                 ; mm0=tmp11
+        pfsub   mm1,mm3
+        pfadd   mm4,mm2                 ; mm4=tmp10
+        pfadd   mm5,mm3                 ; mm5=tmp13
 
-	pfmul	mm1,[GOTOFF(ebx,PD_1_414)]
-	pfsub	mm1,mm5			; mm1=tmp12
+        pfmul   mm1,[GOTOFF(ebx,PD_1_414)]
+        pfsub   mm1,mm5                 ; mm1=tmp12
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	pfsub	mm4,mm5			; mm4=tmp3
-	pfsub	mm0,mm1			; mm0=tmp2
-	pfadd	mm6,mm5			; mm6=tmp0
-	pfadd	mm7,mm1			; mm7=tmp1
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm5                 ; mm4=tmp3
+        pfsub   mm0,mm1                 ; mm0=tmp2
+        pfadd   mm6,mm5                 ; mm6=tmp0
+        pfadd   mm7,mm1                 ; mm7=tmp1
 
-	movq	MMWORD [wk(1)], mm4	; tmp3
-	movq	MMWORD [wk(0)], mm0	; tmp2
+        movq    MMWORD [wk(1)], mm4     ; tmp3
+        movq    MMWORD [wk(0)], mm0     ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movd      mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movd      mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movd      mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movd      mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movd      mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movd      mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movd      mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movd      mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd mm2,mm2
-	punpcklwd mm3,mm3
-	psrad     mm2,(DWORD_BIT-WORD_BIT)
-	psrad     mm3,(DWORD_BIT-WORD_BIT)
-	pi2fd     mm2,mm2
-	pi2fd     mm3,mm3
+        punpcklwd mm2,mm2
+        punpcklwd mm3,mm3
+        psrad     mm2,(DWORD_BIT-WORD_BIT)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)
+        pi2fd     mm2,mm2
+        pi2fd     mm3,mm3
 
-	pfmul     mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	pfmul     mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	punpcklwd mm5,mm5
-	punpcklwd mm1,mm1
-	psrad     mm5,(DWORD_BIT-WORD_BIT)
-	psrad     mm1,(DWORD_BIT-WORD_BIT)
-	pi2fd     mm5,mm5
-	pi2fd     mm1,mm1
+        punpcklwd mm5,mm5
+        punpcklwd mm1,mm1
+        psrad     mm5,(DWORD_BIT-WORD_BIT)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)
+        pi2fd     mm5,mm5
+        pi2fd     mm1,mm1
 
-	pfmul     mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	pfmul     mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        pfmul     mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movq	mm4,mm2
-	movq	mm0,mm5
-	pfadd	mm2,mm1			; mm2=z11
-	pfadd	mm5,mm3			; mm5=z13
-	pfsub	mm4,mm1			; mm4=z12
-	pfsub	mm0,mm3			; mm0=z10
+        movq    mm4,mm2
+        movq    mm0,mm5
+        pfadd   mm2,mm1                 ; mm2=z11
+        pfadd   mm5,mm3                 ; mm5=z13
+        pfsub   mm4,mm1                 ; mm4=z12
+        pfsub   mm0,mm3                 ; mm0=z10
 
-	movq	mm1,mm2
-	pfsub	mm2,mm5
-	pfadd	mm1,mm5			; mm1=tmp7
+        movq    mm1,mm2
+        pfsub   mm2,mm5
+        pfadd   mm1,mm5                 ; mm1=tmp7
 
-	pfmul	mm2,[GOTOFF(ebx,PD_1_414)]	; mm2=tmp11
+        pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
 
-	movq	mm3,mm0
-	pfadd	mm0,mm4
-	pfmul	mm0,[GOTOFF(ebx,PD_1_847)]	; mm0=z5
-	pfmul	mm3,[GOTOFF(ebx,PD_2_613)]	; mm3=(z10 * 2.613125930)
-	pfmul	mm4,[GOTOFF(ebx,PD_1_082)]	; mm4=(z12 * 1.082392200)
-	pfsubr	mm3,mm0			; mm3=tmp12
-	pfsub	mm4,mm0			; mm4=tmp10
+        movq    mm3,mm0
+        pfadd   mm0,mm4
+        pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+        pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+        pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+        pfsubr  mm3,mm0                 ; mm3=tmp12
+        pfsub   mm4,mm0                 ; mm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	pfsub	mm3,mm1			; mm3=tmp6
-	movq	mm5,mm6
-	movq	mm0,mm7
-	pfadd	mm6,mm1			; mm6=data0=(00 01)
-	pfadd	mm7,mm3			; mm7=data1=(10 11)
-	pfsub	mm5,mm1			; mm5=data7=(70 71)
-	pfsub	mm0,mm3			; mm0=data6=(60 61)
-	pfsub	mm2,mm3			; mm2=tmp5
+        pfsub   mm3,mm1                 ; mm3=tmp6
+        movq    mm5,mm6
+        movq    mm0,mm7
+        pfadd   mm6,mm1                 ; mm6=data0=(00 01)
+        pfadd   mm7,mm3                 ; mm7=data1=(10 11)
+        pfsub   mm5,mm1                 ; mm5=data7=(70 71)
+        pfsub   mm0,mm3                 ; mm0=data6=(60 61)
+        pfsub   mm2,mm3                 ; mm2=tmp5
 
-	movq      mm1,mm6		; transpose coefficients
-	punpckldq mm6,mm7		; mm6=(00 10)
-	punpckhdq mm1,mm7		; mm1=(01 11)
-	movq      mm3,mm0		; transpose coefficients
-	punpckldq mm0,mm5		; mm0=(60 70)
-	punpckhdq mm3,mm5		; mm3=(61 71)
+        movq      mm1,mm6               ; transpose coefficients
+        punpckldq mm6,mm7               ; mm6=(00 10)
+        punpckhdq mm1,mm7               ; mm1=(01 11)
+        movq      mm3,mm0               ; transpose coefficients
+        punpckldq mm0,mm5               ; mm0=(60 70)
+        punpckhdq mm3,mm5               ; mm3=(61 71)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
-	movq	MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=tmp2
-	movq	mm5, MMWORD [wk(1)]	; mm5=tmp3
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+        movq    mm5, MMWORD [wk(1)]     ; mm5=tmp3
 
-	pfadd	mm4,mm2			; mm4=tmp4
-	movq	mm6,mm7
-	movq	mm1,mm5
-	pfadd	mm7,mm2			; mm7=data2=(20 21)
-	pfadd	mm5,mm4			; mm5=data4=(40 41)
-	pfsub	mm6,mm2			; mm6=data5=(50 51)
-	pfsub	mm1,mm4			; mm1=data3=(30 31)
+        pfadd   mm4,mm2                 ; mm4=tmp4
+        movq    mm6,mm7
+        movq    mm1,mm5
+        pfadd   mm7,mm2                 ; mm7=data2=(20 21)
+        pfadd   mm5,mm4                 ; mm5=data4=(40 41)
+        pfsub   mm6,mm2                 ; mm6=data5=(50 51)
+        pfsub   mm1,mm4                 ; mm1=data3=(30 31)
 
-	movq      mm0,mm7		; transpose coefficients
-	punpckldq mm7,mm1		; mm7=(20 30)
-	punpckhdq mm0,mm1		; mm0=(21 31)
-	movq      mm3,mm5		; transpose coefficients
-	punpckldq mm5,mm6		; mm5=(40 50)
-	punpckhdq mm3,mm6		; mm3=(41 51)
+        movq      mm0,mm7               ; transpose coefficients
+        punpckldq mm7,mm1               ; mm7=(20 30)
+        punpckhdq mm0,mm1               ; mm0=(21 31)
+        movq      mm3,mm5               ; transpose coefficients
+        punpckldq mm5,mm6               ; mm5=(40 50)
+        punpckhdq mm3,mm6               ; mm3=(41 51)
 
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0
-	movq	MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
-	movq	MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3
 
 .nextcolumn:
-	add	esi, byte 2*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 2*SIZEOF_FLOAT_MULT_TYPE	; quantptr
-	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 2*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 2*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
-	prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
-	prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
-	prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; FAST_FLOAT * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
-	mov	ecx, DCTSIZE/2				; ctr
-	alignx	16,7
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/2                          ; ctr
+        alignx  16,7
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movq	mm4,mm0
-	movq	mm5,mm1
-	pfsub	mm0,mm2			; mm0=tmp11
-	pfsub	mm1,mm3
-	pfadd	mm4,mm2			; mm4=tmp10
-	pfadd	mm5,mm3			; mm5=tmp13
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pfsub   mm0,mm2                 ; mm0=tmp11
+        pfsub   mm1,mm3
+        pfadd   mm4,mm2                 ; mm4=tmp10
+        pfadd   mm5,mm3                 ; mm5=tmp13
 
-	pfmul	mm1,[GOTOFF(ebx,PD_1_414)]
-	pfsub	mm1,mm5			; mm1=tmp12
+        pfmul   mm1,[GOTOFF(ebx,PD_1_414)]
+        pfsub   mm1,mm5                 ; mm1=tmp12
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	pfsub	mm4,mm5			; mm4=tmp3
-	pfsub	mm0,mm1			; mm0=tmp2
-	pfadd	mm6,mm5			; mm6=tmp0
-	pfadd	mm7,mm1			; mm7=tmp1
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm5                 ; mm4=tmp3
+        pfsub   mm0,mm1                 ; mm0=tmp2
+        pfadd   mm6,mm5                 ; mm6=tmp0
+        pfadd   mm7,mm1                 ; mm7=tmp1
 
-	movq	MMWORD [wk(1)], mm4	; tmp3
-	movq	MMWORD [wk(0)], mm0	; tmp2
+        movq    MMWORD [wk(1)], mm4     ; tmp3
+        movq    MMWORD [wk(0)], mm0     ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
-	movq	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movq	mm4,mm2
-	movq	mm0,mm5
-	pfadd	mm2,mm1			; mm2=z11
-	pfadd	mm5,mm3			; mm5=z13
-	pfsub	mm4,mm1			; mm4=z12
-	pfsub	mm0,mm3			; mm0=z10
+        movq    mm4,mm2
+        movq    mm0,mm5
+        pfadd   mm2,mm1                 ; mm2=z11
+        pfadd   mm5,mm3                 ; mm5=z13
+        pfsub   mm4,mm1                 ; mm4=z12
+        pfsub   mm0,mm3                 ; mm0=z10
 
-	movq	mm1,mm2
-	pfsub	mm2,mm5
-	pfadd	mm1,mm5			; mm1=tmp7
+        movq    mm1,mm2
+        pfsub   mm2,mm5
+        pfadd   mm1,mm5                 ; mm1=tmp7
 
-	pfmul	mm2,[GOTOFF(ebx,PD_1_414)]	; mm2=tmp11
+        pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
 
-	movq	mm3,mm0
-	pfadd	mm0,mm4
-	pfmul	mm0,[GOTOFF(ebx,PD_1_847)]	; mm0=z5
-	pfmul	mm3,[GOTOFF(ebx,PD_2_613)]	; mm3=(z10 * 2.613125930)
-	pfmul	mm4,[GOTOFF(ebx,PD_1_082)]	; mm4=(z12 * 1.082392200)
-	pfsubr	mm3,mm0			; mm3=tmp12
-	pfsub	mm4,mm0			; mm4=tmp10
+        movq    mm3,mm0
+        pfadd   mm0,mm4
+        pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+        pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+        pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+        pfsubr  mm3,mm0                 ; mm3=tmp12
+        pfsub   mm4,mm0                 ; mm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	pfsub	mm3,mm1			; mm3=tmp6
-	movq	mm5,mm6
-	movq	mm0,mm7
-	pfadd	mm6,mm1			; mm6=data0=(00 10)
-	pfadd	mm7,mm3			; mm7=data1=(01 11)
-	pfsub	mm5,mm1			; mm5=data7=(07 17)
-	pfsub	mm0,mm3			; mm0=data6=(06 16)
-	pfsub	mm2,mm3			; mm2=tmp5
+        pfsub   mm3,mm1                 ; mm3=tmp6
+        movq    mm5,mm6
+        movq    mm0,mm7
+        pfadd   mm6,mm1                 ; mm6=data0=(00 10)
+        pfadd   mm7,mm3                 ; mm7=data1=(01 11)
+        pfsub   mm5,mm1                 ; mm5=data7=(07 17)
+        pfsub   mm0,mm3                 ; mm0=data6=(06 16)
+        pfsub   mm2,mm3                 ; mm2=tmp5
 
-	movq	mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]	; mm1=[PD_RNDINT_MAGIC]
-	pcmpeqd	mm3,mm3
-	psrld	mm3,WORD_BIT		; mm3={0xFFFF 0x0000 0xFFFF 0x0000}
+        movq    mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm1=[PD_RNDINT_MAGIC]
+        pcmpeqd mm3,mm3
+        psrld   mm3,WORD_BIT            ; mm3={0xFFFF 0x0000 0xFFFF 0x0000}
 
-	pfadd	mm6,mm1			; mm6=roundint(data0/8)=(00 ** 10 **)
-	pfadd	mm7,mm1			; mm7=roundint(data1/8)=(01 ** 11 **)
-	pfadd	mm0,mm1			; mm0=roundint(data6/8)=(06 ** 16 **)
-	pfadd	mm5,mm1			; mm5=roundint(data7/8)=(07 ** 17 **)
+        pfadd   mm6,mm1                 ; mm6=roundint(data0/8)=(00 ** 10 **)
+        pfadd   mm7,mm1                 ; mm7=roundint(data1/8)=(01 ** 11 **)
+        pfadd   mm0,mm1                 ; mm0=roundint(data6/8)=(06 ** 16 **)
+        pfadd   mm5,mm1                 ; mm5=roundint(data7/8)=(07 ** 17 **)
 
-	pand	mm6,mm3			; mm6=(00 -- 10 --)
-	pslld	mm7,WORD_BIT		; mm7=(-- 01 -- 11)
-	pand	mm0,mm3			; mm0=(06 -- 16 --)
-	pslld	mm5,WORD_BIT		; mm5=(-- 07 -- 17)
-	por	mm6,mm7			; mm6=(00 01 10 11)
-	por	mm0,mm5			; mm0=(06 07 16 17)
+        pand    mm6,mm3                 ; mm6=(00 -- 10 --)
+        pslld   mm7,WORD_BIT            ; mm7=(-- 01 -- 11)
+        pand    mm0,mm3                 ; mm0=(06 -- 16 --)
+        pslld   mm5,WORD_BIT            ; mm5=(-- 07 -- 17)
+        por     mm6,mm7                 ; mm6=(00 01 10 11)
+        por     mm0,mm5                 ; mm0=(06 07 16 17)
 
-	movq	mm1, MMWORD [wk(0)]	; mm1=tmp2
-	movq	mm3, MMWORD [wk(1)]	; mm3=tmp3
+        movq    mm1, MMWORD [wk(0)]     ; mm1=tmp2
+        movq    mm3, MMWORD [wk(1)]     ; mm3=tmp3
 
-	pfadd	mm4,mm2			; mm4=tmp4
-	movq	mm7,mm1
-	movq	mm5,mm3
-	pfadd	mm1,mm2			; mm1=data2=(02 12)
-	pfadd	mm3,mm4			; mm3=data4=(04 14)
-	pfsub	mm7,mm2			; mm7=data5=(05 15)
-	pfsub	mm5,mm4			; mm5=data3=(03 13)
+        pfadd   mm4,mm2                 ; mm4=tmp4
+        movq    mm7,mm1
+        movq    mm5,mm3
+        pfadd   mm1,mm2                 ; mm1=data2=(02 12)
+        pfadd   mm3,mm4                 ; mm3=data4=(04 14)
+        pfsub   mm7,mm2                 ; mm7=data5=(05 15)
+        pfsub   mm5,mm4                 ; mm5=data3=(03 13)
 
-	movq	mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]	; mm2=[PD_RNDINT_MAGIC]
-	pcmpeqd	mm4,mm4
-	psrld	mm4,WORD_BIT		; mm4={0xFFFF 0x0000 0xFFFF 0x0000}
+        movq    mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm2=[PD_RNDINT_MAGIC]
+        pcmpeqd mm4,mm4
+        psrld   mm4,WORD_BIT            ; mm4={0xFFFF 0x0000 0xFFFF 0x0000}
 
-	pfadd	mm3,mm2			; mm3=roundint(data4/8)=(04 ** 14 **)
-	pfadd	mm7,mm2			; mm7=roundint(data5/8)=(05 ** 15 **)
-	pfadd	mm1,mm2			; mm1=roundint(data2/8)=(02 ** 12 **)
-	pfadd	mm5,mm2			; mm5=roundint(data3/8)=(03 ** 13 **)
+        pfadd   mm3,mm2                 ; mm3=roundint(data4/8)=(04 ** 14 **)
+        pfadd   mm7,mm2                 ; mm7=roundint(data5/8)=(05 ** 15 **)
+        pfadd   mm1,mm2                 ; mm1=roundint(data2/8)=(02 ** 12 **)
+        pfadd   mm5,mm2                 ; mm5=roundint(data3/8)=(03 ** 13 **)
 
-	pand	mm3,mm4			; mm3=(04 -- 14 --)
-	pslld	mm7,WORD_BIT		; mm7=(-- 05 -- 15)
-	pand	mm1,mm4			; mm1=(02 -- 12 --)
-	pslld	mm5,WORD_BIT		; mm5=(-- 03 -- 13)
-	por	mm3,mm7			; mm3=(04 05 14 15)
-	por	mm1,mm5			; mm1=(02 03 12 13)
+        pand    mm3,mm4                 ; mm3=(04 -- 14 --)
+        pslld   mm7,WORD_BIT            ; mm7=(-- 05 -- 15)
+        pand    mm1,mm4                 ; mm1=(02 -- 12 --)
+        pslld   mm5,WORD_BIT            ; mm5=(-- 03 -- 13)
+        por     mm3,mm7                 ; mm3=(04 05 14 15)
+        por     mm1,mm5                 ; mm1=(02 03 12 13)
 
-	movq      mm2,[GOTOFF(ebx,PB_CENTERJSAMP)]	; mm2=[PB_CENTERJSAMP]
+        movq      mm2,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm2=[PB_CENTERJSAMP]
 
-	packsswb  mm6,mm3		; mm6=(00 01 10 11 04 05 14 15)
-	packsswb  mm1,mm0		; mm1=(02 03 12 13 06 07 16 17)
-	paddb     mm6,mm2
-	paddb     mm1,mm2
+        packsswb  mm6,mm3               ; mm6=(00 01 10 11 04 05 14 15)
+        packsswb  mm1,mm0               ; mm1=(02 03 12 13 06 07 16 17)
+        paddb     mm6,mm2
+        paddb     mm1,mm2
 
-	movq      mm4,mm6		; transpose coefficients(phase 2)
-	punpcklwd mm6,mm1		; mm6=(00 01 02 03 10 11 12 13)
-	punpckhwd mm4,mm1		; mm4=(04 05 06 07 14 15 16 17)
+        movq      mm4,mm6               ; transpose coefficients(phase 2)
+        punpcklwd mm6,mm1               ; mm6=(00 01 02 03 10 11 12 13)
+        punpckhwd mm4,mm1               ; mm4=(04 05 06 07 14 15 16 17)
 
-	movq      mm7,mm6		; transpose coefficients(phase 3)
-	punpckldq mm6,mm4		; mm6=(00 01 02 03 04 05 06 07)
-	punpckhdq mm7,mm4		; mm7=(10 11 12 13 14 15 16 17)
+        movq      mm7,mm6               ; transpose coefficients(phase 3)
+        punpckldq mm6,mm4               ; mm6=(00 01 02 03 04 05 06 07)
+        punpckhdq mm7,mm4               ; mm7=(10 11 12 13 14 15 16 17)
 
-	pushpic	ebx			; save GOT address
+        pushpic ebx                     ; save GOT address
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
 
-	poppic	ebx			; restore GOT address
+        poppic  ebx                     ; restore GOT address
 
-	add	esi, byte 2*SIZEOF_FAST_FLOAT	; wsptr
-	add	edi, byte 2*SIZEOF_JSAMPROW
-	dec	ecx				; ctr
-	jnz	near .rowloop
+        add     esi, byte 2*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 2*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
 
-	femms		; empty MMX/3DNow! state
+        femms           ; empty MMX/3DNow! state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jimmxfst.asm b/simd/jimmxfst.asm
index 3b05572..a9eaa02 100644
--- a/simd/jimmxfst.asm
+++ b/simd/jimmxfst.asm
@@ -26,31 +26,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
-%define PASS1_BITS	2
+%define CONST_BITS      8       ; fractional bits of the F_* fixed-point constants; 14 is also OK.
+%define PASS1_BITS      2       ; must equal IFAST_SCALE_BITS (checked just below)
 
 %if IFAST_SCALE_BITS != PASS1_BITS
 %error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
 %endif
 
 %if CONST_BITS == 8
-F_1_082	equ	277		; FIX(1.082392200)
-F_1_414	equ	362		; FIX(1.414213562)
-F_1_847	equ	473		; FIX(1.847759065)
-F_2_613	equ	669		; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - 256)	; FIX(2.613125930) - FIX(1)
+F_1_082 equ     277             ; FIX(1.082392200) = round(1.082392200 * 2^8)
+F_1_414 equ     362             ; FIX(1.414213562) = round(1.414213562 * 2^8)
+F_1_847 equ     473             ; FIX(1.847759065) = round(1.847759065 * 2^8)
+F_2_613 equ     669             ; FIX(2.613125930) = round(2.613125930 * 2^8)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1), where FIX(1) = 2^8
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
-%define	DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_1_082	equ	DESCALE(1162209775,30-CONST_BITS)	; FIX(1.082392200)
-F_1_414	equ	DESCALE(1518500249,30-CONST_BITS)	; FIX(1.414213562)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_613	equ	DESCALE(2805822602,30-CONST_BITS)	; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - (1 << CONST_BITS))	; FIX(2.613125930) - FIX(1)
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; add half of 2^n, then shift right by n (rounding shift)
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200); literal is 1.082392200 * 2^30
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562); literal is 1.414213562 * 2^30
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065); literal is 1.847759065 * 2^30
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930); literal is 2.613125930 * 2^30
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1), where FIX(1) = 1 << CONST_BITS
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -58,22 +58,22 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_idct_ifast_mmx)
+        alignz  16
+        global  EXTN(jconst_idct_ifast_mmx)
 
 EXTN(jconst_idct_ifast_mmx):
 
-PW_F1414	times 4 dw  F_1_414 << CONST_SHIFT
-PW_F1847	times 4 dw  F_1_847 << CONST_SHIFT
-PW_MF1613	times 4 dw -F_1_613 << CONST_SHIFT
-PW_F1082	times 4 dw  F_1_082 << CONST_SHIFT
-PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE
+PW_F1414        times 4 dw  F_1_414 << CONST_SHIFT     ; 4 words of FIX(1.414213562), pre-shifted for pmulhw
+PW_F1847        times 4 dw  F_1_847 << CONST_SHIFT     ; 4 words of FIX(1.847759065), pre-shifted for pmulhw
+PW_MF1613       times 4 dw -F_1_613 << CONST_SHIFT     ; 4 words of -(FIX(2.613125930) - FIX(1)), pre-shifted for pmulhw
+PW_F1082        times 4 dw  F_1_082 << CONST_SHIFT     ; 4 words of FIX(1.082392200), pre-shifted for pmulhw
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE              ; 8 bytes, each CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -82,419 +82,419 @@
 ;                       JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; jpeg_component_info * compptr
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; POINTER dct_table (dequantization multipliers, used as quantptr)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_JCOEF
-					; JCOEF workspace[DCTSIZE2]
+%define original_ebp    ebp+0           ; slot where the prologue stores the pre-alignment esp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots: wk(0) and wk(1)
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_ifast_mmx)
+        align   16
+        global  EXTN(jsimd_idct_ifast_mmx)
 
 EXTN(jsimd_idct_ifast_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; JCOEF * wsptr
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	mm1,mm0
-	packsswb mm1,mm1
-	movd	eax,mm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movq      mm2,mm0		; mm0=in0=(00 01 02 03)
-	punpcklwd mm0,mm0		; mm0=(00 00 01 01)
-	punpckhwd mm2,mm2		; mm2=(02 02 03 03)
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
 
-	movq      mm1,mm0
-	punpckldq mm0,mm0		; mm0=(00 00 00 00)
-	punpckhdq mm1,mm1		; mm1=(01 01 01 01)
-	movq      mm3,mm2
-	punpckldq mm2,mm2		; mm2=(02 02 02 02)
-	punpckhdq mm3,mm3		; mm3=(03 03 03 03)
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
-	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
-	jmp	near .nextcolumn
-	alignx	16,7
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movq	mm4,mm0
-	movq	mm5,mm1
-	psubw	mm0,mm2			; mm0=tmp11
-	psubw	mm1,mm3
-	paddw	mm4,mm2			; mm4=tmp10
-	paddw	mm5,mm3			; mm5=tmp13
+        movq    mm4,mm0
+        movq    mm5,mm1
+        psubw   mm0,mm2                 ; mm0=tmp11
+        psubw   mm1,mm3
+        paddw   mm4,mm2                 ; mm4=tmp10
+        paddw   mm5,mm3                 ; mm5=tmp13
 
-	psllw	mm1,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm1,[GOTOFF(ebx,PW_F1414)]
-	psubw	mm1,mm5			; mm1=tmp12
+        psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   mm1,mm5                 ; mm1=tmp12
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	psubw	mm4,mm5			; mm4=tmp3
-	psubw	mm0,mm1			; mm0=tmp2
-	paddw	mm6,mm5			; mm6=tmp0
-	paddw	mm7,mm1			; mm7=tmp1
+        movq    mm6,mm4
+        movq    mm7,mm0
+        psubw   mm4,mm5                 ; mm4=tmp3
+        psubw   mm0,mm1                 ; mm0=tmp2
+        paddw   mm6,mm5                 ; mm6=tmp0
+        paddw   mm7,mm1                 ; mm7=tmp1
 
-	movq	MMWORD [wk(1)], mm4	; wk(1)=tmp3
-	movq	MMWORD [wk(0)], mm0	; wk(0)=tmp2
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+        movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	movq	mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movq	mm4,mm2
-	movq	mm0,mm5
-	psubw	mm2,mm1			; mm2=z12
-	psubw	mm5,mm3			; mm5=z10
-	paddw	mm4,mm1			; mm4=z11
-	paddw	mm0,mm3			; mm0=z13
+        movq    mm4,mm2
+        movq    mm0,mm5
+        psubw   mm2,mm1                 ; mm2=z12
+        psubw   mm5,mm3                 ; mm5=z10
+        paddw   mm4,mm1                 ; mm4=z11
+        paddw   mm0,mm3                 ; mm0=z13
 
-	movq	mm1,mm5			; mm1=z10(unscaled)
-	psllw	mm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	mm5,PRE_MULTIPLY_SCALE_BITS
+        movq    mm1,mm5                 ; mm1=z10(unscaled)
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm5,PRE_MULTIPLY_SCALE_BITS
 
-	movq	mm3,mm4
-	psubw	mm4,mm0
-	paddw	mm3,mm0			; mm3=tmp7
+        movq    mm3,mm4
+        psubw   mm4,mm0
+        paddw   mm3,mm0                 ; mm3=tmp7
 
-	psllw	mm4,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm4,[GOTOFF(ebx,PW_F1414)]	; mm4=tmp11
+        psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movq	mm0,mm5
-	paddw	mm5,mm2
-	pmulhw	mm5,[GOTOFF(ebx,PW_F1847)]	; mm5=z5
-	pmulhw	mm0,[GOTOFF(ebx,PW_MF1613)]
-	pmulhw	mm2,[GOTOFF(ebx,PW_F1082)]
-	psubw	mm0,mm1
-	psubw	mm2,mm5			; mm2=tmp10
-	paddw	mm0,mm5			; mm0=tmp12
+        movq    mm0,mm5
+        paddw   mm5,mm2
+        pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+        pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   mm0,mm1
+        psubw   mm2,mm5                 ; mm2=tmp10
+        paddw   mm0,mm5                 ; mm0=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	mm0,mm3			; mm0=tmp6
-	movq	mm1,mm6
-	movq	mm5,mm7
-	paddw	mm6,mm3			; mm6=data0=(00 01 02 03)
-	paddw	mm7,mm0			; mm7=data1=(10 11 12 13)
-	psubw	mm1,mm3			; mm1=data7=(70 71 72 73)
-	psubw	mm5,mm0			; mm5=data6=(60 61 62 63)
-	psubw	mm4,mm0			; mm4=tmp5
+        psubw   mm0,mm3                 ; mm0=tmp6
+        movq    mm1,mm6
+        movq    mm5,mm7
+        paddw   mm6,mm3                 ; mm6=data0=(00 01 02 03)
+        paddw   mm7,mm0                 ; mm7=data1=(10 11 12 13)
+        psubw   mm1,mm3                 ; mm1=data7=(70 71 72 73)
+        psubw   mm5,mm0                 ; mm5=data6=(60 61 62 63)
+        psubw   mm4,mm0                 ; mm4=tmp5
 
-	movq      mm3,mm6		; transpose coefficients(phase 1)
-	punpcklwd mm6,mm7		; mm6=(00 10 01 11)
-	punpckhwd mm3,mm7		; mm3=(02 12 03 13)
-	movq      mm0,mm5		; transpose coefficients(phase 1)
-	punpcklwd mm5,mm1		; mm5=(60 70 61 71)
-	punpckhwd mm0,mm1		; mm0=(62 72 63 73)
+        movq      mm3,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm3,mm7               ; mm3=(02 12 03 13)
+        movq      mm0,mm5               ; transpose coefficients(phase 1)
+        punpcklwd mm5,mm1               ; mm5=(60 70 61 71)
+        punpckhwd mm0,mm1               ; mm0=(62 72 63 73)
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=tmp2
-	movq	mm1, MMWORD [wk(1)]	; mm1=tmp3
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+        movq    mm1, MMWORD [wk(1)]     ; mm1=tmp3
 
-	movq	MMWORD [wk(0)], mm5	; wk(0)=(60 70 61 71)
-	movq	MMWORD [wk(1)], mm0	; wk(1)=(62 72 63 73)
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(60 70 61 71)
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=(62 72 63 73)
 
-	paddw	mm2,mm4			; mm2=tmp4
-	movq	mm5,mm7
-	movq	mm0,mm1
-	paddw	mm7,mm4			; mm7=data2=(20 21 22 23)
-	paddw	mm1,mm2			; mm1=data4=(40 41 42 43)
-	psubw	mm5,mm4			; mm5=data5=(50 51 52 53)
-	psubw	mm0,mm2			; mm0=data3=(30 31 32 33)
+        paddw   mm2,mm4                 ; mm2=tmp4
+        movq    mm5,mm7
+        movq    mm0,mm1
+        paddw   mm7,mm4                 ; mm7=data2=(20 21 22 23)
+        paddw   mm1,mm2                 ; mm1=data4=(40 41 42 43)
+        psubw   mm5,mm4                 ; mm5=data5=(50 51 52 53)
+        psubw   mm0,mm2                 ; mm0=data3=(30 31 32 33)
 
-	movq      mm4,mm7		; transpose coefficients(phase 1)
-	punpcklwd mm7,mm0		; mm7=(20 30 21 31)
-	punpckhwd mm4,mm0		; mm4=(22 32 23 33)
-	movq      mm2,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm5		; mm1=(40 50 41 51)
-	punpckhwd mm2,mm5		; mm2=(42 52 43 53)
+        movq      mm4,mm7               ; transpose coefficients(phase 1)
+        punpcklwd mm7,mm0               ; mm7=(20 30 21 31)
+        punpckhwd mm4,mm0               ; mm4=(22 32 23 33)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm5               ; mm1=(40 50 41 51)
+        punpckhwd mm2,mm5               ; mm2=(42 52 43 53)
 
-	movq      mm0,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm7		; mm6=(00 10 20 30)
-	punpckhdq mm0,mm7		; mm0=(01 11 21 31)
-	movq      mm5,mm3		; transpose coefficients(phase 2)
-	punpckldq mm3,mm4		; mm3=(02 12 22 32)
-	punpckhdq mm5,mm4		; mm5=(03 13 23 33)
+        movq      mm0,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm7               ; mm6=(00 10 20 30)
+        punpckhdq mm0,mm7               ; mm0=(01 11 21 31)
+        movq      mm5,mm3               ; transpose coefficients(phase 2)
+        punpckldq mm3,mm4               ; mm3=(02 12 22 32)
+        punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=(60 70 61 71)
-	movq	mm4, MMWORD [wk(1)]	; mm4=(62 72 63 73)
+        movq    mm7, MMWORD [wk(0)]     ; mm7=(60 70 61 71)
+        movq    mm4, MMWORD [wk(1)]     ; mm4=(62 72 63 73)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
 
-	movq      mm6,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm7		; mm1=(40 50 60 70)
-	punpckhdq mm6,mm7		; mm6=(41 51 61 71)
-	movq      mm0,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm4		; mm2=(42 52 62 72)
-	punpckhdq mm0,mm4		; mm0=(43 53 63 73)
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm7               ; mm1=(40 50 60 70)
+        punpckhdq mm6,mm7               ; mm6=(41 51 61 71)
+        movq      mm0,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm4               ; mm2=(42 52 62 72)
+        punpckhdq mm0,mm4               ; mm0=(43 53 63 73)
 
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6
-	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0
 
 .nextcolumn:
-	add	esi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 4*SIZEOF_IFAST_MULT_TYPE	; quantptr
-	add	edi, byte 4*DCTSIZE*SIZEOF_JCOEF	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_IFAST_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; JCOEF * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	movq	mm4,mm0
-	movq	mm5,mm1
-	psubw	mm0,mm2			; mm0=tmp11
-	psubw	mm1,mm3
-	paddw	mm4,mm2			; mm4=tmp10
-	paddw	mm5,mm3			; mm5=tmp13
+        movq    mm4,mm0
+        movq    mm5,mm1
+        psubw   mm0,mm2                 ; mm0=tmp11
+        psubw   mm1,mm3
+        paddw   mm4,mm2                 ; mm4=tmp10
+        paddw   mm5,mm3                 ; mm5=tmp13
 
-	psllw	mm1,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm1,[GOTOFF(ebx,PW_F1414)]
-	psubw	mm1,mm5			; mm1=tmp12
+        psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   mm1,mm5                 ; mm1=tmp12
 
-	movq	mm6,mm4
-	movq	mm7,mm0
-	psubw	mm4,mm5			; mm4=tmp3
-	psubw	mm0,mm1			; mm0=tmp2
-	paddw	mm6,mm5			; mm6=tmp0
-	paddw	mm7,mm1			; mm7=tmp1
+        movq    mm6,mm4
+        movq    mm7,mm0
+        psubw   mm4,mm5                 ; mm4=tmp3
+        psubw   mm0,mm1                 ; mm0=tmp2
+        paddw   mm6,mm5                 ; mm6=tmp0
+        paddw   mm7,mm1                 ; mm7=tmp1
 
-	movq	MMWORD [wk(1)], mm4	; wk(1)=tmp3
-	movq	MMWORD [wk(0)], mm0	; wk(0)=tmp2
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+        movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq	mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	movq	mm4,mm2
-	movq	mm0,mm5
-	psubw	mm2,mm1			; mm2=z12
-	psubw	mm5,mm3			; mm5=z10
-	paddw	mm4,mm1			; mm4=z11
-	paddw	mm0,mm3			; mm0=z13
+        movq    mm4,mm2
+        movq    mm0,mm5
+        psubw   mm2,mm1                 ; mm2=z12
+        psubw   mm5,mm3                 ; mm5=z10
+        paddw   mm4,mm1                 ; mm4=z11
+        paddw   mm0,mm3                 ; mm0=z13
 
-	movq	mm1,mm5			; mm1=z10(unscaled)
-	psllw	mm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	mm5,PRE_MULTIPLY_SCALE_BITS
+        movq    mm1,mm5                 ; mm1=z10(unscaled)
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm5,PRE_MULTIPLY_SCALE_BITS
 
-	movq	mm3,mm4
-	psubw	mm4,mm0
-	paddw	mm3,mm0			; mm3=tmp7
+        movq    mm3,mm4
+        psubw   mm4,mm0
+        paddw   mm3,mm0                 ; mm3=tmp7
 
-	psllw	mm4,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	mm4,[GOTOFF(ebx,PW_F1414)]	; mm4=tmp11
+        psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movq	mm0,mm5
-	paddw	mm5,mm2
-	pmulhw	mm5,[GOTOFF(ebx,PW_F1847)]	; mm5=z5
-	pmulhw	mm0,[GOTOFF(ebx,PW_MF1613)]
-	pmulhw	mm2,[GOTOFF(ebx,PW_F1082)]
-	psubw	mm0,mm1
-	psubw	mm2,mm5			; mm2=tmp10
-	paddw	mm0,mm5			; mm0=tmp12
+        movq    mm0,mm5
+        paddw   mm5,mm2
+        pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+        pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   mm0,mm1
+        psubw   mm2,mm5                 ; mm2=tmp10
+        paddw   mm0,mm5                 ; mm0=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	mm0,mm3			; mm0=tmp6
-	movq	mm1,mm6
-	movq	mm5,mm7
-	paddw	mm6,mm3			; mm6=data0=(00 10 20 30)
-	paddw	mm7,mm0			; mm7=data1=(01 11 21 31)
-	psraw	mm6,(PASS1_BITS+3)	; descale
-	psraw	mm7,(PASS1_BITS+3)	; descale
-	psubw	mm1,mm3			; mm1=data7=(07 17 27 37)
-	psubw	mm5,mm0			; mm5=data6=(06 16 26 36)
-	psraw	mm1,(PASS1_BITS+3)	; descale
-	psraw	mm5,(PASS1_BITS+3)	; descale
-	psubw	mm4,mm0			; mm4=tmp5
+        psubw   mm0,mm3                 ; mm0=tmp6
+        movq    mm1,mm6
+        movq    mm5,mm7
+        paddw   mm6,mm3                 ; mm6=data0=(00 10 20 30)
+        paddw   mm7,mm0                 ; mm7=data1=(01 11 21 31)
+        psraw   mm6,(PASS1_BITS+3)      ; descale
+        psraw   mm7,(PASS1_BITS+3)      ; descale
+        psubw   mm1,mm3                 ; mm1=data7=(07 17 27 37)
+        psubw   mm5,mm0                 ; mm5=data6=(06 16 26 36)
+        psraw   mm1,(PASS1_BITS+3)      ; descale
+        psraw   mm5,(PASS1_BITS+3)      ; descale
+        psubw   mm4,mm0                 ; mm4=tmp5
 
-	packsswb  mm6,mm5		; mm6=(00 10 20 30 06 16 26 36)
-	packsswb  mm7,mm1		; mm7=(01 11 21 31 07 17 27 37)
+        packsswb  mm6,mm5               ; mm6=(00 10 20 30 06 16 26 36)
+        packsswb  mm7,mm1               ; mm7=(01 11 21 31 07 17 27 37)
 
-	movq	mm3, MMWORD [wk(0)]	; mm3=tmp2
-	movq	mm0, MMWORD [wk(1)]	; mm0=tmp3
+        movq    mm3, MMWORD [wk(0)]     ; mm3=tmp2
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp3
 
-	paddw	mm2,mm4			; mm2=tmp4
-	movq	mm5,mm3
-	movq	mm1,mm0
-	paddw	mm3,mm4			; mm3=data2=(02 12 22 32)
-	paddw	mm0,mm2			; mm0=data4=(04 14 24 34)
-	psraw	mm3,(PASS1_BITS+3)	; descale
-	psraw	mm0,(PASS1_BITS+3)	; descale
-	psubw	mm5,mm4			; mm5=data5=(05 15 25 35)
-	psubw	mm1,mm2			; mm1=data3=(03 13 23 33)
-	psraw	mm5,(PASS1_BITS+3)	; descale
-	psraw	mm1,(PASS1_BITS+3)	; descale
+        paddw   mm2,mm4                 ; mm2=tmp4
+        movq    mm5,mm3
+        movq    mm1,mm0
+        paddw   mm3,mm4                 ; mm3=data2=(02 12 22 32)
+        paddw   mm0,mm2                 ; mm0=data4=(04 14 24 34)
+        psraw   mm3,(PASS1_BITS+3)      ; descale
+        psraw   mm0,(PASS1_BITS+3)      ; descale
+        psubw   mm5,mm4                 ; mm5=data5=(05 15 25 35)
+        psubw   mm1,mm2                 ; mm1=data3=(03 13 23 33)
+        psraw   mm5,(PASS1_BITS+3)      ; descale
+        psraw   mm1,(PASS1_BITS+3)      ; descale
 
-	movq      mm4,[GOTOFF(ebx,PB_CENTERJSAMP)]	; mm4=[PB_CENTERJSAMP]
+        movq      mm4,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm4=[PB_CENTERJSAMP]
 
-	packsswb  mm3,mm0		; mm3=(02 12 22 32 04 14 24 34)
-	packsswb  mm1,mm5		; mm1=(03 13 23 33 05 15 25 35)
+        packsswb  mm3,mm0               ; mm3=(02 12 22 32 04 14 24 34)
+        packsswb  mm1,mm5               ; mm1=(03 13 23 33 05 15 25 35)
 
-	paddb     mm6,mm4
-	paddb     mm7,mm4
-	paddb     mm3,mm4
-	paddb     mm1,mm4
+        paddb     mm6,mm4
+        paddb     mm7,mm4
+        paddb     mm3,mm4
+        paddb     mm1,mm4
 
-	movq      mm2,mm6		; transpose coefficients(phase 1)
-	punpcklbw mm6,mm7		; mm6=(00 01 10 11 20 21 30 31)
-	punpckhbw mm2,mm7		; mm2=(06 07 16 17 26 27 36 37)
-	movq      mm0,mm3		; transpose coefficients(phase 1)
-	punpcklbw mm3,mm1		; mm3=(02 03 12 13 22 23 32 33)
-	punpckhbw mm0,mm1		; mm0=(04 05 14 15 24 25 34 35)
+        movq      mm2,mm6               ; transpose coefficients(phase 1)
+        punpcklbw mm6,mm7               ; mm6=(00 01 10 11 20 21 30 31)
+        punpckhbw mm2,mm7               ; mm2=(06 07 16 17 26 27 36 37)
+        movq      mm0,mm3               ; transpose coefficients(phase 1)
+        punpcklbw mm3,mm1               ; mm3=(02 03 12 13 22 23 32 33)
+        punpckhbw mm0,mm1               ; mm0=(04 05 14 15 24 25 34 35)
 
-	movq      mm5,mm6		; transpose coefficients(phase 2)
-	punpcklwd mm6,mm3		; mm6=(00 01 02 03 10 11 12 13)
-	punpckhwd mm5,mm3		; mm5=(20 21 22 23 30 31 32 33)
-	movq      mm4,mm0		; transpose coefficients(phase 2)
-	punpcklwd mm0,mm2		; mm0=(04 05 06 07 14 15 16 17)
-	punpckhwd mm4,mm2		; mm4=(24 25 26 27 34 35 36 37)
+        movq      mm5,mm6               ; transpose coefficients(phase 2)
+        punpcklwd mm6,mm3               ; mm6=(00 01 02 03 10 11 12 13)
+        punpckhwd mm5,mm3               ; mm5=(20 21 22 23 30 31 32 33)
+        movq      mm4,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm2               ; mm0=(04 05 06 07 14 15 16 17)
+        punpckhwd mm4,mm2               ; mm4=(24 25 26 27 34 35 36 37)
 
-	movq      mm7,mm6		; transpose coefficients(phase 3)
-	punpckldq mm6,mm0		; mm6=(00 01 02 03 04 05 06 07)
-	punpckhdq mm7,mm0		; mm7=(10 11 12 13 14 15 16 17)
-	movq      mm1,mm5		; transpose coefficients(phase 3)
-	punpckldq mm5,mm4		; mm5=(20 21 22 23 24 25 26 27)
-	punpckhdq mm1,mm4		; mm1=(30 31 32 33 34 35 36 37)
+        movq      mm7,mm6               ; transpose coefficients(phase 3)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03 04 05 06 07)
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13 14 15 16 17)
+        movq      mm1,mm5               ; transpose coefficients(phase 3)
+        punpckldq mm5,mm4               ; mm5=(20 21 22 23 24 25 26 27)
+        punpckhdq mm1,mm4               ; mm1=(30 31 32 33 34 35 36 37)
 
-	pushpic	ebx			; save GOT address
+        pushpic ebx                     ; save GOT address
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
-	mov	edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
 
-	poppic	ebx			; restore GOT address
+        poppic  ebx                     ; restore GOT address
 
-	add	esi, byte 4*SIZEOF_JCOEF	; wsptr
-	add	edi, byte 4*SIZEOF_JSAMPROW
-	dec	ecx				; ctr
-	jnz	near .rowloop
+        add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jimmxint.asm b/simd/jimmxint.asm
index 7b52fae..75b9ea8 100644
--- a/simd/jimmxint.asm
+++ b/simd/jimmxint.asm
@@ -26,67 +26,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13      ; fraction bits of the F_x_xxx fixed-point constants (FIX(x) = x * 2^13)
+%define PASS1_BITS      2       ; pass 1 output is kept scaled up by 2^PASS1_BITS (see psllw in the zero-AC path)
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS+3)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; presumably the pass 1 descale shift count -- use site not shown here
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)       ; presumably the pass 2 descale shift count -- use site not shown here
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_islow_mmx)
+        alignz  16
+        global  EXTN(jconst_idct_islow_mmx)
 
 EXTN(jconst_idct_islow_mmx):
 
-PW_F130_F054	times 2 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 2 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 2 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 2 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 2 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 2 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 2 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 2 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 2 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 2 dd  1 << (DESCALE_P2-1)
-PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE
+PW_F130_F054    times 2 dw  (F_0_541+F_0_765), F_0_541       ; pair for: tmp3 = z2*(0.541+0.765) + z3*0.541
+PW_F054_MF130   times 2 dw  F_0_541, (F_0_541-F_1_847)       ; pair for: tmp2 = z2*0.541 + z3*(0.541-1.847)
+PW_MF078_F117   times 2 dw  (F_1_175-F_1_961), F_1_175       ; pair for: z3 = z3*(1.175-1.961) + z4*1.175
+PW_F117_F078    times 2 dw  F_1_175, (F_1_175-F_0_390)       ; pair for: z4 = z3*1.175 + z4*(1.175-0.390)
+PW_MF060_MF089  times 2 dw  (F_0_298-F_0_899),-F_0_899       ; pair for: tmp0 = tmp0*(0.298-0.899) + tmp3*-0.899
+PW_MF089_F060   times 2 dw -F_0_899, (F_1_501-F_0_899)       ; pair for: tmp3 = tmp0*-0.899 + tmp3*(1.501-0.899)
+PW_MF050_MF256  times 2 dw  (F_2_053-F_2_562),-F_2_562       ; pair for: tmp1 = tmp1*(2.053-2.562) + tmp2*-2.562
+PW_MF256_F050   times 2 dw -F_2_562, (F_3_072-F_2_562)       ; pair for: tmp2 = tmp1*-2.562 + tmp2*(3.072-2.562)
+PD_DESCALE_P1   times 2 dd  1 << (DESCALE_P1-1)              ; 2^(DESCALE_P1-1) per dword; presumably the rounding term for the pass 1 descale
+PD_DESCALE_P2   times 2 dd  1 << (DESCALE_P2-1)              ; 2^(DESCALE_P2-1) per dword; presumably the rounding term for the pass 2 descale
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE                    ; CENTERJSAMPLE replicated in all 8 bytes
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -95,758 +95,758 @@
 ;                       JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; jpeg_component_info * compptr
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; dct_table: ISLOW multiplier table (loaded as quantptr)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		12
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_JCOEF
-					; JCOEF workspace[DCTSIZE2]
+%define original_ebp    ebp+0           ; [ebp+0] = esp as it was right after "push ebp" (stored by the prologue)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          12              ; number of mmword scratch slots below the aligned ebp
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_islow_mmx)
+        align   16
+        global  EXTN(jsimd_idct_islow_mmx)
 
 EXTN(jsimd_idct_islow_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; JCOEF * wsptr
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	mm1,mm0
-	packsswb mm1,mm1
-	movd	eax,mm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	mm0,PASS1_BITS
+        psllw   mm0,PASS1_BITS
 
-	movq      mm2,mm0		; mm0=in0=(00 01 02 03)
-	punpcklwd mm0,mm0		; mm0=(00 00 01 01)
-	punpckhwd mm2,mm2		; mm2=(02 02 03 03)
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
 
-	movq      mm1,mm0
-	punpckldq mm0,mm0		; mm0=(00 00 00 00)
-	punpckhdq mm1,mm1		; mm1=(01 01 01 01)
-	movq      mm3,mm2
-	punpckldq mm2,mm2		; mm2=(02 02 02 02)
-	punpckhdq mm3,mm3		; mm3=(03 03 03 03)
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
-	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
-	jmp	near .nextcolumn
-	alignx	16,7
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movq      mm4,mm1		; mm1=in2=z2
-	movq      mm5,mm1
-	punpcklwd mm4,mm3		; mm3=in6=z3
-	punpckhwd mm5,mm3
-	movq      mm1,mm4
-	movq      mm3,mm5
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]	; mm4=tmp3L
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]	; mm5=tmp3H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]	; mm1=tmp2L
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]	; mm3=tmp2H
+        movq      mm4,mm1               ; mm1=in2=z2
+        movq      mm5,mm1
+        punpcklwd mm4,mm3               ; mm3=in6=z3
+        punpckhwd mm5,mm3
+        movq      mm1,mm4
+        movq      mm3,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
 
-	movq      mm6,mm0
-	paddw     mm0,mm2		; mm0=in0+in4
-	psubw     mm6,mm2		; mm6=in0-in4
+        movq      mm6,mm0
+        paddw     mm0,mm2               ; mm0=in0+in4
+        psubw     mm6,mm2               ; mm6=in0-in4
 
-	pxor      mm7,mm7
-	pxor      mm2,mm2
-	punpcklwd mm7,mm0		; mm7=tmp0L
-	punpckhwd mm2,mm0		; mm2=tmp0H
-	psrad     mm7,(16-CONST_BITS)	; psrad mm7,16 & pslld mm7,CONST_BITS
-	psrad     mm2,(16-CONST_BITS)	; psrad mm2,16 & pslld mm2,CONST_BITS
+        pxor      mm7,mm7
+        pxor      mm2,mm2
+        punpcklwd mm7,mm0               ; mm7=tmp0L
+        punpckhwd mm2,mm0               ; mm2=tmp0H
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+        psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
 
-	movq	mm0,mm7
-	paddd	mm7,mm4			; mm7=tmp10L
-	psubd	mm0,mm4			; mm0=tmp13L
-	movq	mm4,mm2
-	paddd	mm2,mm5			; mm2=tmp10H
-	psubd	mm4,mm5			; mm4=tmp13H
+        movq    mm0,mm7
+        paddd   mm7,mm4                 ; mm7=tmp10L
+        psubd   mm0,mm4                 ; mm0=tmp13L
+        movq    mm4,mm2
+        paddd   mm2,mm5                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp13H
 
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp10L
-	movq	MMWORD [wk(1)], mm2	; wk(1)=tmp10H
-	movq	MMWORD [wk(2)], mm0	; wk(2)=tmp13L
-	movq	MMWORD [wk(3)], mm4	; wk(3)=tmp13H
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+        movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+        movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
 
-	pxor      mm5,mm5
-	pxor      mm7,mm7
-	punpcklwd mm5,mm6		; mm5=tmp1L
-	punpckhwd mm7,mm6		; mm7=tmp1H
-	psrad     mm5,(16-CONST_BITS)	; psrad mm5,16 & pslld mm5,CONST_BITS
-	psrad     mm7,(16-CONST_BITS)	; psrad mm7,16 & pslld mm7,CONST_BITS
+        pxor      mm5,mm5
+        pxor      mm7,mm7
+        punpcklwd mm5,mm6               ; mm5=tmp1L
+        punpckhwd mm7,mm6               ; mm7=tmp1H
+        psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
 
-	movq	mm2,mm5
-	paddd	mm5,mm1			; mm5=tmp11L
-	psubd	mm2,mm1			; mm2=tmp12L
-	movq	mm0,mm7
-	paddd	mm7,mm3			; mm7=tmp11H
-	psubd	mm0,mm3			; mm0=tmp12H
+        movq    mm2,mm5
+        paddd   mm5,mm1                 ; mm5=tmp11L
+        psubd   mm2,mm1                 ; mm2=tmp12L
+        movq    mm0,mm7
+        paddd   mm7,mm3                 ; mm7=tmp11H
+        psubd   mm0,mm3                 ; mm0=tmp12H
 
-	movq	MMWORD [wk(4)], mm5	; wk(4)=tmp11L
-	movq	MMWORD [wk(5)], mm7	; wk(5)=tmp11H
-	movq	MMWORD [wk(6)], mm2	; wk(6)=tmp12L
-	movq	MMWORD [wk(7)], mm0	; wk(7)=tmp12H
+        movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+        movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+        movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+        movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movq	mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movq	mm5,mm6
-	movq	mm7,mm4
-	paddw	mm5,mm3			; mm5=z3
-	paddw	mm7,mm1			; mm7=z4
+        movq    mm5,mm6
+        movq    mm7,mm4
+        paddw   mm5,mm3                 ; mm5=z3
+        paddw   mm7,mm1                 ; mm7=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movq      mm2,mm5
-	movq      mm0,mm5
-	punpcklwd mm2,mm7
-	punpckhwd mm0,mm7
-	movq      mm5,mm2
-	movq      mm7,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]	; mm2=z3L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]	; mm0=z3H
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]	; mm5=z4L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]	; mm7=z4H
+        movq      mm2,mm5
+        movq      mm0,mm5
+        punpcklwd mm2,mm7
+        punpckhwd mm0,mm7
+        movq      mm5,mm2
+        movq      mm7,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
 
-	movq	MMWORD [wk(10)], mm2	; wk(10)=z3L
-	movq	MMWORD [wk(11)], mm0	; wk(11)=z3H
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movq      mm2,mm3
-	movq      mm0,mm3
-	punpcklwd mm2,mm4
-	punpckhwd mm0,mm4
-	movq      mm3,mm2
-	movq      mm4,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]	; mm2=tmp0L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]	; mm0=tmp0H
-	pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]	; mm3=tmp3L
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]	; mm4=tmp3H
+        movq      mm2,mm3
+        movq      mm0,mm3
+        punpcklwd mm2,mm4
+        punpckhwd mm0,mm4
+        movq      mm3,mm2
+        movq      mm4,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
 
-	paddd	mm2, MMWORD [wk(10)]	; mm2=tmp0L
-	paddd	mm0, MMWORD [wk(11)]	; mm0=tmp0H
-	paddd	mm3,mm5			; mm3=tmp3L
-	paddd	mm4,mm7			; mm4=tmp3H
+        paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+        paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+        paddd   mm3,mm5                 ; mm3=tmp3L
+        paddd   mm4,mm7                 ; mm4=tmp3H
 
-	movq	MMWORD [wk(8)], mm2	; wk(8)=tmp0L
-	movq	MMWORD [wk(9)], mm0	; wk(9)=tmp0H
+        movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+        movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
 
-	movq      mm2,mm1
-	movq      mm0,mm1
-	punpcklwd mm2,mm6
-	punpckhwd mm0,mm6
-	movq      mm1,mm2
-	movq      mm6,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]	; mm2=tmp1L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]	; mm0=tmp1H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]	; mm1=tmp2L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]	; mm6=tmp2H
+        movq      mm2,mm1
+        movq      mm0,mm1
+        punpcklwd mm2,mm6
+        punpckhwd mm0,mm6
+        movq      mm1,mm2
+        movq      mm6,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
 
-	paddd	mm2,mm5			; mm2=tmp1L
-	paddd	mm0,mm7			; mm0=tmp1H
-	paddd	mm1, MMWORD [wk(10)]	; mm1=tmp2L
-	paddd	mm6, MMWORD [wk(11)]	; mm6=tmp2H
+        paddd   mm2,mm5                 ; mm2=tmp1L
+        paddd   mm0,mm7                 ; mm0=tmp1H
+        paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+        paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
 
-	movq	MMWORD [wk(10)], mm2	; wk(10)=tmp1L
-	movq	MMWORD [wk(11)], mm0	; wk(11)=tmp1H
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq	mm5, MMWORD [wk(0)]	; mm5=tmp10L
-	movq	mm7, MMWORD [wk(1)]	; mm7=tmp10H
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
 
-	movq	mm2,mm5
-	movq	mm0,mm7
-	paddd	mm5,mm3			; mm5=data0L
-	paddd	mm7,mm4			; mm7=data0H
-	psubd	mm2,mm3			; mm2=data7L
-	psubd	mm0,mm4			; mm0=data7H
+        movq    mm2,mm5
+        movq    mm0,mm7
+        paddd   mm5,mm3                 ; mm5=data0L
+        paddd   mm7,mm4                 ; mm7=data0H
+        psubd   mm2,mm3                 ; mm2=data7L
+        psubd   mm0,mm4                 ; mm0=data7H
 
-	movq	mm3,[GOTOFF(ebx,PD_DESCALE_P1)]	; mm3=[PD_DESCALE_P1]
+        movq    mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1]
 
-	paddd	mm5,mm3
-	paddd	mm7,mm3
-	psrad	mm5,DESCALE_P1
-	psrad	mm7,DESCALE_P1
-	paddd	mm2,mm3
-	paddd	mm0,mm3
-	psrad	mm2,DESCALE_P1
-	psrad	mm0,DESCALE_P1
+        paddd   mm5,mm3
+        paddd   mm7,mm3
+        psrad   mm5,DESCALE_P1
+        psrad   mm7,DESCALE_P1
+        paddd   mm2,mm3
+        paddd   mm0,mm3
+        psrad   mm2,DESCALE_P1
+        psrad   mm0,DESCALE_P1
 
-	packssdw  mm5,mm7		; mm5=data0=(00 01 02 03)
-	packssdw  mm2,mm0		; mm2=data7=(70 71 72 73)
+        packssdw  mm5,mm7               ; mm5=data0=(00 01 02 03)
+        packssdw  mm2,mm0               ; mm2=data7=(70 71 72 73)
 
-	movq	mm4, MMWORD [wk(4)]	; mm4=tmp11L
-	movq	mm3, MMWORD [wk(5)]	; mm3=tmp11H
+        movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+        movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
 
-	movq	mm7,mm4
-	movq	mm0,mm3
-	paddd	mm4,mm1			; mm4=data1L
-	paddd	mm3,mm6			; mm3=data1H
-	psubd	mm7,mm1			; mm7=data6L
-	psubd	mm0,mm6			; mm0=data6H
+        movq    mm7,mm4
+        movq    mm0,mm3
+        paddd   mm4,mm1                 ; mm4=data1L
+        paddd   mm3,mm6                 ; mm3=data1H
+        psubd   mm7,mm1                 ; mm7=data6L
+        psubd   mm0,mm6                 ; mm0=data6H
 
-	movq	mm1,[GOTOFF(ebx,PD_DESCALE_P1)]	; mm1=[PD_DESCALE_P1]
+        movq    mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1]
 
-	paddd	mm4,mm1
-	paddd	mm3,mm1
-	psrad	mm4,DESCALE_P1
-	psrad	mm3,DESCALE_P1
-	paddd	mm7,mm1
-	paddd	mm0,mm1
-	psrad	mm7,DESCALE_P1
-	psrad	mm0,DESCALE_P1
+        paddd   mm4,mm1
+        paddd   mm3,mm1
+        psrad   mm4,DESCALE_P1
+        psrad   mm3,DESCALE_P1
+        paddd   mm7,mm1
+        paddd   mm0,mm1
+        psrad   mm7,DESCALE_P1
+        psrad   mm0,DESCALE_P1
 
-	packssdw  mm4,mm3		; mm4=data1=(10 11 12 13)
-	packssdw  mm7,mm0		; mm7=data6=(60 61 62 63)
+        packssdw  mm4,mm3               ; mm4=data1=(10 11 12 13)
+        packssdw  mm7,mm0               ; mm7=data6=(60 61 62 63)
 
-	movq      mm6,mm5		; transpose coefficients(phase 1)
-	punpcklwd mm5,mm4		; mm5=(00 10 01 11)
-	punpckhwd mm6,mm4		; mm6=(02 12 03 13)
-	movq      mm1,mm7		; transpose coefficients(phase 1)
-	punpcklwd mm7,mm2		; mm7=(60 70 61 71)
-	punpckhwd mm1,mm2		; mm1=(62 72 63 73)
+        movq      mm6,mm5               ; transpose coefficients(phase 1)
+        punpcklwd mm5,mm4               ; mm5=(00 10 01 11)
+        punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+        movq      mm1,mm7               ; transpose coefficients(phase 1)
+        punpcklwd mm7,mm2               ; mm7=(60 70 61 71)
+        punpckhwd mm1,mm2               ; mm1=(62 72 63 73)
 
-	movq	mm3, MMWORD [wk(6)]	; mm3=tmp12L
-	movq	mm0, MMWORD [wk(7)]	; mm0=tmp12H
-	movq	mm4, MMWORD [wk(10)]	; mm4=tmp1L
-	movq	mm2, MMWORD [wk(11)]	; mm2=tmp1H
+        movq    mm3, MMWORD [wk(6)]     ; mm3=tmp12L
+        movq    mm0, MMWORD [wk(7)]     ; mm0=tmp12H
+        movq    mm4, MMWORD [wk(10)]    ; mm4=tmp1L
+        movq    mm2, MMWORD [wk(11)]    ; mm2=tmp1H
 
-	movq	MMWORD [wk(0)], mm5	; wk(0)=(00 10 01 11)
-	movq	MMWORD [wk(1)], mm6	; wk(1)=(02 12 03 13)
-	movq	MMWORD [wk(4)], mm7	; wk(4)=(60 70 61 71)
-	movq	MMWORD [wk(5)], mm1	; wk(5)=(62 72 63 73)
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 01 11)
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=(02 12 03 13)
+        movq    MMWORD [wk(4)], mm7     ; wk(4)=(60 70 61 71)
+        movq    MMWORD [wk(5)], mm1     ; wk(5)=(62 72 63 73)
 
-	movq	mm5,mm3
-	movq	mm6,mm0
-	paddd	mm3,mm4			; mm3=data2L
-	paddd	mm0,mm2			; mm0=data2H
-	psubd	mm5,mm4			; mm5=data5L
-	psubd	mm6,mm2			; mm6=data5H
+        movq    mm5,mm3
+        movq    mm6,mm0
+        paddd   mm3,mm4                 ; mm3=data2L
+        paddd   mm0,mm2                 ; mm0=data2H
+        psubd   mm5,mm4                 ; mm5=data5L
+        psubd   mm6,mm2                 ; mm6=data5H
 
-	movq	mm7,[GOTOFF(ebx,PD_DESCALE_P1)]	; mm7=[PD_DESCALE_P1]
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1]
 
-	paddd	mm3,mm7
-	paddd	mm0,mm7
-	psrad	mm3,DESCALE_P1
-	psrad	mm0,DESCALE_P1
-	paddd	mm5,mm7
-	paddd	mm6,mm7
-	psrad	mm5,DESCALE_P1
-	psrad	mm6,DESCALE_P1
+        paddd   mm3,mm7
+        paddd   mm0,mm7
+        psrad   mm3,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+        paddd   mm5,mm7
+        paddd   mm6,mm7
+        psrad   mm5,DESCALE_P1
+        psrad   mm6,DESCALE_P1
 
-	packssdw  mm3,mm0		; mm3=data2=(20 21 22 23)
-	packssdw  mm5,mm6		; mm5=data5=(50 51 52 53)
+        packssdw  mm3,mm0               ; mm3=data2=(20 21 22 23)
+        packssdw  mm5,mm6               ; mm5=data5=(50 51 52 53)
 
-	movq	mm1, MMWORD [wk(2)]	; mm1=tmp13L
-	movq	mm4, MMWORD [wk(3)]	; mm4=tmp13H
-	movq	mm2, MMWORD [wk(8)]	; mm2=tmp0L
-	movq	mm7, MMWORD [wk(9)]	; mm7=tmp0H
+        movq    mm1, MMWORD [wk(2)]     ; mm1=tmp13L
+        movq    mm4, MMWORD [wk(3)]     ; mm4=tmp13H
+        movq    mm2, MMWORD [wk(8)]     ; mm2=tmp0L
+        movq    mm7, MMWORD [wk(9)]     ; mm7=tmp0H
 
-	movq	mm0,mm1
-	movq	mm6,mm4
-	paddd	mm1,mm2			; mm1=data3L
-	paddd	mm4,mm7			; mm4=data3H
-	psubd	mm0,mm2			; mm0=data4L
-	psubd	mm6,mm7			; mm6=data4H
+        movq    mm0,mm1
+        movq    mm6,mm4
+        paddd   mm1,mm2                 ; mm1=data3L
+        paddd   mm4,mm7                 ; mm4=data3H
+        psubd   mm0,mm2                 ; mm0=data4L
+        psubd   mm6,mm7                 ; mm6=data4H
 
-	movq	mm2,[GOTOFF(ebx,PD_DESCALE_P1)]	; mm2=[PD_DESCALE_P1]
+        movq    mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1]
 
-	paddd	mm1,mm2
-	paddd	mm4,mm2
-	psrad	mm1,DESCALE_P1
-	psrad	mm4,DESCALE_P1
-	paddd	mm0,mm2
-	paddd	mm6,mm2
-	psrad	mm0,DESCALE_P1
-	psrad	mm6,DESCALE_P1
+        paddd   mm1,mm2
+        paddd   mm4,mm2
+        psrad   mm1,DESCALE_P1
+        psrad   mm4,DESCALE_P1
+        paddd   mm0,mm2
+        paddd   mm6,mm2
+        psrad   mm0,DESCALE_P1
+        psrad   mm6,DESCALE_P1
 
-	packssdw  mm1,mm4		; mm1=data3=(30 31 32 33)
-	packssdw  mm0,mm6		; mm0=data4=(40 41 42 43)
+        packssdw  mm1,mm4               ; mm1=data3=(30 31 32 33)
+        packssdw  mm0,mm6               ; mm0=data4=(40 41 42 43)
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=(00 10 01 11)
-	movq	mm2, MMWORD [wk(1)]	; mm2=(02 12 03 13)
+        movq    mm7, MMWORD [wk(0)]     ; mm7=(00 10 01 11)
+        movq    mm2, MMWORD [wk(1)]     ; mm2=(02 12 03 13)
 
-	movq      mm4,mm3		; transpose coefficients(phase 1)
-	punpcklwd mm3,mm1		; mm3=(20 30 21 31)
-	punpckhwd mm4,mm1		; mm4=(22 32 23 33)
-	movq      mm6,mm0		; transpose coefficients(phase 1)
-	punpcklwd mm0,mm5		; mm0=(40 50 41 51)
-	punpckhwd mm6,mm5		; mm6=(42 52 43 53)
+        movq      mm4,mm3               ; transpose coefficients(phase 1)
+        punpcklwd mm3,mm1               ; mm3=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm6,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm5               ; mm0=(40 50 41 51)
+        punpckhwd mm6,mm5               ; mm6=(42 52 43 53)
 
-	movq      mm1,mm7		; transpose coefficients(phase 2)
-	punpckldq mm7,mm3		; mm7=(00 10 20 30)
-	punpckhdq mm1,mm3		; mm1=(01 11 21 31)
-	movq      mm5,mm2		; transpose coefficients(phase 2)
-	punpckldq mm2,mm4		; mm2=(02 12 22 32)
-	punpckhdq mm5,mm4		; mm5=(03 13 23 33)
+        movq      mm1,mm7               ; transpose coefficients(phase 2)
+        punpckldq mm7,mm3               ; mm7=(00 10 20 30)
+        punpckhdq mm1,mm3               ; mm1=(01 11 21 31)
+        movq      mm5,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm4               ; mm2=(02 12 22 32)
+        punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
 
-	movq	mm3, MMWORD [wk(4)]	; mm3=(60 70 61 71)
-	movq	mm4, MMWORD [wk(5)]	; mm4=(62 72 63 73)
+        movq    mm3, MMWORD [wk(4)]     ; mm3=(60 70 61 71)
+        movq    mm4, MMWORD [wk(5)]     ; mm4=(62 72 63 73)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
 
-	movq      mm7,mm0		; transpose coefficients(phase 2)
-	punpckldq mm0,mm3		; mm0=(40 50 60 70)
-	punpckhdq mm7,mm3		; mm7=(41 51 61 71)
-	movq      mm1,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm4		; mm6=(42 52 62 72)
-	punpckhdq mm1,mm4		; mm1=(43 53 63 73)
+        movq      mm7,mm0               ; transpose coefficients(phase 2)
+        punpckldq mm0,mm3               ; mm0=(40 50 60 70)
+        punpckhdq mm7,mm3               ; mm7=(41 51 61 71)
+        movq      mm1,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm4               ; mm6=(42 52 62 72)
+        punpckhdq mm1,mm4               ; mm1=(43 53 63 73)
 
-	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7
-	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6
-	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1
 
 .nextcolumn:
-	add	esi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 4*SIZEOF_ISLOW_MULT_TYPE	; quantptr
-	add	edi, byte 4*DCTSIZE*SIZEOF_JCOEF	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; JCOEF * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movq      mm4,mm1		; mm1=in2=z2
-	movq      mm5,mm1
-	punpcklwd mm4,mm3		; mm3=in6=z3
-	punpckhwd mm5,mm3
-	movq      mm1,mm4
-	movq      mm3,mm5
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]	; mm4=tmp3L
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]	; mm5=tmp3H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]	; mm1=tmp2L
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]	; mm3=tmp2H
+        movq      mm4,mm1               ; mm1=in2=z2
+        movq      mm5,mm1
+        punpcklwd mm4,mm3               ; mm3=in6=z3
+        punpckhwd mm5,mm3
+        movq      mm1,mm4
+        movq      mm3,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
 
-	movq      mm6,mm0
-	paddw     mm0,mm2		; mm0=in0+in4
-	psubw     mm6,mm2		; mm6=in0-in4
+        movq      mm6,mm0
+        paddw     mm0,mm2               ; mm0=in0+in4
+        psubw     mm6,mm2               ; mm6=in0-in4
 
-	pxor      mm7,mm7
-	pxor      mm2,mm2
-	punpcklwd mm7,mm0		; mm7=tmp0L
-	punpckhwd mm2,mm0		; mm2=tmp0H
-	psrad     mm7,(16-CONST_BITS)	; psrad mm7,16 & pslld mm7,CONST_BITS
-	psrad     mm2,(16-CONST_BITS)	; psrad mm2,16 & pslld mm2,CONST_BITS
+        pxor      mm7,mm7
+        pxor      mm2,mm2
+        punpcklwd mm7,mm0               ; mm7=tmp0L
+        punpckhwd mm2,mm0               ; mm2=tmp0H
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+        psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
 
-	movq	mm0,mm7
-	paddd	mm7,mm4			; mm7=tmp10L
-	psubd	mm0,mm4			; mm0=tmp13L
-	movq	mm4,mm2
-	paddd	mm2,mm5			; mm2=tmp10H
-	psubd	mm4,mm5			; mm4=tmp13H
+        movq    mm0,mm7
+        paddd   mm7,mm4                 ; mm7=tmp10L
+        psubd   mm0,mm4                 ; mm0=tmp13L
+        movq    mm4,mm2
+        paddd   mm2,mm5                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp13H
 
-	movq	MMWORD [wk(0)], mm7	; wk(0)=tmp10L
-	movq	MMWORD [wk(1)], mm2	; wk(1)=tmp10H
-	movq	MMWORD [wk(2)], mm0	; wk(2)=tmp13L
-	movq	MMWORD [wk(3)], mm4	; wk(3)=tmp13H
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+        movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+        movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
 
-	pxor      mm5,mm5
-	pxor      mm7,mm7
-	punpcklwd mm5,mm6		; mm5=tmp1L
-	punpckhwd mm7,mm6		; mm7=tmp1H
-	psrad     mm5,(16-CONST_BITS)	; psrad mm5,16 & pslld mm5,CONST_BITS
-	psrad     mm7,(16-CONST_BITS)	; psrad mm7,16 & pslld mm7,CONST_BITS
+        pxor      mm5,mm5
+        pxor      mm7,mm7
+        punpcklwd mm5,mm6               ; mm5=tmp1L
+        punpckhwd mm7,mm6               ; mm7=tmp1H
+        psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
 
-	movq	mm2,mm5
-	paddd	mm5,mm1			; mm5=tmp11L
-	psubd	mm2,mm1			; mm2=tmp12L
-	movq	mm0,mm7
-	paddd	mm7,mm3			; mm7=tmp11H
-	psubd	mm0,mm3			; mm0=tmp12H
+        movq    mm2,mm5
+        paddd   mm5,mm1                 ; mm5=tmp11L
+        psubd   mm2,mm1                 ; mm2=tmp12L
+        movq    mm0,mm7
+        paddd   mm7,mm3                 ; mm7=tmp11H
+        psubd   mm0,mm3                 ; mm0=tmp12H
 
-	movq	MMWORD [wk(4)], mm5	; wk(4)=tmp11L
-	movq	MMWORD [wk(5)], mm7	; wk(5)=tmp11H
-	movq	MMWORD [wk(6)], mm2	; wk(6)=tmp12L
-	movq	MMWORD [wk(7)], mm0	; wk(7)=tmp12H
+        movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+        movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+        movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+        movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	movq	mm5,mm6
-	movq	mm7,mm4
-	paddw	mm5,mm3			; mm5=z3
-	paddw	mm7,mm1			; mm7=z4
+        movq    mm5,mm6
+        movq    mm7,mm4
+        paddw   mm5,mm3                 ; mm5=z3
+        paddw   mm7,mm1                 ; mm7=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movq      mm2,mm5
-	movq      mm0,mm5
-	punpcklwd mm2,mm7
-	punpckhwd mm0,mm7
-	movq      mm5,mm2
-	movq      mm7,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]	; mm2=z3L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]	; mm0=z3H
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]	; mm5=z4L
-	pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]	; mm7=z4H
+        movq      mm2,mm5
+        movq      mm0,mm5
+        punpcklwd mm2,mm7
+        punpckhwd mm0,mm7
+        movq      mm5,mm2
+        movq      mm7,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
 
-	movq	MMWORD [wk(10)], mm2	; wk(10)=z3L
-	movq	MMWORD [wk(11)], mm0	; wk(11)=z3H
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movq      mm2,mm3
-	movq      mm0,mm3
-	punpcklwd mm2,mm4
-	punpckhwd mm0,mm4
-	movq      mm3,mm2
-	movq      mm4,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]	; mm2=tmp0L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]	; mm0=tmp0H
-	pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]	; mm3=tmp3L
-	pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]	; mm4=tmp3H
+        movq      mm2,mm3
+        movq      mm0,mm3
+        punpcklwd mm2,mm4
+        punpckhwd mm0,mm4
+        movq      mm3,mm2
+        movq      mm4,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
 
-	paddd	mm2, MMWORD [wk(10)]	; mm2=tmp0L
-	paddd	mm0, MMWORD [wk(11)]	; mm0=tmp0H
-	paddd	mm3,mm5			; mm3=tmp3L
-	paddd	mm4,mm7			; mm4=tmp3H
+        paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+        paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+        paddd   mm3,mm5                 ; mm3=tmp3L
+        paddd   mm4,mm7                 ; mm4=tmp3H
 
-	movq	MMWORD [wk(8)], mm2	; wk(8)=tmp0L
-	movq	MMWORD [wk(9)], mm0	; wk(9)=tmp0H
+        movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+        movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
 
-	movq      mm2,mm1
-	movq      mm0,mm1
-	punpcklwd mm2,mm6
-	punpckhwd mm0,mm6
-	movq      mm1,mm2
-	movq      mm6,mm0
-	pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]	; mm2=tmp1L
-	pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]	; mm0=tmp1H
-	pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]	; mm1=tmp2L
-	pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]	; mm6=tmp2H
+        movq      mm2,mm1
+        movq      mm0,mm1
+        punpcklwd mm2,mm6
+        punpckhwd mm0,mm6
+        movq      mm1,mm2
+        movq      mm6,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
 
-	paddd	mm2,mm5			; mm2=tmp1L
-	paddd	mm0,mm7			; mm0=tmp1H
-	paddd	mm1, MMWORD [wk(10)]	; mm1=tmp2L
-	paddd	mm6, MMWORD [wk(11)]	; mm6=tmp2H
+        paddd   mm2,mm5                 ; mm2=tmp1L
+        paddd   mm0,mm7                 ; mm0=tmp1H
+        paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+        paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
 
-	movq	MMWORD [wk(10)], mm2	; wk(10)=tmp1L
-	movq	MMWORD [wk(11)], mm0	; wk(11)=tmp1H
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq	mm5, MMWORD [wk(0)]	; mm5=tmp10L
-	movq	mm7, MMWORD [wk(1)]	; mm7=tmp10H
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
 
-	movq	mm2,mm5
-	movq	mm0,mm7
-	paddd	mm5,mm3			; mm5=data0L
-	paddd	mm7,mm4			; mm7=data0H
-	psubd	mm2,mm3			; mm2=data7L
-	psubd	mm0,mm4			; mm0=data7H
+        movq    mm2,mm5
+        movq    mm0,mm7
+        paddd   mm5,mm3                 ; mm5=data0L
+        paddd   mm7,mm4                 ; mm7=data0H
+        psubd   mm2,mm3                 ; mm2=data7L
+        psubd   mm0,mm4                 ; mm0=data7H
 
-	movq	mm3,[GOTOFF(ebx,PD_DESCALE_P2)]	; mm3=[PD_DESCALE_P2]
+        movq    mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2]
 
-	paddd	mm5,mm3
-	paddd	mm7,mm3
-	psrad	mm5,DESCALE_P2
-	psrad	mm7,DESCALE_P2
-	paddd	mm2,mm3
-	paddd	mm0,mm3
-	psrad	mm2,DESCALE_P2
-	psrad	mm0,DESCALE_P2
+        paddd   mm5,mm3
+        paddd   mm7,mm3
+        psrad   mm5,DESCALE_P2
+        psrad   mm7,DESCALE_P2
+        paddd   mm2,mm3
+        paddd   mm0,mm3
+        psrad   mm2,DESCALE_P2
+        psrad   mm0,DESCALE_P2
 
-	packssdw  mm5,mm7		; mm5=data0=(00 10 20 30)
-	packssdw  mm2,mm0		; mm2=data7=(07 17 27 37)
+        packssdw  mm5,mm7               ; mm5=data0=(00 10 20 30)
+        packssdw  mm2,mm0               ; mm2=data7=(07 17 27 37)
 
-	movq	mm4, MMWORD [wk(4)]	; mm4=tmp11L
-	movq	mm3, MMWORD [wk(5)]	; mm3=tmp11H
+        movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+        movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
 
-	movq	mm7,mm4
-	movq	mm0,mm3
-	paddd	mm4,mm1			; mm4=data1L
-	paddd	mm3,mm6			; mm3=data1H
-	psubd	mm7,mm1			; mm7=data6L
-	psubd	mm0,mm6			; mm0=data6H
+        movq    mm7,mm4
+        movq    mm0,mm3
+        paddd   mm4,mm1                 ; mm4=data1L
+        paddd   mm3,mm6                 ; mm3=data1H
+        psubd   mm7,mm1                 ; mm7=data6L
+        psubd   mm0,mm6                 ; mm0=data6H
 
-	movq	mm1,[GOTOFF(ebx,PD_DESCALE_P2)]	; mm1=[PD_DESCALE_P2]
+        movq    mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2]
 
-	paddd	mm4,mm1
-	paddd	mm3,mm1
-	psrad	mm4,DESCALE_P2
-	psrad	mm3,DESCALE_P2
-	paddd	mm7,mm1
-	paddd	mm0,mm1
-	psrad	mm7,DESCALE_P2
-	psrad	mm0,DESCALE_P2
+        paddd   mm4,mm1
+        paddd   mm3,mm1
+        psrad   mm4,DESCALE_P2
+        psrad   mm3,DESCALE_P2
+        paddd   mm7,mm1
+        paddd   mm0,mm1
+        psrad   mm7,DESCALE_P2
+        psrad   mm0,DESCALE_P2
 
-	packssdw  mm4,mm3		; mm4=data1=(01 11 21 31)
-	packssdw  mm7,mm0		; mm7=data6=(06 16 26 36)
+        packssdw  mm4,mm3               ; mm4=data1=(01 11 21 31)
+        packssdw  mm7,mm0               ; mm7=data6=(06 16 26 36)
 
-	packsswb  mm5,mm7		; mm5=(00 10 20 30 06 16 26 36)
-	packsswb  mm4,mm2		; mm4=(01 11 21 31 07 17 27 37)
+        packsswb  mm5,mm7               ; mm5=(00 10 20 30 06 16 26 36)
+        packsswb  mm4,mm2               ; mm4=(01 11 21 31 07 17 27 37)
 
-	movq	mm6, MMWORD [wk(6)]	; mm6=tmp12L
-	movq	mm1, MMWORD [wk(7)]	; mm1=tmp12H
-	movq	mm3, MMWORD [wk(10)]	; mm3=tmp1L
-	movq	mm0, MMWORD [wk(11)]	; mm0=tmp1H
+        movq    mm6, MMWORD [wk(6)]     ; mm6=tmp12L
+        movq    mm1, MMWORD [wk(7)]     ; mm1=tmp12H
+        movq    mm3, MMWORD [wk(10)]    ; mm3=tmp1L
+        movq    mm0, MMWORD [wk(11)]    ; mm0=tmp1H
 
-	movq	MMWORD [wk(0)], mm5	; wk(0)=(00 10 20 30 06 16 26 36)
-	movq	MMWORD [wk(1)], mm4	; wk(1)=(01 11 21 31 07 17 27 37)
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 20 30 06 16 26 36)
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=(01 11 21 31 07 17 27 37)
 
-	movq	mm7,mm6
-	movq	mm2,mm1
-	paddd	mm6,mm3			; mm6=data2L
-	paddd	mm1,mm0			; mm1=data2H
-	psubd	mm7,mm3			; mm7=data5L
-	psubd	mm2,mm0			; mm2=data5H
+        movq    mm7,mm6
+        movq    mm2,mm1
+        paddd   mm6,mm3                 ; mm6=data2L
+        paddd   mm1,mm0                 ; mm1=data2H
+        psubd   mm7,mm3                 ; mm7=data5L
+        psubd   mm2,mm0                 ; mm2=data5H
 
-	movq	mm5,[GOTOFF(ebx,PD_DESCALE_P2)]	; mm5=[PD_DESCALE_P2]
+        movq    mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2]
 
-	paddd	mm6,mm5
-	paddd	mm1,mm5
-	psrad	mm6,DESCALE_P2
-	psrad	mm1,DESCALE_P2
-	paddd	mm7,mm5
-	paddd	mm2,mm5
-	psrad	mm7,DESCALE_P2
-	psrad	mm2,DESCALE_P2
+        paddd   mm6,mm5
+        paddd   mm1,mm5
+        psrad   mm6,DESCALE_P2
+        psrad   mm1,DESCALE_P2
+        paddd   mm7,mm5
+        paddd   mm2,mm5
+        psrad   mm7,DESCALE_P2
+        psrad   mm2,DESCALE_P2
 
-	packssdw  mm6,mm1		; mm6=data2=(02 12 22 32)
-	packssdw  mm7,mm2		; mm7=data5=(05 15 25 35)
+        packssdw  mm6,mm1               ; mm6=data2=(02 12 22 32)
+        packssdw  mm7,mm2               ; mm7=data5=(05 15 25 35)
 
-	movq	mm4, MMWORD [wk(2)]	; mm4=tmp13L
-	movq	mm3, MMWORD [wk(3)]	; mm3=tmp13H
-	movq	mm0, MMWORD [wk(8)]	; mm0=tmp0L
-	movq	mm5, MMWORD [wk(9)]	; mm5=tmp0H
+        movq    mm4, MMWORD [wk(2)]     ; mm4=tmp13L
+        movq    mm3, MMWORD [wk(3)]     ; mm3=tmp13H
+        movq    mm0, MMWORD [wk(8)]     ; mm0=tmp0L
+        movq    mm5, MMWORD [wk(9)]     ; mm5=tmp0H
 
-	movq	mm1,mm4
-	movq	mm2,mm3
-	paddd	mm4,mm0			; mm4=data3L
-	paddd	mm3,mm5			; mm3=data3H
-	psubd	mm1,mm0			; mm1=data4L
-	psubd	mm2,mm5			; mm2=data4H
+        movq    mm1,mm4
+        movq    mm2,mm3
+        paddd   mm4,mm0                 ; mm4=data3L
+        paddd   mm3,mm5                 ; mm3=data3H
+        psubd   mm1,mm0                 ; mm1=data4L
+        psubd   mm2,mm5                 ; mm2=data4H
 
-	movq	mm0,[GOTOFF(ebx,PD_DESCALE_P2)]	; mm0=[PD_DESCALE_P2]
+        movq    mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2]
 
-	paddd	mm4,mm0
-	paddd	mm3,mm0
-	psrad	mm4,DESCALE_P2
-	psrad	mm3,DESCALE_P2
-	paddd	mm1,mm0
-	paddd	mm2,mm0
-	psrad	mm1,DESCALE_P2
-	psrad	mm2,DESCALE_P2
+        paddd   mm4,mm0
+        paddd   mm3,mm0
+        psrad   mm4,DESCALE_P2
+        psrad   mm3,DESCALE_P2
+        paddd   mm1,mm0
+        paddd   mm2,mm0
+        psrad   mm1,DESCALE_P2
+        psrad   mm2,DESCALE_P2
 
-	movq      mm5,[GOTOFF(ebx,PB_CENTERJSAMP)]	; mm5=[PB_CENTERJSAMP]
+        movq      mm5,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm5=[PB_CENTERJSAMP]
 
-	packssdw  mm4,mm3		; mm4=data3=(03 13 23 33)
-	packssdw  mm1,mm2		; mm1=data4=(04 14 24 34)
+        packssdw  mm4,mm3               ; mm4=data3=(03 13 23 33)
+        packssdw  mm1,mm2               ; mm1=data4=(04 14 24 34)
 
-	movq      mm0, MMWORD [wk(0)]	; mm0=(00 10 20 30 06 16 26 36)
-	movq      mm3, MMWORD [wk(1)]	; mm3=(01 11 21 31 07 17 27 37)
+        movq      mm0, MMWORD [wk(0)]   ; mm0=(00 10 20 30 06 16 26 36)
+        movq      mm3, MMWORD [wk(1)]   ; mm3=(01 11 21 31 07 17 27 37)
 
-	packsswb  mm6,mm1		; mm6=(02 12 22 32 04 14 24 34)
-	packsswb  mm4,mm7		; mm4=(03 13 23 33 05 15 25 35)
+        packsswb  mm6,mm1               ; mm6=(02 12 22 32 04 14 24 34)
+        packsswb  mm4,mm7               ; mm4=(03 13 23 33 05 15 25 35)
 
-	paddb     mm0,mm5
-	paddb     mm3,mm5
-	paddb     mm6,mm5
-	paddb     mm4,mm5
+        paddb     mm0,mm5
+        paddb     mm3,mm5
+        paddb     mm6,mm5
+        paddb     mm4,mm5
 
-	movq      mm2,mm0		; transpose coefficients(phase 1)
-	punpcklbw mm0,mm3		; mm0=(00 01 10 11 20 21 30 31)
-	punpckhbw mm2,mm3		; mm2=(06 07 16 17 26 27 36 37)
-	movq      mm1,mm6		; transpose coefficients(phase 1)
-	punpcklbw mm6,mm4		; mm6=(02 03 12 13 22 23 32 33)
-	punpckhbw mm1,mm4		; mm1=(04 05 14 15 24 25 34 35)
+        movq      mm2,mm0               ; transpose coefficients(phase 1)
+        punpcklbw mm0,mm3               ; mm0=(00 01 10 11 20 21 30 31)
+        punpckhbw mm2,mm3               ; mm2=(06 07 16 17 26 27 36 37)
+        movq      mm1,mm6               ; transpose coefficients(phase 1)
+        punpcklbw mm6,mm4               ; mm6=(02 03 12 13 22 23 32 33)
+        punpckhbw mm1,mm4               ; mm1=(04 05 14 15 24 25 34 35)
 
-	movq      mm7,mm0		; transpose coefficients(phase 2)
-	punpcklwd mm0,mm6		; mm0=(00 01 02 03 10 11 12 13)
-	punpckhwd mm7,mm6		; mm7=(20 21 22 23 30 31 32 33)
-	movq      mm5,mm1		; transpose coefficients(phase 2)
-	punpcklwd mm1,mm2		; mm1=(04 05 06 07 14 15 16 17)
-	punpckhwd mm5,mm2		; mm5=(24 25 26 27 34 35 36 37)
+        movq      mm7,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm6               ; mm0=(00 01 02 03 10 11 12 13)
+        punpckhwd mm7,mm6               ; mm7=(20 21 22 23 30 31 32 33)
+        movq      mm5,mm1               ; transpose coefficients(phase 2)
+        punpcklwd mm1,mm2               ; mm1=(04 05 06 07 14 15 16 17)
+        punpckhwd mm5,mm2               ; mm5=(24 25 26 27 34 35 36 37)
 
-	movq      mm3,mm0		; transpose coefficients(phase 3)
-	punpckldq mm0,mm1		; mm0=(00 01 02 03 04 05 06 07)
-	punpckhdq mm3,mm1		; mm3=(10 11 12 13 14 15 16 17)
-	movq      mm4,mm7		; transpose coefficients(phase 3)
-	punpckldq mm7,mm5		; mm7=(20 21 22 23 24 25 26 27)
-	punpckhdq mm4,mm5		; mm4=(30 31 32 33 34 35 36 37)
+        movq      mm3,mm0               ; transpose coefficients(phase 3)
+        punpckldq mm0,mm1               ; mm0=(00 01 02 03 04 05 06 07)
+        punpckhdq mm3,mm1               ; mm3=(10 11 12 13 14 15 16 17)
+        movq      mm4,mm7               ; transpose coefficients(phase 3)
+        punpckldq mm7,mm5               ; mm7=(20 21 22 23 24 25 26 27)
+        punpckhdq mm4,mm5               ; mm4=(30 31 32 33 34 35 36 37)
 
-	pushpic	ebx			; save GOT address
+        pushpic ebx                     ; save GOT address
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3
-	mov	edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-	poppic	ebx			; restore GOT address
+        poppic  ebx                     ; restore GOT address
 
-	add	esi, byte 4*SIZEOF_JCOEF	; wsptr
-	add	edi, byte 4*SIZEOF_JSAMPROW
-	dec	ecx				; ctr
-	jnz	near .rowloop
+        add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jimmxred.asm b/simd/jimmxred.asm
index a2b7103..5e24836 100644
--- a/simd/jimmxred.asm
+++ b/simd/jimmxred.asm
@@ -26,74 +26,74 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13     ; fraction bits of the F_x_xxx constants: FIX(x) = round(x * 2^CONST_BITS)
+%define PASS1_BITS      2      ; extra scale bits kept in pass-1 results (cf. psllw mm0,PASS1_BITS)
 
-%define DESCALE_P1_4	(CONST_BITS-PASS1_BITS+1)
-%define DESCALE_P2_4	(CONST_BITS+PASS1_BITS+3+1)
-%define DESCALE_P1_2	(CONST_BITS-PASS1_BITS+2)
-%define DESCALE_P2_2	(CONST_BITS+PASS1_BITS+3+2)
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)       ; psrad count for pass-1 results of jsimd_idct_4x4_mmx
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)     ; presumably the 4x4 pass-2 shift count -- TODO confirm
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)       ; presumably pass-1 shift count of a 2x2 IDCT -- TODO confirm
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)     ; presumably pass-2 shift count of a 2x2 IDCT -- TODO confirm
 
 %if CONST_BITS == 13
-F_0_211	equ	 1730		; FIX(0.211164243)
-F_0_509	equ	 4176		; FIX(0.509795579)
-F_0_601	equ	 4926		; FIX(0.601344887)
-F_0_720	equ	 5906		; FIX(0.720959822)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_850	equ	 6967		; FIX(0.850430095)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_061	equ	 8697		; FIX(1.061594337)
-F_1_272	equ	10426		; FIX(1.272758580)
-F_1_451	equ	11893		; FIX(1.451774981)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_2_172	equ	17799		; FIX(2.172734803)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_624	equ	29692		; FIX(3.624509785)
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_211	equ	DESCALE( 226735879,30-CONST_BITS)	; FIX(0.211164243)
-F_0_509	equ	DESCALE( 547388834,30-CONST_BITS)	; FIX(0.509795579)
-F_0_601	equ	DESCALE( 645689155,30-CONST_BITS)	; FIX(0.601344887)
-F_0_720	equ	DESCALE( 774124714,30-CONST_BITS)	; FIX(0.720959822)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_850	equ	DESCALE( 913142361,30-CONST_BITS)	; FIX(0.850430095)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_061	equ	DESCALE(1139878239,30-CONST_BITS)	; FIX(1.061594337)
-F_1_272	equ	DESCALE(1366614119,30-CONST_BITS)	; FIX(1.272758580)
-F_1_451	equ	DESCALE(1558831516,30-CONST_BITS)	; FIX(1.451774981)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_172	equ	DESCALE(2332956230,30-CONST_BITS)	; FIX(2.172734803)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_624	equ	DESCALE(3891787747,30-CONST_BITS)	; FIX(3.624509785)
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_red_mmx)
+        alignz  16
+        global  EXTN(jconst_idct_red_mmx)
 
 EXTN(jconst_idct_red_mmx):
 
-PW_F184_MF076	times 2 dw  F_1_847,-F_0_765
-PW_F256_F089	times 2 dw  F_2_562, F_0_899
-PW_F106_MF217	times 2 dw  F_1_061,-F_2_172
-PW_MF060_MF050	times 2 dw -F_0_601,-F_0_509
-PW_F145_MF021	times 2 dw  F_1_451,-F_0_211
-PW_F362_MF127	times 2 dw  F_3_624,-F_1_272
-PW_F085_MF072	times 2 dw  F_0_850,-F_0_720
-PD_DESCALE_P1_4	times 2 dd  1 << (DESCALE_P1_4-1)
-PD_DESCALE_P2_4	times 2 dd  1 << (DESCALE_P2_4-1)
-PD_DESCALE_P1_2	times 2 dd  1 << (DESCALE_P1_2-1)
-PD_DESCALE_P2_2	times 2 dd  1 << (DESCALE_P2_2-1)
-PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE
+PW_F184_MF076   times 2 dw  F_1_847,-F_0_765       ; pmaddwd weights for interleaved (in2,in6): even part
+PW_F256_F089    times 2 dw  F_2_562, F_0_899       ; pmaddwd weights for interleaved (in1,in3): odd part, tmp2
+PW_F106_MF217   times 2 dw  F_1_061,-F_2_172       ; pmaddwd weights for interleaved (in1,in3): odd part, tmp0
+PW_MF060_MF050  times 2 dw -F_0_601,-F_0_509       ; pmaddwd weights for interleaved (in5,in7): odd part, tmp2
+PW_F145_MF021   times 2 dw  F_1_451,-F_0_211       ; pmaddwd weights for interleaved (in5,in7): odd part, tmp0
+PW_F362_MF127   times 2 dw  F_3_624,-F_1_272       ; presumably for a 2x2 IDCT elsewhere in this file -- TODO confirm
+PW_F085_MF072   times 2 dw  F_0_850,-F_0_720       ; presumably for a 2x2 IDCT elsewhere in this file -- TODO confirm
+PD_DESCALE_P1_4 times 2 dd  1 << (DESCALE_P1_4-1)  ; rounding bias added (paddd) before psrad by DESCALE_P1_4
+PD_DESCALE_P2_4 times 2 dd  1 << (DESCALE_P2_4-1)  ; half of 2^DESCALE_P2_4; presumably the pass-2 rounding bias
+PD_DESCALE_P1_2 times 2 dd  1 << (DESCALE_P1_2-1)  ; half of 2^DESCALE_P1_2; presumably a 2x2 rounding bias
+PD_DESCALE_P2_2 times 2 dd  1 << (DESCALE_P2_2-1)  ; half of 2^DESCALE_P2_2; presumably a 2x2 rounding bias
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE          ; 8 bytes of CENTERJSAMPLE; presumably added to packed output -- confirm
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients,
 ; producing a reduced-size 4x4 output block.
@@ -103,388 +103,388 @@
 ;                     JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_JCOEF
-					; JCOEF workspace[DCTSIZE2]
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_4x4_mmx)
+        align   16
+        global  EXTN(jsimd_idct_4x4_mmx)
 
 EXTN(jsimd_idct_4x4_mmx):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	pushpic	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; JCOEF * wsptr
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	mm0,mm1
-	packsswb mm0,mm0
-	movd	eax,mm0
-	test	eax,eax
-	jnz	short .columnDCT
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm0,mm1
+        packsswb mm0,mm0
+        movd    eax,mm0
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	mm0,PASS1_BITS
+        psllw   mm0,PASS1_BITS
 
-	movq      mm2,mm0		; mm0=in0=(00 01 02 03)
-	punpcklwd mm0,mm0		; mm0=(00 00 01 01)
-	punpckhwd mm2,mm2		; mm2=(02 02 03 03)
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
 
-	movq      mm1,mm0
-	punpckldq mm0,mm0		; mm0=(00 00 00 00)
-	punpckhdq mm1,mm1		; mm1=(01 01 01 01)
-	movq      mm3,mm2
-	punpckldq mm2,mm2		; mm2=(02 02 02 02)
-	punpckhdq mm3,mm3		; mm3=(03 03 03 03)
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
-	jmp	near .nextcolumn
-	alignx	16,7
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movq	mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movq      mm4,mm0
-	movq      mm5,mm0
-	punpcklwd mm4,mm1
-	punpckhwd mm5,mm1
-	movq      mm0,mm4
-	movq      mm1,mm5
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]	; mm4=(tmp2L)
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]	; mm5=(tmp2H)
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]	; mm0=(tmp0L)
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]	; mm1=(tmp0H)
+        movq      mm4,mm0
+        movq      mm5,mm0
+        punpcklwd mm4,mm1
+        punpckhwd mm5,mm1
+        movq      mm0,mm4
+        movq      mm1,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
 
-	movq      mm6,mm2
-	movq      mm7,mm2
-	punpcklwd mm6,mm3
-	punpckhwd mm7,mm3
-	movq      mm2,mm6
-	movq      mm3,mm7
-	pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]	; mm6=(tmp2L)
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]	; mm7=(tmp2H)
-	pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]	; mm2=(tmp0L)
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]	; mm3=(tmp0H)
+        movq      mm6,mm2
+        movq      mm7,mm2
+        punpcklwd mm6,mm3
+        punpckhwd mm7,mm3
+        movq      mm2,mm6
+        movq      mm3,mm7
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
 
-	paddd	mm6,mm4			; mm6=tmp2L
-	paddd	mm7,mm5			; mm7=tmp2H
-	paddd	mm2,mm0			; mm2=tmp0L
-	paddd	mm3,mm1			; mm3=tmp0H
+        paddd   mm6,mm4                 ; mm6=tmp2L
+        paddd   mm7,mm5                 ; mm7=tmp2H
+        paddd   mm2,mm0                 ; mm2=tmp0L
+        paddd   mm3,mm1                 ; mm3=tmp0H
 
-	movq	MMWORD [wk(0)], mm2	; wk(0)=tmp0L
-	movq	MMWORD [wk(1)], mm3	; wk(1)=tmp0H
+        movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+        movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq	mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	pxor      mm1,mm1
-	pxor      mm2,mm2
-	punpcklwd mm1,mm4		; mm1=tmp0L
-	punpckhwd mm2,mm4		; mm2=tmp0H
-	psrad     mm1,(16-CONST_BITS-1)	; psrad mm1,16 & pslld mm1,CONST_BITS+1
-	psrad     mm2,(16-CONST_BITS-1)	; psrad mm2,16 & pslld mm2,CONST_BITS+1
+        pxor      mm1,mm1
+        pxor      mm2,mm2
+        punpcklwd mm1,mm4               ; mm1=tmp0L
+        punpckhwd mm2,mm4               ; mm2=tmp0H
+        psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+        psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
 
-	movq      mm3,mm5		; mm5=in2=z2
-	punpcklwd mm5,mm0		; mm0=in6=z3
-	punpckhwd mm3,mm0
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]	; mm5=tmp2L
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]	; mm3=tmp2H
+        movq      mm3,mm5               ; mm5=in2=z2
+        punpcklwd mm5,mm0               ; mm0=in6=z3
+        punpckhwd mm3,mm0
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
 
-	movq	mm4,mm1
-	movq	mm0,mm2
-	paddd	mm1,mm5			; mm1=tmp10L
-	paddd	mm2,mm3			; mm2=tmp10H
-	psubd	mm4,mm5			; mm4=tmp12L
-	psubd	mm0,mm3			; mm0=tmp12H
+        movq    mm4,mm1
+        movq    mm0,mm2
+        paddd   mm1,mm5                 ; mm1=tmp10L
+        paddd   mm2,mm3                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp12L
+        psubd   mm0,mm3                 ; mm0=tmp12H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq	mm5,mm1
-	movq	mm3,mm2
-	paddd	mm1,mm6			; mm1=data0L
-	paddd	mm2,mm7			; mm2=data0H
-	psubd	mm5,mm6			; mm5=data3L
-	psubd	mm3,mm7			; mm3=data3H
+        movq    mm5,mm1
+        movq    mm3,mm2
+        paddd   mm1,mm6                 ; mm1=data0L
+        paddd   mm2,mm7                 ; mm2=data0H
+        psubd   mm5,mm6                 ; mm5=data3L
+        psubd   mm3,mm7                 ; mm3=data3H
 
-	movq	mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]	; mm6=[PD_DESCALE_P1_4]
+        movq    mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm6=[PD_DESCALE_P1_4]
 
-	paddd	mm1,mm6
-	paddd	mm2,mm6
-	psrad	mm1,DESCALE_P1_4
-	psrad	mm2,DESCALE_P1_4
-	paddd	mm5,mm6
-	paddd	mm3,mm6
-	psrad	mm5,DESCALE_P1_4
-	psrad	mm3,DESCALE_P1_4
+        paddd   mm1,mm6
+        paddd   mm2,mm6
+        psrad   mm1,DESCALE_P1_4
+        psrad   mm2,DESCALE_P1_4
+        paddd   mm5,mm6
+        paddd   mm3,mm6
+        psrad   mm5,DESCALE_P1_4
+        psrad   mm3,DESCALE_P1_4
 
-	packssdw  mm1,mm2		; mm1=data0=(00 01 02 03)
-	packssdw  mm5,mm3		; mm5=data3=(30 31 32 33)
+        packssdw  mm1,mm2               ; mm1=data0=(00 01 02 03)
+        packssdw  mm5,mm3               ; mm5=data3=(30 31 32 33)
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=tmp0L
-	movq	mm6, MMWORD [wk(1)]	; mm6=tmp0H
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
 
-	movq	mm2,mm4
-	movq	mm3,mm0
-	paddd	mm4,mm7			; mm4=data1L
-	paddd	mm0,mm6			; mm0=data1H
-	psubd	mm2,mm7			; mm2=data2L
-	psubd	mm3,mm6			; mm3=data2H
+        movq    mm2,mm4
+        movq    mm3,mm0
+        paddd   mm4,mm7                 ; mm4=data1L
+        paddd   mm0,mm6                 ; mm0=data1H
+        psubd   mm2,mm7                 ; mm2=data2L
+        psubd   mm3,mm6                 ; mm3=data2H
 
-	movq	mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]	; mm7=[PD_DESCALE_P1_4]
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm7=[PD_DESCALE_P1_4]
 
-	paddd	mm4,mm7
-	paddd	mm0,mm7
-	psrad	mm4,DESCALE_P1_4
-	psrad	mm0,DESCALE_P1_4
-	paddd	mm2,mm7
-	paddd	mm3,mm7
-	psrad	mm2,DESCALE_P1_4
-	psrad	mm3,DESCALE_P1_4
+        paddd   mm4,mm7
+        paddd   mm0,mm7
+        psrad   mm4,DESCALE_P1_4
+        psrad   mm0,DESCALE_P1_4
+        paddd   mm2,mm7
+        paddd   mm3,mm7
+        psrad   mm2,DESCALE_P1_4
+        psrad   mm3,DESCALE_P1_4
 
-	packssdw  mm4,mm0		; mm4=data1=(10 11 12 13)
-	packssdw  mm2,mm3		; mm2=data2=(20 21 22 23)
+        packssdw  mm4,mm0               ; mm4=data1=(10 11 12 13)
+        packssdw  mm2,mm3               ; mm2=data2=(20 21 22 23)
 
-	movq      mm6,mm1		; transpose coefficients(phase 1)
-	punpcklwd mm1,mm4		; mm1=(00 10 01 11)
-	punpckhwd mm6,mm4		; mm6=(02 12 03 13)
-	movq      mm7,mm2		; transpose coefficients(phase 1)
-	punpcklwd mm2,mm5		; mm2=(20 30 21 31)
-	punpckhwd mm7,mm5		; mm7=(22 32 23 33)
+        movq      mm6,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm4               ; mm1=(00 10 01 11)
+        punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+        movq      mm7,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm5               ; mm2=(20 30 21 31)
+        punpckhwd mm7,mm5               ; mm7=(22 32 23 33)
 
-	movq      mm0,mm1		; transpose coefficients(phase 2)
-	punpckldq mm1,mm2		; mm1=(00 10 20 30)
-	punpckhdq mm0,mm2		; mm0=(01 11 21 31)
-	movq      mm3,mm6		; transpose coefficients(phase 2)
-	punpckldq mm6,mm7		; mm6=(02 12 22 32)
-	punpckhdq mm3,mm7		; mm3=(03 13 23 33)
+        movq      mm0,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm2               ; mm1=(00 10 20 30)
+        punpckhdq mm0,mm2               ; mm0=(01 11 21 31)
+        movq      mm3,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm7               ; mm6=(02 12 22 32)
+        punpckhdq mm3,mm7               ; mm3=(03 13 23 33)
 
-	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1
-	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
-	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6
-	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
 
 .nextcolumn:
-	add	esi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 4*SIZEOF_ISLOW_MULT_TYPE	; quantptr
-	add	edi, byte 4*DCTSIZE*SIZEOF_JCOEF	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; JCOEF * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq	mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	movq      mm4,mm0
-	movq      mm5,mm0
-	punpcklwd mm4,mm1
-	punpckhwd mm5,mm1
-	movq      mm0,mm4
-	movq      mm1,mm5
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]	; mm4=(tmp2L)
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]	; mm5=(tmp2H)
-	pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]	; mm0=(tmp0L)
-	pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]	; mm1=(tmp0H)
+        movq      mm4,mm0
+        movq      mm5,mm0
+        punpcklwd mm4,mm1
+        punpckhwd mm5,mm1
+        movq      mm0,mm4
+        movq      mm1,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
 
-	movq      mm6,mm2
-	movq      mm7,mm2
-	punpcklwd mm6,mm3
-	punpckhwd mm7,mm3
-	movq      mm2,mm6
-	movq      mm3,mm7
-	pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]	; mm6=(tmp2L)
-	pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]	; mm7=(tmp2H)
-	pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]	; mm2=(tmp0L)
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]	; mm3=(tmp0H)
+        movq      mm6,mm2
+        movq      mm7,mm2
+        punpcklwd mm6,mm3
+        punpckhwd mm7,mm3
+        movq      mm2,mm6
+        movq      mm3,mm7
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
 
-	paddd	mm6,mm4			; mm6=tmp2L
-	paddd	mm7,mm5			; mm7=tmp2H
-	paddd	mm2,mm0			; mm2=tmp0L
-	paddd	mm3,mm1			; mm3=tmp0H
+        paddd   mm6,mm4                 ; mm6=tmp2L
+        paddd   mm7,mm5                 ; mm7=tmp2H
+        paddd   mm2,mm0                 ; mm2=tmp0L
+        paddd   mm3,mm1                 ; mm3=tmp0H
 
-	movq	MMWORD [wk(0)], mm2	; wk(0)=tmp0L
-	movq	MMWORD [wk(1)], mm3	; wk(1)=tmp0H
+        movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+        movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq	mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	pxor      mm1,mm1
-	pxor      mm2,mm2
-	punpcklwd mm1,mm4		; mm1=tmp0L
-	punpckhwd mm2,mm4		; mm2=tmp0H
-	psrad     mm1,(16-CONST_BITS-1)	; psrad mm1,16 & pslld mm1,CONST_BITS+1
-	psrad     mm2,(16-CONST_BITS-1)	; psrad mm2,16 & pslld mm2,CONST_BITS+1
+        pxor      mm1,mm1
+        pxor      mm2,mm2
+        punpcklwd mm1,mm4               ; mm1=tmp0L
+        punpckhwd mm2,mm4               ; mm2=tmp0H
+        psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+        psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
 
-	movq      mm3,mm5		; mm5=in2=z2
-	punpcklwd mm5,mm0		; mm0=in6=z3
-	punpckhwd mm3,mm0
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]	; mm5=tmp2L
-	pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]	; mm3=tmp2H
+        movq      mm3,mm5               ; mm5=in2=z2
+        punpcklwd mm5,mm0               ; mm0=in6=z3
+        punpckhwd mm3,mm0
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
 
-	movq	mm4,mm1
-	movq	mm0,mm2
-	paddd	mm1,mm5			; mm1=tmp10L
-	paddd	mm2,mm3			; mm2=tmp10H
-	psubd	mm4,mm5			; mm4=tmp12L
-	psubd	mm0,mm3			; mm0=tmp12H
+        movq    mm4,mm1
+        movq    mm0,mm2
+        paddd   mm1,mm5                 ; mm1=tmp10L
+        paddd   mm2,mm3                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp12L
+        psubd   mm0,mm3                 ; mm0=tmp12H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq	mm5,mm1
-	movq	mm3,mm2
-	paddd	mm1,mm6			; mm1=data0L
-	paddd	mm2,mm7			; mm2=data0H
-	psubd	mm5,mm6			; mm5=data3L
-	psubd	mm3,mm7			; mm3=data3H
+        movq    mm5,mm1
+        movq    mm3,mm2
+        paddd   mm1,mm6                 ; mm1=data0L
+        paddd   mm2,mm7                 ; mm2=data0H
+        psubd   mm5,mm6                 ; mm5=data3L
+        psubd   mm3,mm7                 ; mm3=data3H
 
-	movq	mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)]	; mm6=[PD_DESCALE_P2_4]
+        movq    mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm6=[PD_DESCALE_P2_4]
 
-	paddd	mm1,mm6
-	paddd	mm2,mm6
-	psrad	mm1,DESCALE_P2_4
-	psrad	mm2,DESCALE_P2_4
-	paddd	mm5,mm6
-	paddd	mm3,mm6
-	psrad	mm5,DESCALE_P2_4
-	psrad	mm3,DESCALE_P2_4
+        paddd   mm1,mm6
+        paddd   mm2,mm6
+        psrad   mm1,DESCALE_P2_4
+        psrad   mm2,DESCALE_P2_4
+        paddd   mm5,mm6
+        paddd   mm3,mm6
+        psrad   mm5,DESCALE_P2_4
+        psrad   mm3,DESCALE_P2_4
 
-	packssdw  mm1,mm2		; mm1=data0=(00 10 20 30)
-	packssdw  mm5,mm3		; mm5=data3=(03 13 23 33)
+        packssdw  mm1,mm2               ; mm1=data0=(00 10 20 30)
+        packssdw  mm5,mm3               ; mm5=data3=(03 13 23 33)
 
-	movq	mm7, MMWORD [wk(0)]	; mm7=tmp0L
-	movq	mm6, MMWORD [wk(1)]	; mm6=tmp0H
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
 
-	movq	mm2,mm4
-	movq	mm3,mm0
-	paddd	mm4,mm7			; mm4=data1L
-	paddd	mm0,mm6			; mm0=data1H
-	psubd	mm2,mm7			; mm2=data2L
-	psubd	mm3,mm6			; mm3=data2H
+        movq    mm2,mm4
+        movq    mm3,mm0
+        paddd   mm4,mm7                 ; mm4=data1L
+        paddd   mm0,mm6                 ; mm0=data1H
+        psubd   mm2,mm7                 ; mm2=data2L
+        psubd   mm3,mm6                 ; mm3=data2H
 
-	movq	mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)]	; mm7=[PD_DESCALE_P2_4]
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm7=[PD_DESCALE_P2_4]
 
-	paddd	mm4,mm7
-	paddd	mm0,mm7
-	psrad	mm4,DESCALE_P2_4
-	psrad	mm0,DESCALE_P2_4
-	paddd	mm2,mm7
-	paddd	mm3,mm7
-	psrad	mm2,DESCALE_P2_4
-	psrad	mm3,DESCALE_P2_4
+        paddd   mm4,mm7
+        paddd   mm0,mm7
+        psrad   mm4,DESCALE_P2_4
+        psrad   mm0,DESCALE_P2_4
+        paddd   mm2,mm7
+        paddd   mm3,mm7
+        psrad   mm2,DESCALE_P2_4
+        psrad   mm3,DESCALE_P2_4
 
-	packssdw  mm4,mm0		; mm4=data1=(01 11 21 31)
-	packssdw  mm2,mm3		; mm2=data2=(02 12 22 32)
+        packssdw  mm4,mm0               ; mm4=data1=(01 11 21 31)
+        packssdw  mm2,mm3               ; mm2=data2=(02 12 22 32)
 
-	movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]	; mm6=[PB_CENTERJSAMP]
+        movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
 
-	packsswb  mm1,mm2		; mm1=(00 10 20 30 02 12 22 32)
-	packsswb  mm4,mm5		; mm4=(01 11 21 31 03 13 23 33)
-	paddb     mm1,mm6
-	paddb     mm4,mm6
+        packsswb  mm1,mm2               ; mm1=(00 10 20 30 02 12 22 32)
+        packsswb  mm4,mm5               ; mm4=(01 11 21 31 03 13 23 33)
+        paddb     mm1,mm6
+        paddb     mm4,mm6
 
-	movq      mm7,mm1		; transpose coefficients(phase 1)
-	punpcklbw mm1,mm4		; mm1=(00 01 10 11 20 21 30 31)
-	punpckhbw mm7,mm4		; mm7=(02 03 12 13 22 23 32 33)
+        movq      mm7,mm1               ; transpose coefficients(phase 1)
+        punpcklbw mm1,mm4               ; mm1=(00 01 10 11 20 21 30 31)
+        punpckhbw mm7,mm4               ; mm7=(02 03 12 13 22 23 32 33)
 
-	movq      mm0,mm1		; transpose coefficients(phase 2)
-	punpcklwd mm1,mm7		; mm1=(00 01 02 03 10 11 12 13)
-	punpckhwd mm0,mm7		; mm0=(20 21 22 23 30 31 32 33)
+        movq      mm0,mm1               ; transpose coefficients(phase 2)
+        punpcklwd mm1,mm7               ; mm1=(00 01 02 03 10 11 12 13)
+        punpckhwd mm0,mm7               ; mm0=(20 21 22 23 30 31 32 33)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	movd	DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
-	movd	DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
+        movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
 
-	psrlq	mm1,4*BYTE_BIT
-	psrlq	mm0,4*BYTE_BIT
+        psrlq   mm1,4*BYTE_BIT
+        psrlq   mm0,4*BYTE_BIT
 
-	mov	edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movd	DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
-	movd	DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
+        movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -497,210 +497,210 @@
 ;                     JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-	align	16
-	global	EXTN(jsimd_idct_2x2_mmx)
+        align   16
+        global  EXTN(jsimd_idct_2x2_mmx)
 
 EXTN(jsimd_idct_2x2_mmx):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	edx, POINTER [dct_table(ebp)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(ebp)]		; inptr
+        mov     edx, POINTER [dct_table(ebp)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
 
-	; | input:                  | result:        |
-	; | 00 01 ** 03 ** 05 ** 07 |                |
-	; | 10 11 ** 13 ** 15 ** 17 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
-	; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
-	; | 50 51 ** 53 ** 55 ** 57 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 70 71 ** 73 ** 75 ** 77 |                |
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movq	mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
-	; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+        ; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+        ; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
 
-	pcmpeqd   mm7,mm7
-	pslld     mm7,WORD_BIT		; mm7={0x0000 0xFFFF 0x0000 0xFFFF}
+        pcmpeqd   mm7,mm7
+        pslld     mm7,WORD_BIT          ; mm7={0x0000 0xFFFF 0x0000 0xFFFF}
 
-	movq      mm4,mm0		; mm4=(10 11 ** 13)
-	movq      mm5,mm2		; mm5=(50 51 ** 53)
-	punpcklwd mm4,mm1		; mm4=(10 30 11 31)
-	punpcklwd mm5,mm3		; mm5=(50 70 51 71)
-	pmaddwd   mm4,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+        movq      mm4,mm0               ; mm4=(10 11 ** 13)
+        movq      mm5,mm2               ; mm5=(50 51 ** 53)
+        punpcklwd mm4,mm1               ; mm4=(10 30 11 31)
+        punpcklwd mm5,mm3               ; mm5=(50 70 51 71)
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
 
-	psrld	mm0,WORD_BIT		; mm0=(11 -- 13 --)
-	pand	mm1,mm7			; mm1=(-- 31 -- 33)
-	psrld	mm2,WORD_BIT		; mm2=(51 -- 53 --)
-	pand	mm3,mm7			; mm3=(-- 71 -- 73)
-	por	mm0,mm1			; mm0=(11 31 13 33)
-	por	mm2,mm3			; mm2=(51 71 53 73)
-	pmaddwd	mm0,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd	mm2,[GOTOFF(ebx,PW_F085_MF072)]
+        psrld   mm0,WORD_BIT            ; mm0=(11 -- 13 --)
+        pand    mm1,mm7                 ; mm1=(-- 31 -- 33)
+        psrld   mm2,WORD_BIT            ; mm2=(51 -- 53 --)
+        pand    mm3,mm7                 ; mm3=(-- 71 -- 73)
+        por     mm0,mm1                 ; mm0=(11 31 13 33)
+        por     mm2,mm3                 ; mm2=(51 71 53 73)
+        pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)]
 
-	paddd	mm4,mm5			; mm4=tmp0[col0 col1]
+        paddd   mm4,mm5                 ; mm4=tmp0[col0 col1]
 
-	movq	mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
-	pmullw	mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movq	mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
-	movq	mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
-	pmullw	mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
-	; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+        ; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+        ; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
 
-	psrld	mm6,WORD_BIT		; mm6=(15 -- 17 --)
-	pand	mm1,mm7			; mm1=(-- 35 -- 37)
-	psrld	mm3,WORD_BIT		; mm3=(55 -- 57 --)
-	pand	mm5,mm7			; mm5=(-- 75 -- 77)
-	por	mm6,mm1			; mm6=(15 35 17 37)
-	por	mm3,mm5			; mm3=(55 75 57 77)
-	pmaddwd	mm6,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd	mm3,[GOTOFF(ebx,PW_F085_MF072)]
+        psrld   mm6,WORD_BIT            ; mm6=(15 -- 17 --)
+        pand    mm1,mm7                 ; mm1=(-- 35 -- 37)
+        psrld   mm3,WORD_BIT            ; mm3=(55 -- 57 --)
+        pand    mm5,mm7                 ; mm5=(-- 75 -- 77)
+        por     mm6,mm1                 ; mm6=(15 35 17 37)
+        por     mm3,mm5                 ; mm3=(55 75 57 77)
+        pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)]
 
-	paddd	mm0,mm2			; mm0=tmp0[col1 col3]
-	paddd	mm6,mm3			; mm6=tmp0[col5 col7]
+        paddd   mm0,mm2                 ; mm0=tmp0[col1 col3]
+        paddd   mm6,mm3                 ; mm6=tmp0[col5 col7]
 
-	; -- Even part
+        ; -- Even part
 
-	movq	mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq	mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
-	pmullw	mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+        ; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
 
-	movq	mm2,mm1				; mm2=(00 01 ** 03)
-	pslld	mm1,WORD_BIT			; mm1=(-- 00 -- **)
-	psrad	mm1,(WORD_BIT-CONST_BITS-2)	; mm1=tmp10[col0 ****]
+        movq    mm2,mm1                         ; mm2=(00 01 ** 03)
+        pslld   mm1,WORD_BIT                    ; mm1=(-- 00 -- **)
+        psrad   mm1,(WORD_BIT-CONST_BITS-2)     ; mm1=tmp10[col0 ****]
 
-	pand	mm2,mm7				; mm2=(-- 01 -- 03)
-	pand	mm5,mm7				; mm5=(-- 05 -- 07)
-	psrad	mm2,(WORD_BIT-CONST_BITS-2)	; mm2=tmp10[col1 col3]
-	psrad	mm5,(WORD_BIT-CONST_BITS-2)	; mm5=tmp10[col5 col7]
+        pand    mm2,mm7                         ; mm2=(-- 01 -- 03)
+        pand    mm5,mm7                         ; mm5=(-- 05 -- 07)
+        psrad   mm2,(WORD_BIT-CONST_BITS-2)     ; mm2=tmp10[col1 col3]
+        psrad   mm5,(WORD_BIT-CONST_BITS-2)     ; mm5=tmp10[col5 col7]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq      mm3,mm1
-	paddd     mm1,mm4		; mm1=data0[col0 ****]=(A0 **)
-	psubd     mm3,mm4		; mm3=data1[col0 ****]=(B0 **)
-	punpckldq mm1,mm3		; mm1=(A0 B0)
+        movq      mm3,mm1
+        paddd     mm1,mm4               ; mm1=data0[col0 ****]=(A0 **)
+        psubd     mm3,mm4               ; mm3=data1[col0 ****]=(B0 **)
+        punpckldq mm1,mm3               ; mm1=(A0 B0)
 
-	movq	mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)]	; mm7=[PD_DESCALE_P1_2]
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)]       ; mm7=[PD_DESCALE_P1_2]
 
-	movq	mm4,mm2
-	movq	mm3,mm5
-	paddd	mm2,mm0			; mm2=data0[col1 col3]=(A1 A3)
-	paddd	mm5,mm6			; mm5=data0[col5 col7]=(A5 A7)
-	psubd	mm4,mm0			; mm4=data1[col1 col3]=(B1 B3)
-	psubd	mm3,mm6			; mm3=data1[col5 col7]=(B5 B7)
+        movq    mm4,mm2
+        movq    mm3,mm5
+        paddd   mm2,mm0                 ; mm2=data0[col1 col3]=(A1 A3)
+        paddd   mm5,mm6                 ; mm5=data0[col5 col7]=(A5 A7)
+        psubd   mm4,mm0                 ; mm4=data1[col1 col3]=(B1 B3)
+        psubd   mm3,mm6                 ; mm3=data1[col5 col7]=(B5 B7)
 
-	paddd	mm1,mm7
-	psrad	mm1,DESCALE_P1_2
+        paddd   mm1,mm7
+        psrad   mm1,DESCALE_P1_2
 
-	paddd	mm2,mm7
-	paddd	mm5,mm7
-	psrad	mm2,DESCALE_P1_2
-	psrad	mm5,DESCALE_P1_2
-	paddd	mm4,mm7
-	paddd	mm3,mm7
-	psrad	mm4,DESCALE_P1_2
-	psrad	mm3,DESCALE_P1_2
+        paddd   mm2,mm7
+        paddd   mm5,mm7
+        psrad   mm2,DESCALE_P1_2
+        psrad   mm5,DESCALE_P1_2
+        paddd   mm4,mm7
+        paddd   mm3,mm7
+        psrad   mm4,DESCALE_P1_2
+        psrad   mm3,DESCALE_P1_2
 
-	; ---- Pass 2: process rows, store into output array.
+        ; ---- Pass 2: process rows, store into output array.
 
-	mov	edi, JSAMPARRAY [output_buf(ebp)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(ebp)]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(ebp)]
 
-	; | input:| result:|
-	; | A0 B0 |        |
-	; | A1 B1 | C0 C1  |
-	; | A3 B3 | D0 D1  |
-	; | A5 B5 |        |
-	; | A7 B7 |        |
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
 
-	; -- Odd part
+        ; -- Odd part
 
-	packssdw  mm2,mm4		; mm2=(A1 A3 B1 B3)
-	packssdw  mm5,mm3		; mm5=(A5 A7 B5 B7)
-	pmaddwd   mm2,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+        packssdw  mm2,mm4               ; mm2=(A1 A3 B1 B3)
+        packssdw  mm5,mm3               ; mm5=(A5 A7 B5 B7)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
 
-	paddd     mm2,mm5		; mm2=tmp0[row0 row1]
+        paddd     mm2,mm5               ; mm2=tmp0[row0 row1]
 
-	; -- Even part
+        ; -- Even part
 
-	pslld     mm1,(CONST_BITS+2)	; mm1=tmp10[row0 row1]
+        pslld     mm1,(CONST_BITS+2)    ; mm1=tmp10[row0 row1]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movq      mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)]	; mm0=[PD_DESCALE_P2_2]
+        movq      mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)]     ; mm0=[PD_DESCALE_P2_2]
 
-	movq      mm6,mm1
-	paddd     mm1,mm2		; mm1=data0[row0 row1]=(C0 C1)
-	psubd     mm6,mm2		; mm6=data1[row0 row1]=(D0 D1)
+        movq      mm6,mm1
+        paddd     mm1,mm2               ; mm1=data0[row0 row1]=(C0 C1)
+        psubd     mm6,mm2               ; mm6=data1[row0 row1]=(D0 D1)
 
-	paddd     mm1,mm0
-	paddd     mm6,mm0
-	psrad     mm1,DESCALE_P2_2
-	psrad     mm6,DESCALE_P2_2
+        paddd     mm1,mm0
+        paddd     mm6,mm0
+        psrad     mm1,DESCALE_P2_2
+        psrad     mm6,DESCALE_P2_2
 
-	movq      mm7,mm1		; transpose coefficients
-	punpckldq mm1,mm6		; mm1=(C0 D0)
-	punpckhdq mm7,mm6		; mm7=(C1 D1)
+        movq      mm7,mm1               ; transpose coefficients
+        punpckldq mm1,mm6               ; mm1=(C0 D0)
+        punpckhdq mm7,mm6               ; mm7=(C1 D1)
 
-	packssdw  mm1,mm7		; mm1=(C0 D0 C1 D1)
-	packsswb  mm1,mm1		; mm1=(C0 D0 C1 D1 C0 D0 C1 D1)
-	paddb     mm1,[GOTOFF(ebx,PB_CENTERJSAMP)]
+        packssdw  mm1,mm7               ; mm1=(C0 D0 C1 D1)
+        packsswb  mm1,mm1               ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1)
+        paddb     mm1,[GOTOFF(ebx,PB_CENTERJSAMP)]
 
-	movd	ecx,mm1
-	movd	ebx,mm1			; ebx=(C0 D0 C1 D1)
-	shr	ecx,2*BYTE_BIT		; ecx=(C1 D1 -- --)
+        movd    ecx,mm1
+        movd    ebx,mm1                 ; ebx=(C0 D0 C1 D1)
+        shr     ecx,2*BYTE_BIT          ; ecx=(C1 D1 -- --)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	WORD [edx+eax*SIZEOF_JSAMPLE], bx
-	mov	WORD [esi+eax*SIZEOF_JSAMPLE], cx
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx
+        mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2flt-64.asm b/simd/jiss2flt-64.asm
index 6e7e6d4..6d57a01 100644
--- a/simd/jiss2flt-64.asm
+++ b/simd/jiss2flt-64.asm
@@ -26,34 +26,34 @@
 
 ; --------------------------------------------------------------------------
 
-%macro	unpcklps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
-	shufps	%1,%2,0x44
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44
 %endmacro
 
-%macro	unpckhps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
-	shufps	%1,%2,0xEE
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_float_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
 
-PD_1_414	times 4 dd  1.414213562373095048801689
-PD_1_847	times 4 dd  1.847759065022573512256366
-PD_1_082	times 4 dd  1.082392200292393968799446
-PD_M2_613	times 4 dd -2.613125929752753055713286
-PD_RNDINT_MAGIC	times 4 dd  100663296.0	; (float)(0x00C00000 << 3)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PD_1_414        times 4 dd  1.414213562373095048801689
+PD_1_847        times 4 dd  1.847759065022573512256366
+PD_1_082        times 4 dd  1.082392200292393968799446
+PD_M2_613       times 4 dd -2.613125929752753055713286
+PD_RNDINT_MAGIC times 4 dd  100663296.0 ; (float)(0x00C00000 << 3)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -67,417 +67,417 @@
 ; r12 = JSAMPARRAY output_buf
 ; r13 = JDIMENSION output_col
 
-%define original_rbp	rbp+0
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
-					; FAST_FLOAT workspace[DCTSIZE2]
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_float_sse2)
+        align   16
+        global  EXTN(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [workspace]
-	collect_args
-	push	rbx
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [workspace]
+        collect_args
+        push    rbx
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-	mov	rdx, r10	; quantptr
-	mov	rsi, r11		; inptr
-	lea	rdi, [workspace]			; FAST_FLOAT * wsptr
-	mov	rcx, DCTSIZE/4				; ctr
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+        lea     rdi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     rcx, DCTSIZE/4                          ; ctr
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
-	mov	eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movq	xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	movq	xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1,xmm2
-	por	xmm3,xmm4
-	por	xmm5,xmm6
-	por	xmm1,xmm3
-	por	xmm5,xmm7
-	por	xmm1,xmm5
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	rax,rax
-	jnz	short .columnDCT
+        movq    xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm2
+        por     xmm3,xmm4
+        por     xmm5,xmm6
+        por     xmm1,xmm3
+        por     xmm5,xmm7
+        por     xmm1,xmm5
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    rax,rax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	psrad     xmm0,(DWORD_BIT-WORD_BIT)	; xmm0=in0=(00 01 02 03)
-	cvtdq2ps  xmm0,xmm0			; xmm0=in0=(00 01 02 03)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
 
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm1,xmm0
-	movaps	xmm2,xmm0
-	movaps	xmm3,xmm0
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
 
-	shufps	xmm0,xmm0,0x00			; xmm0=(00 00 00 00)
-	shufps	xmm1,xmm1,0x55			; xmm1=(01 01 01 01)
-	shufps	xmm2,xmm2,0xAA			; xmm2=(02 02 02 02)
-	shufps	xmm3,xmm3,0xFF			; xmm3=(03 03 03 03)
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
-	jmp	near .nextcolumn
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	punpcklwd xmm1,xmm1		; xmm1=(20 20 21 21 22 22 23 23)
-	psrad     xmm0,(DWORD_BIT-WORD_BIT)	; xmm0=in0=(00 01 02 03)
-	psrad     xmm1,(DWORD_BIT-WORD_BIT)	; xmm1=in2=(20 21 22 23)
-	cvtdq2ps  xmm0,xmm0			; xmm0=in0=(00 01 02 03)
-	cvtdq2ps  xmm1,xmm1			; xmm1=in2=(20 21 22 23)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpcklwd xmm1,xmm1             ; xmm1=(20 20 21 21 22 22 23 23)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in2=(20 21 22 23)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in2=(20 21 22 23)
 
-	punpcklwd xmm2,xmm2		; xmm2=(40 40 41 41 42 42 43 43)
-	punpcklwd xmm3,xmm3		; xmm3=(60 60 61 61 62 62 63 63)
-	psrad     xmm2,(DWORD_BIT-WORD_BIT)	; xmm2=in4=(40 41 42 43)
-	psrad     xmm3,(DWORD_BIT-WORD_BIT)	; xmm3=in6=(60 61 62 63)
-	cvtdq2ps  xmm2,xmm2			; xmm2=in4=(40 41 42 43)
-	cvtdq2ps  xmm3,xmm3			; xmm3=in6=(60 61 62 63)
+        punpcklwd xmm2,xmm2             ; xmm2=(40 40 41 41 42 42 43 43)
+        punpcklwd xmm3,xmm3             ; xmm3=(60 60 61 61 62 62 63 63)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in4=(40 41 42 43)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in6=(60 61 62 63)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in4=(40 41 42 43)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in6=(60 61 62 63)
 
-	mulps     xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[rel PD_1_414]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[rel PD_1_414]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm2,xmm2		; xmm2=(10 10 11 11 12 12 13 13)
-	punpcklwd xmm3,xmm3		; xmm3=(30 30 31 31 32 32 33 33)
-	psrad     xmm2,(DWORD_BIT-WORD_BIT)	; xmm2=in1=(10 11 12 13)
-	psrad     xmm3,(DWORD_BIT-WORD_BIT)	; xmm3=in3=(30 31 32 33)
-	cvtdq2ps  xmm2,xmm2			; xmm2=in1=(10 11 12 13)
-	cvtdq2ps  xmm3,xmm3			; xmm3=in3=(30 31 32 33)
+        punpcklwd xmm2,xmm2             ; xmm2=(10 10 11 11 12 12 13 13)
+        punpcklwd xmm3,xmm3             ; xmm3=(30 30 31 31 32 32 33 33)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in1=(10 11 12 13)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in3=(30 31 32 33)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in1=(10 11 12 13)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in3=(30 31 32 33)
 
-	punpcklwd xmm5,xmm5		; xmm5=(50 50 51 51 52 52 53 53)
-	punpcklwd xmm1,xmm1		; xmm1=(70 70 71 71 72 72 73 73)
-	psrad     xmm5,(DWORD_BIT-WORD_BIT)	; xmm5=in5=(50 51 52 53)
-	psrad     xmm1,(DWORD_BIT-WORD_BIT)	; xmm1=in7=(70 71 72 73)
-	cvtdq2ps  xmm5,xmm5			; xmm5=in5=(50 51 52 53)
-	cvtdq2ps  xmm1,xmm1			; xmm1=in7=(70 71 72 73)
+        punpcklwd xmm5,xmm5             ; xmm5=(50 50 51 51 52 52 53 53)
+        punpcklwd xmm1,xmm1             ; xmm1=(70 70 71 71 72 72 73 73)
+        psrad     xmm5,(DWORD_BIT-WORD_BIT)     ; xmm5=in5=(50 51 52 53)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in7=(70 71 72 73)
+        cvtdq2ps  xmm5,xmm5                     ; xmm5=in5=(50 51 52 53)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in7=(70 71 72 73)
 
-	mulps     xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[rel PD_1_414]	; xmm2=tmp11
+        mulps   xmm2,[rel PD_1_414]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[rel PD_1_847]	; xmm0=z5
-	mulps	xmm3,[rel PD_M2_613]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[rel PD_1_082]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[rel PD_1_847]     ; xmm0=z5
+        mulps   xmm3,[rel PD_M2_613]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[rel PD_1_082]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 01 02 03)
-	addps	xmm7,xmm3		; xmm7=data1=(10 11 12 13)
-	subps	xmm5,xmm1		; xmm5=data7=(70 71 72 73)
-	subps	xmm0,xmm3		; xmm0=data6=(60 61 62 63)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps    xmm1,xmm6		; transpose coefficients(phase 1)
-	unpcklps  xmm6,xmm7		; xmm6=(00 10 01 11)
-	unpckhps  xmm1,xmm7		; xmm1=(02 12 03 13)
-	movaps    xmm3,xmm0		; transpose coefficients(phase 1)
-	unpcklps  xmm0,xmm5		; xmm0=(60 70 61 71)
-	unpckhps  xmm3,xmm5		; xmm3=(62 72 63 73)
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=tmp2
-	movaps	xmm5, XMMWORD [wk(1)]	; xmm5=tmp3
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
 
-	movaps	XMMWORD [wk(0)], xmm0	; wk(0)=(60 70 61 71)
-	movaps	XMMWORD [wk(1)], xmm3	; wk(1)=(62 72 63 73)
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm0,xmm7
-	movaps	xmm3,xmm5
-	addps	xmm7,xmm2		; xmm7=data2=(20 21 22 23)
-	addps	xmm5,xmm4		; xmm5=data4=(40 41 42 43)
-	subps	xmm0,xmm2		; xmm0=data5=(50 51 52 53)
-	subps	xmm3,xmm4		; xmm3=data3=(30 31 32 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
 
-	movaps    xmm2,xmm7		; transpose coefficients(phase 1)
-	unpcklps  xmm7,xmm3		; xmm7=(20 30 21 31)
-	unpckhps  xmm2,xmm3		; xmm2=(22 32 23 33)
-	movaps    xmm4,xmm5		; transpose coefficients(phase 1)
-	unpcklps  xmm5,xmm0		; xmm5=(40 50 41 51)
-	unpckhps  xmm4,xmm0		; xmm4=(42 52 43 53)
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
 
-	movaps    xmm3,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm7		; xmm6=(00 10 20 30)
-	unpckhps2 xmm3,xmm7		; xmm3=(01 11 21 31)
-	movaps    xmm0,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm2		; xmm1=(02 12 22 32)
-	unpckhps2 xmm0,xmm2		; xmm0=(03 13 23 33)
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=(60 70 61 71)
-	movaps	xmm2, XMMWORD [wk(1)]	; xmm2=(62 72 63 73)
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
 
-	movaps    xmm6,xmm5		; transpose coefficients(phase 2)
-	unpcklps2 xmm5,xmm7		; xmm5=(40 50 60 70)
-	unpckhps2 xmm6,xmm7		; xmm6=(41 51 61 71)
-	movaps    xmm3,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(42 52 62 72)
-	unpckhps2 xmm3,xmm2		; xmm3=(43 53 63 73)
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
-	movaps	XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
 
 .nextcolumn:
-	add	rsi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE	; quantptr
-	add	rdi,      4*DCTSIZE*SIZEOF_FAST_FLOAT	; wsptr
-	dec	rcx					; ctr
-	jnz	near .columnloop
+        add     rsi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     rdi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     rcx                                     ; ctr
+        jnz     near .columnloop
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
-	prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
-	prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
-	prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	rax, [original_rbp]
-	lea	rsi, [workspace]			; FAST_FLOAT * wsptr
-	mov	rdi, r12	; (JSAMPROW *)
-	mov	rax, r13
-	mov	rcx, DCTSIZE/4				; ctr
+        mov     rax, [original_rbp]
+        lea     rsi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     rax, r13
+        mov     rcx, DCTSIZE/4                          ; ctr
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[rel PD_1_414]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[rel PD_1_414]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[rel PD_1_414]	; xmm2=tmp11
+        mulps   xmm2,[rel PD_1_414]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[rel PD_1_847]	; xmm0=z5
-	mulps	xmm3,[rel PD_M2_613]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[rel PD_1_082]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[rel PD_1_847]     ; xmm0=z5
+        mulps   xmm3,[rel PD_M2_613]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[rel PD_1_082]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 10 20 30)
-	addps	xmm7,xmm3		; xmm7=data1=(01 11 21 31)
-	subps	xmm5,xmm1		; xmm5=data7=(07 17 27 37)
-	subps	xmm0,xmm3		; xmm0=data6=(06 16 26 36)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps	xmm1,[rel PD_RNDINT_MAGIC]	; xmm1=[rel PD_RNDINT_MAGIC]
-	pcmpeqd	xmm3,xmm3
-	psrld	xmm3,WORD_BIT		; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+        movaps  xmm1,[rel PD_RNDINT_MAGIC]      ; xmm1=[rel PD_RNDINT_MAGIC]
+        pcmpeqd xmm3,xmm3
+        psrld   xmm3,WORD_BIT           ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
 
-	addps	xmm6,xmm1	; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
-	addps	xmm7,xmm1	; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
-	addps	xmm0,xmm1	; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
-	addps	xmm5,xmm1	; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+        addps   xmm6,xmm1       ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+        addps   xmm7,xmm1       ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+        addps   xmm0,xmm1       ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+        addps   xmm5,xmm1       ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
 
-	pand	xmm6,xmm3		; xmm6=(00 -- 10 -- 20 -- 30 --)
-	pslld	xmm7,WORD_BIT		; xmm7=(-- 01 -- 11 -- 21 -- 31)
-	pand	xmm0,xmm3		; xmm0=(06 -- 16 -- 26 -- 36 --)
-	pslld	xmm5,WORD_BIT		; xmm5=(-- 07 -- 17 -- 27 -- 37)
-	por	xmm6,xmm7		; xmm6=(00 01 10 11 20 21 30 31)
-	por	xmm0,xmm5		; xmm0=(06 07 16 17 26 27 36 37)
+        pand    xmm6,xmm3               ; xmm6=(00 -- 10 -- 20 -- 30 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+        pand    xmm0,xmm3               ; xmm0=(06 -- 16 -- 26 -- 36 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+        por     xmm6,xmm7               ; xmm6=(00 01 10 11 20 21 30 31)
+        por     xmm0,xmm5               ; xmm0=(06 07 16 17 26 27 36 37)
 
-	movaps	xmm1, XMMWORD [wk(0)]	; xmm1=tmp2
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=tmp3
+        movaps  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm7,xmm1
-	movaps	xmm5,xmm3
-	addps	xmm1,xmm2		; xmm1=data2=(02 12 22 32)
-	addps	xmm3,xmm4		; xmm3=data4=(04 14 24 34)
-	subps	xmm7,xmm2		; xmm7=data5=(05 15 25 35)
-	subps	xmm5,xmm4		; xmm5=data3=(03 13 23 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm7,xmm1
+        movaps  xmm5,xmm3
+        addps   xmm1,xmm2               ; xmm1=data2=(02 12 22 32)
+        addps   xmm3,xmm4               ; xmm3=data4=(04 14 24 34)
+        subps   xmm7,xmm2               ; xmm7=data5=(05 15 25 35)
+        subps   xmm5,xmm4               ; xmm5=data3=(03 13 23 33)
 
-	movaps	xmm2,[rel PD_RNDINT_MAGIC]	; xmm2=[rel PD_RNDINT_MAGIC]
-	pcmpeqd	xmm4,xmm4
-	psrld	xmm4,WORD_BIT		; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+        movaps  xmm2,[rel PD_RNDINT_MAGIC]      ; xmm2=[rel PD_RNDINT_MAGIC]
+        pcmpeqd xmm4,xmm4
+        psrld   xmm4,WORD_BIT           ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
 
-	addps	xmm3,xmm2	; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
-	addps	xmm7,xmm2	; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
-	addps	xmm1,xmm2	; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
-	addps	xmm5,xmm2	; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+        addps   xmm3,xmm2       ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+        addps   xmm7,xmm2       ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+        addps   xmm1,xmm2       ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+        addps   xmm5,xmm2       ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
 
-	pand	xmm3,xmm4		; xmm3=(04 -- 14 -- 24 -- 34 --)
-	pslld	xmm7,WORD_BIT		; xmm7=(-- 05 -- 15 -- 25 -- 35)
-	pand	xmm1,xmm4		; xmm1=(02 -- 12 -- 22 -- 32 --)
-	pslld	xmm5,WORD_BIT		; xmm5=(-- 03 -- 13 -- 23 -- 33)
-	por	xmm3,xmm7		; xmm3=(04 05 14 15 24 25 34 35)
-	por	xmm1,xmm5		; xmm1=(02 03 12 13 22 23 32 33)
+        pand    xmm3,xmm4               ; xmm3=(04 -- 14 -- 24 -- 34 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+        pand    xmm1,xmm4               ; xmm1=(02 -- 12 -- 22 -- 32 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+        por     xmm3,xmm7               ; xmm3=(04 05 14 15 24 25 34 35)
+        por     xmm1,xmm5               ; xmm1=(02 03 12 13 22 23 32 33)
 
-	movdqa    xmm2,[rel PB_CENTERJSAMP]	; xmm2=[rel PB_CENTERJSAMP]
+        movdqa    xmm2,[rel PB_CENTERJSAMP]     ; xmm2=[rel PB_CENTERJSAMP]
 
-	packsswb  xmm6,xmm3	; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
-	packsswb  xmm1,xmm0	; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
-	paddb     xmm6,xmm2
-	paddb     xmm1,xmm2
+        packsswb  xmm6,xmm3     ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+        packsswb  xmm1,xmm0     ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+        paddb     xmm6,xmm2
+        paddb     xmm1,xmm2
 
-	movdqa    xmm4,xmm6	; transpose coefficients(phase 2)
-	punpcklwd xmm6,xmm1	; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm1	; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        movdqa    xmm4,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm1     ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm1     ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
 
-	movdqa    xmm7,xmm6	; transpose coefficients(phase 3)
-	punpckldq xmm6,xmm4	; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm7,xmm4	; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm6     ; transpose coefficients(phase 3)
+        punpckldq xmm6,xmm4     ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm7,xmm4     ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
 
-	pshufd	xmm5,xmm6,0x4E	; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm3,xmm7,0x4E	; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm5,xmm6,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm3,xmm7,0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
 
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
-	mov	rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
-	mov	rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-	mov	rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
-	movq	XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
 
-	add	rsi, byte 4*SIZEOF_FAST_FLOAT	; wsptr
-	add	rdi, byte 4*SIZEOF_JSAMPROW
-	dec	rcx				; ctr
-	jnz	near .rowloop
+        add     rsi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     rdi, byte 4*SIZEOF_JSAMPROW
+        dec     rcx                             ; ctr
+        jnz     near .rowloop
 
-	pop	rbx
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2flt.asm b/simd/jiss2flt.asm
index 17bc363..86c9056 100644
--- a/simd/jiss2flt.asm
+++ b/simd/jiss2flt.asm
@@ -25,34 +25,34 @@
 
 ; --------------------------------------------------------------------------
 
-%macro	unpcklps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
-	shufps	%1,%2,0x44
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44
 %endmacro
 
-%macro	unpckhps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
-	shufps	%1,%2,0xEE
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_float_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
 
-PD_1_414	times 4 dd  1.414213562373095048801689
-PD_1_847	times 4 dd  1.847759065022573512256366
-PD_1_082	times 4 dd  1.082392200292393968799446
-PD_M2_613	times 4 dd -2.613125929752753055713286
-PD_RNDINT_MAGIC	times 4 dd  100663296.0	; (float)(0x00C00000 << 3)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PD_1_414        times 4 dd  1.414213562373095048801689
+PD_1_847        times 4 dd  1.847759065022573512256366
+PD_1_082        times 4 dd  1.082392200292393968799446
+PD_M2_613       times 4 dd -2.613125929752753055713286
+PD_RNDINT_MAGIC times 4 dd  100663296.0 ; (float)(0x00C00000 << 3)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -61,438 +61,438 @@
 ;                        JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
-					; FAST_FLOAT workspace[DCTSIZE2]
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_float_sse2)
+        align   16
+        global  EXTN(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp                             ; save caller's ebp
+        mov     eax,esp                         ; eax = esp after the push (address of saved ebp); args are read at eax+8..eax+20
+        sub     esp, byte 4                     ; leave room for one dword before aligning
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; stash eax; read back later as [original_ebp]
+        mov     ebp,esp                         ; ebp = aligned ebp (base for wk() and workspace)
+        lea     esp, [workspace]                ; move esp to the low end of the workspace area
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; FAST_FLOAT * wsptr
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movq	xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq	xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq	xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq	xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq	xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	movq	xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	xmm1,xmm2
-	por	xmm3,xmm4
-	por	xmm5,xmm6
-	por	xmm1,xmm3
-	por	xmm5,xmm7
-	por	xmm1,xmm5
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movq    xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq    xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm2
+        por     xmm3,xmm4
+        por     xmm5,xmm6
+        por     xmm1,xmm3
+        por     xmm5,xmm7
+        por     xmm1,xmm5
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	psrad     xmm0,(DWORD_BIT-WORD_BIT)	; xmm0=in0=(00 01 02 03)
-	cvtdq2ps  xmm0,xmm0			; xmm0=in0=(00 01 02 03)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
 
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm1,xmm0
-	movaps	xmm2,xmm0
-	movaps	xmm3,xmm0
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
 
-	shufps	xmm0,xmm0,0x00			; xmm0=(00 00 00 00)
-	shufps	xmm1,xmm1,0x55			; xmm1=(01 01 01 01)
-	shufps	xmm2,xmm2,0xAA			; xmm2=(02 02 02 02)
-	shufps	xmm3,xmm3,0xFF			; xmm3=(03 03 03 03)
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
-	jmp	near .nextcolumn
-	alignx	16,7
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	punpcklwd xmm1,xmm1		; xmm1=(20 20 21 21 22 22 23 23)
-	psrad     xmm0,(DWORD_BIT-WORD_BIT)	; xmm0=in0=(00 01 02 03)
-	psrad     xmm1,(DWORD_BIT-WORD_BIT)	; xmm1=in2=(20 21 22 23)
-	cvtdq2ps  xmm0,xmm0			; xmm0=in0=(00 01 02 03)
-	cvtdq2ps  xmm1,xmm1			; xmm1=in2=(20 21 22 23)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpcklwd xmm1,xmm1             ; xmm1=(20 20 21 21 22 22 23 23)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in2=(20 21 22 23)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in2=(20 21 22 23)
 
-	punpcklwd xmm2,xmm2		; xmm2=(40 40 41 41 42 42 43 43)
-	punpcklwd xmm3,xmm3		; xmm3=(60 60 61 61 62 62 63 63)
-	psrad     xmm2,(DWORD_BIT-WORD_BIT)	; xmm2=in4=(40 41 42 43)
-	psrad     xmm3,(DWORD_BIT-WORD_BIT)	; xmm3=in6=(60 61 62 63)
-	cvtdq2ps  xmm2,xmm2			; xmm2=in4=(40 41 42 43)
-	cvtdq2ps  xmm3,xmm3			; xmm3=in6=(60 61 62 63)
+        punpcklwd xmm2,xmm2             ; xmm2=(40 40 41 41 42 42 43 43)
+        punpcklwd xmm3,xmm3             ; xmm3=(60 60 61 61 62 62 63 63)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in4=(40 41 42 43)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in6=(60 61 62 63)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in4=(40 41 42 43)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in6=(60 61 62 63)
 
-	mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[GOTOFF(ebx,PD_1_414)]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	punpcklwd xmm2,xmm2		; xmm2=(10 10 11 11 12 12 13 13)
-	punpcklwd xmm3,xmm3		; xmm3=(30 30 31 31 32 32 33 33)
-	psrad     xmm2,(DWORD_BIT-WORD_BIT)	; xmm2=in1=(10 11 12 13)
-	psrad     xmm3,(DWORD_BIT-WORD_BIT)	; xmm3=in3=(30 31 32 33)
-	cvtdq2ps  xmm2,xmm2			; xmm2=in1=(10 11 12 13)
-	cvtdq2ps  xmm3,xmm3			; xmm3=in3=(30 31 32 33)
+        punpcklwd xmm2,xmm2             ; xmm2=(10 10 11 11 12 12 13 13)
+        punpcklwd xmm3,xmm3             ; xmm3=(30 30 31 31 32 32 33 33)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in1=(10 11 12 13)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in3=(30 31 32 33)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in1=(10 11 12 13)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in3=(30 31 32 33)
 
-	punpcklwd xmm5,xmm5		; xmm5=(50 50 51 51 52 52 53 53)
-	punpcklwd xmm1,xmm1		; xmm1=(70 70 71 71 72 72 73 73)
-	psrad     xmm5,(DWORD_BIT-WORD_BIT)	; xmm5=in5=(50 51 52 53)
-	psrad     xmm1,(DWORD_BIT-WORD_BIT)	; xmm1=in7=(70 71 72 73)
-	cvtdq2ps  xmm5,xmm5			; xmm5=in5=(50 51 52 53)
-	cvtdq2ps  xmm1,xmm1			; xmm1=in7=(70 71 72 73)
+        punpcklwd xmm5,xmm5             ; xmm5=(50 50 51 51 52 52 53 53)
+        punpcklwd xmm1,xmm1             ; xmm1=(70 70 71 71 72 72 73 73)
+        psrad     xmm5,(DWORD_BIT-WORD_BIT)     ; xmm5=in5=(50 51 52 53)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in7=(70 71 72 73)
+        cvtdq2ps  xmm5,xmm5                     ; xmm5=in5=(50 51 52 53)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in7=(70 71 72 73)
 
-	mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[GOTOFF(ebx,PD_1_414)]	; xmm2=tmp11
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[GOTOFF(ebx,PD_1_847)]	; xmm0=z5
-	mulps	xmm3,[GOTOFF(ebx,PD_M2_613)]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[GOTOFF(ebx,PD_1_082)]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 01 02 03)
-	addps	xmm7,xmm3		; xmm7=data1=(10 11 12 13)
-	subps	xmm5,xmm1		; xmm5=data7=(70 71 72 73)
-	subps	xmm0,xmm3		; xmm0=data6=(60 61 62 63)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps    xmm1,xmm6		; transpose coefficients(phase 1)
-	unpcklps  xmm6,xmm7		; xmm6=(00 10 01 11)
-	unpckhps  xmm1,xmm7		; xmm1=(02 12 03 13)
-	movaps    xmm3,xmm0		; transpose coefficients(phase 1)
-	unpcklps  xmm0,xmm5		; xmm0=(60 70 61 71)
-	unpckhps  xmm3,xmm5		; xmm3=(62 72 63 73)
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=tmp2
-	movaps	xmm5, XMMWORD [wk(1)]	; xmm5=tmp3
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
 
-	movaps	XMMWORD [wk(0)], xmm0	; wk(0)=(60 70 61 71)
-	movaps	XMMWORD [wk(1)], xmm3	; wk(1)=(62 72 63 73)
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm0,xmm7
-	movaps	xmm3,xmm5
-	addps	xmm7,xmm2		; xmm7=data2=(20 21 22 23)
-	addps	xmm5,xmm4		; xmm5=data4=(40 41 42 43)
-	subps	xmm0,xmm2		; xmm0=data5=(50 51 52 53)
-	subps	xmm3,xmm4		; xmm3=data3=(30 31 32 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
 
-	movaps    xmm2,xmm7		; transpose coefficients(phase 1)
-	unpcklps  xmm7,xmm3		; xmm7=(20 30 21 31)
-	unpckhps  xmm2,xmm3		; xmm2=(22 32 23 33)
-	movaps    xmm4,xmm5		; transpose coefficients(phase 1)
-	unpcklps  xmm5,xmm0		; xmm5=(40 50 41 51)
-	unpckhps  xmm4,xmm0		; xmm4=(42 52 43 53)
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
 
-	movaps    xmm3,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm7		; xmm6=(00 10 20 30)
-	unpckhps2 xmm3,xmm7		; xmm3=(01 11 21 31)
-	movaps    xmm0,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm2		; xmm1=(02 12 22 32)
-	unpckhps2 xmm0,xmm2		; xmm0=(03 13 23 33)
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=(60 70 61 71)
-	movaps	xmm2, XMMWORD [wk(1)]	; xmm2=(62 72 63 73)
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
-	movaps    xmm6,xmm5		; transpose coefficients(phase 2)
-	unpcklps2 xmm5,xmm7		; xmm5=(40 50 60 70)
-	unpckhps2 xmm6,xmm7		; xmm6=(41 51 61 71)
-	movaps    xmm3,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(42 52 62 72)
-	unpckhps2 xmm3,xmm2		; xmm3=(43 53 63 73)
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
-	movaps	XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
 
 .nextcolumn:
-	add	esi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 4*SIZEOF_FLOAT_MULT_TYPE	; quantptr
-	add	edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; FAST_FLOAT * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[GOTOFF(ebx,PD_1_414)]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[GOTOFF(ebx,PD_1_414)]	; xmm2=tmp11
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[GOTOFF(ebx,PD_1_847)]	; xmm0=z5
-	mulps	xmm3,[GOTOFF(ebx,PD_M2_613)]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[GOTOFF(ebx,PD_1_082)]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 10 20 30)
-	addps	xmm7,xmm3		; xmm7=data1=(01 11 21 31)
-	subps	xmm5,xmm1		; xmm5=data7=(07 17 27 37)
-	subps	xmm0,xmm3		; xmm0=data6=(06 16 26 36)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps	xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]	; xmm1=[PD_RNDINT_MAGIC]
-	pcmpeqd	xmm3,xmm3
-	psrld	xmm3,WORD_BIT		; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+        movaps  xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm1=[PD_RNDINT_MAGIC]
+        pcmpeqd xmm3,xmm3
+        psrld   xmm3,WORD_BIT           ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
 
-	addps	xmm6,xmm1	; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
-	addps	xmm7,xmm1	; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
-	addps	xmm0,xmm1	; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
-	addps	xmm5,xmm1	; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+        addps   xmm6,xmm1       ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+        addps   xmm7,xmm1       ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+        addps   xmm0,xmm1       ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+        addps   xmm5,xmm1       ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
 
-	pand	xmm6,xmm3		; xmm6=(00 -- 10 -- 20 -- 30 --)
-	pslld	xmm7,WORD_BIT		; xmm7=(-- 01 -- 11 -- 21 -- 31)
-	pand	xmm0,xmm3		; xmm0=(06 -- 16 -- 26 -- 36 --)
-	pslld	xmm5,WORD_BIT		; xmm5=(-- 07 -- 17 -- 27 -- 37)
-	por	xmm6,xmm7		; xmm6=(00 01 10 11 20 21 30 31)
-	por	xmm0,xmm5		; xmm0=(06 07 16 17 26 27 36 37)
+        pand    xmm6,xmm3               ; xmm6=(00 -- 10 -- 20 -- 30 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+        pand    xmm0,xmm3               ; xmm0=(06 -- 16 -- 26 -- 36 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+        por     xmm6,xmm7               ; xmm6=(00 01 10 11 20 21 30 31)
+        por     xmm0,xmm5               ; xmm0=(06 07 16 17 26 27 36 37)
 
-	movaps	xmm1, XMMWORD [wk(0)]	; xmm1=tmp2
-	movaps	xmm3, XMMWORD [wk(1)]	; xmm3=tmp3
+        movaps  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm7,xmm1
-	movaps	xmm5,xmm3
-	addps	xmm1,xmm2		; xmm1=data2=(02 12 22 32)
-	addps	xmm3,xmm4		; xmm3=data4=(04 14 24 34)
-	subps	xmm7,xmm2		; xmm7=data5=(05 15 25 35)
-	subps	xmm5,xmm4		; xmm5=data3=(03 13 23 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm7,xmm1
+        movaps  xmm5,xmm3
+        addps   xmm1,xmm2               ; xmm1=data2=(02 12 22 32)
+        addps   xmm3,xmm4               ; xmm3=data4=(04 14 24 34)
+        subps   xmm7,xmm2               ; xmm7=data5=(05 15 25 35)
+        subps   xmm5,xmm4               ; xmm5=data3=(03 13 23 33)
 
-	movaps	xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]	; xmm2=[PD_RNDINT_MAGIC]
-	pcmpeqd	xmm4,xmm4
-	psrld	xmm4,WORD_BIT		; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+        movaps  xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm2=[PD_RNDINT_MAGIC]
+        pcmpeqd xmm4,xmm4
+        psrld   xmm4,WORD_BIT           ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
 
-	addps	xmm3,xmm2	; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
-	addps	xmm7,xmm2	; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
-	addps	xmm1,xmm2	; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
-	addps	xmm5,xmm2	; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+        addps   xmm3,xmm2       ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+        addps   xmm7,xmm2       ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+        addps   xmm1,xmm2       ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+        addps   xmm5,xmm2       ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
 
-	pand	xmm3,xmm4		; xmm3=(04 -- 14 -- 24 -- 34 --)
-	pslld	xmm7,WORD_BIT		; xmm7=(-- 05 -- 15 -- 25 -- 35)
-	pand	xmm1,xmm4		; xmm1=(02 -- 12 -- 22 -- 32 --)
-	pslld	xmm5,WORD_BIT		; xmm5=(-- 03 -- 13 -- 23 -- 33)
-	por	xmm3,xmm7		; xmm3=(04 05 14 15 24 25 34 35)
-	por	xmm1,xmm5		; xmm1=(02 03 12 13 22 23 32 33)
+        pand    xmm3,xmm4               ; xmm3=(04 -- 14 -- 24 -- 34 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+        pand    xmm1,xmm4               ; xmm1=(02 -- 12 -- 22 -- 32 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+        por     xmm3,xmm7               ; xmm3=(04 05 14 15 24 25 34 35)
+        por     xmm1,xmm5               ; xmm1=(02 03 12 13 22 23 32 33)
 
-	movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]	; xmm2=[PB_CENTERJSAMP]
+        movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
 
-	packsswb  xmm6,xmm3	; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
-	packsswb  xmm1,xmm0	; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
-	paddb     xmm6,xmm2
-	paddb     xmm1,xmm2
+        packsswb  xmm6,xmm3     ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+        packsswb  xmm1,xmm0     ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+        paddb     xmm6,xmm2
+        paddb     xmm1,xmm2
 
-	movdqa    xmm4,xmm6	; transpose coefficients(phase 2)
-	punpcklwd xmm6,xmm1	; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm1	; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        movdqa    xmm4,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm1     ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm1     ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
 
-	movdqa    xmm7,xmm6	; transpose coefficients(phase 3)
-	punpckldq xmm6,xmm4	; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm7,xmm4	; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm6     ; transpose coefficients(phase 3)
+        punpckldq xmm6,xmm4     ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm7,xmm4     ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
 
-	pshufd	xmm5,xmm6,0x4E	; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm3,xmm7,0x4E	; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm5,xmm6,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm3,xmm7,0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
 
-	pushpic	ebx			; save GOT address
+        pushpic ebx                     ; save GOT address
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7
-	mov	edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
-	movq	XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
 
-	poppic	ebx			; restore GOT address
+        poppic  ebx                     ; restore GOT address
 
-	add	esi, byte 4*SIZEOF_FAST_FLOAT	; wsptr
-	add	edi, byte 4*SIZEOF_JSAMPROW
-	dec	ecx				; ctr
-	jnz	near .rowloop
+        add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- value stashed at [original_ebp] (pre-alignment esp, pointing at the saved ebp)
+        pop     ebp             ; restore caller's ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2fst-64.asm b/simd/jiss2fst-64.asm
index 0887505..432e289 100644
--- a/simd/jiss2fst-64.asm
+++ b/simd/jiss2fst-64.asm
@@ -27,31 +27,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
-%define PASS1_BITS	2
+%define CONST_BITS      8       ; 14 is also OK.
+%define PASS1_BITS      2
 
 %if IFAST_SCALE_BITS != PASS1_BITS
 %error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
 %endif
 
 %if CONST_BITS == 8
-F_1_082	equ	277		; FIX(1.082392200)
-F_1_414	equ	362		; FIX(1.414213562)
-F_1_847	equ	473		; FIX(1.847759065)
-F_2_613	equ	669		; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - 256)	; FIX(2.613125930) - FIX(1)
+F_1_082 equ     277             ; FIX(1.082392200)
+F_1_414 equ     362             ; FIX(1.414213562)
+F_1_847 equ     473             ; FIX(1.847759065)
+F_2_613 equ     669             ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
-%define	DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_1_082	equ	DESCALE(1162209775,30-CONST_BITS)	; FIX(1.082392200)
-F_1_414	equ	DESCALE(1518500249,30-CONST_BITS)	; FIX(1.414213562)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_613	equ	DESCALE(2805822602,30-CONST_BITS)	; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - (1 << CONST_BITS))	; FIX(2.613125930) - FIX(1)
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -59,22 +59,22 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_idct_ifast_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
 
-PW_F1414	times 8 dw  F_1_414 << CONST_SHIFT
-PW_F1847	times 8 dw  F_1_847 << CONST_SHIFT
-PW_MF1613	times 8 dw -F_1_613 << CONST_SHIFT
-PW_F1082	times 8 dw  F_1_082 << CONST_SHIFT
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F1414        times 8 dw  F_1_414 << CONST_SHIFT
+PW_F1847        times 8 dw  F_1_847 << CONST_SHIFT
+PW_MF1613       times 8 dw -F_1_613 << CONST_SHIFT
+PW_F1082        times 8 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -88,405 +88,405 @@
 ; r12 = JSAMPARRAY output_buf
 ; r13 = JDIMENSION output_col
 
-%define original_rbp	rbp+0
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_idct_ifast_sse2)
+        align   16
+        global  EXTN(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp                             ; save caller's rbp
+        mov     rax,rsp                         ; rax = rsp after the push (address of the saved rbp)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; keep the unaligned stack pointer at [aligned rsp]
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]                    ; rsp = &wk[0], i.e. WK_NUM xmmwords below rbp
+        collect_args
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	rdx, r10	; quantptr
-	mov	rsi, r11		; inptr
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1,xmm0
-	packsswb xmm1,xmm1
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	rax,rax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    rax,rax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]             ; row 0 of the coefficient block
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]    ; NOTE(review): ISLOW size macro in the IFAST routine; the .columnDCT path uses SIZEOF_IFAST_MULT_TYPE -- confirm both are the same size
 
-	movdqa    xmm7,xmm0		; xmm0=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm7,xmm7		; xmm7=(04 04 05 05 06 06 07 07)
+        movdqa    xmm7,xmm0             ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm7,xmm7             ; xmm7=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm6,xmm0,0x00		; xmm6=col0=(00 00 00 00 00 00 00 00)
-	pshufd	xmm2,xmm0,0x55		; xmm2=col1=(01 01 01 01 01 01 01 01)
-	pshufd	xmm5,xmm0,0xAA		; xmm5=col2=(02 02 02 02 02 02 02 02)
-	pshufd	xmm0,xmm0,0xFF		; xmm0=col3=(03 03 03 03 03 03 03 03)
-	pshufd	xmm1,xmm7,0x00		; xmm1=col4=(04 04 04 04 04 04 04 04)
-	pshufd	xmm4,xmm7,0x55		; xmm4=col5=(05 05 05 05 05 05 05 05)
-	pshufd	xmm3,xmm7,0xAA		; xmm3=col6=(06 06 06 06 06 06 06 06)
-	pshufd	xmm7,xmm7,0xFF		; xmm7=col7=(07 07 07 07 07 07 07 07)
+        pshufd  xmm6,xmm0,0x00          ; xmm6=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm2,xmm0,0x55          ; xmm2=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm5,xmm0,0xAA          ; xmm5=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm0,xmm0,0xFF          ; xmm0=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm1,xmm7,0x00          ; xmm1=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm4,xmm7,0x55          ; xmm4=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm3,xmm7,0xAA          ; xmm3=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm7,xmm7,0xFF          ; xmm7=col7=(07 07 07 07 07 07 07 07)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=col1
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=col3
-	jmp	near .column_end
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=col3
+        jmp     near .column_end
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movdqa	xmm4,xmm0
-	movdqa	xmm5,xmm1
-	psubw	xmm0,xmm2		; xmm0=tmp11
-	psubw	xmm1,xmm3
-	paddw	xmm4,xmm2		; xmm4=tmp10
-	paddw	xmm5,xmm3		; xmm5=tmp13
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        psubw   xmm0,xmm2               ; xmm0=tmp11
+        psubw   xmm1,xmm3
+        paddw   xmm4,xmm2               ; xmm4=tmp10
+        paddw   xmm5,xmm3               ; xmm5=tmp13
 
-	psllw	xmm1,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm1,[rel PW_F1414]
-	psubw	xmm1,xmm5		; xmm1=tmp12
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm1,[rel PW_F1414]
+        psubw   xmm1,xmm5               ; xmm1=tmp12
 
-	movdqa	xmm6,xmm4
-	movdqa	xmm7,xmm0
-	psubw	xmm4,xmm5		; xmm4=tmp3
-	psubw	xmm0,xmm1		; xmm0=tmp2
-	paddw	xmm6,xmm5		; xmm6=tmp0
-	paddw	xmm7,xmm1		; xmm7=tmp1
+        movdqa  xmm6,xmm4
+        movdqa  xmm7,xmm0
+        psubw   xmm4,xmm5               ; xmm4=tmp3
+        psubw   xmm0,xmm1               ; xmm0=tmp2
+        paddw   xmm6,xmm5               ; xmm6=tmp0
+        paddw   xmm7,xmm1               ; xmm7=tmp1
 
-	movdqa	XMMWORD [wk(1)], xmm4	; wk(1)=tmp3
-	movdqa	XMMWORD [wk(0)], xmm0	; wk(0)=tmp2
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=tmp3
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movdqa	xmm4,xmm2
-	movdqa	xmm0,xmm5
-	psubw	xmm2,xmm1		; xmm2=z12
-	psubw	xmm5,xmm3		; xmm5=z10
-	paddw	xmm4,xmm1		; xmm4=z11
-	paddw	xmm0,xmm3		; xmm0=z13
+        movdqa  xmm4,xmm2
+        movdqa  xmm0,xmm5
+        psubw   xmm2,xmm1               ; xmm2=z12
+        psubw   xmm5,xmm3               ; xmm5=z10
+        paddw   xmm4,xmm1               ; xmm4=z11
+        paddw   xmm0,xmm3               ; xmm0=z13
 
-	movdqa	xmm1,xmm5		; xmm1=z10(unscaled)
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
+        movdqa  xmm1,xmm5               ; xmm1=z10(unscaled)
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
 
-	movdqa	xmm3,xmm4
-	psubw	xmm4,xmm0
-	paddw	xmm3,xmm0		; xmm3=tmp7
+        movdqa  xmm3,xmm4
+        psubw   xmm4,xmm0
+        paddw   xmm3,xmm0               ; xmm3=tmp7
 
-	psllw	xmm4,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm4,[rel PW_F1414]	; xmm4=tmp11
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm4,[rel PW_F1414]     ; xmm4=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movdqa	xmm0,xmm5
-	paddw	xmm5,xmm2
-	pmulhw	xmm5,[rel PW_F1847]	; xmm5=z5
-	pmulhw	xmm0,[rel PW_MF1613]
-	pmulhw	xmm2,[rel PW_F1082]
-	psubw	xmm0,xmm1
-	psubw	xmm2,xmm5		; xmm2=tmp10
-	paddw	xmm0,xmm5		; xmm0=tmp12
+        movdqa  xmm0,xmm5
+        paddw   xmm5,xmm2
+        pmulhw  xmm5,[rel PW_F1847]     ; xmm5=z5
+        pmulhw  xmm0,[rel PW_MF1613]
+        pmulhw  xmm2,[rel PW_F1082]
+        psubw   xmm0,xmm1
+        psubw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm0,xmm5               ; xmm0=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	xmm0,xmm3		; xmm0=tmp6
-	movdqa	xmm1,xmm6
-	movdqa	xmm5,xmm7
-	paddw	xmm6,xmm3		; xmm6=data0=(00 01 02 03 04 05 06 07)
-	paddw	xmm7,xmm0		; xmm7=data1=(10 11 12 13 14 15 16 17)
-	psubw	xmm1,xmm3		; xmm1=data7=(70 71 72 73 74 75 76 77)
-	psubw	xmm5,xmm0		; xmm5=data6=(60 61 62 63 64 65 66 67)
-	psubw	xmm4,xmm0		; xmm4=tmp5
+        psubw   xmm0,xmm3               ; xmm0=tmp6
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm7
+        paddw   xmm6,xmm3               ; xmm6=data0=(00 01 02 03 04 05 06 07)
+        paddw   xmm7,xmm0               ; xmm7=data1=(10 11 12 13 14 15 16 17)
+        psubw   xmm1,xmm3               ; xmm1=data7=(70 71 72 73 74 75 76 77)
+        psubw   xmm5,xmm0               ; xmm5=data6=(60 61 62 63 64 65 66 67)
+        psubw   xmm4,xmm0               ; xmm4=tmp5
 
-	movdqa    xmm3,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm3,xmm7		; xmm3=(04 14 05 15 06 16 07 17)
-	movdqa    xmm0,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm1		; xmm5=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm0,xmm1		; xmm0=(64 74 65 75 66 76 67 77)
+        movdqa    xmm3,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm3,xmm7             ; xmm3=(04 14 05 15 06 16 07 17)
+        movdqa    xmm0,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm1             ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm0,xmm1             ; xmm0=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp2
-	movdqa	xmm1, XMMWORD [wk(1)]	; xmm1=tmp3
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(60 70 61 71 62 72 63 73)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(64 74 65 75 66 76 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(64 74 65 75 66 76 67 77)
 
-	paddw	xmm2,xmm4		; xmm2=tmp4
-	movdqa	xmm5,xmm7
-	movdqa	xmm0,xmm1
-	paddw	xmm7,xmm4		; xmm7=data2=(20 21 22 23 24 25 26 27)
-	paddw	xmm1,xmm2		; xmm1=data4=(40 41 42 43 44 45 46 47)
-	psubw	xmm5,xmm4		; xmm5=data5=(50 51 52 53 54 55 56 57)
-	psubw	xmm0,xmm2		; xmm0=data3=(30 31 32 33 34 35 36 37)
+        paddw   xmm2,xmm4               ; xmm2=tmp4
+        movdqa  xmm5,xmm7
+        movdqa  xmm0,xmm1
+        paddw   xmm7,xmm4               ; xmm7=data2=(20 21 22 23 24 25 26 27)
+        paddw   xmm1,xmm2               ; xmm1=data4=(40 41 42 43 44 45 46 47)
+        psubw   xmm5,xmm4               ; xmm5=data5=(50 51 52 53 54 55 56 57)
+        psubw   xmm0,xmm2               ; xmm0=data3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm0		; xmm7=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm4,xmm0		; xmm4=(24 34 25 35 26 36 27 37)
-	movdqa    xmm2,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm5		; xmm1=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm5		; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm4,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm0             ; xmm7=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm0             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm5             ; xmm2=(44 54 45 55 46 56 47 57)
 
-	movdqa    xmm0,xmm3		; transpose coefficients(phase 2)
-	punpckldq xmm3,xmm4		; xmm3=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm0,xmm4		; xmm0=(06 16 26 36 07 17 27 37)
-	movdqa    xmm5,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm7		; xmm6=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm5,xmm7		; xmm5=(02 12 22 32 03 13 23 33)
+        movdqa    xmm0,xmm3             ; transpose coefficients(phase 2)
+        punpckldq xmm3,xmm4             ; xmm3=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm0,xmm4             ; xmm0=(06 16 26 36 07 17 27 37)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7             ; xmm6=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm5,xmm7             ; xmm5=(02 12 22 32 03 13 23 33)
 
-	movdqa	xmm4, XMMWORD [wk(0)]	; xmm4=(60 70 61 71 62 72 63 73)
-	movdqa	xmm7, XMMWORD [wk(1)]	; xmm7=(64 74 65 75 66 76 67 77)
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(60 70 61 71 62 72 63 73)
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=(64 74 65 75 66 76 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm3	; wk(0)=(04 14 24 34 05 15 25 35)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(06 16 26 36 07 17 27 37)
+        movdqa  XMMWORD [wk(0)], xmm3   ; wk(0)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(06 16 26 36 07 17 27 37)
 
-	movdqa    xmm3,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm4		; xmm1=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm3,xmm4		; xmm3=(42 52 62 72 43 53 63 73)
-	movdqa    xmm0,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm7		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm0,xmm7		; xmm0=(46 56 66 76 47 57 67 77)
+        movdqa    xmm3,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm4             ; xmm1=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm3,xmm4             ; xmm3=(42 52 62 72 43 53 63 73)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm7             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm0,xmm7             ; xmm0=(46 56 66 76 47 57 67 77)
 
-	movdqa     xmm4,xmm6		; transpose coefficients(phase 3)
-	punpcklqdq xmm6,xmm1		; xmm6=col0=(00 10 20 30 40 50 60 70)
-	punpckhqdq xmm4,xmm1		; xmm4=col1=(01 11 21 31 41 51 61 71)
-	movdqa     xmm7,xmm5		; transpose coefficients(phase 3)
-	punpcklqdq xmm5,xmm3		; xmm5=col2=(02 12 22 32 42 52 62 72)
-	punpckhqdq xmm7,xmm3		; xmm7=col3=(03 13 23 33 43 53 63 73)
+        movdqa     xmm4,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm1            ; xmm6=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm4,xmm1            ; xmm4=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm7,xmm5            ; transpose coefficients(phase 3)
+        punpcklqdq xmm5,xmm3            ; xmm5=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm7,xmm3            ; xmm7=col3=(03 13 23 33 43 53 63 73)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(04 14 24 34 05 15 25 35)
-	movdqa	xmm3, XMMWORD [wk(1)]	; xmm3=(06 16 26 36 07 17 27 37)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(04 14 24 34 05 15 25 35)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(06 16 26 36 07 17 27 37)
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=col1
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=col3
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=col3
 
-	movdqa     xmm4,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm2		; xmm1=col4=(04 14 24 34 44 54 64 74)
-	punpckhqdq xmm4,xmm2		; xmm4=col5=(05 15 25 35 45 55 65 75)
-	movdqa     xmm7,xmm3		; transpose coefficients(phase 3)
-	punpcklqdq xmm3,xmm0		; xmm3=col6=(06 16 26 36 46 56 66 76)
-	punpckhqdq xmm7,xmm0		; xmm7=col7=(07 17 27 37 47 57 67 77)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm4,xmm2            ; xmm4=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm7,xmm3            ; transpose coefficients(phase 3)
+        punpcklqdq xmm3,xmm0            ; xmm3=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm7,xmm0            ; xmm7=col7=(07 17 27 37 47 57 67 77)
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	rax, [original_rbp]
-	mov	rdi, r12	; (JSAMPROW *)
-	mov	rax, r13
+        mov     rax, [original_rbp]     ; NOTE(review): looks like a dead load -- rax is overwritten two lines below
+        mov     rdi, r12        ; (JSAMPROW *) output_buf
+        mov     rax, r13                ; output_col, used as the column offset in the stores below
 
-	; -- Even part
+        ; -- Even part
 
-	; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
+        ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
 
-	movdqa	xmm2,xmm6
-	movdqa	xmm0,xmm5
-	psubw	xmm6,xmm1		; xmm6=tmp11
-	psubw	xmm5,xmm3
-	paddw	xmm2,xmm1		; xmm2=tmp10
-	paddw	xmm0,xmm3		; xmm0=tmp13
+        movdqa  xmm2,xmm6
+        movdqa  xmm0,xmm5
+        psubw   xmm6,xmm1               ; xmm6=tmp11
+        psubw   xmm5,xmm3
+        paddw   xmm2,xmm1               ; xmm2=tmp10
+        paddw   xmm0,xmm3               ; xmm0=tmp13
 
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[rel PW_F1414]
-	psubw	xmm5,xmm0		; xmm5=tmp12
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[rel PW_F1414]
+        psubw   xmm5,xmm0               ; xmm5=tmp12
 
-	movdqa	xmm1,xmm2
-	movdqa	xmm3,xmm6
-	psubw	xmm2,xmm0		; xmm2=tmp3
-	psubw	xmm6,xmm5		; xmm6=tmp2
-	paddw	xmm1,xmm0		; xmm1=tmp0
-	paddw	xmm3,xmm5		; xmm3=tmp1
+        movdqa  xmm1,xmm2
+        movdqa  xmm3,xmm6
+        psubw   xmm2,xmm0               ; xmm2=tmp3
+        psubw   xmm6,xmm5               ; xmm6=tmp2
+        paddw   xmm1,xmm0               ; xmm1=tmp0
+        paddw   xmm3,xmm5               ; xmm3=tmp1
 
-	movdqa	xmm0, XMMWORD [wk(0)]	; xmm0=col1
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=col3
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=col1
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=col3
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=tmp3
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=tmp2
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp3
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
+        ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
 
-	movdqa	xmm2,xmm0
-	movdqa	xmm6,xmm4
-	psubw	xmm0,xmm7		; xmm0=z12
-	psubw	xmm4,xmm5		; xmm4=z10
-	paddw	xmm2,xmm7		; xmm2=z11
-	paddw	xmm6,xmm5		; xmm6=z13
+        movdqa  xmm2,xmm0
+        movdqa  xmm6,xmm4
+        psubw   xmm0,xmm7               ; xmm0=z12
+        psubw   xmm4,xmm5               ; xmm4=z10
+        paddw   xmm2,xmm7               ; xmm2=z11
+        paddw   xmm6,xmm5               ; xmm6=z13
 
-	movdqa	xmm7,xmm4		; xmm7=z10(unscaled)
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm4,PRE_MULTIPLY_SCALE_BITS
+        movdqa  xmm7,xmm4               ; xmm7=z10(unscaled)
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
 
-	movdqa	xmm5,xmm2
-	psubw	xmm2,xmm6
-	paddw	xmm5,xmm6		; xmm5=tmp7
+        movdqa  xmm5,xmm2
+        psubw   xmm2,xmm6
+        paddw   xmm5,xmm6               ; xmm5=tmp7
 
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm2,[rel PW_F1414]	; xmm2=tmp11
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm2,[rel PW_F1414]     ; xmm2=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movdqa	xmm6,xmm4
-	paddw	xmm4,xmm0
-	pmulhw	xmm4,[rel PW_F1847]	; xmm4=z5
-	pmulhw	xmm6,[rel PW_MF1613]
-	pmulhw	xmm0,[rel PW_F1082]
-	psubw	xmm6,xmm7
-	psubw	xmm0,xmm4		; xmm0=tmp10
-	paddw	xmm6,xmm4		; xmm6=tmp12
+        movdqa  xmm6,xmm4
+        paddw   xmm4,xmm0
+        pmulhw  xmm4,[rel PW_F1847]     ; xmm4=z5
+        pmulhw  xmm6,[rel PW_MF1613]
+        pmulhw  xmm0,[rel PW_F1082]
+        psubw   xmm6,xmm7
+        psubw   xmm0,xmm4               ; xmm0=tmp10
+        paddw   xmm6,xmm4               ; xmm6=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	xmm6,xmm5		; xmm6=tmp6
-	movdqa	xmm7,xmm1
-	movdqa	xmm4,xmm3
-	paddw	xmm1,xmm5		; xmm1=data0=(00 10 20 30 40 50 60 70)
-	paddw	xmm3,xmm6		; xmm3=data1=(01 11 21 31 41 51 61 71)
-	psraw	xmm1,(PASS1_BITS+3)	; descale
-	psraw	xmm3,(PASS1_BITS+3)	; descale
-	psubw	xmm7,xmm5		; xmm7=data7=(07 17 27 37 47 57 67 77)
-	psubw	xmm4,xmm6		; xmm4=data6=(06 16 26 36 46 56 66 76)
-	psraw	xmm7,(PASS1_BITS+3)	; descale
-	psraw	xmm4,(PASS1_BITS+3)	; descale
-	psubw	xmm2,xmm6		; xmm2=tmp5
+        psubw   xmm6,xmm5               ; xmm6=tmp6
+        movdqa  xmm7,xmm1
+        movdqa  xmm4,xmm3
+        paddw   xmm1,xmm5               ; xmm1=data0=(00 10 20 30 40 50 60 70)
+        paddw   xmm3,xmm6               ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        psraw   xmm1,(PASS1_BITS+3)     ; descale
+        psraw   xmm3,(PASS1_BITS+3)     ; descale
+        psubw   xmm7,xmm5               ; xmm7=data7=(07 17 27 37 47 57 67 77)
+        psubw   xmm4,xmm6               ; xmm4=data6=(06 16 26 36 46 56 66 76)
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psubw   xmm2,xmm6               ; xmm2=tmp5
 
-	packsswb  xmm1,xmm4	; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	packsswb  xmm3,xmm7	; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        packsswb  xmm1,xmm4     ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm7     ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp2
-	movdqa	xmm6, XMMWORD [wk(0)]	; xmm6=tmp3
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp2
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp3
 
-	paddw	xmm0,xmm2		; xmm0=tmp4
-	movdqa	xmm4,xmm5
-	movdqa	xmm7,xmm6
-	paddw	xmm5,xmm2		; xmm5=data2=(02 12 22 32 42 52 62 72)
-	paddw	xmm6,xmm0		; xmm6=data4=(04 14 24 34 44 54 64 74)
-	psraw	xmm5,(PASS1_BITS+3)	; descale
-	psraw	xmm6,(PASS1_BITS+3)	; descale
-	psubw	xmm4,xmm2		; xmm4=data5=(05 15 25 35 45 55 65 75)
-	psubw	xmm7,xmm0		; xmm7=data3=(03 13 23 33 43 53 63 73)
-	psraw	xmm4,(PASS1_BITS+3)	; descale
-	psraw	xmm7,(PASS1_BITS+3)	; descale
+        paddw   xmm0,xmm2               ; xmm0=tmp4
+        movdqa  xmm4,xmm5
+        movdqa  xmm7,xmm6
+        paddw   xmm5,xmm2               ; xmm5=data2=(02 12 22 32 42 52 62 72)
+        paddw   xmm6,xmm0               ; xmm6=data4=(04 14 24 34 44 54 64 74)
+        psraw   xmm5,(PASS1_BITS+3)     ; descale
+        psraw   xmm6,(PASS1_BITS+3)     ; descale
+        psubw   xmm4,xmm2               ; xmm4=data5=(05 15 25 35 45 55 65 75)
+        psubw   xmm7,xmm0               ; xmm7=data3=(03 13 23 33 43 53 63 73)
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
 
-	movdqa    xmm2,[rel PB_CENTERJSAMP]	; xmm2=[rel PB_CENTERJSAMP]
+        movdqa    xmm2,[rel PB_CENTERJSAMP]     ; xmm2=CENTERJSAMPLE in all 16 bytes (added to every packed output below)
 
-	packsswb  xmm5,xmm6	; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
-	packsswb  xmm7,xmm4	; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+        packsswb  xmm5,xmm6     ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm7,xmm4     ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
 
-	paddb     xmm1,xmm2
-	paddb     xmm3,xmm2
-	paddb     xmm5,xmm2
-	paddb     xmm7,xmm2
+        paddb     xmm1,xmm2
+        paddb     xmm3,xmm2
+        paddb     xmm5,xmm2
+        paddb     xmm7,xmm2
 
-	movdqa    xmm0,xmm1	; transpose coefficients(phase 1)
-	punpcklbw xmm1,xmm3	; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
-	punpckhbw xmm0,xmm3	; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
-	movdqa    xmm6,xmm5	; transpose coefficients(phase 1)
-	punpcklbw xmm5,xmm7	; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
-	punpckhbw xmm6,xmm7	; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 1)
+        punpcklbw xmm1,xmm3     ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm3     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm6,xmm5     ; transpose coefficients(phase 1)
+        punpcklbw xmm5,xmm7     ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm6,xmm7     ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
 
-	movdqa    xmm4,xmm1	; transpose coefficients(phase 2)
-	punpcklwd xmm1,xmm5	; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm5	; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
-	movdqa    xmm2,xmm6	; transpose coefficients(phase 2)
-	punpcklwd xmm6,xmm0	; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
-	punpckhwd xmm2,xmm0	; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm1     ; transpose coefficients(phase 2)
+        punpcklwd xmm1,xmm5     ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm5     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm0     ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm2,xmm0     ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
 
-	movdqa    xmm3,xmm1	; transpose coefficients(phase 3)
-	punpckldq xmm1,xmm6	; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm3,xmm6	; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
-	movdqa    xmm7,xmm4	; transpose coefficients(phase 3)
-	punpckldq xmm4,xmm2	; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
-	punpckhdq xmm7,xmm2	; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+        movdqa    xmm3,xmm1     ; transpose coefficients(phase 3)
+        punpckldq xmm1,xmm6     ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm3,xmm6     ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm2     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm7,xmm2     ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
 
-	pshufd	xmm5,xmm1,0x4E	; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm0,xmm3,0x4E	; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
-	pshufd	xmm6,xmm4,0x4E	; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
-	pshufd	xmm2,xmm7,0x4E	; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+        pshufd  xmm5,xmm1,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm3,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm6,xmm4,0x4E  ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm2,xmm7,0x4E  ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
 
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
-	mov	rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+        mov     rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
 
-	mov	rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
-	mov	rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
+        mov     rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- value stored by the prologue (address of the saved rbp)
+        pop     rbp             ; restore caller's rbp
+        ret
+        ret                     ; NOTE(review): unreachable duplicate of the ret above
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2fst.asm b/simd/jiss2fst.asm
index b53664d..0312be2 100644
--- a/simd/jiss2fst.asm
+++ b/simd/jiss2fst.asm
@@ -26,31 +26,31 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	8	; 14 is also OK.
-%define PASS1_BITS	2
+%define CONST_BITS      8       ; 14 is also OK.
+%define PASS1_BITS      2
 
 %if IFAST_SCALE_BITS != PASS1_BITS
 %error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
 %endif
 
 %if CONST_BITS == 8
-F_1_082	equ	277		; FIX(1.082392200)
-F_1_414	equ	362		; FIX(1.414213562)
-F_1_847	equ	473		; FIX(1.847759065)
-F_2_613	equ	669		; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - 256)	; FIX(2.613125930) - FIX(1)
+F_1_082 equ     277             ; FIX(1.082392200)
+F_1_414 equ     362             ; FIX(1.414213562)
+F_1_847 equ     473             ; FIX(1.847759065)
+F_2_613 equ     669             ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
-%define	DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_1_082	equ	DESCALE(1162209775,30-CONST_BITS)	; FIX(1.082392200)
-F_1_414	equ	DESCALE(1518500249,30-CONST_BITS)	; FIX(1.414213562)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_613	equ	DESCALE(2805822602,30-CONST_BITS)	; FIX(2.613125930)
-F_1_613	equ	(F_2_613 - (1 << CONST_BITS))	; FIX(2.613125930) - FIX(1)
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
@@ -58,22 +58,22 @@
 %define PRE_MULTIPLY_SCALE_BITS   2
 %define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-	alignz	16
-	global	EXTN(jconst_idct_ifast_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
 
-PW_F1414	times 8 dw  F_1_414 << CONST_SHIFT
-PW_F1847	times 8 dw  F_1_847 << CONST_SHIFT
-PW_MF1613	times 8 dw -F_1_613 << CONST_SHIFT
-PW_F1082	times 8 dw  F_1_082 << CONST_SHIFT
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F1414        times 8 dw  F_1_414 << CONST_SHIFT
+PW_F1847        times 8 dw  F_1_847 << CONST_SHIFT
+PW_MF1613       times 8 dw -F_1_613 << CONST_SHIFT
+PW_F1082        times 8 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -82,421 +82,421 @@
 ;                       JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; jpeg_component_info * compptr
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; dequantization multiplier table (quantptr)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_idct_ifast_sse2)
+        align   16
+        global  EXTN(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	xmm1,xmm0
-	packsswb xmm1,xmm1
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movdqa    xmm7,xmm0		; xmm0=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm0,xmm0		; xmm0=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm7,xmm7		; xmm7=(04 04 05 05 06 06 07 07)
+        movdqa    xmm7,xmm0             ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm7,xmm7             ; xmm7=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm6,xmm0,0x00		; xmm6=col0=(00 00 00 00 00 00 00 00)
-	pshufd	xmm2,xmm0,0x55		; xmm2=col1=(01 01 01 01 01 01 01 01)
-	pshufd	xmm5,xmm0,0xAA		; xmm5=col2=(02 02 02 02 02 02 02 02)
-	pshufd	xmm0,xmm0,0xFF		; xmm0=col3=(03 03 03 03 03 03 03 03)
-	pshufd	xmm1,xmm7,0x00		; xmm1=col4=(04 04 04 04 04 04 04 04)
-	pshufd	xmm4,xmm7,0x55		; xmm4=col5=(05 05 05 05 05 05 05 05)
-	pshufd	xmm3,xmm7,0xAA		; xmm3=col6=(06 06 06 06 06 06 06 06)
-	pshufd	xmm7,xmm7,0xFF		; xmm7=col7=(07 07 07 07 07 07 07 07)
+        pshufd  xmm6,xmm0,0x00          ; xmm6=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm2,xmm0,0x55          ; xmm2=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm5,xmm0,0xAA          ; xmm5=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm0,xmm0,0xFF          ; xmm0=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm1,xmm7,0x00          ; xmm1=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm4,xmm7,0x55          ; xmm4=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm3,xmm7,0xAA          ; xmm3=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm7,xmm7,0xFF          ; xmm7=col7=(07 07 07 07 07 07 07 07)
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=col1
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=col3
-	jmp	near .column_end
-	alignx	16,7
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=col3
+        jmp     near .column_end
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movdqa	xmm4,xmm0
-	movdqa	xmm5,xmm1
-	psubw	xmm0,xmm2		; xmm0=tmp11
-	psubw	xmm1,xmm3
-	paddw	xmm4,xmm2		; xmm4=tmp10
-	paddw	xmm5,xmm3		; xmm5=tmp13
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        psubw   xmm0,xmm2               ; xmm0=tmp11
+        psubw   xmm1,xmm3
+        paddw   xmm4,xmm2               ; xmm4=tmp10
+        paddw   xmm5,xmm3               ; xmm5=tmp13
 
-	psllw	xmm1,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm1,[GOTOFF(ebx,PW_F1414)]
-	psubw	xmm1,xmm5		; xmm1=tmp12
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   xmm1,xmm5               ; xmm1=tmp12
 
-	movdqa	xmm6,xmm4
-	movdqa	xmm7,xmm0
-	psubw	xmm4,xmm5		; xmm4=tmp3
-	psubw	xmm0,xmm1		; xmm0=tmp2
-	paddw	xmm6,xmm5		; xmm6=tmp0
-	paddw	xmm7,xmm1		; xmm7=tmp1
+        movdqa  xmm6,xmm4
+        movdqa  xmm7,xmm0
+        psubw   xmm4,xmm5               ; xmm4=tmp3
+        psubw   xmm0,xmm1               ; xmm0=tmp2
+        paddw   xmm6,xmm5               ; xmm6=tmp0
+        paddw   xmm7,xmm1               ; xmm7=tmp1
 
-	movdqa	XMMWORD [wk(1)], xmm4	; wk(1)=tmp3
-	movdqa	XMMWORD [wk(0)], xmm0	; wk(0)=tmp2
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=tmp3
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
 
-	movdqa	xmm4,xmm2
-	movdqa	xmm0,xmm5
-	psubw	xmm2,xmm1		; xmm2=z12
-	psubw	xmm5,xmm3		; xmm5=z10
-	paddw	xmm4,xmm1		; xmm4=z11
-	paddw	xmm0,xmm3		; xmm0=z13
+        movdqa  xmm4,xmm2
+        movdqa  xmm0,xmm5
+        psubw   xmm2,xmm1               ; xmm2=z12
+        psubw   xmm5,xmm3               ; xmm5=z10
+        paddw   xmm4,xmm1               ; xmm4=z11
+        paddw   xmm0,xmm3               ; xmm0=z13
 
-	movdqa	xmm1,xmm5		; xmm1=z10(unscaled)
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
+        movdqa  xmm1,xmm5               ; xmm1=z10(unscaled)
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
 
-	movdqa	xmm3,xmm4
-	psubw	xmm4,xmm0
-	paddw	xmm3,xmm0		; xmm3=tmp7
+        movdqa  xmm3,xmm4
+        psubw   xmm4,xmm0
+        paddw   xmm3,xmm0               ; xmm3=tmp7
 
-	psllw	xmm4,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm4,[GOTOFF(ebx,PW_F1414)]	; xmm4=tmp11
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F1414)]     ; xmm4=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movdqa	xmm0,xmm5
-	paddw	xmm5,xmm2
-	pmulhw	xmm5,[GOTOFF(ebx,PW_F1847)]	; xmm5=z5
-	pmulhw	xmm0,[GOTOFF(ebx,PW_MF1613)]
-	pmulhw	xmm2,[GOTOFF(ebx,PW_F1082)]
-	psubw	xmm0,xmm1
-	psubw	xmm2,xmm5		; xmm2=tmp10
-	paddw	xmm0,xmm5		; xmm0=tmp12
+        movdqa  xmm0,xmm5
+        paddw   xmm5,xmm2
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F1847)]     ; xmm5=z5
+        pmulhw  xmm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   xmm0,xmm1
+        psubw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm0,xmm5               ; xmm0=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	xmm0,xmm3		; xmm0=tmp6
-	movdqa	xmm1,xmm6
-	movdqa	xmm5,xmm7
-	paddw	xmm6,xmm3		; xmm6=data0=(00 01 02 03 04 05 06 07)
-	paddw	xmm7,xmm0		; xmm7=data1=(10 11 12 13 14 15 16 17)
-	psubw	xmm1,xmm3		; xmm1=data7=(70 71 72 73 74 75 76 77)
-	psubw	xmm5,xmm0		; xmm5=data6=(60 61 62 63 64 65 66 67)
-	psubw	xmm4,xmm0		; xmm4=tmp5
+        psubw   xmm0,xmm3               ; xmm0=tmp6
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm7
+        paddw   xmm6,xmm3               ; xmm6=data0=(00 01 02 03 04 05 06 07)
+        paddw   xmm7,xmm0               ; xmm7=data1=(10 11 12 13 14 15 16 17)
+        psubw   xmm1,xmm3               ; xmm1=data7=(70 71 72 73 74 75 76 77)
+        psubw   xmm5,xmm0               ; xmm5=data6=(60 61 62 63 64 65 66 67)
+        psubw   xmm4,xmm0               ; xmm4=tmp5
 
-	movdqa    xmm3,xmm6		; transpose coefficients(phase 1)
-	punpcklwd xmm6,xmm7		; xmm6=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm3,xmm7		; xmm3=(04 14 05 15 06 16 07 17)
-	movdqa    xmm0,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm1		; xmm5=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm0,xmm1		; xmm0=(64 74 65 75 66 76 67 77)
+        movdqa    xmm3,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm3,xmm7             ; xmm3=(04 14 05 15 06 16 07 17)
+        movdqa    xmm0,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm1             ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm0,xmm1             ; xmm0=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp2
-	movdqa	xmm1, XMMWORD [wk(1)]	; xmm1=tmp3
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(60 70 61 71 62 72 63 73)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(64 74 65 75 66 76 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(64 74 65 75 66 76 67 77)
 
-	paddw	xmm2,xmm4		; xmm2=tmp4
-	movdqa	xmm5,xmm7
-	movdqa	xmm0,xmm1
-	paddw	xmm7,xmm4		; xmm7=data2=(20 21 22 23 24 25 26 27)
-	paddw	xmm1,xmm2		; xmm1=data4=(40 41 42 43 44 45 46 47)
-	psubw	xmm5,xmm4		; xmm5=data5=(50 51 52 53 54 55 56 57)
-	psubw	xmm0,xmm2		; xmm0=data3=(30 31 32 33 34 35 36 37)
+        paddw   xmm2,xmm4               ; xmm2=tmp4
+        movdqa  xmm5,xmm7
+        movdqa  xmm0,xmm1
+        paddw   xmm7,xmm4               ; xmm7=data2=(20 21 22 23 24 25 26 27)
+        paddw   xmm1,xmm2               ; xmm1=data4=(40 41 42 43 44 45 46 47)
+        psubw   xmm5,xmm4               ; xmm5=data5=(50 51 52 53 54 55 56 57)
+        psubw   xmm0,xmm2               ; xmm0=data3=(30 31 32 33 34 35 36 37)
 
-	movdqa    xmm4,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm0		; xmm7=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm4,xmm0		; xmm4=(24 34 25 35 26 36 27 37)
-	movdqa    xmm2,xmm1		; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm5		; xmm1=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm2,xmm5		; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm4,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm0             ; xmm7=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm0             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm5             ; xmm2=(44 54 45 55 46 56 47 57)
 
-	movdqa    xmm0,xmm3		; transpose coefficients(phase 2)
-	punpckldq xmm3,xmm4		; xmm3=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm0,xmm4		; xmm0=(06 16 26 36 07 17 27 37)
-	movdqa    xmm5,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm7		; xmm6=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm5,xmm7		; xmm5=(02 12 22 32 03 13 23 33)
+        movdqa    xmm0,xmm3             ; transpose coefficients(phase 2)
+        punpckldq xmm3,xmm4             ; xmm3=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm0,xmm4             ; xmm0=(06 16 26 36 07 17 27 37)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7             ; xmm6=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm5,xmm7             ; xmm5=(02 12 22 32 03 13 23 33)
 
-	movdqa	xmm4, XMMWORD [wk(0)]	; xmm4=(60 70 61 71 62 72 63 73)
-	movdqa	xmm7, XMMWORD [wk(1)]	; xmm7=(64 74 65 75 66 76 67 77)
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(60 70 61 71 62 72 63 73)
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=(64 74 65 75 66 76 67 77)
 
-	movdqa	XMMWORD [wk(0)], xmm3	; wk(0)=(04 14 24 34 05 15 25 35)
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=(06 16 26 36 07 17 27 37)
+        movdqa  XMMWORD [wk(0)], xmm3   ; wk(0)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(06 16 26 36 07 17 27 37)
 
-	movdqa    xmm3,xmm1		; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm4		; xmm1=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm3,xmm4		; xmm3=(42 52 62 72 43 53 63 73)
-	movdqa    xmm0,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm7		; xmm2=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm0,xmm7		; xmm0=(46 56 66 76 47 57 67 77)
+        movdqa    xmm3,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm4             ; xmm1=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm3,xmm4             ; xmm3=(42 52 62 72 43 53 63 73)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm7             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm0,xmm7             ; xmm0=(46 56 66 76 47 57 67 77)
 
-	movdqa     xmm4,xmm6		; transpose coefficients(phase 3)
-	punpcklqdq xmm6,xmm1		; xmm6=col0=(00 10 20 30 40 50 60 70)
-	punpckhqdq xmm4,xmm1		; xmm4=col1=(01 11 21 31 41 51 61 71)
-	movdqa     xmm7,xmm5		; transpose coefficients(phase 3)
-	punpcklqdq xmm5,xmm3		; xmm5=col2=(02 12 22 32 42 52 62 72)
-	punpckhqdq xmm7,xmm3		; xmm7=col3=(03 13 23 33 43 53 63 73)
+        movdqa     xmm4,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm1            ; xmm6=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm4,xmm1            ; xmm4=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm7,xmm5            ; transpose coefficients(phase 3)
+        punpcklqdq xmm5,xmm3            ; xmm5=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm7,xmm3            ; xmm7=col3=(03 13 23 33 43 53 63 73)
 
-	movdqa	xmm1, XMMWORD [wk(0)]	; xmm1=(04 14 24 34 05 15 25 35)
-	movdqa	xmm3, XMMWORD [wk(1)]	; xmm3=(06 16 26 36 07 17 27 37)
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(04 14 24 34 05 15 25 35)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(06 16 26 36 07 17 27 37)
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=col1
-	movdqa	XMMWORD [wk(1)], xmm7	; wk(1)=col3
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=col3
 
-	movdqa     xmm4,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm2		; xmm1=col4=(04 14 24 34 44 54 64 74)
-	punpckhqdq xmm4,xmm2		; xmm4=col5=(05 15 25 35 45 55 65 75)
-	movdqa     xmm7,xmm3		; transpose coefficients(phase 3)
-	punpcklqdq xmm3,xmm0		; xmm3=col6=(06 16 26 36 46 56 66 76)
-	punpckhqdq xmm7,xmm0		; xmm7=col7=(07 17 27 37 47 57 67 77)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm4,xmm2            ; xmm4=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm7,xmm3            ; transpose coefficients(phase 3)
+        punpcklqdq xmm3,xmm0            ; xmm3=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm7,xmm0            ; xmm7=col7=(07 17 27 37 47 57 67 77)
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
 
-	; -- Even part
+        ; -- Even part
 
-	; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
+        ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
 
-	movdqa	xmm2,xmm6
-	movdqa	xmm0,xmm5
-	psubw	xmm6,xmm1		; xmm6=tmp11
-	psubw	xmm5,xmm3
-	paddw	xmm2,xmm1		; xmm2=tmp10
-	paddw	xmm0,xmm3		; xmm0=tmp13
+        movdqa  xmm2,xmm6
+        movdqa  xmm0,xmm5
+        psubw   xmm6,xmm1               ; xmm6=tmp11
+        psubw   xmm5,xmm3
+        paddw   xmm2,xmm1               ; xmm2=tmp10
+        paddw   xmm0,xmm3               ; xmm0=tmp13
 
-	psllw	xmm5,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm5,[GOTOFF(ebx,PW_F1414)]
-	psubw	xmm5,xmm0		; xmm5=tmp12
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F1414)]
+        psubw   xmm5,xmm0               ; xmm5=tmp12
 
-	movdqa	xmm1,xmm2
-	movdqa	xmm3,xmm6
-	psubw	xmm2,xmm0		; xmm2=tmp3
-	psubw	xmm6,xmm5		; xmm6=tmp2
-	paddw	xmm1,xmm0		; xmm1=tmp0
-	paddw	xmm3,xmm5		; xmm3=tmp1
+        movdqa  xmm1,xmm2
+        movdqa  xmm3,xmm6
+        psubw   xmm2,xmm0               ; xmm2=tmp3
+        psubw   xmm6,xmm5               ; xmm6=tmp2
+        paddw   xmm1,xmm0               ; xmm1=tmp0
+        paddw   xmm3,xmm5               ; xmm3=tmp1
 
-	movdqa	xmm0, XMMWORD [wk(0)]	; xmm0=col1
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=col3
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=col1
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=col3
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=tmp3
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=tmp2
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp3
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
+        ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
 
-	movdqa	xmm2,xmm0
-	movdqa	xmm6,xmm4
-	psubw	xmm0,xmm7		; xmm0=z12
-	psubw	xmm4,xmm5		; xmm4=z10
-	paddw	xmm2,xmm7		; xmm2=z11
-	paddw	xmm6,xmm5		; xmm6=z13
+        movdqa  xmm2,xmm0
+        movdqa  xmm6,xmm4
+        psubw   xmm0,xmm7               ; xmm0=z12
+        psubw   xmm4,xmm5               ; xmm4=z10
+        paddw   xmm2,xmm7               ; xmm2=z11
+        paddw   xmm6,xmm5               ; xmm6=z13
 
-	movdqa	xmm7,xmm4		; xmm7=z10(unscaled)
-	psllw	xmm0,PRE_MULTIPLY_SCALE_BITS
-	psllw	xmm4,PRE_MULTIPLY_SCALE_BITS
+        movdqa  xmm7,xmm4               ; xmm7=z10(unscaled)
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
 
-	movdqa	xmm5,xmm2
-	psubw	xmm2,xmm6
-	paddw	xmm5,xmm6		; xmm5=tmp7
+        movdqa  xmm5,xmm2
+        psubw   xmm2,xmm6
+        paddw   xmm5,xmm6               ; xmm5=tmp7
 
-	psllw	xmm2,PRE_MULTIPLY_SCALE_BITS
-	pmulhw	xmm2,[GOTOFF(ebx,PW_F1414)]	; xmm2=tmp11
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F1414)]     ; xmm2=tmp11
 
-	; To avoid overflow...
-	;
-	; (Original)
-	; tmp12 = -2.613125930 * z10 + z5;
-	;
-	; (This implementation)
-	; tmp12 = (-1.613125930 - 1) * z10 + z5;
-	;       = -1.613125930 * z10 - z10 + z5;
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
 
-	movdqa	xmm6,xmm4
-	paddw	xmm4,xmm0
-	pmulhw	xmm4,[GOTOFF(ebx,PW_F1847)]	; xmm4=z5
-	pmulhw	xmm6,[GOTOFF(ebx,PW_MF1613)]
-	pmulhw	xmm0,[GOTOFF(ebx,PW_F1082)]
-	psubw	xmm6,xmm7
-	psubw	xmm0,xmm4		; xmm0=tmp10
-	paddw	xmm6,xmm4		; xmm6=tmp12
+        movdqa  xmm6,xmm4
+        paddw   xmm4,xmm0
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F1847)]     ; xmm4=z5
+        pmulhw  xmm6,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F1082)]
+        psubw   xmm6,xmm7
+        psubw   xmm0,xmm4               ; xmm0=tmp10
+        paddw   xmm6,xmm4               ; xmm6=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	psubw	xmm6,xmm5		; xmm6=tmp6
-	movdqa	xmm7,xmm1
-	movdqa	xmm4,xmm3
-	paddw	xmm1,xmm5		; xmm1=data0=(00 10 20 30 40 50 60 70)
-	paddw	xmm3,xmm6		; xmm3=data1=(01 11 21 31 41 51 61 71)
-	psraw	xmm1,(PASS1_BITS+3)	; descale
-	psraw	xmm3,(PASS1_BITS+3)	; descale
-	psubw	xmm7,xmm5		; xmm7=data7=(07 17 27 37 47 57 67 77)
-	psubw	xmm4,xmm6		; xmm4=data6=(06 16 26 36 46 56 66 76)
-	psraw	xmm7,(PASS1_BITS+3)	; descale
-	psraw	xmm4,(PASS1_BITS+3)	; descale
-	psubw	xmm2,xmm6		; xmm2=tmp5
+        psubw   xmm6,xmm5               ; xmm6=tmp6
+        movdqa  xmm7,xmm1
+        movdqa  xmm4,xmm3
+        paddw   xmm1,xmm5               ; xmm1=data0=(00 10 20 30 40 50 60 70)
+        paddw   xmm3,xmm6               ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        psraw   xmm1,(PASS1_BITS+3)     ; descale
+        psraw   xmm3,(PASS1_BITS+3)     ; descale
+        psubw   xmm7,xmm5               ; xmm7=data7=(07 17 27 37 47 57 67 77)
+        psubw   xmm4,xmm6               ; xmm4=data6=(06 16 26 36 46 56 66 76)
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psubw   xmm2,xmm6               ; xmm2=tmp5
 
-	packsswb  xmm1,xmm4	; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	packsswb  xmm3,xmm7	; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        packsswb  xmm1,xmm4     ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm7     ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm5, XMMWORD [wk(1)]	; xmm5=tmp2
-	movdqa	xmm6, XMMWORD [wk(0)]	; xmm6=tmp3
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp2
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp3
 
-	paddw	xmm0,xmm2		; xmm0=tmp4
-	movdqa	xmm4,xmm5
-	movdqa	xmm7,xmm6
-	paddw	xmm5,xmm2		; xmm5=data2=(02 12 22 32 42 52 62 72)
-	paddw	xmm6,xmm0		; xmm6=data4=(04 14 24 34 44 54 64 74)
-	psraw	xmm5,(PASS1_BITS+3)	; descale
-	psraw	xmm6,(PASS1_BITS+3)	; descale
-	psubw	xmm4,xmm2		; xmm4=data5=(05 15 25 35 45 55 65 75)
-	psubw	xmm7,xmm0		; xmm7=data3=(03 13 23 33 43 53 63 73)
-	psraw	xmm4,(PASS1_BITS+3)	; descale
-	psraw	xmm7,(PASS1_BITS+3)	; descale
+        paddw   xmm0,xmm2               ; xmm0=tmp4
+        movdqa  xmm4,xmm5
+        movdqa  xmm7,xmm6
+        paddw   xmm5,xmm2               ; xmm5=data2=(02 12 22 32 42 52 62 72)
+        paddw   xmm6,xmm0               ; xmm6=data4=(04 14 24 34 44 54 64 74)
+        psraw   xmm5,(PASS1_BITS+3)     ; descale
+        psraw   xmm6,(PASS1_BITS+3)     ; descale
+        psubw   xmm4,xmm2               ; xmm4=data5=(05 15 25 35 45 55 65 75)
+        psubw   xmm7,xmm0               ; xmm7=data3=(03 13 23 33 43 53 63 73)
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
 
-	movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]	; xmm2=[PB_CENTERJSAMP]
+        movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
 
-	packsswb  xmm5,xmm6	; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
-	packsswb  xmm7,xmm4	; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+        packsswb  xmm5,xmm6     ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm7,xmm4     ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
 
-	paddb     xmm1,xmm2
-	paddb     xmm3,xmm2
-	paddb     xmm5,xmm2
-	paddb     xmm7,xmm2
+        paddb     xmm1,xmm2
+        paddb     xmm3,xmm2
+        paddb     xmm5,xmm2
+        paddb     xmm7,xmm2
 
-	movdqa    xmm0,xmm1	; transpose coefficients(phase 1)
-	punpcklbw xmm1,xmm3	; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
-	punpckhbw xmm0,xmm3	; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
-	movdqa    xmm6,xmm5	; transpose coefficients(phase 1)
-	punpcklbw xmm5,xmm7	; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
-	punpckhbw xmm6,xmm7	; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 1)
+        punpcklbw xmm1,xmm3     ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm3     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm6,xmm5     ; transpose coefficients(phase 1)
+        punpcklbw xmm5,xmm7     ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm6,xmm7     ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
 
-	movdqa    xmm4,xmm1	; transpose coefficients(phase 2)
-	punpcklwd xmm1,xmm5	; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm5	; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
-	movdqa    xmm2,xmm6	; transpose coefficients(phase 2)
-	punpcklwd xmm6,xmm0	; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
-	punpckhwd xmm2,xmm0	; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm1     ; transpose coefficients(phase 2)
+        punpcklwd xmm1,xmm5     ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm5     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm0     ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm2,xmm0     ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
 
-	movdqa    xmm3,xmm1	; transpose coefficients(phase 3)
-	punpckldq xmm1,xmm6	; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm3,xmm6	; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
-	movdqa    xmm7,xmm4	; transpose coefficients(phase 3)
-	punpckldq xmm4,xmm2	; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
-	punpckhdq xmm7,xmm2	; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+        movdqa    xmm3,xmm1     ; transpose coefficients(phase 3)
+        punpckldq xmm1,xmm6     ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm3,xmm6     ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm2     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm7,xmm2     ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
 
-	pshufd	xmm5,xmm1,0x4E	; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm0,xmm3,0x4E	; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
-	pshufd	xmm6,xmm4,0x4E	; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
-	pshufd	xmm2,xmm7,0x4E	; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+        pshufd  xmm5,xmm1,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm3,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm6,xmm4,0x4E  ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm2,xmm7,0x4E  ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
-	mov	edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+        mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7
 
-	mov	edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
-	mov	edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+        mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2int-64.asm b/simd/jiss2int-64.asm
index 13764d6..10e9521 100644
--- a/simd/jiss2int-64.asm
+++ b/simd/jiss2int-64.asm
@@ -27,67 +27,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13      ; the F_x_xxx constants are scaled by 2^CONST_BITS
+%define PASS1_BITS      2       ; extra fractional bits kept in the pass-1 output
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS+3)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; right-shift applied to pass-1 results
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)       ; NOTE(review): presumably the pass-2 right-shift -- confirm
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_islow_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
 
-PW_F130_F054	times 4 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 4 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 4 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 4 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 4 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 4 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 4 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 4 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 4 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 4 dd  1 << (DESCALE_P2-1)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541  ; pmaddwd on (z2,z3) pairs -> tmp3 (even part)
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)  ; pmaddwd on (z2,z3) pairs -> tmp2 (even part)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175  ; pmaddwd on (z3,z4) pairs -> z3 (odd part)
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)  ; pmaddwd on (z3,z4) pairs -> z4 (odd part)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899  ; pmaddwd on (tmp0,tmp3) pairs -> tmp0 (odd part)
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)  ; pmaddwd on (tmp0,tmp3) pairs -> tmp3 (odd part)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562  ; pmaddwd on (tmp1,tmp2) pairs -> tmp1 (odd part)
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)  ; pmaddwd on (tmp1,tmp2) pairs -> tmp2 (odd part)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)         ; rounding bias added before psrad by DESCALE_P1
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)         ; NOTE(review): presumably the rounding bias for the DESCALE_P2 shift -- confirm
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE               ; CENTERJSAMPLE replicated into all 16 bytes
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -101,748 +101,748 @@
 ; r12 = JSAMPARRAY output_buf
 ; r13 = JDIMENSION output_col
 
-%define original_rbp	rbp+0
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		12
+%define original_rbp    rbp+0                           ; slot where the prologue stores the pre-alignment rsp
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM], located just below the aligned rbp
+%define WK_NUM          12                              ; number of xmmword scratch slots: wk(0)..wk(11)
 
-	align	16
-	global	EXTN(jsimd_idct_islow_sse2)
+        align   16
+        global  EXTN(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	rdx, r10	; quantptr
-	mov	rsi, r11		; inptr
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1,xmm0
-	packsswb xmm1,xmm1
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	rax,rax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    rax,rax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	xmm5,PASS1_BITS
+        psllw   xmm5,PASS1_BITS
 
-	movdqa    xmm4,xmm5		; xmm5=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm5,xmm5		; xmm5=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm4,xmm4		; xmm4=(04 04 05 05 06 06 07 07)
+        movdqa    xmm4,xmm5             ; xmm5=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm5,xmm5             ; xmm5=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm4,xmm4             ; xmm4=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm7,xmm5,0x00		; xmm7=col0=(00 00 00 00 00 00 00 00)
-	pshufd	xmm6,xmm5,0x55		; xmm6=col1=(01 01 01 01 01 01 01 01)
-	pshufd	xmm1,xmm5,0xAA		; xmm1=col2=(02 02 02 02 02 02 02 02)
-	pshufd	xmm5,xmm5,0xFF		; xmm5=col3=(03 03 03 03 03 03 03 03)
-	pshufd	xmm0,xmm4,0x00		; xmm0=col4=(04 04 04 04 04 04 04 04)
-	pshufd	xmm3,xmm4,0x55		; xmm3=col5=(05 05 05 05 05 05 05 05)
-	pshufd	xmm2,xmm4,0xAA		; xmm2=col6=(06 06 06 06 06 06 06 06)
-	pshufd	xmm4,xmm4,0xFF		; xmm4=col7=(07 07 07 07 07 07 07 07)
+        pshufd  xmm7,xmm5,0x00          ; xmm7=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm6,xmm5,0x55          ; xmm6=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm1,xmm5,0xAA          ; xmm1=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm5,xmm5,0xFF          ; xmm5=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm0,xmm4,0x00          ; xmm0=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm3,xmm4,0x55          ; xmm3=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm2,xmm4,0xAA          ; xmm2=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm4,xmm4,0xFF          ; xmm4=col7=(07 07 07 07 07 07 07 07)
 
-	movdqa	XMMWORD [wk(8)], xmm6	; wk(8)=col1
-	movdqa	XMMWORD [wk(9)], xmm5	; wk(9)=col3
-	movdqa	XMMWORD [wk(10)], xmm3	; wk(10)=col5
-	movdqa	XMMWORD [wk(11)], xmm4	; wk(11)=col7
-	jmp	near .column_end
+        movdqa  XMMWORD [wk(8)], xmm6   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm5   ; wk(9)=col3
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+        jmp     near .column_end
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movdqa    xmm4,xmm1		; xmm1=in2=z2
-	movdqa    xmm5,xmm1
-	punpcklwd xmm4,xmm3		; xmm3=in6=z3
-	punpckhwd xmm5,xmm3
-	movdqa    xmm1,xmm4
-	movdqa    xmm3,xmm5
-	pmaddwd   xmm4,[rel PW_F130_F054]	; xmm4=tmp3L
-	pmaddwd   xmm5,[rel PW_F130_F054]	; xmm5=tmp3H
-	pmaddwd   xmm1,[rel PW_F054_MF130]	; xmm1=tmp2L
-	pmaddwd   xmm3,[rel PW_F054_MF130]	; xmm3=tmp2H
+        movdqa    xmm4,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm4,xmm3             ; xmm3=in6=z3
+        punpckhwd xmm5,xmm3
+        movdqa    xmm1,xmm4
+        movdqa    xmm3,xmm5
+        pmaddwd   xmm4,[rel PW_F130_F054]       ; xmm4=tmp3L
+        pmaddwd   xmm5,[rel PW_F130_F054]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[rel PW_F054_MF130]      ; xmm1=tmp2L
+        pmaddwd   xmm3,[rel PW_F054_MF130]      ; xmm3=tmp2H
 
-	movdqa    xmm6,xmm0
-	paddw     xmm0,xmm2		; xmm0=in0+in4
-	psubw     xmm6,xmm2		; xmm6=in0-in4
+        movdqa    xmm6,xmm0
+        paddw     xmm0,xmm2             ; xmm0=in0+in4
+        psubw     xmm6,xmm2             ; xmm6=in0-in4
 
-	pxor      xmm7,xmm7
-	pxor      xmm2,xmm2
-	punpcklwd xmm7,xmm0		; xmm7=tmp0L
-	punpckhwd xmm2,xmm0		; xmm2=tmp0H
-	psrad     xmm7,(16-CONST_BITS)	; psrad xmm7,16 & pslld xmm7,CONST_BITS
-	psrad     xmm2,(16-CONST_BITS)	; psrad xmm2,16 & pslld xmm2,CONST_BITS
+        pxor      xmm7,xmm7
+        pxor      xmm2,xmm2
+        punpcklwd xmm7,xmm0             ; xmm7=tmp0L
+        punpckhwd xmm2,xmm0             ; xmm2=tmp0H
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        psrad     xmm2,(16-CONST_BITS)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS
 
-	movdqa	xmm0,xmm7
-	paddd	xmm7,xmm4		; xmm7=tmp10L
-	psubd	xmm0,xmm4		; xmm0=tmp13L
-	movdqa	xmm4,xmm2
-	paddd	xmm2,xmm5		; xmm2=tmp10H
-	psubd	xmm4,xmm5		; xmm4=tmp13H
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm4               ; xmm7=tmp10L
+        psubd   xmm0,xmm4               ; xmm0=tmp13L
+        movdqa  xmm4,xmm2
+        paddd   xmm2,xmm5               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp13H
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=tmp10L
-	movdqa	XMMWORD [wk(1)], xmm2	; wk(1)=tmp10H
-	movdqa	XMMWORD [wk(2)], xmm0	; wk(2)=tmp13L
-	movdqa	XMMWORD [wk(3)], xmm4	; wk(3)=tmp13H
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm0   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm4   ; wk(3)=tmp13H
 
-	pxor      xmm5,xmm5
-	pxor      xmm7,xmm7
-	punpcklwd xmm5,xmm6		; xmm5=tmp1L
-	punpckhwd xmm7,xmm6		; xmm7=tmp1H
-	psrad     xmm5,(16-CONST_BITS)	; psrad xmm5,16 & pslld xmm5,CONST_BITS
-	psrad     xmm7,(16-CONST_BITS)	; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        pxor      xmm5,xmm5
+        pxor      xmm7,xmm7
+        punpcklwd xmm5,xmm6             ; xmm5=tmp1L
+        punpckhwd xmm7,xmm6             ; xmm7=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
 
-	movdqa	xmm2,xmm5
-	paddd	xmm5,xmm1		; xmm5=tmp11L
-	psubd	xmm2,xmm1		; xmm2=tmp12L
-	movdqa	xmm0,xmm7
-	paddd	xmm7,xmm3		; xmm7=tmp11H
-	psubd	xmm0,xmm3		; xmm0=tmp12H
+        movdqa  xmm2,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm2,xmm1               ; xmm2=tmp12L
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm3               ; xmm7=tmp11H
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
 
-	movdqa	XMMWORD [wk(4)], xmm5	; wk(4)=tmp11L
-	movdqa	XMMWORD [wk(5)], xmm7	; wk(5)=tmp11H
-	movdqa	XMMWORD [wk(6)], xmm2	; wk(6)=tmp12L
-	movdqa	XMMWORD [wk(7)], xmm0	; wk(7)=tmp12H
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm7   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm0   ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm7,xmm4
-	paddw	xmm5,xmm3		; xmm5=z3
-	paddw	xmm7,xmm1		; xmm7=z4
+        movdqa  xmm5,xmm6
+        movdqa  xmm7,xmm4
+        paddw   xmm5,xmm3               ; xmm5=z3
+        paddw   xmm7,xmm1               ; xmm7=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm2,xmm5
-	movdqa    xmm0,xmm5
-	punpcklwd xmm2,xmm7
-	punpckhwd xmm0,xmm7
-	movdqa    xmm5,xmm2
-	movdqa    xmm7,xmm0
-	pmaddwd   xmm2,[rel PW_MF078_F117]	; xmm2=z3L
-	pmaddwd   xmm0,[rel PW_MF078_F117]	; xmm0=z3H
-	pmaddwd   xmm5,[rel PW_F117_F078]	; xmm5=z4L
-	pmaddwd   xmm7,[rel PW_F117_F078]	; xmm7=z4H
+        movdqa    xmm2,xmm5
+        movdqa    xmm0,xmm5
+        punpcklwd xmm2,xmm7
+        punpckhwd xmm0,xmm7
+        movdqa    xmm5,xmm2
+        movdqa    xmm7,xmm0
+        pmaddwd   xmm2,[rel PW_MF078_F117]      ; xmm2=z3L
+        pmaddwd   xmm0,[rel PW_MF078_F117]      ; xmm0=z3H
+        pmaddwd   xmm5,[rel PW_F117_F078]       ; xmm5=z4L
+        pmaddwd   xmm7,[rel PW_F117_F078]       ; xmm7=z4H
 
-	movdqa	XMMWORD [wk(10)], xmm2	; wk(10)=z3L
-	movdqa	XMMWORD [wk(11)], xmm0	; wk(11)=z3H
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movdqa    xmm2,xmm3
-	movdqa    xmm0,xmm3
-	punpcklwd xmm2,xmm4
-	punpckhwd xmm0,xmm4
-	movdqa    xmm3,xmm2
-	movdqa    xmm4,xmm0
-	pmaddwd   xmm2,[rel PW_MF060_MF089]	; xmm2=tmp0L
-	pmaddwd   xmm0,[rel PW_MF060_MF089]	; xmm0=tmp0H
-	pmaddwd   xmm3,[rel PW_MF089_F060]	; xmm3=tmp3L
-	pmaddwd   xmm4,[rel PW_MF089_F060]	; xmm4=tmp3H
+        movdqa    xmm2,xmm3
+        movdqa    xmm0,xmm3
+        punpcklwd xmm2,xmm4
+        punpckhwd xmm0,xmm4
+        movdqa    xmm3,xmm2
+        movdqa    xmm4,xmm0
+        pmaddwd   xmm2,[rel PW_MF060_MF089]     ; xmm2=tmp0L
+        pmaddwd   xmm0,[rel PW_MF060_MF089]     ; xmm0=tmp0H
+        pmaddwd   xmm3,[rel PW_MF089_F060]      ; xmm3=tmp3L
+        pmaddwd   xmm4,[rel PW_MF089_F060]      ; xmm4=tmp3H
 
-	paddd	xmm2, XMMWORD [wk(10)]	; xmm2=tmp0L
-	paddd	xmm0, XMMWORD [wk(11)]	; xmm0=tmp0H
-	paddd	xmm3,xmm5		; xmm3=tmp3L
-	paddd	xmm4,xmm7		; xmm4=tmp3H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp0L
+        paddd   xmm0, XMMWORD [wk(11)]  ; xmm0=tmp0H
+        paddd   xmm3,xmm5               ; xmm3=tmp3L
+        paddd   xmm4,xmm7               ; xmm4=tmp3H
 
-	movdqa	XMMWORD [wk(8)], xmm2	; wk(8)=tmp0L
-	movdqa	XMMWORD [wk(9)], xmm0	; wk(9)=tmp0H
+        movdqa  XMMWORD [wk(8)], xmm2   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm0   ; wk(9)=tmp0H
 
-	movdqa    xmm2,xmm1
-	movdqa    xmm0,xmm1
-	punpcklwd xmm2,xmm6
-	punpckhwd xmm0,xmm6
-	movdqa    xmm1,xmm2
-	movdqa    xmm6,xmm0
-	pmaddwd   xmm2,[rel PW_MF050_MF256]	; xmm2=tmp1L
-	pmaddwd   xmm0,[rel PW_MF050_MF256]	; xmm0=tmp1H
-	pmaddwd   xmm1,[rel PW_MF256_F050]	; xmm1=tmp2L
-	pmaddwd   xmm6,[rel PW_MF256_F050]	; xmm6=tmp2H
+        movdqa    xmm2,xmm1
+        movdqa    xmm0,xmm1
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm0,xmm6
+        movdqa    xmm1,xmm2
+        movdqa    xmm6,xmm0
+        pmaddwd   xmm2,[rel PW_MF050_MF256]     ; xmm2=tmp1L
+        pmaddwd   xmm0,[rel PW_MF050_MF256]     ; xmm0=tmp1H
+        pmaddwd   xmm1,[rel PW_MF256_F050]      ; xmm1=tmp2L
+        pmaddwd   xmm6,[rel PW_MF256_F050]      ; xmm6=tmp2H
 
-	paddd	xmm2,xmm5		; xmm2=tmp1L
-	paddd	xmm0,xmm7		; xmm0=tmp1H
-	paddd	xmm1, XMMWORD [wk(10)]	; xmm1=tmp2L
-	paddd	xmm6, XMMWORD [wk(11)]	; xmm6=tmp2H
+        paddd   xmm2,xmm5               ; xmm2=tmp1L
+        paddd   xmm0,xmm7               ; xmm0=tmp1H
+        paddd   xmm1, XMMWORD [wk(10)]  ; xmm1=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
 
-	movdqa	XMMWORD [wk(10)], xmm2	; wk(10)=tmp1L
-	movdqa	XMMWORD [wk(11)], xmm0	; wk(11)=tmp1H
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=tmp10L
-	movdqa	xmm7, XMMWORD [wk(1)]	; xmm7=tmp10H
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=tmp10H
 
-	movdqa	xmm2,xmm5
-	movdqa	xmm0,xmm7
-	paddd	xmm5,xmm3		; xmm5=data0L
-	paddd	xmm7,xmm4		; xmm7=data0H
-	psubd	xmm2,xmm3		; xmm2=data7L
-	psubd	xmm0,xmm4		; xmm0=data7H
+        movdqa  xmm2,xmm5
+        movdqa  xmm0,xmm7
+        paddd   xmm5,xmm3               ; xmm5=data0L
+        paddd   xmm7,xmm4               ; xmm7=data0H
+        psubd   xmm2,xmm3               ; xmm2=data7L
+        psubd   xmm0,xmm4               ; xmm0=data7H
 
-	movdqa	xmm3,[rel PD_DESCALE_P1]	; xmm3=[rel PD_DESCALE_P1]
+        movdqa  xmm3,[rel PD_DESCALE_P1]        ; xmm3=[rel PD_DESCALE_P1]
 
-	paddd	xmm5,xmm3
-	paddd	xmm7,xmm3
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm7,DESCALE_P1
-	paddd	xmm2,xmm3
-	paddd	xmm0,xmm3
-	psrad	xmm2,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm5,xmm3
+        paddd   xmm7,xmm3
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm7,DESCALE_P1
+        paddd   xmm2,xmm3
+        paddd   xmm0,xmm3
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm5,xmm7		; xmm5=data0=(00 01 02 03 04 05 06 07)
-	packssdw  xmm2,xmm0		; xmm2=data7=(70 71 72 73 74 75 76 77)
+        packssdw  xmm5,xmm7             ; xmm5=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm2,xmm0             ; xmm2=data7=(70 71 72 73 74 75 76 77)
 
-	movdqa	xmm4, XMMWORD [wk(4)]	; xmm4=tmp11L
-	movdqa	xmm3, XMMWORD [wk(5)]	; xmm3=tmp11H
+        movdqa  xmm4, XMMWORD [wk(4)]   ; xmm4=tmp11L
+        movdqa  xmm3, XMMWORD [wk(5)]   ; xmm3=tmp11H
 
-	movdqa	xmm7,xmm4
-	movdqa	xmm0,xmm3
-	paddd	xmm4,xmm1		; xmm4=data1L
-	paddd	xmm3,xmm6		; xmm3=data1H
-	psubd	xmm7,xmm1		; xmm7=data6L
-	psubd	xmm0,xmm6		; xmm0=data6H
+        movdqa  xmm7,xmm4
+        movdqa  xmm0,xmm3
+        paddd   xmm4,xmm1               ; xmm4=data1L
+        paddd   xmm3,xmm6               ; xmm3=data1H
+        psubd   xmm7,xmm1               ; xmm7=data6L
+        psubd   xmm0,xmm6               ; xmm0=data6H
 
-	movdqa	xmm1,[rel PD_DESCALE_P1]	; xmm1=[rel PD_DESCALE_P1]
+        movdqa  xmm1,[rel PD_DESCALE_P1]        ; xmm1=[rel PD_DESCALE_P1]
 
-	paddd	xmm4,xmm1
-	paddd	xmm3,xmm1
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm3,DESCALE_P1
-	paddd	xmm7,xmm1
-	paddd	xmm0,xmm1
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm4,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+        paddd   xmm7,xmm1
+        paddd   xmm0,xmm1
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm4,xmm3		; xmm4=data1=(10 11 12 13 14 15 16 17)
-	packssdw  xmm7,xmm0		; xmm7=data6=(60 61 62 63 64 65 66 67)
+        packssdw  xmm4,xmm3             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm7,xmm0             ; xmm7=data6=(60 61 62 63 64 65 66 67)
 
-	movdqa    xmm6,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm4		; xmm5=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm6,xmm4		; xmm6=(04 14 05 15 06 16 07 17)
-	movdqa    xmm1,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm2		; xmm7=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm1,xmm2		; xmm1=(64 74 65 75 66 76 67 77)
+        movdqa    xmm6,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm4             ; xmm5=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4             ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm2             ; xmm7=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm1,xmm2             ; xmm1=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm3, XMMWORD [wk(6)]	; xmm3=tmp12L
-	movdqa	xmm0, XMMWORD [wk(7)]	; xmm0=tmp12H
-	movdqa	xmm4, XMMWORD [wk(10)]	; xmm4=tmp1L
-	movdqa	xmm2, XMMWORD [wk(11)]	; xmm2=tmp1H
+        movdqa  xmm3, XMMWORD [wk(6)]   ; xmm3=tmp12L
+        movdqa  xmm0, XMMWORD [wk(7)]   ; xmm0=tmp12H
+        movdqa  xmm4, XMMWORD [wk(10)]  ; xmm4=tmp1L
+        movdqa  xmm2, XMMWORD [wk(11)]  ; xmm2=tmp1H
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(00 10 01 11 02 12 03 13)
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=(04 14 05 15 06 16 07 17)
-	movdqa	XMMWORD [wk(4)], xmm7	; wk(4)=(60 70 61 71 62 72 63 73)
-	movdqa	XMMWORD [wk(5)], xmm1	; wk(5)=(64 74 65 75 66 76 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 01 11 02 12 03 13)
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=(04 14 05 15 06 16 07 17)
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(5)], xmm1   ; wk(5)=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm5,xmm3
-	movdqa	xmm6,xmm0
-	paddd	xmm3,xmm4		; xmm3=data2L
-	paddd	xmm0,xmm2		; xmm0=data2H
-	psubd	xmm5,xmm4		; xmm5=data5L
-	psubd	xmm6,xmm2		; xmm6=data5H
+        movdqa  xmm5,xmm3
+        movdqa  xmm6,xmm0
+        paddd   xmm3,xmm4               ; xmm3=data2L
+        paddd   xmm0,xmm2               ; xmm0=data2H
+        psubd   xmm5,xmm4               ; xmm5=data5L
+        psubd   xmm6,xmm2               ; xmm6=data5H
 
-	movdqa	xmm7,[rel PD_DESCALE_P1]	; xmm7=[rel PD_DESCALE_P1]
+        movdqa  xmm7,[rel PD_DESCALE_P1]        ; xmm7=[rel PD_DESCALE_P1]
 
-	paddd	xmm3,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm3,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
-	paddd	xmm5,xmm7
-	paddd	xmm6,xmm7
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
+        paddd   xmm3,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm3,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+        paddd   xmm5,xmm7
+        paddd   xmm6,xmm7
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
 
-	packssdw  xmm3,xmm0		; xmm3=data2=(20 21 22 23 24 25 26 27)
-	packssdw  xmm5,xmm6		; xmm5=data5=(50 51 52 53 54 55 56 57)
+        packssdw  xmm3,xmm0             ; xmm3=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm5,xmm6             ; xmm5=data5=(50 51 52 53 54 55 56 57)
 
-	movdqa	xmm1, XMMWORD [wk(2)]	; xmm1=tmp13L
-	movdqa	xmm4, XMMWORD [wk(3)]	; xmm4=tmp13H
-	movdqa	xmm2, XMMWORD [wk(8)]	; xmm2=tmp0L
-	movdqa	xmm7, XMMWORD [wk(9)]	; xmm7=tmp0H
+        movdqa  xmm1, XMMWORD [wk(2)]   ; xmm1=tmp13L
+        movdqa  xmm4, XMMWORD [wk(3)]   ; xmm4=tmp13H
+        movdqa  xmm2, XMMWORD [wk(8)]   ; xmm2=tmp0L
+        movdqa  xmm7, XMMWORD [wk(9)]   ; xmm7=tmp0H
 
-	movdqa	xmm0,xmm1
-	movdqa	xmm6,xmm4
-	paddd	xmm1,xmm2		; xmm1=data3L
-	paddd	xmm4,xmm7		; xmm4=data3H
-	psubd	xmm0,xmm2		; xmm0=data4L
-	psubd	xmm6,xmm7		; xmm6=data4H
+        movdqa  xmm0,xmm1
+        movdqa  xmm6,xmm4
+        paddd   xmm1,xmm2               ; xmm1=data3L
+        paddd   xmm4,xmm7               ; xmm4=data3H
+        psubd   xmm0,xmm2               ; xmm0=data4L
+        psubd   xmm6,xmm7               ; xmm6=data4H
 
-	movdqa	xmm2,[rel PD_DESCALE_P1]	; xmm2=[rel PD_DESCALE_P1]
+        movdqa  xmm2,[rel PD_DESCALE_P1]        ; xmm2=[rel PD_DESCALE_P1]
 
-	paddd	xmm1,xmm2
-	paddd	xmm4,xmm2
-	psrad	xmm1,DESCALE_P1
-	psrad	xmm4,DESCALE_P1
-	paddd	xmm0,xmm2
-	paddd	xmm6,xmm2
-	psrad	xmm0,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
+        paddd   xmm1,xmm2
+        paddd   xmm4,xmm2
+        psrad   xmm1,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm0,xmm2
+        paddd   xmm6,xmm2
+        psrad   xmm0,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
 
-	packssdw  xmm1,xmm4		; xmm1=data3=(30 31 32 33 34 35 36 37)
-	packssdw  xmm0,xmm6		; xmm0=data4=(40 41 42 43 44 45 46 47)
+        packssdw  xmm1,xmm4             ; xmm1=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm0,xmm6             ; xmm0=data4=(40 41 42 43 44 45 46 47)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=(00 10 01 11 02 12 03 13)
-	movdqa	xmm2, XMMWORD [wk(1)]	; xmm2=(04 14 05 15 06 16 07 17)
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(00 10 01 11 02 12 03 13)
+        movdqa  xmm2, XMMWORD [wk(1)]   ; xmm2=(04 14 05 15 06 16 07 17)
 
-	movdqa    xmm4,xmm3		; transpose coefficients(phase 1)
-	punpcklwd xmm3,xmm1		; xmm3=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm4,xmm1		; xmm4=(24 34 25 35 26 36 27 37)
-	movdqa    xmm6,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm5		; xmm0=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm6,xmm5		; xmm6=(44 54 45 55 46 56 47 57)
+        movdqa    xmm4,xmm3             ; transpose coefficients(phase 1)
+        punpcklwd xmm3,xmm1             ; xmm3=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm1             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm6,xmm5             ; xmm6=(44 54 45 55 46 56 47 57)
 
-	movdqa    xmm1,xmm7		; transpose coefficients(phase 2)
-	punpckldq xmm7,xmm3		; xmm7=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm1,xmm3		; xmm1=(02 12 22 32 03 13 23 33)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm4		; xmm2=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm5,xmm4		; xmm5=(06 16 26 36 07 17 27 37)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm3             ; xmm7=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm1,xmm3             ; xmm1=(02 12 22 32 03 13 23 33)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm4             ; xmm2=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm5,xmm4             ; xmm5=(06 16 26 36 07 17 27 37)
 
-	movdqa	xmm3, XMMWORD [wk(4)]	; xmm3=(60 70 61 71 62 72 63 73)
-	movdqa	xmm4, XMMWORD [wk(5)]	; xmm4=(64 74 65 75 66 76 67 77)
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=(60 70 61 71 62 72 63 73)
+        movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(64 74 65 75 66 76 67 77)
 
-	movdqa	XMMWORD [wk(6)], xmm2	; wk(6)=(04 14 24 34 05 15 25 35)
-	movdqa	XMMWORD [wk(7)], xmm5	; wk(7)=(06 16 26 36 07 17 27 37)
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(7)], xmm5   ; wk(7)=(06 16 26 36 07 17 27 37)
 
-	movdqa    xmm2,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm3		; xmm0=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm2,xmm3		; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa    xmm5,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm4		; xmm6=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm5,xmm4		; xmm5=(46 56 66 76 47 57 67 77)
+        movdqa    xmm2,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm2,xmm3             ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm4             ; xmm6=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm5,xmm4             ; xmm5=(46 56 66 76 47 57 67 77)
 
-	movdqa     xmm3,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm0		; xmm7=col0=(00 10 20 30 40 50 60 70)
-	punpckhqdq xmm3,xmm0		; xmm3=col1=(01 11 21 31 41 51 61 71)
-	movdqa     xmm4,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm2		; xmm1=col2=(02 12 22 32 42 52 62 72)
-	punpckhqdq xmm4,xmm2		; xmm4=col3=(03 13 23 33 43 53 63 73)
+        movdqa     xmm3,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm0            ; xmm7=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm3,xmm0            ; xmm3=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm4,xmm2            ; xmm4=col3=(03 13 23 33 43 53 63 73)
 
-	movdqa	xmm0, XMMWORD [wk(6)]	; xmm0=(04 14 24 34 05 15 25 35)
-	movdqa	xmm2, XMMWORD [wk(7)]	; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa  xmm0, XMMWORD [wk(6)]   ; xmm0=(04 14 24 34 05 15 25 35)
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa	XMMWORD [wk(8)], xmm3	; wk(8)=col1
-	movdqa	XMMWORD [wk(9)], xmm4	; wk(9)=col3
+        movdqa  XMMWORD [wk(8)], xmm3   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm4   ; wk(9)=col3
 
-	movdqa     xmm3,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=col4=(04 14 24 34 44 54 64 74)
-	punpckhqdq xmm3,xmm6		; xmm3=col5=(05 15 25 35 45 55 65 75)
-	movdqa     xmm4,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm5		; xmm2=col6=(06 16 26 36 46 56 66 76)
-	punpckhqdq xmm4,xmm5		; xmm4=col7=(07 17 27 37 47 57 67 77)
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm3,xmm6            ; xmm3=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm4,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm5            ; xmm2=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm4,xmm5            ; xmm4=col7=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(10)], xmm3	; wk(10)=col5
-	movdqa	XMMWORD [wk(11)], xmm4	; wk(11)=col7
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	rax, [original_rbp]
-	mov	rdi, r12	; (JSAMPROW *)
-	mov	rax, r13
+        mov     rax, [original_rbp]
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     rax, r13
 
-	; -- Even part
+        ; -- Even part
 
-	; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
+        ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movdqa    xmm6,xmm1		; xmm1=in2=z2
-	movdqa    xmm5,xmm1
-	punpcklwd xmm6,xmm2		; xmm2=in6=z3
-	punpckhwd xmm5,xmm2
-	movdqa    xmm1,xmm6
-	movdqa    xmm2,xmm5
-	pmaddwd   xmm6,[rel PW_F130_F054]	; xmm6=tmp3L
-	pmaddwd   xmm5,[rel PW_F130_F054]	; xmm5=tmp3H
-	pmaddwd   xmm1,[rel PW_F054_MF130]	; xmm1=tmp2L
-	pmaddwd   xmm2,[rel PW_F054_MF130]	; xmm2=tmp2H
+        movdqa    xmm6,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm6,xmm2             ; xmm2=in6=z3
+        punpckhwd xmm5,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm2,xmm5
+        pmaddwd   xmm6,[rel PW_F130_F054]       ; xmm6=tmp3L
+        pmaddwd   xmm5,[rel PW_F130_F054]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[rel PW_F054_MF130]      ; xmm1=tmp2L
+        pmaddwd   xmm2,[rel PW_F054_MF130]      ; xmm2=tmp2H
 
-	movdqa    xmm3,xmm7
-	paddw     xmm7,xmm0		; xmm7=in0+in4
-	psubw     xmm3,xmm0		; xmm3=in0-in4
+        movdqa    xmm3,xmm7
+        paddw     xmm7,xmm0             ; xmm7=in0+in4
+        psubw     xmm3,xmm0             ; xmm3=in0-in4
 
-	pxor      xmm4,xmm4
-	pxor      xmm0,xmm0
-	punpcklwd xmm4,xmm7		; xmm4=tmp0L
-	punpckhwd xmm0,xmm7		; xmm0=tmp0H
-	psrad     xmm4,(16-CONST_BITS)	; psrad xmm4,16 & pslld xmm4,CONST_BITS
-	psrad     xmm0,(16-CONST_BITS)	; psrad xmm0,16 & pslld xmm0,CONST_BITS
+        pxor      xmm4,xmm4
+        pxor      xmm0,xmm0
+        punpcklwd xmm4,xmm7             ; xmm4=tmp0L
+        punpckhwd xmm0,xmm7             ; xmm0=tmp0H
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        psrad     xmm0,(16-CONST_BITS)  ; psrad xmm0,16 & pslld xmm0,CONST_BITS
 
-	movdqa	xmm7,xmm4
-	paddd	xmm4,xmm6		; xmm4=tmp10L
-	psubd	xmm7,xmm6		; xmm7=tmp13L
-	movdqa	xmm6,xmm0
-	paddd	xmm0,xmm5		; xmm0=tmp10H
-	psubd	xmm6,xmm5		; xmm6=tmp13H
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm6               ; xmm4=tmp10L
+        psubd   xmm7,xmm6               ; xmm7=tmp13L
+        movdqa  xmm6,xmm0
+        paddd   xmm0,xmm5               ; xmm0=tmp10H
+        psubd   xmm6,xmm5               ; xmm6=tmp13H
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=tmp10L
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp10H
-	movdqa	XMMWORD [wk(2)], xmm7	; wk(2)=tmp13L
-	movdqa	XMMWORD [wk(3)], xmm6	; wk(3)=tmp13H
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm6   ; wk(3)=tmp13H
 
-	pxor      xmm5,xmm5
-	pxor      xmm4,xmm4
-	punpcklwd xmm5,xmm3		; xmm5=tmp1L
-	punpckhwd xmm4,xmm3		; xmm4=tmp1H
-	psrad     xmm5,(16-CONST_BITS)	; psrad xmm5,16 & pslld xmm5,CONST_BITS
-	psrad     xmm4,(16-CONST_BITS)	; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        pxor      xmm5,xmm5
+        pxor      xmm4,xmm4
+        punpcklwd xmm5,xmm3             ; xmm5=tmp1L
+        punpckhwd xmm4,xmm3             ; xmm4=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
 
-	movdqa	xmm0,xmm5
-	paddd	xmm5,xmm1		; xmm5=tmp11L
-	psubd	xmm0,xmm1		; xmm0=tmp12L
-	movdqa	xmm7,xmm4
-	paddd	xmm4,xmm2		; xmm4=tmp11H
-	psubd	xmm7,xmm2		; xmm7=tmp12H
+        movdqa  xmm0,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm0,xmm1               ; xmm0=tmp12L
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm2               ; xmm4=tmp11H
+        psubd   xmm7,xmm2               ; xmm7=tmp12H
 
-	movdqa	XMMWORD [wk(4)], xmm5	; wk(4)=tmp11L
-	movdqa	XMMWORD [wk(5)], xmm4	; wk(5)=tmp11H
-	movdqa	XMMWORD [wk(6)], xmm0	; wk(6)=tmp12L
-	movdqa	XMMWORD [wk(7)], xmm7	; wk(7)=tmp12H
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm0   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm7   ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm6, XMMWORD [wk(9)]	; xmm6=col3
-	movdqa	xmm3, XMMWORD [wk(8)]	; xmm3=col1
-	movdqa	xmm1, XMMWORD [wk(11)]	; xmm1=col7
-	movdqa	xmm2, XMMWORD [wk(10)]	; xmm2=col5
+        movdqa  xmm6, XMMWORD [wk(9)]   ; xmm6=col3
+        movdqa  xmm3, XMMWORD [wk(8)]   ; xmm3=col1
+        movdqa  xmm1, XMMWORD [wk(11)]  ; xmm1=col7
+        movdqa  xmm2, XMMWORD [wk(10)]  ; xmm2=col5
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm4,xmm3
-	paddw	xmm5,xmm1		; xmm5=z3
-	paddw	xmm4,xmm2		; xmm4=z4
+        movdqa  xmm5,xmm6
+        movdqa  xmm4,xmm3
+        paddw   xmm5,xmm1               ; xmm5=z3
+        paddw   xmm4,xmm2               ; xmm4=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm0,xmm5
-	movdqa    xmm7,xmm5
-	punpcklwd xmm0,xmm4
-	punpckhwd xmm7,xmm4
-	movdqa    xmm5,xmm0
-	movdqa    xmm4,xmm7
-	pmaddwd   xmm0,[rel PW_MF078_F117]	; xmm0=z3L
-	pmaddwd   xmm7,[rel PW_MF078_F117]	; xmm7=z3H
-	pmaddwd   xmm5,[rel PW_F117_F078]	; xmm5=z4L
-	pmaddwd   xmm4,[rel PW_F117_F078]	; xmm4=z4H
+        movdqa    xmm0,xmm5
+        movdqa    xmm7,xmm5
+        punpcklwd xmm0,xmm4
+        punpckhwd xmm7,xmm4
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm7
+        pmaddwd   xmm0,[rel PW_MF078_F117]      ; xmm0=z3L
+        pmaddwd   xmm7,[rel PW_MF078_F117]      ; xmm7=z3H
+        pmaddwd   xmm5,[rel PW_F117_F078]       ; xmm5=z4L
+        pmaddwd   xmm4,[rel PW_F117_F078]       ; xmm4=z4H
 
-	movdqa	XMMWORD [wk(10)], xmm0	; wk(10)=z3L
-	movdqa	XMMWORD [wk(11)], xmm7	; wk(11)=z3H
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movdqa    xmm0,xmm1
-	movdqa    xmm7,xmm1
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm7,xmm3
-	movdqa    xmm1,xmm0
-	movdqa    xmm3,xmm7
-	pmaddwd   xmm0,[rel PW_MF060_MF089]	; xmm0=tmp0L
-	pmaddwd   xmm7,[rel PW_MF060_MF089]	; xmm7=tmp0H
-	pmaddwd   xmm1,[rel PW_MF089_F060]	; xmm1=tmp3L
-	pmaddwd   xmm3,[rel PW_MF089_F060]	; xmm3=tmp3H
+        movdqa    xmm0,xmm1
+        movdqa    xmm7,xmm1
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm1,xmm0
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm0,[rel PW_MF060_MF089]     ; xmm0=tmp0L
+        pmaddwd   xmm7,[rel PW_MF060_MF089]     ; xmm7=tmp0H
+        pmaddwd   xmm1,[rel PW_MF089_F060]      ; xmm1=tmp3L
+        pmaddwd   xmm3,[rel PW_MF089_F060]      ; xmm3=tmp3H
 
-	paddd	xmm0, XMMWORD [wk(10)]	; xmm0=tmp0L
-	paddd	xmm7, XMMWORD [wk(11)]	; xmm7=tmp0H
-	paddd	xmm1,xmm5		; xmm1=tmp3L
-	paddd	xmm3,xmm4		; xmm3=tmp3H
+        paddd   xmm0, XMMWORD [wk(10)]  ; xmm0=tmp0L
+        paddd   xmm7, XMMWORD [wk(11)]  ; xmm7=tmp0H
+        paddd   xmm1,xmm5               ; xmm1=tmp3L
+        paddd   xmm3,xmm4               ; xmm3=tmp3H
 
-	movdqa	XMMWORD [wk(8)], xmm0	; wk(8)=tmp0L
-	movdqa	XMMWORD [wk(9)], xmm7	; wk(9)=tmp0H
+        movdqa  XMMWORD [wk(8)], xmm0   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm7   ; wk(9)=tmp0H
 
-	movdqa    xmm0,xmm2
-	movdqa    xmm7,xmm2
-	punpcklwd xmm0,xmm6
-	punpckhwd xmm7,xmm6
-	movdqa    xmm2,xmm0
-	movdqa    xmm6,xmm7
-	pmaddwd   xmm0,[rel PW_MF050_MF256]	; xmm0=tmp1L
-	pmaddwd   xmm7,[rel PW_MF050_MF256]	; xmm7=tmp1H
-	pmaddwd   xmm2,[rel PW_MF256_F050]	; xmm2=tmp2L
-	pmaddwd   xmm6,[rel PW_MF256_F050]	; xmm6=tmp2H
+        movdqa    xmm0,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm0,xmm6
+        punpckhwd xmm7,xmm6
+        movdqa    xmm2,xmm0
+        movdqa    xmm6,xmm7
+        pmaddwd   xmm0,[rel PW_MF050_MF256]     ; xmm0=tmp1L
+        pmaddwd   xmm7,[rel PW_MF050_MF256]     ; xmm7=tmp1H
+        pmaddwd   xmm2,[rel PW_MF256_F050]      ; xmm2=tmp2L
+        pmaddwd   xmm6,[rel PW_MF256_F050]      ; xmm6=tmp2H
 
-	paddd	xmm0,xmm5		; xmm0=tmp1L
-	paddd	xmm7,xmm4		; xmm7=tmp1H
-	paddd	xmm2, XMMWORD [wk(10)]	; xmm2=tmp2L
-	paddd	xmm6, XMMWORD [wk(11)]	; xmm6=tmp2H
+        paddd   xmm0,xmm5               ; xmm0=tmp1L
+        paddd   xmm7,xmm4               ; xmm7=tmp1H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
 
-	movdqa	XMMWORD [wk(10)], xmm0	; wk(10)=tmp1L
-	movdqa	XMMWORD [wk(11)], xmm7	; wk(11)=tmp1H
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=tmp10L
-	movdqa	xmm4, XMMWORD [wk(1)]	; xmm4=tmp10H
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm4, XMMWORD [wk(1)]   ; xmm4=tmp10H
 
-	movdqa	xmm0,xmm5
-	movdqa	xmm7,xmm4
-	paddd	xmm5,xmm1		; xmm5=data0L
-	paddd	xmm4,xmm3		; xmm4=data0H
-	psubd	xmm0,xmm1		; xmm0=data7L
-	psubd	xmm7,xmm3		; xmm7=data7H
+        movdqa  xmm0,xmm5
+        movdqa  xmm7,xmm4
+        paddd   xmm5,xmm1               ; xmm5=data0L
+        paddd   xmm4,xmm3               ; xmm4=data0H
+        psubd   xmm0,xmm1               ; xmm0=data7L
+        psubd   xmm7,xmm3               ; xmm7=data7H
 
-	movdqa	xmm1,[rel PD_DESCALE_P2]	; xmm1=[rel PD_DESCALE_P2]
+        movdqa  xmm1,[rel PD_DESCALE_P2]        ; xmm1=[rel PD_DESCALE_P2]
 
-	paddd	xmm5,xmm1
-	paddd	xmm4,xmm1
-	psrad	xmm5,DESCALE_P2
-	psrad	xmm4,DESCALE_P2
-	paddd	xmm0,xmm1
-	paddd	xmm7,xmm1
-	psrad	xmm0,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm5,xmm1
+        paddd   xmm4,xmm1
+        psrad   xmm5,DESCALE_P2
+        psrad   xmm4,DESCALE_P2
+        paddd   xmm0,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm5,xmm4		; xmm5=data0=(00 10 20 30 40 50 60 70)
-	packssdw  xmm0,xmm7		; xmm0=data7=(07 17 27 37 47 57 67 77)
+        packssdw  xmm5,xmm4             ; xmm5=data0=(00 10 20 30 40 50 60 70)
+        packssdw  xmm0,xmm7             ; xmm0=data7=(07 17 27 37 47 57 67 77)
 
-	movdqa	xmm3, XMMWORD [wk(4)]	; xmm3=tmp11L
-	movdqa	xmm1, XMMWORD [wk(5)]	; xmm1=tmp11H
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=tmp11L
+        movdqa  xmm1, XMMWORD [wk(5)]   ; xmm1=tmp11H
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm7,xmm1
-	paddd	xmm3,xmm2		; xmm3=data1L
-	paddd	xmm1,xmm6		; xmm1=data1H
-	psubd	xmm4,xmm2		; xmm4=data6L
-	psubd	xmm7,xmm6		; xmm7=data6H
+        movdqa  xmm4,xmm3
+        movdqa  xmm7,xmm1
+        paddd   xmm3,xmm2               ; xmm3=data1L
+        paddd   xmm1,xmm6               ; xmm1=data1H
+        psubd   xmm4,xmm2               ; xmm4=data6L
+        psubd   xmm7,xmm6               ; xmm7=data6H
 
-	movdqa	xmm2,[rel PD_DESCALE_P2]	; xmm2=[rel PD_DESCALE_P2]
+        movdqa  xmm2,[rel PD_DESCALE_P2]        ; xmm2=[rel PD_DESCALE_P2]
 
-	paddd	xmm3,xmm2
-	paddd	xmm1,xmm2
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm4,xmm2
-	paddd	xmm7,xmm2
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm3,xmm2
+        paddd   xmm1,xmm2
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm4,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm3,xmm1		; xmm3=data1=(01 11 21 31 41 51 61 71)
-	packssdw  xmm4,xmm7		; xmm4=data6=(06 16 26 36 46 56 66 76)
+        packssdw  xmm3,xmm1             ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        packssdw  xmm4,xmm7             ; xmm4=data6=(06 16 26 36 46 56 66 76)
 
-	packsswb  xmm5,xmm4		; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	packsswb  xmm3,xmm0		; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        packsswb  xmm5,xmm4             ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm0             ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm6, XMMWORD [wk(6)]	; xmm6=tmp12L
-	movdqa	xmm2, XMMWORD [wk(7)]	; xmm2=tmp12H
-	movdqa	xmm1, XMMWORD [wk(10)]	; xmm1=tmp1L
-	movdqa	xmm7, XMMWORD [wk(11)]	; xmm7=tmp1H
+        movdqa  xmm6, XMMWORD [wk(6)]   ; xmm6=tmp12L
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=tmp12H
+        movdqa  xmm1, XMMWORD [wk(10)]  ; xmm1=tmp1L
+        movdqa  xmm7, XMMWORD [wk(11)]  ; xmm7=tmp1H
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm4,xmm6
-	movdqa	xmm0,xmm2
-	paddd	xmm6,xmm1		; xmm6=data2L
-	paddd	xmm2,xmm7		; xmm2=data2H
-	psubd	xmm4,xmm1		; xmm4=data5L
-	psubd	xmm0,xmm7		; xmm0=data5H
+        movdqa  xmm4,xmm6
+        movdqa  xmm0,xmm2
+        paddd   xmm6,xmm1               ; xmm6=data2L
+        paddd   xmm2,xmm7               ; xmm2=data2H
+        psubd   xmm4,xmm1               ; xmm4=data5L
+        psubd   xmm0,xmm7               ; xmm0=data5H
 
-	movdqa	xmm5,[rel PD_DESCALE_P2]	; xmm5=[rel PD_DESCALE_P2]
+        movdqa  xmm5,[rel PD_DESCALE_P2]        ; xmm5=[rel PD_DESCALE_P2]
 
-	paddd	xmm6,xmm5
-	paddd	xmm2,xmm5
-	psrad	xmm6,DESCALE_P2
-	psrad	xmm2,DESCALE_P2
-	paddd	xmm4,xmm5
-	paddd	xmm0,xmm5
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm0,DESCALE_P2
+        paddd   xmm6,xmm5
+        paddd   xmm2,xmm5
+        psrad   xmm6,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm4,xmm5
+        paddd   xmm0,xmm5
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
 
-	packssdw  xmm6,xmm2		; xmm6=data2=(02 12 22 32 42 52 62 72)
-	packssdw  xmm4,xmm0		; xmm4=data5=(05 15 25 35 45 55 65 75)
+        packssdw  xmm6,xmm2             ; xmm6=data2=(02 12 22 32 42 52 62 72)
+        packssdw  xmm4,xmm0             ; xmm4=data5=(05 15 25 35 45 55 65 75)
 
-	movdqa	xmm3, XMMWORD [wk(2)]	; xmm3=tmp13L
-	movdqa	xmm1, XMMWORD [wk(3)]	; xmm1=tmp13H
-	movdqa	xmm7, XMMWORD [wk(8)]	; xmm7=tmp0L
-	movdqa	xmm5, XMMWORD [wk(9)]	; xmm5=tmp0H
+        movdqa  xmm3, XMMWORD [wk(2)]   ; xmm3=tmp13L
+        movdqa  xmm1, XMMWORD [wk(3)]   ; xmm1=tmp13H
+        movdqa  xmm7, XMMWORD [wk(8)]   ; xmm7=tmp0L
+        movdqa  xmm5, XMMWORD [wk(9)]   ; xmm5=tmp0H
 
-	movdqa	xmm2,xmm3
-	movdqa	xmm0,xmm1
-	paddd	xmm3,xmm7		; xmm3=data3L
-	paddd	xmm1,xmm5		; xmm1=data3H
-	psubd	xmm2,xmm7		; xmm2=data4L
-	psubd	xmm0,xmm5		; xmm0=data4H
+        movdqa  xmm2,xmm3
+        movdqa  xmm0,xmm1
+        paddd   xmm3,xmm7               ; xmm3=data3L
+        paddd   xmm1,xmm5               ; xmm1=data3H
+        psubd   xmm2,xmm7               ; xmm2=data4L
+        psubd   xmm0,xmm5               ; xmm0=data4H
 
-	movdqa	xmm7,[rel PD_DESCALE_P2]	; xmm7=[rel PD_DESCALE_P2]
+        movdqa  xmm7,[rel PD_DESCALE_P2]        ; xmm7=[rel PD_DESCALE_P2]
 
-	paddd	xmm3,xmm7
-	paddd	xmm1,xmm7
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm2,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm2,DESCALE_P2
-	psrad	xmm0,DESCALE_P2
+        paddd   xmm3,xmm7
+        paddd   xmm1,xmm7
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm2,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm2,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
 
-	movdqa    xmm5,[rel PB_CENTERJSAMP]	; xmm5=[rel PB_CENTERJSAMP]
+        movdqa    xmm5,[rel PB_CENTERJSAMP]     ; xmm5=[rel PB_CENTERJSAMP]
 
-	packssdw  xmm3,xmm1		; xmm3=data3=(03 13 23 33 43 53 63 73)
-	packssdw  xmm2,xmm0		; xmm2=data4=(04 14 24 34 44 54 64 74)
+        packssdw  xmm3,xmm1             ; xmm3=data3=(03 13 23 33 43 53 63 73)
+        packssdw  xmm2,xmm0             ; xmm2=data4=(04 14 24 34 44 54 64 74)
 
-	movdqa    xmm7, XMMWORD [wk(0)]	; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	movdqa    xmm1, XMMWORD [wk(1)]	; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        movdqa    xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	packsswb  xmm6,xmm2		; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
-	packsswb  xmm3,xmm4		; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+        packsswb  xmm6,xmm2             ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm3,xmm4             ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
 
-	paddb     xmm7,xmm5
-	paddb     xmm1,xmm5
-	paddb     xmm6,xmm5
-	paddb     xmm3,xmm5
+        paddb     xmm7,xmm5
+        paddb     xmm1,xmm5
+        paddb     xmm6,xmm5
+        paddb     xmm3,xmm5
 
-	movdqa    xmm0,xmm7	; transpose coefficients(phase 1)
-	punpcklbw xmm7,xmm1	; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
-	punpckhbw xmm0,xmm1	; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
-	movdqa    xmm2,xmm6	; transpose coefficients(phase 1)
-	punpcklbw xmm6,xmm3	; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
-	punpckhbw xmm2,xmm3	; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm7     ; transpose coefficients(phase 1)
+        punpcklbw xmm7,xmm1     ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm1     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 1)
+        punpcklbw xmm6,xmm3     ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm2,xmm3     ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
 
-	movdqa    xmm4,xmm7	; transpose coefficients(phase 2)
-	punpcklwd xmm7,xmm6	; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm6	; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
-	movdqa    xmm5,xmm2	; transpose coefficients(phase 2)
-	punpcklwd xmm2,xmm0	; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
-	punpckhwd xmm5,xmm0	; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm7     ; transpose coefficients(phase 2)
+        punpcklwd xmm7,xmm6     ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm6     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm5,xmm2     ; transpose coefficients(phase 2)
+        punpcklwd xmm2,xmm0     ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm5,xmm0     ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
 
-	movdqa    xmm1,xmm7	; transpose coefficients(phase 3)
-	punpckldq xmm7,xmm2	; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm1,xmm2	; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
-	movdqa    xmm3,xmm4	; transpose coefficients(phase 3)
-	punpckldq xmm4,xmm5	; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
-	punpckhdq xmm3,xmm5	; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+        movdqa    xmm1,xmm7     ; transpose coefficients(phase 3)
+        punpckldq xmm7,xmm2     ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm1,xmm2     ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm3,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm5     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm3,xmm5     ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
 
-	pshufd	xmm6,xmm7,0x4E	; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm0,xmm1,0x4E	; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
-	pshufd	xmm2,xmm4,0x4E	; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
-	pshufd	xmm5,xmm3,0x4E	; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+        pshufd  xmm6,xmm7,0x4E  ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm1,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm2,xmm4,0x4E  ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm5,xmm3,0x4E  ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
 
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
-	mov	rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
+        mov     rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
 
-	mov	rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
-	mov	rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
-	movq	XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
+        mov     rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2int.asm b/simd/jiss2int.asm
index adf39fb..e78f5ff 100644
--- a/simd/jiss2int.asm
+++ b/simd/jiss2int.asm
@@ -26,67 +26,67 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13              ; fraction bits of the F_x_xxx constants
+%define PASS1_BITS      2               ; extra fraction bits kept in pass 1 output
 
-%define DESCALE_P1	(CONST_BITS-PASS1_BITS)
-%define DESCALE_P2	(CONST_BITS+PASS1_BITS+3)
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; psrad count used in pass 1
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)       ; presumably the pass 2 shift - confirm
 
 %if CONST_BITS == 13
-F_0_298	equ	 2446		; FIX(0.298631336)
-F_0_390	equ	 3196		; FIX(0.390180644)
-F_0_541	equ	 4433		; FIX(0.541196100)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_175	equ	 9633		; FIX(1.175875602)
-F_1_501	equ	12299		; FIX(1.501321110)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_1_961	equ	16069		; FIX(1.961570560)
-F_2_053	equ	16819		; FIX(2.053119869)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_072	equ	25172		; FIX(3.072711026)
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_298	equ	DESCALE( 320652955,30-CONST_BITS)	; FIX(0.298631336)
-F_0_390	equ	DESCALE( 418953276,30-CONST_BITS)	; FIX(0.390180644)
-F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_175	equ	DESCALE(1262586813,30-CONST_BITS)	; FIX(1.175875602)
-F_1_501	equ	DESCALE(1612031267,30-CONST_BITS)	; FIX(1.501321110)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_1_961	equ	DESCALE(2106220350,30-CONST_BITS)	; FIX(1.961570560)
-F_2_053	equ	DESCALE(2204520673,30-CONST_BITS)	; FIX(2.053119869)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_072	equ	DESCALE(3299298341,30-CONST_BITS)	; FIX(3.072711026)
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_islow_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
 
-PW_F130_F054	times 4 dw  (F_0_541+F_0_765), F_0_541
-PW_F054_MF130	times 4 dw  F_0_541, (F_0_541-F_1_847)
-PW_MF078_F117	times 4 dw  (F_1_175-F_1_961), F_1_175
-PW_F117_F078	times 4 dw  F_1_175, (F_1_175-F_0_390)
-PW_MF060_MF089	times 4 dw  (F_0_298-F_0_899),-F_0_899
-PW_MF089_F060	times 4 dw -F_0_899, (F_1_501-F_0_899)
-PW_MF050_MF256	times 4 dw  (F_2_053-F_2_562),-F_2_562
-PW_MF256_F050	times 4 dw -F_2_562, (F_3_072-F_2_562)
-PD_DESCALE_P1	times 4 dd  1 << (DESCALE_P1-1)
-PD_DESCALE_P2	times 4 dd  1 << (DESCALE_P2-1)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541        ; (z2,z3) -> tmp3
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)        ; (z2,z3) -> tmp2
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175        ; (z3,z4) -> z3
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)        ; (z3,z4) -> z4
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899        ; (tmp0,tmp3) -> tmp0
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)        ; (tmp0,tmp3) -> tmp3
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562        ; (tmp1,tmp2) -> tmp1
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)        ; (tmp1,tmp2) -> tmp2
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)               ; bias added before psrad DESCALE_P1
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)               ; same form for DESCALE_P2
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE                     ; CENTERJSAMPLE in every byte
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -95,765 +95,765 @@
 ;                        JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; jpeg_component_info * compptr
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table (read as quantptr)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		12
+%define original_ebp    ebp+0           ; [ebp] = esp saved by the prologue
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          12              ; wk(0)..wk(WK_NUM-1) scratch slots
 
-	align	16
-	global	EXTN(jsimd_idct_islow_sse2)
+        align   16
+        global  EXTN(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	xmm1,xmm0
-	packsswb xmm1,xmm1
-	packsswb xmm1,xmm1
-	movd	eax,xmm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	xmm5,PASS1_BITS
+        psllw   xmm5,PASS1_BITS
 
-	movdqa    xmm4,xmm5		; xmm5=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm5,xmm5		; xmm5=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm4,xmm4		; xmm4=(04 04 05 05 06 06 07 07)
+        movdqa    xmm4,xmm5             ; xmm5=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm5,xmm5             ; xmm5=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm4,xmm4             ; xmm4=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm7,xmm5,0x00		; xmm7=col0=(00 00 00 00 00 00 00 00)
-	pshufd	xmm6,xmm5,0x55		; xmm6=col1=(01 01 01 01 01 01 01 01)
-	pshufd	xmm1,xmm5,0xAA		; xmm1=col2=(02 02 02 02 02 02 02 02)
-	pshufd	xmm5,xmm5,0xFF		; xmm5=col3=(03 03 03 03 03 03 03 03)
-	pshufd	xmm0,xmm4,0x00		; xmm0=col4=(04 04 04 04 04 04 04 04)
-	pshufd	xmm3,xmm4,0x55		; xmm3=col5=(05 05 05 05 05 05 05 05)
-	pshufd	xmm2,xmm4,0xAA		; xmm2=col6=(06 06 06 06 06 06 06 06)
-	pshufd	xmm4,xmm4,0xFF		; xmm4=col7=(07 07 07 07 07 07 07 07)
+        pshufd  xmm7,xmm5,0x00          ; xmm7=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm6,xmm5,0x55          ; xmm6=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm1,xmm5,0xAA          ; xmm1=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm5,xmm5,0xFF          ; xmm5=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm0,xmm4,0x00          ; xmm0=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm3,xmm4,0x55          ; xmm3=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm2,xmm4,0xAA          ; xmm2=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm4,xmm4,0xFF          ; xmm4=col7=(07 07 07 07 07 07 07 07)
 
-	movdqa	XMMWORD [wk(8)], xmm6	; wk(8)=col1
-	movdqa	XMMWORD [wk(9)], xmm5	; wk(9)=col3
-	movdqa	XMMWORD [wk(10)], xmm3	; wk(10)=col5
-	movdqa	XMMWORD [wk(11)], xmm4	; wk(11)=col7
-	jmp	near .column_end
-	alignx	16,7
+        movdqa  XMMWORD [wk(8)], xmm6   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm5   ; wk(9)=col3
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+        jmp     near .column_end
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movdqa    xmm4,xmm1		; xmm1=in2=z2
-	movdqa    xmm5,xmm1
-	punpcklwd xmm4,xmm3		; xmm3=in6=z3
-	punpckhwd xmm5,xmm3
-	movdqa    xmm1,xmm4
-	movdqa    xmm3,xmm5
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]	; xmm4=tmp3L
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]	; xmm5=tmp3H
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]	; xmm1=tmp2L
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_F054_MF130)]	; xmm3=tmp2H
+        movdqa    xmm4,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm4,xmm3             ; xmm3=in6=z3
+        punpckhwd xmm5,xmm3
+        movdqa    xmm1,xmm4
+        movdqa    xmm3,xmm5
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=tmp3L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm3=tmp2H
 
-	movdqa    xmm6,xmm0
-	paddw     xmm0,xmm2		; xmm0=in0+in4
-	psubw     xmm6,xmm2		; xmm6=in0-in4
+        movdqa    xmm6,xmm0
+        paddw     xmm0,xmm2             ; xmm0=in0+in4
+        psubw     xmm6,xmm2             ; xmm6=in0-in4
 
-	pxor      xmm7,xmm7
-	pxor      xmm2,xmm2
-	punpcklwd xmm7,xmm0		; xmm7=tmp0L
-	punpckhwd xmm2,xmm0		; xmm2=tmp0H
-	psrad     xmm7,(16-CONST_BITS)	; psrad xmm7,16 & pslld xmm7,CONST_BITS
-	psrad     xmm2,(16-CONST_BITS)	; psrad xmm2,16 & pslld xmm2,CONST_BITS
+        pxor      xmm7,xmm7
+        pxor      xmm2,xmm2
+        punpcklwd xmm7,xmm0             ; xmm7=tmp0L
+        punpckhwd xmm2,xmm0             ; xmm2=tmp0H
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        psrad     xmm2,(16-CONST_BITS)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS
 
-	movdqa	xmm0,xmm7
-	paddd	xmm7,xmm4		; xmm7=tmp10L
-	psubd	xmm0,xmm4		; xmm0=tmp13L
-	movdqa	xmm4,xmm2
-	paddd	xmm2,xmm5		; xmm2=tmp10H
-	psubd	xmm4,xmm5		; xmm4=tmp13H
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm4               ; xmm7=tmp10L
+        psubd   xmm0,xmm4               ; xmm0=tmp13L
+        movdqa  xmm4,xmm2
+        paddd   xmm2,xmm5               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp13H
 
-	movdqa	XMMWORD [wk(0)], xmm7	; wk(0)=tmp10L
-	movdqa	XMMWORD [wk(1)], xmm2	; wk(1)=tmp10H
-	movdqa	XMMWORD [wk(2)], xmm0	; wk(2)=tmp13L
-	movdqa	XMMWORD [wk(3)], xmm4	; wk(3)=tmp13H
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm0   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm4   ; wk(3)=tmp13H
 
-	pxor      xmm5,xmm5
-	pxor      xmm7,xmm7
-	punpcklwd xmm5,xmm6		; xmm5=tmp1L
-	punpckhwd xmm7,xmm6		; xmm7=tmp1H
-	psrad     xmm5,(16-CONST_BITS)	; psrad xmm5,16 & pslld xmm5,CONST_BITS
-	psrad     xmm7,(16-CONST_BITS)	; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        pxor      xmm5,xmm5
+        pxor      xmm7,xmm7
+        punpcklwd xmm5,xmm6             ; xmm5=tmp1L
+        punpckhwd xmm7,xmm6             ; xmm7=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
 
-	movdqa	xmm2,xmm5
-	paddd	xmm5,xmm1		; xmm5=tmp11L
-	psubd	xmm2,xmm1		; xmm2=tmp12L
-	movdqa	xmm0,xmm7
-	paddd	xmm7,xmm3		; xmm7=tmp11H
-	psubd	xmm0,xmm3		; xmm0=tmp12H
+        movdqa  xmm2,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm2,xmm1               ; xmm2=tmp12L
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm3               ; xmm7=tmp11H
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
 
-	movdqa	XMMWORD [wk(4)], xmm5	; wk(4)=tmp11L
-	movdqa	XMMWORD [wk(5)], xmm7	; wk(5)=tmp11H
-	movdqa	XMMWORD [wk(6)], xmm2	; wk(6)=tmp12L
-	movdqa	XMMWORD [wk(7)], xmm0	; wk(7)=tmp12H
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm7   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm0   ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm7,xmm4
-	paddw	xmm5,xmm3		; xmm5=z3
-	paddw	xmm7,xmm1		; xmm7=z4
+        movdqa  xmm5,xmm6
+        movdqa  xmm7,xmm4
+        paddw   xmm5,xmm3               ; xmm5=z3
+        paddw   xmm7,xmm1               ; xmm7=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm2,xmm5
-	movdqa    xmm0,xmm5
-	punpcklwd xmm2,xmm7
-	punpckhwd xmm0,xmm7
-	movdqa    xmm5,xmm2
-	movdqa    xmm7,xmm0
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_MF078_F117)]	; xmm2=z3L
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]	; xmm0=z3H
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]	; xmm5=z4L
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_F117_F078)]	; xmm7=z4H
+        movdqa    xmm2,xmm5
+        movdqa    xmm0,xmm5
+        punpcklwd xmm2,xmm7
+        punpckhwd xmm0,xmm7
+        movdqa    xmm5,xmm2
+        movdqa    xmm7,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm2=z3L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F117_F078)]       ; xmm7=z4H
 
-	movdqa	XMMWORD [wk(10)], xmm2	; wk(10)=z3L
-	movdqa	XMMWORD [wk(11)], xmm0	; wk(11)=z3H
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movdqa    xmm2,xmm3
-	movdqa    xmm0,xmm3
-	punpcklwd xmm2,xmm4
-	punpckhwd xmm0,xmm4
-	movdqa    xmm3,xmm2
-	movdqa    xmm4,xmm0
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm2=tmp0L
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm0=tmp0H
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]	; xmm3=tmp3L
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_MF089_F060)]	; xmm4=tmp3H
+        movdqa    xmm2,xmm3
+        movdqa    xmm0,xmm3
+        punpcklwd xmm2,xmm4
+        punpckhwd xmm0,xmm4
+        movdqa    xmm3,xmm2
+        movdqa    xmm4,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm2=tmp0L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0H
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm4=tmp3H
 
-	paddd	xmm2, XMMWORD [wk(10)]	; xmm2=tmp0L
-	paddd	xmm0, XMMWORD [wk(11)]	; xmm0=tmp0H
-	paddd	xmm3,xmm5		; xmm3=tmp3L
-	paddd	xmm4,xmm7		; xmm4=tmp3H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp0L
+        paddd   xmm0, XMMWORD [wk(11)]  ; xmm0=tmp0H
+        paddd   xmm3,xmm5               ; xmm3=tmp3L
+        paddd   xmm4,xmm7               ; xmm4=tmp3H
 
-	movdqa	XMMWORD [wk(8)], xmm2	; wk(8)=tmp0L
-	movdqa	XMMWORD [wk(9)], xmm0	; wk(9)=tmp0H
+        movdqa  XMMWORD [wk(8)], xmm2   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm0   ; wk(9)=tmp0H
 
-	movdqa    xmm2,xmm1
-	movdqa    xmm0,xmm1
-	punpcklwd xmm2,xmm6
-	punpckhwd xmm0,xmm6
-	movdqa    xmm1,xmm2
-	movdqa    xmm6,xmm0
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm2=tmp1L
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm0=tmp1H
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF256_F050)]	; xmm1=tmp2L
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]	; xmm6=tmp2H
+        movdqa    xmm2,xmm1
+        movdqa    xmm0,xmm1
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm0,xmm6
+        movdqa    xmm1,xmm2
+        movdqa    xmm6,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm2=tmp1L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm1=tmp2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
 
-	paddd	xmm2,xmm5		; xmm2=tmp1L
-	paddd	xmm0,xmm7		; xmm0=tmp1H
-	paddd	xmm1, XMMWORD [wk(10)]	; xmm1=tmp2L
-	paddd	xmm6, XMMWORD [wk(11)]	; xmm6=tmp2H
+        paddd   xmm2,xmm5               ; xmm2=tmp1L
+        paddd   xmm0,xmm7               ; xmm0=tmp1H
+        paddd   xmm1, XMMWORD [wk(10)]  ; xmm1=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
 
-	movdqa	XMMWORD [wk(10)], xmm2	; wk(10)=tmp1L
-	movdqa	XMMWORD [wk(11)], xmm0	; wk(11)=tmp1H
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=tmp10L
-	movdqa	xmm7, XMMWORD [wk(1)]	; xmm7=tmp10H
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=tmp10H
 
-	movdqa	xmm2,xmm5
-	movdqa	xmm0,xmm7
-	paddd	xmm5,xmm3		; xmm5=data0L
-	paddd	xmm7,xmm4		; xmm7=data0H
-	psubd	xmm2,xmm3		; xmm2=data7L
-	psubd	xmm0,xmm4		; xmm0=data7H
+        movdqa  xmm2,xmm5
+        movdqa  xmm0,xmm7
+        paddd   xmm5,xmm3               ; xmm5=data0L
+        paddd   xmm7,xmm4               ; xmm7=data0H
+        psubd   xmm2,xmm3               ; xmm2=data7L
+        psubd   xmm0,xmm4               ; xmm0=data7H
 
-	movdqa	xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]	; xmm3=[PD_DESCALE_P1]
+        movdqa  xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm3=[PD_DESCALE_P1]
 
-	paddd	xmm5,xmm3
-	paddd	xmm7,xmm3
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm7,DESCALE_P1
-	paddd	xmm2,xmm3
-	paddd	xmm0,xmm3
-	psrad	xmm2,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm5,xmm3
+        paddd   xmm7,xmm3
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm7,DESCALE_P1
+        paddd   xmm2,xmm3
+        paddd   xmm0,xmm3
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm5,xmm7		; xmm5=data0=(00 01 02 03 04 05 06 07)
-	packssdw  xmm2,xmm0		; xmm2=data7=(70 71 72 73 74 75 76 77)
+        packssdw  xmm5,xmm7             ; xmm5=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm2,xmm0             ; xmm2=data7=(70 71 72 73 74 75 76 77)
 
-	movdqa	xmm4, XMMWORD [wk(4)]	; xmm4=tmp11L
-	movdqa	xmm3, XMMWORD [wk(5)]	; xmm3=tmp11H
+        movdqa  xmm4, XMMWORD [wk(4)]   ; xmm4=tmp11L
+        movdqa  xmm3, XMMWORD [wk(5)]   ; xmm3=tmp11H
 
-	movdqa	xmm7,xmm4
-	movdqa	xmm0,xmm3
-	paddd	xmm4,xmm1		; xmm4=data1L
-	paddd	xmm3,xmm6		; xmm3=data1H
-	psubd	xmm7,xmm1		; xmm7=data6L
-	psubd	xmm0,xmm6		; xmm0=data6H
+        movdqa  xmm7,xmm4
+        movdqa  xmm0,xmm3
+        paddd   xmm4,xmm1               ; xmm4=data1L
+        paddd   xmm3,xmm6               ; xmm3=data1H
+        psubd   xmm7,xmm1               ; xmm7=data6L
+        psubd   xmm0,xmm6               ; xmm0=data6H
 
-	movdqa	xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]	; xmm1=[PD_DESCALE_P1]
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm1=[PD_DESCALE_P1]
 
-	paddd	xmm4,xmm1
-	paddd	xmm3,xmm1
-	psrad	xmm4,DESCALE_P1
-	psrad	xmm3,DESCALE_P1
-	paddd	xmm7,xmm1
-	paddd	xmm0,xmm1
-	psrad	xmm7,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
+        paddd   xmm4,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+        paddd   xmm7,xmm1
+        paddd   xmm0,xmm1
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
 
-	packssdw  xmm4,xmm3		; xmm4=data1=(10 11 12 13 14 15 16 17)
-	packssdw  xmm7,xmm0		; xmm7=data6=(60 61 62 63 64 65 66 67)
+        packssdw  xmm4,xmm3             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm7,xmm0             ; xmm7=data6=(60 61 62 63 64 65 66 67)
 
-	movdqa    xmm6,xmm5		; transpose coefficients(phase 1)
-	punpcklwd xmm5,xmm4		; xmm5=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm6,xmm4		; xmm6=(04 14 05 15 06 16 07 17)
-	movdqa    xmm1,xmm7		; transpose coefficients(phase 1)
-	punpcklwd xmm7,xmm2		; xmm7=(60 70 61 71 62 72 63 73)
-	punpckhwd xmm1,xmm2		; xmm1=(64 74 65 75 66 76 67 77)
+        movdqa    xmm6,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm4             ; xmm5=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4             ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm2             ; xmm7=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm1,xmm2             ; xmm1=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm3, XMMWORD [wk(6)]	; xmm3=tmp12L
-	movdqa	xmm0, XMMWORD [wk(7)]	; xmm0=tmp12H
-	movdqa	xmm4, XMMWORD [wk(10)]	; xmm4=tmp1L
-	movdqa	xmm2, XMMWORD [wk(11)]	; xmm2=tmp1H
+        movdqa  xmm3, XMMWORD [wk(6)]   ; xmm3=tmp12L
+        movdqa  xmm0, XMMWORD [wk(7)]   ; xmm0=tmp12H
+        movdqa  xmm4, XMMWORD [wk(10)]  ; xmm4=tmp1L
+        movdqa  xmm2, XMMWORD [wk(11)]  ; xmm2=tmp1H
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(00 10 01 11 02 12 03 13)
-	movdqa	XMMWORD [wk(1)], xmm6	; wk(1)=(04 14 05 15 06 16 07 17)
-	movdqa	XMMWORD [wk(4)], xmm7	; wk(4)=(60 70 61 71 62 72 63 73)
-	movdqa	XMMWORD [wk(5)], xmm1	; wk(5)=(64 74 65 75 66 76 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 01 11 02 12 03 13)
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=(04 14 05 15 06 16 07 17)
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(5)], xmm1   ; wk(5)=(64 74 65 75 66 76 67 77)
 
-	movdqa	xmm5,xmm3
-	movdqa	xmm6,xmm0
-	paddd	xmm3,xmm4		; xmm3=data2L
-	paddd	xmm0,xmm2		; xmm0=data2H
-	psubd	xmm5,xmm4		; xmm5=data5L
-	psubd	xmm6,xmm2		; xmm6=data5H
+        movdqa  xmm5,xmm3
+        movdqa  xmm6,xmm0
+        paddd   xmm3,xmm4               ; xmm3=data2L
+        paddd   xmm0,xmm2               ; xmm0=data2H
+        psubd   xmm5,xmm4               ; xmm5=data5L
+        psubd   xmm6,xmm2               ; xmm6=data5H
 
-	movdqa	xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]	; xmm7=[PD_DESCALE_P1]
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm7=[PD_DESCALE_P1]
 
-	paddd	xmm3,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm3,DESCALE_P1
-	psrad	xmm0,DESCALE_P1
-	paddd	xmm5,xmm7
-	paddd	xmm6,xmm7
-	psrad	xmm5,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
+        paddd   xmm3,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm3,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+        paddd   xmm5,xmm7
+        paddd   xmm6,xmm7
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
 
-	packssdw  xmm3,xmm0		; xmm3=data2=(20 21 22 23 24 25 26 27)
-	packssdw  xmm5,xmm6		; xmm5=data5=(50 51 52 53 54 55 56 57)
+        packssdw  xmm3,xmm0             ; xmm3=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm5,xmm6             ; xmm5=data5=(50 51 52 53 54 55 56 57)
 
-	movdqa	xmm1, XMMWORD [wk(2)]	; xmm1=tmp13L
-	movdqa	xmm4, XMMWORD [wk(3)]	; xmm4=tmp13H
-	movdqa	xmm2, XMMWORD [wk(8)]	; xmm2=tmp0L
-	movdqa	xmm7, XMMWORD [wk(9)]	; xmm7=tmp0H
+        movdqa  xmm1, XMMWORD [wk(2)]   ; xmm1=tmp13L
+        movdqa  xmm4, XMMWORD [wk(3)]   ; xmm4=tmp13H
+        movdqa  xmm2, XMMWORD [wk(8)]   ; xmm2=tmp0L
+        movdqa  xmm7, XMMWORD [wk(9)]   ; xmm7=tmp0H
 
-	movdqa	xmm0,xmm1
-	movdqa	xmm6,xmm4
-	paddd	xmm1,xmm2		; xmm1=data3L
-	paddd	xmm4,xmm7		; xmm4=data3H
-	psubd	xmm0,xmm2		; xmm0=data4L
-	psubd	xmm6,xmm7		; xmm6=data4H
+        movdqa  xmm0,xmm1
+        movdqa  xmm6,xmm4
+        paddd   xmm1,xmm2               ; xmm1=data3L
+        paddd   xmm4,xmm7               ; xmm4=data3H
+        psubd   xmm0,xmm2               ; xmm0=data4L
+        psubd   xmm6,xmm7               ; xmm6=data4H
 
-	movdqa	xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]	; xmm2=[PD_DESCALE_P1]
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm2=[PD_DESCALE_P1]
 
-	paddd	xmm1,xmm2
-	paddd	xmm4,xmm2
-	psrad	xmm1,DESCALE_P1
-	psrad	xmm4,DESCALE_P1
-	paddd	xmm0,xmm2
-	paddd	xmm6,xmm2
-	psrad	xmm0,DESCALE_P1
-	psrad	xmm6,DESCALE_P1
+        paddd   xmm1,xmm2
+        paddd   xmm4,xmm2
+        psrad   xmm1,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm0,xmm2
+        paddd   xmm6,xmm2
+        psrad   xmm0,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
 
-	packssdw  xmm1,xmm4		; xmm1=data3=(30 31 32 33 34 35 36 37)
-	packssdw  xmm0,xmm6		; xmm0=data4=(40 41 42 43 44 45 46 47)
+        packssdw  xmm1,xmm4             ; xmm1=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm0,xmm6             ; xmm0=data4=(40 41 42 43 44 45 46 47)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=(00 10 01 11 02 12 03 13)
-	movdqa	xmm2, XMMWORD [wk(1)]	; xmm2=(04 14 05 15 06 16 07 17)
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(00 10 01 11 02 12 03 13)
+        movdqa  xmm2, XMMWORD [wk(1)]   ; xmm2=(04 14 05 15 06 16 07 17)
 
-	movdqa    xmm4,xmm3		; transpose coefficients(phase 1)
-	punpcklwd xmm3,xmm1		; xmm3=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm4,xmm1		; xmm4=(24 34 25 35 26 36 27 37)
-	movdqa    xmm6,xmm0		; transpose coefficients(phase 1)
-	punpcklwd xmm0,xmm5		; xmm0=(40 50 41 51 42 52 43 53)
-	punpckhwd xmm6,xmm5		; xmm6=(44 54 45 55 46 56 47 57)
+        movdqa    xmm4,xmm3             ; transpose coefficients(phase 1)
+        punpcklwd xmm3,xmm1             ; xmm3=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm1             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm6,xmm5             ; xmm6=(44 54 45 55 46 56 47 57)
 
-	movdqa    xmm1,xmm7		; transpose coefficients(phase 2)
-	punpckldq xmm7,xmm3		; xmm7=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm1,xmm3		; xmm1=(02 12 22 32 03 13 23 33)
-	movdqa    xmm5,xmm2		; transpose coefficients(phase 2)
-	punpckldq xmm2,xmm4		; xmm2=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm5,xmm4		; xmm5=(06 16 26 36 07 17 27 37)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm3             ; xmm7=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm1,xmm3             ; xmm1=(02 12 22 32 03 13 23 33)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm4             ; xmm2=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm5,xmm4             ; xmm5=(06 16 26 36 07 17 27 37)
 
-	movdqa	xmm3, XMMWORD [wk(4)]	; xmm3=(60 70 61 71 62 72 63 73)
-	movdqa	xmm4, XMMWORD [wk(5)]	; xmm4=(64 74 65 75 66 76 67 77)
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=(60 70 61 71 62 72 63 73)
+        movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(64 74 65 75 66 76 67 77)
 
-	movdqa	XMMWORD [wk(6)], xmm2	; wk(6)=(04 14 24 34 05 15 25 35)
-	movdqa	XMMWORD [wk(7)], xmm5	; wk(7)=(06 16 26 36 07 17 27 37)
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(7)], xmm5   ; wk(7)=(06 16 26 36 07 17 27 37)
 
-	movdqa    xmm2,xmm0		; transpose coefficients(phase 2)
-	punpckldq xmm0,xmm3		; xmm0=(40 50 60 70 41 51 61 71)
-	punpckhdq xmm2,xmm3		; xmm2=(42 52 62 72 43 53 63 73)
-	movdqa    xmm5,xmm6		; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm4		; xmm6=(44 54 64 74 45 55 65 75)
-	punpckhdq xmm5,xmm4		; xmm5=(46 56 66 76 47 57 67 77)
+        movdqa    xmm2,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm2,xmm3             ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm4             ; xmm6=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm5,xmm4             ; xmm5=(46 56 66 76 47 57 67 77)
 
-	movdqa     xmm3,xmm7		; transpose coefficients(phase 3)
-	punpcklqdq xmm7,xmm0		; xmm7=col0=(00 10 20 30 40 50 60 70)
-	punpckhqdq xmm3,xmm0		; xmm3=col1=(01 11 21 31 41 51 61 71)
-	movdqa     xmm4,xmm1		; transpose coefficients(phase 3)
-	punpcklqdq xmm1,xmm2		; xmm1=col2=(02 12 22 32 42 52 62 72)
-	punpckhqdq xmm4,xmm2		; xmm4=col3=(03 13 23 33 43 53 63 73)
+        movdqa     xmm3,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm0            ; xmm7=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm3,xmm0            ; xmm3=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm4,xmm2            ; xmm4=col3=(03 13 23 33 43 53 63 73)
 
-	movdqa	xmm0, XMMWORD [wk(6)]	; xmm0=(04 14 24 34 05 15 25 35)
-	movdqa	xmm2, XMMWORD [wk(7)]	; xmm2=(06 16 26 36 07 17 27 37)
+        movdqa  xmm0, XMMWORD [wk(6)]   ; xmm0=(04 14 24 34 05 15 25 35)
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=(06 16 26 36 07 17 27 37)
 
-	movdqa	XMMWORD [wk(8)], xmm3	; wk(8)=col1
-	movdqa	XMMWORD [wk(9)], xmm4	; wk(9)=col3
+        movdqa  XMMWORD [wk(8)], xmm3   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm4   ; wk(9)=col3
 
-	movdqa     xmm3,xmm0		; transpose coefficients(phase 3)
-	punpcklqdq xmm0,xmm6		; xmm0=col4=(04 14 24 34 44 54 64 74)
-	punpckhqdq xmm3,xmm6		; xmm3=col5=(05 15 25 35 45 55 65 75)
-	movdqa     xmm4,xmm2		; transpose coefficients(phase 3)
-	punpcklqdq xmm2,xmm5		; xmm2=col6=(06 16 26 36 46 56 66 76)
-	punpckhqdq xmm4,xmm5		; xmm4=col7=(07 17 27 37 47 57 67 77)
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm3,xmm6            ; xmm3=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm4,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm5            ; xmm2=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm4,xmm5            ; xmm4=col7=(07 17 27 37 47 57 67 77)
 
-	movdqa	XMMWORD [wk(10)], xmm3	; wk(10)=col5
-	movdqa	XMMWORD [wk(11)], xmm4	; wk(11)=col7
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
 
-	; -- Even part
+        ; -- Even part
 
-	; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
+        ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
 
-	; (Original)
-	; z1 = (z2 + z3) * 0.541196100;
-	; tmp2 = z1 + z3 * -1.847759065;
-	; tmp3 = z1 + z2 * 0.765366865;
-	;
-	; (This implementation)
-	; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
-	; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
 
-	movdqa    xmm6,xmm1		; xmm1=in2=z2
-	movdqa    xmm5,xmm1
-	punpcklwd xmm6,xmm2		; xmm2=in6=z3
-	punpckhwd xmm5,xmm2
-	movdqa    xmm1,xmm6
-	movdqa    xmm2,xmm5
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]	; xmm6=tmp3L
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]	; xmm5=tmp3H
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]	; xmm1=tmp2L
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_F054_MF130)]	; xmm2=tmp2H
+        movdqa    xmm6,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm6,xmm2             ; xmm2=in6=z3
+        punpckhwd xmm5,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm2,xmm5
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=tmp3L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm2=tmp2H
 
-	movdqa    xmm3,xmm7
-	paddw     xmm7,xmm0		; xmm7=in0+in4
-	psubw     xmm3,xmm0		; xmm3=in0-in4
+        movdqa    xmm3,xmm7
+        paddw     xmm7,xmm0             ; xmm7=in0+in4
+        psubw     xmm3,xmm0             ; xmm3=in0-in4
 
-	pxor      xmm4,xmm4
-	pxor      xmm0,xmm0
-	punpcklwd xmm4,xmm7		; xmm4=tmp0L
-	punpckhwd xmm0,xmm7		; xmm0=tmp0H
-	psrad     xmm4,(16-CONST_BITS)	; psrad xmm4,16 & pslld xmm4,CONST_BITS
-	psrad     xmm0,(16-CONST_BITS)	; psrad xmm0,16 & pslld xmm0,CONST_BITS
+        pxor      xmm4,xmm4
+        pxor      xmm0,xmm0
+        punpcklwd xmm4,xmm7             ; xmm4=tmp0L
+        punpckhwd xmm0,xmm7             ; xmm0=tmp0H
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        psrad     xmm0,(16-CONST_BITS)  ; psrad xmm0,16 & pslld xmm0,CONST_BITS
 
-	movdqa	xmm7,xmm4
-	paddd	xmm4,xmm6		; xmm4=tmp10L
-	psubd	xmm7,xmm6		; xmm7=tmp13L
-	movdqa	xmm6,xmm0
-	paddd	xmm0,xmm5		; xmm0=tmp10H
-	psubd	xmm6,xmm5		; xmm6=tmp13H
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm6               ; xmm4=tmp10L
+        psubd   xmm7,xmm6               ; xmm7=tmp13L
+        movdqa  xmm6,xmm0
+        paddd   xmm0,xmm5               ; xmm0=tmp10H
+        psubd   xmm6,xmm5               ; xmm6=tmp13H
 
-	movdqa	XMMWORD [wk(0)], xmm4	; wk(0)=tmp10L
-	movdqa	XMMWORD [wk(1)], xmm0	; wk(1)=tmp10H
-	movdqa	XMMWORD [wk(2)], xmm7	; wk(2)=tmp13L
-	movdqa	XMMWORD [wk(3)], xmm6	; wk(3)=tmp13H
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm6   ; wk(3)=tmp13H
 
-	pxor      xmm5,xmm5
-	pxor      xmm4,xmm4
-	punpcklwd xmm5,xmm3		; xmm5=tmp1L
-	punpckhwd xmm4,xmm3		; xmm4=tmp1H
-	psrad     xmm5,(16-CONST_BITS)	; psrad xmm5,16 & pslld xmm5,CONST_BITS
-	psrad     xmm4,(16-CONST_BITS)	; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        pxor      xmm5,xmm5
+        pxor      xmm4,xmm4
+        punpcklwd xmm5,xmm3             ; xmm5=tmp1L
+        punpckhwd xmm4,xmm3             ; xmm4=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
 
-	movdqa	xmm0,xmm5
-	paddd	xmm5,xmm1		; xmm5=tmp11L
-	psubd	xmm0,xmm1		; xmm0=tmp12L
-	movdqa	xmm7,xmm4
-	paddd	xmm4,xmm2		; xmm4=tmp11H
-	psubd	xmm7,xmm2		; xmm7=tmp12H
+        movdqa  xmm0,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm0,xmm1               ; xmm0=tmp12L
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm2               ; xmm4=tmp11H
+        psubd   xmm7,xmm2               ; xmm7=tmp12H
 
-	movdqa	XMMWORD [wk(4)], xmm5	; wk(4)=tmp11L
-	movdqa	XMMWORD [wk(5)], xmm4	; wk(5)=tmp11H
-	movdqa	XMMWORD [wk(6)], xmm0	; wk(6)=tmp12L
-	movdqa	XMMWORD [wk(7)], xmm7	; wk(7)=tmp12H
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm0   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm7   ; wk(7)=tmp12H
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm6, XMMWORD [wk(9)]	; xmm6=col3
-	movdqa	xmm3, XMMWORD [wk(8)]	; xmm3=col1
-	movdqa	xmm1, XMMWORD [wk(11)]	; xmm1=col7
-	movdqa	xmm2, XMMWORD [wk(10)]	; xmm2=col5
+        movdqa  xmm6, XMMWORD [wk(9)]   ; xmm6=col3
+        movdqa  xmm3, XMMWORD [wk(8)]   ; xmm3=col1
+        movdqa  xmm1, XMMWORD [wk(11)]  ; xmm1=col7
+        movdqa  xmm2, XMMWORD [wk(10)]  ; xmm2=col5
 
-	movdqa	xmm5,xmm6
-	movdqa	xmm4,xmm3
-	paddw	xmm5,xmm1		; xmm5=z3
-	paddw	xmm4,xmm2		; xmm4=z4
+        movdqa  xmm5,xmm6
+        movdqa  xmm4,xmm3
+        paddw   xmm5,xmm1               ; xmm5=z3
+        paddw   xmm4,xmm2               ; xmm4=z4
 
-	; (Original)
-	; z5 = (z3 + z4) * 1.175875602;
-	; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
-	; z3 += z5;  z4 += z5;
-	;
-	; (This implementation)
-	; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
-	; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
 
-	movdqa    xmm0,xmm5
-	movdqa    xmm7,xmm5
-	punpcklwd xmm0,xmm4
-	punpckhwd xmm7,xmm4
-	movdqa    xmm5,xmm0
-	movdqa    xmm4,xmm7
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]	; xmm0=z3L
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]	; xmm7=z3H
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]	; xmm5=z4L
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F117_F078)]	; xmm4=z4H
+        movdqa    xmm0,xmm5
+        movdqa    xmm7,xmm5
+        punpcklwd xmm0,xmm4
+        punpckhwd xmm7,xmm4
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F117_F078)]       ; xmm4=z4H
 
-	movdqa	XMMWORD [wk(10)], xmm0	; wk(10)=z3L
-	movdqa	XMMWORD [wk(11)], xmm7	; wk(11)=z3H
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=z3H
 
-	; (Original)
-	; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
-	; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
-	; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
-	; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
-	; tmp0 += z1 + z3;  tmp1 += z2 + z4;
-	; tmp2 += z2 + z3;  tmp3 += z1 + z4;
-	;
-	; (This implementation)
-	; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
-	; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
-	; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
-	; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
-	; tmp0 += z3;  tmp1 += z4;
-	; tmp2 += z3;  tmp3 += z4;
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
 
-	movdqa    xmm0,xmm1
-	movdqa    xmm7,xmm1
-	punpcklwd xmm0,xmm3
-	punpckhwd xmm7,xmm3
-	movdqa    xmm1,xmm0
-	movdqa    xmm3,xmm7
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm0=tmp0L
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]	; xmm7=tmp0H
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]	; xmm1=tmp3L
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]	; xmm3=tmp3H
+        movdqa    xmm0,xmm1
+        movdqa    xmm7,xmm1
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm1,xmm0
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp0H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp3L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3H
 
-	paddd	xmm0, XMMWORD [wk(10)]	; xmm0=tmp0L
-	paddd	xmm7, XMMWORD [wk(11)]	; xmm7=tmp0H
-	paddd	xmm1,xmm5		; xmm1=tmp3L
-	paddd	xmm3,xmm4		; xmm3=tmp3H
+        paddd   xmm0, XMMWORD [wk(10)]  ; xmm0=tmp0L
+        paddd   xmm7, XMMWORD [wk(11)]  ; xmm7=tmp0H
+        paddd   xmm1,xmm5               ; xmm1=tmp3L
+        paddd   xmm3,xmm4               ; xmm3=tmp3H
 
-	movdqa	XMMWORD [wk(8)], xmm0	; wk(8)=tmp0L
-	movdqa	XMMWORD [wk(9)], xmm7	; wk(9)=tmp0H
+        movdqa  XMMWORD [wk(8)], xmm0   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm7   ; wk(9)=tmp0H
 
-	movdqa    xmm0,xmm2
-	movdqa    xmm7,xmm2
-	punpcklwd xmm0,xmm6
-	punpckhwd xmm7,xmm6
-	movdqa    xmm2,xmm0
-	movdqa    xmm6,xmm7
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm0=tmp1L
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF050_MF256)]	; xmm7=tmp1H
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_MF256_F050)]	; xmm2=tmp2L
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]	; xmm6=tmp2H
+        movdqa    xmm0,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm0,xmm6
+        punpckhwd xmm7,xmm6
+        movdqa    xmm2,xmm0
+        movdqa    xmm6,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm7=tmp1H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm2=tmp2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
 
-	paddd	xmm0,xmm5		; xmm0=tmp1L
-	paddd	xmm7,xmm4		; xmm7=tmp1H
-	paddd	xmm2, XMMWORD [wk(10)]	; xmm2=tmp2L
-	paddd	xmm6, XMMWORD [wk(11)]	; xmm6=tmp2H
+        paddd   xmm0,xmm5               ; xmm0=tmp1L
+        paddd   xmm7,xmm4               ; xmm7=tmp1H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
 
-	movdqa	XMMWORD [wk(10)], xmm0	; wk(10)=tmp1L
-	movdqa	XMMWORD [wk(11)], xmm7	; wk(11)=tmp1H
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=tmp1H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5, XMMWORD [wk(0)]	; xmm5=tmp10L
-	movdqa	xmm4, XMMWORD [wk(1)]	; xmm4=tmp10H
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm4, XMMWORD [wk(1)]   ; xmm4=tmp10H
 
-	movdqa	xmm0,xmm5
-	movdqa	xmm7,xmm4
-	paddd	xmm5,xmm1		; xmm5=data0L
-	paddd	xmm4,xmm3		; xmm4=data0H
-	psubd	xmm0,xmm1		; xmm0=data7L
-	psubd	xmm7,xmm3		; xmm7=data7H
+        movdqa  xmm0,xmm5
+        movdqa  xmm7,xmm4
+        paddd   xmm5,xmm1               ; xmm5=data0L
+        paddd   xmm4,xmm3               ; xmm4=data0H
+        psubd   xmm0,xmm1               ; xmm0=data7L
+        psubd   xmm7,xmm3               ; xmm7=data7H
 
-	movdqa	xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]	; xmm1=[PD_DESCALE_P2]
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm1=[PD_DESCALE_P2]
 
-	paddd	xmm5,xmm1
-	paddd	xmm4,xmm1
-	psrad	xmm5,DESCALE_P2
-	psrad	xmm4,DESCALE_P2
-	paddd	xmm0,xmm1
-	paddd	xmm7,xmm1
-	psrad	xmm0,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm5,xmm1
+        paddd   xmm4,xmm1
+        psrad   xmm5,DESCALE_P2
+        psrad   xmm4,DESCALE_P2
+        paddd   xmm0,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm5,xmm4		; xmm5=data0=(00 10 20 30 40 50 60 70)
-	packssdw  xmm0,xmm7		; xmm0=data7=(07 17 27 37 47 57 67 77)
+        packssdw  xmm5,xmm4             ; xmm5=data0=(00 10 20 30 40 50 60 70)
+        packssdw  xmm0,xmm7             ; xmm0=data7=(07 17 27 37 47 57 67 77)
 
-	movdqa	xmm3, XMMWORD [wk(4)]	; xmm3=tmp11L
-	movdqa	xmm1, XMMWORD [wk(5)]	; xmm1=tmp11H
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=tmp11L
+        movdqa  xmm1, XMMWORD [wk(5)]   ; xmm1=tmp11H
 
-	movdqa	xmm4,xmm3
-	movdqa	xmm7,xmm1
-	paddd	xmm3,xmm2		; xmm3=data1L
-	paddd	xmm1,xmm6		; xmm1=data1H
-	psubd	xmm4,xmm2		; xmm4=data6L
-	psubd	xmm7,xmm6		; xmm7=data6H
+        movdqa  xmm4,xmm3
+        movdqa  xmm7,xmm1
+        paddd   xmm3,xmm2               ; xmm3=data1L
+        paddd   xmm1,xmm6               ; xmm1=data1H
+        psubd   xmm4,xmm2               ; xmm4=data6L
+        psubd   xmm7,xmm6               ; xmm7=data6H
 
-	movdqa	xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]	; xmm2=[PD_DESCALE_P2]
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm2=[PD_DESCALE_P2]
 
-	paddd	xmm3,xmm2
-	paddd	xmm1,xmm2
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm4,xmm2
-	paddd	xmm7,xmm2
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm7,DESCALE_P2
+        paddd   xmm3,xmm2
+        paddd   xmm1,xmm2
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm4,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
 
-	packssdw  xmm3,xmm1		; xmm3=data1=(01 11 21 31 41 51 61 71)
-	packssdw  xmm4,xmm7		; xmm4=data6=(06 16 26 36 46 56 66 76)
+        packssdw  xmm3,xmm1             ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        packssdw  xmm4,xmm7             ; xmm4=data6=(06 16 26 36 46 56 66 76)
 
-	packsswb  xmm5,xmm4		; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	packsswb  xmm3,xmm0		; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        packsswb  xmm5,xmm4             ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm0             ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm6, XMMWORD [wk(6)]	; xmm6=tmp12L
-	movdqa	xmm2, XMMWORD [wk(7)]	; xmm2=tmp12H
-	movdqa	xmm1, XMMWORD [wk(10)]	; xmm1=tmp1L
-	movdqa	xmm7, XMMWORD [wk(11)]	; xmm7=tmp1H
+        movdqa  xmm6, XMMWORD [wk(6)]   ; xmm6=tmp12L
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=tmp12H
+        movdqa  xmm1, XMMWORD [wk(10)]  ; xmm1=tmp1L
+        movdqa  xmm7, XMMWORD [wk(11)]  ; xmm7=tmp1H
 
-	movdqa	XMMWORD [wk(0)], xmm5	; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	movdqa	xmm4,xmm6
-	movdqa	xmm0,xmm2
-	paddd	xmm6,xmm1		; xmm6=data2L
-	paddd	xmm2,xmm7		; xmm2=data2H
-	psubd	xmm4,xmm1		; xmm4=data5L
-	psubd	xmm0,xmm7		; xmm0=data5H
+        movdqa  xmm4,xmm6
+        movdqa  xmm0,xmm2
+        paddd   xmm6,xmm1               ; xmm6=data2L
+        paddd   xmm2,xmm7               ; xmm2=data2H
+        psubd   xmm4,xmm1               ; xmm4=data5L
+        psubd   xmm0,xmm7               ; xmm0=data5H
 
-	movdqa	xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]	; xmm5=[PD_DESCALE_P2]
+        movdqa  xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm5=[PD_DESCALE_P2]
 
-	paddd	xmm6,xmm5
-	paddd	xmm2,xmm5
-	psrad	xmm6,DESCALE_P2
-	psrad	xmm2,DESCALE_P2
-	paddd	xmm4,xmm5
-	paddd	xmm0,xmm5
-	psrad	xmm4,DESCALE_P2
-	psrad	xmm0,DESCALE_P2
+        paddd   xmm6,xmm5
+        paddd   xmm2,xmm5
+        psrad   xmm6,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm4,xmm5
+        paddd   xmm0,xmm5
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
 
-	packssdw  xmm6,xmm2		; xmm6=data2=(02 12 22 32 42 52 62 72)
-	packssdw  xmm4,xmm0		; xmm4=data5=(05 15 25 35 45 55 65 75)
+        packssdw  xmm6,xmm2             ; xmm6=data2=(02 12 22 32 42 52 62 72)
+        packssdw  xmm4,xmm0             ; xmm4=data5=(05 15 25 35 45 55 65 75)
 
-	movdqa	xmm3, XMMWORD [wk(2)]	; xmm3=tmp13L
-	movdqa	xmm1, XMMWORD [wk(3)]	; xmm1=tmp13H
-	movdqa	xmm7, XMMWORD [wk(8)]	; xmm7=tmp0L
-	movdqa	xmm5, XMMWORD [wk(9)]	; xmm5=tmp0H
+        movdqa  xmm3, XMMWORD [wk(2)]   ; xmm3=tmp13L
+        movdqa  xmm1, XMMWORD [wk(3)]   ; xmm1=tmp13H
+        movdqa  xmm7, XMMWORD [wk(8)]   ; xmm7=tmp0L
+        movdqa  xmm5, XMMWORD [wk(9)]   ; xmm5=tmp0H
 
-	movdqa	xmm2,xmm3
-	movdqa	xmm0,xmm1
-	paddd	xmm3,xmm7		; xmm3=data3L
-	paddd	xmm1,xmm5		; xmm1=data3H
-	psubd	xmm2,xmm7		; xmm2=data4L
-	psubd	xmm0,xmm5		; xmm0=data4H
+        movdqa  xmm2,xmm3
+        movdqa  xmm0,xmm1
+        paddd   xmm3,xmm7               ; xmm3=data3L
+        paddd   xmm1,xmm5               ; xmm1=data3H
+        psubd   xmm2,xmm7               ; xmm2=data4L
+        psubd   xmm0,xmm5               ; xmm0=data4H
 
-	movdqa	xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]	; xmm7=[PD_DESCALE_P2]
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm7=[PD_DESCALE_P2]
 
-	paddd	xmm3,xmm7
-	paddd	xmm1,xmm7
-	psrad	xmm3,DESCALE_P2
-	psrad	xmm1,DESCALE_P2
-	paddd	xmm2,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm2,DESCALE_P2
-	psrad	xmm0,DESCALE_P2
+        paddd   xmm3,xmm7
+        paddd   xmm1,xmm7
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm2,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm2,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
 
-	movdqa    xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)]	; xmm5=[PB_CENTERJSAMP]
+        movdqa    xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm5=[PB_CENTERJSAMP]
 
-	packssdw  xmm3,xmm1		; xmm3=data3=(03 13 23 33 43 53 63 73)
-	packssdw  xmm2,xmm0		; xmm2=data4=(04 14 24 34 44 54 64 74)
+        packssdw  xmm3,xmm1             ; xmm3=data3=(03 13 23 33 43 53 63 73)
+        packssdw  xmm2,xmm0             ; xmm2=data4=(04 14 24 34 44 54 64 74)
 
-	movdqa    xmm7, XMMWORD [wk(0)]	; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
-	movdqa    xmm1, XMMWORD [wk(1)]	; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+        movdqa    xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
 
-	packsswb  xmm6,xmm2		; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
-	packsswb  xmm3,xmm4		; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+        packsswb  xmm6,xmm2             ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm3,xmm4             ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
 
-	paddb     xmm7,xmm5
-	paddb     xmm1,xmm5
-	paddb     xmm6,xmm5
-	paddb     xmm3,xmm5
+        paddb     xmm7,xmm5
+        paddb     xmm1,xmm5
+        paddb     xmm6,xmm5
+        paddb     xmm3,xmm5
 
-	movdqa    xmm0,xmm7	; transpose coefficients(phase 1)
-	punpcklbw xmm7,xmm1	; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
-	punpckhbw xmm0,xmm1	; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
-	movdqa    xmm2,xmm6	; transpose coefficients(phase 1)
-	punpcklbw xmm6,xmm3	; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
-	punpckhbw xmm2,xmm3	; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm7     ; transpose coefficients(phase 1)
+        punpcklbw xmm7,xmm1     ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm1     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 1)
+        punpcklbw xmm6,xmm3     ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm2,xmm3     ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
 
-	movdqa    xmm4,xmm7	; transpose coefficients(phase 2)
-	punpcklwd xmm7,xmm6	; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
-	punpckhwd xmm4,xmm6	; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
-	movdqa    xmm5,xmm2	; transpose coefficients(phase 2)
-	punpcklwd xmm2,xmm0	; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
-	punpckhwd xmm5,xmm0	; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+        movdqa    xmm4,xmm7     ; transpose coefficients(phase 2)
+        punpcklwd xmm7,xmm6     ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm6     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm5,xmm2     ; transpose coefficients(phase 2)
+        punpcklwd xmm2,xmm0     ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm5,xmm0     ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
 
-	movdqa    xmm1,xmm7	; transpose coefficients(phase 3)
-	punpckldq xmm7,xmm2	; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
-	punpckhdq xmm1,xmm2	; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
-	movdqa    xmm3,xmm4	; transpose coefficients(phase 3)
-	punpckldq xmm4,xmm5	; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
-	punpckhdq xmm3,xmm5	; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+        movdqa    xmm1,xmm7     ; transpose coefficients(phase 3)
+        punpckldq xmm7,xmm2     ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm1,xmm2     ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm3,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm5     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm3,xmm5     ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
 
-	pshufd	xmm6,xmm7,0x4E	; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
-	pshufd	xmm0,xmm1,0x4E	; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
-	pshufd	xmm2,xmm4,0x4E	; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
-	pshufd	xmm5,xmm3,0x4E	; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+        pshufd  xmm6,xmm7,0x4E  ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm1,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm2,xmm4,0x4E  ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm5,xmm3,0x4E  ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1
-	mov	edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1
+        mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
 
-	mov	edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
-	mov	edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
-	movq	XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2
-	movq	XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+        mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2red-64.asm b/simd/jiss2red-64.asm
index 6807f17..bd7c35c 100644
--- a/simd/jiss2red-64.asm
+++ b/simd/jiss2red-64.asm
@@ -27,74 +27,74 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13
+%define PASS1_BITS      2
 
-%define DESCALE_P1_4	(CONST_BITS-PASS1_BITS+1)
-%define DESCALE_P2_4	(CONST_BITS+PASS1_BITS+3+1)
-%define DESCALE_P1_2	(CONST_BITS-PASS1_BITS+2)
-%define DESCALE_P2_2	(CONST_BITS+PASS1_BITS+3+2)
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)
 
 %if CONST_BITS == 13
-F_0_211	equ	 1730		; FIX(0.211164243)
-F_0_509	equ	 4176		; FIX(0.509795579)
-F_0_601	equ	 4926		; FIX(0.601344887)
-F_0_720	equ	 5906		; FIX(0.720959822)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_850	equ	 6967		; FIX(0.850430095)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_061	equ	 8697		; FIX(1.061594337)
-F_1_272	equ	10426		; FIX(1.272758580)
-F_1_451	equ	11893		; FIX(1.451774981)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_2_172	equ	17799		; FIX(2.172734803)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_624	equ	29692		; FIX(3.624509785)
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_211	equ	DESCALE( 226735879,30-CONST_BITS)	; FIX(0.211164243)
-F_0_509	equ	DESCALE( 547388834,30-CONST_BITS)	; FIX(0.509795579)
-F_0_601	equ	DESCALE( 645689155,30-CONST_BITS)	; FIX(0.601344887)
-F_0_720	equ	DESCALE( 774124714,30-CONST_BITS)	; FIX(0.720959822)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_850	equ	DESCALE( 913142361,30-CONST_BITS)	; FIX(0.850430095)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_061	equ	DESCALE(1139878239,30-CONST_BITS)	; FIX(1.061594337)
-F_1_272	equ	DESCALE(1366614119,30-CONST_BITS)	; FIX(1.272758580)
-F_1_451	equ	DESCALE(1558831516,30-CONST_BITS)	; FIX(1.451774981)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_172	equ	DESCALE(2332956230,30-CONST_BITS)	; FIX(2.172734803)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_624	equ	DESCALE(3891787747,30-CONST_BITS)	; FIX(3.624509785)
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_red_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
 
-PW_F184_MF076	times 4 dw  F_1_847,-F_0_765
-PW_F256_F089	times 4 dw  F_2_562, F_0_899
-PW_F106_MF217	times 4 dw  F_1_061,-F_2_172
-PW_MF060_MF050	times 4 dw -F_0_601,-F_0_509
-PW_F145_MF021	times 4 dw  F_1_451,-F_0_211
-PW_F362_MF127	times 4 dw  F_3_624,-F_1_272
-PW_F085_MF072	times 4 dw  F_0_850,-F_0_720
-PD_DESCALE_P1_4	times 4 dd  1 << (DESCALE_P1_4-1)
-PD_DESCALE_P2_4	times 4 dd  1 << (DESCALE_P2_4-1)
-PD_DESCALE_P1_2	times 4 dd  1 << (DESCALE_P1_2-1)
-PD_DESCALE_P2_2	times 4 dd  1 << (DESCALE_P2_2-1)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F184_MF076   times 4 dw  F_1_847,-F_0_765
+PW_F256_F089    times 4 dw  F_2_562, F_0_899
+PW_F106_MF217   times 4 dw  F_1_061,-F_2_172
+PW_MF060_MF050  times 4 dw -F_0_601,-F_0_509
+PW_F145_MF021   times 4 dw  F_1_451,-F_0_211
+PW_F362_MF127   times 4 dw  F_3_624,-F_1_272
+PW_F085_MF072   times 4 dw  F_0_850,-F_0_720
+PD_DESCALE_P1_4 times 4 dd  1 << (DESCALE_P1_4-1)
+PD_DESCALE_P2_4 times 4 dd  1 << (DESCALE_P2_4-1)
+PD_DESCALE_P1_2 times 4 dd  1 << (DESCALE_P1_2-1)
+PD_DESCALE_P2_2 times 4 dd  1 << (DESCALE_P2_2-1)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
+        SECTION SEG_TEXT
+        BITS    64
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients,
 ; producing a reduced-size 4x4 output block.
@@ -109,292 +109,292 @@
 ; r12 = JSAMPARRAY output_buf
 ; r13 = JDIMENSION output_col
 
-%define original_rbp	rbp+0
-%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_idct_4x4_sse2)
+        align   16
+        global  EXTN(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
-	push	rbp
-	mov	rax,rsp				; rax = original rbp
-	sub	rsp, byte 4
-	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[rsp],rax
-	mov	rbp,rsp				; rbp = aligned rbp
-	lea	rsp, [wk(0)]
-	collect_args
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp (rsp right after the push)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	rdx, r10	; quantptr
-	mov	rsi, r11		; inptr
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	por	xmm0,xmm1
-	packsswb xmm0,xmm0
-	packsswb xmm0,xmm0
-	movd	eax,xmm0
-	test	rax,rax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0,xmm1
+        packsswb xmm0,xmm0
+        packsswb xmm0,xmm0
+        movd    eax,xmm0
+        test    rax,rax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	xmm0,PASS1_BITS
+        psllw   xmm0,PASS1_BITS
 
-	movdqa    xmm3,xmm0	; xmm0=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm0,xmm0	; xmm0=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm3,xmm3	; xmm3=(04 04 05 05 06 06 07 07)
+        movdqa    xmm3,xmm0     ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0     ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm3,xmm3     ; xmm3=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm1,xmm0,0x50	; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
-	pshufd	xmm0,xmm0,0xFA	; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
-	pshufd	xmm6,xmm3,0x50	; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
-	pshufd	xmm3,xmm3,0xFA	; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+        pshufd  xmm1,xmm0,0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+        pshufd  xmm0,xmm0,0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+        pshufd  xmm6,xmm3,0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+        pshufd  xmm3,xmm3,0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
 
-	jmp	near .column_end
+        jmp     near .column_end
 %endif
 .columnDCT:
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movdqa    xmm4,xmm0
-	movdqa    xmm5,xmm0
-	punpcklwd xmm4,xmm1
-	punpckhwd xmm5,xmm1
-	movdqa    xmm0,xmm4
-	movdqa    xmm1,xmm5
-	pmaddwd   xmm4,[rel PW_F256_F089]	; xmm4=(tmp2L)
-	pmaddwd   xmm5,[rel PW_F256_F089]	; xmm5=(tmp2H)
-	pmaddwd   xmm0,[rel PW_F106_MF217]	; xmm0=(tmp0L)
-	pmaddwd   xmm1,[rel PW_F106_MF217]	; xmm1=(tmp0H)
+        movdqa    xmm4,xmm0
+        movdqa    xmm5,xmm0
+        punpcklwd xmm4,xmm1
+        punpckhwd xmm5,xmm1
+        movdqa    xmm0,xmm4
+        movdqa    xmm1,xmm5
+        pmaddwd   xmm4,[rel PW_F256_F089]       ; xmm4=(tmp2L)
+        pmaddwd   xmm5,[rel PW_F256_F089]       ; xmm5=(tmp2H)
+        pmaddwd   xmm0,[rel PW_F106_MF217]      ; xmm0=(tmp0L)
+        pmaddwd   xmm1,[rel PW_F106_MF217]      ; xmm1=(tmp0H)
 
-	movdqa    xmm6,xmm2
-	movdqa    xmm7,xmm2
-	punpcklwd xmm6,xmm3
-	punpckhwd xmm7,xmm3
-	movdqa    xmm2,xmm6
-	movdqa    xmm3,xmm7
-	pmaddwd   xmm6,[rel PW_MF060_MF050]	; xmm6=(tmp2L)
-	pmaddwd   xmm7,[rel PW_MF060_MF050]	; xmm7=(tmp2H)
-	pmaddwd   xmm2,[rel PW_F145_MF021]	; xmm2=(tmp0L)
-	pmaddwd   xmm3,[rel PW_F145_MF021]	; xmm3=(tmp0H)
+        movdqa    xmm6,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm6,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm2,xmm6
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm6,[rel PW_MF060_MF050]     ; xmm6=(tmp2L)
+        pmaddwd   xmm7,[rel PW_MF060_MF050]     ; xmm7=(tmp2H)
+        pmaddwd   xmm2,[rel PW_F145_MF021]      ; xmm2=(tmp0L)
+        pmaddwd   xmm3,[rel PW_F145_MF021]      ; xmm3=(tmp0H)
 
-	paddd	xmm6,xmm4		; xmm6=tmp2L
-	paddd	xmm7,xmm5		; xmm7=tmp2H
-	paddd	xmm2,xmm0		; xmm2=tmp0L
-	paddd	xmm3,xmm1		; xmm3=tmp0H
+        paddd   xmm6,xmm4               ; xmm6=tmp2L
+        paddd   xmm7,xmm5               ; xmm7=tmp2H
+        paddd   xmm2,xmm0               ; xmm2=tmp0L
+        paddd   xmm3,xmm1               ; xmm3=tmp0H
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=tmp0L
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=tmp0H
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp0L
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=tmp0H
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	pxor      xmm1,xmm1
-	pxor      xmm2,xmm2
-	punpcklwd xmm1,xmm4		; xmm1=tmp0L
-	punpckhwd xmm2,xmm4		; xmm2=tmp0H
-	psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
-	psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+        pxor      xmm1,xmm1
+        pxor      xmm2,xmm2
+        punpcklwd xmm1,xmm4             ; xmm1=tmp0L
+        punpckhwd xmm2,xmm4             ; xmm2=tmp0H
+        psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+        psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
 
-	movdqa    xmm3,xmm5		; xmm5=in2=z2
-	punpcklwd xmm5,xmm0		; xmm0=in6=z3
-	punpckhwd xmm3,xmm0
-	pmaddwd   xmm5,[rel PW_F184_MF076]	; xmm5=tmp2L
-	pmaddwd   xmm3,[rel PW_F184_MF076]	; xmm3=tmp2H
+        movdqa    xmm3,xmm5             ; xmm5=in2=z2
+        punpcklwd xmm5,xmm0             ; xmm0=in6=z3
+        punpckhwd xmm3,xmm0
+        pmaddwd   xmm5,[rel PW_F184_MF076]      ; xmm5=tmp2L
+        pmaddwd   xmm3,[rel PW_F184_MF076]      ; xmm3=tmp2H
 
-	movdqa	xmm4,xmm1
-	movdqa	xmm0,xmm2
-	paddd	xmm1,xmm5		; xmm1=tmp10L
-	paddd	xmm2,xmm3		; xmm2=tmp10H
-	psubd	xmm4,xmm5		; xmm4=tmp12L
-	psubd	xmm0,xmm3		; xmm0=tmp12H
+        movdqa  xmm4,xmm1
+        movdqa  xmm0,xmm2
+        paddd   xmm1,xmm5               ; xmm1=tmp10L
+        paddd   xmm2,xmm3               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp12L
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5,xmm1
-	movdqa	xmm3,xmm2
-	paddd	xmm1,xmm6		; xmm1=data0L
-	paddd	xmm2,xmm7		; xmm2=data0H
-	psubd	xmm5,xmm6		; xmm5=data3L
-	psubd	xmm3,xmm7		; xmm3=data3H
+        movdqa  xmm5,xmm1
+        movdqa  xmm3,xmm2
+        paddd   xmm1,xmm6               ; xmm1=data0L
+        paddd   xmm2,xmm7               ; xmm2=data0H
+        psubd   xmm5,xmm6               ; xmm5=data3L
+        psubd   xmm3,xmm7               ; xmm3=data3H
 
-	movdqa	xmm6,[rel PD_DESCALE_P1_4]	; xmm6=[rel PD_DESCALE_P1_4]
+        movdqa  xmm6,[rel PD_DESCALE_P1_4]      ; xmm6=rounding term 1<<(DESCALE_P1_4-1)
 
-	paddd	xmm1,xmm6
-	paddd	xmm2,xmm6
-	psrad	xmm1,DESCALE_P1_4
-	psrad	xmm2,DESCALE_P1_4
-	paddd	xmm5,xmm6
-	paddd	xmm3,xmm6
-	psrad	xmm5,DESCALE_P1_4
-	psrad	xmm3,DESCALE_P1_4
+        paddd   xmm1,xmm6
+        paddd   xmm2,xmm6
+        psrad   xmm1,DESCALE_P1_4
+        psrad   xmm2,DESCALE_P1_4
+        paddd   xmm5,xmm6
+        paddd   xmm3,xmm6
+        psrad   xmm5,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
 
-	packssdw  xmm1,xmm2		; xmm1=data0=(00 01 02 03 04 05 06 07)
-	packssdw  xmm5,xmm3		; xmm5=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm1,xmm2             ; xmm1=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm5,xmm3             ; xmm5=data3=(30 31 32 33 34 35 36 37)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp0L
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=tmp0H
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp0L
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp0H
 
-	movdqa	xmm2,xmm4
-	movdqa	xmm3,xmm0
-	paddd	xmm4,xmm7		; xmm4=data1L
-	paddd	xmm0,xmm6		; xmm0=data1H
-	psubd	xmm2,xmm7		; xmm2=data2L
-	psubd	xmm3,xmm6		; xmm3=data2H
+        movdqa  xmm2,xmm4
+        movdqa  xmm3,xmm0
+        paddd   xmm4,xmm7               ; xmm4=data1L
+        paddd   xmm0,xmm6               ; xmm0=data1H
+        psubd   xmm2,xmm7               ; xmm2=data2L
+        psubd   xmm3,xmm6               ; xmm3=data2H
 
-	movdqa	xmm7,[rel PD_DESCALE_P1_4]	; xmm7=[rel PD_DESCALE_P1_4]
+        movdqa  xmm7,[rel PD_DESCALE_P1_4]      ; xmm7=rounding term 1<<(DESCALE_P1_4-1)
 
-	paddd	xmm4,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm4,DESCALE_P1_4
-	psrad	xmm0,DESCALE_P1_4
-	paddd	xmm2,xmm7
-	paddd	xmm3,xmm7
-	psrad	xmm2,DESCALE_P1_4
-	psrad	xmm3,DESCALE_P1_4
+        paddd   xmm4,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm4,DESCALE_P1_4
+        psrad   xmm0,DESCALE_P1_4
+        paddd   xmm2,xmm7
+        paddd   xmm3,xmm7
+        psrad   xmm2,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
 
-	packssdw  xmm4,xmm0		; xmm4=data1=(10 11 12 13 14 15 16 17)
-	packssdw  xmm2,xmm3		; xmm2=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm4,xmm0             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm2,xmm3             ; xmm2=data2=(20 21 22 23 24 25 26 27)
 
-	movdqa    xmm6,xmm1	; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm4	; xmm1=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm6,xmm4	; xmm6=(04 14 05 15 06 16 07 17)
-	movdqa    xmm7,xmm2	; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm5	; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm7,xmm5	; xmm7=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm1     ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm4     ; xmm1=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4     ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm7,xmm2     ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm5     ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm7,xmm5     ; xmm7=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm0,xmm1	; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm2	; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm0,xmm2	; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
-	movdqa    xmm3,xmm6	; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm7	; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm3,xmm7	; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm2     ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm0,xmm2     ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+        movdqa    xmm3,xmm6     ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7     ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm3,xmm7     ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows, store into output array.
+        ; ---- Pass 2: process rows, store into output array.
 
-	mov	rax, [original_rbp]
-	mov	rdi, r12	; (JSAMPROW *)
-	mov	rax, r13
+        mov     rax, [original_rbp]     ; NOTE: dead load; rax is overwritten by r13 below
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     rax, r13
 
-	; -- Even part
+        ; -- Even part
 
-	pxor      xmm4,xmm4
-	punpcklwd xmm4,xmm1		; xmm4=tmp0
-	psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+        pxor      xmm4,xmm4
+        punpcklwd xmm4,xmm1             ; xmm4=tmp0
+        psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
 
-	; -- Odd part
+        ; -- Odd part
 
-	punpckhwd xmm1,xmm0
-	punpckhwd xmm6,xmm3
-	movdqa    xmm5,xmm1
-	movdqa    xmm2,xmm6
-	pmaddwd   xmm1,[rel PW_F256_F089]	; xmm1=(tmp2)
-	pmaddwd   xmm6,[rel PW_MF060_MF050]	; xmm6=(tmp2)
-	pmaddwd   xmm5,[rel PW_F106_MF217]	; xmm5=(tmp0)
-	pmaddwd   xmm2,[rel PW_F145_MF021]	; xmm2=(tmp0)
+        punpckhwd xmm1,xmm0
+        punpckhwd xmm6,xmm3
+        movdqa    xmm5,xmm1
+        movdqa    xmm2,xmm6
+        pmaddwd   xmm1,[rel PW_F256_F089]       ; xmm1=(tmp2)
+        pmaddwd   xmm6,[rel PW_MF060_MF050]     ; xmm6=(tmp2)
+        pmaddwd   xmm5,[rel PW_F106_MF217]      ; xmm5=(tmp0)
+        pmaddwd   xmm2,[rel PW_F145_MF021]      ; xmm2=(tmp0)
 
-	paddd     xmm6,xmm1		; xmm6=tmp2
-	paddd     xmm2,xmm5		; xmm2=tmp0
+        paddd     xmm6,xmm1             ; xmm6=tmp2
+        paddd     xmm2,xmm5             ; xmm2=tmp0
 
-	; -- Even part
+        ; -- Even part
 
-	punpcklwd xmm0,xmm3
-	pmaddwd   xmm0,[rel PW_F184_MF076]	; xmm0=tmp2
+        punpcklwd xmm0,xmm3
+        pmaddwd   xmm0,[rel PW_F184_MF076]      ; xmm0=tmp2
 
-	movdqa    xmm7,xmm4
-	paddd     xmm4,xmm0		; xmm4=tmp10
-	psubd     xmm7,xmm0		; xmm7=tmp12
+        movdqa    xmm7,xmm4
+        paddd     xmm4,xmm0             ; xmm4=tmp10
+        psubd     xmm7,xmm0             ; xmm7=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm1,[rel PD_DESCALE_P2_4]	; xmm1=[rel PD_DESCALE_P2_4]
+        movdqa  xmm1,[rel PD_DESCALE_P2_4]      ; xmm1=rounding term 1<<(DESCALE_P2_4-1)
 
-	movdqa	xmm5,xmm4
-	movdqa	xmm3,xmm7
-	paddd	xmm4,xmm6		; xmm4=data0=(00 10 20 30)
-	paddd	xmm7,xmm2		; xmm7=data1=(01 11 21 31)
-	psubd	xmm5,xmm6		; xmm5=data3=(03 13 23 33)
-	psubd	xmm3,xmm2		; xmm3=data2=(02 12 22 32)
+        movdqa  xmm5,xmm4
+        movdqa  xmm3,xmm7
+        paddd   xmm4,xmm6               ; xmm4=data0=(00 10 20 30)
+        paddd   xmm7,xmm2               ; xmm7=data1=(01 11 21 31)
+        psubd   xmm5,xmm6               ; xmm5=data3=(03 13 23 33)
+        psubd   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
 
-	paddd	xmm4,xmm1
-	paddd	xmm7,xmm1
-	psrad	xmm4,DESCALE_P2_4
-	psrad	xmm7,DESCALE_P2_4
-	paddd	xmm5,xmm1
-	paddd	xmm3,xmm1
-	psrad	xmm5,DESCALE_P2_4
-	psrad	xmm3,DESCALE_P2_4
+        paddd   xmm4,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm4,DESCALE_P2_4
+        psrad   xmm7,DESCALE_P2_4
+        paddd   xmm5,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm5,DESCALE_P2_4
+        psrad   xmm3,DESCALE_P2_4
 
-	packssdw  xmm4,xmm3		; xmm4=(00 10 20 30 02 12 22 32)
-	packssdw  xmm7,xmm5		; xmm7=(01 11 21 31 03 13 23 33)
+        packssdw  xmm4,xmm3             ; xmm4=(00 10 20 30 02 12 22 32)
+        packssdw  xmm7,xmm5             ; xmm7=(01 11 21 31 03 13 23 33)
 
-	movdqa    xmm0,xmm4		; transpose coefficients(phase 1)
-	punpcklwd xmm4,xmm7		; xmm4=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm0,xmm7		; xmm0=(02 03 12 13 22 23 32 33)
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm7             ; xmm4=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm0,xmm7             ; xmm0=(02 03 12 13 22 23 32 33)
 
-	movdqa    xmm6,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm0		; xmm4=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm6,xmm0		; xmm6=(20 21 22 23 30 31 32 33)
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm6,xmm0             ; xmm6=(20 21 22 23 30 31 32 33)
 
-	packsswb  xmm4,xmm6		; xmm4=(00 01 02 03 10 11 12 13 20 ..)
-	paddb     xmm4,[rel PB_CENTERJSAMP]
+        packsswb  xmm4,xmm6             ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+        paddb     xmm4,[rel PB_CENTERJSAMP]
 
-	pshufd    xmm2,xmm4,0x39	; xmm2=(10 11 12 13 20 21 22 23 30 ..)
-	pshufd    xmm1,xmm4,0x4E	; xmm1=(20 21 22 23 30 31 32 33 00 ..)
-	pshufd    xmm3,xmm4,0x93	; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+        pshufd    xmm2,xmm4,0x39        ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+        pshufd    xmm1,xmm4,0x4E        ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+        pshufd    xmm3,xmm4,0x93        ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
 
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-	movd	XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
-	movd	XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
-	mov	rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
-	movd	XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
-	movd	XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movd    XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
+        mov     rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
+        movd    XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
 
-	uncollect_args
-	mov	rsp,rbp		; rsp <- aligned rbp
-	pop	rsp		; rsp <- original rbp
-	pop	rbp
-	ret
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -412,165 +412,165 @@
 ; r12 = JSAMPARRAY output_buf
 ; r13 = JDIMENSION output_col
 
-	align	16
-	global	EXTN(jsimd_idct_2x2_sse2)
+        align   16
+        global  EXTN(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
-	push	rbp
-	mov	rax,rsp
-	mov	rbp,rsp
-	collect_args
-	push	rbx
+        push    rbp                     ; save caller's rbp; restored by "pop rbp" before ret
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args                    ; macro defined elsewhere; presumably sets up r10-r13 (read below as quantptr, inptr, output_buf, output_col)
+        push    rbx                     ; ebx is overwritten by pextrw in pass 2; restored by "pop rbx"
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	rdx, r10	; quantptr
-	mov	rsi, r11		; inptr
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
 
-	; | input:                  | result:        |
-	; | 00 01 ** 03 ** 05 ** 07 |                |
-	; | 10 11 ** 13 ** 15 ** 17 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
-	; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
-	; | 50 51 ** 53 ** 55 ** 57 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 70 71 ** 73 ** 75 ** 77 |                |
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
-	; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+        ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+        ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
 
-	pcmpeqd   xmm7,xmm7
-	pslld     xmm7,WORD_BIT		; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+        pcmpeqd   xmm7,xmm7
+        pslld     xmm7,WORD_BIT         ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
 
-	movdqa    xmm4,xmm0		; xmm4=(10 11 ** 13 ** 15 ** 17)
-	movdqa    xmm5,xmm2		; xmm5=(50 51 ** 53 ** 55 ** 57)
-	punpcklwd xmm4,xmm1		; xmm4=(10 30 11 31 ** ** 13 33)
-	punpcklwd xmm5,xmm3		; xmm5=(50 70 51 71 ** ** 53 73)
-	pmaddwd   xmm4,[rel PW_F362_MF127]
-	pmaddwd   xmm5,[rel PW_F085_MF072]
+        movdqa    xmm4,xmm0             ; xmm4=(10 11 ** 13 ** 15 ** 17)
+        movdqa    xmm5,xmm2             ; xmm5=(50 51 ** 53 ** 55 ** 57)
+        punpcklwd xmm4,xmm1             ; xmm4=(10 30 11 31 ** ** 13 33)
+        punpcklwd xmm5,xmm3             ; xmm5=(50 70 51 71 ** ** 53 73)
+        pmaddwd   xmm4,[rel PW_F362_MF127]
+        pmaddwd   xmm5,[rel PW_F085_MF072]
 
-	psrld	xmm0,WORD_BIT		; xmm0=(11 -- 13 -- 15 -- 17 --)
-	pand	xmm1,xmm7		; xmm1=(-- 31 -- 33 -- 35 -- 37)
-	psrld	xmm2,WORD_BIT		; xmm2=(51 -- 53 -- 55 -- 57 --)
-	pand	xmm3,xmm7		; xmm3=(-- 71 -- 73 -- 75 -- 77)
-	por	xmm0,xmm1		; xmm0=(11 31 13 33 15 35 17 37)
-	por	xmm2,xmm3		; xmm2=(51 71 53 73 55 75 57 77)
-	pmaddwd	xmm0,[rel PW_F362_MF127]
-	pmaddwd	xmm2,[rel PW_F085_MF072]
+        psrld   xmm0,WORD_BIT           ; xmm0=(11 -- 13 -- 15 -- 17 --)
+        pand    xmm1,xmm7               ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+        psrld   xmm2,WORD_BIT           ; xmm2=(51 -- 53 -- 55 -- 57 --)
+        pand    xmm3,xmm7               ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+        por     xmm0,xmm1               ; xmm0=(11 31 13 33 15 35 17 37)
+        por     xmm2,xmm3               ; xmm2=(51 71 53 73 55 75 57 77)
+        pmaddwd xmm0,[rel PW_F362_MF127]
+        pmaddwd xmm2,[rel PW_F085_MF072]
 
-	paddd	xmm4,xmm5		; xmm4=tmp0[col0 col1 **** col3]
-	paddd	xmm0,xmm2		; xmm0=tmp0[col1 col3 col5 col7]
+        paddd   xmm4,xmm5               ; xmm4=tmp0[col0 col1 **** col3]
+        paddd   xmm0,xmm2               ; xmm0=tmp0[col1 col3 col5 col7]
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
-	pmullw	xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; xmm6=(00 01 ** 03 ** 05 ** 07)
+        ; xmm6=(00 01 ** 03 ** 05 ** 07)
 
-	movdqa	xmm1,xmm6		; xmm1=(00 01 ** 03 ** 05 ** 07)
-	pslld	xmm6,WORD_BIT		; xmm6=(-- 00 -- ** -- ** -- **)
-	pand	xmm1,xmm7		; xmm1=(-- 01 -- 03 -- 05 -- 07)
-	psrad	xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
-	psrad	xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
+        movdqa  xmm1,xmm6               ; xmm1=(00 01 ** 03 ** 05 ** 07)
+        pslld   xmm6,WORD_BIT           ; xmm6=(-- 00 -- ** -- ** -- **)
+        pand    xmm1,xmm7               ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+        psrad   xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
+        psrad   xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm3,xmm6
-	movdqa	xmm5,xmm1
-	paddd	xmm6,xmm4	; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
-	paddd	xmm1,xmm0	; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
-	psubd	xmm3,xmm4	; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
-	psubd	xmm5,xmm0	; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+        movdqa  xmm3,xmm6
+        movdqa  xmm5,xmm1
+        paddd   xmm6,xmm4       ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+        paddd   xmm1,xmm0       ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+        psubd   xmm3,xmm4       ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+        psubd   xmm5,xmm0       ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
 
-	movdqa	xmm2,[rel PD_DESCALE_P1_2]	; xmm2=[rel PD_DESCALE_P1_2]
+        movdqa  xmm2,[rel PD_DESCALE_P1_2]      ; xmm2=[rel PD_DESCALE_P1_2]
 
-	punpckldq  xmm6,xmm3		; xmm6=(A0 B0 ** **)
+        punpckldq  xmm6,xmm3            ; xmm6=(A0 B0 ** **)
 
-	movdqa     xmm7,xmm1
-	punpcklqdq xmm1,xmm5		; xmm1=(A1 A3 B1 B3)
-	punpckhqdq xmm7,xmm5		; xmm7=(A5 A7 B5 B7)
+        movdqa     xmm7,xmm1
+        punpcklqdq xmm1,xmm5            ; xmm1=(A1 A3 B1 B3)
+        punpckhqdq xmm7,xmm5            ; xmm7=(A5 A7 B5 B7)
 
-	paddd	xmm6,xmm2
-	psrad	xmm6,DESCALE_P1_2
+        paddd   xmm6,xmm2               ; xmm6 += [PD_DESCALE_P1_2] (held in xmm2)
+        psrad   xmm6,DESCALE_P1_2       ; arithmetic shift right; xmm6=(A0 B0 ** **) descaled
 
-	paddd	xmm1,xmm2
-	paddd	xmm7,xmm2
-	psrad	xmm1,DESCALE_P1_2
-	psrad	xmm7,DESCALE_P1_2
+        paddd   xmm1,xmm2               ; xmm1 += [PD_DESCALE_P1_2] (held in xmm2)
+        paddd   xmm7,xmm2               ; xmm7 += [PD_DESCALE_P1_2] (held in xmm2)
+        psrad   xmm1,DESCALE_P1_2       ; arithmetic shift right; xmm1=(A1 A3 B1 B3) descaled
+        psrad   xmm7,DESCALE_P1_2       ; arithmetic shift right; xmm7=(A5 A7 B5 B7) descaled
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows, store into output array.
+        ; ---- Pass 2: process rows, store into output array.
 
-	mov	rdi, r12	; (JSAMPROW *)
-	mov	rax, r13
+        mov     rdi, r12        ; (JSAMPROW *) output_buf
+        mov     rax, r13        ; output_col
 
-	; | input:| result:|
-	; | A0 B0 |        |
-	; | A1 B1 | C0 C1  |
-	; | A3 B3 | D0 D1  |
-	; | A5 B5 |        |
-	; | A7 B7 |        |
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
 
-	; -- Odd part
+        ; -- Odd part
 
-	packssdw  xmm1,xmm1		; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
-	packssdw  xmm7,xmm7		; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
-	pmaddwd   xmm1,[rel PW_F362_MF127]
-	pmaddwd   xmm7,[rel PW_F085_MF072]
+        packssdw  xmm1,xmm1             ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+        packssdw  xmm7,xmm7             ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+        pmaddwd   xmm1,[rel PW_F362_MF127]
+        pmaddwd   xmm7,[rel PW_F085_MF072]
 
-	paddd     xmm1,xmm7		; xmm1=tmp0[row0 row1 row0 row1]
+        paddd     xmm1,xmm7             ; xmm1=tmp0[row0 row1 row0 row1]
 
-	; -- Even part
+        ; -- Even part
 
-	pslld     xmm6,(CONST_BITS+2)	; xmm6=tmp10[row0 row1 **** ****]
+        pslld     xmm6,(CONST_BITS+2)   ; xmm6=tmp10[row0 row1 **** ****]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa    xmm4,xmm6
-	paddd     xmm6,xmm1	; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
-	psubd     xmm4,xmm1	; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+        movdqa    xmm4,xmm6
+        paddd     xmm6,xmm1     ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+        psubd     xmm4,xmm1     ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
 
-	punpckldq xmm6,xmm4	; xmm6=(C0 D0 C1 D1)
+        punpckldq xmm6,xmm4     ; xmm6=(C0 D0 C1 D1)
 
-	paddd     xmm6,[rel PD_DESCALE_P2_2]
-	psrad     xmm6,DESCALE_P2_2
+        paddd     xmm6,[rel PD_DESCALE_P2_2]
+        psrad     xmm6,DESCALE_P2_2
 
-	packssdw  xmm6,xmm6		; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
-	packsswb  xmm6,xmm6		; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
-	paddb     xmm6,[rel PB_CENTERJSAMP]
+        packssdw  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+        packsswb  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+        paddb     xmm6,[rel PB_CENTERJSAMP]
 
-	pextrw	ebx,xmm6,0x00		; ebx=(C0 D0 -- --)
-	pextrw	ecx,xmm6,0x01		; ecx=(C1 D1 -- --)
+        pextrw  ebx,xmm6,0x00           ; ebx=(C0 D0 -- --)
+        pextrw  ecx,xmm6,0x01           ; ecx=(C1 D1 -- --)
 
-	mov	rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
-	mov	rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-	mov	WORD [rdx+rax*SIZEOF_JSAMPLE], bx
-	mov	WORD [rsi+rax*SIZEOF_JSAMPLE], cx
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]   ; rdx = output_buf[0]
+        mov     rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]   ; rsi = output_buf[1]
+        mov     WORD [rdx+rax*SIZEOF_JSAMPLE], bx       ; store (C0 D0) into row 0 at output_col
+        mov     WORD [rsi+rax*SIZEOF_JSAMPLE], cx       ; store (C1 D1) into row 1 at output_col
 
-	pop	rbx
-	uncollect_args
-	pop	rbp
-	ret
+        pop     rbx                     ; restore rbx pushed after collect_args
+        uncollect_args                  ; macro defined elsewhere; presumably the counterpart of collect_args
+        pop     rbp                     ; restore caller's rbp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jiss2red.asm b/simd/jiss2red.asm
index 238c61d..886d79d 100644
--- a/simd/jiss2red.asm
+++ b/simd/jiss2red.asm
@@ -26,74 +26,74 @@
 
 ; --------------------------------------------------------------------------
 
-%define CONST_BITS	13
-%define PASS1_BITS	2
+%define CONST_BITS      13
+%define PASS1_BITS      2
 
-%define DESCALE_P1_4	(CONST_BITS-PASS1_BITS+1)
-%define DESCALE_P2_4	(CONST_BITS+PASS1_BITS+3+1)
-%define DESCALE_P1_2	(CONST_BITS-PASS1_BITS+2)
-%define DESCALE_P2_2	(CONST_BITS+PASS1_BITS+3+2)
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)      ; = 12 with CONST_BITS=13, PASS1_BITS=2
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)    ; = 19 with CONST_BITS=13, PASS1_BITS=2
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)      ; = 13 with CONST_BITS=13, PASS1_BITS=2
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)    ; = 20 with CONST_BITS=13, PASS1_BITS=2
 
 %if CONST_BITS == 13
-F_0_211	equ	 1730		; FIX(0.211164243)
-F_0_509	equ	 4176		; FIX(0.509795579)
-F_0_601	equ	 4926		; FIX(0.601344887)
-F_0_720	equ	 5906		; FIX(0.720959822)
-F_0_765	equ	 6270		; FIX(0.765366865)
-F_0_850	equ	 6967		; FIX(0.850430095)
-F_0_899	equ	 7373		; FIX(0.899976223)
-F_1_061	equ	 8697		; FIX(1.061594337)
-F_1_272	equ	10426		; FIX(1.272758580)
-F_1_451	equ	11893		; FIX(1.451774981)
-F_1_847	equ	15137		; FIX(1.847759065)
-F_2_172	equ	17799		; FIX(2.172734803)
-F_2_562	equ	20995		; FIX(2.562915447)
-F_3_624	equ	29692		; FIX(3.624509785)
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
 %else
 ; NASM cannot do compile-time arithmetic on floating-point constants.
 %define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
-F_0_211	equ	DESCALE( 226735879,30-CONST_BITS)	; FIX(0.211164243)
-F_0_509	equ	DESCALE( 547388834,30-CONST_BITS)	; FIX(0.509795579)
-F_0_601	equ	DESCALE( 645689155,30-CONST_BITS)	; FIX(0.601344887)
-F_0_720	equ	DESCALE( 774124714,30-CONST_BITS)	; FIX(0.720959822)
-F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
-F_0_850	equ	DESCALE( 913142361,30-CONST_BITS)	; FIX(0.850430095)
-F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
-F_1_061	equ	DESCALE(1139878239,30-CONST_BITS)	; FIX(1.061594337)
-F_1_272	equ	DESCALE(1366614119,30-CONST_BITS)	; FIX(1.272758580)
-F_1_451	equ	DESCALE(1558831516,30-CONST_BITS)	; FIX(1.451774981)
-F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
-F_2_172	equ	DESCALE(2332956230,30-CONST_BITS)	; FIX(2.172734803)
-F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
-F_3_624	equ	DESCALE(3891787747,30-CONST_BITS)	; FIX(3.624509785)
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
 %endif
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_red_sse2)
+        alignz  16
+        global  EXTN(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
 
-PW_F184_MF076	times 4 dw  F_1_847,-F_0_765
-PW_F256_F089	times 4 dw  F_2_562, F_0_899
-PW_F106_MF217	times 4 dw  F_1_061,-F_2_172
-PW_MF060_MF050	times 4 dw -F_0_601,-F_0_509
-PW_F145_MF021	times 4 dw  F_1_451,-F_0_211
-PW_F362_MF127	times 4 dw  F_3_624,-F_1_272
-PW_F085_MF072	times 4 dw  F_0_850,-F_0_720
-PD_DESCALE_P1_4	times 4 dd  1 << (DESCALE_P1_4-1)
-PD_DESCALE_P2_4	times 4 dd  1 << (DESCALE_P2_4-1)
-PD_DESCALE_P1_2	times 4 dd  1 << (DESCALE_P1_2-1)
-PD_DESCALE_P2_2	times 4 dd  1 << (DESCALE_P2_2-1)
-PB_CENTERJSAMP	times 16 db CENTERJSAMPLE
+PW_F184_MF076   times 4 dw  F_1_847,-F_0_765           ; 4 x ( FIX(1.847759065), -FIX(0.765366865))
+PW_F256_F089    times 4 dw  F_2_562, F_0_899           ; 4 x ( FIX(2.562915447),  FIX(0.899976223))
+PW_F106_MF217   times 4 dw  F_1_061,-F_2_172           ; 4 x ( FIX(1.061594337), -FIX(2.172734803))
+PW_MF060_MF050  times 4 dw -F_0_601,-F_0_509           ; 4 x (-FIX(0.601344887), -FIX(0.509795579))
+PW_F145_MF021   times 4 dw  F_1_451,-F_0_211           ; 4 x ( FIX(1.451774981), -FIX(0.211164243))
+PW_F362_MF127   times 4 dw  F_3_624,-F_1_272           ; 4 x ( FIX(3.624509785), -FIX(1.272758580))
+PW_F085_MF072   times 4 dw  F_0_850,-F_0_720           ; 4 x ( FIX(0.850430095), -FIX(0.720959822))
+PD_DESCALE_P1_4 times 4 dd  1 << (DESCALE_P1_4-1)      ; 4 x 2^(DESCALE_P1_4-1); added before "psrad ..,DESCALE_P1_4"
+PD_DESCALE_P2_4 times 4 dd  1 << (DESCALE_P2_4-1)      ; 4 x 2^(DESCALE_P2_4-1)
+PD_DESCALE_P1_2 times 4 dd  1 << (DESCALE_P1_2-1)      ; 4 x 2^(DESCALE_P1_2-1)
+PD_DESCALE_P2_2 times 4 dd  1 << (DESCALE_P2_2-1)      ; 4 x 2^(DESCALE_P2_2-1)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE              ; 16 bytes of CENTERJSAMPLE (symbol defined elsewhere)
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients,
 ; producing a reduced-size 4x4 output block.
@@ -103,309 +103,309 @@
 ;                      JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
 
-	align	16
-	global	EXTN(jsimd_idct_4x4_sse2)
+        align   16
+        global  EXTN(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [wk(0)]
-	pushpic	ebx
-;	push	ecx		; unused
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
 
 %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	short .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	xmm0,xmm1
-	packsswb xmm0,xmm0
-	packsswb xmm0,xmm0
-	movd	eax,xmm0
-	test	eax,eax
-	jnz	short .columnDCT
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm0,xmm1
+        packsswb xmm0,xmm0
+        packsswb xmm0,xmm0
+        movd    eax,xmm0
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	psllw	xmm0,PASS1_BITS
+        psllw   xmm0,PASS1_BITS
 
-	movdqa    xmm3,xmm0	; xmm0=in0=(00 01 02 03 04 05 06 07)
-	punpcklwd xmm0,xmm0	; xmm0=(00 00 01 01 02 02 03 03)
-	punpckhwd xmm3,xmm3	; xmm3=(04 04 05 05 06 06 07 07)
+        movdqa    xmm3,xmm0     ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0     ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm3,xmm3     ; xmm3=(04 04 05 05 06 06 07 07)
 
-	pshufd	xmm1,xmm0,0x50	; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
-	pshufd	xmm0,xmm0,0xFA	; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
-	pshufd	xmm6,xmm3,0x50	; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
-	pshufd	xmm3,xmm3,0xFA	; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+        pshufd  xmm1,xmm0,0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+        pshufd  xmm0,xmm0,0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+        pshufd  xmm6,xmm3,0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+        pshufd  xmm3,xmm3,0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
 
-	jmp	near .column_end
-	alignx	16,7
+        jmp     near .column_end
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	movdqa    xmm4,xmm0
-	movdqa    xmm5,xmm0
-	punpcklwd xmm4,xmm1
-	punpckhwd xmm5,xmm1
-	movdqa    xmm0,xmm4
-	movdqa    xmm1,xmm5
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F256_F089)]	; xmm4=(tmp2L)
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F256_F089)]	; xmm5=(tmp2H)
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_F106_MF217)]	; xmm0=(tmp0L)
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F106_MF217)]	; xmm1=(tmp0H)
+        movdqa    xmm4,xmm0
+        movdqa    xmm5,xmm0
+        punpcklwd xmm4,xmm1
+        punpckhwd xmm5,xmm1
+        movdqa    xmm0,xmm4
+        movdqa    xmm1,xmm5
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F256_F089)]       ; xmm4=(tmp2L)
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F256_F089)]       ; xmm5=(tmp2H)
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm0=(tmp0L)
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm1=(tmp0H)
 
-	movdqa    xmm6,xmm2
-	movdqa    xmm7,xmm2
-	punpcklwd xmm6,xmm3
-	punpckhwd xmm7,xmm3
-	movdqa    xmm2,xmm6
-	movdqa    xmm3,xmm7
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]	; xmm6=(tmp2L)
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF050)]	; xmm7=(tmp2H)
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]	; xmm2=(tmp0L)
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_F145_MF021)]	; xmm3=(tmp0H)
+        movdqa    xmm6,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm6,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm2,xmm6
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2L)
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm7=(tmp2H)
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0L)
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm3=(tmp0H)
 
-	paddd	xmm6,xmm4		; xmm6=tmp2L
-	paddd	xmm7,xmm5		; xmm7=tmp2H
-	paddd	xmm2,xmm0		; xmm2=tmp0L
-	paddd	xmm3,xmm1		; xmm3=tmp0H
+        paddd   xmm6,xmm4               ; xmm6=tmp2L
+        paddd   xmm7,xmm5               ; xmm7=tmp2H
+        paddd   xmm2,xmm0               ; xmm2=tmp0L
+        paddd   xmm3,xmm1               ; xmm3=tmp0H
 
-	movdqa	XMMWORD [wk(0)], xmm2	; wk(0)=tmp0L
-	movdqa	XMMWORD [wk(1)], xmm3	; wk(1)=tmp0H
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp0L
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=tmp0H
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	pxor      xmm1,xmm1
-	pxor      xmm2,xmm2
-	punpcklwd xmm1,xmm4		; xmm1=tmp0L
-	punpckhwd xmm2,xmm4		; xmm2=tmp0H
-	psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
-	psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+        pxor      xmm1,xmm1
+        pxor      xmm2,xmm2
+        punpcklwd xmm1,xmm4             ; xmm1=tmp0L
+        punpckhwd xmm2,xmm4             ; xmm2=tmp0H
+        psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+        psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
 
-	movdqa    xmm3,xmm5		; xmm5=in2=z2
-	punpcklwd xmm5,xmm0		; xmm0=in6=z3
-	punpckhwd xmm3,xmm0
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F184_MF076)]	; xmm5=tmp2L
-	pmaddwd   xmm3,[GOTOFF(ebx,PW_F184_MF076)]	; xmm3=tmp2H
+        movdqa    xmm3,xmm5             ; xmm5=in2=z2
+        punpcklwd xmm5,xmm0             ; xmm0=in6=z3
+        punpckhwd xmm3,xmm0
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm5=tmp2L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm3=tmp2H
 
-	movdqa	xmm4,xmm1
-	movdqa	xmm0,xmm2
-	paddd	xmm1,xmm5		; xmm1=tmp10L
-	paddd	xmm2,xmm3		; xmm2=tmp10H
-	psubd	xmm4,xmm5		; xmm4=tmp12L
-	psubd	xmm0,xmm3		; xmm0=tmp12H
+        movdqa  xmm4,xmm1
+        movdqa  xmm0,xmm2
+        paddd   xmm1,xmm5               ; xmm1=tmp10L
+        paddd   xmm2,xmm3               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp12L
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm5,xmm1
-	movdqa	xmm3,xmm2
-	paddd	xmm1,xmm6		; xmm1=data0L
-	paddd	xmm2,xmm7		; xmm2=data0H
-	psubd	xmm5,xmm6		; xmm5=data3L
-	psubd	xmm3,xmm7		; xmm3=data3H
+        movdqa  xmm5,xmm1
+        movdqa  xmm3,xmm2
+        paddd   xmm1,xmm6               ; xmm1=data0L
+        paddd   xmm2,xmm7               ; xmm2=data0H
+        psubd   xmm5,xmm6               ; xmm5=data3L
+        psubd   xmm3,xmm7               ; xmm3=data3H
 
-	movdqa	xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]	; xmm6=[PD_DESCALE_P1_4]
+        movdqa  xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm6=[PD_DESCALE_P1_4]
 
-	paddd	xmm1,xmm6
-	paddd	xmm2,xmm6
-	psrad	xmm1,DESCALE_P1_4
-	psrad	xmm2,DESCALE_P1_4
-	paddd	xmm5,xmm6
-	paddd	xmm3,xmm6
-	psrad	xmm5,DESCALE_P1_4
-	psrad	xmm3,DESCALE_P1_4
+        paddd   xmm1,xmm6
+        paddd   xmm2,xmm6
+        psrad   xmm1,DESCALE_P1_4
+        psrad   xmm2,DESCALE_P1_4
+        paddd   xmm5,xmm6
+        paddd   xmm3,xmm6
+        psrad   xmm5,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
 
-	packssdw  xmm1,xmm2		; xmm1=data0=(00 01 02 03 04 05 06 07)
-	packssdw  xmm5,xmm3		; xmm5=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm1,xmm2             ; xmm1=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm5,xmm3             ; xmm5=data3=(30 31 32 33 34 35 36 37)
 
-	movdqa	xmm7, XMMWORD [wk(0)]	; xmm7=tmp0L
-	movdqa	xmm6, XMMWORD [wk(1)]	; xmm6=tmp0H
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp0L
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp0H
 
-	movdqa	xmm2,xmm4
-	movdqa	xmm3,xmm0
-	paddd	xmm4,xmm7		; xmm4=data1L
-	paddd	xmm0,xmm6		; xmm0=data1H
-	psubd	xmm2,xmm7		; xmm2=data2L
-	psubd	xmm3,xmm6		; xmm3=data2H
+        movdqa  xmm2,xmm4
+        movdqa  xmm3,xmm0
+        paddd   xmm4,xmm7               ; xmm4=data1L
+        paddd   xmm0,xmm6               ; xmm0=data1H
+        psubd   xmm2,xmm7               ; xmm2=data2L
+        psubd   xmm3,xmm6               ; xmm3=data2H
 
-	movdqa	xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]	; xmm7=[PD_DESCALE_P1_4]
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm7=[PD_DESCALE_P1_4]
 
-	paddd	xmm4,xmm7
-	paddd	xmm0,xmm7
-	psrad	xmm4,DESCALE_P1_4
-	psrad	xmm0,DESCALE_P1_4
-	paddd	xmm2,xmm7
-	paddd	xmm3,xmm7
-	psrad	xmm2,DESCALE_P1_4
-	psrad	xmm3,DESCALE_P1_4
+        paddd   xmm4,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm4,DESCALE_P1_4
+        psrad   xmm0,DESCALE_P1_4
+        paddd   xmm2,xmm7
+        paddd   xmm3,xmm7
+        psrad   xmm2,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
 
-	packssdw  xmm4,xmm0		; xmm4=data1=(10 11 12 13 14 15 16 17)
-	packssdw  xmm2,xmm3		; xmm2=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm4,xmm0             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm2,xmm3             ; xmm2=data2=(20 21 22 23 24 25 26 27)
 
-	movdqa    xmm6,xmm1	; transpose coefficients(phase 1)
-	punpcklwd xmm1,xmm4	; xmm1=(00 10 01 11 02 12 03 13)
-	punpckhwd xmm6,xmm4	; xmm6=(04 14 05 15 06 16 07 17)
-	movdqa    xmm7,xmm2	; transpose coefficients(phase 1)
-	punpcklwd xmm2,xmm5	; xmm2=(20 30 21 31 22 32 23 33)
-	punpckhwd xmm7,xmm5	; xmm7=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm1     ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm4     ; xmm1=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4     ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm7,xmm2     ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm5     ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm7,xmm5     ; xmm7=(24 34 25 35 26 36 27 37)
 
-	movdqa    xmm0,xmm1	; transpose coefficients(phase 2)
-	punpckldq xmm1,xmm2	; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
-	punpckhdq xmm0,xmm2	; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
-	movdqa    xmm3,xmm6	; transpose coefficients(phase 2)
-	punpckldq xmm6,xmm7	; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
-	punpckhdq xmm3,xmm7	; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm2     ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm0,xmm2     ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+        movdqa    xmm3,xmm6     ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7     ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm3,xmm7     ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
 .column_end:
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows, store into output array.
+        ; ---- Pass 2: process rows, store into output array.
 
-	mov	eax, [original_ebp]
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
 
-	; -- Even part
+        ; -- Even part
 
-	pxor      xmm4,xmm4
-	punpcklwd xmm4,xmm1		; xmm4=tmp0
-	psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+        pxor      xmm4,xmm4
+        punpcklwd xmm4,xmm1             ; xmm4=tmp0
+        psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
 
-	; -- Odd part
+        ; -- Odd part
 
-	punpckhwd xmm1,xmm0
-	punpckhwd xmm6,xmm3
-	movdqa    xmm5,xmm1
-	movdqa    xmm2,xmm6
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F256_F089)]	; xmm1=(tmp2)
-	pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]	; xmm6=(tmp2)
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F106_MF217)]	; xmm5=(tmp0)
-	pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]	; xmm2=(tmp0)
+        punpckhwd xmm1,xmm0
+        punpckhwd xmm6,xmm3
+        movdqa    xmm5,xmm1
+        movdqa    xmm2,xmm6
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F256_F089)]       ; xmm1=(tmp2)
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2)
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm5=(tmp0)
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0)
 
-	paddd     xmm6,xmm1		; xmm6=tmp2
-	paddd     xmm2,xmm5		; xmm2=tmp0
+        paddd     xmm6,xmm1             ; xmm6=tmp2
+        paddd     xmm2,xmm5             ; xmm2=tmp0
 
-	; -- Even part
+        ; -- Even part
 
-	punpcklwd xmm0,xmm3
-	pmaddwd   xmm0,[GOTOFF(ebx,PW_F184_MF076)]	; xmm0=tmp2
+        punpcklwd xmm0,xmm3
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm0=tmp2
 
-	movdqa    xmm7,xmm4
-	paddd     xmm4,xmm0		; xmm4=tmp10
-	psubd     xmm7,xmm0		; xmm7=tmp12
+        movdqa    xmm7,xmm4
+        paddd     xmm4,xmm0             ; xmm4=tmp10
+        psubd     xmm7,xmm0             ; xmm7=tmp12
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)]	; xmm1=[PD_DESCALE_P2_4]
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)]      ; xmm1=[PD_DESCALE_P2_4]
 
-	movdqa	xmm5,xmm4
-	movdqa	xmm3,xmm7
-	paddd	xmm4,xmm6		; xmm4=data0=(00 10 20 30)
-	paddd	xmm7,xmm2		; xmm7=data1=(01 11 21 31)
-	psubd	xmm5,xmm6		; xmm5=data3=(03 13 23 33)
-	psubd	xmm3,xmm2		; xmm3=data2=(02 12 22 32)
+        movdqa  xmm5,xmm4
+        movdqa  xmm3,xmm7
+        paddd   xmm4,xmm6               ; xmm4=data0=(00 10 20 30)
+        paddd   xmm7,xmm2               ; xmm7=data1=(01 11 21 31)
+        psubd   xmm5,xmm6               ; xmm5=data3=(03 13 23 33)
+        psubd   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
 
-	paddd	xmm4,xmm1
-	paddd	xmm7,xmm1
-	psrad	xmm4,DESCALE_P2_4
-	psrad	xmm7,DESCALE_P2_4
-	paddd	xmm5,xmm1
-	paddd	xmm3,xmm1
-	psrad	xmm5,DESCALE_P2_4
-	psrad	xmm3,DESCALE_P2_4
+        paddd   xmm4,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm4,DESCALE_P2_4
+        psrad   xmm7,DESCALE_P2_4
+        paddd   xmm5,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm5,DESCALE_P2_4
+        psrad   xmm3,DESCALE_P2_4
 
-	packssdw  xmm4,xmm3		; xmm4=(00 10 20 30 02 12 22 32)
-	packssdw  xmm7,xmm5		; xmm7=(01 11 21 31 03 13 23 33)
+        packssdw  xmm4,xmm3             ; xmm4=(00 10 20 30 02 12 22 32)
+        packssdw  xmm7,xmm5             ; xmm7=(01 11 21 31 03 13 23 33)
 
-	movdqa    xmm0,xmm4		; transpose coefficients(phase 1)
-	punpcklwd xmm4,xmm7		; xmm4=(00 01 10 11 20 21 30 31)
-	punpckhwd xmm0,xmm7		; xmm0=(02 03 12 13 22 23 32 33)
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm7             ; xmm4=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm0,xmm7             ; xmm0=(02 03 12 13 22 23 32 33)
 
-	movdqa    xmm6,xmm4		; transpose coefficients(phase 2)
-	punpckldq xmm4,xmm0		; xmm4=(00 01 02 03 10 11 12 13)
-	punpckhdq xmm6,xmm0		; xmm6=(20 21 22 23 30 31 32 33)
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm6,xmm0             ; xmm6=(20 21 22 23 30 31 32 33)
 
-	packsswb  xmm4,xmm6		; xmm4=(00 01 02 03 10 11 12 13 20 ..)
-	paddb     xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)]
+        packsswb  xmm4,xmm6             ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+        paddb     xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)]
 
-	pshufd    xmm2,xmm4,0x39	; xmm2=(10 11 12 13 20 21 22 23 30 ..)
-	pshufd    xmm1,xmm4,0x4E	; xmm1=(20 21 22 23 30 31 32 33 00 ..)
-	pshufd    xmm3,xmm4,0x93	; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+        pshufd    xmm2,xmm4,0x39        ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+        pshufd    xmm1,xmm4,0x4E        ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+        pshufd    xmm3,xmm4,0x93        ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	movd	XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
-	movd	XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
-	mov	edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movd	XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
-	movd	XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movd    XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+        movd    XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; unused
-	poppic	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 
 ; --------------------------------------------------------------------------
@@ -418,177 +418,177 @@
 ;                      JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-	align	16
-	global	EXTN(jsimd_idct_2x2_sse2)
+        align   16
+        global  EXTN(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
-	push	ebp
-	mov	ebp,esp
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input.
+        ; ---- Pass 1: process columns from input.
 
-	mov	edx, POINTER [dct_table(ebp)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(ebp)]		; inptr
+        mov     edx, POINTER [dct_table(ebp)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
 
-	; | input:                  | result:        |
-	; | 00 01 ** 03 ** 05 ** 07 |                |
-	; | 10 11 ** 13 ** 15 ** 17 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
-	; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
-	; | 50 51 ** 53 ** 55 ** 57 |                |
-	; | ** ** ** ** ** ** ** ** |                |
-	; | 70 71 ** 73 ** 75 ** 77 |                |
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
 
-	; -- Odd part
+        ; -- Odd part
 
-	movdqa	xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	movdqa	xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movdqa	xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
-	pmullw	xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
-	; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+        ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+        ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
 
-	pcmpeqd   xmm7,xmm7
-	pslld     xmm7,WORD_BIT		; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+        pcmpeqd   xmm7,xmm7
+        pslld     xmm7,WORD_BIT         ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
 
-	movdqa    xmm4,xmm0		; xmm4=(10 11 ** 13 ** 15 ** 17)
-	movdqa    xmm5,xmm2		; xmm5=(50 51 ** 53 ** 55 ** 57)
-	punpcklwd xmm4,xmm1		; xmm4=(10 30 11 31 ** ** 13 33)
-	punpcklwd xmm5,xmm3		; xmm5=(50 70 51 71 ** ** 53 73)
-	pmaddwd   xmm4,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd   xmm5,[GOTOFF(ebx,PW_F085_MF072)]
+        movdqa    xmm4,xmm0             ; xmm4=(10 11 ** 13 ** 15 ** 17)
+        movdqa    xmm5,xmm2             ; xmm5=(50 51 ** 53 ** 55 ** 57)
+        punpcklwd xmm4,xmm1             ; xmm4=(10 30 11 31 ** ** 13 33)
+        punpcklwd xmm5,xmm3             ; xmm5=(50 70 51 71 ** ** 53 73)
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F085_MF072)]
 
-	psrld	xmm0,WORD_BIT		; xmm0=(11 -- 13 -- 15 -- 17 --)
-	pand	xmm1,xmm7		; xmm1=(-- 31 -- 33 -- 35 -- 37)
-	psrld	xmm2,WORD_BIT		; xmm2=(51 -- 53 -- 55 -- 57 --)
-	pand	xmm3,xmm7		; xmm3=(-- 71 -- 73 -- 75 -- 77)
-	por	xmm0,xmm1		; xmm0=(11 31 13 33 15 35 17 37)
-	por	xmm2,xmm3		; xmm2=(51 71 53 73 55 75 57 77)
-	pmaddwd	xmm0,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd	xmm2,[GOTOFF(ebx,PW_F085_MF072)]
+        psrld   xmm0,WORD_BIT           ; xmm0=(11 -- 13 -- 15 -- 17 --)
+        pand    xmm1,xmm7               ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+        psrld   xmm2,WORD_BIT           ; xmm2=(51 -- 53 -- 55 -- 57 --)
+        pand    xmm3,xmm7               ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+        por     xmm0,xmm1               ; xmm0=(11 31 13 33 15 35 17 37)
+        por     xmm2,xmm3               ; xmm2=(51 71 53 73 55 75 57 77)
+        pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)]
 
-	paddd	xmm4,xmm5		; xmm4=tmp0[col0 col1 **** col3]
-	paddd	xmm0,xmm2		; xmm0=tmp0[col1 col3 col5 col7]
+        paddd   xmm4,xmm5               ; xmm4=tmp0[col0 col1 **** col3]
+        paddd   xmm0,xmm2               ; xmm0=tmp0[col1 col3 col5 col7]
 
-	; -- Even part
+        ; -- Even part
 
-	movdqa	xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	pmullw	xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
 
-	; xmm6=(00 01 ** 03 ** 05 ** 07)
+        ; xmm6=(00 01 ** 03 ** 05 ** 07)
 
-	movdqa	xmm1,xmm6		; xmm1=(00 01 ** 03 ** 05 ** 07)
-	pslld	xmm6,WORD_BIT		; xmm6=(-- 00 -- ** -- ** -- **)
-	pand	xmm1,xmm7		; xmm1=(-- 01 -- 03 -- 05 -- 07)
-	psrad	xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
-	psrad	xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
+        movdqa  xmm1,xmm6               ; xmm1=(00 01 ** 03 ** 05 ** 07)
+        pslld   xmm6,WORD_BIT           ; xmm6=(-- 00 -- ** -- ** -- **)
+        pand    xmm1,xmm7               ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+        psrad   xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
+        psrad   xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa	xmm3,xmm6
-	movdqa	xmm5,xmm1
-	paddd	xmm6,xmm4	; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
-	paddd	xmm1,xmm0	; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
-	psubd	xmm3,xmm4	; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
-	psubd	xmm5,xmm0	; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+        movdqa  xmm3,xmm6
+        movdqa  xmm5,xmm1
+        paddd   xmm6,xmm4       ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+        paddd   xmm1,xmm0       ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+        psubd   xmm3,xmm4       ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+        psubd   xmm5,xmm0       ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
 
-	movdqa	xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)]	; xmm2=[PD_DESCALE_P1_2]
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)]      ; xmm2=[PD_DESCALE_P1_2]
 
-	punpckldq  xmm6,xmm3		; xmm6=(A0 B0 ** **)
+        punpckldq  xmm6,xmm3            ; xmm6=(A0 B0 ** **)
 
-	movdqa     xmm7,xmm1
-	punpcklqdq xmm1,xmm5		; xmm1=(A1 A3 B1 B3)
-	punpckhqdq xmm7,xmm5		; xmm7=(A5 A7 B5 B7)
+        movdqa     xmm7,xmm1
+        punpcklqdq xmm1,xmm5            ; xmm1=(A1 A3 B1 B3)
+        punpckhqdq xmm7,xmm5            ; xmm7=(A5 A7 B5 B7)
 
-	paddd	xmm6,xmm2
-	psrad	xmm6,DESCALE_P1_2
+        paddd   xmm6,xmm2
+        psrad   xmm6,DESCALE_P1_2
 
-	paddd	xmm1,xmm2
-	paddd	xmm7,xmm2
-	psrad	xmm1,DESCALE_P1_2
-	psrad	xmm7,DESCALE_P1_2
+        paddd   xmm1,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm1,DESCALE_P1_2
+        psrad   xmm7,DESCALE_P1_2
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows, store into output array.
+        ; ---- Pass 2: process rows, store into output array.
 
-	mov	edi, JSAMPARRAY [output_buf(ebp)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(ebp)]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(ebp)]
 
-	; | input:| result:|
-	; | A0 B0 |        |
-	; | A1 B1 | C0 C1  |
-	; | A3 B3 | D0 D1  |
-	; | A5 B5 |        |
-	; | A7 B7 |        |
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
 
-	; -- Odd part
+        ; -- Odd part
 
-	packssdw  xmm1,xmm1		; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
-	packssdw  xmm7,xmm7		; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
-	pmaddwd   xmm1,[GOTOFF(ebx,PW_F362_MF127)]
-	pmaddwd   xmm7,[GOTOFF(ebx,PW_F085_MF072)]
+        packssdw  xmm1,xmm1             ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+        packssdw  xmm7,xmm7             ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F085_MF072)]
 
-	paddd     xmm1,xmm7		; xmm1=tmp0[row0 row1 row0 row1]
+        paddd     xmm1,xmm7             ; xmm1=tmp0[row0 row1 row0 row1]
 
-	; -- Even part
+        ; -- Even part
 
-	pslld     xmm6,(CONST_BITS+2)	; xmm6=tmp10[row0 row1 **** ****]
+        pslld     xmm6,(CONST_BITS+2)   ; xmm6=tmp10[row0 row1 **** ****]
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	movdqa    xmm4,xmm6
-	paddd     xmm6,xmm1	; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
-	psubd     xmm4,xmm1	; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+        movdqa    xmm4,xmm6
+        paddd     xmm6,xmm1     ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+        psubd     xmm4,xmm1     ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
 
-	punpckldq xmm6,xmm4	; xmm6=(C0 D0 C1 D1)
+        punpckldq xmm6,xmm4     ; xmm6=(C0 D0 C1 D1)
 
-	paddd     xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)]
-	psrad     xmm6,DESCALE_P2_2
+        paddd     xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)]
+        psrad     xmm6,DESCALE_P2_2
 
-	packssdw  xmm6,xmm6		; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
-	packsswb  xmm6,xmm6		; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
-	paddb     xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)]
+        packssdw  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+        packsswb  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+        paddb     xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)]
 
-	pextrw	ebx,xmm6,0x00		; ebx=(C0 D0 -- --)
-	pextrw	ecx,xmm6,0x01		; ecx=(C1 D1 -- --)
+        pextrw  ebx,xmm6,0x00           ; ebx=(C0 D0 -- --)
+        pextrw  ecx,xmm6,0x01           ; ecx=(C1 D1 -- --)
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	mov	WORD [edx+eax*SIZEOF_JSAMPLE], bx
-	mov	WORD [esi+eax*SIZEOF_JSAMPLE], cx
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx
+        mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jisseflt.asm b/simd/jisseflt.asm
index d6147c1..8b81355 100644
--- a/simd/jisseflt.asm
+++ b/simd/jisseflt.asm
@@ -25,34 +25,34 @@
 
 ; --------------------------------------------------------------------------
 
-%macro	unpcklps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
-	shufps	%1,%2,0x44
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44
 %endmacro
 
-%macro	unpckhps2 2	; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
-	shufps	%1,%2,0xEE
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 
-	alignz	16
-	global	EXTN(jconst_idct_float_sse)
+        alignz  16
+        global  EXTN(jconst_idct_float_sse)
 
 EXTN(jconst_idct_float_sse):
 
-PD_1_414	times 4 dd  1.414213562373095048801689
-PD_1_847	times 4 dd  1.847759065022573512256366
-PD_1_082	times 4 dd  1.082392200292393968799446
-PD_M2_613	times 4 dd -2.613125929752753055713286
-PD_0_125	times 4 dd  0.125	; 1/8
-PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE
+PD_1_414        times 4 dd  1.414213562373095048801689
+PD_1_847        times 4 dd  1.847759065022573512256366
+PD_1_082        times 4 dd  1.082392200292393968799446
+PD_M2_613       times 4 dd -2.613125929752753055713286
+PD_0_125        times 4 dd  0.125       ; 1/8
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
 
-	alignz	16
+        alignz  16
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Perform dequantization and inverse DCT on one block of coefficients.
 ;
@@ -61,512 +61,512 @@
 ;                       JSAMPARRAY output_buf, JDIMENSION output_col)
 ;
 
-%define dct_table(b)	(b)+8			; void * dct_table
-%define coef_block(b)	(b)+12		; JCOEFPTR coef_block
-%define output_buf(b)	(b)+16		; JSAMPARRAY output_buf
-%define output_col(b)	(b)+20		; JDIMENSION output_col
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
 
-%define original_ebp	ebp+0
-%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
-%define WK_NUM		2
-%define workspace	wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
-					; FAST_FLOAT workspace[DCTSIZE2]
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
 
-	align	16
-	global	EXTN(jsimd_idct_float_sse)
+        align   16
+        global  EXTN(jsimd_idct_float_sse)
 
 EXTN(jsimd_idct_float_sse):
-	push	ebp
-	mov	eax,esp				; eax = original ebp
-	sub	esp, byte 4
-	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
-	mov	[esp],eax
-	mov	ebp,esp				; ebp = aligned ebp
-	lea	esp, [workspace]
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-	push	esi
-	push	edi
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
 
-	get_GOT	ebx		; get GOT address
+        get_GOT ebx             ; get GOT address
 
-	; ---- Pass 1: process columns from input, store into work array.
+        ; ---- Pass 1: process columns from input, store into work array.
 
-;	mov	eax, [original_ebp]
-	mov	edx, POINTER [dct_table(eax)]	; quantptr
-	mov	esi, JCOEFPTR [coef_block(eax)]		; inptr
-	lea	edi, [workspace]			; FAST_FLOAT * wsptr
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
-	mov	eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	or	eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	jnz	near .columnDCT
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
 
-	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
-	por	mm1,mm0
-	packsswb mm1,mm1
-	movd	eax,mm1
-	test	eax,eax
-	jnz	short .columnDCT
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
 
-	; -- AC terms all zero
+        ; -- AC terms all zero
 
-	movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
 
-	punpckhwd mm1,mm0			; mm1=(** 02 ** 03)
-	punpcklwd mm0,mm0			; mm0=(00 00 01 01)
-	psrad     mm1,(DWORD_BIT-WORD_BIT)	; mm1=in0H=(02 03)
-	psrad     mm0,(DWORD_BIT-WORD_BIT)	; mm0=in0L=(00 01)
-	cvtpi2ps  xmm3,mm1			; xmm3=(02 03 ** **)
-	cvtpi2ps  xmm0,mm0			; xmm0=(00 01 ** **)
-	movlhps   xmm0,xmm3			; xmm0=in0=(00 01 02 03)
+        punpckhwd mm1,mm0                       ; mm1=(** 02 ** 03)
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in0H=(02 03)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+        cvtpi2ps  xmm3,mm1                      ; xmm3=(02 03 ** **)
+        cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+        movlhps   xmm0,xmm3                     ; xmm0=in0=(00 01 02 03)
 
-	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm1,xmm0
-	movaps	xmm2,xmm0
-	movaps	xmm3,xmm0
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
 
-	shufps	xmm0,xmm0,0x00			; xmm0=(00 00 00 00)
-	shufps	xmm1,xmm1,0x55			; xmm1=(01 01 01 01)
-	shufps	xmm2,xmm2,0xAA			; xmm2=(02 02 02 02)
-	shufps	xmm3,xmm3,0xFF			; xmm3=(03 03 03 03)
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
-	movaps	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
-	jmp	near .nextcolumn
-	alignx	16,7
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
+        alignx  16,7
 %endif
 .columnDCT:
 
-	; -- Even part
+        ; -- Even part
 
-	movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
-	movq      mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
-	movq      mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
-	movq      mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq      mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq      mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
 
-	punpckhwd mm4,mm0			; mm4=(** 02 ** 03)
-	punpcklwd mm0,mm0			; mm0=(00 00 01 01)
-	punpckhwd mm5,mm1			; mm5=(** 22 ** 23)
-	punpcklwd mm1,mm1			; mm1=(20 20 21 21)
+        punpckhwd mm4,mm0                       ; mm4=(** 02 ** 03)
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        punpckhwd mm5,mm1                       ; mm5=(** 22 ** 23)
+        punpcklwd mm1,mm1                       ; mm1=(20 20 21 21)
 
-	psrad     mm4,(DWORD_BIT-WORD_BIT)	; mm4=in0H=(02 03)
-	psrad     mm0,(DWORD_BIT-WORD_BIT)	; mm0=in0L=(00 01)
-	cvtpi2ps  xmm4,mm4			; xmm4=(02 03 ** **)
-	cvtpi2ps  xmm0,mm0			; xmm0=(00 01 ** **)
-	psrad     mm5,(DWORD_BIT-WORD_BIT)	; mm5=in2H=(22 23)
-	psrad     mm1,(DWORD_BIT-WORD_BIT)	; mm1=in2L=(20 21)
-	cvtpi2ps  xmm5,mm5			; xmm5=(22 23 ** **)
-	cvtpi2ps  xmm1,mm1			; xmm1=(20 21 ** **)
+        psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in0H=(02 03)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+        cvtpi2ps  xmm4,mm4                      ; xmm4=(02 03 ** **)
+        cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+        psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in2H=(22 23)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in2L=(20 21)
+        cvtpi2ps  xmm5,mm5                      ; xmm5=(22 23 ** **)
+        cvtpi2ps  xmm1,mm1                      ; xmm1=(20 21 ** **)
 
-	punpckhwd mm6,mm2			; mm6=(** 42 ** 43)
-	punpcklwd mm2,mm2			; mm2=(40 40 41 41)
-	punpckhwd mm7,mm3			; mm7=(** 62 ** 63)
-	punpcklwd mm3,mm3			; mm3=(60 60 61 61)
+        punpckhwd mm6,mm2                       ; mm6=(** 42 ** 43)
+        punpcklwd mm2,mm2                       ; mm2=(40 40 41 41)
+        punpckhwd mm7,mm3                       ; mm7=(** 62 ** 63)
+        punpcklwd mm3,mm3                       ; mm3=(60 60 61 61)
 
-	psrad     mm6,(DWORD_BIT-WORD_BIT)	; mm6=in4H=(42 43)
-	psrad     mm2,(DWORD_BIT-WORD_BIT)	; mm2=in4L=(40 41)
-	cvtpi2ps  xmm6,mm6			; xmm6=(42 43 ** **)
-	cvtpi2ps  xmm2,mm2			; xmm2=(40 41 ** **)
-	psrad     mm7,(DWORD_BIT-WORD_BIT)	; mm7=in6H=(62 63)
-	psrad     mm3,(DWORD_BIT-WORD_BIT)	; mm3=in6L=(60 61)
-	cvtpi2ps  xmm7,mm7			; xmm7=(62 63 ** **)
-	cvtpi2ps  xmm3,mm3			; xmm3=(60 61 ** **)
+        psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in4H=(42 43)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in4L=(40 41)
+        cvtpi2ps  xmm6,mm6                      ; xmm6=(42 43 ** **)
+        cvtpi2ps  xmm2,mm2                      ; xmm2=(40 41 ** **)
+        psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in6H=(62 63)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in6L=(60 61)
+        cvtpi2ps  xmm7,mm7                      ; xmm7=(62 63 ** **)
+        cvtpi2ps  xmm3,mm3                      ; xmm3=(60 61 ** **)
 
-	movlhps   xmm0,xmm4			; xmm0=in0=(00 01 02 03)
-	movlhps   xmm1,xmm5			; xmm1=in2=(20 21 22 23)
-	mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        movlhps   xmm0,xmm4                     ; xmm0=in0=(00 01 02 03)
+        movlhps   xmm1,xmm5                     ; xmm1=in2=(20 21 22 23)
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movlhps   xmm2,xmm6			; xmm2=in4=(40 41 42 43)
-	movlhps   xmm3,xmm7			; xmm3=in6=(60 61 62 63)
-	mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        movlhps   xmm2,xmm6                     ; xmm2=in4=(40 41 42 43)
+        movlhps   xmm3,xmm7                     ; xmm3=in6=(60 61 62 63)
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[GOTOFF(ebx,PD_1_414)]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movq      mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
-	movq      mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
-	movq      mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
-	movq      mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        movq      mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq      mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq      mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq      mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
 
-	punpckhwd mm6,mm4			; mm6=(** 12 ** 13)
-	punpcklwd mm4,mm4			; mm4=(10 10 11 11)
-	punpckhwd mm2,mm0			; mm2=(** 32 ** 33)
-	punpcklwd mm0,mm0			; mm0=(30 30 31 31)
+        punpckhwd mm6,mm4                       ; mm6=(** 12 ** 13)
+        punpcklwd mm4,mm4                       ; mm4=(10 10 11 11)
+        punpckhwd mm2,mm0                       ; mm2=(** 32 ** 33)
+        punpcklwd mm0,mm0                       ; mm0=(30 30 31 31)
 
-	psrad     mm6,(DWORD_BIT-WORD_BIT)	; mm6=in1H=(12 13)
-	psrad     mm4,(DWORD_BIT-WORD_BIT)	; mm4=in1L=(10 11)
-	cvtpi2ps  xmm4,mm6			; xmm4=(12 13 ** **)
-	cvtpi2ps  xmm2,mm4			; xmm2=(10 11 ** **)
-	psrad     mm2,(DWORD_BIT-WORD_BIT)	; mm2=in3H=(32 33)
-	psrad     mm0,(DWORD_BIT-WORD_BIT)	; mm0=in3L=(30 31)
-	cvtpi2ps  xmm0,mm2			; xmm0=(32 33 ** **)
-	cvtpi2ps  xmm3,mm0			; xmm3=(30 31 ** **)
+        psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in1H=(12 13)
+        psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in1L=(10 11)
+        cvtpi2ps  xmm4,mm6                      ; xmm4=(12 13 ** **)
+        cvtpi2ps  xmm2,mm4                      ; xmm2=(10 11 ** **)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in3H=(32 33)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in3L=(30 31)
+        cvtpi2ps  xmm0,mm2                      ; xmm0=(32 33 ** **)
+        cvtpi2ps  xmm3,mm0                      ; xmm3=(30 31 ** **)
 
-	punpckhwd mm7,mm5			; mm7=(** 52 ** 53)
-	punpcklwd mm5,mm5			; mm5=(50 50 51 51)
-	punpckhwd mm3,mm1			; mm3=(** 72 ** 73)
-	punpcklwd mm1,mm1			; mm1=(70 70 71 71)
+        punpckhwd mm7,mm5                       ; mm7=(** 52 ** 53)
+        punpcklwd mm5,mm5                       ; mm5=(50 50 51 51)
+        punpckhwd mm3,mm1                       ; mm3=(** 72 ** 73)
+        punpcklwd mm1,mm1                       ; mm1=(70 70 71 71)
 
-	movlhps   xmm2,xmm4			; xmm2=in1=(10 11 12 13)
-	movlhps   xmm3,xmm0			; xmm3=in3=(30 31 32 33)
+        movlhps   xmm2,xmm4                     ; xmm2=in1=(10 11 12 13)
+        movlhps   xmm3,xmm0                     ; xmm3=in3=(30 31 32 33)
 
-	psrad     mm7,(DWORD_BIT-WORD_BIT)	; mm7=in5H=(52 53)
-	psrad     mm5,(DWORD_BIT-WORD_BIT)	; mm5=in5L=(50 51)
-	cvtpi2ps  xmm4,mm7			; xmm4=(52 53 ** **)
-	cvtpi2ps  xmm5,mm5			; xmm5=(50 51 ** **)
-	psrad     mm3,(DWORD_BIT-WORD_BIT)	; mm3=in7H=(72 73)
-	psrad     mm1,(DWORD_BIT-WORD_BIT)	; mm1=in7L=(70 71)
-	cvtpi2ps  xmm0,mm3			; xmm0=(72 73 ** **)
-	cvtpi2ps  xmm1,mm1			; xmm1=(70 71 ** **)
+        psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in5H=(52 53)
+        psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in5L=(50 51)
+        cvtpi2ps  xmm4,mm7                      ; xmm4=(52 53 ** **)
+        cvtpi2ps  xmm5,mm5                      ; xmm5=(50 51 ** **)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in7H=(72 73)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in7L=(70 71)
+        cvtpi2ps  xmm0,mm3                      ; xmm0=(72 73 ** **)
+        cvtpi2ps  xmm1,mm1                      ; xmm1=(70 71 ** **)
 
-	mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movlhps   xmm5,xmm4			; xmm5=in5=(50 51 52 53)
-	movlhps   xmm1,xmm0			; xmm1=in7=(70 71 72 73)
-	mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
-	mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        movlhps   xmm5,xmm4                     ; xmm5=in5=(50 51 52 53)
+        movlhps   xmm1,xmm0                     ; xmm1=in7=(70 71 72 73)
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[GOTOFF(ebx,PD_1_414)]	; xmm2=tmp11
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[GOTOFF(ebx,PD_1_847)]	; xmm0=z5
-	mulps	xmm3,[GOTOFF(ebx,PD_M2_613)]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[GOTOFF(ebx,PD_1_082)]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 01 02 03)
-	addps	xmm7,xmm3		; xmm7=data1=(10 11 12 13)
-	subps	xmm5,xmm1		; xmm5=data7=(70 71 72 73)
-	subps	xmm0,xmm3		; xmm0=data6=(60 61 62 63)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps    xmm1,xmm6		; transpose coefficients(phase 1)
-	unpcklps  xmm6,xmm7		; xmm6=(00 10 01 11)
-	unpckhps  xmm1,xmm7		; xmm1=(02 12 03 13)
-	movaps    xmm3,xmm0		; transpose coefficients(phase 1)
-	unpcklps  xmm0,xmm5		; xmm0=(60 70 61 71)
-	unpckhps  xmm3,xmm5		; xmm3=(62 72 63 73)
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=tmp2
-	movaps	xmm5, XMMWORD [wk(1)]	; xmm5=tmp3
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
 
-	movaps	XMMWORD [wk(0)], xmm0	; wk(0)=(60 70 61 71)
-	movaps	XMMWORD [wk(1)], xmm3	; wk(1)=(62 72 63 73)
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm0,xmm7
-	movaps	xmm3,xmm5
-	addps	xmm7,xmm2		; xmm7=data2=(20 21 22 23)
-	addps	xmm5,xmm4		; xmm5=data4=(40 41 42 43)
-	subps	xmm0,xmm2		; xmm0=data5=(50 51 52 53)
-	subps	xmm3,xmm4		; xmm3=data3=(30 31 32 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
 
-	movaps    xmm2,xmm7		; transpose coefficients(phase 1)
-	unpcklps  xmm7,xmm3		; xmm7=(20 30 21 31)
-	unpckhps  xmm2,xmm3		; xmm2=(22 32 23 33)
-	movaps    xmm4,xmm5		; transpose coefficients(phase 1)
-	unpcklps  xmm5,xmm0		; xmm5=(40 50 41 51)
-	unpckhps  xmm4,xmm0		; xmm4=(42 52 43 53)
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
 
-	movaps    xmm3,xmm6		; transpose coefficients(phase 2)
-	unpcklps2 xmm6,xmm7		; xmm6=(00 10 20 30)
-	unpckhps2 xmm3,xmm7		; xmm3=(01 11 21 31)
-	movaps    xmm0,xmm1		; transpose coefficients(phase 2)
-	unpcklps2 xmm1,xmm2		; xmm1=(02 12 22 32)
-	unpckhps2 xmm0,xmm2		; xmm0=(03 13 23 33)
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
 
-	movaps	xmm7, XMMWORD [wk(0)]	; xmm7=(60 70 61 71)
-	movaps	xmm2, XMMWORD [wk(1)]	; xmm2=(62 72 63 73)
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
-	movaps	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
-	movaps	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
-	movaps    xmm6,xmm5		; transpose coefficients(phase 2)
-	unpcklps2 xmm5,xmm7		; xmm5=(40 50 60 70)
-	unpckhps2 xmm6,xmm7		; xmm6=(41 51 61 71)
-	movaps    xmm3,xmm4		; transpose coefficients(phase 2)
-	unpcklps2 xmm4,xmm2		; xmm4=(42 52 62 72)
-	unpckhps2 xmm3,xmm2		; xmm3=(43 53 63 73)
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
 
-	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
-	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
-	movaps	XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
-	movaps	XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
 
 .nextcolumn:
-	add	esi, byte 4*SIZEOF_JCOEF		; coef_block
-	add	edx, byte 4*SIZEOF_FLOAT_MULT_TYPE	; quantptr
-	add	edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT	; wsptr
-	dec	ecx					; ctr
-	jnz	near .columnloop
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
 
-	; -- Prefetch the next coefficient block
+        ; -- Prefetch the next coefficient block
 
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
-	prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
 
-	; ---- Pass 2: process rows from work array, store into output array.
+        ; ---- Pass 2: process rows from work array, store into output array.
 
-	mov	eax, [original_ebp]
-	lea	esi, [workspace]			; FAST_FLOAT * wsptr
-	mov	edi, JSAMPARRAY [output_buf(eax)]	; (JSAMPROW *)
-	mov	eax, JDIMENSION [output_col(eax)]
-	mov	ecx, DCTSIZE/4				; ctr
-	alignx	16,7
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
 .rowloop:
 
-	; -- Even part
+        ; -- Even part
 
-	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm0
-	movaps	xmm5,xmm1
-	subps	xmm0,xmm2		; xmm0=tmp11
-	subps	xmm1,xmm3
-	addps	xmm4,xmm2		; xmm4=tmp10
-	addps	xmm5,xmm3		; xmm5=tmp13
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
 
-	mulps	xmm1,[GOTOFF(ebx,PD_1_414)]
-	subps	xmm1,xmm5		; xmm1=tmp12
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
 
-	movaps	xmm6,xmm4
-	movaps	xmm7,xmm0
-	subps	xmm4,xmm5		; xmm4=tmp3
-	subps	xmm0,xmm1		; xmm0=tmp2
-	addps	xmm6,xmm5		; xmm6=tmp0
-	addps	xmm7,xmm1		; xmm7=tmp1
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
 
-	movaps	XMMWORD [wk(1)], xmm4	; tmp3
-	movaps	XMMWORD [wk(0)], xmm0	; tmp2
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
 
-	; -- Odd part
+        ; -- Odd part
 
-	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
-	movaps	xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
 
-	movaps	xmm4,xmm2
-	movaps	xmm0,xmm5
-	addps	xmm2,xmm1		; xmm2=z11
-	addps	xmm5,xmm3		; xmm5=z13
-	subps	xmm4,xmm1		; xmm4=z12
-	subps	xmm0,xmm3		; xmm0=z10
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
 
-	movaps	xmm1,xmm2
-	subps	xmm2,xmm5
-	addps	xmm1,xmm5		; xmm1=tmp7
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
 
-	mulps	xmm2,[GOTOFF(ebx,PD_1_414)]	; xmm2=tmp11
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
 
-	movaps	xmm3,xmm0
-	addps	xmm0,xmm4
-	mulps	xmm0,[GOTOFF(ebx,PD_1_847)]	; xmm0=z5
-	mulps	xmm3,[GOTOFF(ebx,PD_M2_613)]	; xmm3=(z10 * -2.613125930)
-	mulps	xmm4,[GOTOFF(ebx,PD_1_082)]	; xmm4=(z12 * 1.082392200)
-	addps	xmm3,xmm0		; xmm3=tmp12
-	subps	xmm4,xmm0		; xmm4=tmp10
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
 
-	; -- Final output stage
+        ; -- Final output stage
 
-	subps	xmm3,xmm1		; xmm3=tmp6
-	movaps	xmm5,xmm6
-	movaps	xmm0,xmm7
-	addps	xmm6,xmm1		; xmm6=data0=(00 10 20 30)
-	addps	xmm7,xmm3		; xmm7=data1=(01 11 21 31)
-	subps	xmm5,xmm1		; xmm5=data7=(07 17 27 37)
-	subps	xmm0,xmm3		; xmm0=data6=(06 16 26 36)
-	subps	xmm2,xmm3		; xmm2=tmp5
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
 
-	movaps	xmm1,[GOTOFF(ebx,PD_0_125)]	; xmm1=[PD_0_125]
+        movaps  xmm1,[GOTOFF(ebx,PD_0_125)]     ; xmm1=[PD_0_125]
 
-	mulps	xmm6,xmm1		; descale(1/8)
-	mulps	xmm7,xmm1		; descale(1/8)
-	mulps	xmm5,xmm1		; descale(1/8)
-	mulps	xmm0,xmm1		; descale(1/8)
+        mulps   xmm6,xmm1               ; descale(1/8)
+        mulps   xmm7,xmm1               ; descale(1/8)
+        mulps   xmm5,xmm1               ; descale(1/8)
+        mulps   xmm0,xmm1               ; descale(1/8)
 
-	movhlps   xmm3,xmm6
-	movhlps   xmm1,xmm7
-	cvtps2pi  mm0,xmm6		; round to int32, mm0=data0L=(00 10)
-	cvtps2pi  mm1,xmm7		; round to int32, mm1=data1L=(01 11)
-	cvtps2pi  mm2,xmm3		; round to int32, mm2=data0H=(20 30)
-	cvtps2pi  mm3,xmm1		; round to int32, mm3=data1H=(21 31)
-	packssdw  mm0,mm2		; mm0=data0=(00 10 20 30)
-	packssdw  mm1,mm3		; mm1=data1=(01 11 21 31)
+        movhlps   xmm3,xmm6
+        movhlps   xmm1,xmm7
+        cvtps2pi  mm0,xmm6              ; round to int32, mm0=data0L=(00 10)
+        cvtps2pi  mm1,xmm7              ; round to int32, mm1=data1L=(01 11)
+        cvtps2pi  mm2,xmm3              ; round to int32, mm2=data0H=(20 30)
+        cvtps2pi  mm3,xmm1              ; round to int32, mm3=data1H=(21 31)
+        packssdw  mm0,mm2               ; mm0=data0=(00 10 20 30)
+        packssdw  mm1,mm3               ; mm1=data1=(01 11 21 31)
 
-	movhlps   xmm6,xmm5
-	movhlps   xmm7,xmm0
-	cvtps2pi  mm4,xmm5		; round to int32, mm4=data7L=(07 17)
-	cvtps2pi  mm5,xmm0		; round to int32, mm5=data6L=(06 16)
-	cvtps2pi  mm6,xmm6		; round to int32, mm6=data7H=(27 37)
-	cvtps2pi  mm7,xmm7		; round to int32, mm7=data6H=(26 36)
-	packssdw  mm4,mm6		; mm4=data7=(07 17 27 37)
-	packssdw  mm5,mm7		; mm5=data6=(06 16 26 36)
+        movhlps   xmm6,xmm5
+        movhlps   xmm7,xmm0
+        cvtps2pi  mm4,xmm5              ; round to int32, mm4=data7L=(07 17)
+        cvtps2pi  mm5,xmm0              ; round to int32, mm5=data6L=(06 16)
+        cvtps2pi  mm6,xmm6              ; round to int32, mm6=data7H=(27 37)
+        cvtps2pi  mm7,xmm7              ; round to int32, mm7=data6H=(26 36)
+        packssdw  mm4,mm6               ; mm4=data7=(07 17 27 37)
+        packssdw  mm5,mm7               ; mm5=data6=(06 16 26 36)
 
-	packsswb  mm0,mm5		; mm0=(00 10 20 30 06 16 26 36)
-	packsswb  mm1,mm4		; mm1=(01 11 21 31 07 17 27 37)
+        packsswb  mm0,mm5               ; mm0=(00 10 20 30 06 16 26 36)
+        packsswb  mm1,mm4               ; mm1=(01 11 21 31 07 17 27 37)
 
-	movaps	xmm3, XMMWORD [wk(0)]	; xmm3=tmp2
-	movaps	xmm1, XMMWORD [wk(1)]	; xmm1=tmp3
+        movaps  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp2
+        movaps  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
 
-	movaps	xmm6,[GOTOFF(ebx,PD_0_125)]	; xmm6=[PD_0_125]
+        movaps  xmm6,[GOTOFF(ebx,PD_0_125)]     ; xmm6=[PD_0_125]
 
-	addps	xmm4,xmm2		; xmm4=tmp4
-	movaps	xmm5,xmm3
-	movaps	xmm0,xmm1
-	addps	xmm3,xmm2		; xmm3=data2=(02 12 22 32)
-	addps	xmm1,xmm4		; xmm1=data4=(04 14 24 34)
-	subps	xmm5,xmm2		; xmm5=data5=(05 15 25 35)
-	subps	xmm0,xmm4		; xmm0=data3=(03 13 23 33)
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm5,xmm3
+        movaps  xmm0,xmm1
+        addps   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+        addps   xmm1,xmm4               ; xmm1=data4=(04 14 24 34)
+        subps   xmm5,xmm2               ; xmm5=data5=(05 15 25 35)
+        subps   xmm0,xmm4               ; xmm0=data3=(03 13 23 33)
 
-	mulps	xmm3,xmm6		; descale(1/8)
-	mulps	xmm1,xmm6		; descale(1/8)
-	mulps	xmm5,xmm6		; descale(1/8)
-	mulps	xmm0,xmm6		; descale(1/8)
+        mulps   xmm3,xmm6               ; descale(1/8)
+        mulps   xmm1,xmm6               ; descale(1/8)
+        mulps   xmm5,xmm6               ; descale(1/8)
+        mulps   xmm0,xmm6               ; descale(1/8)
 
-	movhlps   xmm7,xmm3
-	movhlps   xmm2,xmm1
-	cvtps2pi  mm2,xmm3		; round to int32, mm2=data2L=(02 12)
-	cvtps2pi  mm3,xmm1		; round to int32, mm3=data4L=(04 14)
-	cvtps2pi  mm6,xmm7		; round to int32, mm6=data2H=(22 32)
-	cvtps2pi  mm7,xmm2		; round to int32, mm7=data4H=(24 34)
-	packssdw  mm2,mm6		; mm2=data2=(02 12 22 32)
-	packssdw  mm3,mm7		; mm3=data4=(04 14 24 34)
+        movhlps   xmm7,xmm3
+        movhlps   xmm2,xmm1
+        cvtps2pi  mm2,xmm3              ; round to int32, mm2=data2L=(02 12)
+        cvtps2pi  mm3,xmm1              ; round to int32, mm3=data4L=(04 14)
+        cvtps2pi  mm6,xmm7              ; round to int32, mm6=data2H=(22 32)
+        cvtps2pi  mm7,xmm2              ; round to int32, mm7=data4H=(24 34)
+        packssdw  mm2,mm6               ; mm2=data2=(02 12 22 32)
+        packssdw  mm3,mm7               ; mm3=data4=(04 14 24 34)
 
-	movhlps   xmm4,xmm5
-	movhlps   xmm6,xmm0
-	cvtps2pi  mm5,xmm5		; round to int32, mm5=data5L=(05 15)
-	cvtps2pi  mm4,xmm0		; round to int32, mm4=data3L=(03 13)
-	cvtps2pi  mm6,xmm4		; round to int32, mm6=data5H=(25 35)
-	cvtps2pi  mm7,xmm6		; round to int32, mm7=data3H=(23 33)
-	packssdw  mm5,mm6		; mm5=data5=(05 15 25 35)
-	packssdw  mm4,mm7		; mm4=data3=(03 13 23 33)
+        movhlps   xmm4,xmm5
+        movhlps   xmm6,xmm0
+        cvtps2pi  mm5,xmm5              ; round to int32, mm5=data5L=(05 15)
+        cvtps2pi  mm4,xmm0              ; round to int32, mm4=data3L=(03 13)
+        cvtps2pi  mm6,xmm4              ; round to int32, mm6=data5H=(25 35)
+        cvtps2pi  mm7,xmm6              ; round to int32, mm7=data3H=(23 33)
+        packssdw  mm5,mm6               ; mm5=data5=(05 15 25 35)
+        packssdw  mm4,mm7               ; mm4=data3=(03 13 23 33)
 
-	movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]	; mm6=[PB_CENTERJSAMP]
+        movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
 
-	packsswb  mm2,mm3		; mm2=(02 12 22 32 04 14 24 34)
-	packsswb  mm4,mm5		; mm4=(03 13 23 33 05 15 25 35)
+        packsswb  mm2,mm3               ; mm2=(02 12 22 32 04 14 24 34)
+        packsswb  mm4,mm5               ; mm4=(03 13 23 33 05 15 25 35)
 
-	paddb     mm0,mm6
-	paddb     mm1,mm6
-	paddb     mm2,mm6
-	paddb     mm4,mm6
+        paddb     mm0,mm6
+        paddb     mm1,mm6
+        paddb     mm2,mm6
+        paddb     mm4,mm6
 
-	movq      mm7,mm0		; transpose coefficients(phase 1)
-	punpcklbw mm0,mm1		; mm0=(00 01 10 11 20 21 30 31)
-	punpckhbw mm7,mm1		; mm7=(06 07 16 17 26 27 36 37)
-	movq      mm3,mm2		; transpose coefficients(phase 1)
-	punpcklbw mm2,mm4		; mm2=(02 03 12 13 22 23 32 33)
-	punpckhbw mm3,mm4		; mm3=(04 05 14 15 24 25 34 35)
+        movq      mm7,mm0               ; transpose coefficients(phase 1)
+        punpcklbw mm0,mm1               ; mm0=(00 01 10 11 20 21 30 31)
+        punpckhbw mm7,mm1               ; mm7=(06 07 16 17 26 27 36 37)
+        movq      mm3,mm2               ; transpose coefficients(phase 1)
+        punpcklbw mm2,mm4               ; mm2=(02 03 12 13 22 23 32 33)
+        punpckhbw mm3,mm4               ; mm3=(04 05 14 15 24 25 34 35)
 
-	movq      mm5,mm0		; transpose coefficients(phase 2)
-	punpcklwd mm0,mm2		; mm0=(00 01 02 03 10 11 12 13)
-	punpckhwd mm5,mm2		; mm5=(20 21 22 23 30 31 32 33)
-	movq      mm6,mm3		; transpose coefficients(phase 2)
-	punpcklwd mm3,mm7		; mm3=(04 05 06 07 14 15 16 17)
-	punpckhwd mm6,mm7		; mm6=(24 25 26 27 34 35 36 37)
+        movq      mm5,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm2               ; mm0=(00 01 02 03 10 11 12 13)
+        punpckhwd mm5,mm2               ; mm5=(20 21 22 23 30 31 32 33)
+        movq      mm6,mm3               ; transpose coefficients(phase 2)
+        punpcklwd mm3,mm7               ; mm3=(04 05 06 07 14 15 16 17)
+        punpckhwd mm6,mm7               ; mm6=(24 25 26 27 34 35 36 37)
 
-	movq      mm1,mm0		; transpose coefficients(phase 3)
-	punpckldq mm0,mm3		; mm0=(00 01 02 03 04 05 06 07)
-	punpckhdq mm1,mm3		; mm1=(10 11 12 13 14 15 16 17)
-	movq      mm4,mm5		; transpose coefficients(phase 3)
-	punpckldq mm5,mm6		; mm5=(20 21 22 23 24 25 26 27)
-	punpckhdq mm4,mm6		; mm4=(30 31 32 33 34 35 36 37)
+        movq      mm1,mm0               ; transpose coefficients(phase 3)
+        punpckldq mm0,mm3               ; mm0=(00 01 02 03 04 05 06 07)
+        punpckhdq mm1,mm3               ; mm1=(10 11 12 13 14 15 16 17)
+        movq      mm4,mm5               ; transpose coefficients(phase 3)
+        punpckldq mm5,mm6               ; mm5=(20 21 22 23 24 25 26 27)
+        punpckhdq mm4,mm6               ; mm4=(30 31 32 33 34 35 36 37)
 
-	pushpic	ebx			; save GOT address
+        pushpic ebx                     ; save GOT address
 
-	mov	edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
-	mov	edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
-	mov	ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
-	movq	MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
-	movq	MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-	poppic	ebx			; restore GOT address
+        poppic  ebx                     ; restore GOT address
 
-	add	esi, byte 4*SIZEOF_FAST_FLOAT	; wsptr
-	add	edi, byte 4*SIZEOF_JSAMPROW
-	dec	ecx				; ctr
-	jnz	near .rowloop
+        add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
 
-	emms		; empty MMX state
+        emms            ; empty MMX state
 
-	pop	edi
-	pop	esi
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	mov	esp,ebp		; esp <- aligned ebp
-	pop	esp		; esp <- original ebp
-	pop	ebp
-	ret
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm.c
index cae84df..d5cf6a6 100644
--- a/simd/jsimd_arm.c
+++ b/simd/jsimd_arm.c
@@ -3,16 +3,14 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright 2009-2011 D. R. Commander
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
  *
  * This file contains the interface between the "normal" portions
- * of the library and the SIMD implementations when running on
- * ARM architecture.
- *
- * Based on the stubs from 'jsimd_none.c'
+ * of the library and the SIMD implementations when running on a
+ * 32-bit ARM architecture.
  */
 
 #define JPEG_INTERNALS
@@ -170,6 +168,7 @@
     return 0;
   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     return 0;
+
   if (simd_support & JSIMD_ARM_NEON)
     return 1;
 
@@ -183,8 +182,7 @@
 {
   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
-  switch(cinfo->in_color_space)
-  {
+  switch(cinfo->in_color_space) {
     case JCS_EXT_RGB:
       neonfct=jsimd_extrgb_ycc_convert_neon;
       break;
@@ -213,8 +211,7 @@
   }
 
   if (simd_support & JSIMD_ARM_NEON)
-    neonfct(cinfo->image_width, input_buf,
-        output_buf, output_row, num_rows);
+    neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 }
 
 GLOBAL(void)
@@ -231,8 +228,7 @@
 {
   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       neonfct=jsimd_ycc_extrgb_convert_neon;
       break;
@@ -261,8 +257,7 @@
   }
 
   if (simd_support & JSIMD_ARM_NEON)
-    neonfct(cinfo->output_width, input_buf,
-        input_row, output_buf, num_rows);
+    neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 }
 
 GLOBAL(int)
@@ -311,7 +306,7 @@
 
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
@@ -319,7 +314,7 @@
 
 GLOBAL(void)
 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
@@ -352,7 +347,7 @@
 
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
@@ -360,13 +355,14 @@
 
 GLOBAL(void)
 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
   if (simd_support & JSIMD_ARM_NEON)
     jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
-        compptr->downsampled_width, input_data, output_data_ptr);
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
 }
 
 GLOBAL(int)
@@ -552,7 +548,7 @@
   if (sizeof(ISLOW_MULT_TYPE) != 2)
     return 0;
 
-  if ((simd_support & JSIMD_ARM_NEON))
+  if (simd_support & JSIMD_ARM_NEON)
     return 1;
 
   return 0;
@@ -575,7 +571,7 @@
   if (sizeof(ISLOW_MULT_TYPE) != 2)
     return 0;
 
-  if ((simd_support & JSIMD_ARM_NEON))
+  if (simd_support & JSIMD_ARM_NEON)
     return 1;
 
   return 0;
@@ -586,8 +582,9 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
-  if ((simd_support & JSIMD_ARM_NEON))
-    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
 }
 
 GLOBAL(void)
@@ -595,8 +592,9 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
-  if ((simd_support & JSIMD_ARM_NEON))
-    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
 }
 
 GLOBAL(int)
@@ -641,7 +639,7 @@
   if (IFAST_SCALE_BITS != 2)
     return 0;
 
-  if ((simd_support & JSIMD_ARM_NEON))
+  if (simd_support & JSIMD_ARM_NEON)
     return 1;
 
   return 0;
@@ -657,26 +655,27 @@
 
 GLOBAL(void)
 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
-  if ((simd_support & JSIMD_ARM_NEON))
-    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
-  if ((simd_support & JSIMD_ARM_NEON))
-    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
 }
-
diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S
index 9962b8a..ac6c860 100644
--- a/simd/jsimd_arm_neon.S
+++ b/simd/jsimd_arm_neon.S
@@ -1,5 +1,5 @@
 /*
- * ARM NEON optimizations for libjpeg-turbo
+ * ARMv7 NEON optimizations for libjpeg-turbo
  *
  * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
  * All rights reserved.
@@ -35,6 +35,7 @@
 
 #define RESPECT_STRICT_ALIGNMENT 1
 
+
 /*****************************************************************************/
 
 /* Supplementary macro for setting function attributes */
@@ -62,6 +63,7 @@
     vtrn.32 \x1, \x3
 .endm
 
+
 #define CENTERJSAMPLE 128
 
 /*****************************************************************************/
@@ -670,6 +672,7 @@
     .unreq          ROW7R
 .endfunc
 
+
 /*****************************************************************************/
 
 /*
@@ -894,6 +897,7 @@
     .unreq          TMP4
 .endfunc
 
+
 /*****************************************************************************/
 
 /*
@@ -1108,6 +1112,7 @@
 
 .purgem idct_helper
 
+
 /*****************************************************************************/
 
 /*
@@ -1262,6 +1267,7 @@
 
 .purgem idct_helper
 
+
 /*****************************************************************************/
 
 /*
@@ -1561,6 +1567,7 @@
 .purgem do_load
 .purgem do_store
 
+
 /*****************************************************************************/
 
 /*
@@ -1871,6 +1878,7 @@
 .purgem do_load
 .purgem do_store
 
+
 /*****************************************************************************/
 
 /*
@@ -1934,6 +1942,7 @@
     .unreq          TMP4
 .endfunc
 
+
 /*****************************************************************************/
 
 /*
@@ -2057,6 +2066,7 @@
     .unreq          TMP
 .endfunc
 
+
 /*****************************************************************************/
 
 /*
@@ -2158,6 +2168,7 @@
     .unreq          LOOP_COUNT
 .endfunc
 
+
 /*****************************************************************************/
 
 /*
diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c
index e96f5b8..639be52 100644
--- a/simd/jsimd_i386.c
+++ b/simd/jsimd_i386.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright 2009-2011 D. R. Commander
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
@@ -135,8 +135,7 @@
   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
-  switch(cinfo->in_color_space)
-  {
+  switch(cinfo->in_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_extrgb_ycc_convert_sse2;
       mmxfct=jsimd_extrgb_ycc_convert_mmx;
@@ -173,11 +172,9 @@
 
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
-    sse2fct(cinfo->image_width, input_buf,
-        output_buf, output_row, num_rows);
+    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
   else if (simd_support & JSIMD_MMX)
-    mmxfct(cinfo->image_width, input_buf,
-        output_buf, output_row, num_rows);
+    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 }
 
 GLOBAL(void)
@@ -188,8 +185,7 @@
   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
-  switch(cinfo->in_color_space)
-  {
+  switch(cinfo->in_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_extrgb_gray_convert_sse2;
       mmxfct=jsimd_extrgb_gray_convert_mmx;
@@ -226,11 +222,9 @@
 
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
-    sse2fct(cinfo->image_width, input_buf,
-        output_buf, output_row, num_rows);
+    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
   else if (simd_support & JSIMD_MMX)
-    mmxfct(cinfo->image_width, input_buf,
-        output_buf, output_row, num_rows);
+    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 }
 
 GLOBAL(void)
@@ -241,8 +235,7 @@
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_ycc_extrgb_convert_sse2;
       mmxfct=jsimd_ycc_extrgb_convert_mmx;
@@ -279,11 +272,9 @@
 
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
-    sse2fct(cinfo->output_width, input_buf,
-        input_row, output_buf, num_rows);
+    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
   else if (simd_support & JSIMD_MMX)
-    mmxfct(cinfo->output_width, input_buf,
-        input_row, output_buf, num_rows);
+    mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 }
 
 GLOBAL(int)
@@ -330,12 +321,13 @@
 {
   if (simd_support & JSIMD_SSE2)
     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
-        compptr->v_samp_factor, compptr->width_in_blocks,
-        input_data, output_data);
+                               compptr->v_samp_factor,
+                               compptr->width_in_blocks, input_data,
+                               output_data);
   else if (simd_support & JSIMD_MMX)
     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
-        compptr->v_samp_factor, compptr->width_in_blocks,
-        input_data, output_data);
+                              compptr->v_samp_factor, compptr->width_in_blocks,
+                              input_data, output_data);
 }
 
 GLOBAL(void)
@@ -344,12 +336,13 @@
 {
   if (simd_support & JSIMD_SSE2)
     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
-        compptr->v_samp_factor, compptr->width_in_blocks,
-        input_data, output_data);
+                               compptr->v_samp_factor,
+                               compptr->width_in_blocks, input_data,
+                               output_data);
   else if (simd_support & JSIMD_MMX)
     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
-        compptr->v_samp_factor, compptr->width_in_blocks,
-        input_data, output_data);
+                              compptr->v_samp_factor, compptr->width_in_blocks,
+                              input_data, output_data);
 }
 
 GLOBAL(int)
@@ -392,30 +385,30 @@
 
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
   if (simd_support & JSIMD_SSE2)
-    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
-        cinfo->output_width, input_data, output_data_ptr);
+    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                             input_data, output_data_ptr);
   else if (simd_support & JSIMD_MMX)
-    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
-        cinfo->output_width, input_data, output_data_ptr);
+    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
+                            input_data, output_data_ptr);
 }
 
 GLOBAL(void)
 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
   if (simd_support & JSIMD_SSE2)
-    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
-        cinfo->output_width, input_data, output_data_ptr);
+    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                             input_data, output_data_ptr);
   else if (simd_support & JSIMD_MMX)
-    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
-        cinfo->output_width, input_data, output_data_ptr);
+    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
+                            input_data, output_data_ptr);
 }
 
 GLOBAL(int)
@@ -460,32 +453,36 @@
 
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
-        compptr->downsampled_width, input_data, output_data_ptr);
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
   else if (simd_support & JSIMD_MMX)
     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
-        compptr->downsampled_width, input_data, output_data_ptr);
+                                  compptr->downsampled_width, input_data,
+                                  output_data_ptr);
 }
 
 GLOBAL(void)
 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
-        compptr->downsampled_width, input_data, output_data_ptr);
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
   else if (simd_support & JSIMD_MMX)
     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
-        compptr->downsampled_width, input_data, output_data_ptr);
+                                  compptr->downsampled_width, input_data,
+                                  output_data_ptr);
 }
 
 GLOBAL(int)
@@ -537,8 +534,7 @@
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
       mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
@@ -575,11 +571,9 @@
 
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
-    sse2fct(cinfo->output_width, input_buf,
-        in_row_group_ctr, output_buf);
+    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
   else if (simd_support & JSIMD_MMX)
-    mmxfct(cinfo->output_width, input_buf,
-        in_row_group_ctr, output_buf);
+    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 }
 
 GLOBAL(void)
@@ -591,8 +585,7 @@
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
       mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
@@ -629,11 +622,9 @@
 
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
-    sse2fct(cinfo->output_width, input_buf,
-        in_row_group_ctr, output_buf);
+    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
   else if (simd_support & JSIMD_MMX)
-    mmxfct(cinfo->output_width, input_buf,
-        in_row_group_ctr, output_buf);
+    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 }
 
 GLOBAL(int)
@@ -912,7 +903,8 @@
                 JDIMENSION output_col)
 {
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
-    jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
   else if (simd_support & JSIMD_MMX)
     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
 }
@@ -923,7 +915,8 @@
                 JDIMENSION output_col)
 {
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
-    jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
   else if (simd_support & JSIMD_MMX)
     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
 }
@@ -1010,39 +1003,43 @@
 
 GLOBAL(void)
 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
-    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
   else if (simd_support & JSIMD_MMX)
-    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
+                         output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
-    jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
   else if (simd_support & JSIMD_MMX)
-    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
+                         output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
-    jsimd_idct_float_sse2(compptr->dct_table, coef_block,
-        output_buf, output_col);
+    jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
-    jsimd_idct_float_sse(compptr->dct_table, coef_block,
-        output_buf, output_col);
+    jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
+                         output_col);
   else if (simd_support & JSIMD_3DNOW)
-    jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
-        output_buf, output_col);
+    jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
+                           output_col);
 }
 
diff --git a/simd/jsimd_x86_64.c b/simd/jsimd_x86_64.c
index 8d17db3..87c9d56 100644
--- a/simd/jsimd_x86_64.c
+++ b/simd/jsimd_x86_64.c
@@ -3,14 +3,14 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright 2009-2011 D. R. Commander
- * 
+ *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * For conditions of distribution and use, see copyright notice in jsimdext.inc
  *
  * This file contains the interface between the "normal" portions
  * of the library and the SIMD implementations when running on a
- * x86_64 architecture.
+ * 64-bit x86 architecture.
  */
 
 #define JPEG_INTERNALS
@@ -87,8 +87,7 @@
 {
   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
-  switch(cinfo->in_color_space)
-  {
+  switch(cinfo->in_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_extrgb_ycc_convert_sse2;
       break;
@@ -126,8 +125,7 @@
 {
   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
-  switch(cinfo->in_color_space)
-  {
+  switch(cinfo->in_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_extrgb_gray_convert_sse2;
       break;
@@ -165,8 +163,7 @@
 {
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_ycc_extrgb_convert_sse2;
       break;
@@ -225,10 +222,8 @@
 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
-  jsimd_h2v2_downsample_sse2(cinfo->image_width,
-                             cinfo->max_v_samp_factor,
-                             compptr->v_samp_factor,
-                             compptr->width_in_blocks,
+  jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+                             compptr->v_samp_factor, compptr->width_in_blocks,
                              input_data, output_data);
 }
 
@@ -236,10 +231,8 @@
 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
-  jsimd_h2v1_downsample_sse2(cinfo->image_width,
-                             cinfo->max_v_samp_factor,
-                             compptr->v_samp_factor,
-                             compptr->width_in_blocks,
+  jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+                             compptr->v_samp_factor, compptr->width_in_blocks,
                              input_data, output_data);
 }
 
@@ -269,23 +262,21 @@
 
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
-  jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
-                           cinfo->output_width,
+  jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
                            input_data, output_data_ptr);
 }
 
 GLOBAL(void)
 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
-                     jpeg_component_info * compptr, 
+                     jpeg_component_info * compptr,
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
-  jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
-                           cinfo->output_width,
+  jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
                            input_data, output_data_ptr);
 }
 
@@ -321,24 +312,24 @@
 
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
   jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
-                                 compptr->downsampled_width,
-                                 input_data, output_data_ptr);
+                                 compptr->downsampled_width, input_data,
+                                 output_data_ptr);
 }
 
 GLOBAL(void)
 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
-                           jpeg_component_info * compptr, 
+                           jpeg_component_info * compptr,
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
   jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
-                                 compptr->downsampled_width,
-                                 input_data, output_data_ptr);
+                                 compptr->downsampled_width, input_data,
+                                 output_data_ptr);
 }
 
 GLOBAL(int)
@@ -379,8 +370,7 @@
 {
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
       break;
@@ -419,8 +409,7 @@
 {
   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
-  switch(cinfo->out_color_space)
-  {
+  switch(cinfo->out_color_space) {
     case JCS_EXT_RGB:
       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
       break;
@@ -728,26 +717,28 @@
 
 GLOBAL(void)
 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
-  jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+  jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
-  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
 }
 
 GLOBAL(void)
 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
 {
-  jsimd_idct_float_sse2(compptr->dct_table, coef_block,
-                        output_buf, output_col);
+  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
 }
 
diff --git a/simd/jsimdcpu.asm b/simd/jsimdcpu.asm
index bdbcc23..c42c4ad 100644
--- a/simd/jsimdcpu.asm
+++ b/simd/jsimdcpu.asm
@@ -19,8 +19,8 @@
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
+        SECTION SEG_TEXT
+        BITS    32
 ;
 ; Check if the CPU supports SIMD instructions
 ;
@@ -28,78 +28,78 @@
 ; jpeg_simd_cpu_support (void)
 ;
 
-	align	16
-	global	EXTN(jpeg_simd_cpu_support)
+        align   16
+        global  EXTN(jpeg_simd_cpu_support)
 
 EXTN(jpeg_simd_cpu_support):
-	push	ebx
-;	push	ecx		; need not be preserved
-;	push	edx		; need not be preserved
-;	push	esi		; unused
-	push	edi
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+        push    edi
 
-	xor	edi,edi			; simd support flag
+        xor     edi,edi                 ; simd support flag
 
-	pushfd
-	pop	eax
-	mov	edx,eax
-	xor	eax, 1<<21		; flip ID bit in EFLAGS
-	push	eax
-	popfd
-	pushfd
-	pop	eax
-	xor	eax,edx
-	jz	short .return		; CPUID is not supported
+        pushfd
+        pop     eax
+        mov     edx,eax
+        xor     eax, 1<<21              ; flip ID bit in EFLAGS
+        push    eax
+        popfd
+        pushfd
+        pop     eax
+        xor     eax,edx
+        jz      short .return           ; CPUID is not supported
 
-	; Check for MMX instruction support
-	xor	eax,eax
-	cpuid
-	test	eax,eax
-	jz	short .return
+        ; Check for MMX instruction support
+        xor     eax,eax
+        cpuid
+        test    eax,eax
+        jz      short .return
 
-	xor	eax,eax
-	inc	eax
-	cpuid
-	mov	eax,edx			; eax = Standard feature flags
+        xor     eax,eax
+        inc     eax
+        cpuid
+        mov     eax,edx                 ; eax = Standard feature flags
 
-	test	eax, 1<<23		; bit23:MMX
-	jz	short .no_mmx
-	or	edi, byte JSIMD_MMX
+        test    eax, 1<<23              ; bit23:MMX
+        jz      short .no_mmx
+        or      edi, byte JSIMD_MMX
 .no_mmx:
-	test	eax, 1<<25		; bit25:SSE
-	jz	short .no_sse
-	or	edi, byte JSIMD_SSE
+        test    eax, 1<<25              ; bit25:SSE
+        jz      short .no_sse
+        or      edi, byte JSIMD_SSE
 .no_sse:
-	test	eax, 1<<26		; bit26:SSE2
-	jz	short .no_sse2
-	or	edi, byte JSIMD_SSE2
+        test    eax, 1<<26              ; bit26:SSE2
+        jz      short .no_sse2
+        or      edi, byte JSIMD_SSE2
 .no_sse2:
 
-	; Check for 3DNow! instruction support
-	mov	eax, 0x80000000
-	cpuid
-	cmp	eax, 0x80000000
-	jbe	short .return
+        ; Check for 3DNow! instruction support
+        mov     eax, 0x80000000
+        cpuid
+        cmp     eax, 0x80000000
+        jbe     short .return
 
-	mov	eax, 0x80000001
-	cpuid
-	mov	eax,edx			; eax = Extended feature flags
+        mov     eax, 0x80000001
+        cpuid
+        mov     eax,edx                 ; eax = Extended feature flags
 
-	test	eax, 1<<31		; bit31:3DNow!(vendor independent)
-	jz	short .no_3dnow
-	or	edi, byte JSIMD_3DNOW
+        test    eax, 1<<31              ; bit31:3DNow!(vendor independent)
+        jz      short .no_3dnow
+        or      edi, byte JSIMD_3DNOW
 .no_3dnow:
 
 .return:
-	mov	eax,edi
+        mov     eax,edi
 
-	pop	edi
-;	pop	esi		; unused
-;	pop	edx		; need not be preserved
-;	pop	ecx		; need not be preserved
-	pop	ebx
-	ret
+        pop     edi
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        ret
 
 ; For some reason, the OS X linker does not honor the request to align the
 ; segment unless we do this.
-	align	16
+        align   16
diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc
index 253b897..0f1a8da 100644
--- a/simd/jsimdext.inc
+++ b/simd/jsimdext.inc
@@ -30,7 +30,7 @@
 ; ==========================================================================
 ;  System-dependent configurations
 
-%ifdef WIN32	; ----(nasm -fwin32 -DWIN32 ...)--------
+%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
 ; * Microsoft Visual C++
 ; * MinGW (Minimalist GNU for Windows)
 ; * CygWin
@@ -46,7 +46,7 @@
 %define SEG_CONST   .rdata align=16 public use32 class=CONST
 %endif
 
-%elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
+%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
 ; * Microsoft Visual C++
 
 ; -- segment definition --
@@ -58,9 +58,9 @@
 %define SEG_TEXT    .text  align=16 public use64 class=CODE
 %define SEG_CONST   .rdata align=16 public use64 class=CONST
 %endif
-%define EXTN(name)  name			; foo() -> foo
+%define EXTN(name)  name                        ; foo() -> foo
 
-%elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
+%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
 ; * Borland C++ (Win32)
 
 ; -- segment definition --
@@ -68,7 +68,7 @@
 %define SEG_TEXT    .text  align=16 public use32 class=CODE
 %define SEG_CONST   .data  align=16 public use32 class=DATA
 
-%elifdef ELF	; ----(nasm -felf[64] -DELF ...)------------
+%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
 ; * Linux
 ; * *BSD family Unix using elf format
 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
@@ -88,10 +88,10 @@
 
 ; To make the code position-independent, append -DPIC to the commandline
 ;
-%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_	; ELF supports PIC
-%define EXTN(name)  name			; foo() -> foo
+%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_       ; ELF supports PIC
+%define EXTN(name)  name                        ; foo() -> foo
 
-%elifdef AOUT	; ----(nasm -faoutb/aout -DAOUT ...)----
+%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
 ; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
 ; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
 
@@ -102,29 +102,29 @@
 
 ; To make the code position-independent, append -DPIC to the commandline
 ;
-%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_	; BSD-style a.out supports PIC
+%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_      ; BSD-style a.out supports PIC
 
-%elifdef MACHO	; ----(nasm -fmacho -DMACHO ...)--------
+%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
 
 ; -- segment definition --
 ;
-%define SEG_TEXT    .text  ;align=16	; nasm doesn't accept align=16. why?
+%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
 %define SEG_CONST   .rodata align=16
 
 ; The generation of position-independent code (PIC) is the default on Darwin.
 ;
 %define PIC
-%define GOT_SYMBOL  _MACHO_PIC_		; Mach-O style code-relative addressing
+%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing
 
-%else		; ----(Other case)----------------------
+%else           ; ----(Other case)----------------------
 
 ; -- segment definition --
 ;
 %define SEG_TEXT    .text
 %define SEG_CONST   .data
 
-%endif	; ----------------------------------------------
+%endif  ; ----------------------------------------------
 
 ; ==========================================================================
 
@@ -179,7 +179,7 @@
 ;  External Symbol Name
 ;
 %ifndef EXTN
-%define EXTN(name)   _ %+ name		; foo() -> _foo
+%define EXTN(name)   _ %+ name          ; foo() -> _foo
 %endif
 
 ; --------------------------------------------------------------------------
@@ -196,79 +196,79 @@
 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
 ; The PIC support code below is a little tricky.
 
-	SECTION	SEG_CONST
+        SECTION SEG_CONST
 const_base:
 
 %define GOTOFF(got,sym) (got) + (sym) - const_base
 
-%imacro get_GOT	1
-	; NOTE: this macro destroys ecx resister.
-	call	%%geteip
-	add	ecx, byte (%%ref - $)
-	jmp	short %%adjust
+%imacro get_GOT 1
+        ; NOTE: this macro destroys ecx register.
+        call    %%geteip
+        add     ecx, byte (%%ref - $)
+        jmp     short %%adjust
 %%geteip:
-	mov	ecx, POINTER [esp]
-	ret
+        mov     ecx, POINTER [esp]
+        ret
 %%adjust:
-	push	ebp
-	xor	ebp,ebp		; ebp = 0
-%ifidni %1,ebx	; (%1 == ebx)
-	; db 0x8D,0x9C + jmp near const_base =
-	;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
-	db	0x8D,0x9C		; 8D,9C
-	jmp	near const_base		; E9,(const_base-%%ref)
+        push    ebp
+        xor     ebp,ebp         ; ebp = 0
+%ifidni %1,ebx  ; (%1 == ebx)
+        ; db 0x8D,0x9C + jmp near const_base =
+        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
+        db      0x8D,0x9C               ; 8D,9C
+        jmp     near const_base         ; E9,(const_base-%%ref)
 %%ref:
 %else  ; (%1 != ebx)
-	; db 0x8D,0x8C + jmp near const_base =
-	;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
-	db	0x8D,0x8C		; 8D,8C
-	jmp	near const_base		; E9,(const_base-%%ref)
-%%ref:	mov	%1, ecx
+        ; db 0x8D,0x8C + jmp near const_base =
+        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
+        db      0x8D,0x8C               ; 8D,8C
+        jmp     near const_base         ; E9,(const_base-%%ref)
+%%ref:  mov     %1, ecx
 %endif ; (%1 == ebx)
-	pop	ebp
+        pop     ebp
 %endmacro
 
-%else	; GOT_SYMBOL != _MACHO_PIC_ ----------------
+%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------
 
 %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
 
-%imacro get_GOT	1
-	extern	GOT_SYMBOL
-	call	%%geteip
-	add	%1, GOT_SYMBOL + $$ - $ wrt ..gotpc
-	jmp	short %%done
+%imacro get_GOT 1
+        extern  GOT_SYMBOL
+        call    %%geteip
+        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
+        jmp     short %%done
 %%geteip:
-	mov	%1, POINTER [esp]
-	ret
+        mov     %1, POINTER [esp]
+        ret
 %%done:
 %endmacro
 
-%endif	; GOT_SYMBOL == _MACHO_PIC_ ----------------
+%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------
 
-%imacro pushpic	1.nolist
-	push	%1
+%imacro pushpic 1.nolist
+        push    %1
 %endmacro
-%imacro poppic	1.nolist
-	pop	%1
+%imacro poppic  1.nolist
+        pop     %1
 %endmacro
-%imacro movpic	2.nolist
-	mov	%1,%2
+%imacro movpic  2.nolist
+        mov     %1,%2
 %endmacro
 
-%else	; !PIC -----------------------------------------
+%else   ; !PIC -----------------------------------------
 
 %define GOTOFF(got,sym) (sym)
 
-%imacro get_GOT	1.nolist
+%imacro get_GOT 1.nolist
 %endmacro
-%imacro pushpic	1.nolist
+%imacro pushpic 1.nolist
 %endmacro
-%imacro poppic	1.nolist
+%imacro poppic  1.nolist
 %endmacro
-%imacro movpic	2.nolist
+%imacro movpic  2.nolist
 %endmacro
 
-%endif	;  PIC -----------------------------------------
+%endif  ;  PIC -----------------------------------------
 
 ; --------------------------------------------------------------------------
 ;  Align the next instruction on {2,4,8,16,..}-byte boundary.
@@ -278,28 +278,28 @@
 %define FILLB(b,n)  (($$-(b)) & ((n)-1))
 
 %imacro alignx 1-2.nolist 0xFFFF
-%%bs:	times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
-	       db 0x90                               ; nop
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
-	       db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
-	       db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
-	       db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
-	       db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
-	       db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
-	       db 0x8B,0xED                          ; mov ebp,ebp
-	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
-	       db 0x90                               ; nop
+%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
+               db 0x90                               ; nop
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
+               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
+               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
+               db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
+               db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
+               db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
+               db 0x8B,0xED                          ; mov ebp,ebp
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
+               db 0x90                               ; nop
 %endmacro
 
 ; Align the next data on {2,4,8,16,..}-byte boundary.
 ;
 %imacro alignz 1.nolist
-	align %1, db 0		; filling zeros
+        align %1, db 0          ; filling zeros
 %endmacro
 
 %ifdef __x86_64__
@@ -307,61 +307,61 @@
 %ifdef WIN64
 
 %imacro collect_args 0
-	push r12
-	push r13
-	push r14
-	push r15
-	mov r10, rcx
-	mov r11, rdx
-	mov r12, r8
-	mov r13, r9
-	mov r14, [rax+48]
-	mov r15, [rax+56]
-	push rsi
-	push rdi
-	sub     rsp, SIZEOF_XMMWORD
-	movaps  XMMWORD [rsp], xmm6
-	sub     rsp, SIZEOF_XMMWORD
-	movaps  XMMWORD [rsp], xmm7
+        push r12
+        push r13
+        push r14
+        push r15
+        mov r10, rcx
+        mov r11, rdx
+        mov r12, r8
+        mov r13, r9
+        mov r14, [rax+48]
+        mov r15, [rax+56]
+        push rsi
+        push rdi
+        sub     rsp, SIZEOF_XMMWORD
+        movaps  XMMWORD [rsp], xmm6
+        sub     rsp, SIZEOF_XMMWORD
+        movaps  XMMWORD [rsp], xmm7
 %endmacro
 
 %imacro uncollect_args 0
-	movaps  xmm7, XMMWORD [rsp]
-	add     rsp, SIZEOF_XMMWORD
-	movaps  xmm6, XMMWORD [rsp]
-	add     rsp, SIZEOF_XMMWORD
-	pop rdi
-	pop rsi
-	pop r15
-	pop r14
-	pop r13
-	pop r12
+        movaps  xmm7, XMMWORD [rsp]
+        add     rsp, SIZEOF_XMMWORD
+        movaps  xmm6, XMMWORD [rsp]
+        add     rsp, SIZEOF_XMMWORD
+        pop rdi
+        pop rsi
+        pop r15
+        pop r14
+        pop r13
+        pop r12
 %endmacro
 
 %else
 
 %imacro collect_args 0
-	push r10
-	push r11
-	push r12
-	push r13
-	push r14
-	push r15
-	mov r10, rdi
-	mov r11, rsi
-	mov r12, rdx
-	mov r13, rcx
-	mov r14, r8
-	mov r15, r9
+        push r10
+        push r11
+        push r12
+        push r13
+        push r14
+        push r15
+        mov r10, rdi
+        mov r11, rsi
+        mov r12, rdx
+        mov r13, rcx
+        mov r14, r8
+        mov r15, r9
 %endmacro
 
 %imacro uncollect_args 0
-	pop r15
-	pop r14
-	pop r13
-	pop r12
-	pop r11
-	pop r10
+        pop r15
+        pop r14
+        pop r13
+        pop r12
+        pop r11
+        pop r10
 %endmacro
 
 %endif
diff --git a/structure.txt b/structure.txt
index b4dfef8..6045784 100644
--- a/structure.txt
+++ b/structure.txt
@@ -1,7 +1,7 @@
 IJG JPEG LIBRARY:  SYSTEM ARCHITECTURE
 
+This file was part of the Independent JPEG Group's software:
 Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
-This file was part of the Independent JPEG Group's software.
 It was modified by The libjpeg-turbo Project to include only information
 relevant to libjpeg-turbo.
 For conditions of distribution and use, see the accompanying README file.
@@ -24,8 +24,8 @@
   A "coefficient" is a frequency coefficient (a DCT transform output number).
   A "block" is an 8x8 group of samples or coefficients.
   An "MCU" (minimum coded unit) is an interleaved set of blocks of size
-	determined by the sampling factors, or a single block in a
-	noninterleaved scan.
+        determined by the sampling factors, or a single block in a
+        noninterleaved scan.
 We do not use the terms "pixel" and "sample" interchangeably.  When we say
 pixel, we mean an element of the full-size image, while a sample is an element
 of the downsampled image.  Thus the number of samples may vary across
@@ -264,14 +264,14 @@
    1B.  Per-pass control.  This determines how many passes will be performed
         and calls each active processing module to configure itself
         appropriately at the beginning of each pass.  End-of-pass processing,
-	where necessary, is also invoked from the master control module.
+        where necessary, is also invoked from the master control module.
 
    Method selection is partially distributed, in that a particular processing
    module may contain several possible implementations of a particular method,
    which it will select among when given its initialization call.  The master
    control code need only be concerned with decisions that affect more than
    one module.
- 
+
 2. Data buffering control.  A separate control module exists for each
    inter-processing-step data buffer.  This module is responsible for
    invoking the processing steps that write or read that data buffer.
@@ -457,8 +457,8 @@
   buffered by the coefficient controller have NOT been dequantized; we
   merge dequantization and inverse DCT into a single step for speed reasons.
   When scaled-down output is asked for, simplified DCT algorithms may be used
-  that emit only 1x1, 2x2, or 4x4 samples per DCT block, not the full 8x8.
-  Works on one DCT block at a time.
+  that emit fewer samples per DCT block, not the full 8x8.  Works on one DCT
+  block at a time.
 
 * Postprocessing controller: buffer controller for the color quantization
   input buffer, when quantization is in use.  (Without quantization, this
@@ -573,10 +573,10 @@
 
 Arrays of pixel sample values use the following data structure:
 
-    typedef something JSAMPLE;		a pixel component value, 0..MAXJSAMPLE
-    typedef JSAMPLE *JSAMPROW;		ptr to a row of samples
-    typedef JSAMPROW *JSAMPARRAY;	ptr to a list of rows
-    typedef JSAMPARRAY *JSAMPIMAGE;	ptr to a list of color-component arrays
+    typedef something JSAMPLE;          a pixel component value, 0..MAXJSAMPLE
+    typedef JSAMPLE *JSAMPROW;          ptr to a row of samples
+    typedef JSAMPROW *JSAMPARRAY;       ptr to a list of rows
+    typedef JSAMPARRAY *JSAMPIMAGE;     ptr to a list of color-component arrays
 
 The basic element type JSAMPLE will typically be one of unsigned char,
 (signed) char, or short.  Short will be used if samples wider than 8 bits are
@@ -617,7 +617,7 @@
 is helpful when dealing with noninterleaved JPEG files.
 
 In general, a specific sample value is accessed by code such as
-	GETJSAMPLE(image[colorcomponent][row][col])
+        GETJSAMPLE(image[colorcomponent][row][col])
 where col is measured from the image left edge, but row is measured from the
 first sample row currently in memory.  Either of the first two indexings can
 be precomputed by copying the relevant pointer.
@@ -636,11 +636,11 @@
 
 Arrays of DCT-coefficient values use the following data structure:
 
-    typedef short JCOEF;		a 16-bit signed integer
-    typedef JCOEF JBLOCK[DCTSIZE2];	an 8x8 block of coefficients
-    typedef JBLOCK *JBLOCKROW;		ptr to one horizontal row of 8x8 blocks
-    typedef JBLOCKROW *JBLOCKARRAY;	ptr to a list of such rows
-    typedef JBLOCKARRAY *JBLOCKIMAGE;	ptr to a list of color component arrays
+    typedef short JCOEF;                a 16-bit signed integer
+    typedef JCOEF JBLOCK[DCTSIZE2];     an 8x8 block of coefficients
+    typedef JBLOCK *JBLOCKROW;          ptr to one horizontal row of 8x8 blocks
+    typedef JBLOCKROW *JBLOCKARRAY;     ptr to a list of such rows
+    typedef JBLOCKARRAY *JBLOCKIMAGE;   ptr to a list of color component arrays
 
 The underlying type is at least a 16-bit signed integer; while "short" is big
 enough on all machines of interest, on some machines it is preferable to use
@@ -759,8 +759,8 @@
 it speeds up operations whenever malloc/free are slow (as they often are).
 The pools can be regarded as lifetime identifiers for objects.  Two
 pools/lifetimes are defined:
-  * JPOOL_PERMANENT	lasts until master record is destroyed
-  * JPOOL_IMAGE		lasts until done with image (JPEG datastream)
+  * JPOOL_PERMANENT     lasts until master record is destroyed
+  * JPOOL_IMAGE         lasts until done with image (JPEG datastream)
 Permanent lifetime is used for parameters and tables that should be carried
 across from one datastream to another; this includes all application-visible
 parameters.  Image lifetime is used for everything else.  (A third lifetime,
@@ -857,20 +857,20 @@
 the following routines for use by the front end (none of these routines
 are known to the rest of the JPEG code):
 
-jpeg_mem_init, jpeg_mem_term	system-dependent initialization/shutdown
+jpeg_mem_init, jpeg_mem_term    system-dependent initialization/shutdown
 
-jpeg_get_small, jpeg_free_small	interface to malloc and free library routines
-				(or their equivalents)
+jpeg_get_small, jpeg_free_small interface to malloc and free library routines
+                                (or their equivalents)
 
-jpeg_get_large, jpeg_free_large	interface to FAR malloc/free in MSDOS machines;
-				else usually the same as
-				jpeg_get_small/jpeg_free_small
+jpeg_get_large, jpeg_free_large interface to FAR malloc/free in MSDOS machines;
+                                else usually the same as
+                                jpeg_get_small/jpeg_free_small
 
-jpeg_mem_available		estimate available memory
+jpeg_mem_available              estimate available memory
 
-jpeg_open_backing_store		create a backing-store object
+jpeg_open_backing_store         create a backing-store object
 
-read_backing_store,		manipulate a backing-store object
+read_backing_store,             manipulate a backing-store object
 write_backing_store,
 close_backing_store
 
diff --git a/testimages/nightshot_iso_100.txt b/testimages/nightshot_iso_100.txt
new file mode 100644
index 0000000..9320886
--- /dev/null
+++ b/testimages/nightshot_iso_100.txt
@@ -0,0 +1,25 @@
+libjpeg-turbo note:  This image was extracted from the 8-bit nightshot_iso_100
+image.  The original can be downloaded at the link below.
+
+The New Image Compression Test Set - Jan 2008
+http://www.imagecompression.info/test_images
+
+The images historically used for compression research (lena, barbra, pepper etc...) have outlived their useful life and its about time they become a part of history only. They are too small, come from data sources too old and are available in only 8-bit precision.
+
+These images have been carefully selected to aid in image compression algorithm research and evaluation. These are photographic images chosen to come from a wide variety of sources and each one picked to stress different aspects of algorithms. Images are available in 8-bit, 16-bit and 16-bit linear variations, RGB and gray.
+
+Images are available without any prohibitive copyright restrictions.
+
+These images are (c) there respective owners. You are granted full redistribution and publication rights on these images provided:
+
+1. The origin of the pictures must not be misrepresented; you must not claim that you took the original pictures. If you use, publish or redistribute them, an acknowledgment would be appreciated but is not required.
+2. Altered versions must be plainly marked as such, and must not be misinterpreted as being the originals.
+3. No payment is required for distribution this material, it must be available freely under the conditions stated here. That is, it is prohibited to sell the material.
+4. This notice may not be removed or altered from any distribution.
+
+Acknowledgments: A lot of people contributed a lot of time and effort in making this test set possible. Thanks to everyone who voiced their opinion in any of the discussions online. Thanks to Axel Becker, Thomas Richter and Niels Fröhling for their extensive help in picking images, running all the various tests etc... Thanks to Pete Fraser, Tony Story, Wayne J. Cosshall, David Coffin, Bruce Lindbloom and raw.fotosite.pl for the images which make up this set.
+
+Sachin Garg [India]
+sachingarg@c10n.info
+
+www.sachingarg.com | www.c10n.info | www.imagecompression.info
diff --git a/testimages/testimg.bmp b/testimages/testimg.bmp
deleted file mode 100644
index 8603d15..0000000
--- a/testimages/testimg.bmp
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgari.ppm b/testimages/testimgari.ppm
deleted file mode 100644
index d7b38f2..0000000
--- a/testimages/testimgari.ppm
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgcrop.jpg b/testimages/testimgcrop.jpg
deleted file mode 100644
index 18fa0b1..0000000
--- a/testimages/testimgcrop.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgflt-nosimd.jpg b/testimages/testimgflt-nosimd.jpg
deleted file mode 100644
index 462f39b..0000000
--- a/testimages/testimgflt-nosimd.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgflt.jpg b/testimages/testimgflt.jpg
deleted file mode 100644
index 832f4cc..0000000
--- a/testimages/testimgflt.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgflt.ppm b/testimages/testimgflt.ppm
deleted file mode 100644
index 78945b6..0000000
--- a/testimages/testimgflt.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-227 149
-255
-0/-0/-10.21/40-51.62/62/83/83/:3-:3-:3-:3-:3-:3-:2/91.91.80-80-91.91.:2/80-80-80-80-80-80-80-80-6.+6.+6.+5-*5-*4,)4,)4,)4,)4,)4,)4,)4,)4,)4,)2-)/*$/,%/,%0-&1.'2/(30)30)63,63,74-85.85.96/:70:7.A:0B<0D>2F@4IA4JB5KC6KC6MD5MD5OC3NB2OC3OC3PD4RE5R?1Y?2b@4nB5}E6‹H8™G9£F7¯G:¸G9¾E:ÅG;ÇG>ÊG?ËH@ÐE@çFLíCLëDKëEIîCIïBDñ>Bô=Aø;A÷:@ô:?ð<?é?@â@>×?<ËA7»=/µ@.µ@.´?-´?-²?,²?-¯@-­@,ªA,¦A-¢B,Ÿ@*›A)˜@*–A,”>-’?/’?/‘>.‘>,=+’<+’<+”?+”?+”=*”=*”=*•>+–?,–@/–?6•>5—=2Ÿ?1©B3³D3¼D4¿D4¹?0µA2¬F8žH;‡H9oA2T8*C3&=5295495473271160050-50-72/72/72/61.61-50,50,41,//-.0-//-//-0/-0/-2.-2.-4+,4+,4*+3)*7(+=.1E69P:<jBC|IHMM•OOžVZªdnªoƒŸt”{£‡®€†º~ˆ½sy­`a‘TKvQEiSJgOG^MH^TQbfdo|}‚‘™ž˜£©Ÿ¢¨šž “’{|lfgUXWEQNEUR[UQbUQb0/-0/-10.10.40-51.62/62/83/83/:3-:3-:3-:3-:3-:3-91.91.80-80-80-80-91.91.80-80-80-80-80-80-80-80-6.+6.+5-*5-*5-*4,)4,)4,)5-*5-*5-*5-*5-*5-*5-*3.*0+%0-&0-&1.'2/(30)41*41*63,63,74-74-85.96/:70:7.@9/A;/C=1E?3H@3IA4JB5JB5LC4LC4MA1MA1MA1NB2OC3QD4P>0U?1^A3jC4xD6„E4’E5œC3§C4¯A4µA4¼B7ÀD:ÄE<ÅF=ÍC@áEIçBIèCIêDHíDGïBDó@Cö?Cø;A÷:@ô:?ð<?é?@àA>Õ@<Ê@6¹>/µ@.´?-´?-´?-²?,°?-¯@-­@,©@+¦A-¡A+Ÿ@*›A)˜@*–A,”>-’?/‘>.‘>.‘>,=+’<+’<+”?+“>*”=*”=*”=*•>+–?,–@/”@5•>5˜>3 >1«A3µD4½C4¿D5»A2·C6¬F8œI;…G:l@3S9*B4)>63:6595484382271161.61.72/72/72/61.61-50,50,41,//-.0-//-//-0/-0/-2.-2.-3--5,-5+,3)*5)+<-0C47N8:d>=vEA†JINLšTV¤aj¥l}rŽ‘{¢†€®…¹{„»ou©[[QHuOCiOFeOG_PH_RN_[Yfnotƒ‡ˆ”™•™ž—š ”™‘ƒ~ojkY][LVSJXSZVRaXQa/.,/.,0/-10.40-40-51.51.72.72.72.72.92,92,92,92,91.80.7/-7/-7/-7/-80.91/80.80.80.80.80.80.80.80.6.,5-+5-+5-+4,*4,*4,*4,*5-+5-+5-+5-+5-+5-+5-+3.*2-'1.'2/(30)30)41*41*52+63,63,63,74-85.96/96/:7.?8.@:.B<0D>2G?4H@5H@3H@3I@1I@1K?1K?1K?/L@0MA1NB2MA1QA1YB2dC2qC3|C2‡B2’A0˜<- 
:+§;.¯=2µ@6ºD:¿F=ÅD>ÙCEá@FãBGçBFêDFðCEôADø?Dú;@ù:?õ;@ð=@è@@ÜA=Ñ@;Æ@5·=.³@-³@-²?,²?-°?-¯>,­@,ª?-§@-¥@,¡A+A,š?*˜@*•@+”>-‘>.‘>.‘>.=+=+=+=+‘>,‘>,’<+’<+“=,“=,”?+•?.•A6–?5š>3£>2¯A4¹C5¿D5ÁC5ÀD8¸F;®I=™J=G;h@4Q:,B5,?74=77<66;5594183072/72/62/62/62/51.52-41,41,21,.0-,1-.0-.0-//-//-0/-2.-5//4..5,-4*+4*+9-/>24I56[97l?9|E@†IDOM˜[`›fv”mˆŒwžƒ}­}‚¹u~·fm¤TV‰MEvLAkMAeOFcQHcMH^NK\[[eqty…‰ˆ‡Œ†Šˆ…†Š|xzlfiXZ[MVSLZU[ZT`[S`.-+/.,/.,0/-3/,40-40-40-61-61-61-61-81+81+81+81+7/-7/-6.,6.,6.,6.,7/-7/-80.80.80.80.80.80.80.80.5-+5-+5-+4,*4,*4,*3+)3+)6.,6.,6.,6.,6.,6.,6.,4/,30+30)30)41*41*52+52+52+52+52+63,74-85.85.96/96->7-?9-@:.B<0E=2E=2F>1F>1G=1G>/H<.I=/I=/J>.L@0JA0LE3NE4UD4^D3iD2sB1~A/†?-Œ9)”9'9*£<-¬@3³E8¸H<ÁF>ÒDCÚACÞBCâDCçCDìBCó@C÷?Aú;@ù:?õ;@î>@åA@ÚB=Í@9Â@3µ=-°@,°@,°@,¯>,®?,®?,¬?+©@-¦?,£@+ @*œ@+˜@*–@)”?*‘>,‘>.‘>.=-=+=+<*<*=+=+<*<*’<+‘>,”>-’?-•A6–?5œ>2¦@4²B6¼C8ÁC7ÂB7ÂF<¹I>¬L@—K>|F:b@4L:.A7-@85>96=77<74:5294183083062/62/62/32.52-21,21,12--2.-2./1./1.00.00.10.10.5106005//5,-4+,6,-:01D22T71c;3qA7{E;‚HD‰RU_l‹i‚ƒs˜}y«x}µowµ`f¢QR‹LEyL@pL@hPEgQFfLC^GBVMLZ^^fjnquyxy~xz€vwzokoa`bTWYLTTL]WY]V]]V^------.-+/.,0/-10.3/,40-3/,3/,4/+4/+4/+4/+6/)6/)4/,4/,3.+3.+3.+3.+4/,4/,50-50-50-50-50-50-50-50-3.+3.+2-*2-*2-*1,)1,)1,)4/,4/,4/,4/,4/,4/,4/,4/,41,41,41,41,52-52-52-52-52-52-63.63.74/85096196/<5-=6,?8.@9/B:/C;0C;0C;.D:.D:.F:.G;-H<.I=/J>0I@1JG6MH5RG5YF5bE3jD1uB/|?,‚;)‹:)“:*š=,£B2¬F8²J=»I?ÌGBÔDCØDBÜDAâBBéAAð=@ô<>ù:?ø:<ô<>í?>áB>ÓC:ÅA5¹?0²?-®?,®?,®?,­>-¬>-¬>-ª?-¨>.¤?- 
?,ž?+š?,—?+•>*“>*‘>,?.>->->-Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,<,>-‘>.?.”B4—A4@1¨@3¶A7¿C9ÅB8ÄA7¾C;·H?¦LCJ@tE;Z>2E9-<5+@93@85?75>63=52<4194083/62/43/43/23.32.12-12-02--2.,2.-2.-2./1./1.00.10.3205105104..2,,4+,7./=/.N5.Y9.e=1n@3sB;yKK€Zeƒg€p—zxªu{·ks´_d¦TT”OGƒLBwNAmNBhMAeJA`GBYGFXKKWMPU]cc`fbbia`f\Z`TW[MUXMXXP^YV`WX`WZ,,,,,,.-+.-+/.,0/-3/,3/,2.+2.+3.*3.*3.*3.*5.(5-*3.+3.+2-*1,)1,)2-*3.+3.+3.+3.+3.+3.+3.+3.+3.+3.+2-*2-*2-*2-*1,)1,)1,)0+(3.+3.+3.+3.+3.+3.+3.+3.+41,41,41,41,41,41,41,41,41,52-52-63.74/85085085.;4,<5+=6,>7-@7.A9.A9.A9.C9/C9-E9-F:.G;/H<.J>0HA1JG6IH6NG5VF6\E3dC2lA0t?-{<+‚;)Œ;*”=,œ@1£F5ªJ:´J=ÄH@ÌEAÑFAÖE@ÞCAä@>ì>?ñ:<÷;<ô:;ð<=é@=ÜC=ÍC8¾@2±>,®?,«@,ª?+ª?+ª?-©>,©>,¨?,¥>-¢?, ?,›>,—?+•>*“>)‘?*>+>->->-Ž=,Ž=,Ž=,<+Ž=,‹<+<+‹<+‹<-Œ=,>/Ž?0”B4—A2 @2¬B5¹C9ÂC:ÅB:ÂA;ºA9±I@£NGNEoG=S?4A;/96-@93A75?74>63<4194083/74/43/43/34/23.23.02-02-.3--3/-3/.3/.3/02/02/11/11/21/32032040/2.-1-,4..8.,G4-O4)X8+`<0e?6mGFyYd‚k…€uŸ||²w|¼nu»dh¯[[¡RMLB~OArL@hI=cH>`HB^ECX@BO;@FBGJDMJJQJJQIIQFKQEOUIVWO^YS`YS`XU++++++,,,---/.,/.,0/-0/-1-*1-*1-*1-*2-)2-)2-)2-)2-*2,,1++1++1++1++2,,2,,1++1++1++1++1++1++1++1++2,,2,,2,,1++1++1++0**0**3--3--3--3--3--3--3--3.+41,41,41,30+30+30+30+30+41,41,52-63.74/74/85085.:3+;4,<5-=6.?6/?6-?6-?7,B8.B8.E8/E9-G;/H<0J>2H@3HE6GE6KE5QD4XC2_B2f?.n=,v=,|:*…9)Œ;*“=.›B2¢F7¬F8¼G=ÂF>ÉF>ÐE>ÙD@âC?é??ð>>ò::ñ;:ì<<äA<ÖC;ÆD6µ@/ª=)ª?-©@-©@-¨?,¨>.¨>.§=-¥>-£=. 
?.ž?-š?-–?,“>*‘?*>)>+>->-Œ=,Œ=.Œ=.‹<-‹<-‹<-Š=-Š;,‰<,Š</Š=-‹=0Œ?/’C4˜B3¡A3®B6¼C:ÂC<ÄC=ÀC=¹FA²QJ¥XRXQsRIWI>CC7<?6>93@72>71=60:5/94.83/63.43.43.34/13.13..3-.3-.3--3/-3/-3/-3/.3/.3/02/02/00.11/22021/0/-/.,2.-4/,?0+D0)K3)T8-Z<4dFFu]jƒs‰€«…„¾~ƒÇtzÆmp½ce¯VSšLC‚K?qI<gG;cE>_FB]DBW?AN;?H:BE>HGDMHGQIGQHJRGNVKUXM^ZOaYNaXO++++++,,,---.,-/.,0/-0/-1-*1-*1-*1-*2-)2-)2-)2-*2,,1++1++0**0**1++1++2,,0**0**0**0**0**0**0**0**2,,2,,2,,1++1++0**0**0**2,,2,,2,,2,,2,,2,,2,,2,,3/,30+30+30+30+30+30+2/*41,41,52-63.63.74/85085092,:3+;4,<5->5.>5.>5.>5,B8/B8.E8/E8/G:1I=1J>2I?3FB6FB6JB5OA4TA2\@2b>0j<-q<.w9*}8)…8(Œ:,•=/›B4¤B5²F:ºE;ÁF>ÊG?ÔG@ÞFAçCAîB@í;;ë;;ç>;ßB;ÑD:¿D4¯@-¤>(¦A-¦A-¦A-¥@.¥@.¤?-¤?-¤>/¢>.Ÿ@.œ?.˜?-•>+‘?*>)>+>->->-Œ=,Œ=.‹<-‹<-Š=-Š=-ˆ<.ˆ<.ˆ<.ˆ</‡>/ˆ>1‹?1‘D4–C3¢B4­C6ºC;ÁD>ÁD>»EA¹PL²[T¥e\‘f]u_T[UIGMACI?<92?82>71;6094.74-63.43.43.34.24/13./4..3-.3-.3--3/-3/-3/-3/.3/.3/02/02///-00.22022010./.,0/-3/,8,,;,)C0*K70S<6^IHtbn‡z”Š¶ŒÆ„ˆÏz€ÌrxÆik¶WWŸID„E=nG<dD<aC>^CAY@CV@DP>EKGQRKWUQ^WU`XS_UR^TT^SY_S^[LaZJaZJ,-/,-/,-/,.-------.-+.-+/.,/.,1-*0,)0,)0,)/+(/+(/+*/+*/+*/+*/+*/+*/+*0,+/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+1-,1-,2.-1-,1-,1-,1-,1-,1-,1-,1-,0,)1-*2.+3/,3/,3/,3/,3/,3/,3/,3/,3/,40-51.62/74/80-92,:3-;4.=4/>5.>5.>5.?4.?5,B5-C6.D7/F90G:1F<2G>5H@5J@6P?5T>3X<1^90b6+m9.t8-|8+ƒ9,;/“=0˜?1ž>0§A3­A4µC8¾E:ÊG=ÔG>ÞE?åC@è@?êBAæDAÚE>ÈD8·B1ªA.¢B,¢A.¡@-¢?,¢>.¡=-¡=-¢>. 
?/œ<,š=,˜<-”=,“=,=+Ž=*Œ>*‹<+‹<+Š=-‰<,‰<,ˆ;+ˆ:-‡;-‰=/†</†</‡=0ˆ>3ˆ@2‰A5‹A4‘E5—D4£E9±I>ºG@»D>»EA¸MG´ZR¯f_£qf‘sh~rdjj^V^SIQFLLBJF=B>5<8/95,74+63,33+43.34.14-14-02-/1,,1+,1+-2.-2.-2.-2./1./1./1./1.02/02/11/11/11/11/11/40/4+0;/3A32C4/J;6]OOymy‹…Ÿ“»”–ϐ•Ùƒ‰ÓtzÆjn·_b§Z[”LItHBdA>]>>X?BUAIVLU\U`bbqnn}xv†|rulyoguh_k_W_P]\Hb\Fc]G,-/,-/,-/,-/------.,-.-+/.,.-+0,)0,)/+(/+(/+(/+(.*).*).*).*)/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+0,+1-,1-,1-,1-,1-,1-,1-,1-,1-,1-,0,+0,)1-*2.+3/,3/,2.+2.+2.+2.+2.+2.+3/,40-51.62/80.91.:2/;4.=4/>50>50=4-?4.?4.A4.B5-C6.E80G:2H;3H>5H>5L=6O>6Q=4V;2Z90_7/h70p7.w7-9-‰9.<1–<1›=1¢@3§A3­C6´D8¾E:ÉF<ÔE=ÛD=ßD@àE@ÞFAÔG>ÄF8²C2¥B-žB-ŸC.žB-Ÿ@.ž?-ž?-ž?-ž>.ž>.š=,™>,–=-”=,=+>+Œ>*Œ=,‹<+Š=+‰<,‰<,‰<,‡;+‡;-…<-†</…=/…=1ƒ=1„>2†@6‡A7ˆB6“G9˜F8£G<¯J@¸IB¸GA·LF³TN±d\ªqfŸ~oo|mmseZfZNXMLNAKI=EC7@=4=:188.44,11)23-23-03,/2+/1,.0+.0+.0+/1.-2./1./1./1./1./1./1.02/02/11/11/11/11/11/2015+49-7<23?53H?:^VTxr|Šˆž““¹”—ʐ–Ô„‹ÏyÂqy¸kt­hnž\_€XZqSUjRWjT^hZgmfvvr‚tˆ~’‡ƒ•‰~Žw‡zr€qfteZeT[ZE`Z@b\D,-/,-/,-/,-/,-/,.-------.-+.-+.-+-,*/+(.*'.*'.*',+),*+,*+,*++)*+)*+)*+)*-+,-+,-+,-+,-+,-+,-+,-+,,*+,*+-+,-+,-+,.,-.,-.,-.,-.,-.,-/-./-./-./-./.,0,+0,+1-,2.-2.-2.-2.-1-,1-,1-,1-,1-,2.-3/.40/51.80.91.:2/;30=31=4/=4/=4/?40?4.A4.A4.C60D71F93F93H;5J;6K<7N=6P;6S:5V72[6.c60k6.t5,}7/‡9/;0”<0–<1?3 @4¢@3¨@3±C6ºD8ÅE:ÍD:ÕF@ÖG?ÔIBÌI?¾E:®C3¡B.œA.B/œA.œ?.›>-›>-›>-›>/›?0˜<-–=-”<.“=.>-Œ=,‹<+Š=+‰<*‰<*‰<,‡;+‡;-…<-„:-ƒ;-„<0‚<0‚<2‚>3ƒ?4…A8‡C:ˆD9”J=—H; 
H>«JC²KF³LG²SM®`V­sg¦qŒz‘Ž{‚‰ws€ocqbXcUNRDMN@HI;DD8@@49;.46+/1&01)01)/0*/0*./*./*//-//-//-.0-//-//-//-//-//-//-00.00.00.00.00.00.00.1/26+97+98/4;63HE>_^Yzz|‹š”±’—¿•Ç„ŽÃ}‰»{‰¶|‹²}Œ«}ˆšwƒq~‡n}‚n~~pƒyŽ…ƒ™‹¡”‘¨˜¥”ˆŠƒ–ƒ|{j{i\hTXX@]Y<_[@-.0-.0-.0-.0-.0-.0.......,-.-+-,*-,*.*'.*'.*'.*),*++)*+)*+)*+)**()*()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+-+,-+,-+,-+,.,-.,-.,-.,-/-./-./-./+*0,+1-,1-,1-,1-,0,+0,+0,+0,+0,+0,+1-,2.-3/.40/91/:20;31<41=31=31=31=4/>3/>3/@2/@3-A4.C60D71E82F93H94I:5J;6L:6N94Q83T50^72e60o6/x8/‚90Š:/<1”>1™?4›?4›?2Ÿ?1¥A2®B5¸D7¿E:ÇG>ËH@ÊJAÃI>¶F:ªB5žA0™@.™A-™A-˜?-—>,—>,™>,™=.—>.•<,”=,“=.=-Ž=,Š=+Š=-‰<,ˆ;)ˆ;)‡;+‡;+„;,„;,ƒ;-€;,;/€<1€<1>5ƒ@7ƒC:‡D<ˆE<”L@–H<žG>§JC®LI®QL­]Vªj^§€o¡yšš‚›ƒ†•€z‹xm{jbm]SZJQUFKO@EI:@D6;=057,13(01)/0(./).-(.-).-)/.,0/-/.,/.,/.,/.,/.,..,/.,..,0/-//-0/-//-//-//-//-2-17,:6*83-1961HJ?bfX{€z‹““£–°Œ•¶ƒ²¯…™²¤¶’¨³“¨«¤£Œ ž‡ž˜‚™}˜‰œ‹ˆ£”°š˜±›”­—‹£…›„~‘{k|iXfOSV;ZV9^Z=+/2+/2+/2+/2-.0-.0-.0-/.------.-+-,*-,*,+),+),+),*+,*+,*++)*+)**()*()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*++)*+)*+)*,*+-+,-+,.,-.,-/-./-./-./+*/+*0,+0,+0,+0,+/+*.*)/+*/+*/+*/+*0,+1-,3/.40/901:20;31<42=32=31<20<20=20=2.?1.?1.@2/A30B5/C60D63C84D95E:6G96H94K84N50X72_60i70r7/}:1†<1Œ>2>2—@6—?5—?5—?3œ@3£C5«C6³E8ºE;½G=¾H>¹G=°D8¥@4›?0—@/—@-—@-–?.•>-”=,•<,–=/–=/•<.“;-’</Ž<.‹<-‰<,‰<,ˆ<,‡;+†:*†:*†:*ƒ:+ƒ:+‚:,€:.€:0€<1€=4?6‚B9ƒD=‡F@ˆH?‘KA’H=šG?£LE¨PL¨XQ©f]¦vh¢Œw›™€•¤‡Œ£‡†ƒ~‘}t‚qjue\eRV_LNUCEL:?F6<@27:/58-01)/0(.-(/,'/+(/+*1-,2.-1-,1-,1-,1-,1-,/.,1-,/.,2.-0/-2.-0/-0/-0/-0/-2-18,:5)51++66,GL8`hQzƒpˆ’‡—–š ™¥†–£‡›¢ª©›¹± À³˜·¨š¶§›·¨˜·¥‘±œ‰«’‰«±”’³–•´•­ˆ¢…„›{’xgzdTbIQT9VU9XW;,03,03,03,03./1./1./1./1....../.,.-+.-+.-+-,*-,*-+,-+,-+,,*++)**()*()*()+)*+)*+)*+)*+)*+)*+)*+)*,*+,*++)*+)*+)**()*()*(),*+,*+,*+-+,.,-.,-/-./-./+*/+*0,+0,+/+*/+*.*)-)(0,+0,+/+*0,+1-,2.-3/.40/:12:12;31<42=32<21<20;1/=20<1/>0->0-?1.@2/A4.?4.@51@72@93A:4B94C84F74H5/Q51X5/a6/l8-u9.€</†>0‹=1“?5•>5“?5“?4•B4šB4 
C4¥D4¬B5°D8´E:±E;ªB7¡?4š>1–>0•?.”?+“=,“=,“<+“<+”<.”<.“;-’<-<.;-Š;,‰<,‡;+‡;+†:*†:*…9)ƒ:)ƒ:+9*9-9-€<1<3~>5€A8‚C<…F?ˆIBŠICŽJAG>—HCŸNJ§VR§`Z©pe¤ƒrœ”}–¡ƒ¨Šˆ§ˆƒ „~–~yˆup{jdp\]iSR^JJS@BK:>E5:@29<134,22*1.)/,'/))0**1++2,,1++1++1++1++1++0,+0,+0,+1-,1-,1-,1-,/.,/.,/.,2,.8*75(13+(56&EK1\gGu‚d†“yŽ›Š‘ž”Ÿš‹ž˜¥——´ž¢Å¥©Î­¥É­¦Ê°§Ë±¤È¬—¾Ÿ‹³‘ˆ°²Ž²Ž²¬Š„¡‚€™{wŽrdx]Q_ENR7QQ5SR6,03,03,03,03./1./1./1./1/////////////.,/.,.-+.-+/-..,-.,--+,,*++)*+)**()+)*+)*+)*+)*+)*+)*+)*+)*+)*+)*+)**()*()*())'()'(+)*+)*,*+-+,.,-.,-/-./-./+*/+*/+*/+*/+*.*)-)(,('0,+0,+0,+0,+1-,2.-40/40/:12;23;23<34=32<21<21;10<1/<1/>0/=/.>0-?1.?1.>3/=60;60;83<94=:5>93@72C60G4.O4+Y4+d5+n8,x:-;.…;.<4‘<5>3@3A2“B1—A2™@0 >1¦@4ªB7ªB9¥@6Ÿ>5—=2•?2’?-’?-‘>,=+‘;*‘;*’<-’<-‘;.‘;.Ž;-;-Š;.ˆ:-†:,„;,…9)…9)ƒ:)‚9(‚9*9*~8,~8,€<1€=4€@7B9„E>…IAˆKFŒMFŒIAŽH@˜JF¡SO¨]X©hb©{n¤Žy™œ’§†‰¬‹‚ªˆ€¡„™€~‹wxnjxadr[ZhQQ]IITCCL;>D6<?467/44,30+0+'1()1()2)*4+,2)*2)*2)*2)*2)*0**0**0**1++1++0,+0,+0,+0,+0,+1+-6)25)-4-%46!AH&Wc;q€W‚’mœ} †¢Œ‹¡Š¨‰•¸Ž È”¤Ð©Ó«¨Ò®§Ñ­¡Ë¥“½—…¯‡‚¬„‡°†ˆ®…Œ±ˆ‹«†Ÿ{{”tqˆk_sWM]BIO3JM0KK/./1,01./1./1.0/.0/.0////00.00.00.0/-0/-0/+/.,1-,2.-1-,1-,0,+/+,,*++)*+)*+)*+)*+)*+)*+)*+)*+),+)*+),+)*-(,*(),())'()'((&'*()+)*,*+,+)-+,.-+/.,/.,-,*-,*/+*/+*.*)-)(,(',('1-,2-*2-*2-*3.+4/,50-61.;31;31<42<42=31<20;1/;1/<1/<1/=/.=/.=/,>0-?1.=2.=4-=6.;819919:29:2;81?6/C2+J1,T2)^3*g7+o9-u=.z</‡=4‹=3‹?2Œ@2Ž@3@1’?1•=/˜</œ>2¢B6¡C7žB7™?4–>2”>1”>/“=.=-<,Ž=,Œ=,Œ=.>/Ž<.Ž<.‹</Š;.‰;.ˆ:-†:,„;,‚:+‚:+€;+~;*~;+|:*}9,|:.€>2>5€@6‚C:ƒG=…JB‡NE‹OGŒICŽID“PJ˜\Th`uiž‡už˜€”¡…ª‰‰¬‹ƒ¨‡€¡„~›zytˆoj~cby]\pUUgOO\HGRAAH8=A388.85.7/,3+)2()2()3)*4*+0*,/+*0*,0**0*,0*,2),2),3*-1+-1+-1+-0,-0,-0,-1+/4)/4*+4-%46!?F%T`8m|Qg‰™tžz¡~ˆ¡zŠ§{‘¶‚›ÄˆŸËŸÉ™žÇÆš˜Á•Œ·Š‚­€…®‚Œµ‰„«‰®…‰©‚~œxvolƒfZnSJZ?GM3FJ1DF./0+.0+/0+01,01,01,12-21-32.43/43/62/51.41,3/,4/,50-50-4/,3.+2,,1-,0,+0,+.,-.,---/--/,,.++-*).))+.)/.)-/(/.)-/)-.)-.*+.*+/+*/+*-,*.-)--+-.)./*./*------.,-/-./.,0/-2.+2.+2-*4,)5-*6.+8/*:1,:1*;2+=4-=4->50>50>50=4/<3.;2-=2.<1-<1/;0.=/.>0/>0/@1.A0)@2);4*77-39-19.47.83-?-)F*'L
)'S*&Z/(`5,d<0k@0yA2@/†A2‹B3@3“=0“;/’8-“;/“=.”A1•B2”C2’A0‘>.‘;,—;0—:2’;1Œ=0ˆ?0„?/‚?.ƒ>.ˆ@1ˆ?0Š>1Š<0‰;/ˆ:.†:-ƒ;-{9+~@3w<.q7)w>-w>-v;-}?2{;/‚@4‡E9‰I?ˆLA†MB„PC‡NE‘KI—SP”bY‘paŠ}j‡‰q‰–zŠŸ€¤‡¦‰Ž§Š§Œ‡¤ˆ~Ÿ‚uš{o—uiib„_[zXZsUZnSWeNPWEJK=C?6@93;0.6**3')3'+3'+0'*++-)+***,*(),'+.(,1(-2'-3(.3(.3(02(00)00)0/*1/*0/)-1++0-(//#<?*V\@pzX‹gœu‹sŒ¡vŒ¦wŠ¨v‰ªuŽ´{™¿†ŸÁ¾–ºŠ³ƒˆ®}…«|ƒª{…ª~ƒ¨}ƒ¦~€ž|y”ur‰mh|cYkUMZFIQ<CH4?B//0*01+01+01+12,12,21,32-43.43.74/74/63.52-50,50,7/,7/,6.,6.,3.+2-*0,+0,+.,-/-./-0--/-,1+*/)*.)(-.)//(//(//(//)-/)-.*+.*+/+*/+*.-).-)./*./*./)./*.....0....../.,/.,1-*3.+5-*5.(7.)8/(:/);0*<2)=3*>5,>5,>5.>5.>5.=4-<3.<3.=2.<1-;0.;0.=/.=/.>0/@1.C0*C0)A2+>4+:6-95,:3-<1-?-+D*)I*(N+'T/)Z5-_:1e=1pA1x@/€A0ˆ@2?4‘=3“:2“;19.‹</‹>.Œ@0A1A1ŽA/‘?1•;2•;2<2‹=0†>0ƒ@/?/‚?.…@1†>/‰=0ˆ<.‡;.„;,‚:,€;,x8,x<1s9-n9+s>.s>.r8*u:,}=1ƒA5‰E:‹I=‰K@†NA„PC‡NE‘JH—SR’f]Œwfƒ‡p“w€Ÿ„¦…ˆ§ˆ§Š§‹§Š¤‰Ÿƒwœ}o˜ve’i`‹`Y‚ZY{X\wXZnSSaJNUCFH;C@7<737/-3*+2)*1(+.(*+++)*,+),+),-(,/)-2(02(03(02(02(10)1/*1-*1-*3-*12-12.-0,)--%8:,SXBox]€‹i‡–o†™l†žnˆ£p†¤p„¥pŠ­w“¶€™¹‡–¶„“³¯|†«x‚¨w‚¨yƒ¨|ƒ¦|‚¤{x’uqˆnh{eYjWMZHEL<@D5;=/12,12,12,12,23-23-43.43.54/54/85085085074/72.61-80-80-7/,6.+4/,3.+1-,1-,1-./-.0.1/-0--/,,.+*/+)./(/1'//)-/)-/)-/)-.*+.*+0,+0,+/.*/.*/.)0/*/0*/0+//-///0./0.//.,/.,1-*2-*6.+70*90+:1*<1+=3*>4+?5,?6-?6-@7.@7.?6/>5.=4-=4-=2.<1-;0.;0.;0.;0.<1/?1.C2+E0+H/+K--L,/K+.I*/E+.A-,@.*A.(E.(M.)X1*b3-g5.j:,o;-w;0=4‡<6Œ<5:4:4Š<2‡=2†>2…?3†A2‰A3ŒB5C5Ž?2=/Œ>1Š>.‡>/…>,†=.ˆ<,‰;.‰<,ˆ:-†:*ƒ;,<)~=+{<+}=1z<1v:/u;/x@1x@1v<.v;-?5‚B8‰F=‹H?‰JA‡KA‡NCŠNF‘JH˜TQ–f\ve‡…n‘t{¥…¨‡ˆª‰ª‹©ŒŽ¥‰ˆŸƒš}y˜xi–ma’e\‹a\†`]‚`[|]UrVPhPFYEBP?:D93:2.3,*/)*,)**(.((1&*1&*1&*0'*1(+2).1+//*.-+.,+0+,0)-0(-0(-1)-01/23/./+(**"57*PUAmu]|Ši€k~“j™jƒŸn€ n~ 
m€¥q‡¬x¯|‹­zˆªw„©u§t§u¦w¦y¤y€¢}}›yw‘tp‡mh|cYkUMZHDH9=?299-23-23-34.34.34.45/54/54/650761961:72:72961940940:2/91.91.80-50-4/,3/.3/.3/03/01/01/20.1..0--/-+.0)02).0*.0*.0*.0*,/+,/+*1-,1-,0/+0/+10+10+01+01,11/1111/010.10.2.+3.+3.*92,92,;2+<3*>4+@6-@6-A7.B8/A8/B90A8/A81?6/>5.>5.=2.=2.<1/;0.;0.<1/=20=2.B3.E2.L..R+0V'/U&.P'/I).C/.<1+;2)?2)H1)R/)_.*d/)i9/k;/u<3}<6…;8‰;9Š;7ˆ:6‰>8†@8†B9…B9…C7†B7‡A7‰A5ˆ@1ˆA/‡@.‡@.‡>-ˆ<,ˆ;+‰:+Š8*ˆ9*‡:*ƒ:)<){=(x>(x>*‚>1{7.z7.z<1v;-w=/|A3{@2€B7ƒE:†H=ˆJ?‡KA‡MBˆODŠQHŽSK\QŽh[‹tb…g€‹m~–vž|ƒ§ƒ„ª…‡ª‰‹ªŠ‹¦‡‡ ‚ƒš~}˜yq•oi”ifgfŒeg‹gd†e_~_Zw[PhPK^JBP?8D62:/.4*,/(+*%2&&5%(4%(2&(1'(/)+/+,.,-+++*+-*+-(,-(,/',/',/*+-.*+1++0+'.+"88,PUAiqYvƒe{Œhyh|–iœl}Ÿmyžkz m¥r‚§t€¥q}¤o}£p}¥s~¥v~¥x}¤x|¡x}Ÿzz™wuqn‡je|bXlSN[GDF9?=1:8,45/45/45/560560560761761872872;83<94<94<94<73<73<41<41;30;3083072/61.61.5106216213123121/00./1-.2,04+.4+.4+.2,.2,,2,,2,,3/.3/,3/,3/,21,21,32-32.22022032032051051.61.61-;4.<5/=4-?6-A7.B8.E8/C9/D:1D:1E;2D:1C90B8/@5/@5/>3/>3/=2.=2.=20=20>31>31@51E31M02T,4X)3W(2R)1K,1B30:6-77+:6*B4)M2)X/)^/)f:1j;3s<7z=:‚<:†<;‡;;†:::6>8A8B9ƒC:ƒC:…@9„@7‚C2‚C1ƒB0„?/†=.ˆ<.‰:-‹9-‰7+‡8+…9+‚:+~=+x>*v?*x>*9-|/'‚8/„>4w4+s5*}A6}C7E:€G<‚I>ƒJ?„KB†MD‡QG…WJ}`N{iQ€pYu]‚|bƒ†i†“uˆ|ƒ£~§€‚ª…‚¬†©„¥€{ž}z™wz’pzlxlwŒkumr‰lm„gkd`rX[iRR[HHL=@@4;7,70&5*$6('5''3''1'&.)&+*(++)+-*',('+*)+*)+***,+),+),-)*,#$2*'50*86*BC5UZFfpWn}^tˆeqŠbuex˜ivšjs™htšiwŸkz¢nx lwŸkx ny£q{¥u{¥w|£wyžuzœww–tsŽom†hd{_WkPN[GCC7>:195,560560671671782782872872983:94=:5>;6>;6>;6>95>95?74?74>63=52;63:5294194184184395484373243151240/6-06-.6-06-.4..4..4..4/,40-40-40-51.32-32-43.43.43/331542540841850940:5/=60>7/@7.A9.C9/D:0G:1H;2F<3F<3F<3F<3E;2C90B71A60@51@51?40>3/>31?42@53?53@72C52I35P16T/6S.5P05J22C52=90<:-=:+C7)I6(Q3)W2)]2+d3,l50v64}77‚87ƒ77‚66~75}:4}<6}>5€?9ƒ@8†?9…A8€B3€C1B1ƒ@0…=/‡;.ˆ:.‡9-…9,ƒ9,‚:,<,|=,y>,x?,|=,‡5*‹2,¡LE¨XQ‹A8|90‚F;€K=yH:zJ<{M>|N?}OBQE‡UJ‚_LrkOosR|uX†v\yb˜iŸ‹s 
”z––z|‹£ˆ¨…§‚€¤~{Ÿy~™vƒ‘p…ŽoƒŽp€pp{ŽpwŒms‡kj~bfv\_hSV[GOM>GA3@6*=0';,'9+(6+'3+(/,',-'+.').().(+-(-,*/+*3)*4(*7'*7'(3($<3,E>4IG:QR@]bKgqVjyZn‚]k„\l‰]p‘bq•eo•do—eršfuŸmrœjq›itžnx¢rz¤vy¢vyŸvvštw™vu”rokj…fc|^UlON\ECC7@91;4,671782782782893893983:94:94;:5>;6?<7?<7?<7@;7@;7B:7B:7A96@85=84=84<73<73<73<74<74<74;639529338308/09/.8/080.80.80.61.61-61-61-52-52-63.63.74/54/540540841952:63<94=84@93@70A8/C90D:0G:1H<0I=1I=1J=4J=4J=4I<3F<3D:1B8/A7.A81@70@72?61?61@72@72A83?74@85B86D97G96H96H96H94E80E8/E9-E9+G9,I9*K9+Q7*Z/&d/'n3-z63ƒ98‰;9‹;:‹=;‹A>‡@:‚=6<3:3‚<4ˆ=7‰@7ƒA5B3‚@2ƒ?2ƒ=1„<0;/€</~<.|=.{=.|>/|>/}>/=/†9/1+£;8ÎkfÛ~y­ZTŒC<ˆLAN@tJ<qM=pQ?qR@tS@zWD‚[J~eOmsOo{U„y[˜u_©oc¶mf¾qk½wo»‚w±Œz§—~žŸ€–¡¡~…y…™vˆ‘r‹rˆ‘r„“r€–r|–sx“pt’pm‹ii„edx\]kQV^GMP;ED0B;+@3+?2,;0*70*30)00(./'./)/0*2/*6,+:*+>(+C&+E$+C&(F5-LC4VP@[XE`bLgmQiwVj{Wl‚[g‚Wf†WlŽ\o”an”an–bršfsko›jo™irœnvŸsy¢xxžwu›vs—su—vs’rn‹li„cb{[TmMM]CGH:E<5@707827828938938939:4:94:94;:5<;6?<7@=8@=8@=8A<8A<8C;8C;8C;8B:7?:6>95>95=84>95>95>95>95=85<73:51;30:0.:0.91.91.91/91.91.72.61-61-63.63.63.74/74/74/540651952;83<94?:4B;5B;3A8/B:/C9/E;/H<0I=1J>2J>2K>5K>5J=4J=4F<3E;2C90B8/B92B92A83@72@72A83B94A:4?82@93B;5D=7F=6G<6K<5N;4M6.N6,Q6+Q6+Q7*P9+P9)V6'f6*r6,~;3‰@9•D@›HDŸJGŸLF 
QJ™LDŽD;…;0„7-…7-Œ91=4ˆ>5‡>7†=6…<5…<5=4}=3z>3x@3vA3x@3z>3<3ƒ:3ˆ73’42˜(&³=;í{zþ•’Åhc–G@‰K@xH:nM<jQ=fT>hV>lX@t[E`L€hPysQ„wUžt\´l^É__ÓV\ÙQ[×T\äouÙzx̆|À~µ•€«•}£’xŸvœŠrœŠt™u•u‘‘u‹’s†‘s‚‘r|‹ny†ju{amoWgbN_TBUE5R</O4-M1-I0,D/*>/(9/&7.'6/'81):/)=.+A,+F)+H(+K'+H)'TB4YQ<d^HgeLilOnuVm{Xl~Xk„Zg„Vg‡Vm]p•bq–br™duit›lo™ko–jr™mwvyŸxwšyt–us”uu”usqoŠki„ea|[TmMM]BMN@KB9F=69:49:49:49:49:4:;5=<7=<7=<7=<7@=8@=8A>9B?:D?;D?;E@<E@<E@:D?9D?9C>8E>8D=7B;5B;5B;5B;5B:7B;5A:4A:4A83A83A83@72@64>71>71>71<71<71;60:5/96/85.74/74/961961961:70<71=82A:2B;1C:1D<1F<2J>2K?3L@2N@3N@5N@7N@7L?6K>5I<3H;2E;2E;2B90A8/@91?80?80?80@93@93<5/MD=M@:K:3T@9R62O0+a<6\3-`5.`4+^/%^/%f6,m=1q=0ŠJ>G;—I?ŸKAŸF>›>7›>7žD;¦ND¤PE¨VJ«YK¥OB™A5—;0›>6“98‘98Œ65†52†84„?8|@6r>1rB4oA2q=0v:0‚72Œ43’-1š',ÈHIèbaÅEDÍWUáyv«VO†F<„VFmR=i[AibFjeHj`En^D|dLjT’bL²m]ÑnhÛX]à;Lç-Eò+Hõ0Lö@YìI\ä[eånrâ{|ۀ}ۃ؉‚Ãv½wº„x¶†x²ˆx­Šw¨‹y¥Œxš€o™~m˜xi“oabY„UM{IBxA>u:<i.2d04a03P&'G%#L0-M51G4.E2,D1-F1.F.,F*)K--Q6/YH4`W:f_BgdEkoNu|Zu„]oYlƒWl‡Zp]r’as•bu—dw™fv›hq•is˜os—qq•os•tw™xz˜|y–zx•ysrm‡jk„fi‚dazZTmMO_DNO?SK@PG>9:49:49:49:4:;5;<6>=8?>9>=8>=8A>9A>9B?:C@;D?;D?;FA=E@<E@:E@:D?9D?9E>8E>8E>8E>8E>8D=5C<6C<4D;4C:1B92B92B92B92B94A83@93@93>71<71;60;6096/85.74/74/96196/96/:70<71>:1A:0B<0C;0E;/I=1J>0L@2L@2N@3N@3N@5N@7M?6K>5I<3H;2E;2E;2C:1B90@91@91@91@91A:4B92D93J71L/+V.,j76u99{;;…ECƒD?„H@…I?†H=ŽJA™NH¡PL¢OG“D7”B4šB6ŸC8 >3œ7-œ7-Ÿ=0£C5›?0™@0 
E3¤G6§F6®H:¶NE´LM¯HL¥@D™9;‘98>:ƒ@8x>3o>0zI;…LA„?8„,+’).³9DÒLWÜKNßMMÊ>=À@?Ð`\³YQ„@5‚TDyaIe[@_^@ihIslOvgJ~dK–cN¼dXÖ_[æUZëANò,Eþ%Dÿ&Hÿ'Iÿ.Nù3Nò<RðIZëP^åQ]ãS]àXbÛbiÕflÔjnÒmqÏqrÍutÉxwÇywÊ||ÉyzÈvxÇqtÆkrÅfnÅakÃ_kÅ`n³S_¥KWšGQDK†EIm69HT1-M0*H1+J6/K81K:2O>6UE8[O5cZ9gaAifEnrOz‚]z‰bt†^r‰_p‰_p‹`qŽ`u’dw”dw”dw”fs’ix–rz—xy–x~˜}ƒ„…‡ƒ›…™ƒz’zr‰om„hhd`y[TmMNaERSAVPBUOC8938938939:4;<6<=7?>9@?:@?:@?:C@;C@;C@;C@;D?;D?;FA;FA;FA;E@:E@:E@:G@8F?7JA:I@9H?8G>5F=6E<3F<3E;1D:1D:1D:1D:1D;4D;4D;4C<4?80?80<71;60:5/:5/94.94.96/96/:5/;7.>7/@9/B90C;0E;1F<0I=1K?1M?2M@0P@1O?0P?5M?4L>3K=2I<3H;2E;1D:0C:1C:1A:0A:0?;2@<3@<3D;4UD<T3.j23ŽAG­LWÃUbË]fÆ_c›B>—I?–LAœNB¬RJ¾URÌOSÄLK¥@4B0 @0¥?1¥;.£7+¤8+§=/«E6¡>+˜7$:'¥>-¬A/²C2¶D:§02¯:B¹DL½LRºMR­KL–A>‚71{8/v6,x2*„1-œ37¹=GÕGWèM[âFIÏ53Ð:9½31ÈNKÆc]”J?Œ[J}cLj_CgdEtoOpQhJhN°hYäbbõQZôDQö7Iý1Jÿ2Nÿ/Mÿ+Iÿ2Oü0K÷1Jõ5Lò4Mð2Kó3Ló7Pò@XïC[îF]íIaëNcêQeèUhèVkåSjäRiãOiäNiçNlëPoòSsóVuþh…ña|å[uÕUlÍYlÉdr¨R]r-2^&'Y0,W:4Q?5B:-:8)>B1LN9VO3aV6e_?heDquT†e~ŒiwˆdxŒiu‹er‹dr‹cvŒeyhzizj€•v„›~Šž… Š’£—¨–˜©™–¦™‘¢’‡˜†zypƒmg~b^wYTmMPcGSXDXUDYVG7827828939:4;<6=>8@?:A@;BA<BA<DA<DA<DA<DA<E@<E@<GB<GB<GB<FA;FA;FB9HA9HA9LC<KB9JA8I@7H>5G=3F<2E;1D:0D:0E;1E;2D;2E<3E<5E<5@91?80=82<71;60:5/:5/:5/96/96-;7.;7,?8.@:.C;0D</F<0F=.J>0K?/M@0M@0O?/O?0O?2O>4L>3K=2J<3G:1E;1D:0D;2D;2B;1@<1@<3A=4B>5G=4S81[*&‰:?¾T`ÙTeæTgãUcËJOŸ30–:/‘>0™A3®F=ÅIGÕBHÍ>@±B7§F5©B3¬@3¯?3°>3³B4´D6²G5©B/£<)¤;(©<(«:(®6&­3(¶97º9=½7>¼7<¿<BÆJLÄTS¾XS™?7Œ4*‰,%61ÃHKÜOWâAQÛ3@Þ:9Í.*Ó84È74ÆE@È]U©WKWFwW@veI~pSpQ„fJdK©o[ÕuiîSWüDP÷;Jø9Iþ=Nÿ@Qÿ:Nù4Hÿ?Sþ9Mû4Ký3Ký1Jÿ/Iÿ0Nÿ6Sÿ3Tý4Vý6Wú7Wø9Zø=\÷>^õ@aøCfö@fõ?eõ?gø?hüCmÿErÿIsÿOuÿQuþUvòSqçTnåaxÖfv·Wb}15j0/Y2+P8,G>/>A.;D/?G/PK._T8g`CokN||`ˆŽr‡‘v}‹qzq|s|szozŠm}Œoƒ’uˆ—z•£Šœ©•£®¦±¡¬´§±¹®°·°«µ­¡«¢•¡•ƒ‘‚s„qh|c]tXSlNPdHRZCWYDYZH671671782893:;5<=7?>9@?:BA<BA<DA<DA<EB=EB=FA=FA=FC<FC<GB<HC=HD;HD;JC;JC9LC:KB9KA8J@6J=4I=1H<0G;/E;/E;/E;/E;1E;1E;1D;2D;2@9/@9/@91?80<71;60;60:5/;7.;7.;7.<8-?8.A;/C;0D</G=1G>/K?1M@0M@0NA0O?/O?0O?2N>1N=3J<1I;0G;/D:0D:0D<1D<1B;1@<1A=2A?3B?6J<3\5.w32­LSÛ]iæM_å?S×6E·&+¨1+7(’:&—<)¬@3ÃD=Ô>
?Í;;±?5¨B4ª@2­?2²@5¸C9¼H;»J<¬>/ª?/¨=+¦;)ª9)°:,·;/¼<1ÊFAÎDAÐ>?Ð79Ö7<ÞBEâLNÝROÄC>ÆKCÍTLÔSMÙKJÛ?Bà5>á27Û4.Ú7.Ð1+Ï:4¿84³@9µ[P–UC{N9‚bI†kP„`F‘ZE­fTËrdçmhêEKò:Dê9Cç<DçBHçFKèGMêFMôJSóAMó8Iö5Hù2Gý/Fÿ2Lÿ8Rÿ9Vÿ9Xÿ:Xý:Zþ;]ÿ=_ÿ@bÿAeÿCiþDkþDmüCmüCoüCoüBqþCpÿGpÿ@hÿGmÿMpòIhéOkå[rÙcs½YcŽ?D`&$Q+"V@3VO=IL7@D-RJ3eYCujV€{gŽy˜œ‹”Œ‡”ƒ‚Ž€‰—ˆ›Œ™ˆ‡’‚Š“‚—žŽ¤¨™²¶§¸¼®¿Á¶ÀÁ¹ÂýÅÆÁÃÃþ¾¾°µ±¢©¢—Œy‡vi{e\sYTmORfJQ[BUZCW\F560560671782893:;5=<7>=8@?:@?:C@;DA<EB=EB=GB>GB>GD=GD=HC=ID>IE<IE<KD:LE;LC:KC8LB8KA5L@4K?3J>2I=1G=1G=1F<0F<0E;1E;1D;2C:1A:0A:0@91@91=82<71;60;60;7.;7.;7,<8-?9-A;/D</E=0H?0J>0L?/M@0NA0NA0O?/O?/O?0N>1M<2L;1I;0G;/F90C9/C;0B<0B;1@<1@>2A?3B?6N;4m84—FEÄY_ÛWbßCQÞ8FÔ3;½++­1'¡8%˜:!—9 ¤;&¶>.Ã:2¾71§7,¢:-¥9-§7,¯:1¸B8ºE;·E:®>2­?0§<,¤6'ª6)¶>0ÃD;ÊG=¾8-Î@6áD?ìBBô=Aó9>ë27Þ.0Û75âGBèSLåNGÜ=9Ù10ã/2ê67Ù1(Þ</Í,"Ð71¾3,°7/Ég\¸l\ŽWC‡ZCƒV?‰R=©\LÎlaágbäTTðEKñ=Fä;@Ù=>ÒA>ÒGDÝOMëWWíNSïDMò9Gö5Fø1Fø.Dû1Gþ7Nþ8Sý8Tý8Vþ9Wÿ;\ÿ>aÿAeÿCkþ;eý<hý>jý?mú>nø>oô=mö:jÿHrÿ;aÿ?dÿKnÿKn÷NmïUoãZnèr‚Ä`j—HKt:8\6-P9+PC2UM:^QAreUˆ|n•‚Ÿ‘¦§Ÿ §Ÿ”˜•ž›ž§¤¦¯¬¦¬¨¢§£¦¨£µ´°ÂÁ¼ÌÉÀÑÎÅÕÐÌÕÐÍÔÎÎÕÏÑÑËÏËÆÊ¿½À°²¯˜Ÿ˜€Œ~m~k`t[WnRVjNS`FV`EX`H560560560560671893:94;:5=<7>=8A>9C@;DA<FC>HC?HC?GD=HE>ID>ID>JF=JF=MF<MF<ME:LD9MC9MC7NB6MA3N@3MA3JA2JA2I@1H?0G=1F<0D<1C;0C:1C:1A:2@91?82?82<71<71<8/<8-<8-<8-@:.A;/D</E=0H?0K?1M@0NA0P@0P@0O@-O?/O=/O=1M=0L</H:/F:.E8/C9/B:/A;/A:0?;0?=1@>2@@4P91r1/¨JKÀRU»>D½06Â03Á//º1)©1!£9#œ=!™;›< ¡=#¥;%¤6%ž6) 
8-¢6,¥4,­81·@:¸C<²@6±B7­A4¨</¦8+¯:0¼C8ÃD=Å@7Ã>-Ï>-Ù5+á*&í"%ú%+ÿ*1ÿ.4í)*ç.+á3,ß4,à3,ã2,é0-ç2+Ú2%Ö6&Ð2&Ï7,Å6.½>7È`UÒq¦eSVBŠM:£WIÈf[ßd_çRTèCIõFMí@Dß??Ô@<É@8Æ?9ÑFAãMLïJPô@Kù9Hý7Hü5Hø3E÷6Gú;Mú8Pü7Rû6Rý6Uÿ6Xÿ9]ÿ;aÿ<gÿ>kÿ?mÿ@qÿBtÿBuþBuüBtùBpÿFmúAaþEeÿKjþKkÿVtÿ_zõ]tòj~æp~Ónv¯]aƒCAg6/hB7sUJye\Œ}v£–­£¡²®«¶¶¶²¶¹¨°³°·½¶½Å½ÁÊÀÃÊÄÃËÉÆÍÕÎÖÝ×ÛâÚØçÝÛéÝÝæÚÜåØßä×ààÔÞÙÏØÊÅ˺ºº¢§£ˆ’‡t‚qexb\sW[oS[jM\iK]gL561561561560561671872983<;6=<7@=8B?:DA<FC<GD=HE>HE>HE>HE<JF=JF;KG;KG;NH:MG;MG;ME:MD;MD;MD=NC?NC=MC7MD5KB3JA2H>2G=1E=2D;2C:1C:3A:2@93@93?74<73<71>7/>7/<8/=90>:1A:2C<2D=3H@5I?3L@2MA1NA0P@0O@-O@-L?/L>1M=0L;1J91I81E80C90@9/>:/;;/<<0=>0>?1A?0S8-‡<7Ä\[ÊZY®86«.*±0*®.%­2#¬9&¬B*ªF,¦D'£A&£A&¢<#Ÿ9# 9*£;0¦:0¦7.­<4¶C<·E;°@5«=0¨</©;.¬<0»E9ÆLAÅE<¼8,¿6$Í:(Ý;.æ3,ñ+*ù(+ý&+û%'ø**õ.+ï2,ë4,è3*å3)ä2(à3%à:*Ñ2Ü?0Î6)Ç7,Â?5³A6Ñl`¿gY¥RB¨OAËcZçhbæRRêAFóBJë>Dà:<Ö<<Í@9Á>4¶8,¹6.Ä94âHHé@Eð:Fö:Hú:Iø8G÷;Iû?NþAUÿ?Vþ<Tþ9Tÿ8Uÿ9Zÿ:^ÿ;cÿ>iÿ>lÿ>oÿ>pþ?rú@rø@t÷Aq÷CjÿMmÿSs÷JhìAaýTsÿg…ÿe‚÷]wï_xèg{Üp}Æqx§gg^X{ZQ—~wª™’¾°­Ä¼ºÆÂÃÈÇÌÇÇÏÃÃÏËÊØÌÊØÑÊÚ×ÎßßÔäæÛéêàëíãëïåæòèæôèèðäæîáèíàéêÝçäØâÑÊÑÁ¿Â««©‘–z†xl{hbu_`rXbpVboScmT21/320431651875984984984;:5<;6==5??7AA9CC9EE;EE;HH@HH>HH<JH;KJ8LK7ML8NK8NK:JF:GD;JFCNIMNIPGDOD?ENE>LC4J@4KA5MC9LB8F=4B90E<5C:5@93?74@85?75<74<42C:5B94A96>95>96>:7?;8@<9B?:D@7G@6J@4L@0O@-O@+L?,G@.F?/I;0K81M53L76J88F;9B?:6904</9D4:A/69$?=(bC1¼j^¹JA¦7,¥5)§7+¥5'¥7&¨=+£8$¥<&ª@*­@)¬=)¬9&®6%«6%¤6'¡7)¢8*¦<.ª@2¬B4¬B2¬A1©>,«=,­<,³=/½C4ÄF8À>1º2&Ç9+Ì8*Ð8+Ö8-Û7-â5.é3/î1-ñ.,ô.-ó0,ñ1,ê5*ã7)Ý:)Û:(Ü9&Ô3Ð3 Ï8'Æ7'¾6(ÂB5ÏSIÈMEÏSKá\Wî]ZðPRí>Cò9?÷?GêAFßCDÕAAÈ?9»<3±;/°</²<0ÊKBÔKCÜFEá>?ë<A÷?Gý?Iù;GþDQüCQþDRÿDUý@Vü<Uÿ<ZÿBdÿFkÿAjú;gõ:gö<mùCsýIyÿKzÿMyÿMtøPsðOpçMiêMjüYxÿgˆÿ_ƒõGl÷Vwè_yÃ^n½{‡a^xp§‘„»§œÑÄ»ÝÖÐÝÜÚÜÛàÞÜçâÛëæ×ìëØîñÙñöÝó÷âó÷çôõëôôîòùôñùôðùñïöíîóèìîãééÞäæÝâÖÐÔÍÉÊ»º¸£¥ ‹‰yƒxr}op{k}†sxlv}k0./1/0320542653762873872:94;:5==5??7AA7CC9DD8EE;HH>HH>HH<JH9KJ6LK6MM5NM8KJ8KI=KJEPPRWU`YXjVTjRO`OIKMD?H?:F=8G>9H?:F=8B;5D:8A96?74?74@85@85=85;62=4/=4/=52<74=96>:9=<:>=9B?:D?9G@6J@4M@0O@-P?+L@*B?,B?.F<0H:1J65I56F35@65@<9:=6;>5@@4E<-J9)^B4€L>®QB«;- 
2#¡6&£:'ž7$ž9%£>*¡;%§<(®?+³@-¶=,·9*¹7)µ7)ª9)¥:*¥:*¥;+¨>.«A1«B/«@.®?,«:(±<+¿E6ÆH9Á?1¼8+À6)Ë7+Ï7,Ð9.Ñ;-Ô</Ø:.à8/ç4-î1-ô.+ô.+ñ0+ê4)á7(×:'Õ:$Þ<'Ù6#Õ8%Ô=*Ì;*Á5&Ä:/ÏF<ÑF?ÚIFéOMóPQôGIð;@õ:AøCHåBEÚDCÒBAÅ>8·;1®:-ª<-«=.¶E5ÆL?ÔNEÜGCã?>ï?Bú?Fý?IøCJ÷CLúDPÿFTýBSù>Sü>XÿBaÿGiÿCiü?iù@jùBpüFvþJzÿK{ÿJzÿHtõKpñQsõ\zù_{úWvüNqýEkúDlïEiÚKgÈ_p±nukLI~t°–‰Ì²¥äÓÉìãÜëçæêéîêçòíâóöãùûáúÿâýÿçÿÿìÿÿñýÿöþÿùûþú÷ýúõü÷ôúõòøïðôëîñèíïæëæÝàÜÖØÊÆÅ´³¯ž¡šŽ“Œ‰…‡‚•‡‰€†Œ~/.,0/-10.21/43/540762761:94::2<<4>>6@@6BB8CC9DD:IF=IG;JH;LI8MJ7NL7NL7OL9KI:NKBTRS_^fihxmm…lkŠkhƒ`YiYQ\OGRH@KH>GG=EE<A@:<C:;?:7<74<73=82>93>:1=9083-94.;60<92=:3>;4?<5@<3E>4G?4I?3L@0O?0P?/P?-L?,<;&:=(?<+A;-B71A62>42;30=84B;5H94N2.Y,)l/.‡;=¡FE£;0¢2$Ÿ1 ¤9'¥<)ž7$ž8"¢<&¤;&ª=)±A-¶A/¹>.»<-¾:-¾</²<.¬<.©9+©9+«<+­>-­>-­<,²?-°8(¹>.ËL=ËG:¼6*º2&É9.Ð6,Ô6+Ô8,Ö:.Ø:/Ü8.ã6/è3,ð0-ô.+ô.+ð1)ç2'Ý6&Ô8"Ò7!Ý8$Ü5#Ú9'Ú>/Ó=.È6)Å9,ËA7É<5Í>8ÙEAåKIêJJéCCì?AëCCÞCAÕD?ÍB=Â=4¶:.®8*«:*¬;+¬8)ÀD8ÔNEÝIEâ?@ê=A÷BIÿHPôAGô@IùCOýGTþEUú@Uû?XþA_ÿEgÿCiÿCkÿFpÿIwÿK{ÿL|ÿJzÿFxÿHwÿKtûOuÿZ|ÿa€ýUvñCdûIkÿTvÜ?\ÍI`Ø{…¸||aC;|n®ŽÕ³§óÛÑûìåúòðù÷ú÷ôýôìûúëÿÿêÿÿìÿÿîÿÿóÿÿ÷ÿÿûÿÿýüÿþûÿýùÿúøÿøöÿõöýóôûðôùðóðçêåßáÓÏξ½¹­®¨¢¥ž£™£—£«žž¦—›£–/.,/.,0/-10,21-32.54/650880991;;3==5??5AA7BB8CC9HE<JF;KH9MJ9PK8OM8QL8OL;LJ>QPL^]bmlzzz’‚¡€‚¨€€¦{u—rkŠe_{YSmTLdMEZG@PB<HD?F@;?<87;63;60<8/=9.;8/:70991;;3>;2?=1@<1?;/A;-F?/H?.K?/M@/O?0O?/P>0L?/@=*?>,@=.?;/?;2>93=:5:94<94D95M51V-+j)-†18ž9A¬>A¦7.¥7(¥:(ª?-¨?, 9&¡8#¤;&¥:&«<)³>,¹@/½>/Á=0Å=1Ä>2½?3¶=2²9.®8*¯9+°:,²:,³9*¶;,¹;-ÄB4ËE9È>3¾1'À2(Í9/Ö5+Ø4*Ú6-Û7-Þ7.á6,æ3,ì1*ó0,ô.+ô/)ï0(ä2&Û4$Ò6 
Ð5×2Ú0Ú4$Ü<.Ö=/Ð9.Í=2ÒD:Ä5-Å60Ê;5ØD@âMIåKIÞC?Ö=8Ó@9ÎA8Ç>4¾</µ:+°8*¯9+±;-¬4&½?3ÑH@ØGBßA@æ@DôGMýNUð?Eð?EöBMþHUþHWüBWú@XüA^ÿAcÿCfÿFmÿIrÿMyÿO|ÿL{ÿIyÿDvÿN~ÿR}þKrøMoÿVuÿUu÷MjðFcïKfÒ>VádtþŸ§ÓŽd92lJ>¥ynÐ¥œõÔËÿéãÿôòÿýÿÿüÿùóÿüïÿÿïÿÿðÿÿòÿÿôÿÿ÷ÿÿûýÿüûÿýúÿüøÿúøÿù÷ÿö÷ÿõöÿôøþó÷öëïìãäÚÕÒÉÆÁº¹´³´¬±´©°¶ª¶¾³°¸­¬´©10,10,0/+0/+10,21,43.54/77/880991;;3==3??5AA7DA8IE:LF:NH:PJ:RK9RM:SL:QK=OJDVTUfdqwx‡‡©¹“ϐƎ‹À‡ƒ¶{w©pmšid_[~UPnNJaKGXEBM?=B;7895296/85,85,671783891;;1></?<+B=*C<)G?,J?+K@,LA/M@0M?2L@4K>5L?6K>5F<3B92=82:946;46=57<5>:1H4-Y2-w78”?D¦>E©8:ª;0ª<+«@.¬A/©@-¥<)¦;'«>*§8$­:'µ<+¼=.Á<-Å;.Ê<0Ê=3Å?6¾>3º:/·7,·7,¸8+º8+¹7*»7+ÇA5ÍC8Æ9/Á1&Æ3)Ë7-Ï5+Û4+ß3)á4-â6,ã5,ç5+ì1*ð/*õ/,ö/*ò/)ì0'â2#Ù4!Ð5Ï4Ö1Ù/Ø2"Ø6)×9-Ô:0Ö?6ØE=ÖF>Í@7É<5Ë@9ÖKDÙNGÒE>Å;1Å=1Â<0À</º;,·9*´:+¶<-¹?0µ9-¿<2É@:ÒC?ÛCBãDHíJOöOVì?Eí>CòBLûIUÿJYýFXúCYûC]û@_ûBbÿEjÿJsÿNzÿO|ÿLyþIvÿJwÿTÿT}ýJqùImÿStÿVtøNiëD^äF]êXkÿ–¢ÿµ¼óžŽHFk.)¦kc͖óÈÁÿãÞÿñðÿüýÿýÿü÷þþöÿÿöÿÿøÿÿøÿÿúÿÿüÿÿýýÿýúÿüøÿûøÿùøÿø÷ÿöøÿõ÷ÿôøÿôøýñóóéêäÜÚÖÑÍÍÊÃÉÉ¿ÊÊÀÊÍÂÉÐȾȿ¸Â¹65143.32.10+10+21,32-43.66.77/880::2<<2>>4@@6B@4JD8ME8OH8RK;TK:TM;SL<RKAQLIYV]li|~Œ¸–™Ì™žØšžÞ™˜Ú”‘ÔŒŠÉ†…¿~µxv§nk–fe‡]ZwVTiMKYDAJ><?;:8;74762555457664872<:.?<+C>*F@*G?*H@+IA,IB0IA4HB6HA9G@:K=<I;;C9:=77875384/83.916=574+C1']80~C=–GCž>?40§;/ª<+ª?-ª?-©>,©>*¬?+°?-«8%²9(¹:+¿9-Ã9,È:.Í:0Ï<2Ë>5Æ;4Â91Á8.Á8.Â9/Â8.Ã6,Å7-ÑC9Ð@7Ã0&Ã,#Ð7/Ö<4Ó2*à3,ä1*ç2+è3*ê3+í2+ð/*ó-*ö/*õ.)ò/)ì1(á4$Ú5"Ñ6 Ð5Ø7#Ù3#Ö3$Õ3&Õ5)Ô8,Ô<1Ö?6ìYQáRJÓHAÌC;ÊE<ÌI?ÈE;À>1¼:*¼;(¼:*¹:)·:(¸:+¼>0¿A3ÁA6Á<3Æ=7ÏA=×EEÝGHäIMëLPèBFê@CðCIùKTþNYüJZüF\üF]øA]ùBaýEiÿJpÿNwÿOzÿNxÿLvÿQ{ÿRyþOvýOrÿVwÿ]{ÿSoîD^úTlîQd÷dtÿ’žÿ‹•ôƒ‰Ç`d§MMµjgΏŠï¼¸ÿÞÙÿïîÿúùÿüþýûÿýúÿþûÿÿûÿÿûÿÿüÿÿûýÿûûÿûøÿûøÿüùÿûúÿûúÿøúÿöøÿó÷ýñóùíïòææçÝÛßØÒÛ×ÎÝÛÏàÞÒßáÖÜãÛÏØÓÅÎÉ<94;8185052+41*41*52+63,74-85.96/;81=:1?<3A>5C?4JB7MC7PG8SJ;WK;UL=UK?SJCSJK]Wcnl‚‚‚¦‘•Åš ÚŸ¦ê¢©ñ¡¥ïž 
ë™›å•˜Ý”•Ö‘ʉ‰½‚ƒ±zy¡rq‘fc~XVkPN\IGRFCLCBJ??K==G;:@;9:<94?;/C=-E@,F@*FA+EB/EC4CC7BC;AC>@ACB<FD>LCANEEOCHNAIL>HI=HDCHAA=2L:.gF7ƒOAI<Ž?2”8)¥>/©>,«>*¬?+­@,¯@-°?-²=+±8'¶8)½9*Ã9,È8-Ì8.Ò:/Ô;3Ï:4Í:3Ë81Ë81Ì92Í:2Ì70Ë6/Ó<5Õ>5Ò91Ì2(Î4*Ø;2Ú<3×3*ä1*é1)ì1*î2)ð1)ò/)ô-(ö,(÷-)õ.)ñ0)ê2(â5'Ù6%Ó7!Ï7 Ô9%Õ8%Ó6%Ñ4%Ò4(Ó7+Ò8.Ð7/ãNGèXPçZSÚQIÌG>ÄD9¿@7¹>/¹<*¹<&¹<(¹:'¸9(º;*À>0ÃA4ÉD;Æ@7É@:ÑFCÕIHÖHGØGJßIKæFHèBDíDIõLSüQZûO]ûL]ûK`öD^÷DaùFfýIlÿNuÿOxÿOxÿPy÷UzóUvôTvüYxÿ]{ÿZwûQkòI`ýWköUgêM^ö^mâKZæTaåTa×XaÁefȃ~嫧ÿÔÑÿëèÿóòÿøùÿþÿûüÿûüÿûüÿüûÿüúýýùúý÷÷ü÷ôÿú÷ÿûøÿüûÿûúÿøøÿóóýîñúëîðáäéÝÝäÙÕâÙÒæßÕëçÛñíáññåìóìÛæâÎÙÕ@=6=:3:7074-52+52+52+63,74-74-96/;81=:1?<3@=4B>3JB7MC7RF8VJ:WK;XL<WK?TICSJM^Xfpmˆ‚„«‘—Ëœ¤ã£­õ§°ýª±ÿ¦¬ú¡§ó ¤îŸ£ê¡á™›Ö•–Ì’’ĉˆ´yyŸji‹`]|XUpRPhOMeNJcKG^FBS@=H?:>?:6@<1A>/C@/CB0BC3BD7?D=>D@<ED;BJ>CVCIaLRhU\n\br`dobbjd`afXWaJDlG>ƒSE•XF“J7‘@+™>+¨A.®@/®A-¯@-°A.´A/²=+°7&µ7(¼8+Â8+È8-Í6-Ñ7/Õ8/Ø;4Ò72Ð72Ð72Ñ82Ô94Ô94Ò72Ó6/ÞA:Õ8/Ñ3*Ö8/Ü>5Þ=5Ú91Ü5-ç2+ë0)ï0(ñ0)ô/)÷-)÷,(÷,(÷-)ô/)ð1)ê4)á5'Ú7&Ô7$Ð8#Î7"Ï8%Î7$Ï6&Ð7)Ò9+Ñ7+Î4*Ë4+ãNGód\ë`YÕOFÄA7º;2µ9-¸>)¸>'¹<&¹;%º9&½9*À</Ä>2ÊD9ÊA9ÎE?ÕKHÓLIÎGDÍEEÖHGãIIæCDèEHòMSøSZùR\÷O^úOaôH^õG`øGdûIiÿNrÿQxÿRyÿTzðUuïZxù^}ý^|ùUpõMgùOiÿYnÿ[mÿctðM^øUfôO`üUgõL_äR_À^_»vqל˜úÉÅÿåãÿíëÿôôÿÿýüÿÿûÿÿûÿÿûÿÿýýýýüúþùöýøõÿøõÿùöÿúùÿúùÿööüððøéìôææëÝÝèÚÙäÙÓèÞÕðèÝùóåÿúìþþòôúöâìëÓÝÜB>5A=4@<3>:1<8/:6-84+73*62)62)73*84+;60>93A<6E>6I@7MC7RF8UI9WJ:XK;ZLA[NHTIMXR`gd|~§’Ê— ãžªö¥°ÿª³ÿ«²ÿ¬´ÿ¬²üª±ùª¯ó¨¬ì©«èŸ¡Ú™šÒÁ±tr¡he’]Z‡XR~XQ{YQxVOpMH_C?M?:@?;:B?8>>4@B5BE:?D=<B>:CB>FH?LUEWoQfƒarŽisŽtvx„o€€^lŠ[c’WYžWU¦VM¦M?Ÿ@.œ9$ 
:$¨=)¬=*®?,±@.³@-´?-¶=,¹;,º6'¾6(Ä6*Ë7-Ñ7-Ö8/Ú91Ú83×84Ø95Ù:6Ø93Ø61Ø61Ú83Ý:3æC<à=6Ú70ã@7þ[Rÿlcÿ]Tç?6ä2(í2)ñ2*ô/)ô*&õ(%ø(&ø+(÷-+ó0,í2+ã1'Ú0#Ô1"Ô4$Ó:(Ì;&Ê<(Í<)Ï<*Ò:,Ò9+Ò8,Ñ7-Ú@8Ï81ÜGAôc^åXQËB<Å@;·4*·:&¶<$¹<&º<&½:(¿:+Ä<0Æ>2Ç>4ÌC;ÙPJßXRÔOJÃ?:À?:ÏHDÞHGâGEæJKïPTñRWïNVíKXñL\ùRføOf÷Ke÷JfüMlÿQsÿRvýRvôWvîXuõXuû[wÿ\xÿZsÿWnÿVkÿbuýVgÿ\mÿ[lñFXôDXÿQeîVeÆfgªieʏ‹ç¶²øÒÏÿëçýïîüø÷ùýüùÿÿùÿÿúÿþüþýÿþüÿûøþöôþùõÿû÷ÿùöÿöôÿööÿõõöèèæØØãÕÔÞÐÍáÖÐñçÞüôçþøèÿüéÿÿóúÿùèñðØáàC?6B>3A=4?;2=90;7.:6-95,73*73*73*84+:5/=82@;5D=5H?6LB6OF7TH8VI8WJ9YK>ZMEUJNWQ_eby{¤ŠÉ”â›¦õ¢¯þ«´ÿ¬µÿ®¶ÿ¯·ÿ°·ÿ¯¶ü¯´ø¯²õ©ªë¢£ã˜˜ØŒŠÉ€»uq®jf¡d_™`WŽ`VŠ]TVPtMIbFCTBAIAAC<<:AB=FEACD?A?@DBEOMRWVdch†quš}¢‡}Ÿ’}œ |˜¥pŠ¤`u¬Yi¶VaºQU¸HF°@4¬;)¬<&­='¨8$¨7%«8&®9(±9(²9(µ7(¸6(»3%Â4(Ê7-Ó:2Ù<3Þ=5ß<5ß<5á>9ß<7ß<7á>9åB=èC=çB<æA;å@:æB9ä@7Ü8/Ô1(Ø6+ëI>ÿ[Oñ@6î6,ê+#î)#ø-)ÿ/-þ,+ö((ú0.õ4/ï61ç6.Þ3)Ö2&Ô2%Ï6&Ë:'È;'Ê<(Í:(Ï9*Ð7)Ñ5)Ð4(Ó9/Ï5-Ô=6åPJåTQÛLHÍB?·1(¼;(»=&½<'¾;'¾9(À8(Â:,Ä:/È>4ËB8ÕOFÜWPÒRIÃD=¿C;ÌHCÛLHßJFåMLëSRíTWëPVéNVìOZ÷VføUhùSiúRkþQmÿSqÿStÿRuòMmøUtÿ\zÿ_{ÿ\vÿUoûRiûReüUf÷RbÿZhÿZhóN^øScþYißS^¶`_—^W¶}Ù©¥îÈÅýáÞùééýøõúüùùÿÿøÿÿ÷ÿýúþýÿÿýÿûûÿ÷õýøôþùõþùõþöóÿ÷öÿõõöêêêÜÛäÖÓÝÐÊßÕÌïåÛüõåÿùçÿýéÿÿïúÿøèñîÛáßEA6EA5C?4A=2?;0=9.<8-;7,84+84+84+84+:5/=82?:4A=4F?5JB7ND8RF6TG6WG7WJ:XKBWKKXP]b^ysuœ…ŠÂ™Ü˜¤ðŸ¬úª³þ¬¶þ¯·ÿ±¹ÿ³ºÿ´»ÿµ¹ÿµ¹ÿ¯²ù¬¬ô££ë™—àŽÓƒ€Çxu¼sm³k`¤i]›cZ‘]W…VRwNKhEDV@@L85<A<@KABL@BQ>BZEJpW]‚cs—s—¤{§®«²{£¸vœÀp“ÂcƒÀTnÂG\ÐK\ÑHPÄ<<º5.¸9*µ<'°<%°;'°;)²:)´;*·<,»<-À<-Ä<.Ä6*Ë8.Ó<1Ù?5ßA8á>7à<3ß:4Ü71Ù4.Ø3-Þ93æA;ìE?éB<ä=5ã<4Û4,Ø4+Û7-Ô2'Ì, Ö6*éG:õK>ò@6ï4-ð-)ö,*ú,,ü,,ø*,ð*)ì/+è2.ã5.Ü4+Ø1(Õ1'Ñ3'Í7(Ë:)Í:(Ð:)Ò;*Ó:*Ô8+Ô8,Ñ4+Ö<4Õ:5Õ<7åONíYWÙEEÂ3/À;,¿<(À;*¿:)Á9)À8(À8*À8,Æ=3ÇA6ÏJAØUKÒRGÆG>ÀD:ÇG>ÖKDØICÝLIäSPåSTâPSâMSåNWòXdöZhý[pþZrýUnûQlýPlÿQpýGmÿPvÿZ}ÿ[{ÿVrýTkþWkÿ\mûYhøZhûamö`kí]gïfnìfmÉ\_ 
]W{OF˜mfÁ˜”ݺ¶ðÔÑôàßÿõôùù÷ùÿýùÿÿ÷ÿýúþýÿÿÿÿýÿÿøùüùôüùòüùôýøôÿùöÿøöùîìïäâåÚÖÛÐÊÜÒÈîäØýöäÿûèÿýçÿÿíúýôêðìÞãßIE9HD8FB6D@4B>3@<1?;0>:/:6-95,95,95,:5/<71>93@;5C?6HA7LD7OF5RE4UF3UH7VJ>XMKXNW^Zqpp–€†ºŒ–Ô•¡é©õ§±ù©±ù¬´ý°·ÿ±·ÿ²¹ÿ´¸ÿ´¸ÿ¯±ü«­ø¥§òŸžê—–⌋ׂÍ}yÅum¶pgªf`ž_ZYX„QQuGGcA>Q=5DH8BT=C`@EnCJ€OU–]dªgx½kÉnšËo˜ÉiÈ`ƒÉWxÆIgÄ;UÆ2HÙAPàEMÑ;<Æ71Ä?0½@,±:$­6"­5$®5$³5&¶7(¼8+Ã;-Ç;.Ó@6×@7Ú@6Ü?6Þ=5Þ93Ý6.Û4,Ó.(Ò-'Ó.(Ø3-á:4ã<4á81Ü5-Ü5-Ð,"Í)Ù7,âB6Ü?0Ð4%Ë.éF7óI<ýH?ü<7ô-*ñ#%õ')û/2ñ-.ë/.ä2.Þ3,Ú1*×0*Ö/)Ó0'Ó7*Ò9+Ö:+Ø<-Ú>/Û?0Ü@3Ü@3Ò6*æLBåJEÎ50ßGFóZ\ßIKÕA?È</Ä;+Ã:*Ã:*Â:*Â:*Á9+¾9*Â<0Â>2ÈF9ÏOBÎPDÆH<¿C7ÁC7ÎH?ÑF?ÕJEÛPKÝROÙMLÚKMÝLQëX`ó[güaqÿauûXmõOgõMgÿNkÿOuÿRyÿUwÿTqüTmý[pÿcuÿjxÿguúboõamídlãflÙhjÎghµfa^TeG<^U©ˆÍ«©æÈÆïÚÙÿóóùøöùýüøÿÿ÷ÿÿûÿÿÿþÿÿüÿÿùúþûöûûóûúõþûöÿýùÿûøþôòøíéêßÙÞÔËÝÓÉíæÖÿøåÿþèÿÿæÿÿëøúïêïèãæßNH:MG9LF8JD6HB6F@4D=3C<2?80>7/=6.=6.=60>71@93?:4C>8EA8KC8NE6QD3RE2VG4VH;ZMGVMR[Whlk‹~‚±Š“Î’Ÿãš¦î¦®÷©¯ù«±ý¯³ý°³ÿ°´þ°³ÿ¯³ý«®û¨«ø¤§ô ¢ïšœé“•âŠŒÙ†…уÉ{u»mk¬dež_a’Y[„RQsNHbWFY_CQmBLEO˜LV¬U]¼[dÊZhÙRpâRußTuÙPlÒJbÍBWÇ8JÅ.?Ô8EãCKåFJØ>>Í>6ÌF:ÃH6¶?+·>-¸=-º<-½>/Ã?0ÊB4ÐD7ÕE:ÙE9ÚB7Û>5Ù;2Ù6-Ù5,Ú3+Ù2*Õ0*×4-Û7.Ý90ß80Þ7/Ý6.Ü5,Ö/&Ù5+×7+Ï2#Î2#Ò9)Õ>-Ô;)Ø<-çC7øJAýD?ù64õ*-ö*-ö.1÷67í55â30Ú1,×0*×/,Ù0-Ù0+ã81ä91â:1â;2à<0ß=0Ü?0Ú>1Ñ7+ïWLøaZÓ;6Ñ;:æPQÜFHæPQÏ<4Ê:/Æ8,Å9,Ä:-Ä<.Ã;-À;,¾:-¾<.ÃA3ÇH9ÇI;ÂD6¾@2¼>0ÉF<ÊE<ÎIBÕPI×RMÕNKÓKK×LOéZ`ð^hüfrÿhxü]qóPeóMeýNkÿVzÿTxÿRrøTmö]qûhxûjw÷erüetó_mì`kêkrßruÃjfªd\žla}fVXH9kXJ”|r½ 
œÞÂÁîÖÖþîïûõõûûûûÿÿùÿÿýþÿÿþÿÿûÿÿúýÿþùûþõûüöÿþùÿÿúÿþúÿûõÿ÷òñèáäÜÑáÙÌðé×ÿúäÿÿæÿþåÿÿê÷ùëíðçèéáQK=PJ<OI;MG9KE9IC7G@6G@6B;3A:2@91?80?82?82A:4@;5B?8EB9KE9MF6PE3RE2UF3UH7YLCUKLZScii…|«‰“È“žÞš¤ë¦¬ø©¬û¬­ý­°ÿ®±ÿ®±þ­°ý­°ý©¬ù§ª÷¤¨ò¢¦ðŸ£í›Ÿé•™ã‘•Þ’’Ú‰‰Ï|€Àsx²ou©mpigŽj_}y_x‚WjŽP_¢N[»S^ÍT]ÖPWÛHRå@Qè?RäCRÜDPÖCKÏ@DÊ9<É46äJLçIJáAAÕ74Î95ÎA8ÈD8¾>1ÃE6ÃE6ÅF7ÉE8ÎF8ÑG:×G<ÚG=Ö>3Õ;1Ö8/Õ4,Ö3*Ø4+Û4,Ú6-Û81Þ;4à=4à<3Ý90Ü5-Ü5,Û7-Ò.$Ý;0Ú=.Í1"Æ-Î7&Ò=)Í:&Î8'Ô8)â:/ó=9ÿ@@ÿ<>þ37ó,/ê,.á+*Ø*)Ô+(×/,Þ44ä88é99ï75ð74î73è71ã7-Ü6*×5(Ñ5&Í5(çPEÿmcÛHAÈ42×CCÓ>BêVVÙA<Ò>4Ë8.Æ6+Æ:-Æ</Ã>/Á=.»9+»<-¾?0ÀA2¿A2½@.¼=.»<-ÄD7ÄD9ÈH?ÐPGÓRLÐOIÒNLÕONç_cîagûitÿnzÿduõVjõQiÿUoÿWvÿVtúXpñ^pônyöw€íksÞXañ`mñ`mìboïs}숊́|ªth™yjskXON:_VGƒqg¯–’ÜÀ¿ïÕØüéëþôõýûüýþÿúþÿþýÿÿýÿÿûÿÿùþÿÿûùÿõúýöÿÿúÿÿúÿÿøÿý÷ÿþöùðçíåÚéáÔôïÜÿûåÿþåÿÿãÿÿèùúêòôçîðåSM=RL<QK=OI;MG9KE7JD8IC7E>4D=3B;3A:2@93A:4A96@;7A@;CC;IE9MG7OG2RF0UF1UH5WK?RHFXR^jg‚}§‰“Ä’žÚ˜¢ç¤§ö¦§ú«©ü«¬þ¬­ÿ­®þ¬®û¬®û¨¬ö¦ªô¤¨ñ£§ð¡¨î §í¤è›¢æ—žâ’šÛ‹“Ò†ÊƒÂ„ˆ·ƒ¨ˆyšmˆšcz©Xi»Q_ÏMYàIRèAIê=Aé=;ã>8ÝC9ÖH:ÎI8ÅF3ÂC0Å@1äTIàG?Ú;7Ø64Ø88Ø:;Ñ98È74À:/¾<.¿;.À:.Â8+Ã7*Æ4'Æ2&Î4*Ð3*Ñ3(Ô3)Ö3*Ù5,Ü5-Ü8/Þ=3Ý<2Û:0Û8/Û7-Ú6,Ø4*×3)Õ3&Ó3%Ð4%Ï6&Ò=)Ñ>*Ê9$À2È:&Ê4%Õ1'ç51ú<<ÿ=@ÿ7=÷37ê-1á-.Ù--×/.Ü43ã9:é;=ð9=ö26ø03ô01í1/ä1*Þ2(Õ1%Ï3$É1$Ñ=1új_äTLÈ95Ð@?Ë:=äRSäJHÜC=Ñ:3É6.Æ8.Å;0Â<0¿=/º;*¼?-½@.¼?-º;*º;(½;+¾<,¼>0¼>2ÀD8ÈLBÌPHÊNFÌLIÐNLç`dìaføhrÿo{ÿhx÷Zk÷UjþZrÿYsû]tîaræjtë}€ñ‡‰èwyÚ_dñgtþm|óeuëlwö‘•ì¢ŸÄ”Š¢|hjUHQ<WUFth\¤ŒˆÛ¿¾òÕÙùãæÿóöÿúüþþÿüýÿþûÿÿüÿÿúÿÿøýÿÿûùÿõ÷ýóýÿ÷ÿÿøÿýöÿý÷ÿÿöÿùïõïáòìÜúõáÿýçÿýäÿþâÿÿçýþìùúì÷÷ëVO?TN>SK>PJ<OG:LF8LD7JD8H@5F@4E<3B;1B92A:2C:3A<6C@9DD:JF:MG7OG2QF0TH2UH5UI=QGEWQ]jh€~‚¨Œ”Å“Ø™¢çŸ ò£ ÷¦£ú§§ý¨ªÿ©«þ©¬ý§­û¥«õ£ªò£§î¢§ë¤©í¦©î¥¨í£§î˜ è”¡å”Ÿß”Ÿ×•›Íš•¾ Œ¯ªƒ ¥e´]pÂQcÐIWÜBNå>Fë<Aì<<ê?8ãA4ÜE4ÔI4ÊI3ÂF.¿B,Â?-âSEÞD<Ý97æ>>êDFå@DÙ8=Î65ÍB;ÈD8ÉC8ÇA5Æ>2Ã9,Â6)Â2'Æ2(Ë3(Î4*Ó5,Ø5.Ù4.Û4.Û4,äB7Ý=1Ù7,Ú6,ß9-ß9-Û5)Õ1%Ø8(Í2 
Ê1!Ï9(Î8'Ç4"Ê7%ÔC0È7&Î8*×6,Þ5.æ3/î53ö:9û?>ÿLKúHFòBBì>=ê<=ç7:å26æ,1ô+1ø)/ô,.ï/.è1+à4*Ù5)Ó7(Æ0!À.ôdYë]SÏ@:ÔDCË;;ßMMêRQàHEÓ;6É6/Æ8.Æ9/Ã;-¾:+½;+Á@-ÀA.»<)·8%¸9&½<)¾?.µ7)³7+¸>1ÁG:ÅK@ÅIAÆJBËJEåa_æ^`ñdjþnwþkuó]iðXeø]mú^sõbtèdoãlrì~ù‹ö†…éqsøryÿy†õaqàXföˆ‘ÿ´´Ð®¢ ›‡]jPDR9QT?jbUœ‰‚ÚÀ¿òØÙöàãÿô÷ÿúýÿþÿýüÿþüÿÿýÿÿûÿþùýûÿúôÿòòþòùÿôüÿöûýòÿýôÿÿôÿÿóûõç÷ñáþùåÿýçþüãþýáÿÿêÿÿñüþóüüòXO@WP@WN?UN>TK<RK;RI:PH;MC7KC6KA5H@3H>2G?2H>2F@4FB9GE9JG8NI6RJ5UJ4UJ4VK9XNDUMKZTbkhƒ{~§‰Ã‘™×˜Ÿç£¤ö¥¢ù¢¢ü¡¢ü¡¥ÿ¢¨ÿ£«ÿ¤­ü¡ªõ¡©ñ¡¨ì¥¨íª©ï­©ó±©ö¬©ø ©ø—§ò’¡â–ŸÖ¦£Î¶ž¾¹…œ¸gz¿L]ÑERß>Mç;Gç<Dæ=Bà@@à@@â>?ß<=ÝEBÉ:4ÊD9ÊF:º1)ÛJEçIHéCEê=Aè;?æ<?á=>Ú<=Ô<;ÏB;Ç>6ÊD;ÑMAÍK>¿=0¸6)¼8+¿7+Ã6,È5-Ë2*Ñ/*Ô/+Û2/Þ5.çA5ß9+Ù1$Ý3&å9+ç;-ß8&Õ2Ï2Ì4Ê5!Ç4"Ç4"Ê4%Í5'Ï5)Ê0&Í3)Ñ5)Ò6*Ò4(Õ3&×3'Ü4)à2)æ3.ë52ë33é/2ç-2è-4í.5ö-3ö+/ï+,ì/-ê5.ã9,Ù7(Ð4%Ã-Ä3"çXHéYNË;3ßNIÅ41ÚHHáOPÜJJÔC@Ì;6È80Æ8,Ç9+È<+Å<*Â;(¾9&¼;(»<)º=)º=+·<,´8,µ;0¹=1»?3»?3¾@4À@5ÄA7ïjaõpiêc_ômjûqqìadôgmÿpyÿlzûhxðdoébiëdjîlnîqoîqoûy{ÿ}…ùZlücwßbpû¬¯¶ªš}‘u]rQJY:LR8snZ¥•ˆË·°íÕÓÿîðÿö÷ÿ÷ûýøüüüþÿÿÿÿþÿÿþÿûÿþðÿôåþéæûêïÿîøÿöüÿôþÿóÿþñÿýñÿûîÿúêÿùæÿùãÿùáÿùáüúåÿÿõþÿûÿÿûYPAXO@XO@VM>UL=TK<SJ;RI:NE6MD5LC4KB3JA2JA2JA2IA4GC8HD9LF8OH6RJ5SK4VK7TK:XNDULMYUckhƒ{}¦†ŒÀ•Ó–šáŸžì žòžžôžŸù¢ýŸ¦ÿ ªÿ¡¬û ¬ô «íŸ§è¢¥è¥¤ê©£í« î¤Ÿï §÷š¦ðœ¡á£œÐ±˜À¿ªÂq„ÄTbÑCOâ<Fì8Aï7?ê:=å<?Ü@AÛACà<CÚ9?ÚADË;;ÌB?Ê@=Á31åOPæCFè?Dê;Bç8?â8;Û89Õ:8Ï;7ÔE?Æ=5Å<4ÌG>ÌLAÁC5¸:,·9+¹7*¿7+Æ6-Ë4-Ñ2.Ø3/á53ä84ä>2á;-Þ6)ß5&á5'á5'Ú5"Ó4Ñ9$Ë9"Ç9%Ç:&É<+Ë<,Ì8,Î4*Ó4.Ö5-Ó7+Ï7)Ë5&Ç4"Ë4!Ï4"Ø2$Þ2&ã0+æ.,æ,-æ,/æ-2è.3ì+.í,-ì0.è2.à4*Û5'Õ8'Ò9'Õ?.»*ÖD5Ð>1ÙF>ëVPÚB?ØBAçUVâRRÜKHÔC>Í=5É9.È9+Ç8(È;)Ä;(¿:'½<)½>+º?-¹@/¶>.²:,´;0·=0¸>1º?0½?1ÁB3ÅC5ëi\òmdçb[ðkfõolèbañklütxÿq~ÿo|öirí]fêY`ñbf÷qpþzxûvwÿx~ùRdþ[pä]nö¦©©¦“jŒkZtON_;RY:us\©ÖĸöáÜÿõñÿúúÿúûÿûüüüüüþýûÿþúþÿõÿúíÿóáÿéáýæéÿëóÿñøÿòýÿòÿÿñÿýïÿýíÿúéÿöåûóàúòÝüôßü÷äûûóüüúÿÿý\PB[OA[OAYM?XL>WK=VJ<VJ<SG9RF8QE7OC5OC5OC5OC5MC7JC9JF;MG9PI7SK6VK5VK7TK:WMCVNLZWbkiy{¡…‰¹‹‘Ë‘–Ö˜˜Þ™™ã˜™éšî› ô£÷Ÿ§ø ©ô 
ªï¨èœ¤ãœ¢àžŸàžá™ã›™â–›ßššÚ¤•Ð±ŒÀ¿‚¬Çr“ÉYqÇBS×ALß>Dã=?å==å<?ã=?á>Aá>Cà<C×8=Õ?AÎ@?ÊC?Ã<8Ä96éWWàBCä>Bè;?ã9<Ü68Ö66Ñ96Ì;6ÕHAÄ;1¾5+ÅA5ÍK>ÆH:»=/·8)º6)¿7)Ç7,Ï6.×50Þ71æ95é=9×3*×5*Ú6*Ü6*Ý5*Ý7)Ú:*×>,Å4!¿4¹4!¹6"¼8)½8)½3(À-%ã@;å>8Þ=3Õ<.Î8)È7$È7"Ì7#Ò7%Õ3$×/&Ù.'Û.*Ý/.Û//Ü./Û,)ß0+à5-Ü5,Õ3&Ð4%Ð9&Ñ>*ÔA/Â1 çSEÒ;0ÚA;ÞC?èJIðTUíUTçSQãOMÝJCÖC;Ï<2Ê8+Å6&Ä7&À7%¼7&¹:'¹<*·>-µ=,²=,°:,±;/´<.µ=/·=.¹?0¾C4ÃE7Ü\Qêg]äaYðlgöpoçabìfgójqàR^ï^kõhqòemñ`gõfjùqqþxwútuÿnuóJ]øRhä[mñœ¡£œŠa€`UoHOd=W`Aww]­¥’áÑÂÿîæÿøñÿüøÿþúþÿúûÿüûÿýûÿýùÿûóÿöèÿîÜþãÚøÞáùáéüæïüèöýëýþìýúéÿúêÿøçúñàôèØòçÕöëÙøñáþúñýüøÿÿû\PB\PB[OAZN@YM?YM?XL>XL>UI;TH:SG9RF8RF8RF8RF8PF:LE;KG<OI;RK9TL7WL6WL8WK;VLBUMK\V`jh~yy›‚…²ŠÂ”Γ•Ò•”Ö••Û–˜ã˜›è˜žìš¢í›£ëœ¥è˜¢ß–ŸÚ•›Õ”˜Õ“—Ö’“Ö‘Ò…„½‘ƒ¶¥}±·s¢ÈfÓW{ÕGaÔ<KÞAJàBCÞCAàB?áAAä?Cç>Eç=Fã?FÖ:>Ñ?@ÐDCÆA<º61Å>:êZYÝABã@Cæ=@ã:=Û89Ó97Ð<8ÎA8ÏE;Â:.»2(À</ÇE7ÄE6½>/¸9(½8)Â9)Ë8.Ò:/Ù80à91è;7ë>8Ø1+Ù2*Ü3,Ý5,à5-ß7.Û9.Ö=/É7(Æ:)Á<+¾<,¿;.Á;0Å<6Î95ëC@í@<ä@7Ú<1Ï9*É8%È:&É;%Ï<(Ð7'Ï1%Ñ1%Ô1(Ö3,Ô1,Ñ/*Ó2*Ó5,Ó7+Ð7)Í5'Ê7%Ë:'Ì>*Ì;(Ï<,ô^PàF<Ò3/Ò.,æ@@ûWXÿusÿroÿjgö^YçPGÕA5Ç5(½.Ä8'À8(»8&¸;)·<,µ=-²=,°<-¯;.­<.°</°<-±;-¶>.¼B3ÀF7ËMAß`Wâa[ôpløstæ_cä]aæ]dÖHTçYeógrôgoöelükpþsvþvvûpsûenñDXôKbåXk뒖 
•ƒ_zYQlCSh?\gEy|_°ª”çÜÊÿõèÿùðÿûñþþôýÿ÷ûÿúùÿúöÿøòüóêûëÚøÞÎóÒÊëÌÏìÎ×îÒàðÖéóÛò÷áöõáûöãüõãõìÛíáÑéÝÍíáÑñèÙüõíü÷ñþùó\PB\PB[OA[OAZN@YM?YM?YM?WK=VJ<UI;UI;TH:UI;UI;SI=OG<OI=QI<SL:UM8XM7YL9XL<UKAUKI\U]hexut“|~¥…‡·ŒÂŽÃÈ’ϐ”Ô’•Ü“—à”˜ß•šÞ•Ü’›ÖŒ•Î‡Ç…‹Å‡Ã}ƒÁ~¸}tŸŽp”¨gÁ\„ÖNvâAcè:Uê9KæAGàECÜGAÛGCßEEæAGî<Jí=JæBI×>@Ð@?ÐIEÀ?9³2,ÈC<àUPÜDCáACã?@á>?Ú?=Ô@<ÑD;ÏF<Ç=2À8*½5)½9*¿=-¿=-½;+½<)Â:*Ç;*Ï;/Õ;/Û8/à8/ç83é:5æ95ä52ä20æ21ç32æ40Ý2+Ô0'Ë1'Ë7-È:0Ä7.Á4-Ã40Ì:;Ý?@ë8;í76ä71Ø5,Ì4&Æ5"Ä7#Å:%Ë=)Ë8&Ì4&Î5'Ô8,Ô:0Ó9/Ð8-Ð>1Ê;-Å8'Ä7%Æ9'Ç<)Ç<)Ç:(Í>-Í:*Ø>2áC:Ô/-ä::ß13ß56×53Ö;6ÞC>åKCêQIêSHèTHäUGË?0Ä?.¾<,¸=-·>-´?.°?/®>0¬<.¬<.¬<.¬=,­<,±=.·A3ºD6ÆLAÝcXàc]ïolôrrå`cå`eç`g÷kvûoz÷kví`hð_fýlqÿy|ÿy|úmsø^jóAWôC]èUh懍£“ƒg]YtI^uIhwPˆf³²–èâÌÿúéÿýíÿþïýÿòûÿôøÿôôÿñíûêäòáØî×Åèǻ伺޺¿Þ¼ÇàÀÍâÃØæÌãëÓìîØóðÝ÷òßóêÙìàÒèÚÍêÜÏïáÖòèßñèáòéâ[OA[OA[OAZN@ZN@YM?YM?YM?XL>XL>WK=VJ<VJ<WK=WK=XL>QI>QI>SK>UL;XM9XM7YL9YL;WK?ULGYSWe^nnjƒut–~}¥„…±†‰´‡‰¹Š‹Á‹ÈŽÏŽŽÔÕÓ”Ó‹ÊƒŠÁ{‚¸u|²ov­io«ljœ€lˆ“cy®YvËPoãBdð7Vö4L÷8Hï@EåFBÝJCÛJEáGGèCJò=Nð>LäCI×CCÌA>ÎIDº;4°4,ÉI@ÕLFØBAÚ?=Û;;Ú<;Ø@=ÔE?ÍG<ËG:À8*Á8(À8*¾9(¼9'»8&½:(À;(Ä;)Ê=,Ò<.×;.Ü8.á6.ä6/è50é32è./é,0î02õ47õ77î45æ21Ú.*×4/Õ62Ñ32Ï/1Ò/4Û6=ê:Dí06î02æ3/Ü3,Ñ5)Ê7'È9(É<(Æ8$É6$Î6(Ó:,Ú>2ÝA5ÝA5ØA6ÓG8ÉA1Â;(À9&Ã<)Æ=*Å<*Å8&Ì:+Í7)Í0'æC<Û2/ï?Aã/2Ú*,Õ1/Ò5.Õ81Ö90Ò8.Ì5*Æ2&À1#ÎE5ÆA2¼=.µ:*±9)¯9+«:,ª:,«;/«=0¬>/¬<.«<+­<,²>/µA2ÉSGàg^ßd_èkiînoæchðjqõoxúq{üs}ömuîbködnÿrzÿx~ÿquúipøWfùAYô@[êQfá~ƒ¬—†ykmˆ]o‰ZyŠ`—r¶·˜ßÞÂùôÞüúåÿÿïûÿïõÿïïÿëçúäÜïÙÐãÍÅÞÁ°Ö­¨Ó¦ªÐ§°Ó«¶Ô®¼Ö±ÇÚºÒßÁàæÌèêÔòíÚóêÙîâÔìÛÑêÙÏìÛÓéÚÓæÙÑä×Ï[N>[N>[N>[N>ZM=ZM=ZM=ZM=YL<YL<XK;XK;XK;YL<YL<YM?SI?TJ>UL=XL<YN:ZM:ZM:ZM<YM?VKEXOR_Wbd_sjfsoyy|~¤~€©‚²ƒ„¼††Æˆ‡Ë‰ˆÌ‰ˆÊˆˆÈƒƒ¿|}µvw­pq§kl¤fg 
l`Ž‚`q•V^³N^ÒJ^ì@X÷6Kþ3Fþ9Có?BéD@áHBÞICâFIéBJò=Nï>NÝAEÔFDÈA=ÉHB´:/±7,ÎOFÉB<Î=8Ð64Ï10Ð51Ó>8ÑD;ÈF9ÃD5À8(Ã:(Â:*¿:'½8%¾9&¿:'Á:&Ä9&Ë:)Ó:,Ù9+Ý7+á5+ä3+ê3-ë*+ñ*-ø-3ü/4ÿ17ÿ37ÿ38ý58ù8;õ8<ò9>ò9Aô9D÷7Fû6Hþ5Eó*4ô-2í12ã4/Ù6-Ð8*Î;+Î=,Ë5&Ñ8*Ù;/ß=2â>4ä=4å>5ßA6ÙJ<ÏG7ÉA1Ä<,Â9)Â9'Æ8*È9)É2'Ó9/á@8ëD>Þ0/à..ë46è66Ø3-Õ7.×90×:1Õ;1Ò;0Í:0É;/ÖL?ÌF:¿@1´9*°6)¬6(«7*©8*¬;-­=/®>0­=/«<+«<+­<.±=0ÀKAÚdZÞc^èkiðosêinõrzüvîfp÷oyùryöjsþlvÿwÿu|ùhoùcn÷RbþB[÷<YêKaÝt{µš‰žz}˜k{—f‚•h‘u¬²ŽËÌ­ààÄéìÑîöÞåóÙÙíÑÏæÉÅÞÀ¼Õ·´Ë®ªÈ¤šÁ”–™Ã“¡È™§Ë¬Ë¡µÎ§ÁÒ°ÎÙ»ÙÝÄåãÎìåÕïáÖëÚÒçÒÍãÐÊàÏÈÚÌÃÕǾ[N>ZM=ZM=ZM=ZM=ZM=YL<YL<ZM=YL<YL<XK;XK;YL<ZM=ZL?VJ>VJ>XL>YN<ZM<ZM:ZM:ZM<[OAWMDWML\RZ^Xfb]qjfspwv–xxœ|{§~µ„¿…‚ň‚ȉ„Ç‚}¿y·zu­xq§un¤sj¡pg za‹„QZ™HE³EHÓHOîBNø:Fþ6Aþ9@õ==îA=æD?ãEBäCHèAKî=Më?MØ?BÑGDÅA<ÃG?²9.³:/ÑUKÃ>5É83Ç/,Å*&È/*Í:3ÍC9ÃC6¼>/À;*Ä;)Ã<)Á:'Â;(Ã<)Â;'Ã8%Ä5$Ê7%Ô8)Ù7*Ý5*á3*å3)ê2*õ33ÿ58ÿ8<ÿ4:ÿ-2ÿ(-ÿ(-þ+1ü-3ö)0ò&1÷)6ÿ,?ÿ*Aü 9ó/ö"0ó(.í,1æ0/Ú1,Ô3+Ñ5)Ð6*Ô6+Ý90æ=6ë>7ë:4è71è50â92ßH=×K<ÓE7Ë=/Ä6(Â3%È6)Ï8-Î4*Ô6-æC<à93à21Ò  ì89å63Õ1(Ò4)Ò4)Ï5)Î6+Ê6*È5+Å7+ãYNØRFÊH;¾@2·;/µ;.µ<1µ<1¯9-±;/±=0±=.¯;,­9*­9,®:-±<2ÐYQÜb]ìppôvyîmrõq|ör}ÿzƒÿ‰ÿy€ôhq÷epÿq|ÿuÿktø^jöOaÿA^÷8WéG^Ûntº‹™©„‚žn}™f€–gœq¡©‚´º–ÈÍ­ÕܽÌÙ»ÁÔ´²Ì©§ÂŸž¼˜š¶–²Œ±†‰³†µ~¹„•À‹œÂŸÄ‘¨Å™²È¡¾ÎªÉÒµÚÙÄæßÍêÞÒéÖÏáÌÉÛÆÃÔÁ»Î½µÈ·°ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;XK;YL<YL<ZM=ZM=ZM=ZL?ZL?ZM=[N=]M=^O<\O<\O>YM=YLCWLHXNOZPX]Ub`Yia\rjfokŽsp›vq§{u³ƒ~ÁŠƒÉŠƒÇˆÀŠ~¼ˆy²ƒr¨†r§t«‡l£†\„HL¤B9¶A:ÍC@Þ@?è:;ô<>ÿDE÷><ó=<ì>=ê@AëBIêCMëANãALÐ>>Å@9¾?6·>3°</²<0ÀD8ÒMDÇ61Ô;6Í2.Ì3.ÖC;È>3µ6'»>,»6%Á:'Ã<)Â;(À9%Â9&Æ;(Ç:(Ë:'Ð9(Ö6(Ø2$Ú."à/%ì7.õ=5õ82÷40÷0-ø**û&(û%'û%'û%'û&*÷!)ø".ÿ'7ÿ(>ÿ!;ÿ6ý1ÿ'8ï#,æ#+ã+-Ý--Ô+(Ò-)Ü41â62è64î66ò65ò12ï./î,,å/,Ü92Ñ:1Ò91ìSK¼#Ä+#ßF>È.&Í.(Þ<7èE@â:7Ø/*Ú.*â51å<7ß>4Õ<.Ï5)Ë3&Ì5*Í;.Ë;0Å8.ìbXàZOÈC:¹6,º:/¸8-±3'´6*°0%°2&®2&¯4%°6'²:*´<.³=1°:0ºC;Ö\Wìppñsvöx|ûyƒõq|ÿ|„ýw€út}üs{ÿr~ÿr}ÿmxÿgsü`nôI\ÿ>\ÿ>[äAVÔek´•€’¡zƒm}™f~”c‡—j”Ÿwž¦ ¨ƒœ¨„’£Š£|€Ÿvzšqyšo|r 
vƒ¤u‚­wƒ³y‹¸‘¼„“¾†—¾‡œ¾Œ¢½§¼“¯½š¾Â§Ï̹ßÓÇãÒÊÜÇÄÔ¿¼Ìº¶Á²«¹ª¥ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;XK;YL<YL<ZM=ZM=ZM=ZL?ZM=\L=]M=]N;^O<\O<\O>]P@[OC[NFYNJZPQ\SX_U^_Wfd\tibƒng‘qjxp¬€y¼‡€ÄŠÄ¿~ºt«Œn¢—q¤¡u¨šjž™W{=Aª5+±5+Â<3Ó@8Ý>8è?:óA=õ=;õ;<ô;@ñ>Bî@IèAIâ@KÚAFÇ<9¼<3¶<1±;/«:,®:+½A5ÐJAÚG@ÜA=Ô63Ð51Õ@9ÍC8»9+²5#½8'Â;(Ä=*Ä=*Ä;(Æ;(Ç:(Ê9(Ï9(Ï6&Ô2#Ú2%ã5*ê8.ï80ð91ç1&ç/%ê.%ï,&ö+'û+)þ,+ÿ-.ü*-ú%+ú$.ÿ'7ÿ$;ÿ7ÿ3ü0õ0ö.9õ3<å*1Û(,Ü.0Þ02Û+.ñ:>ô7=÷4:ö26ö/4ø03ù25ô87Û2-Ú;5×82äE?äE?Î/+Ñ2.Á"Ô51á?:èE@à=8Ø3-Ù2,Ý60Ý:1Ô:.Ï9*É5'Ç5(È9+Ê<0Ê<2Æ9/ícYáXNÉ@8º4+Â91Ã:0¿6,Â9/¿5+¾4*½4*»5)¸6)µ5(²7(°6)´;0»B9ÓYTèljïqtöx|þ|„øv€þ|„üyùv~þuÿsÿp}ÿkwüdqý_nöI]ÿ>\ÿ>\éCYÓeh®Žw‰˜oz•bu’\xŽ]cŠ•k˜p‹•p…”mp‡]l‰]h‰\g^l`q•ey›izŸk¬t„±vŠ·|»}¼“¼‚–½†›»‰›µ†¡³‹­¶—¾½¨ÏÆ·ØÇ¿ÖÁ¼Ï¼¸Æ·´»­ª²¤£\L<\L<\L<\L<\L<\L<\L<\L<ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<\L=\L=]K=]M=]N;^O<^O<\O<_RA]QA\PD[NFZOK[PN]QS]RZ_SgcXvh]…kb‘sk¤vµ†}¾‰}»—…¿™€·›v©žpž®u¢ºv¥µi˜°Rt¸<D¾5-º4+À<0ËC7ÓE9ÛC8ä?9ñ=<÷:>ù:Aö=Eï@GæAHÙ@EÏ@BÀ;6¸90±9+­9*©8(­9*»?3ÏF<ÙD>×96Ù74Ú;7ÜC=ÝOEÎH<µ3#¾9(Á:'Ä;)Æ=+É=,È<+É:)Ì9)Ï8'Ï2#Ó/#ß4*î=3ô?6ñ91é4+à4&Þ4%â2%å/$ë,$î+%ñ+(ò+(ó+-ð(+ò&/ø(5þ%8ü4ü3ø3ô%7ò0;ó5?ò9Að;Dð<Eé5>ä,6ð2<ó/;ö.9ö-7õ.3õ.1ó/1í42Û/+â=9Ú64ß;9ÿmjåCAÑ/-Î/,Ù:7ß@<àA;Û<6Õ7.Ö5-Õ4*Ò6*È6'Ã6%Â6'Ä8)Å;.Æ<1Æ<1Å;1ë`YáTMÉ<3¿0(Ë;3Ñ>6Î;3Ò=6Ø=8×<7Ô=6Ï<2É;1Â:.»7*¶6)¹;/¼@6ÏTMãgeíorøz~ÿˆû|ƒû|ƒûyûx€ÿwÿsÿn}ÿgvû`pü[mõG^ÿ<Zÿ>\ìFZÎ`až~ev…Zi„Qh…OmƒRt…X{ˆ]}ˆ^x„\q‚X`zM_Pa…UeYn•`uœg} j~£mƒ¬r„²tˆ¶x‹¹y‹¹y¹z‘º~”º“±•¬€›©†©­’ºµ¢Ç¹®Ë¸²È¶´¸¬¬ª¡¢ 
—š[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<]K=]K=]L<^M=^M;^O<^O<^O<^Q@^Q@]OB[OC\NE\OG\OI]OO^P_bTmdY{i^‰pgœ|t­…{·ˆ{³™…¸¥…´¯€ª¸z¡ÆxŸÑuœÊdŒÆMlÜFRßA@ÕA=ÏE;ÌH;ÍG;ÓE9ÞA:î@Aõ<Aú=Dø?GðAHâAFÒ@AÆ?<¿?6¶<1¯9+«:*ª9)®8*¼>0ÎD:Õ<7Õ31à;9åA?áD?èSLßUJ¿:+Á9)¾7$À7%Ä;)É=,Ê=,Ë9*Î8)Î5'Ò2$Ù3'å9/ñ>7õ>6ð5.ä/&à6)Þ6)à4*ã1'ç.)ê-)ì*(ì**ê),è',ë'1ò*7ö';÷!9ü>û%Aû6Hä(6ç/;ÿP[ÿ`lÿP\ò<Ió9Gè)8í*8ñ-9ô0:ñ27ê01á+*×(%Ø0-Ú72Ô2/åC@ÿspõVSÝ>;åGDÛ=:Ú<9Õ:5Ò80Ñ7-Ð6*Í4&Æ3#¾5#¹6$»8&¿;,À</¿9-À:/Å<2åXQÜMGÊ70Ç0)×<7ÞA:Ú;5ß<5ç>9é=9æ?9á@8Ù?5Ï<2È:.Á9-»9,»=1ËNHàc_ìnoú|ÿ„ˆý~…øy€øy€üyÿx‚ÿt‚ÿn}ýetù^n÷VhóE\ý8Vÿ<ZîH\ÇYZŠlRcrG\tB\yCdzIl}PtVvƒXs‚YoWm‡ZlŽ\o•buh{¤l¨o„ªq…«p…­q…±tˆ´u‰·v‰·v‰·v‹¸w¹z‘´|«{’¤|›¤…­«–¾²¤È·¯Ç¸µ´«®¡ž¥—’™[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<]L<^K<^K<^M=^M;_N<^O<^O<^O<^N>^N>\O?^NA]OB^PE^OJbNYbQdcTqdZ}le‘xr¢w¨ƒv¤”}§¬†«Â‰©Î‚žÚw”ál‰ÛXwÚD_óBTúDPïJPáIHÐE>ËE<ÐF<ÙD>èBBð@Cõ@GõBHíDGßCDÏA=Â?7¿@7µ=/®:+«:*ª9)®8*»;.Ì?6áFAâ=;îBBëAAÞ;6ãJBæXLÕK>È@2¿7'¼3#Á8(È<-É:*Ë7)Ð8+Î0%×3)à8/é;4í:5í60ë0+å,'ã0,ã1-ç10ì31ó25ö37÷48÷6;ì-5ê,6í.=ñ0Có-Fõ(Fý+Nÿ3Rô9Lå3?ð@Mÿ_lÿkyÿWfõCSõ>Pî3Dí3Aí3>é6<ã99Û83Î4*Ç/$Ë2*Ç.(Ð72òYTö\ZòXVÛA?âHFÜB@Ô<7Î70Í6-Ï8-Ï9+Ê7'Â5#¹8#³9$µ:(¸=-¹;,¶6)¼7.Æ=5ÛNGÙEAÎ50Ð1-á>9ç@:ã81ç51ë20î21ë52ç83á:2Ø:/Ð9.È:.¾6*º8+ÈIBÜ_Yënlû}€ÿ…‰ýƒõv}öw~üyÿx‚ÿsƒÿk}ýdvù^pôSeôF]ý8Vþ=ZïL]ÀTRz_BWh<Yq?[vCfyKn~QtVx…Zyˆ_wŒay•exšhz 
m|§qªr„«r…«p†©o‡­rˆ°t‰³tˆµt‡µt…·r‰¸t‹¸w¶{Ž¬xŽ£x™¤‚­­•À¹§ÎÀµÒÄÁ¼¹À¨ª¶œœ¨ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:[K;[K;\L<\L<]L<^K<^K<^K<_L=^M;_N<^O<^O<]N;]N;^O<_O?_O?`P@aQAbPFbNPcN]_Pe`Usga…to—{t}q—v–®ƒŸÌ‹¡Û‚–ær…ëduéPdé;Rù3Lÿ<QûERéDKÕ?@Î@>ÑC?ÖEBãEFèBDîAEíCFçDEÚEAËB:¿?4»?3±;-©8(¨9(ª9)°8*¼:-Ê;3èIEé??ë=>å78Ú2/ÞA:îZPôh[ÚPCÉA3¾5%Â6'Ç9+È9+Ë7+Ò8,Ñ0&Ú6-æ;4é;4ê40è/,ê-+ë--ó49õ3;ø3=ü3=ÿ1>þ0=ý1=ù1>õ0Aò1Bô4Kõ5Nõ0Nö-Oÿ1Xÿ>_æ3HùO\ÿanÿ_nÿWfûP`ôDXé8JóAQê:GÝ2:Ò/2Ê2-Â8-½;+¼:*Ä:/À2(ÙJBúkcÝLGáPKÏ;9Ï;9àLJÔ@<Ê70Ê7-Î</Í<+Ë:)Ä;(´;&­:%¯<)³>-±9)¯3'º:/ÊD;ÖGAØC=Ô72×2.ç;7ì:6è2.í2-ò,-ô,,ò./ï31è71â:1Ú<1Ò>2Â6)º4)ÅB:×XRçjhú|}ÿ†‰þ€„õv}÷xýz‚ÿx‚ÿqÿi{ûbtø]oòQc÷I`ÿ:Xý?[ðO_ºQNqX:Sf8]uEa|InSv†Y{ˆ]~‹`g‚—l€œl}Ÿm|¤o}¨p€©o‚ªnƒ©l…©lˆ«qˆ®qˆ°r†³r„³oƒµp…·r‰¸t‹µv‰¬tŽ¥wš¨„°´™ÈïØ̾ÛÒÍÂÂÌ«±Á›¡±ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8[L9[L9\M:\M:]L:^K<`J<^K<_L=^M;_N<^O<^O<^O:_P;`O;aP>aP>aP>aP<bOAeMMdMW`O_aUmgb€uq’|v˜€s‘uŽ­€”Ά”ß~‡îqyùclúP[ú;Mÿ*Fÿ2Qÿ<Sñ<Mà<E×@EÕCDÔBCÜCEàBCäADâBDÞDBÔC>É@8¾>3¸>1¯9+¨7'©:)­<*´<,Á=1Ï?7ß=:å99ã03â/2à42Û94æMEülaòh[ÙQCÆ</Æ:-È:.É7*Í6+Õ8/Õ4,Þ7/æ93è62é1/è/,î1/õ15ÿ2Cÿ0Eÿ-Fÿ*Bÿ%@ÿ#=ü :ö 
:÷'Aõ+Eõ.Mõ/Pó+Pô)Tÿ2`ÿAhóCXÿ`mÿhuøVeíKZñL\ëBUÚ2Cë@PÝ7CÉ-1º((±.$«8&¥?&¨@'¸?.¼:,ÝYMá]QÌF=ÍD>Á63É;7äUQÕF@É91Ç7,É:,È9(È9(Ä=)±?'¦<&¨;&¬=*«7(¬3(¼=4ÓLFÖGAÝE@Û96Ü30é73ì51ë0+ô1-ý,/ÿ+-ú./ô1/í4/æ81ß;1Ø>2É9.½3(À;2ÏNHáa^÷yzÿˆŠÿƒ‡ùzû|ƒÿ{†ÿx„ÿn~þew÷^põZlëL`õIaü9Wû=YîM]²JGhQ1Qd6\tDf~Nuˆ[Žc„h†’jˆ—n‰žsˆ¤t„¦t§r}¨p¨n©mƒ©l…©l…¨n‡«n…­o…¯o°l€²k‚¶n…·r‡³t‡­t§wœ¬…²¹šÈƯ×νÙÒÌ»¿Ê¤¬¿’š­[J8ZK8[J8ZK8[J8ZK8[J8ZK8[J8ZK8[J8[L9\K9\M:]L:]L:^K<^K<^K<^M=^M=^O<^O<\O<`Q>aR=aR?bQ?bQ?aP>aO;bL>hOKgNRbP\cWkjfuv”}|œx—‘z–«•Æ„’Ú}…ðuzýklÿY[ýDLþ/Iÿ5Rÿ<Uó<Né?LãCKÝBHÖ=@ÛACÜ@AÞ@AÛA?ØC?ÐA9Ç>4½=0µ?1¬;+©8(­<,´?.»@1ÊA7ØC=Ù74á85Þ..â30æ:6Ó0'Ñ5)ëUGÿwjë[PÑC7Ê<0Ë;0É7*Í6+Ô:0Ú70ß82å63ç32ê01í12ô36ý4<þ%6ÿ#9ÿ"9ÿ"9ÿ!8ÿ6ý5ø6ï2î9ð%Bï(Gî&Jñ'Mÿ3\ÿEhÿbuÿ`l÷WcñQ]òP]ïMZâ@MÔ2=Ú=DÌ79»/.®/(¦5'¢;(›?&œ>%ª9'²8+ÑUI»;2Ä?8¾41½2/ÎC@åWSÕHAÈ91Ä6*Å6(Ä5%Æ5$Ã:(±<(§:&¨7%­9*¬4&¯1%Â?5ÛRJÙIAáHBÞ;6Þ2.ê41í1/ï-+ü22þ,/ý+.ù+-ó++í-*æ1*ß4,Ö8,Ñ=1¿2(¼7.ÈIBÙ\Xówwÿ‰‹ÿ‡Šû€…þ†ÿ}ˆÿv„ÿj|þ_s÷XlöTiêD\õD^õ6Uñ:VàLZ¥HC^K-Mc5Uo?b}JtŒ\€–g‡˜lˆ›nŠ r‹¥vŒ¬z†«wªr~§m|¥i}¥g¥g‚¦i‚¥k‚¨mƒ«m‚¬l®h~°i³j„¶o‰¶s‰¯rŽ¬x™°„­»˜¿Å©ËʵËͶ½Åžªº‹—§]K7\K7]K7\K7]K7\K7]K7\K7^L8]L8^L8]L8^L8]L8^L8]L:]J<]K=^L>\L<\L<[N=]P?^SA`SB_R?`P@bQAcRBdQBdNAdLBjMIhKMdPYf[lnk†w{ž€…­‰ˆ²ƒ©¥‡©¾ˆ Ð‚’ây~ðsqújbÿa^ùJWñ>Që:Lî?NïCQê@Kæ<Gå>Fá<Bß<?Ú<=Ö<:Ï>9Ê=4Á;0¸:,§6&¯A0­<,¬6(¿A5ÌF=Ì=5Ð72Ú85Ü71Þ5.Þ6-Þ8*Ý<*Ù:'Ó6%Ù;0ðSLòZOÙE9Ç8*Í>.ÔB3Ò:-Ô3+ðGBá//î5:ó6=î,7ÿBMñ'3ú&2ÿ)2ÿ(2ù'ý"*ÿ'/þ'/ð&ð)ù+7ê 
.ì&7ñ-Aç#;ÿXrç-Dÿ]lÿ_iþZcùU\õPWëHMÝ?@Î95Ë>7ÄA7·?1¦7&™2!™6#š;'Ÿ<)§8-®7/ÂE?¿;7½31Ä64Å54ÔDCÛLHÚKEÑD;È:0Ä4)Ç5(Ë4)Å5*¼</³9,­/#«(³-$Ã9/ÒC;ØH?äPFßE=Ú70Ý0*ç0,ò21ù13ý14÷/1ö01ö01ó0.ò0.í2-æ5-Ý9/Õ=0¾2%À>1¹=3Ö_Yésqÿû„†û‡Šÿ„Œÿxˆÿh}ÿ_wÿ[uÿUoøLføEcñ:Yô;[æ?YÛ]i‹@;VF-L^6YuBg†Mw–]}œcg€¡jƒ¦n‚¨o€§n¦m~¥l}¥i|¤h}£f£e£f~¤k¦m«l¬k~­g®f±gƒ³i‡µm‹´r°vŽ®|™´‰§½™±Á¤²Á®¦²°˜žz…‹]K7]K7]K7]K7]K7]K7]K7]K7^L8^L8^L8^L8^L8^L8^L8^K:\K;^L>_M?^N>[N=[N=\Q?^RB_SC]QA]OB_OB`NDbPFcOHeNHiMJgNQdR^g_tnny~¨€‡»‡ŠÁž•Ê£‹»«£¹xÓyƒê|{ôvjöi`ø]aóOZìFRòHSøIVõEPï?Jë;Eä9?à:<Ú::Õ=8Î?7Ç>4À</·<,«<+ª?-©8(²:,ÁA6Å<4Ç61Ô<7Ò50Ö5-×5*Ø4(Ú7&Û:&Û;%Ú9'Ý6.æA;ëMDáI<Ð>/Ç8(Ê8)Ó;.Þ;4öJFä01ð6;ö6Añ.<ÿ@Nï'4ó$,÷%(û),ý+,ü*+ø((ö((÷+,ï$'í%(ë&-ý;DÙ'ð4CüARØ&6ÿjuÿbjþZaüW]ôOSåBC×<8Ñ>6¾8,¿D5¹H6©>,™/)(’(ž-%°:6ÑTPÎHGÃ54Æ45Ñ==ëWU×FAÖG?ÐA9Ç9/Æ3)Ê6,Ð7/Î70Ç=3Â91Â5.Ã2-É4.Ñ83Ø?9ÞE=äJ@ÞA8Ú6-Ý0*ç0,ò21û03û03ô02ó12ô01ó/0ó0.î1-ç4-ß9-×=1¿3$¾</·=2Ó^Wévsÿù…ˆø…Šÿ‚Šÿt…ÿf|ÿ\vÿXtÿQoþIhü@aó:Zñ?_áI`ÁS\{?7RF.M_9ZvCe‡Ks•Yxœ_wb{¡f}¥i}¦j|¥i|¥i|¤h{£e{¡d{¡d}¡c~¢e|£j}¦l}©l}ªi|«e}­c¯c‚±c…°h‡²kˆ¯pˆ«s‹«|¬„ª‡Œ¢‹x‡€dppR^^]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8^K:_N>aP@`P@]P?\O>[O?[RC[QE[QG\OG\OI_PMcQOfTTgUUgUUgV\h]nli†sv£|ƒº‚ŠËŠÔ˜‘× ŠÊª‚´¹}ŸÏ€“䄆îvósjüfh÷S\íGQðFOöHR÷EQòAKî=Eä7=à7:Ù99Ô<7Í@7ÅA4¾?0¶>-­>-ª<+«:,¸B6ÇH?À;2»0)Ê:2Í6/Ð6.Õ7,Ø8,Ù7(Ù8&Ü9&à:*Ý4-Û4.âA9éOCÞH:È5%È2#Ù@2â>5õHDè13ò5;ù7@ø4@ÿESø2?ð)0ë#&ì$'ù13õ-/î&&ì&%õ12ë)*ö8:è+1â)1ë3=ÿR^ï=Kÿ\iþ^fñV\îOTðMRêEIÝ:;Õ74Ô?9À6,À>1¼B5³=1¬7-¥3)¡,#œ' š%©/*ÇECÂ::¼,,È35×ABô^]Ñ@;ÒC;ÐA9Ë=3Ì9/Ô=4Ú@8Û@;Û@<Ø=;Ý>;ãA?ä>>à::ã?=ëIDàB9Ý<4Ù5,Ý2+ç1-ð31õ12ø02ô02ô02ô01ó/0ó0.î1-ç4-ß9-Ø>2À4%º8+µ;0Ð[Tízwÿ“’û‡Šúƒ‰ý~‰ÿr‚ÿdyÿ[tÿTqÿNmÿFgû=_ô=\îEbßTg¢BFp<1QI2Pb<]xEf‡Nr“Zu˜^tš_xžc{£g|¤hy¢fy¢fy¡cx byŸby a| bz 
cz¢f{¤h{§h{¨e{©a{©`}«`®`‡²jŠµn‹²s‰¬t†¦w‚žuy“no†lSeYAPM2A>^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^L8`N:aP<`Q>]P?ZN>YOCXPEXOH[QO_UTbXYeX_iYcl\fk^glaildqok‚rtšy~µ†Ê…ŒÚŒŽáŽ‡×Ÿ‹Ó­ŠÄ³€©½yŽÌ{Þzò‚wÿsrÿcgùU\ôMTõGPôCMò>Gï<Bå6;à88Û97Ô<7Í@6ÅA4¼A1·?.«:*­>-±=0¼F:ÌRGÇH?º5.º1)È91Í81Ó;0Ù=1Û;-Ù6'Ü6&à8+à5-Û2+à=4ìNBãK=Ò:,Ñ8*àB6ß7.ð>:é/0ð16÷4<ü8BÿNZÿLUý>Eò38â$&í/1ê,,ð22è**ë/.ò::Ñæ37Ý,2è9@ðENÿXcòNWØ=CÚADáBFá>AÞ9=Ü89Ø88Ó97É83Â91½:2ÀA:ÍNHÖVSÐNNÃC@²72¶95ÏKIÙMLãQRïYZêRQñYVÉ51Î>6ÒB9Ð@7Ó?5ÙB9àC<â@=é?@ç8=ì9=ò?Cï:?è58ì>=öMJÞ93Û60Ú3+Þ3,å4.ì30ò21ô01ô02ö/2ö01õ/0õ/.ð0-é4-à8-Ù?3Â6'·5(³9.ËVOï|yÿ“’üˆ‹ý‰þz‡ýoücwÿXqÿOlÿFhÿ@cù:\ô?^éKdØZh…43f<.VP8WgCa{Kj‰Ps’Yt•\u˜^wby¡ez¢fw dv awŸavž`xŸ`xŸ`{Ÿaz cz¢fz£gz§fz§bz¨`z¨]|«]~­_†²g‹´nŒ³tŠ­s†¦u€qtŽii€dQcUDSL8G@^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M9]K5_M5`P9_P;\O>ZN@XOFXQKYPQ`W\g`hnfsshysg{sg}qh}ol}pq†rw—v}«z‚Á‡Ò„Šà‹Œæ“ãŒÚ „¿žuŸ l‚¬nq¼vlÒviòsmÿllÿgiþ^`ûRWõFMï<Bê7=æ5;á78Ý98Ö=7ÎA7ÆB5½B0¸@/­7)´@3¯:0°;1ÈRHÕ\QÉMC»;0¾90Á7-Ì9/Ö>3Û=1Ú6*Ü4'â6*à3,â70ä=5åE9âF9ÞB3ÞA2â>2Û0&ê72ì0/ï.1ñ.4ú7?ÿPXÿ]dÿZ_ÿOSá.1ã03à,-÷EEè66Ü,,Û-.ë>@Ü25Ù37ÿbhà<CË*2Ê-4Á&*Õ;=á@EÛ6:Ù37ß9=Ü7;Í/0Ì43Í;;ÕGFÞRSì`cõilôekê^aÄ@>ÆE@å^[øhgÿopÿjjåKK×=;É40Ñ>7×D<ÖC;Õ>5Õ<4Ø:1Ü41î5;ñ-9ð,6ò.8ï,4ë,3ó9<þJKß3/Ü3.Ú3+Ü3,ã5.ê40ï4/ò21ô02ö/2÷/1ö.0õ/.ð0-ê3-â7-Ù?3Æ8*´2%²8-ÃNGï|yÿ’‘ÿˆŒÿ‰ÿw…úl|øatûTnýHgÿ>bÿ:_ú6\óAaàNcÂW_p/+_?0[W>^lIe}Mk‰St’\u–_v™_xžcy¡cx buŸ`uŸ`u_vž_wž_xŸ`{Ÿaz¡bz¢d{¥e|¦d|§`y§^y¨Z{ª\}¬^€¬a…¯g‡®m‡ªp…¥sƒ 
r{•nrŠjexe[k^RbU_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N:^L4_N4`N8^O:[N>YOEZSMZTT_ZahbprmzuŒ{u‘{q“zp“vp’ps’rxšt}¨w¶zƒÄ~†Ï„‰Ú‹ŒÞ–Ü™ŠÍ›…¶¢ƒ£ª„‘«~yŸl[šQ>¼MBÔQIâWRëWUñSRñKMí>Cå4:é9<æ9;à;9Ø=8Ó@8ÊB6ÀA0º?/³9,ºA6«5+£.$¾I?Ùg\ÙdZËUI¸<2¸6)À3)Î:0Ù;0Ú6,Þ3)ä6-ß1*ä71ã;2Ý9/Þ>0ãF5äB3Þ8*ß1(é4-ð31ï-.í*.ô17ÿEJÿV[ÿ]bÿ\aâ9<ã9<Õ+,ôJKâ89×//Û57Ì()Ì'+øXZÝ>BÖ:>Ñ6:º#Ä(+Ü<>æAEÝ49Ù,2Þ17Ù05Í(.Ò37äKNöaeûhnùcl÷`iú`løbkÖLLÕNJê\ZêVTçMMãHFÏ42Î3/ÙA<àKDäOHßJCÙ@8×:3×6.Ü.-õ1;û-:ø*7õ'4õ)4ø0:ÿ>CÿJJà1.Þ3,Ü3,Ý5,á4-ç4/ì3.ð3/ô02÷/2ù.1ø-0ö..ñ/-ë2-â7-Ù=1É;-³1$±7,ºC=ìyvÿÿˆÿ|ˆÿqöhxö_rùRlùDcÿ9^ÿ6]ù7\ðFcÑN^¢IKe5+VD0[Y@^lIe|NkˆRt‘[w•_x™`{ždz cvž`s›]rœ\s›\tœ]v^wž_{Ÿaz¡by¡bz¤b|§b{¦_x¦[x§Yz©Y{ªZ~ª_ƒ­e…¬k‡©m‰§s‰¥u„s|”ru‰pnlfwe_P=_P=_P=_P=_P=_P=_P=_P=^O<^O<^O<^O<^O<^O<^O<_N:aP6aP6aO9^O<[OA[RK\VV_Zakgvso†}z—‚~¡|¥}w£{u¥xv§sy©s}°x¸{„¿~†Ä„‰ÉŽÎ”“Íš“Ç •½«›µ¼¦²É­©Å¦”ªˆmœfJœE2¯A4¶@4ÂB9×IEèNNîJKì?Cë=?é;=ä<;ß=:Ø?7Ð@5È>1Á=0½:0¼@6¯4,§1'»I?ÑcVÖh[ÕeWÃOBº>2º4)Ç7,Ö90Ù5,ß2+ç60â0,ã4/â7/Þ7.à>1åC4ä>0Þ4%æ4*é0+ð3/ð..ï-.ò14ó78öBEöMRüY^éFKöQWØ37ëFJÝ8<æAEÚ7:Î.0Ñ05äHKÒ7;Ð7:¹#%Õ;=Û8;å;>ë<Aç6<â-6Ý)2á1;ç<DðLSý]eÿksÿjsú`jöZgùXh÷[fôddèZVèWTØ@=Ï10Ô44Ó33áC@ãJEéRKêSLáJAÙ?7Ø93Û81ã33ó.7ÿ0>ÿ0>ú*7ü.:ÿ;EÿCHûABã1/ß4-Ý4-Ý5,â4-æ3,í4/ñ40ô02÷/2ù.1ø-0ö..ñ/-ë2-ã7-Ø:/Í>0µ1%±7,²;5ésqÿŠŠÿ‡ÿv†þi}óató[p÷PjúCbÿ8`ÿ8aù;_ìLf¿O[~86]=0NF1VX@ZeEcxMj…RrŽ[v”^yša{ždxžas›]p˜Yp™WqšXqšXt›\uœ]y_xŸ`x¡_y¢^z¥^y¤\x¤Yx¤Wy¥V{§X©_‚¬d„«h†¨l‡¥o†¢q€šmx‘jq…ikdbv]_P=_P=_P=_P=_P=_P=_P=_P=^O<^O<^O<^O<^O<^O<^O<_N:cQ9cQ9aP<]P@ZPG\TR_Zaa_mtrŠyx˜€¨ƒ°}}¯yy¯yu°wwµw|¾x€Á}…Ä‚ˆÄ…ŠÁ¿—–¾ 
¼¬§»¶¬µ¾³±È¸«Ð¾¦ÖÁ¢Ñ»–Ьˆ¯oT®\G¤G5¨>0ÂG?àWQïWVñNOé@Cê=?æ<=â=;ß=:×=5Ñ:1É90Á80À=5¹:3·>5ÁOEÇXMÇ]OÑeXÓcUÂL>¼:-Å8.Ò91×4-ß2,ç60æ40á2-à5.ã<3çC7æC4â:-ß3%ê5,æ.&ð0-ï/,ð31ñ54ç-.â24çBF÷X]öW\ÿouåDJëHMà<Cþ[`ÿouÞ=Bÿ}‚ÿmqêQTÔ>@Æ02Ö:=òHKî9>ð7?ø=Fò7Bè-8ô>Jÿ]gÿmwÿjrÿcn÷]gú^kÿaoÿ]nùYeò\]ãPIâJGÕ74Ò/0Ù54Ò0.Ø95ßD?ãJDâIAÙ@8Ó6/Ö4/Ü71é77ê'/ü0<þ2=÷+6ù0:ÿ>Eý?Aì44â3.à5.Þ6-ß4,á3*è3,ï4/ô41ö01÷/2ù.1ø-0ø..ò/-ë2-ã7-×9.Ñ?2¶0%²6,¯50çonÿ‡ˆÿ†Žÿp‚ücxñ\pôXn÷OiûBbÿ9aÿ<döAbäRg°QWd/'WD3GG/RT<U^?btLiQq‹[v’_z˜b|dxœ_r™Zn—Un—Un—Uo˜Vr™Zt›\w›]wž]wž[x¡[y£[w£Xv¢Wv¢Uw£Ty¥Xz¤Z~¦_¦c¡d~œf{—frŒ]i‚X`vRZoPRgH\L<^N>`P@`P@_O?_O?_O?aQAbRB_O?]M=]M=aQAcSCbRB`Q>dS?aP>]P@^QI[QPZSZeapsq‰yxšz|¥~€±€¹}ºxz·rs´lo¶twÈv{̀ƒÊ‹Æ–—䤾²°»¼¹´ËƳÓÌ°ÙϬÛϧÜÏ£ÝУ٘٘Ѩˆ¸€g ^HËzgÖtg½H?×QN÷ccéKLå?Aâ89ä::â:9Ü75Ù61Ö;6Ë81À5.ÇB;¹=3¿I?ÆXK¹OAÎdVÑcTÌXIÅG9Ä:/Í6-Ø7/á6/ã4/ß0-ß2,à70ä=4à<0Ý7)ß5(å7,ä/$ê/&ì/)î1-î20ë40è51â64×79æJNõY]íNS×8=×6<ùX`ÿ~„ÿ~†ÿouÿgoÿipÿflõ\añX[ûZ_ÿUZÿDKü8Bû7A÷3?î+9ì0>ñ=HÿS_ÿ`iÿgrÿepþbmÿcnÿ`oý]gíSSßF@Ö85Ö41×11×/.Ø31Û94áD=ÜB:Ù?7Ö<4×90Û81à93æ66ñ7<ó4;õ3;ö4<÷6;ò59ë33ã1/Þ3,Þ5.ß7.à5+ä3+é2,ï2,õ20÷12ø03ú/2ù.1ù//ó0.ì3.å7.à?5Ì8,¿6,¯/&¸;7ÒXWÿ˜šþ{ƒÿm€ÿe|ø\rðPhôGeüCeÿ;bù9^õNlÓRd‰=?Q-!C;&CG.KM5Z`DZjEcxMqˆZx’bx–`t•\r–Yq˜Yl•Sm–Rn—So˜Tr™Xt›Zuœ]uœ[t›XtWv Xw¡Wv¢Wv¢Uu¡Tt S{¥[{£]zž^u—[m‹Ub~MXrCSj>AW0?U/<R,[K;^N>`P@`P@`P@_O?`P@aQAcSC`P@_O?`P@bRBcSCbRB`P@fVG_QF\OG^TSaZad`omkƒvw–wy¢{}®}¸x|¹quµkp´lo¶nq¾uvÐ|~Չ‰Ó˜˜Î¨§Ç¸¶ÁÉźÓαÛתâÛ¥çÞ¥æÝ¢äÛ¤âØ£ÛÑ 
ÔǛӼš»œ€©|eŋwÊ}m´VJÒa[örpìZ[çKLã?@ç=>ë>@ç;;â66Û64Ö=8É83Ê?8¹6.¸>3½K@¶H;È]MÝo`Ô`QÆH:½5)Å1'Ô6-Ü5-Û0)â51å95ä=7à<3Ü8.Û5)ß4*ã5*ì7.í5+í2+ë0+ç0,ã2,á4.Ý52Ö87Ø>@äJLóW[ú^bü\dþ]eÿaiôS[ïNVñQYÿ_gÿjqÿflõZ`óNTí4:ñ,5ô*6÷-9ñ'5å!-æ'6í7CþPZÿZdÿblÿdmÿenÿcmý[hõQZåEEÞ<7Û64Þ63ß55Ü30Ú2/Û62à>9Û>7Ø;4×:3Û81ß82å95ç:6ê7:ë7:í6:ï79ï79ì65å31ß2,Ü3,Ý6-ß7,â6,å2+ê2*ò1,õ20÷12÷03ú/2ù.1÷//ó0.ì3.å7.ß<3Ô=2Â8.¶1*·74ØZ[ÿ“ûs}ÿfzÿ^vùWo÷OiùFfû?bü<aôAaåOh¹KVx;8L2#<:#<B(EG/OS8VdA`rJnƒXu_v“]t“Xq“Vp”Tk’Ok”Pl•Qm–Rp—Vr™Xsš[sšYuœYuWuŸWuŸWtžTržQqRpœQn˜Pl”NhŒLbFXu?Ni6F]/@W+@W-@W-AX.YL<[N>^QA^QA^QA^QA_RB`SCaTD`SC_RB`SCaTDaTD`SC^PCbUM\QM[QRcYbkdtqm„vu•z|¥y{¬y~¶x|»nsµei°`g¯jn¸ruÆ}}ׇ„Ý—”Û©§Ø»¹ÏËÊÈÜØ¿åá´éäªíè¦ðé¥íå¤çá§âÛ§×Ï¡Ëܸ¯©››ƒk¨ƒp§o`œRGº]UÝmi÷uuï__äJLå@DëADì>@è8:â66ß=:Ô;6ÔA:Â70·7,·>3®=/¶F8Ûj\ßi[Ù[MÊB6Å2(Ñ4+Ù6-Ù2,Þ52éA>ìGCà=6Õ2)Õ1'Ú2'Þ0%æ4*è1)ç.)æ/)ã2,â7/á=4àB9Æ-(Ê43ÜDCðVXú^b÷ZaóS[ñQ[èFQêHSïOYü\fÿmvÿntôY_ã>Dé.5ô-4û2<ü2>õ+9í'4ö6EÿKW÷ISõPWøW_þ^fÿckÿ`i÷RYéBIÞ97Þ71ã75é<8ë;;æ95á51Ý60Ý:3Ù;2×90×90Ü71á83ç:6é<8ã99ã99æ87è88è88æ74á40Û2+Ú6-Û7-Þ8,â6,ç2)ë0)ó0,÷1.õ12õ13ø02÷/1÷//ò0.î3.ç6.Ý90ÜD9Ç80»4.µ/.ä_bÿƒˆølwþbxþWsúPmþLlÿCfö:]ó;_ìIfÔSgDJl;4O>,=<':?(ED/FH0Q\<YkEg|Qr‰[tZqUmRkOiMi‘Ki’Lk”Pm–Tn—Up—Xq˜WuœYuWtœUrœTpšRm—Mk•Mj”LeGaˆE[AUt;Li3E`-AX*>U'B\/E_2Ic6WJ:ZM=\O?^QA^QA^QA_RBaTD`SC`SC`SCaTDaTD`SC_RB\PDZOMZQVbXcjcsqm†vu•xz£z|­w|´sy·mr´bi¯]d¬ag±lr¾x{ʉ‰ß•‘夡䶵áÇÄÙÕÑÎáÝÄçä¹ëæ¯îê­îè¬éâ«ãÛªÚÓ©Ë¡»¶™¥¥›™„“ˆv˜q™rc›bW³i`Ïrmû‹‰õutê[]æLNèCGí@Dì<?é;<ß:8Û<8ÞE?ÓC;Ã:0º:/°6)§1#¿I;ÛaRêk\ÛSEÉ:,Ì4'Ô6+×4-Ù42ñMKþZXíKFÚ91Ö3*Û4+Þ2(ß.&á,%á,%Þ-%Ü1)Ú6,Ù;0×?4Â/'Ä4,Î:6ÙCBßFHàEIäGNêMVñQ]öVbøXbù\eÿgpÿmuôYaÞ;@ê17ñ.4ò-6ò*4ë%2ì(4û=IÿS_í?IèAIêGLôQVÿY`ÿX]ôGMä68á53ä73ê;8ò@>ô@?ï=;æ95à72Ý82Ú91Ù80Ù80Ü71ã73è96é:7â:7à;9ã:7æ:8ç:6å84à5.Ú3+Ú6,Û7-Þ8,â6,ç2)ë0)ò/)ö0-ô22ô23÷12ö01ö0/ò0.î3.ç6.Ü8/àF<É91¾3.¶,,îfjÿy€øgtÿbyýTqþKkÿGjÿ>cò6Yë=^ãQhÄXeˆBBgC7[N;GF1AC-JH3EE-LU6Sc>btLlƒUp‹XnRkPiŽKgIhJh‘Kj“Mk”Pm–To–Wp—Vt›Xs›Uq™So—Pj“MgJeŽHdG`‡D]AWy=Rq8Mi6Lf6Ne7Of8Lf7Oi9Sm>UH8WJ:[N>\O?\O?]P@_RBaTD_RB`SCaTDaTD`SC_RB_RB^QIVMR_Xhnf{tp‰xt—xw¡xz«y{´sy·jr³`g­[c«_g°jr»v|ƀ„Γ•àžžä­®ç»½æËÉßÔÓ
ØÝÛÎáßÆåá¾æâ¼äß¹à׶ÚеÎƱ¾µ¦­§›˜šŒŒŽ€‰…yŠ|q‘vk›si¨meºoi扄î~ósrñedñWYðMNîDEì@@ã:7à;7äB=ÞE?Ì<3Æ=3ÄB5¯1#®0"ÇI;ÜXKÖN@Ì=/Î8*Ñ7+Ñ3*Ö42õUUÿkhú]XãD>Ü;3ß;2ã80â4-ã2,â1+Ý0)Ô0'Í/$Æ/$Â0#Å9,À4'Á3)Ë:5×CAÛEGÛBGÚ@HßCNëOZòVaòVaú^iÿgoóYaáBGê:<í57ï28ï28ò2=÷9CÿHSÿV_ä6?Ý4;ß6;ëADùJOüIMô;Aé13è51ë95ð=9ò<9ó;9ð:7é73â70ß;2Ü;1Û:0Ü90ß82ä73è64é75æ:8ä;8æ:8è:9è;7æ95á6/Ü3,Û7-Ü8.ß9-á7*å3)é1)ñ0)ô1-ó32ñ33ô22ô01ô1/ò0.î3.ç6.Þ7/àC:Ê70À1-À13økqÿqzúftû]túPmÿCfÿ=dÿ9cõ8^æEdÙZm«V[wE>dJ=aYFTO;KI4PI6GE.FO2L\8ZlFf|Nm…SmŠRlŒMhJfŽHgHg‘Ii’Lj“Ol•Sn•Vo–Ut™Vs˜Un•Rj’LfKcŒHaŠF_ˆFZBX|?St;Pn8Ok:Rl=Wn@ZqCUo?Vq>WqARE5UH8XK;ZM=[N>\O?^QA`SC`SCaTDaTDaTD`SC`SCbUEaVP^Vcje|yu}yšzy¡xy©vy°sw´jr³cj°[c«]e®hp¹v~ǁ‡ÑˆŒÓ™Ý£¥à¯³ã»¿åÆÇãÏÎÞÔÓØ×ÖÑÙ×ËÙÖÇ×ÑÅÒÉÀÌþŹ¹´¨¬£šŠ‹†x{tyvquleleogŽa[`[ÀsmØyuð{ü|yÿrpýccöSTðHGïC?ê>:á<6àA;Ñ:1ÓC:ÝTJÂ>2¸6)½9,Â=.È?/ÏB1×C5Ö@1Ñ7-Ð2/êLMú_]òWSßD?Ù:4Û81à70æ93ç92ç92â:1Ú9/Ñ9,Ê8+Å9*°(¸3"È@2ÓI>ÒD@Ë;:Ì7;Ï:@Ñ7AâHRíV_ðYbõ^g÷`iêU[ÜAEå@>ì?;ó@CûGJÿLTÿQYÿT]ÿV^á4:Ú/5Ü/3ç7:ô=Aø;?÷48ó12í42ï95ð86í42ë20ê20ç40á4.ã<4à=4ß<3Þ;2á83ä73è43è43è88ç98è88ë99ë97é75â5/Þ3,Û7-Ü8,Ý:+ß7*ã2(ç0(ï0(ò1,ñ42ð43ó32ó11ó1/ñ1.ì3.ç6.à91Ù<3Ì71Á0-Ñ@CÿnuýkuùbqðRiõHfÿ;aÿ4^ÿ6bû>däNiÊ\i‹GFjF:]L<]VCXQ>RJ7OG4ID.DJ.GU2Sc>^sHgMj‡OjŠKgŒGgŒFfŽGfHgJi’Nk”Rm”Um”Uq•Up•Rj‘NgŽKcŒH`‰E_ˆF^†G\‚E[~DWxAVr?Uo@YpB]rG^uG[sCYtAXs@NB2QE5UI9WK;XL<ZN>\P@^RBaUEbVFbVFaUE_SCaUEfZJh]YngwupŽ|x™{y wx¦vw­pu¯jo¯ah¬^f®_g°em¶pxÀ}…͉Ø—Ø™ 
Ö¡¨Öª±Û´¹ßº¿ßÀÁÝÃÃÛÇÆØÆÃÔÅÁÐÁ»É¼³Ä»°Áµ¨¼¦˜¯–‹œˆ…Œqqqtpopfewhe†nj„_Y•d_›ZT¹f`Ùsnðzvÿ{wÿwsÿhfüZWþRNõHBæ=6çD=Ø;4ÛG=ê\RÅ;0Ä<0¼6*¾6(Ç>.ÑD3ÔB3ÔA1×A3Ò86Ø>>ÝCCÜB@Ö=7Õ81Ø7/Þ7/å:3æ93å:3â;3Û>5ÕA5ÑE8ÌG6®-¼;%ÍJ8ÑL=Æ=5º/*À00Ê9>×AJáKTëU`ðZeð]eìYaßLRÔ>@Ï4/Ø7/à;9ê@@òCHõDJñBIî?Fâ5;ß26ã36ï8<õ79ø14û/2ü14ï20ð95ð74ì0.è,*ç0,æ3.à3,ä=5à=4à=4à<3ã:5æ95ê65ì65ì57ì57í57î68î66ë54ä2.Þ1+Þ7.Ý9-Ý:+ß7*á4&æ1&í1(ñ1,ð42î53ñ42ò21ó1/ñ1.ì3.ç6.â;3Ò4+Ò:5Æ20èVYÿpxùcnòZiçI`óFdÿ8bÿ/]ÿ5bÿBhÜLe±PYj5/^G7XK:UN;[P>VI8M@0JC0@F,AO.K[6Xj@bxGfJhˆIf‹FeŠDc‹BcEeŽHgLh‘Oi‘Rk’Sn’RlPhNd‹JaŠH_ˆF_‡H_‡H_…H_‚H]~G]yF]wH`wKcxOczN]uC[vAZu@JC3MF6QH9TK<WK=YM?\O?^QAdWFeXGdXHbVF_UIaWMg^Ulcfwr‰yušxvžtsŸrr¤tu­lp¯bg©Za©_e¯fl¸ms¿v|ȁ‡Ó“Ý—œÞ›¡Õ §Ó©­Ú®³Ý±´Ý²´Û´´Ú¶´Ûµ±Ö³­Ñ®§É«¡ÄªžÂ¥™½™‹¯ˆ~™|x†dbgiefcYZj[X{c_yVP’c]ŠNF¢UM½`XÖkcñwrÿzÿzwÿnjþa\÷TOéGBîOIÝB=ÞG@êWOº,"¾1'¾4'Ç>.ÑE6Ë>-Á2"Ê7'ÛE7ÞE?Ó97Ì3.Í50Ô;5×=5Ý<4á=4ä;4ä92Þ5.×4+Ï5)Ê8+Å<,Â?-ÔS>ÈG2½<)¾:-Ã=2Ä;5Â74Â43ÜKNÞMRáPUåTYèY]åVXÜMOÓD@É5+Ñ7+Ù80Ý52â24å26æ18æ.6ì4>ì3;ó5?û9Bý4>ú+5ÿ(3ÿ/9ø-3ù57ù59ð./ë*+é0.è51â5/ã<4à=4à=4á=4ä;6ç:6ë76î66ì46ë35ì25í34í55ê41ã1-Û0(Þ8,Ý:+Ý:+Þ8(à4&ã3$é3&ì4*î50î52ð42ð3/ò2/ï2,ì4,ç6.ä=4Í/&×>9Ì35ùbgÿoy÷[iðNcêC]ùFfÿ:eÿ0]ÿ6aöGhÉJ]“CFR,#VI8SL:OH6[O?UI9F?-HF1>B'@J(IU1Sd:_rEe}Ig„JeˆHcˆCa‰BbŠCcEeŽHfMhPiQlPkMfJcŠIaˆG`‡F`‡H`†I^G^F_}G`|Ia{Kd{Mf|Nf~N]xC]z@^{CCA5FD8JF;PG>RH>WI>]M>_O?`Q>aR?`SB_VGbYRgb_ojnso~wq“zt |y¨yw©pp¦gg£`a¢\_¤`b­fhµopÀyz̄‚֍‹ß–’眙栞ݡ¡×££Ù¤¤Ú¦£Ø¥¢×¥¡Ô¤ Ó¥žÑ£Í¢šÉ¢™ÈŸ–ٍ»Ž‚°ƒ|žrn}igld^`fZZkYUrWP}WNŒWO•RI¨ZPµXPÁZSØkdìyrù‚|ÿ‹†ùuqÿusí_]ßPLÙEAïZTÌ71Å2*Ä6*Ã7(À7'Ã7&Æ9(Ê;+Ð<.Õ?1Ò8.Ö<2×?4Ó;0Í5(Í3'Ø:/â@5à90á90Þ7.×7+Ï7)É:*Æ?,ÄA/Â=.Æ@4ËE:ÍG<ÌC;Å<4»2,¶+&Ç<7ÐEBÙOLàVSãYVáXRØOIÐC:Ì;*Ó:(Ü8,â5.è./ì*2ð(3ô'6ú->ú+?û*?ý)?ÿ(?ÿ&?ÿ$<ÿ$<ÿ$8ÿ'9ÿ+<þ,9ó+5í.5è45ç;9Ý84Ý:3Þ;4à;5ã:5æ95é73ë54ë35ì46ë54é54ç53â5/Ý5,Û5)Ü8,Û9*Ü9(Þ9&ß8&â7%ã6%æ4&ì5-í4/ð50ñ4.ñ5,í5+ê5*å7,à90Ø7/Õ:6Û?BÿmwÿbsûSjõIcÿKjö7Yÿ6_ÿ<gø>cçNj¯IV]#!D, 
?:'HE4PM<PI9EB1>C/?G/BH&EK%KS.S]8[kDavKd~NdƒJb†F`ˆBa‰@c‹BeFhJlPm‘Ql‘LiŽHgŒGf‹HfŠJe‰Ic…HaƒGdƒJdIbGc~GdJdHe~Ga}C[{<\=_‚B=?4?A6FC<JE?PE?TG>[K>]M=`O;`Q<^SA^VIc[Xhcinlyso†tp•vpžtp¢pm¢gfŸaaŸ]^¡\\¤ed°kjºtrÅ}{φ‚ٍ‰à”Žæ˜“å›–Ùœ˜Õ™Öžš×ž˜Ö—Õœ–Ô›•Ñž–Ñœ”Íœ“Ê›’ÇšÄ“‰»ˆ®‚xtk|mdif]^f[YiZSoWM{UJŠVKšVK°\RºYRÁXRÓfaàqjãvoí~wý‡…ÿ‡…÷usîgdãUSëZUÇ2,Â/%Å7)Ã7&À7%Á8&Ä8'È;*Ï;-Ó=/Õ9,Õ9,Ó:,Ò:,Ñ9+Ñ9+Ô8+Ø8,ß;1à90Ü8.Ö8,Ï9*È9(Á:&¾9&¾6*Á80Æ=5É@8ÊA9È?7Å<4Â91¿4-Ç>6ÑH@ØOGÚQI×NFÌC;Ã7*Ë8&Ó8$Ý7)å4,í//ô+3ù)7ý(:ÿ+@ÿ*Aÿ)Aÿ&Aÿ#@ÿ!<ÿ=ÿ:ÿ:ÿ 9ÿ%;ÿ);÷-9ð19ê7:å;;Ü94Ü:5Þ;4à;5ã:5æ95é73ë54ë35ì46ë54è64ä71ß6/Ü5,Ù5)Û9*Û9*Ü9(Ü9&Þ9&ß8%á8%ã6%ç5+ê3+ë4,ë5*ë5*ê6(ä7)á7*ß;1Õ4,Ó54óTYÿesÿ[põGbÿIhÿAdÿ;`ý3[ù;aòMmÍLb‰8?S*$A5'69(;<,B@1DA0@?-=B.>G,=CKP'\b<eoJhvRezQd~Od‚LaƒFa†Aa‰@cŒ@gDiŽHkMl‘Nl‘KjGhHf‹FgŠJf‰If…JdƒHfƒKdIdHd€FeGeGfEb~A]€>_„?b‡B69.:<1??7DA:KB;OE<VH=YL<^O<\O<\SB^WMc]]helom{sq‰so”qk™jf˜c`•\[”ZZ˜\] \_¦hjµno¿wxÊ€Ò†„ØŠˆÜ‹à‘ŽÝ“Ó“‘Г‘Г‘ДÑ“ŽÐ“ŒÏ’‹Î•ŒÍ“‹Ç”‹Ä”‹À“‹¾Œ„³ƒz¥€r•|ewu^fi[[d[Vd[RfXKrVH„TH¢YP½`YÉ\YÏYWÛcbákißokãvqûŠ„ÿŠ†ÿ…ÿ}y÷nhîaXÄ4+À2&Ä8)Á:'À9&À9%Â9&Å:'Ì:+Ð:,Ö:-Ó5)Î5'Ð8*Ô>/Ô>/Î6(Ë/"Ú:.Ú8-Ù7,Ö8,Ð:+É:)Â9&¾7$½5)À6,Â8.Ä:0Ç=3Ê@6ËA7ÌB8Ç:1Ê@6ÏE;ÐF<ÒH>ÑG=Ê@6Å7+Ì6%Ö6&ß7,ç4/ñ03ö-5û+9ÿ*<ü';ü'=û%=ù#;û!:ú7ü7þ6ÿ8ÿ6ÿ$8ý*;÷2<ñ6=ë8;ä::Ü94Ü:5Þ;4à;5å95ç85é75ë54í36î47í55ê65å61á6/Ü5,Ø4(Ú8)Ù9)Ú9'Û8%Ý8%Þ7$à7$á6%á5'â4)ã5*ã6(ã6(á7(Þ6)Û7+Ý<2Ð1+Ø88ÿkrÿ]nþPiñ<[ÿHlÿ7]ÿ>eù5[î>bæVqª?Qa#(L/);7+07'37&9<)@=*A<(@>)BB&?FXa4s~T€Œdw‡`hSa}La€G`ƒC`…?a‰@cŒ@hŽEjJkMl‘Nn“Mk‘HhHgŒGh‹Kh‹Kh‡Lg†Ke„KdƒJc€FdGf‚Hf‚GeFb€Bc†DeŠEhH25*58-;<4A>7G@:KB;RE<VJ<XL<YM=YQD^WOc^bigron€roŒrm•jg”`^WW‹TUŽWX˜\_¢ac­ln»rsÃ{|΂ƒÕˆ†ÙŠˆÛŒˆÜŠ×ŽÒÎŒÏŽ‹Î‰ÏŽˆÐŽ†Ï…Ώ…ÊŽ…Ə…Á‡¼‡¶‰‚¬€xoŒ€_p|Y`lXYd[T^_Q_\IjWF}SEžUL¾\YÐY[ÙX\äcgçkkáplåxsì}vízsþƒ~ÿ†ÿ{õkaÉ<2Ä8)Â;(¿:'¿;&¾:%Á:&Å:'È9)Î8*Ø:.Ô6*Î5'Î8)Ñ>.Ñ>.Ë8(Ç/!Ò6)Ó5)Ò4(Ò6)Ñ9+Í<+È=*Ä;)Ä:-Ä:/Æ90Æ9/Ç:1È;1É<3Ë=3ÓD<ÓE;ÐA9Ë=3Ë<4ÏA7Ï@8Î;1Î5'Ù5)â5.ë31ò/5÷+6ù)7ü):ù&9÷&;÷&;ø'<û&<ý%:ÿ$:ÿ#:ÿ"7þ#7ú%7ù,;÷5>ò9?è8:à87Ü94Ü:5Þ;4à;5å95ç85é75ë54î47î47î66ê65æ72á6/Ü5,×5*Ø8*Ø8(Ø8(Ù8&Ú7&Ü7$Ý6$Ý6$Û5%Ü6(Ý7'Ü9(Ü9(Û:(Ù9)×:+×:1Ð3.ëHKÿoyÿVjøD_ø<_ÿAgÿ3\ÿ8bø>cèMl
ÅOe€0=J C1-11'.6'4=,=B.B@+B<$E>$HE$U\0p~MŸr—¨|ƒ˜mj„W_{J]~E_ƒCa†@bŠAfCkJl‘Lm’Om’Oo”Nl’IiŽIhHiŒLiŒLj‰NiˆMf…LdƒJdGe‚HgƒHh„If‚Gd‚DgŠHiŽIl‘L-3'36+891<;6B=9G@:MD=QG=SI=SK>UNDZUQa^eigumklkŠjhb`XVˆPP†QQXYš`cªfj´orÁuxÉ|Ђ…Ö†‡Ù‡ˆØŠ‰Ù‹ŠÖ‹ŒÏ‹ŠÌŠ‰Í‰‡Ð‰…ш„ÑŠƒÑ‰‚ÐŒ„Ï‹„ȍ…Á†º†°‡¥~w–m…„Yj€SZnVTc\RZaOZ_HeZDxUB’LD¸TRÐSWÛRYå^dèejãklæuqäunâoh÷~vÿ‚{ÿ‡~ôl`É?2Á8(À;(¿='¿='À<'Â;'Å:'Ç8(Í7)Ø:/Ø7-Ò8,Ï9+Ì:+É:*È9)É7(Ï9+Ñ7+Ò8,Ñ7+Ï7*Ì8*Ë<,Ë>-Ê<0É=0Ê<2É;/È:0Ç9-Å7-Å5*Ð@7Ð@5Í=4É9.Ë80Î;1Í:2Ì5,Ð/%Ù0)ä20ì25ô/8ù-9û+;ü,<ü-?û.?ü/@ü/@þ/Aÿ/?ÿ-?ÿ.@ÿ)9ü)8÷+7õ0:ñ6=í8=ã77Ü43Ü94Ü:5Þ;6â:7å95ç85ê65ì44ñ48ñ48ï56í76æ72á6/Ú6,×5*Ø8*Ö9(Ö9(Ø9&Ø9&Ù8&Ú7&Ù8&Õ8%Õ:&Ö;'Õ=(Ô=(Ô=*Ô=,Ô<.Ñ7/Ú;8ÿ^eÿaqÿPi÷<[ÿAiÿ4_ÿ4`û1YôKlÛYq•>NZ%-C),:2/-0)08+7@/<A+<<"A<UH(`[5{…S£lª¾‹¨¾Ž¥wo‹[aK^F`„DcˆBeDj“Go”Np•Po“So“So”Nl’IiŽIgŒGiŒLiŒLi‹OhŠNg†Ke„IdƒHe„Ig…Ih†Hg…Ge…DiŽIi‘Kl”N+1%.4*470893>:7B=:HA;KD<NH<NG=OJDWSR_^ffeugggf†ba‰[YŠQQ…NO‡RS“[^£ei³lo¾uwÊz|сփ…؆‡Ù†‡×ˆ‡Õ‰‰Ó‡ˆË†‡È†…Ë…ƒÎ…€Ð…€Ò‡҇€Ð…ÒŒ…ˍ…ÁŒ…¸‹…«„{v|jz„VcPVnUQd^P[eMXbIb^EsXCOE´WRÐUZÚSZç\cèagågjèpoìyræqh÷|tósjþxmæ\QÄ;+¾5#Â;'Á=(Á=(Ã<(Ä;(Ç:(Ë9*Ï7*Ö5+Ø7-Ô:.Î:,Ç:)Ä8'Æ:)Ê=,Ï=.Ö@2ÙA4Ö>1Î8*È4&Ê8)Ë<,É:,È:,É;/É;-Ê:/È9+È5+Ç5(Ê6,Ï;/Ò>4Ó?3Ö?6Õ>3Î7.É,#Ò+%Ü-*ç02ð39ö1;ú0<ý0?ý0?ù0@÷1@÷1@ö0=÷/<÷-9ù+8ú*7ü0<ù/;ô0:ñ4;í6;æ69ß55Ù42Ü94Ý:5ß:6â:7å95ç85ê65ì44ò59ò59ð67í76æ74ß6/Ú6-Ö6*Õ7+Ô8)Ô8)Ô9'Õ8'Õ8%×7'Ô9%Ð9$Î<%Ï=&Ï?'Î@(Î@*Ð?.Ò;0Ñ61ëHIÿfrÿOdÿHfû7[ÿAmý/]ÿ7aò4XãPj¼Ufm19E(*E697325406906=-3;#38CCja:…TŸ®w©Á‡µÎ—¯Ç“’®}v”be†Q`‚F`…BcˆBgFm–Jr˜Or—Rq•Up”To”Nl‘KhHf‹Fh‹KiŒLi‹Ni‹Og†Kf…Je„If…IhˆIi‰Hi‡Gf†CjJk“Mn–P(0#+1'/4-350764<85@=8C>8IE<HE<KHCRPQ[[eabtbb|``‚_^ˆZXŠTTŠTT\] 
gi³psÄvxÍ{Ø~‚Ûƒ„Þ„†Ý„…؃„Ö…„Ô†…у†Ë‚…ȃƒË‚Íƒ€Ñ…€Ôˆ€Õˆ€Ó…Ò‹„È‹ƒ¾‹ƒ²‰‚¤|“wp€veo~U[|PQnUNf_O]eMZdIaaGq]E‰TF­]TÈZ[ÔVZâ[aæ]déaeîllõzsírj÷xoÞYPå[PÒD8Ã5'Ç:)Å:'Ä<&Å='È=(Ê;*Í:*Ð8*Ô6*Ó0'Ô3)Ò8,Í;,Ç;*Â;(À;(Ã<)È;*ÕC4áK=ÞH:Ó=/Ê4&É5'Í;,È9)È9)È9+È9)Ê8+Ê8)Ë7+Ë7)Í6+Ò<.×?4Ø@3Ú@6ÛA5×:1Ò1)Ü0.å14î49õ6>ù4>ù1>ú0@ø/?ò.<ï/<í/9í07ï-5ï,2ò+2ñ*1õ2:ó4;ò5;ì59å57á55Þ65Ü75Ü86Ý97ß:6ã:7æ87é77ê67ì46ô5:ô5:ñ7:î87æ74à70Ú6-Ô6*Ó7*Ñ8*Ð8*Ð9(Ð9(Ð9&Ñ8(Ð9&Ë9"È:"È>$Ç?%È@(È@*Ì=-Ï;1×96üU\ÿ^qÿGcÿ?aÿ:aÿ7gþ3`û9^íFbÅNa‰@IS12A64@:<:46?56>7179+4="9CSZ.‚‚P¢§q±ÅŠ¯ËŽ°Ì’¦Ã’®}{™egˆQ_E^ƒ@c‰@iDo•Js™Ps˜Sq•Uo“Sn“NkJgŒGeŠEgŠJh‹Ki‹Ni‹NfˆKd†Id†IeˆHgŠJh‹Ii‰Hf‰Ej’Ll–No™Q&.!(0%-2+13.331764:95=<7GD=DD<GFBOOQZZd_ap_az\\~`_‰\ZŒYX‘\\šgg­qsÀz|р܁„ႅ䅆ㄅჃہ‚Õƒ‚Òƒ‚΄‡Ìƒ†É„„̃‚Î…‚Ó‡‚ÖŠ‚׌ƒÔ…ÐŒƒÄ‹‚·Š‚«†€œxˆsnuqbevWUtSLjVKf_MbfO_eIcbFp^FzQ?[M·ZSÅSR×VZâW\êY`òce÷rmðme÷qhÑD;ÔA7Å1%È4&ÔC2Ç9%Ç:&É;'Í<)Ð:+Ó:,Ø8,Ù7,Ï,#Î0%Î6)Ë<,Ç@-Á>*¾;'¼7$À4#ÑB2ãOAäN@ØB4Ë7)Ë7)Ð>/Ê;+É:*Ç8(Æ7'È6'Ê6(Î8*Ð8+Ó;.Õ;/Õ9-Ò6*Õ7,Ú<1Ý<2Ü71è88ñ8=÷:Aû9Bú4Aõ/<ò,;ï,:í/;ê19ê38ë48í57ñ56ô36ó57ð37ï6;í9<é69â45Ý33ß76à;9Ý86Þ97á96ã:7æ87é77ê67ì46ô5:ô5:ñ7:ì89æ95ß82Ø7/Ó7+Ð8+Í:*Í9+Ì;*Í:*Í:(Í:*Ì;(Å9"Â:"Ã=$Â>%Ã?(Æ?+Ê<.Ð92ß<?ÿ\fÿQhÿIfÿ9]ÿ=hù.]ý<hé;\å\p¦LV^..G83BC><89?48E26C41=:)>E&FU*bs?›a­½±ËŒ«É‹¦Ã‹º„Ž¬z{™ef‡PZ|@^?a‡>hŽCo•Jr˜Or—Rp”Tn’Rn“NjIf‹Fd‰Df‰Ih‹Ki‹Ni‹NfˆKe‡Jd‡GeˆFgŠHh‹Gh‹GeŠDk“Lm—OpšR(0#'/$(-&).(.0-3317839:4==5?@8DE@JLKORYVXeZ\s]^}YX‚[Y‹^]–ee£nn¶wxȁ؄…⌌ðŠŠì†‡ä‚‚Ü€€Ö‚€Óƒ‚΀ƒÈ„ǃƒË„„ΆƒÐˆƒÓ‹„ÒŒ„ÏŠ€ÅŠ€¼‰€¯‡}Ÿ€xwozkegf]Xm[Qm[Mi\Lf^Kd_IeaHhaGo_FoP;…UA¥]O¿aYÑYXÝQTîS[ø\`ùheæYPÔD<Î70Ñ7/×:1Ö=/Ô;+Ï9(Í:&Ï9(Ñ:)Ô8)Ø8*Ü5,Ü5,Ð/%Í3)Ì8,Ê>/Å@/Á@-¾=*¼9'Â9)¾0"éWJßK?Ã/#Ñ=1Î</Ç8(Ê;+É:)È9(Ç8'È7&É6&Ë5&Ì4&Ð7)×;.Ü>2Ü<0Ù7,Ú8-â>4ìC<õBEò9?ð2<ö4?ö2@ñ-;í-:í2=ì7>æ5;ä68å78ë99ð:9ñ78ï77ò;=î<<ê::æ87á85ß74Ü75Ü75Þ97á98ã99æ::é9;ì9<í9<ï8<ô7;ó6:ð8:í9:ç;9à;5×90Ï7,Ê8+È9)Ç9+Æ:)Ç:)Ç:(È;*Ç<)Á9#Ä='¾:#¶4¼9%ÉD3Ï?6Ê1,ÿ^eÿVgÿIbÿ<\ø1X÷0Yû6cõBiçYo¬FQo33N3,B?6>C<@78@-1L/3K2.A9&DJ(Zn=~š_ž¸w¨Å‚«É‹Ÿ¾‚˜¶€”²~‚ nf„RXvBY{?]€>c‡AiDn”Im•Ll”Nk’Ol‘NiŽIfŠDc†Bc†Bf‰Ih‹KfŠLdˆJg‹MhŒNg‹Kf‹HeŠEf‹EjIk“Jq›QržSt 
U*0$)/#).').(-/,130561782;<4>?7AD=HJGOPTSUaXYkZ\u\Z^]‰db”kj¤utº~·‡ß‹ŒéïŒŒð‰‰ë…„ ؀~ҀρË…†É†‡È‡ˆË‰ˆÎ‹ˆÏŒ†Î†ÌŽ…ÈŠ€¼‰±†|¡‚x‘{s€tknlb`e]Re`Me^Kf^Kf^Kh]Ij^Hk_Gn^Ew_G[C’YF­ZLÇVPÞRSõQXÿX_ðUSãLEÙ>9Õ60Ù6/Ü90Û9,Ö9(Ó8&Ñ8&Ñ8(Ô8)×7)Ü6*ß4,Ý5,Î0'É5)È9+Æ=-ÄA/Á@-¿<*¾9(Ä8)Ë<.äREÜH<È4(Ë9,È:,Â4&Ê=,Ë<+Ê;*Ê;*Ë:)Ì9)Î8)Ï6(Ó7*Ö8,Ù7,×3)Õ1'Ø1(Þ7.ç<5øDGñ8>í/9ï/:ð0=î.;ë1<ë6?ä5:Ü36Ø22ß:8îEBøIFøDCó?>é:7ç98å97â96á85à85á98á98à87â88å99ç9:é9;ì9<í9<î7;ó6:ò59î68ê88ä;8Ü:5Ò8.Ë7+Æ8*Ã:*Â:*À;*À;*Á<)Á<+Á<)½9$Á?)»:%³5¹:'Á;/Ê:2Ø<=ÿ^gÿPdÿA[ù8Wö6[õ;`ô>dãIe«?Lƒ?>_:2G;/:>08>2F<:R>=N0.S8/VK5ciEx[°pÂ}¡Æš¹}š¹€š¶ƒ©xrŽ^YuETp=\{B_@b†@iCm“Hl”Kk“LiMiMgŒGe‰Cc†Bc†DgŠJiŒLhŒNg‹MiOiMiŽKhHgŒFfŽEl’Im•LpœQoRt U-1#-1#,/&,.)//-11/34/45-9;0<>1@B7EHAKMLQRWVVbVWi\Yt^]fdŒpn zx·…ƒÎ‹ßêŽïŽŒíŠ‰ç‡„߁Ó|Ë}Ȁņ…LjˆÆŠŠÊŒŠÉŒŠÉŒˆÅŒ„¿‹‚·‰€­†|¡€v{p€ujpndbi_Vc]M_aL_aKc^Jg\Jl[In\Ho]Gq]EwaIxX?‡S=£VDÇXOãUSøOTþMSâ><Þ<7Ý84Þ71á6/á6.Ý7+Ú7(Ô7&Ó8$Ó8&Õ8'Ø6)Ý5*à3,Ý5,Ë3(Ä6(Â9)Á<+Â?-Â?-Á<+À8(Ã5'ÛI<ÚF:ÕA5Î</Ä6(Ä;+¾6&Æ:)Ç:)Ç:)É:*Ê8)Ë7)Ì6(Í3'Õ9-Õ7,Ö3*×3*Ú3+ß6/å:3é<8ð<=í49ë07ì18î3<í4<ë7@ê=CÝ7;áAAìNKøYUýZUúQLïB>å84â62à72à72à72á85â96ä;8ä;8á77ä88æ8:è8;ë8<ë8<ë8<ì8;ï58î47ì57ç77â:7Ù:4Ï8/Ç7,Â8+¿:+½;+»<+»<+»<)»<+»<)¸9&½@*³9$±6$¶;+µ0'Ã40ìPTÿ[iÿJ`õ:Uð7Vñ@]ïHfçHdÍNa‚89e?6R@2DA09=,57)>6+L:0E-#R=,g]Bˆ_’«t—»{–Áz–¿{”³xœ¸…š´…€šm\uKHb5Nh8[xB_}?cƒ@hŒDl’Gj’Ih’JgLfKd‰Dd‡Cc†DeˆFh‹KkŽNkOjŽNiOiOiMgIgHh’Hl”Ko™Ms¡Vs£Wv¤Y24&04&01)01+12-23.34.46+9;-;>-?A3DF9IJDMONRQWSS]XVd\Zoda~nl”zw¬„‚ÃŒ‰Ö‘ã”‘ꓐ뎋≅قÌ|Ã|¿€~¿„‚Á†…Á‰ˆÂŠÃ‹ˆ¿‰…¸‡®…}¤…{€vŽyn~rgmmaak^Vi[Pc\J]`K]aJc^Jh[JmZKqZJtZIv[FsWAxR;ŠQ=ªYFÍ[PãTPðFIñ>AÛ2/Ü5/á51ã60ã5.á5+à6)Û8)Ö7$Ó8$Ñ8&Ô9'Ø6)Ü6*à3,Ú6-È6)¿7'¼7&½:(¿>+Â?-Ã;+Ä8)Æ4'éUIÓ<1Ì8,ÑB4¾5%À=+À=+À8(Ä8'Å9(Ç:)È9)Ë7)Ì6(Í3'Ò6*Ô3)Õ2)Û4,â92é>7ì?9ì=:è45ì59ï6;î5;ê2:å28ä7=â<@óUVübbÿmjÿidóVQäA<ß63Ý4/ß61Þ71Þ71ß82á85â96å97æ:8å78æ89è8;é9<ë8<ë8<ë8<ì7<î5:î68ê67å97ß<7Ö<4Ì9/Å9,¾9*º;*¶;)µ<)µ<+´=)³;*³<(³:'·@,­8&­8'³;-«+"Ã54ý`gÿTdúG\ð<Uë=XçG_áOdÒO_´SZvGAXH9KD2FE1BE0=?*:8#;3?3PG*nkHŠ“f•¬t‘´tŽ·s‘ºxž»…¡¸Š§}l‚[K_:AW0Lc7Wr?_{@dƒ@iŠCk‘Fi‘Hg‘GdHeŽJb‡Bc†Bd‡EgŠHjMlOm‘Qn’Rj‘Pj‘Nj’Lh’Jg‘Gi“Gm—Kpœ
Ov¥Wv¦Zx¨\78(68*66,56.56056067/68+;>-=@-@C.DG4IK>MNFQPNRRRVUSYX]a_lkius›€}²ˆ†ÇŒŠÓ•’ᔑ␍܋‡Ó„Ä}º|µ|³~·„º‡„¹‰…·Š„²†¨z›€xs‡znzrfjm_^j[Ti[PjZKfZJb_Lb_Le^Li\Kn[Lr[Kw\K{ZG~UA†R=›RA¶VFÎRHÚG@â88å33Ý1-ß3/â5/ã5.á3,ß3)Ý5(Ü9(Ó8$Ð9$Ï:&Ñ:'Õ8)Ù7*Ý5,Ø7-Æ:+»9)µ6%·8'½;+Â=.Ç;.Ç7,Ð90õ\TÖ=5É5+ÏA5¸3$·:(¼?+¿:)Ã:*Æ:+È:,Ê;-Ì:-Ï8-Ñ7-Ð3*Ò1)Ö1+Þ71å<7ê>:è;7æ74é75ï;<ð<?ê69ä15ä5:èBFíMOÿxwÿqnñ`[ÝJCÑ83Ñ2.Ü73ç?<à93à93à72à72â62ã73å76å76æ68ç79ê7;ë8<ë8>ë8>è7=ê7;î7;î79ê7:ä::ß=:Ö=7Ë;2Ã;/º8*µ:*³;*¯<)¯<*¬<(¬;)¬;)ª;(¬=*¦8'§9*ª8-¬/)Ë@CÿhrôM^óH[íDYåH[ÙO^ÉT]·TWœXUdM?OL9LG4JE1FD-?B'=D#>H#KV.\g<xT‹™f‘¤m©n²r—¹}Ÿ¶ˆ’§€u†dRcC?P0BS1Oa9Vl=b{Be‚BiŠCjEi’Fe‘FbHcŽGb‡Bf†CgŠHiŒJkŽNmPn’Ro“Sj‘Nk’Oj“Mi“Ki“Ij–InšKržOv¥Wv§Xw¨Y?=.=>.==1==3==5;<4;=2;=/?B/@D-CG.FJ3KL:NOAQQIRRHYWJZZN_^\gerqn‰{y¡ƒ€·‡…ďŒÑŒÓŠÏŠ…ǃº~y¯zw¦yv¥{v¬|y®€|®ƒ}©ƒ}£x—|t‹zrvksrffm_\j\Sk[NlZLl[Kk[Ki]Mg^Mi]Mj]Mn]Ms^My\LZI“`O›VG§N@¸J=Å@7Ï6.Ø1+Þ1+á51á6/â5.â4-à2)Þ4'Ü6&Ú;(Ñ9$Í;$Ì;&Î;'Ñ:)Ö9*Ù7,Ó9-Â=,µ:(°5#²5#»9)Ä<.É;/Ë7-Ù?7ø[TãIAÎ:0Ì@3¸6&¬3 µ<)¼9'¿7'À7'Ã7(Å7)É7*Î7,Ñ7-Ù<3Û:2Þ93å<7ê>:ê=9ç85ã41è96ë<9ì::è8:ì>@øPPÿbcÿppùheâTPÊ=6À1+É40Ö;7ß=:à;7â;5á:4â94á83ã73ä73å55æ66é69é69ê7;ë8<ë8>ê7=è7=é6:ï8<ì89è8:ã;:Ý>:Ó>7É<2À</¶8)²:)®;)ª;(©<(¨;'¥:&¥:(¥<) 9( ;) 9*¢6*µ>8ÛTXûepîK\ïI]éJ\ÛM[ÉQZ´VV 
ZRŠ`RPI7HM9NI5H@+;567>O%Lf6lŠTw”\„›e‰šd›f£k”­t˜°|ƒ•mn}\Q`C<J09F,AP1O_;Wj=czDe‚Bj‹Dk‘Fi’Fe‘FaFcŽGcˆCi‰FjKlMlOlOm‘Qn’Rm”Qm•Om–Pl–Lk—Jm™JrŸNs¢Q{¬]{¬]z«\DA0DB3DB6CC9CC;CC;BB6BC3DF0EH-GJ-JM2ON9RQ?TREUSD\ZC\\D\]Oaaaihxrq{z¦~µ†‚¿ˆƒÃˆ„Á„º{®ys¡to—om•plžrn¡vpžwršwr’to…skzrinmb`l_Wj\Qk[Lm\Lo\Mo\Kq[Mq[No\Nm]Nl_Om`Os`Oz_N†ZMŸ_S¤PE®B8º;2Æ6-Î1(Ö2)Ý5,â70á6/á4-à3,à4*Ý5(Ø7%Ô9%Ï:"Ê="É=$Ê>'Í<)Ò;*Õ9,Ï;-¿@-°;'¬5!®3!º8(Ä<.Ê:/Î5-Ø93ðNIñTMÙB9ÌB5¾?.¦1³<(¾<,Á9+Á7*Â6)Å7+É9.Ï;1Ô;3áD=ãA<ä?;ç>;é=;é;:è88ç85ç85æ95æ74é;:ôJJÿ\[ÿdcøc_Ì=9Á82½4.Ã81ÕA=ÞE@ß=;Ù42â;5â;3â94â94ä84å84ç77é77ê69ë7:ë6;ë8<ë8>è7=è7=é6:ë7:é69æ89à;9Û>9Ñ>6Æ<1¾<.³8)®;)ª;(¦;'¤;(£:'¡:'Ÿ:&¡>+•6$œ=+š9)š2'ÃPKíkmî\fìP^éJ\ßHWÍKS¹RS¤[R’cQ€jUJM8DL7JB/H9$B7FH#Up=g“V~³o‚³qƒ¥h}’YƒYŽ–c‰—d|Œ_XfCGT89E-6B*:D,=J0JX7Wh>e|Fh‚Cl‹ElFh’Fc‘Fa‘GaGf‹FlŒImNn‘OlOkŽNkOm‘Qp—Tq™Sp™SpšPo›NqNt£Rw¦U|­^z­^y¬]IC3JD6IE9IE:IG;IG;HF9HG5IH3JJ2LL2NN4RP;TQ>XRDYTA]Y>[Z>\YH^[Tdahnk~xs“}x {§ƒ}«…­ƒ|¦}wunoi…kg‚ieˆjf‹mhˆmi‚mh|lfrkbgj`^g]Th\Nh[Kk\In]Kq_Kq_Kp]Lp]Np]Op]Ns]Ow]N~]N‡\L’XL¡UH¤F<«9/º7-Ê7/Ó6-Ú6-Þ6-ß6/ß4-ß4,ß4,ß7,Ý7)Ø6'Ñ6$Ï:$Ì<$Ê<&Ë='Î;)Ñ;*Ó:,Î<-¾A/¯<'¨3!¬3 ¶9'Á=.É;/Ì8.Ì2*àA;ø[TàI@ÐD7ÊF7«2¹>,ÊF7ÌB5Ê@3Ë?2Í?3ÔA7ÜE<ãF?â@;â=9ã:7â64ã54ä65å97å<9ä?9à=6Ü:5ß@:êMHêQLÓ@9¸)#¸-&¾5-É>7Ñ@;Ö>9Ú;8ã;;ç==â96â96â96ä86å95ç:6é99ê::ê88ê88ë8;ë8;é9<è8;ç7:ç7:é6:æ68â88Þ:8Ö=8Ì=5Â<1¹;-®9(ª;*¦;)¢;( ;'ž;(ž;(š;'˜?-Œ5"™=.˜8*—/&Í\X÷y|ßX_æXdÚQ[ÊKR¹LO©VP_TgU€nVPW8?J*<;C=UQ+lvDƒ¢f”Á~‡¼v†¸s|žan‚Mu~Q‚‡^u}V[b@;D)4<%5=(=D2<E29C+CP2Ue>g{HiƒFl‹Gm‘Gj“Gf’GbGcŽGhJlMo’Pn“PlMiŽKjJj’Ls›TsUsSržQržOs¢Rw¦Uz«Zv¦Zu¥[s£YRK;RK;RJ=RJ=RJ=RJ=RJ=RK;UN<VO=WP>XQ?YR@[TD\UE\UEb[Hb[IaYLbYRe][kaiqftuj{zn„|pˆs‹s‰|p„uj{mbre]jc`k`_g_\c_Z^^ZY^YUaZRbZOe[Og[Ki\Kk\Ik\Gm]Fm]Fj^HicMqfTqZJuOB‹WJœ\P£YL¬ZL¥N=¦E4§<,±7(Â8-Ñ80Ü41Ý1-ß6/Þ7.Þ7.Þ7.Þ7.Û7+Ø6)×7)Õ8)Ô8)Ò9)Ò9)Ò9+Ò9+Ô8+Í;,½;+¶>-®9(©1 «2!·9*À</Ã9.È5+Î5-Ô:0ÛC8ÝK>ÔH9Â=,·2#ÝPFØJ@ÓE;ÔD;ÚG?ÞG@àC>à<:à74æ87ì::ì::ç77â64à93Ü=7ÙF>ÏB9Ä:/¼4(·1&¹5)¿;/Ã?3Ã:0Ê=4Ò?7Ú?:â=;ç;9ì89í9:ç7:å8:å8:å99å97å97å95å95à40á51ä65å76æ89ç9:ç9:æ8:ë;>æ9;ß99Ü=:ÖA=ÍB;ÁA6·?1©:)£<+ 
=*›<(™:&–9'•8&9&A0‹<-‡/#‘/&¼LJämoçloÍVZ½LNµNOªQM¢UO›XO—]R•aTŠjSWZ/:M7FTa)~ŽP›°oŸ¼yœ½x’µs~ah€L\mCYdBT]BHP9?D0:=,@C2<@1:>0<C3:C0AN2Rd>]sBgƒHpKo“Kl’Gj’Ii“KgJlPm‘So“Sl“Pm’Mk“Jm–Jm˜IqœLqžMrŸNr¡Pt£Rv§UyªYzª^y¦cm™ZbŽOWN?WN?WN?WN?WN?WN?WN?WN?XO@YPAYPA[RC\SD]TE^UF_VGd[JcZIcYMcYOf[Uj_]nbdpdhreltgpvirvirsfmm`gg[_bXY^[VZZRYVOWTKXTIXTH\VH^WGcZKcZIf[Ig\Hi]Gj^Fk_Gh`IcbMngTv\MSFšVM«VO°RH·QE¯H9¬C0¬=,±9)¾8,Ë80Õ62Ù40Ý6.Ü8.Ü8.Ü8.Û7-Ú8-Ù7,×7+×7+Õ7+Õ7+Õ7+Ô8+Ô8+Ô8+Î:,¾<.¶>.®9(¦1 §2 ²:)¹>.¾<.Æ:-Ç4*Ë4)Ò;0ÜE:áOBßQCÝOCÛF?Ø@;Ó<5Ò;4Ö=7Ú=8Ü86Ý33ç77ë78î79ë78ç77å97â=9ÛB:ÊA7¿?4¸:,±6'¯5&²8)¸=-¿@1ÅA4Í@6×@7ß=8ã:7é77ï56ð69é6:ç6<ç7:æ89æ87å95å95ã:5â92ã:3ã:5ä;6ä;8ä;8å99å99ä:;ã;;ß<=Ù?=ÐA;Ä?6·=2¬;-¥;+ =*œ=+—<)–;(’:&‘8&Œ9'€9'€8)‰5*<5¿QP×eeÑ^a·MM¬NL¤TMYP˜\Q—^S—^S˜_VŒfQ`_/Sg*^r3zO™¯n¥¾|™²pˆ bj‚HauBTe;JX7FP7?H59?158-:</AB4=?4:=2=A3:C0@M3Rd>^tEgƒHqMp”Ln’Hk”Hk“Lh‘Kk’Qn’Tm”Sm”Qk“Lk”Hl—Hm˜HpJpJp Ls¢Qu¦Tx©Xx©Xx¨^m—Xb‹QY‚H]TE]TE]TE]TE]TE]TE]TE]TE[RC\SD]TE^UF_VG`WHaXIaXGe]Je]Hd[Jd[Le[Qf[Uh]Yh][i]]j^^k__k__i^\f[YbWS_VQZVMWTKUQHRNCQMBSOCWQCXRB^WG^WEaYFc[Fg\Hi^Hk`JhbLbaMngUy_PˆXN¢\T±VQ²IE¸E@¹@7¸>1·;/¸8+¼8+Ã9,É;/Ñ:/Ù8.Û7-Û7-Û9.Ú8-Ù7,Ù7,×7+×7+Õ7+Õ7+Ô8+Ô8+Ô8+Ò9+Í;,À>0¶>.¬9'£2 £3ª:&³>,¹>.Ä?0Ä8+Å2(È4*Ð<0ØF9ÜM?âNDÙ:6Ø43Ô20Ö42Û97á=<æ<<é;<í9<î5:ë27ê37è58å99á<:Ù@:¾8-µ:+±6'®4%­5%°8(µ<+»=.ÈD5ÏC6ÖB8Ü=7â94è64í55ï79ê69é69é69ç77ç77æ95å95å:3ã:3ä;4ä;6ã<6ã;8â:9á99Þ88Ý9:Ü<<Ú@>ÓB=È?9¹:1­7+¤6'¡;,<+™<+”;)“:(9&8%Š9&z6#y6%ˆ9, F=¹SOÀWT¸PO«IF¢MHœSL™YP—^S—aW—aW˜_V‹ePsrFzV“¦n¤¹€«À‡ž³{|[[o<?R$AS+AP/?L2:C06<.69058/9;.@A3=?49<1=A39B/@M3Qc=`vGi…Js’Or–Np”Jm–Jm•Nk”Nn•To–Uo–Sn–Pl”Kl•Il—Hn™IpLpLpŸNu¤Sx©Xyª[u¦Wq X_‰JVEOx>aXIaXIaXIaXIaXIaXIaXIaXI_VG`WH`WHaXIbYJcZKd[Ld[Je]Hf^Gf^Ig_Lg^Of\Pf\Rf\Sh]Wh]Wh]Wh^Ug]Te[QdZPc[N]YMZVJVRFRNBPL@PM>TN@UO?WQAYTA[VC]XDaZGd]Jf_LfaMdcQleUv\OŠ[Q©b\¸[V¶FD¹<:Ã<8Æ;6Ä:0Â8-½8)¼:*¾?.Ç?/Ó9-Ù7,Ú8-Ù9-Ú8-Ø8,Ø8,Õ7+Õ7+Ô8+Ô8+Ô8+Ô8+Ô8+Ò9+Í;,Á?1·?/­:(¢3 
3¢8"¬=)µ@.¼?-¿:+Ã7*Å5*Ë7+Î:.Î</Ó:2Þ65à24Û12Ý34à88å;<ê=?ì<>î7<ì38é06æ25å58á77Ú65Î61·5(¯7'®6&¬7&¬7&®9(²:)µ:*ÃA1ÊB4ÔA7Ü?8â;5æ95ë76î87í68ë78ë78ê86ê86è94ç:4æ;4â92á:2ß:4à;7Þ:8Ü:8Û97Ù99Ö<:Õ@<ÒC=ÊA;»;2¯6+¤6'ž7(œ;*˜;)”;)’;(:&8$Œ7#‡9%€=*v5#5'™E:ªPH¬MG©LG¦QLPJ˜UL”ZO”^R•aV•aV—aWgT‰…_™©x­½Ž¦¸ˆ¡ts†YN`8/A->3B%:F.=F3;A3:=2:=4<=599-?@2<>39<1<@29B/?L2Pb<awHj†Kt“Ps—Or–Lo˜Lo—Pm—Op—Tq˜Up˜Ro—Pm–Jm–Hn™Jp›KqžMrŸNt£Sy¨X{¬]w§[mSe’M[…FS|BNw=cZKcZKcZKcZKcZKcZKcZKcZKcZKcZKd[Ld[Le\Mf]Nf]Nf]Lf^Gg_Hh`IiaLiaNi`Oh_Pg^Oj`Ti`Qi`Qh_Nh_Nh`Mh`MhaNd^N`ZL[UGVPBPM>NK:NK:NK:PM<RO>TQ@VS@XWC[ZF]\H^]Kb`Qf^QmWJ†[R¬ic¾c`ÁMMÈBCÐ;=Ô89Ó84Ì70Á9+º=)µC+¼B+Î;+×7)Ø8*×:+Ù9+Ö9*Ö9*Ô8)Ô8)Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Í;,Á?1¶>0«:(¡6"š4›7 £=&¯B-³;*º;,Â:,Ç9-Ë8.Î:0Ð<0Ù:4é9<ì4<æ39ã28â38á48â38â25é6<é49ç4:ç7:ã9<Û76Ð21À0(´6(­:(­:(«:(«:(«:(¬9&¯7&¸:+Á;/Î>3Ø?7à=8ä;8è96ì87î66î66î66í74ê84è:3ç:3ä<3á:2ß;2ß<5Ü=7Û=:Ù><×=;Ô>=Î@<ÉB<ÅB:¹=5­7-£5(ž7(š:*–;)“<)‘;*:(Š9&‰8%ˆ7$ƒ8%ƒ@-u4"{3%“G:¤QI¡NFžNG¡WN˜UL’XLZN\N]R”`U—aWhW‰f˜¥z©€Žj]kHDS42@&$2-:&0;*5=.9?3<?4=?4:;3::099->?1;=28;0;?18A.>K1Pb<bxIk‡Lu”Qt˜Ps—MqšNq™Ro™QršSršSršQp™Mm—Km˜Ip›LsžOt 
Qv¢Sx§Y{©^z¨_qŸVc‘IY†C]†JVGQzBe\Me\Me\Me\Me\Me\Me\Me\Me\Mf]Nf]Nf]Ng^Og^Oh_Ph_Ng_Jh`IiaLjbMjbOjbOjaPjaPjaPiaNh`Mh`Kh`Kh`IiaJjbKf_Mb]J^XHXRBSM=MJ9KH7IH6KJ8LK9LM;NO=PQ?QTASVESVE[YL^VIcQEzXN¡ha¶eaÄVUÖQRÙ@Cß:>Ü87Ô70Ç;.¼?+±C*µA(Ê;*Ô7(Õ8)Õ9*Ö9*Õ9*Õ9*Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Ò9)Ò9)Í;,Á?1µ=/«<)¢9$™6•5›;"§A*²B.¹@/¿;,Ã7*Å2(Ë4+Ò;0ß=8ê5:î3<ë6=é8>ç:>ä;>ã:=â9<ß58á7:â9>á=>Û=>Ñ96Ã2/¶0'¯9+«>*«>*©>*ª=)©<(©:'«8%¯6%¹7)Å;0Ð=3Ù<5ß<7ä;8é:7î87ï75ï75î85ë:4é;4ç<4ä=4â>5à?7ÞA:ÚA;ÙA>ÕA?Ñ@=ÍB?Á@:»@8³=3ª8-¡5(›5'˜8(•<*‘<(<)Š<(ˆ:&†9'„7%ƒ6$6%}:)t3!~9*—OA£YNœRG•OE˜VJ•YN[MŽZMŒ[MŽ\Q“_T™`W‘gW~xXyƒ^r|ZU`B;E,/;%0;*0<.3=24<16<2<?6@A9?A6<<077+89+>?1:<17:/:>07@-=J0Oa;bxIk‡Lu”Qu™Qt˜NqšNršSpšRs›RsœPr›Op™Kn™Jp›LsžOv¡Ry¥X{§Zz¨]x¦]qŸWg”O]ŠEVƒB_ˆNXIR{Cg^Og^Og^Og^Og^Og^Og^Og^Og^Og^Og^Oh_Ph_Ph_Ph_Pi`OjaPjbOjbOjbOi`Oi`OiaNiaNiaNh`Kh`Kh`Ih`IhaGhaGhaGf_Le^Kb[I\WDWR?PM:LI8IH6IH6GH6GJ9GK:HL;IM<HO?JN?PQCXRF[OCiRDƒYM–VL°SLÔZYÛIJâ?Bá99Ú72Ï;1Â?-´B*³<$É:)Ó7(Ô8)Ó:*Õ9*Ó:*Ó:*Ó:*Ò9)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á=0µ=/¬=,£<)˜8 4•9 ¡A)­C-¶A/¿=/À6+Â/%Ç0'Ð6.Þ:8è59î6>ì;Aê@CæAEàBCÜ@AÙ??Í12Ð66Ó:<Ñ=;É;9¾71µ4.­4)ª<-¥>+¥>+¥@,¥>+¤>(¦;'©:'ª7%²7'»7*Å8.Ì70Ô:2Þ<7æ?9ë97ì95ì95ë:4é;4è;4å=4â>4àB7ÝC9ÚC<ÕB;ÏA=Ê?<Ç=;¿>9±<3©:/¢8+œ6(˜7'”8)‘:):(‹=)‰<*‡<)ƒ:'‚9(€7&6%}6$x3#x5%…B2˜UEWK”PCPD•ZL“YMZN\Q\Q’[T•\U˜[VŽbUkbEV`=JS4=G,6?*2=,4>35?66=68?8=B;CF=EG<CD6@>/;9*78*=>0:<17:/:>06?,=J0N`:awHk‡Lu”Qu™Qt˜Nr›OršSpšPq›OrNqœMp›LošKrNu¡Ty¥X«`~©az¥]qœUf‘L_‰G]‡E^ˆIb‰PY€ISzCh_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Pi`Qi`QmdUlcTjaRi`Qh_Pg^Og^Mh`MiaNiaLiaLiaLiaJh`IhaGh`IgaKf_Le^KaZG\WDUR?PM<ML:EF4DG4DH7DH7BI9BK:BK:CJ:IK>SQDSPAXL<eK:tE5•H>ÂYSÙROàDEà::Ü75Õ;3Ê>/º=)µ8"É:)Ï8'Ð9(Ñ:)Ò;*Ò;*Ò;*Ò;*Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á;0¶:.­<,§>+™9#3“7Ÿ?'¢9#¯<)À>0Æ<1Ì8.Ð7/Ù<5ã?=ì=Bî?DéCEâDCØ@?Ê;7À50»1.¹/,¼51¾:6»<6³:2ª8.¤8,¡:+¡=-Ÿ>+ 
?,¡@-¡@-¡A+£>*¦=*©;*­:(²7(¹5)¿5+É90Ö?8àC<å<7è;5è;5ç<4å<5ä=4á>5Þ@5ÜD9ØD:ÒC;ÊA9Ä?:¼;6¸85°93¢7-™8(•5'“6'‘:)<*‹:)…8&ˆ=*…<+ƒ<*€<)~:'|8%{6&{6&y1#€8*ŒG8–SC’PB‹L=RE™_S‘WL’YN”[R–]T–]V—\V–YTŒ^QaX;EM(6@8A&>G2<E43=2-7.1817>6BE<GJ?HI;BC1?>*;:&78(=>09;069.:>06?,<I/N`:awHj†Ku”Qt˜Pt˜Nr›OršSpšPqœMqœLqœLp›Kp›LsžOw£V{§\‚­e€«dx£\j•P]‡EYƒA^ˆHePcŠQZJSzCi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj`Wj`Wj`Wj`Wj`Vj`Vj`TjaRjaRjaPjaPjbOjbOjbOjbMjbMg_Jg_Jf^Ie_Ib]IZXCSP=ML:HI7EH7AE6@D5?E7?G8?G8>F7GK=IK=AE4KJ6\Q;aE/ƒJ9Àj]Üg`ãUSäFEåA@àA=Õ>5É=0Æ=-Æ4%Ë5&Ì6'Í7(Í7(Î8)Ñ;,Ó=.Ò<-Ò<-Ò<-Ò<-Ñ;,Ñ;,Ñ;,Î<-»2(¶7.±;/¨:)š7"”5•7œ9"¬A-±<+º6)Á4*Í6/×<7âC?éGDèDEèEHÝCCÉ:6»61´80®8.©4+«9/©9.¤8. 8-›9,–:+“<+’<+–=+˜=*™>+š?,›@-A, A-£@-¢;*¥:(«7(°6)¶6+¾8-Ä;1Ì<3Ú=6à=6â?8âA7âA9ÞB6Ú@6×@5ØH=ÒE;ÇA8»;2²5/ª3-¦1*Ÿ2+™9-’;*‘9+:*‹:)ˆ;)‡;+…<+€9'~9)|9(|9(|9({8'y6&x3$w- ‹A4—OC“MAŽL@‘QE“UJŽRG˜\R˜\R‘UMTL—ZU•XS”WT“gZ]T5IQ*AK(BK,>H/7@+2=-4?14<1HPCZ^PW[JMO:DF.AA'==%:;)>?1;=2:=2;?14=*;H.Pb<cyJlˆMu”Qs—Or–Lp™MršSq›QqžMqžKrŸNrŸNsŸPv¢U{§\ªb‚­fx£^j“O`‰G^‡Ea‰JfŽPhTd‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj_Yj_[j_[j_Yj_Yj`Wj`Vj`Tj`TjaRjaRjaRjaRjaRjaPjaPh`Mh`Kg_Jg_Jd]J^YEUR?ON:IJ:EH7BD6@D5?E7>F7>F7=E6@F8>E5>F1HL5MI0U?'Q:¼sbËcXà_ZëUTçGGâ@>àA>Ø?9Î:0Í9+Í<+Ï=.Ï>-Ï;-Î;+Í9+Í:*Î:,Ð:+Ð:,Ð:+Ð:,Ð:+Ð:,Í;.É?5ÅB:¼B7®=/ 7$–1–1›2¦5#²7(Ã;/ÑA8ÞE?æIDéJFêKHÞCAÛEDÐB>¼<3¯9/©=0¥?1Ÿ=0Ÿ?1œ>2—>0”>/>-Š>.‡@.ˆ?,Ž=*‘;*’<+“=,”?+–?,—>,™>,Ÿ@.¡=-¤:,¨8,¬8+±8-¶:0¼9/Ê<2Ð<2Ó?5ÕA7×C9ÖC9ÔD9ÑE8ÊB6ÃA4¹=3°:0©6/¢5.Ÿ4,™7,‘;.Ž=,Š;,ˆ;+†:*„;*ƒ<*<,~;*|;){9)y:)x9(w8'u6'w4$€4'D7˜NC‘K?ŒJ>QD’TIRG“SJ˜XO–UO—VPœ]X”WRŽSOŠcT]W5Xc9_jBZfBIT66C)4@*<H4OXGYbQ_fTW\FJN7BE*??#<<"9:(=>0:<19<1<@25>+;H.Pb<dzKlˆMu”Qt˜Pr–LqšNršSq›Qs OrŸLpLqžMt Qx¤W|§_ªc{¦aošUcŒJ]†D`ˆIeNeNc‹Od‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Uj_[j^^j^^j_]j_Yj`Wj`Vj`Tj`Tj`Tj`Tj`Tj`Tj`Tj`Vj`TlaOk`LiaNiaNf_La\HYVCSR>MK<HI9DF8AE6AE6?F6>D6<D5=F57@-@H1IK3F?%W?'‡ZC·s`¶VJÙbZðebìTSæFHëGHæCDØ:7Ë7-É:*É:,É:*Ë9,Ê8)È6)È6'Ê8+Ë7)Ë7+Ë7)Ë7+Ì8*Ì8,Ë8.Â3-¾5/¶6-­3(¡0"ž/ 1 
§3$¼A2ÇC7ÖG?áLFçLGéJFåFBßD@×CAÏDAÁ@:±;1¤</¡A1œE4˜E3“D3‘D4B2ŠA0…A.‚A/~A.@.†:*‹9+‹:)Š;*‹<+<+Ž=*<*–?.–=-›;+9* 8+¥9-¨:-®9/¸:.¾90Á;0Ä>3Å?3ÅA4ÃA3ÁA4¶;,²:,ª8-£7+ž6+™7,˜7.“;/‹</ˆ<,‡;-„;,‚:+€;+€;+<,};+z;*z;,w9*v8)u7(t6)v4&ˆ;1”F<–NBŽJ=ˆH<OB‘SH’RI’OI™TO™SQ˜WSš]Z“YU[Tl[}X~‰^€‹alxRLX46D#=J.LZ@eqYeoW]eMPU>EI0?C(=@#:<$89';<.8:/9<1=A36?,<I/Pb<dzKm‰Nv•Rt˜Ps—MqšNršSpšPu¢Qp LnLpŸOu¤Vy§\}¨`|§`p™UfM\„E\„EeŒMiQfN`†IcˆRZLSxEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Vk`\k__k`^k`\k`ZkaXkaWkaUkaUkaUkaUkaUkaWkaWkaXkaWnbRmbPjbOjbOhaNe^K_ZGYVERO>MK<GH:DF8CE7BD6@D5<C3<H49B-FJ3LF0O:%qL9šcOª^N¡A5ÆSLçc_ð^^íSUëJOéDJáAAÌ91Ç9-Å7+Å7+È8-Ê:/Ë;0Í:0Ñ>4Ñ>4Ò>4Ò>4Ó?5Ó?5Ó?5Ó>7Ó?;ÑB>ËD>ÆF=ÂF<ÃI>ÇM@ÎPDÙSJÝPGâMGäIEäGBâC?àA=ÛB=ÕFBÉD?º?7«=0Ÿ>.šC0•G3”I6F4ŒE3ˆC3…B1€A/}@.zA.|?-‚:,†8,…9+†:,†:,‡;-‡;+ˆ;+‰:+‹:)Ž8)8*•9,™;/=/¢<0ª:/­8.¯9/°:.°:.°:,¯9+®8*¨7)¥7(ž6)›6*•7+’8-9/Ž<0ˆ<.…<-„;,9*:*~9*}:*};+|:,z;,y;,w9*s8*q6(r7)u5)ŒD8’H=‘K?ŠH:‡I<‹OD“TK—TL–QL›TP—QO”SO–\X_Xh^—~j””p…’fu‚WYh?CR+?O*P`<crQetU\iKNX=CL1?E+>B)=A(;='78(:;-79.9<1=A37@-<I/Oa;bxIlˆMv•Ru™Qt˜Nr›OršSo›Pq OožMožMs¢Rz¨]|ªay¤]tŸZcŒJ^‡E[ƒD_‡HgŽOj‘PfL`‡Hb‡QY~KRwDi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Tk`Zk`\k`ZkaXkaWkaUkbSkbSkbSkbSkbSkaUkaWkaWkaXkaWocUnbRkbQkbQkbQhaOd]M`ZJXRBRO>NK<IG8GE6DE5BC3>B1=I1?G/LG1S>+g=-•ZL­_Sœ>4ž3+¶?9ØXUóigödeëRUæJNèNPÚIDÓF<Í@6Ë>4Î@6ÒD:ÕG=×G>ÚJAÚJAÝJBÝJBÞKCÞKCßJCàKEëSRèTTãSRÜROØSLØSLÛTNàUNàLHáIFãEBâC@äB@âC@ãDAàFDÙJFÊE@¹>7¬=2¡?2˜B1’E3G4ŒE3ˆC3‡B3ƒ@0?/~?0z?/|>/€:.ƒ9.ƒ9.ƒ9.ƒ;-ƒ;-ƒ;-ƒ;-9*ƒ:+…9+‡:*‰:+;-<.“;/š:.ž9-Ÿ9- 
:,¡9,¢8+¢8*¢8*Ÿ8)œ8)˜8(”8)9*Œ:,‰:-‡;-„:-‚:,9+€8*}8)}7+|8+{9+z8,y9-x:-v8+q5*o5)q7+t9+ŒH;I<‰I=„I;…K?ŒRF“WM˜XO˜QMœUQ—SP“VQ’`YˆbWƒg[†zdnrQZi@JY0AR(IZ0Wi?dvNj|VXiEL[:?L.:D)<D,>C-<A-:=,89+:;-68-8;0>B47@-;H.L^8_uFi…Jt“Pt˜Pt˜Nr›Os›TpœQnMmžLp¡Pw¨Y|ªaz¨`rXi“QZƒA\„EaˆIeŒMgŽMgŽKfJd‹Lc†PY{IRtBi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkaUlbYlbYlbYlbVlbVlcRlcRldQldQldQlcRlcTlbVlbXlbXlbXocUnbRkbQlcRlcRkbQg`Pd]M^WGYSCSM?MJ;KH9GE6EB3?B/;H.?H-OC-a=-ƒE:«XR´NJž,+²:9¹=;ÓSRòpnþvvñefëX^ð^_ãUQÚPFÔG@ÐC:ÑD=ÔG>ÖGAÖG?ÙHCÚJBÜHDÝJCÞJFÞKDàKGàKGâHHáGIÜHHÙGGÖHD×HDØGDÛEDàBAäABæBAèBBçCBäB@ßA>ÛA?ÛJGÊC?¹<6®>3¢@3˜A0A0D3ŠA2ˆ@1‡?1…=/‚<0€</=1=1;1;1ƒ:3;1;1€</€<1=/€</€=-<-€;,‚:,9*‚8+ƒ7)Œ:.8.‘9-”:/—;.š=.ž</Ÿ>.™9)–9(“:*:+Š;,‡;+„;,‚:+‚:,9+7)~6({5){5){5)z6)x6*x8,w9,s8*p4)o5)r:-v<0‹M@‰K<„J<‚M=…QC‹VH’XM—WN“NI™TO—VR–]Ve\{^PfVF`^GEO->N)?O(J\2_rEk~QexKXj@M_7@Q-6E&7D*=F1=D4:>07;-8:,:;-68-8;0>B47@-9F,I[5[qBeFr‘Ns—Ot˜Nr›Os›TpœQmœLo Os¤Ux¨\z¨`s [i“Q`ŠHYB^†GeŒMgŽMfJeGeGfLa„NXzHQsAi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbXlbVlbVlcRlcRldOldOldMldMldMldOldQlcRlcTlbVlbVocUnbTkbSlcTmdUlcTi`Qf_Ob[K^WGXQARL<OI9JG6IC3AC.:G+@F*U@+xG9£PL´HH¶8<º39ÈAEÄ?@ÊJIßb`ñqpðnnçbeâZZÛQNÕLDÏD?ÍB;ÏD?ÐE>ÐB>Í@9ÒC?ÒC=ÔC@ÕD?×CAØD@ÙECÚDCÝADÝADÞDFÝGHßIHàJIâHHåGHçACêADìBEìBEçAAâ@>Û=:Õ=:ÚFFÉ=<¹83°;2¦@4˜?1>0ŽA1Š</Š<0‰;1ˆ:0‡81†91…:4„;4;3€<3:4€<3<3}=1}=3|<0|<0|=.|=.{<-|:,{9)z8*{7*‚:.ƒ9.‡9-‰9.9.‘;.•<.–=-’9)‘:):*Š;*‡;+ƒ<*€;+~;*€8*€8,~6*}5)|4(z4(z4*y5*u2)v6,v8-r6+o3)o5*s:/x@3‹QC†N?N=‚Q@…VF‰XIŽUJ‘RI’OI—TN“TO‘\VŠfZn[JPK7EK1AN0KZ9WgC`rJgyOdzLVl>G]/AV-7H$1@!6B*=F5<D98=67:39;.;<.68-9<1?C57@-7D*FX2Vl=a}BoŽKq•Ms—Mr›OtœUqRožPq¢Qu¦Wv¦Zs¡Yj—RbŒJ[…E^†H`ˆIcŠKeŒKfJfŽHf‹Fe‰I`ƒMWyGPr@i`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbVlcTlcTlcRldQldOldOldMldMldOldOldQlcRlcTlcTlcTnbTmaSmaSmaSmdUlcTjaRg`Pd]M_YIZSCTN>PK8MH5ID1DC.@F*DD([B,ˆTG¯\X«>A¬/3ÎINÆ?EÀ;>»;<ÄFGØZ[ãefÛY[ÌGHÑJGÐEBÍB?ÐB>ÔFBÔFBÐB>É>7ÏD=ÏD=ÒD@ÔEAÖEBÙECÚDEÝDFÞ?Cß@DÞBEßEEáGGàFFàBCá>?èBDé@Cè>Aç=>ä>>ãA?áC@ßECÖBBÅ98¶50°;4¦?6š>3=/?2ˆ</‡:0‡:0‡81‡81‡81ˆ94ˆ;5‚92€:2€:2;2~<0~<0|<0
|<0x8,y9-z:.z:.{<-{<-z;,z;,};/9/€7.7,„6*…5*†7*†7(Š8*ˆ9*ˆ;+…<-„<-€=,~<,};+}9,}7-|6,y5*z4*x4)x4)x5,r2(u5+v8-r6+n4)n5*t;0xB6ŠUG„PB€O@„SDˆWIŒVJŒRGNG•TN”UN‹RK„XMdSeZDGK2=J,IX9ZkIhyUcxQYnESh=Nc8J_4:L&0?,95>)=C5<B8:<79<5;=2;=079.9<1?C58?-8B)EU1Uh;azCnŠMr“Nr–NršQsSqRr Uu£Xv¤[t¢ZošUfN^ˆH[ƒDdŒNa‰K`‡Hb‰HfJgŽKeŠEa…E`ƒMWyGPr@i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbQkbQkbQkbQlcRlcRlcRlcRlcTlcTlcTnbTk]Pp`QtdWrdWnbVj`Th`ShbTgaSebQc]M[VCVQ=SN8NH2GA)KG,I?$bI3‘gW¶xm»le¸YW¹OO¶BE½BEÂDGÈFHÍHKÐJKÒLMÓKKÏEEÐDEÓEDÖFEÕFBÒC=ÏD=ÏF<ÈD8ÉE9ÊE<ÌE?ÒD@×CCÚADÛ?CáBFâBDáCDàDEàDEáFDãEDäDDãCCäB@ä@?ã?>ã?>â@=â@=ÞC?ÓB?Å>:¶93ª70Ÿ:0—=2Ž>3†>2ƒ?2?3=2;3„93‡83ˆ81‰92…;0„<.„<.ƒ;-;/€:.€:.€:.~:-~:-~:-~:-};/};/};/};/}90}90~80~8.€7.€7.€7.~8,€8,~8,}9,|8-{9-z8,x8,x8,x8.x8.w7-u7,v6,t6+s5*q5+m1'r6,n4)m3(o6+m4)q8-I=…SH…SJ†RGˆRH‹PHQJ”QK•RL“TMTK‰[N‚cQo_HYU:MV7O_;j~YbxQYoHUjCRg@Mb;IY5CR17D&4>#2:#6;'<>0@B5@@6<>358-39-4:.7:/9=/;?.=B,<E(S_;arFn†Rs‘Ur•QršSsSrUužZ~§e~§ep˜YbŠK^†G^†H^„G`†IbˆKdŠMfNeŒMcŠK`‡H^„G_‚LTvDKm;i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrbSqaRrbUrdWrfZogZmeXjdVgcWkhYmjYjgThcOc^H[U=RL4PJ0H>%Q?)kP=XG„NBˆE=“D?±WV¸TT»QQÀPOÀNMÁMMÂLJÃKJÐTTÈHGÂ=>Ä=:ÐB@ÓEAÏB;Ç>4ÊD9ÉE9ËF=ÍG>ÔFBØDDÜAEÝAEáBFáCDáCDâDEâDCáFDáFDáFDàEAßD@àB?ßA>àA=àA=àA=ÞC?ÔE?ÈC<º>6¬:0 
90”:/Š<0‚<0€A2~@3~>4<3ƒ:3†91ˆ81‰90‡;.…<-„;,„;,ƒ;-‚:,‚:,‚:,9-9-9-9-~:/~:/~:/~:/}90}90}90}90}90}90|90|90|90z:0z:0y9/y9/y9/w9.w9.w8/w8/t8.s7-r6,r6,p6+p6+k1&q7,m4)l3(o6+l3(p7,~H>„RI…SLˆQJ‹PJQL”PM•QN”SOŠOG…SH€[K{ePsiPhkLfqQgxT^tMXnGPf?La:K_:K\:GV7CP4;E,8@)6;'7:)<=/?@2>>2;=04:03;05;16<0:</<?.>A,>D*Q[9^mDkRpŒRp“Sq™RsSsžVxŸ^|¢exžak‘T`†I]ƒF]ƒF\‚E`†Ia‡Jc‰Le‹NdŠMbˆK`†I^„I]LRtBIk9i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTqdTo_Pm]Pm_RqeYog\ldYhbVfbWqma{xi~{j€zjzubpiVe`JTT<FF.C>(MB.S@/U8(]7*l=3n70u91~;5‡@:’IB›PJ¥XP­ZR»^YºSN·GE¼B?ÈDBÑFCÓD@Ï@:ÐA;ÐC<ÒE>ÔG@ÖGCÚFFÛEFÞDFßCDßCDßCDßCDàDEßEEßECßECÞE@ÞE@ÜD?ÛC>ÜC=ÜC=ÜC=ÛC>ÖE@ÎE?ÃB<³>5¢:1”8-ˆ9,ƒ;-‚@2@1~>2=1ƒ:1†91ˆ81‰90‡;.„;,„;,„;,‚:,‚:,‚:,‚:,9-9-~:/~:/~:/~:/~:/~:/}90}90}90}90|90{8/{8/{8/{;1{;1z:0y;0x:/x:/x:/x:/t8.t8.s7-s7-r6,q5+o5*o5*j1&o6+m4)k2'l6*i3'm7+}G=‡PI‡PI‹PJŽQL‘RM’SN”UP‘VPŒWO‡[P~^OtbNoiQorUlwYfwUQeBK_:EY6DU3EV6GV9GT8ER8?I0<D-9>*7;*:<.<>0;=/9;.6<25=26<26<0:</<>0=@-=C)MW5Zi@f|MlˆNn‘Qq™RuŸUtŸWz¡`w`n”Wc‰L\‚E\‚E\‚EZ€C_…Ha‡JbˆKdŠMc‰La‡J_…H]ƒH[}JPr@Gi7i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrdWoaTm_Rk_SkaWlcZle[jf]jf]xtiˆ„x‘•’Œ~‰‚r}zg`cNOT>DG2FE1HC0F=,L<,VC4P9+R6*V4*Y5)\6+a9/e=1m=1ŠKBœRI®UO¶NK¼FDÆABÒDCØFFÔ@>ÕA?ÖB>ÖE@ÙECÚFDÛEDÝEDÞDDßCDßCDÞDDÞDDÝEDÝEBÝEBÛFBÚE?ÙD>ØE>×D<×D<ÙD=ØE>ÔD<ÒE>ÉF>ºA8§;1˜8,Œ9+…<-ƒ@0@1~?0=/ƒ:1„:/‡9/ˆ:.„:-„;,„;,ƒ:+‚:,‚:,9+:+9-9-~:/~:/~:/~:/~:/~:/}90|90}90{8/{8/{8/{8/x8.{;1y;0y;0y;0x:/x:/x:/v:/s7-s7-s7-r6,q5+o5*o5*m4)j1&o6+l3(h2&k5)h2&l6*|F<ŠOGŒOJPKRMSN‘VPWPŒZQ‰]R…aUy_PiZGd_IdhO\fKN]@EU8AQ4=M0<K.?M3CQ7FR:GS;BL4>G2:A/9=,9=.9=.8<-7;-7=36=56;46<2:<1;=/<?,<A*JS4Ve>bxIj†Ln‘Qs›Tw¡Wx¢Z{¢ar˜[e‹N\‚EZ€C\‚E\‚EZ€C_…H`†Ia‡JbˆKa‡J`†I^„G]ƒHY{HNp>Eg5haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTrfXth\sg[mcYjaXle]snhzwp~w† 
”¨¥œ«§œ¨¤™£‘™–‡z{kcfSOR?GJ7EH5BE2BE2EH5KL<JH9GD5D>0A9,A7+@6*F4(S5*qB8ŒPHŸPK¯IG¿GIÌEIÓBEÖ@BÙ@BÚBAÛCBÚEAÚEAÜDAÜDAÞDDÞDFÞDFÞDFÞDDÝEDÝEDÛFBÚFB×FAÖF>ÔE=ÔE=ÓF=ÔE=ÕF>ÔA:ÓC;ÎE=ÁB9®>3ž9-‘;.Š</„?0?/€>.€=-;/„:-†:-‡;.„:-ƒ;-‚:,‚:,‚:,9+9-9-9/~:/~:/~:/}:1}:1}:1}:1|90z:0|90y9/y9/x8.x8.v8-y;0y;0x:/v:/v:/u9.u9.t:.r8-q6.q6.p5-o4,n3+n3+m4+j1(m7-j4*h2(j4*f2'j6+}D;NGNHQJTLUOŽWPŠYRƒ[Q{YMv\OkZJ]UBYXDY^HOYA?K3:H/7E.4B+5A+8D.=I3CL7EN9BK6?H5<E2:A/8?/7>.5<,4:,5<45<56;56;49;0:</;>-;@)GP1Ra:`vGi…Ko’RuVy£Yy£[xŸ`n‘W`ƒIZ}C[~D^G^G]€F`ƒIa„Jb…Kb…Ka„J`ƒI_‚H^IW|ILp@Bf6haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTkaUndZpf\lcZkd\rmg„|‘Œ§£ µ±®ÄÁ¼ÉÆÁÉľÅÀºÀ¹±¹²¨¤œˆ‚rfcTPQ?FI8>E3;D1:F28E38E38C2:B3<B4@C8CE:GE9>4(P:/e?6€EA¡PO¿X[ÊQVÊCIÕFJ×EFÚFFÛEDÝEBÞDBÝD?ÞCAÞDDÞDFÞDFÞDFÜDCÜDCÜDCÚDCÙEA×FAÕF@ÓF=ÒE<ÐF<ÑG=ÔE=Ô?8Õ@9ÐC:ÆC9¶A7¥=0–=/Œ=.†>/‚?/>.€=-<-ƒ;-„:-…;.‚:,‚:,‚:,‚:,9+:+9-~8,~:/~:/~:/~:/}:1}:1}:1}:1z:0z:0y9/y9/x8.v8-v8-t8-w9.u9.u9.t:.t8-s9-r8,r8-q6.q6.p5-o4,n3+m4+l3*j4*h2(l6,i3)f2'h4)e1&i5*{B9ŽMG’MH‘PJTLVO‡XNYO|\Qs\Nk[L`WFVSBPSBJQ?@I64@,3<+2;*09(09(2;*5>-9B1<E4?H7?H5>G4:F2:C25@/3>-1<,3:33954954928919;.;>-;@*CL/O^7]sDi„Mp“SwŸYy£Yw¡Yq˜Yf‰OZ}CX{A\E]€F]€F^G`ƒI`ƒI`ƒI`ƒI`ƒI_‚H^G]€JV{HJn@Ae7haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlbVj`VjaXkdZkg^upjˆ…€Ÿžš°°®ÃÂÀÓÒÐâáßçæäçãàâßÚÜ×ÓØÏÈμ²¸¥——‡zym]_YIKL:@E1;D/>J6=H7=F5;C4;>5:;3983880==3C<2N71a84ˆHH¯]_Å`dÊWZÍQSÓNO×MKÛKJÞHGáFDãDAâBBßCDÞDFÞDFÝCEÝCEÜBDÜBBÛCBÚDCØD@ÕD?ÓF?ÒE<ÐF<ÐG=ÔE=Ù@:Ù@:ÑB:ÈC:¼C:¬@4™;/Œ9+‰=/ƒ>.>-€=,<,ƒ;,ƒ;,„<-‚:,‚:,‚:,9+9-9-~8.}9.~:/~:/}:1}:1}:1}:1}:1}:1z:1y:1x90x90w8/t8.s7-r8-t8.s9.r8-q8-r8-p7,p7,p7,o6-o6-n5,n5,m4+l3*k2)i3)g1'k7,h4)e1&h4)b0%f4)zA8MG’MH‘PJŽULˆXNYOxZOr\Nk^N^WGRPAKN=CJ:9B12:+08)17+17-06,/5+/5)08+2:+4<-9D4;F5=H7<I7;F56C13@,1>-.800621622717829;0:<.:?)?H+KY5[qChƒLq“Vwž[w¡YsœVgP^IUx@WzB[~F[~F[~F]€H_‚J_‚J_‚J_‚J^I^I]€H]€JUvGJk@@a6haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTl
bVpg^ng_lgatqj‡„¡ œ¼¼ºÍÏÎÏÏÏàààòòòúøùû÷ö÷óðñìéîãßèÐÆÝÁµÁ«¢‘}ub^\GKM7CH2AI2AI4@G5BD7AA7B=9C97?:69<5>?7B71J.+d66QR³giÆnmÀ\\ÇZWÐVS×QNÜLKâHHæCDåBCßCFÝDFÝDFÝCEÝCEÜBDÜBBÜBBÜDCÙCB×C?ÓD>ÒE>ÐF<ÐF<ÔE=ÜC=ÛA9ÒB:ÊD;¿E:°A6œ:-‹5&Š</…=.‚=-€=,<,<,ƒ<*„<-‚:,‚:,‚:,:+9-~8,}9.}9.~:/~:/}:1}:1}:1}:1}:1{;1y:1w;1x90u9/t8.s9.r8-q7,r8-q8-p7,n8,p7,m7+o6+o6+o6-o6-n5,m4+m4+j4*i3)h4)f2'k7,h4)c1&e3(b0%e3(xB8ŽMG’OIŽRJŠUM„XOzZOq[Mi]MZTDIJ:?C4>D69A208+/7*6<247058157247025.06,08-19,6A3:E5=J9>K9<I76E22A,1>-,6..400511606718:/:<.:?+<E*IW4YoAg‚Mp’Vv\užXp™U`†IX{CSv>WzB[~FY|DY|D\G_‚J_‚J^I^I^I]€H]€H]LStGHh??]7icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldWibZgd]vsn“’Ž¯¯­ÄÆÅÚÜÛêîïóôöö÷ùüüþÿÿÿÿÿýÿûúü÷ôýòì÷áÔòØÇàʵƶŸª ‡ˆ„ibbFEH-DH/CG0FE1GD5F<3C60F42H:9>=9>?:B:7I75cGF’jjœ’è­©Ò‹‡ÇtnÀ]XÆSPÕONßJLçFKéFKßEGßIKÚDFÔ;>Ø>@áGIàDGÖ:=Ú@@Ú@@ÙA@ÖB>ÔC>ÑB<ÐA;ÑA9ÞC>ÛA9Ó@9É@8¾B8°@5Ÿ:.8*Œ=0‡>/ƒ>.>-€=,<+=*<,:+9-9-~8,~8.|8-|8-|8-{8/|90|90|90z:1z:1z:1z:1y=5w<4w;3u:2t91q8/p7.p7.o6-m7-m7-l8-m7-l8-m7-m7-i2+m6/p92o81k4-g2*g2*h3+e0(i7.d2)]-#`0&_/%b2(s>6ŒOJPKˆQJRJ}YMx^QhXITN>DE5>E5:C25@04</3;04:04:068378366446135005./6..6+.9+2=-6C2:G6:I68G44C.1?.*4+,2.-2..3-45/79.:</:?+6>&DR1WlCh‚Rs”]wž_r›Wk”R[~DXyDTu@Tu@WxCZ{F[|GZ{F^JbƒNbƒN^J\}H_€K`L^~MTsJFd@<W4icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldYgd]onj†…ƒ¤¤¢ÁÃÂÖÚÛêîï÷ûü÷ûþøüÿüýÿþþþÿÿýÿþúÿüùÿúóÿðàûëÔíßÅ×ͲÀ»¤£„‚ƒcgjKLQ3GJ/EC.GB/H>2I:3L95K<9?:7BA?MHE]RPyjg£Ž‹Î²®èÅ¿ÿ×Ñ벩͂|Àc^ÊVVØRSßHMÚADÖDEÕEEØHHÜJKÛEFÕ<>Ö<>ÛACÙ??Ù??ÙA@×A@ÖB>ÓB=ÒA<Ó@9ÜA<Û@;Ó@9ËB:¿C;±A6 
;1’8-Œ=0‡>/ƒ>.>-€=,<+<,<,:+9-9-~8,}9.|8-|8-|8-|90|90|90|90z:1{;2{;2z;2w<4u<3u:2s:1r90n8.o6-m7-m7-l8-l8-l8-l8-j8-l8-l8-k6.n70o81n91l7/i4,g2*e3*c1(d4*_/%^.$b2(^.$`0&p>5JE‡RL‡XR~XOrVKdRDRI:>?/:A16A05@03>04<14<15;17:379477577557246116//6./7,.9+1<.4A08E49H58G44E24B1.8/.5..3--2+23+46+8:-9=,7?(DQ3WlEh‚Ut•`wbp˜YgPY|DWxCTu@Tu@VwBYzEYzEYzE[|G_€K`L\}H[|G^J^J^{MSnKF_A9R4icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUneVneVlfZed_y{x—™˜²¶·ÍÑÒäéìôùüúÿÿûÿÿüÿÿýþÿþþþÿþüÿÿûÿÿúÿÿôþüçúùÝïðÑáåÄÒØ´½Å  ¨ƒ‡‘l`gEQX7EH-DC.JD4LD7KA8I>:C;9SJKia_wvœ’¾³¯ÙÎÈêÜÓþèÝÿóéÿäÛð©£ÃecµBEÇDIÚRVÛUTÑMKÊDAÍCAÔFE×EFØDDÙCDÖ=?×>@Ø@?Ö@?ÖB@ÓB?ÓB=ÔA:Ù@:Ù@:ÒB:ËE<ÀD<²B7¡<2“9.Ž<0‰=0ƒ>/>.=-~<,€=-€=-9-9-~:/}9.}9.|8-{8/{8/}:1}:1{;2{;2{;2{;2z;4x<4t;2q;1r90o9/n8.l8-l6,k7,l8-j8-j8-j8-j8-i9-j8-j8-n91m80m80m80m80i7.f4+c1(d2)a1']-#_0&c4*\-#_0&sD<}PJVP~YQpTI^J?OC7?<-46(2;*0;+1<.2=/4<15;17:17:379479668357257227007/08-.9+0;-2?.5B17E48G48G49F54>33:2/4-,2(/0(13(57*6:)6>'ER6YmJk„Zw—fwœfl“Z_‡KWxCWuCTr@Tr@VtBXvDYwEYwEZxF]{I_}K]{I]{I_}K^|JZwKPgJAW@3I2icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUneVneVneVneVmg[jkfƒ‡ˆ£§¨¹¾ÁÐÕØåíïóûýõýÿûÿÿûÿÿüÿÿüþûýþùþýøÿþ÷ÿÿóøýæôýÞî÷ØæòÎÝéÃÎÚ´¶Á™¡«†€‡efmLMR4FH0HJ5KI:HD9D@7OGDj`_‹€¤š™·¯¬ËÆÂÞÛÔêæÝúñèÿûñÿ÷íÿÝÕû¶±åŽÊcd°@?ÃPKÉTMÉPHÃF@ÈGBÔMJÖKHÐ@?Ò>>Ó??Õ?>ÕA?ÕA?ÓB?ÓB=ÓB=Ö>9Õ@:ÒC=ÌE?ÀE>²A9¢=5•;2Ž<1‰=0ƒ>/>.=/~<,€=-€=-9-9-~:/}9.}9.|8-{8/{8/~;2~;2|<3|<3{;2{;2z;4x<4r90o9/o9/m9.l8-k7,j6+h6+i7,i7,i7,h8,h8,h8,h8,h8,m;2l7/j5-i7.j8/j8/f4+a1'd4*a2(].$a2(b3)Y* 
a2(yNE\VvWRiNGWC:I<3@:.:;-8<.08)/:*3;.4</6<25;169069079468368349238139/19.19..9+/:,0=,2?.5B17E4;H6<I89C87?428.-3'.0%/1$24&48'4<'ER8]pPr‹d{šnwœifXU|ESsATr@Sq?Sq?Tr@UsAVtBVtBWuC[yG]{I\zH^|J_}K\zHWrIJ^E<M;.?-gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUneVneVofWofWng]qqo‡Œ¤©¬·¿ÂÍÕØãíïòüþôþÿøÿÿûÿÿûÿüýÿúüýõúüñøøìöøêöÿèóÿäðýáîùÙéôÒÞèÅÊÒ­¶¾™£}‚bY]BGL5FK7FJ9CG9AC8c^X…}z«¡ŸÁ·µÌÂÁ×ÏÍæáÝñîéú÷ðüõëÿñèÿòéÿôìÿåß騤·hc¦LAµPDÁYNÇ[QÆQH¿D=ÂA<ËDAË@=Î@>Ð@?ÑA@ÒC?ÑC?ÑC?ÒC=Ó?;ÒA<ÐE@ÉE@½C>¯@9 >5•=3Ž<1‰<2ƒ=1=0=1~<.=/=/~:/~:/~:/}9.|90{8/{8/{8/|<3|<3|<3{;2z;4y:3y:3w;3n70m80l7/l7/k6.h6-h6-h6-i7.h8.h8.g8.g8.g8.g8.h8.k92h6/f4-g5.i70h70e4-b1*c2+b3+_0(]1(]1(V*!b6-{ULtYRaNHN?8A7.=6,;9-9;.9<139-3;.5;/6<06<07:/68-47.6904923812:/2:/2:/19.19,.9+.9+/:,0;-3>.6C2:E5<G7=E8:B55;/06(02%/1$13%15$2:%GS;buWwk|›rq•g\‚QJp=Oo>Qn>Qn>Qn>Qn>Qn>Sp@Sp@WtD[xH]zJ]zJ_|L_|LZwGRlECU?6D7(6)gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUneVneVofWpgXpgXng]lnmz‚…“š «µ·ÄÎÐÜæèí÷ùôþÿõþýöÿüúÿúûÿùûþóøúíõöèñôãôýêôýèôüåóùßòöÛéìÍØ×¹ÅÄ¥±°’ŽregOMP;GK:FM=GOBKQG{{sŸš”ž¸ØÎÌßÓÓçÛÛóëéü÷ôðïêüüôÿÿöÿýôÿùïÿóêÿçÞÿ×Ëؓƒ¸eS£J:´TF½WIµF;·@8ÈKEÇC>ÊC?ÌB?ÎC@ÎC>ÎC>ÍD>ÍB=ÐA;ÒC?ÎDAÅD?¸A=ª=8ž;5•<4Ž;3‰<2ƒ=3=2=1~<0=1=/~:/~:/~:/}9.|90{8/{8/{8/|<3{;2{;2z:1y:3x92w81u91n70l7/l7/j8/i7.i7.h6-g7-g7-g7-f7-f7-f7-f7-f7-f7-h70h6/g5.g5.g6/h70g6/f5.a2*e6.^2)\0'`4+a5,i=4uSJXG@E>6<5/95,;8/8:/57,36+69.5;/7:/69.69.58-57,36+28,19,19.19,19.19,19,19,.9+.9+.9+/:,0;+3>.6A17B2<D5:C28?/5;-57*24'13%04%18&HT>ex\umt’lf‰_RwKBh9Kk<Nk;Pm=Pm=Nk;Nk;Ol<Qn>VsCZwG]zJ^{K_|L_|LVsCNg@<J9/:2$.&gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUneVneVofWpgXqhYoh^hikmtz…Œ”£¬±¿ÈÍÓÞâåðòðüüòüûõþùøÿ÷úÿöüÿòúýìøùçö÷çööê÷÷ëú÷èüöæýõâ÷ìÖçØÃÕƯÀ±šž“}un[ZWFPQCSVK[bZeld•˜²²¨ÕÎÆèÝÙóããúêëÿñôÿ÷ùÿþûþýùùúôøùñüüðÿÿóÿÿñÿûêÿæÑñª”¼o[©VD®RCµPD»LA¿JAÂG?ÆE?ÉE@ÊE@ÊE>ÉD=ÈC<ÉB<ÑC?ÐE@ÌEAÂC=³=9¦;5œ;5•<6Œ<3‡=4…<5=4<3~<0=1=1~:/~:/~:/}9.|90{8/{8/{8/{;2{;2y:3x92w81v70v70s7/n70j8/j8/j8/j8/h8.h8.h8.g7-f7-f7-f7-d8-d8-d8-f7-g6/h70i81i81h
70i81j;3l=5g80k<4b6-a5,oC:xLCtI@nMDC9056.45/8918;247.14+25,58-58-57,46+46+46+46)37)/7(-8(.9+.9).9+.9)08)08)19*08)08+08+08+19,3;.4<-9B1:C0;B0:A1:<.68*24&/3$29'IU?cu[m„gf„bWyTInE?d8Hg;Li;Nk=Nk=Li;Li;Mj<Ol>UrDYvH[xJ]zL^{M\yKQn@G_;6B4+4/#)%gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSjcSjbUlbVlbVmcWmdUmdUmdUleUmfVngWogZnh\oh^mjekormtzƒŠ¥®³ÃÌÑÕßáæîñôüþôýú÷ýùùþ÷úÿôüÿòüÿñüÿîýþðûøïþúñÿùïÿúíÿùéÿòßïàËÝηÁ²›¢—|ubb_NXZL]cWmwn|†}¦¬¢¾ÀµÚÕÏîäâüîîÿôôÿô÷þôõþøøûúøûýøüÿúøÿõôüñ÷ýñÿÿñÿùèÿôãÿÌ»½p›QD©SFµSH²C:ÀJ@ÄH@ÇG>ÉF>ÈE=ÇD<ÆC;ÆC;ÍG>ÌG>ÇG>¼C;­>5Ÿ:2—:3‘>6Š=5†=4ƒ=5=4€<3;0;1;1~:/~:/~:1}90|90{8/{8/{8/z:1z:1x92w81v70u6/u6/q6.m80k90j8/j8/i9/i9/i9/h8.g7-f7-g7-f7-f7-f7-f7-f7-f5.j81m;4k:3j92j:0m>4oC8l@5oD;d<2c=2zVJ†dZyWMbLA>:13814927<54:0/5+25,9<336+25*24)03(13(25*46+47,.6),7)/7*-8*/7*/7*/7*/7*19,08+08-/7,/7,/7,08-08+7?09B1<E4=D4;A38<.26'-4$2;*JVBcr[h|a]vXOmKFfAAa:Hd;Kh<Nk?Nk?Kh<Jg9Li=Nk=TqEWtHZwK[xL]zNZvMNjAC[;2>4*00!''heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjdXldYlcZmeZmeXmfVg`NjeRnkXol]he\feasqr‚†Š“–ž¡«³¶ÃÈËØÝàêîïö÷ùùûúüþýþþüÿÿúýþöúýòùüñúýòûþóùúòúúòùùíüúëþüçù÷ÞéåÊÕÔ¶º¹›˜˜|psX_dMbkXr}l„‚‘›´¹²ËÌÇãâÞòîë÷óòû÷öýùøüø÷ÿþüþþüþþüýÿúýÿúýÿúýÿúýÿúúü÷ÿÿúÿûöÿêåا 
£`X›D=µNE¿MCÂF<ÄD9ÊE<ÎI@ÎKAËH@ÇG<ÆG8ÃH8½J8³H8¦E5šB4‘B5‹C5†B5„B6‚@4‚>3‚<2„:1†91†9191~:1|91{80z7/z7/w7.v6-w7.w7.w8/w8/v7.u6-q5+n3+l7/i81i81h70h70g6/g6/g6/f5.g6/i70i81h70g6/e4-e3,h3-j5/m80k90j:0i;.i=0kA3gB2jI:dH:v_O~k\ŠxjŒoRJ=79.4:04:039/39/39/28.28.17-17-36-06,25,/5+14+/5+/4-.5-/4-.5-/4-/4-/4-/4-05./4-/4-.3,.3,/4-/4-/6.1;23=26@58B79D67B45@03>-4?.DQ=WeN[kQPbHEX<@T8BW6Ic>Ke>Kf=Kf=Je<Je:Lg>Oj?UpGYtK\vO]wP]wRWqNHb?;O6&1+&+.$),heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVidQjhSmjWjk[gg_lll~„“š¨¯µ¸ÂÄÑÖÙâçêòóõûüþÿþÿÿþÿþýûÿþúÿÿúþÿùûüôøûòöüò÷ýó÷úñøúïøúí÷úéùûåòõÚßâÃÌЯ´¸—‘–vkqUYaI_kUtoŒ˜Š§œ¿Á¼ÓÓÑççåóóñ÷÷õûûùüüúûûùþþüþþüþþüþþüþþüþþüþþüûÿþõÿÿõÿÿþÿÿÿûúÿíêð¿ºÂ~uŸKA¯M@¹K>ÂL@ÅK>ÃE9¼=4¾?6ÃE9ÄF7ÃJ7½L:³J7¦F6™C4‘C6E7…C7‚B8A7>5‚<4ƒ:3†91„93~92}:2z:1z:1y90x8/v7.v7.u6-v7.t8.t8.t8.r6,o5*m4+l7/i81j81h70i70h70g6/g6/e3,e3,g5.h6/j81j81j81j81k60l71k90k;1k;/j>1j@0hC1gH6kP=gRAufSue‹†s†ƒrKL<69.39/39/39/28.28.28.17-17-17-17-06,06,/5+/5+/5+/4./4./4./4./4./4./4./4.05//4..3-.3-.3-.3-/4./6/.80/:21=34@66B66B66B45B16C1CP>P]IR`IIW@AO6BP7FV;Jb@Jd?Ke@Ke>Ic<Hc:Id;Je<SnEXrK\vQ]wT[tTTmOC\>6I5&1-',0$)-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVmhUkiTmjWjk[kkcwww‘’—§¬²¿ÆÌÒÜÞìñô÷üÿþÿÿþÿÿÿþÿÿþÿþýûÿþúÿÿúýþøúûóõøïñ÷ëñ÷ëô÷ìõùëöøêõøåõ÷áîîÔÛÛ¿ÉÊ«°±’“–yuy`kpZt{iˆŸ§œ®µ­ÍÏÊÞÞÜïïíøøöûûùþþüþþüýýûþþüþþüþþüþþüþþüþþüþþüýÿþûÿÿûÿÿþþþÿûùÿúôÿ÷íýÑÆ؞”L=¥RB©L;§@1·G;ÏYMÏUJ¼@4ÃC6ÆH:ÃK=¹I;ªB7?5—@7“D=‰@9„A9A8?6~>5}=4<4}=4z;4x<4w;3w;3u:2t91t91t91r90r90r90r90o9/n8.l6,k5+l7/j81l71j81k60i70i70h6/g5.g5.f5.g6/h70i81k:3k:3l;4j;3i:0h<1k?2jC4iD2fE2gJ8lVAjYGujV†m‘Ž{€mDG658-28.28.28.17-17-17-06,17-17-17-06,06,/5+/5+/5+/4./4./4./4./4./4./4./4./4./4..3--2,-2,.3-/4..5.+5-+6.-9//;12>24@46B47D38E3?L:ER>CQ:;I28F->L3DT9J_@Jb@LdBKc?Ia=G`9F_8G`9RkDXpL]uS^vVZqTPgK=T8.A.&1-',0%*.heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVniVljUlkWlm]pqi€‚ž¡¦¹¾ÄÄËÑÛåçõúýûÿÿüýÿüýÿþüýüûùÿþüÿÿûÿþùûüôõöîîñæéïãçíßíñâðôåñôãñôßòñÜêêÐØؼÇÇ«²±•¡¡‰‘“}“€œž‘ª­¢¶»´ÀÅ¿ÜÜÚééçööôûûùýýûÿÿýÿÿýýýûþþüþþü
þþüþþüþþüþþüþþüþþþûûýÿþÿÿþÿ÷òïüóìÿüóÿûíÿôäìñ¹p•P@¢O?¸XJ·M?µE9ÀG<ÁA6ÈE;ÅI?½G=¯@9¡;6š=8–A>?;‡@<@:}@;z?9y@9x?8x?8v=4v=4v=4u<3s=3r<2r<2q;1o;0o;0o;0n:/k9.j8-j8-i7.m82n72m61l71l71k60i70i70l:3j92h70f7/f7/f7/h91f:1j?6f>4e=1f>2hC3iE5fG3cG2cJ6hV@i\IskV…‚ozor_9=,28,28.17-17-17-06,06,06,17-17-17-06,06,/5+/5+/5+.3-.3-.3-.3-.3-.3-.3-.3-.3-.3--2,-2,-2,-2,.3--4-*4,)4,*6,+7-.:.1=14@26B46C2:G5<I78E12?+2@)8F/>N4DX<E]=K`AJb@K`?G_;H^:F^:SiEXpN`uV^uXZnSLcI9M4):(%.+%*.$)-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVlgTkiTnmYop`svmƒ‡†¤§¬¿ÄÊÎÕÛæðòûÿÿûÿÿûüþýþÿÿþÿþýûÿÿýÿÿûþýø÷øðïðèçêßàæÚÞåÕçëÚêïÛíðÛìðÙîîÖèèÐÚØ¿Ìʱ½§·µ ±®¶´§Á¾µÉÈÃÒÓÎÙÙ×èèæòòðúúøüüúýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿþüÿüÿÿüÿÿûúüû÷ýÿùþÿøþÿôÿÿíÿþìøßËŜŠbP–J:¥L<´P@¼L>¿@7ÆC;ÅF?¿D=±>;¦:7ž:8™>=‘=;Š?<„@=}@=xA<tB;rC;rC;q?6s>6s>6r=5r=5r=5p>5o=4n>4m=3l<2k;1j;1i:0i:0j:0n72p62p62m61m61l71i70i70l;4k:3h91e90e90e90e:1d<2fB6cA5cC4dD5dG5dI6bI3_I2^L6eV?jbMrmW„ƒo„‡r[`L3:(28,28.17-17-17-06,06,06,17-17-17-06,06,/5+/5+/5+.3-.3-.3-.3-.3-.3-.3-.3--2,-2,,1+,1+,1+,1+-2,,3,,6.+5-*4+*4++5,.8-0;-2=/2=-5@/6A05A-1=)1=)5A-8F/=O5@T8FY=H\@J]?I^=K^>K`?TgG[pQcvZ`tYYkSJ]G6G4&4%$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZlfVjgTkjUlnYorasvm€†„Ÿ¤¨¸¿ÇÖßäí÷ùûÿÿûÿÿûüÿþÿÿÿþÿþýûÿÿûÿÿúüüôóõêëíâãçÙÝãÕÜãÑãèÒçìÕéíÖêìÔëëÓèæÏßÚÄÓθÓκÌƶËøÐÉÁ×ÒÎàÛØêæåðïíóòðùù÷ýýûýýûýýûÿÿýþþüüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿûüÿøùÿþýþÿýùÿýôÿøïÿõ÷ÿöð÷çÿÿíÿîÛͧ”¤kX¢[GªWE­L;¹D:ÀD<ÀE>¼E?´A>ª=:¢:9š;9•=<?=…?=}@;wB<rC;oD;oD;p?8q?6q?6q?6q?6p>5o?5o?5o@6n?5m>4k<2h<1h<1h<1l<2o83q62p62p62m61l71j81i81h70g80e90e:1d<2e?4f@5dB6^B4aG8cL:dM;cM8`K6]K3]M4]Q9bX?mhRss[€ƒnv{eFO:4=*39-28.28.28.17-17-17-06,17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,-2,,1++0*+0*+0*+0*,1+,3,.5.,6.*4+*4+*4++5,-7,.9+-8*0;+4?/5@/4?.3?+3>-3@,7F/9K3@O8BT:GW<H[?M]BM`DUeJ\oSdtZ`rZXgRFWD4B1$2%$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjcYkdZlc\md[lfZlfVliVlkVkmXjn]lrhy}• 
®·¾ÒÛàêóøûÿÿùþÿùúþþÿÿÿþÿúù÷ÿÿûþýøùùññóèéëÞãçØßæÖàèÓäéÒçíÓèìÓçéÑèèÐçåÎàÛÇ×оÚÓÃÙÑÆÞÔËæÝØîäãóéêúñôÿùûúù÷þþüÿÿýýýûýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿùøÿúùÿþýüÿý÷ÿþîÿùñÿÿòÿýõÿúóùëÿýíÿúèôλ«wbQ:©[G²N>¸J=¹I>¸I@´G@¯D>¥=:œ:7–=9>:ˆ?9€A:xA:tD:pF:pE<p?8q?8q?8q?8q?8q?8p?8p?8qB:pA9n?7l=5i=4j>5j>5n=6o83r73p62p62n72l71j81i81d8/d90c;1d>3d@4cA5cA5_C5ZE4_N<gVBhWCcS<^O8[O5\R7]V<]Y>nlUtv^|kcmU2>(6B.3;.39/39/39/28.28.28.17-17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1+,1++0**/)*/)+0*,1++2+-4-+5-*4,*4,+5,+5,,6+-7,,6+/:,2=/6A17B27B15@04?.2?-4C.:G3=L5AO8DT:JX?L\BTbI\kTcqZ^mXUbPDRA2>0#/%&/,&,,$**heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjcYkdZlc\md[lfZlfVolYmlWjlWgkZflbr{xŽ˜š¨±¸ÊÓØãìñøýÿ÷üÿúûÿþÿÿÿþÿúùõÿÿúýýõøøîðòåéëÝåéØãêØåíÖæìÒéíÒêíÒèèÎèæÏèãÏáÚÈØÐÃØÎÄÝÒÌéÞÚøíëÿôöÿõùÿõûÿùüÿýþÿÿýÿÿýþþüþþüÿÿýÿÿýýýûþþüþþüþþüþþüþþüþþüþþüÿþúÿýûÿýûýüúùýüøÿÿõÿÿóÿÿíýúóÿûûÿúÿÿôÿöæÿóßÿãÍ×­•©oY¨WD®Q@¬O>¯OA±OB­K@¤C<™<5•>7?6ˆ?8A7{B7uD6qE8qE8p?8q?8s>8q?8q?8q?8q?8p?8sB;qB:p?8m>6l=5j>5m>6o>7o83q73o83m82m82i81h91f:1f;2d>3d@4bB5`C5]A3\@2WB1TG4[S>f^Gi^HbX?ZS9ZS7\W:ZV;XW;kmUsw^u}fUaI&28D03;.4:04:039/39/39/28.28.17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1++0*+0**/)*/)+0*+0*,1++2+)3+*4,+5-+5,,6--7.-7,,6+.8-2<16A39D69D47B46A10;+2?-5B09F2<I5@N7FT=JX?R`I[hTanZ\iWQ^MBN@/;/",#+1-(.,&,,heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWkdZle[md]ne\mg[mgWqn[kmWknYkr`jrgpyv‡‘“Ÿª°»ÄÉÐÙÞêïóöûþüýÿþÿÿÿþÿþýùÿÿúþþöúúðöøëôöèðôãêñßçïØðöÚíòÔëîÑììÒèæÑáÜÉÝÕÈÞÔÊáÔÎèÚÙòääúëîþòôÿ÷ûÿùÿÿüÿÿýþþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþÿúÿþúÿþúþþüýÿþûÿÿúÿÿúÿÿûÿÿþÿÿÿÿýÿûõÿûñÿüìÿûåÿóÛÿêѺ‚i¦bK—O9ŸT?¨ZF£RAžK; 
M?Ž<0‹=0†>2=/x=/t?/sB3tD6r=5r=7t=8s>8u@:u@:t?9q?8sA:q@9r@9q@9p?8n?7o>7o>7k92m82l:3l;4l;4h<3g<3c=2c?3^>1dG9cG9T=-N9(M8'@1WQ;PN7KI0OK2]Y>ieJeaDVU7XX<]`CosZz€fdlU>J2-9#2>*4</5;14:04:04:039/39/39/39/39/39/28.17-17-06,06,16016005/05//4./4./4./4.,1+,1+,1+,1+,1+,1++0*+0*.5.-4--4-,3,-4--4,.5-.5-+2*-4,07/5=29A6=E8?G<?J<7B25@/3>-1=)2>*9E/BN8IU?O[GVbN]hWYdTLWI<F;/9.)3*)/+(.*'-+heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWkdZle[md]ne\mg[khWonZjlVknYkr`jtiq|x‰”–£®´¾ÇÌÒÛàëðô÷üÿýþÿþÿÿÿþüþýùÿÿúþþôûûï÷ùëô÷æðõáêòÝèíÖêïÑæëËäçÊææÌãáÌßÙÉÞÔÊßÔÎçÙØíßßöçêûïóÿôøÿ÷ûÿúþÿüÿÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüûÿúûÿúýÿüýþÿþýÿÿüÿÿüÿÿûÿÿüÿÿüÿÿûùÿüõÿþñÿÿíÿûåÿòÙÿëÑ౗§u\‘[C–ZBšYCšVC™R@ŽG5‹F6ˆE4ƒD3|C2wB0s@/o>/u@8t?9u>9u@:t?9o=6p>7sA:q?8p?8p?8o>7o>7m>6n=6m>6k<4l=5m>6k?6j?6gA6eA5bB5dG9[A2^G7^I8N=+F7$G8%?7"LL4GK2DG,BE*MM1[[?baC`aBYY=dgJsw^sy_X`I:D,/;%7C/6>16<26<26<25;15;15;15;15;15;15;14:04:039/39/39/27127127116016016005/05/.3-.3--2,-2,,1++0*+0*+0**1**1*)0))0))0)*1*+2*,3+.5--4,.5-07/4;39A6?F>BJ?;F8:E57B14?.3?+6B.<H2@L6LXDS_KYdTWbRKUJ=G<1;2+5,+2+*0,)/+heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWlcZmd[md]ne\mg[khWmlXimVjoYktaiuiq}yŠ˜™¥²¸¿ÊÐÔÝâíòö÷üÿüýÿýÿþÿþüþýøÿÿ÷ýýóúûí÷úéô÷äïôÞçïØåëÑâçÉÞáÂÝÝÁÞÞÆÞÛÈÝ×ËàÕÏäÙ×îââóçëúîòÿôøÿ÷ûÿùüÿúýÿýýÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüøÿüøÿüûÿþþþþÿüÿÿúÿÿùÿÿøÿÿùÿÿúÿÿúúÿýøÿÿöÿÿóýÿïÿúæÿóÛÿøàÿìÓ㿧¬‚j‡YB…Q;’[F—]IS?F2u>)r>)s@+tD0vE4q?4n<3q<4tB9sA8o?5qA7xH>n>4m>4m>4m>4l=3j>3l=3j>3g?5gA6gA6gC7eC7cC6`C5^D5bK;UB1ZI7`S@RG3C;&E=(FA+AE,BH.AE*<@%?B%LO2_`AijK``DnqTvzagmSJO94<%4=(=F38>28>48>48>47=37=37=36<28>48>47=37=37=36<26<26<25:449349349338238238227105/05//4..3--2,,1++0*+0*).().().().().(*/)+0*,1+160/4.,1+,1+/4.6;4=B<AH@@K=>K:<I78E34A-2?+3A*5C,DQ=KXDQ^MQ^MHTH<H<2>4.8/-4--4-,3,gdUgdUheVheVheVheVifWifWifWifWifWifWifWifWifWifWkeWkeWlcZmd[md]mf\mg[jiWkmXimVjoYjs`hthm|wˆ˜˜¦³¹¿ÊÐÓÜãëðööûÿüýÿýÿþÿÿýÿþùþþöýýñúûí÷úçô÷âîóÜåíÕâèÎÛàÀ×Ú»ÖÖ¼Ù×ÂÝ×ÇÞ×ÍåÛÙìààøìðûðöÿõûÿùþÿúýÿûûÿûûÿýüÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüøÿþøÿþûÿþþþþÿüþÿúÿÿùÿÿøþÿúýÿùüÿùùÿ
ýúÿÿøÿÿøûÿõüþðÿÿíÿüéÿñßÿðÝÿïÚñÒ½´zyS>sI3uH3vI4zM8~O;yL7nA.e7'{M@sD:qB8sD:oC8g;0d8-g=1i?3i?3i?3g?3f>2f>2f>2e?2b@4bB5`C5_C5_C5[D4YD3WD3XI6OB/\T?oiSc^HMK4IG0IK3>D*?H-@F*<B&<A#EJ,XY:deFmmQxx\tv^[_F@C.6;%8@+<C19?39?59?58>48>47=37=37=38>48>48>48>48>48>48>48>47<67<67<66;56;56;55:45:438238216005//4.-2,,1+,1++0*+0**/)*/)+0*,1+-2,-2,05/.3-+0*+0*-2.2718=9;B:@K=@M;@M;=J68E13@,1?(0>'<I5BO;HUCIVEDPB;G;2>4.:0,6..5.-4-gdUgdUgdUheVheVifWifWifWifWifWifWifWifWifWifWifWkeWldWlcZmd[md]mf\mg[jiWkmXinWiqZgs_drejyt…••£²·½ÈÎÒÛâêïõôùýûüÿýÿþÿÿýÿÿúÿÿøÿÿóþÿñûþë÷úåðõÞæîÖâèÌÙÜ¿ÕÕ¹ÒйÔѾÚÔÈàÙÑêàßôéíþóùÿ÷üÿûÿÿýÿÿþÿÿþüÿþúÿþúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿþúÿÿúÿÿûÿÿþþþÿýþÿüüÿûüÿûúÿúøÿù÷ÿú÷ÿýøÿÿûÿÿûýÿúýþùùôîÿþöÿýôÿüóÿûîÿ÷çûæÕéÒÀÏ´¡¶š…”uawVClI6mH6rM;uP>mF7iB3gB2jE5nI9nJ:oK;oK;eA1cB1cB1cB1cB1cB1bC1`C3]F6\G6\G6ZG6ZG6WH5TG4RG3PH3NH2fdM~~frrZVX@JL4GM3<E*<F+?H+AG+AF(CH*MN/TU6xw[{z^nnVUU=CB.?A,>A.:>-;>39?59?58>48>47=37=37=38>48>48>48>48>48>48>48>49>89>89>88=78=78=77<67<66;55:449338227105//4./4.-2.-2.,1-+0,+0,,1--2.-2.,1-,1-,1-,1-.210513763:3;G9=L9@O<@O:<K67F/4C,2A*6E.:I4@O<BP??M>8F71?2-9-,6--4,,3+fcTfcTgdUheVheVifWjgXjgXifWifWifWifWifWifWifWifWldWldWlcZmd[md]mf\mg[jiWkmXinWiqZgs_bpcgxr‚””£²·ÀËÑÔÝäëðöõúþûüÿýÿþÿÿýÿÿúÿÿøÿÿôÿÿóÿÿïúýèò÷àçðÕâèÌ×Ú½ÑѵÍË´Î˺ÖÏÅÞ×Ñìâã÷îóÿöþÿúÿÿýÿÿþÿÿÿýþÿúþÿùþÿúþÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþþýþÿýþÿýþÿýÿþýÿüþÿúþÿùÿÿ÷ýýóýýõþýøþýùÿþüþþþÿýÿÿþÿþùÿÿüÿÿûüûöóÿøòÿýôÿþñÿþíÿúéÿóà÷äÓçÒ¿Ôº©ºŸŽŸ„srb|]K}^LtUCaD2W:(Y<*[>,X=*`E2`E2`E2_F2`G3`G3`G3]H3\K7\M:\M:YL9XM9UM8RL6QL6NL5QQ9ikS|€gmqXSY?FL2>G,<F+<F+?H+DJ.EJ,DG*HI*LM.xw[tsWfdMTR;LI6KJ6FE3<=-<?49?59?59?58>48>48>48>47=37=38>48>48>48>49?59?5:?9:?9:?9:?99>89>89>88=78=78=77<66;55:4493382382/40.3/-2.,1-+0,+0,+0,+0,+0,,1--10.21/32/32/32-4-3?17F3=L7@O:?N7<K49H18G05D-8G0<K6>M:<J97E61?2-9-+5,-4,,3+fcTfcTgdUheVheVifWjgXjgXifWifWifWifWifWifWifWifWldWldWlcZmd[md]mf\mg[jiWjlWhmVhrZfs_cqdhys…——¥·»ÇÒØÚãêðõûøýÿüýÿýÿþÿÿýÿþùÿÿõÿÿòÿÿðüÿìøûæïôÝâëÐÝãÇÕÕ¹Î̳ÉÄ°ÊÄ´ÐÉ¿ÚÒÏéàãöíòÿ÷ÿÿúÿÿýÿÿþÿÿÿýýÿúüÿøüÿøýÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþ
üþþþÿüÿÿüÿþýÿýÿþúÿúøÿùøÿõøÿóøÿóøÿõüÿøýÿúÿýþÿüÿÿüÿÿûÿý÷ÿÿúÿÿüÿÿýÿÿþÿþýûûûóúøìÿÿñÿÿïÿÿíÿþíÿýíÿöçÿîàýêÛúãÑÿæÐãÊ´¥xt^GeO8bL5\H0^J2\J2\J2]K3]K3^L4^L4\M6YN8WO:XP;VP:UO9SN8PN7NN6MO7PT;`fLfoTU^CEO4AK0<F+?I.?I.CL/HN2HM/FI,OP1YX:qmRjfKa\F[V@YTATQ>MJ9BC3=@5;A7;A7;A7:@6:@6:@69?58>48>48>49?59?5:@6:@6:@6;@:;@:;@::?9:?99>89>89>8:?9:?99>89>88=78=77<67<6495273162/40-2.,1-+/.+/.,0/,0/-10.21.23.23-12+1-+9*/@-6H2;M7=O7<N4:L2:L25G/7I1:L6<M:;L:6G70@3-;.,6--4,,3+cdRcdRdeSefTgeVhfWigXigXhfWifWifWifWifWifWifWkeWkeWkeWldYle[md]mf\kg[jiWikVhmWhrZgt`dqgj{u‰›ª¼ÀÍØÞàçíôùÿúÿÿûÿÿýÿüþÿúþþöúúîúûíúûéøúåóöáêî×ßãÊÙÜÁÓѸÌDZž¬Æ¾±ÌüÖÎËæÝàôêòþ÷ÿÿúÿÿþÿÿÿÿþþþýÿúüÿúüÿúýÿüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüÿþüÿþüÿþüÿýþÿüÿÿüÿÿýÿýÿþûÿüøÿù÷ÿ÷÷ÿõøÿöùÿöüÿøÿÿûÿþýÿüÿÿûÿÿúÿÿúÿÿûÿÿüÿÿþÿþýÿûýüüþûþÿúùúòúüñýûïþúîÿùìÿûìÿýïÿýìÿðÛÿûâìÜގu`P7RB)XH/XH.[M3[M3ZN4ZN4[O5[O5\P6[Q8UO7TO9TO9QO8PN7NN6KM5IM4JN5LR8S\APY<@I,;F(AL.?J,DM0EN1JQ2MR4JM0LL0YY=kjNhdIb^E^ZA_ZD`[GZUAQO:KJ8AB4@B5@B5>A6>A6>A6=@7=@79?59?59?5:@6:@6:B7:B7:B7;@9;@9;@::?8:?99>79>89>7;@:;@9;@:;@9:?9:?8:?9:?98?87=94;4382160/4..3/.3/.21.21,2.,2.+2++2*,4))6%$6)=!2F+8L1;O6;M5;M5;M58J29K3;M5<N6<N87H51B0.<--7,.5--4,]eN^fO_gP`hQdiUfiVgjWijXfgUhgUheVheVjdVkeWkeYlfXlfXkhYmg[kg\mf^jf[ieYgeVghVfkUenYbo]dqhsƒ€–¨¬¸ÇÎÒÛââéïò÷û÷üÿøüûüþùüÿöüþñ÷øêùúêø÷åòñÝîíÙëêÖàßÊ×Ò¾×ϼËÀ®½´¥½³©Â·±ÌÂÁÛÒ×êàèúóûüöÿþûÿÿþÿþýÿüýÿüþýüþýþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüþýùþýùÿüùÿýúÿþûÿþûÿþÿÿýþÿüýÿýþÿÿÿþÿýýÿüûýøÿÿûüý÷ÿýøÿþúÿýøÿúöÿ÷ôÿùùÿúÿÿüÿÿýÿÿÿÿÿÿÿýÿüýÿþüþýþÿÿÿÿÿÿþüÿþùÿþóÿþîÿýêÿýæúôÜÿúàçàƏˆlUN2WN1TK.SJ+WN1WN1UN1UN1TO2TO2TN4SO4QM2RN5QO6PP6MO7KO6HM6GL5JP6FO4AJ-=F'>H&DK*FM+EL*PV4PT3KN/HK,TT8bbFccIZZ@[Y@\ZA][B][B[Y@XV=US:SQ8HF1GD1DA0A?0@>1>>2??5>@5;>59?59?59A69A48C58C58C5:B79A69@88@58?79A69@8:B7:A9:B79@8:B7;B:<D9=D<>E=8B:8B:7A88@56>14</39/271.40/51-7/-9-.;)0@&6G'7M';V+:W+;U0;T4;R8=P:>O<>P::L6@R8EW=FY=DX=@T;5H2+<),6+,3++2*ZgM[hN\hP]iQbjSckTfkWhkXghVghVifWifWkeWlfXmeZmgYmgYkhYkg[kg\jf]jf[hfZefVhiWglVfo\erajwnzŠ‰œ«²ºÈÑÐÙàáéìò÷ûøþþùþúúÿøûþóùýïøùë÷øæõôâíìØéæÓåâÏÜ×ÄÒ˹ʿ­Á´¤¹­¡¼¯¦Á¶²ËÁÀÛÐÖæßæûóþý÷ÿþûÿÿþÿþýÿüýÿüýÿüýÿþþþþþþþþþþþ
þþþþþþþÿþüÿþüÿþüÿþüÿýúÿüùÿüùÿýúÿýùÿþúÿþýÿýüÿüýÿýþÿþÿÿþÿÿýþýûüÿþýÿþûÿüúÿøóøíçòåÝöèßûðêÿúýÿüÿÿýþÿÿýÿÿûýÿüýÿþüýÿýþÿýþÿÿýÿÿþüÿþõÿÿîÿþèÿÿäúöÝýûâäàNj‡lRM0SN0QK+QK+UO/UO/TO1TO1SO2SO2QP4QP4QM2PO3PN5NQ6MO7JP6HM6EM5HQ6EN1BK.?I'BI(FM+JP,KQ-NR/PT1VZ7^bA_bCY\?TW<TT<VV>WU>XV=YW>YW>XW;VU9XT9QK3OI3LG4ID1DA2B@3A?3>@5<=59?59?59A67B48C57D37D3:B79A69A68@58@59A69A6:B7:B7:B7:B7:B7;C8<D9=E:<F;=G?<H><F;;F6<D5:B39?38=67>74>63=40>-3C)9M*BY/Ga1Op;Ln;Li=Fa>BY?<R=<M;:K8=O9DV<K^BM`BL`DH\@:Q71D.+5*+2*)0(ZgM[hN\hP^jRbjSdlUglXilYijXijXkhYkhYlfXmgYnf[nhZmgYkhYkg[jf[jf]ieZhfZfgWhiWejTdmZgtco|sŽ¬³¶ÄÍÍÖÝÞæéñöúøþþúþýùþøøûòôøêóôæðñßêé×áàÌÜÙÆ×ÔÁÐ˸ÇÀ®¿³£½° ¼°¤Ã¶®ËÀ¼ÕËÊãØÞëäëüôÿý÷ÿþûÿÿþÿþýÿüýÿüýÿýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüÿýúÿýúÿüùÿüùÿüøÿüøÿýüÿüûÿûüÿüýÿþÿÿþÿÿýþþüýÿþýÿþûÿýûüóîêßÙâÕÍëÝÔ÷ìæÿúýÿüÿÿýþÿÿýÿÿûýÿüýÿþüýÿýþÿýþÿÿþÿÿÿýÿþõÿþíÿýçÿþãû÷ÞþüããßƉ…jPK.QL.QK+QK+TN.UO/TO1UP2TP3SO2QP4PO3QM2ON2PN5MP5MO7JP6IN7GO7HQ6FO2CL/CM+FM,JQ/OU1SY5QY2W_8jpJy[qvVY^@KO4MP5QQ9QQ9RP9SQ8TR9UT8YU:ZV;XR:VP:RM:NI6HE6DB5B@4?A6<=59?5:@69A67B47B47D37D3:B7:B79A69A69A69A6:B7:B7:B7:B7:B7:B7;C8<D9>F;<F;?IA>J@?I>>I9?G8>F7>D8=B;>E>9C;6@74B19I/BV3Pg=XrB^J[}JXuIMhECZ@9O:6G54E2<N8EW=NaESfHSgKNbF@W=6I3/9./6.-4,ZgM[hN\hP^jRblTemVhmYinZklZklZlj[liZnhZnhZog\oi[liZkhYjfZjf[ie\ieZhfZfgWfgUbgQajWerao|s~Ž–¥¬«¹ÂÇÐ×Úáçîó÷öüüøüûõúôóöíîòäéêÜãäÒÜÛÉÓÒ¾Î˸ÉƳþ«½¶¤Ã·§Ç¹¬ÍÁµÖÉÁßÔÐèÞÝóèîøñøýõÿþøÿÿüÿþýÿýüÿüýÿüþýýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüþýûÿþüÿýúÿýúÿýúÿüùÿû÷ÿû÷ÿýüÿüûþúûÿûüÿýþÿþÿÿýþþüýÿüûÿþûÿýûýôïíâÜæÙÑðâÙüñëÿúýÿüÿÿýþÿÿýÿÿûýÿúýÿþüýÿýþÿýþÿÿþÿÿÿýÿþõÿþíÿüæÿüáþùãÿþçäßɊ†mPJ0RM0SL/SM-TN.UO/UP2UP2TP3SO2PO3ON2PL1NM1OM4LO4LN6JP6JO8HP8JS8GP3GP3IS1MT3OV4V\8\d=grHtU‡’h›s…lnuTZaBRV;LN6NN6MM5NL5OM6RP7WS:YU<ZT>XR<TO<QL9KH9GE8EC7@B7=>6:@6:@69A67B47B46C26C2;C8:B79A69A69A69A6:B7;C8;C8:B7:B7;C8;C8=E:>F;=G<=G?<H>>H=>I9?G8>F7>D8=B;=D=9C;6@74B19I/DX5Ri?[uEZ{FWyFTqEIdA=T:2H3/@.->+6H2@R8L_CReGSgKOcG@W=6I31;007/-4,YgMZhN\hP^jRblTemVinZjo[mn\lm[mk\mj[nhZnhZoi]oi[liZkhYjfZieZie\ieZig[ghXghVchRajWerao|s{‹ŠŸ¦¢°¹ÁËÔÔÝäéñôòúüôúøñöðíðçèìÞâãÕÚÛÉÒÑ¿ÌɶÉıþ«¿¸¦¼³¢Ê¾®ÓŸÜÐÄæÙÑíâàöìíÿôúÿùÿÿ÷ÿÿú
ÿÿüÿþýÿýüÿûüþüþýþÿÿþþþþþþþþþþþþþþþþþþÿþüÿþüþýûþýûÿýúÿýúÿýúÿüùÿúöþùõÿýüÿûúýùúþúûþüýÿýþÿþÿÿýþÿüûÿýúÿüúÿøó÷ìæòåÝøêáÿôîÿúýÿüÿÿýþÿÿýÿÿûýÿúýÿþüýÿüýÿýþÿÿþÿÿÿýÿþõÿýìÿûãþúßÿüæÿÿéçâ̌ˆoRL2TO2TM0UN1TN.UO/VQ3VQ3UQ4SO2ON2NM1PL1NM1NL3KN3LN6KQ7KP9JR:LU:IR5JS6OY7SZ9T[9]c?fnG‚’c’¦sŸ±›ª’¡zˆ”pr{\\bFLP7KM5LK6LJ5LJ5PK5TN8VP:WQ;VP:SN;QL9LI:IG:GE9CE:=>6:@6:@69A67B47B46C26C2;C8;C8:B79A69A6:B7;C8;C8;C8;C8;C8;C8<D9=E:>F;=G<=G?<H>=G<=H8>F7=E6=C7=B;:A:7A95?64B19I/BV3Ne;Uo?Tu@RtAPmAE`=9P6-C.+<*+<)1C-<N4H[?PcERfJNbF@W=5H2,6++2*'.&
\ No newline at end of file
diff --git a/testimages/testimgfst.jpg b/testimages/testimgfst.jpg
deleted file mode 100644
index a586047..0000000
--- a/testimages/testimgfst.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgfst.ppm b/testimages/testimgfst.ppm
deleted file mode 100644
index e158bd1..0000000
--- a/testimages/testimgfst.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-227 149
-255
-0/+0/+0/+10,21-32.52-63.83/83/83-83-:3-:3-:3+:3+72.72.61-61-61-61-72.72.50,50,50,50,50,50,50,50,4/+3.*3.*3.*2-)2-)2-)2-)1,(1,(1,(1,(1,(1,(1,(1,(.+$.+$/,%0-&1.'2/(2/(30)52+63,63,74-85.96/96/:7.@:.A;-C=/F@2GA3IC5JC3JC3LE3LE3LC2LC2LD1ME2PE3QD3P?/V?/`@1mC3}E6ŠG7˜G6¢E4¯H9¶F8¾F8ÂF:ÆH<ÈH=ÊG?ÐE@åFKëDLëDKëEIíBHîACñ>Bó<@õ;>ô:=ó9>î;>è>?ß@=Ö>;Ê@6»=.´?+´?+´?+²?,²?,°@,¯@,­@,©@+¥@,¡A+A*™A)˜@(•A)’@+?,‘>,‘>,=+=+>)>)“>*“>*“>*“>*”=)•>*–?+•?.”@5“?5—=2ž>0¨A0³D1¹D2½D1·?/³B2ªF7œI;†H9mB2T9(C3&<5/95484373262/51.40-3/,63.52-52-52-41,41,41,10+-/*+0*-/*-/*./*./*1-*1-*2-*1,)3)(2('5)):..D66O:9iAB|IH‹NM”POžVY©ck¨o€œtŽ’|¢…«†º|ˆ¼rz«^aŽTKvOEhRIfMH^KH[SQ_ddn{|‘˜—¢¨ž¡§™¡’Ž’z{iefTXWEQNETQZTPaTPa/.*/.*0/+10,21-21-52-52-72.72.72,72,92,92,92*92*72.61-61-50,50,61-61-72.61-61-61-61-61-61-61-61-3.*3.*3.*3.*2-)2-)2-)1,(2-)2-)2-)2-)2-)2-)2-)2-)/,%/,%0-&1.'2/(2/(30)30)52+63,63,74-85.96/96/96-@:.A;-C=/E?1F@2HB4IB2IB2JC3JC1KB1JA0KC0LD1OD2PC2O?/U@/]@0iB1wD3„E4‘D4›B2¦B2¬A1³B4ºB4¾D7ÁE9ÅF=ÊC=ßEGçBHèCIêDHìCFïBDó@Cõ>Bö<?÷:>ó:?ï<?ç??Þ@=Ô?;É@6¸=-³@+´?+²?*²?,°@,¯?+®?+¬?+©@+¥@,¡A+A*˜@(—A(•A)‘?*?,‘>,‘>,=+=+>)=(“>*“>*’=)’=)“>)•>*•>*•?.”@5“?5—=2Ÿ?1«A1³D1¼D3½D3º@1´C5«G8šH:‚G9j@0S9*A3&=60:6595484373051.51.40-63.52-52-52-41,41,41,10+.0+,1+.0+.0+/0+/0+0/+2.+3.+2-*3+)3)(5))9--A55M87d>=vEA†KGŽMK™SS£`g¢l|šrŒ{ 
…­~…¹y„ºmt¨Z\ŒPIuNDiNEdMH_NI_PM^ZZfmnsƒ‡ˆ“˜’˜–™Ÿ“˜œ‘€|nijX]\JURIURYUQ`UQ`..,..,0/+0/+10,21-41,52-52-52-61+61+61+61+81)81)61-61-50,4/+4/+50,61-61-61-61-61-61-61-61-61-61-3.*3.*3.*2-)2-)2-)1,(1,(3.*3.*3.*3.*3.*3.*3.*3.*1.'1.'1.'2/(30)30)41*41*52+52+63,74-74-85.96/96-?8.@:.B<0D>2E?1F@2G?2HA1HA1HA1H?.H?.I@/JA0MA1MA1L@0QA1YB2bC1nC2{C2‡B2‘@/—<*Ÿ;+¦<,®>0³B4¸D7¼F:ÄD;×CCß@DâAFæCFéCEïBDôADø@Bø;?÷:>ô<>ï==æA?ÜA<Ð@8ÅA5¶>-±A-²?,°@,°@,¯?+¯>,­@,ª?+¦@*£@+Ÿ@*›A)˜@(–@)•@+‘?*?,?,>+>+>+<*<*>)>)>)>)>)“>*”?+•?.”A3“?4˜?1¡@0­B2·D2¾C3ÀB3¾D5¸G9«I<™J=€F:fA1P:,B6*?82<74;63:5294183062/51.63.52-32-32-21,21,21,01+.0+,1+,1+,1+.0+/0+0/+0/+3/,4/,5-+3+)4*)7-,=32G53[:5k@9{F>…JBOM—[]™gs’o‡‰xœ~«{‚¸s~¶fm£TV‰MEvLAkKAcOFaOJaLI\LKYZZdqux„‰……Œ…‰‘†Š‚…‰zwykfiXZ[KSSIYTZYS_YS_--+--+/.*0/+0/+10,41,41,41,41,50*50*50*50*70(70*50,4/+4/+3.*3.*4/+4/+50,50,50,50,50,50,50,50,50,3.*3.*2-)2-)2-)1,(1,(1,(4/+4/+4/+4/+4/+4/+4/+4/+2/(2/(30)30)41*41*52+52+52+52+63,63,74-85.85.96-=6,>8,@:.B<0C=/D>0E=0E=0E>.E>.F=.F=,G>-H?.K?/JA0KD2MD3UD2]D0gD1rC/|A/†?-‹:'‘8&š9(¢=+ª@0°E5¶H9¿F;ÒDBÚACÝCCáCBçCBìBBò@@ö>@ø;?÷:>ô<>í?>åA?ÙB;ËA7¿?2³>,¯@,¯?+®?+¯?+­>*­>+¬?+¨?*¥?)£@+ž?)š@(–@'”@(’@*?*?,>+>+>+Ž=*<*<*>)=(=(=(<*>)‘>,’?-“A3–@3›?0¤@1°B3»C5ÁC5ÁC5ÀF9¹I=¬L@—K>{G:`@3L:,?7*@93=85<74;63:5294173062/63.32-32-32-21,12,21,01+,1+,1+,1+,1+.0+.0+/0+0/+21-40-4/,4,*3+)5-+91/A2/R7.b<1pB5yE:‚IBˆRR‹`j‰i€‚r—|y¨w~µmw²_eŸQRŠLEyL@pL@fODdOFcID[FCVLLX[^cimnsyuw~vyutzlkoa_aSVXJSSI[WX[V\\U\,,*,,*--+..,0/+0/+10,10,30+30+30+30+4/)4/)4/)4/)4/+3.+3.+2-*2-*3.+3.+4/,4/,4/,4/,4/,4/,4/,4/,4/,2-*2-*2-*2-*1,)1,)0+(0+(4/,4/,4/,4/,4/,4/,4/,4/+30)30)30)41*41*41*52+52+41*52+52+63,74-85.85.85,<5+=6,>8,@:.A;/A;/C;.C;.C;.C<,D;,D;,E<-G>/J>0I@1JG6JG4PH5WF2`E2jD/tA.|?,‚;'ˆ:&‘:'™>+¡B0ªF6°J;ºJ>ÊFAÓCBØDBÜDAâC@è@?ï==ó;;ø:<÷;<ó=<ë?=àC>ÓC:Ä@3¹@/¯?+­@+®?,¬?+­>+¬?+«=,©>,¥>+¤?+ 
?,œ@+˜@*•?(”?*‘?)>)>+>+>+Ž=*Ž=*<)<)Ž=*Ž=*<)<)Ž=,Ž=*>-?.”C2–@1@1¨A2´C5½C6ÂB7ÂB7¼C:´H>¥KBJ@tE;Y?2E9+:6*>93?74?74=52:5194074/74/43.32-32-23-12,03,12,/2+-2,,3,,3,,3,-2,/1,/1,01,12-32.40-3.+2-*2-*50-;0,M7,W:,c=0l@3rC9xKHZbh~~o–yw©tz¶js´\d£SU’MGƒKAvNAmNBfKAcGA]EBWFEUILUMPU]ca`g`ah``f\Y_SVZLUYKVXM\YT]WW_VW++),,*,,*--+/.*0/+0/+0/+2/*2/*2/*2/*3.(3.(3.(3.*3.+2-*1,)1,)1,)1,)2-*3.+2-*2-*2-*2-*2-*2-*2-*2-*2-*2-*2-*1,)1,)0+(0+(0+(3.+3.+3.+3.+3.+3.+3.+3.+30+30)41*41*41*41*41*41*41*41*52+63,63,74-85.85.;4,;4*=6,>8,?9-?9-@8-@8+B:-B:-C9-C:+D;,F=.I=/G@0IH6IH6MH5SF3[D2dC0lA.s?*{<*‚;'‰;'’=)šA/¡F3ªK9²K<ÃH@ÌE?ÐE@ÕE=ÝD?äA<ë=<ñ;:ö:;ô:;ð<;ç@:ÚC<ËC7»@0±>+¬?*ª?+ª?+ª?+©>*©>*¨=+§>+£>*¢?*ž?+›?*—?)”?*‘?)>)>+>+>+Ž=*Ž=*‹=)<)‹=)<)‹<+Š;*Š;*‹<+Œ=,>/>-’C2—A0 @0«A3¸B6¿C7ÃC:ÁB9¸A9¯H?¡ODŒNCoG=S@2@:.77+>93?74>63=52:5194074/74/43.23-23-14-03,03,03,-2+,3,,3,,3,,3,-2,-2,/1,/1,01,12-32.10,1-*1-*2.+8/*F5+M5)V9+`<.e?6jGCwYck‚tœ{{¯w|¼luºbi¯[\ŸRMLC|MAoK?eH>aF?^FB[EDV?AN;@FBGJDMHIPIJQIIQFJRENVIUXM^ZQ`YQ`YS*,+*,+,,*,,*--+..,0/+0/+/.*/.*1.)1.)1.)1.)2-)2-)1-*0,)0,)/+(/+(0,)0,)1-*/+(/+(/+(/+(/+(/+(/+(/+(1-*1-*0,)0,)/+(/+(/+(/+(1-*1-*1-*1-*1-*1-*1-*1-*30+30+30+30+30+30+30+30+41,41,41,52-63.74/74/85.92*:3+;4,=6,=6,>7->5,>6+A9.A9,B8,C9-D:.F<0J>2H@3FD5GF4KF3PE3VC2]B/e@.l=+u=,|;)‚9(‰:)“=,™B/¡F4ªF7¹G<ÁE;ÇG>ÏF<ØE>àC<é@=ï=;ò::ð:9ë<9ã@9ÖC;ÅC5´A.¨>(©@+§@-¨?,¨?,¨?,¥>+¥>-¤?-¢?,Ÿ>+œ?-™>+•>*“>*>)>)>+>-Œ=,Œ=,‹<+Š=+‹<+Š=+‹<+‰<,‰<,‰<,‰<,Š=-‹=0Œ?/’C2—A0¡A1¬B5¹C9ÀD:ÃB<¿C;¸G?¯QI£YPŽZOpRGVH;BB6<?4<90>71>71<5/94.83-63,63,43.23-23-14-03,.3,.3,,3+,3,,3,,3,,3,,3,,3,-2,-2,.0+12-23.12-0/+/.*0/+4/+>1+C1'K3'S9,Y<4cGDs]iƒs‡€ªƒ„¼~ƒÇs{Äkq»ce¯VSšICK?oI=eF<aE>]EAZDCU>BN8?E:BD=HDDMHFPGGQFHRGLVKUXM^ZNaYLaYN)+**,+++),,*--+..,/.,0/+.-).-)0-(0-(0-(0-(1,(0-(0,)0,)/+(/+(/+(/+(0,)0,).*'.*'.*'.*'.*'.*'.*'.*'1-*0,)0,)0,)/+(/+(/+(.*'0,)0,)0,)0,)0,)0,)0,)0,)3/,30+30+2/*2/*2/*2/*2/*30+41,41,52-63.74/74/85092,:3+;4,<5-<5-=6,=4+=4+A8/A9.B8.C9-D:.F<0J>2I?3EB3FC4JC3NB2SB2Z@1`?0g<,p<.u:*|9)„8(Œ;*”>-šC2£C3±G:·E:¿F;ÈH=ÒH>ÝF?çD?íA=ë<9ê;8ç>9ÞA:ÏE
:¿D4®A,¢>'¥@,¤A,¥@,¥@,¤?+¤?+¤?-¢?, ?,ž?-š?,—?+”?+‘?*>)Œ>*>-Œ=,Œ=,‹>,Š=+Š=+Š=+ˆ=*‰<,ˆ<,ˆ<,…<-†=.‡>/ˆ>1Š>0‘D2–C1¡A3­C6ºC;¿D=ÀC=ºE>·PI°\R¤f[fZu_R[UGGM?BJ=;8/>7/=6.:5/94.74-63,33+34.23-14-14-.3,.3,-4,,3+-4--4--4--4--4--4-.3-.3--/*/1,23.12-/0+./*/.*2.+7-+:,)B1)J6-Q=6]IHscn…{”ŠµŠŒÅ„‰ÍyËqwÃhl¶WWŸHDE=lE=bD=_B?\B@V@CT>DP=FKGQRKWSQ^US`VR`SR^RS_SY_Q]ZI`YG`YG+-,+-,+-,+-,+-,+-*,,*,,*/.*.-).-).-)/,'/,'/,'/,'.*',+),+)-,*-,*-,*-,*-,*-,*-,*-,*-,*-,*-,*-,*-,*,+),+)-,*-,*.-+.-+/.,/.,/.,/.,/.,/.,.-+.-+.-+.-)/+(0,)1-*2.+3/,3/,2.+2.+2.+2.+2.+3/,3/,51.62/74/70*81+92,;4,<5-<5-=4-=4+>5,>5,?5,@6,A7-C9/G:1F<2G?4G?2I?3M?2R>3W=0[9-a7)k:,r8*{9+ƒ9,Š;,‘>.–>0›>/¦B3¬B4³C5¾F8ÉG:ÓF<ÝD>åC>è@=éA>åC>ÙD=ÈD7¶C1§A+¡A+¡A+¡@- ?,Ÿ>+¡>+Ÿ>+Ÿ>-ž?-œ=+™<+–=+”=*=+Ž=*Ž=*Œ>*‹<+‰<*‰<*‰<*ˆ;+‡;+‡;+…<+ˆ<.†=.†=.…=/†>0ˆ@2‰A5ŠB4D4•D1£F7±I>¹F?ºC=ºE>¸ME´ZQ­g]¡qctf}qcik]T_QGRDLLBIE:B>3;8/85,74+63*33)34,34,14-03,-2,,1+,1+,1+-2,-2,-2,-2,-2,-2,-2,-2,02-02-02-02-12-12-12-21/4+.:/3A32B5/H;5ZNNwmxŠ…œ’‘»“–͐•×ƒ‰ÓtzÄgn´^c¥Z[”LJrFCb@=Z=?V?BUAITJU[Sabapmn}vu…{p€uiylguh^l]W`O]\Ha[Eb\F,.-,.-,.-,.-,.-,.---+--+.-+.-).-)-,(/,'/,'.+&.*',+),+),+),+),+),+),+),+),+),+),+),+),+),+),+),+),+),+)-,*-,*-,*.-+.-+.-+/.,.-+.-+.-+.-+.-+.-+.-+/+(0,)1-*2.+2.+2.+2.+2.+2.+1-*1-*2.+2.+40-51.62/7/,81+:3-;4.<5/<5-=4-=4->5.>5,?5,@6-A7.C9/F90G:1G=3H>4K>5L>3Q=4T<2Y8/]7,g8.m7+w7-€8,ˆ:-<.•=1™=0¡A3¥A2ªC4²D5½E7ÈF9ÒE<ÙD=ÞC>ßD?ÞG@ÓF=ÁF7±C2£C-B-žB-A,œ@+›?*>*>,ž?-œ?-™<+˜=+”=*“>*>+Œ>*Œ>*Š=)‰<*‰<*‰<*‡<)‡;+…<+„;*„;,…<-„<.„<.ƒ=1„>2„@3†B7ˆC4‘H9•F5¡H:¯J@¸IB·HA¶KE²TL¯eZ¨sež}lo}{llteZfXMYKKM@JH<EC7>>2=:177-44*11'23+12*03,/2+.0+,1+.0+,1+.0+,1+.0+,1+.0+,1+.0+.0+/1,/1,/1,/1,01,01,01,10.3,48.6<23>42G@:[VSvszŠˆ“¶“—ǐ–Ò„ŒÍx‚Àpz·jsªgo\_~XZqQViQXhT^hYfleuuq~s‡~}‘…–‡|Ž€u‡wpqdueZfRZYD_Y?a[C*./*./*./*./,.-,.-,.-,.+,,*,,*-,(-,(,+',+',+'+*&+*(+*(+*(+*(+*(+*(+*(+*(,+),+),+),+),+),+),+),+),+),+),+)-,*-,*-,*-,*.-+.-+.-+.-+.-+.-+.-+.-+.-+/+(0,)1-*1-*2.+2.+1-*1-*0,)0,)0,)0,)1-*3/,40-51.80-91.:2/;30<41<5/=4/=4-=4-=4->3-?5,@6-B8/E80F91H;3H;3I<4M<4N;4R94U82[6.b70h6-r6,{7,…9,;/’</–=
/œ@3@1¡A3§A3°B5¹C7ÄD9ÌC9ÔE=ÖG?ÓI?ËH>¼F8­C3ŸC.™A-œA.›@-›@-š?,š=+š=,›>-š?-˜<-•<,“=,=+Ž=,Œ=,Š=+‰>+ˆ=*ˆ=*‡<)‡<)‡;+„;*„;,ƒ;,ƒ;-;/‚<0=2ƒ?4„B6…B9‡C6“K=–I9 H<«JA²KD±KF±TM­_U¬rd¦oŒxŽzŠwr€ocqbWcUKRBKM?GI;CE7??39;.35*/1&01)/0(/0*./)./*-/*./*.0+/0+.0+/0+.0+/0+.0+/0+/0+01,01,01,01,01,01,01,1/04,76,78/2;62HE>]^Vxy{‰˜Ž”®—¼•Ä„ŽÁ}‰¹{Šµ|Œ°|‹¨|‰šw„q~†n}€l~~nƒ~x„‚šŠ¢’¨•¦’‡œ‰€•‚zzj{i\hRWW?\X;_[@*./*./*./*./,.-,.-,.-,.-,,*,,*-,(,+',+',+'+*&+*&+*(+*(+*(+*(*)'*)'*)'*)',+),+),+),+),+),+),+),+),+),+),+),+),+),+),+),+)-,*-,*-,*.-+.-+.-+.-+.-+/+*/+(0,)1-*1-*0,)0,)/+(/+(/+(/+(/+(0,)1-*3/,3/,80.91.:2/;30<41<41<3.<3.=4/<3,=2,>3-?4.@6-C6.D7/F93F93G:4I:3L:6M83P72T50]6/c7.l5.v8-9/‰;/=1“=0˜@4™@2˜?1@1¤@1­A4·C6¾D7ÇG<ÊG=ÉI>ÂH=¶F8¨B3œA/—@-˜A-—@,–?+–>*–=+–=+™>,—>,•<,”=,=+Ž=*Œ=,Š=+ˆ=*ˆ=*‡<)‡<)†;(„;(„;*‚;)‚:+€;,€:.;.€<1€>2‚?6‚B8„D:…E;‘K?•I<œH>¥KC«LF¬RJ«]S©k^§n Žxšš‚Žš‚„–€x‹wlzian]R[HNUCJN?DH9?C5:>057,13(/0(./'-.(,-',-(.-)/.*0/+/.*./*/.*./*/.*./*/.*./*/0+/0+/0+/0+/0+/0+/0+0./5-85+63-/961HJ=aeWy€xˆ‘Œ”¡Œ–¯Š•³ƒ°€‘­…™±Œ£³©°“¨©£¡‹ ›‡ž–€š}˜‰€‹‡¤Ž“±™–²™“­”‹£‹„œ„|’{i|fXfOSV;XW9]Y<+/0+/0+/0+/0+/0+/0-/.-/.--+,,*,,*,,*,+',+',+',+'++)*********))))))((((((************************************************++++++,,,,,,---------/.,/+*/+*0,+0,+0,+/+*/+*.*)/+*/+*/+*/+*0,+1-,2.-3/,91/91/:20;31;30;30<20;2-<3.<3.=2.=2,=2,?4.B5/C60B73B73C84D93G96G83J71M60V80]7.g6/q8-z:0„<0‹=0>2–?5–?5•?2—?1œ@1¢B2«D5²D5¹E8¼F:¾H<¹G<¯E8£B2™@0•?.–A-•@,”?+”=*”=,”=,•<,•>-“;-’<-<,Ž=,‹<-‰<,‡;+…<+†;(†;(„;*ƒ:)ƒ:+‚:+9+:+;.~<0<3?5A8ƒD;…F=†G>K@‘I=˜H? 
KD§PI¨XQ§g[¤vf¡u™™}”£†‹¤‡„œ‚|’}s‚ojudZfRU^ILUBDM:>E59@07;-47,01)/0(.-(,+&,+'/+(0,+1-,0,).-)0,).-)0,).-)0,).-)/.*/.*/.*/.*/.*/.*/.*1-.7-85)31,)66*GL8_iQwƒm†’„‹—“š ‹™¤†—¡‡œ¡©¨™¹® À±˜·¨—¶¦˜·¥˜¸£²š‰«’‰«Ž±“’³”’³”¬Žˆ¢…ƒ›{’xg{bTbIPT9TU6XW9+/0+/0+/0+/0+/0+/0-/.-/...,--+--+--+-,(-,(-,(-,(,,,,,,+++******)))((((((*********************************))))))))))))(((******+++,,,,,,---.......*)/+*/+*/+*/+*.*)-)(-)(/+*/+*/+*/+*0,+1-,3/.3/.:20:20;31;31;30;30;1/;1/;2-;2-<1-<1-<1+=2,@3-?4.?61>71?82@93B94B94D71G6/O6/V5,^6,i7,t:.~<.…=/‹=0“?5’>4’>3“@2”A1™B1ŸD2¤C2ªC4¯C6³E8¯E8©C7 @2˜?1•?0“@.’@+“>*’=)“<+“<+“<+“=,‘;,<,Ž=,‹<+‰<,‡;+…<+„;*…:'…:'ƒ:)ƒ:)9*9*~9*}9,~<0}=1~>4A6‚C:ƒG=…IA‰JCJAG=—HCŸOH¦UQ¦`X¦qc¢ƒo›•{”¡ƒŽ©ˆ…¦‡Ÿƒ~–~x‡tp|hcoY[iRP]IGS?@K:<E49?19=/24)01)/.).+&-)&/*'1++2,,0+(/+(0+(/+(/+(/+(/+(/+(0,).-).-).-).-).-).-)1+-6*44)11,&56&CL/ZhEs‚a„’xœ‰Ÿ”ž—Šž•Š¥–•µž¡Æ¥§Ï¬¢É¬¥É¯¤Ë®¡È©—¿‹³‡¯Š³ŽŒ²‹Ž²ŒŒ«‰ƒ¡~™zvrcw\P`ELR6QR3QR3+/2+/2+/0+/0+/0+/0-/.-/..0/.0/..,..,..,--+.-).-)------,,,+++******))))))******************************)))))))))((((((((()))******+++,,,---.......*).*)/+*/+*.*)-)(,(',('0,+0,+0,+0,+1-,2.-3/.40/:12:12;31;31;31;31;1/;1/;1/;1/;0,;0,;0,<1-?2,>3-<5/;60;83<94<94=82?82@5/G4-N3*W5+b6+m8*x:+<,…<-=2<2Ž>3@3A2‘B1•B0™@.ž>.¥?1©A4©C7£A4œ>2–>2”>/’?-?*=+<*‘;*‘;*‘;,’<-‘;,<,;-Š;,ˆ;+‡;+„;*„;*ƒ:'ƒ:'‚9(‚9(9*€8)~9*}9,=1~>2?5€A8ƒD=…IA‡JE‰MEŠJAŽH@•JE¡TN§\W©i`§|l£Žy—œ~§…‡­ˆ€©‡~¡ƒ~˜}}Švw€miw`brXXgPO]FHT@CL;>D6<@257,34,0/*/,'.)&1)'2)*3*+2*(0+(2*(0+(0+(0+(0+(/+(0,)0,)0,)0,)0,)0,)0,)1++6)25)-1-"46 ?I&Ud9o€T€’j‹œ|Ž †¡‹Š¡‡‹©‡”·žÉ”¢Ñ›¦Óª¥Ñ­¤Ð¬žË¤½”‚¯„¬…°ƒ…®‚Š±…ˆ«ƒŸzy”sp‰i_sWM]BIO3JM0IL/,01,0/,0/,0/,0/.0/.0-.0-/1.00./0+/0+0/+/.*/.*/.*/.,/.,.-+-,*,+)+)*+)*)))))))))))))))))))))))+)))+),+)**(+*())'(((((&''''))))))***++),,*--+..,..,,+),+)-,*/+*.*)-)(,('+'&0,)0,)0,)1,)2-*4/,50-61.:20;31;30;30;30;30;1/:0.;1/:0.;0,;0,;0,;0,>0-=2,=4-<5-:70991891891991>7/@1*I0)R1(\4*f6*o:,t<-z<-†>2‰=0‹?2Œ@2ŽA1@1‘@/“=,–=-œ?0ŸB3 
C4A4™@2•?0’?/“=,“=,=-Ž=,<+Œ=,Œ=,Œ=,Œ=.‹<-Š=-‰<,‡;+†:*„;*ƒ:):(:*:*~;*}:)|:*|:,y:+~>2~>2€@6B9‚F<…JB‡NEŠOGŠICHC’OI˜\Tœh]œufˆu™€“¢…Œªˆˆ­‹©‡~¡ƒ}š~yxs‡ng~baz\ZqUTfLM\EFR>@I8=A379.55-4/+0+(0(&1'(3)*3*+/)).*'.*).*)/))/))/))/))0**0**0**0**/+*/+*/+*0*,4)/4*+2.#35<F#Q`7l}Qf‡™sŠžy‹¡}ˆ¡z‡§xµšÃ‡ÊœÈ—œÇœ›Æ™–Á”‹¶ˆ¬~ƒ®€Šµ‡‚¬~‡®‚†©|œwuol„dZnSIY>DM2EI0CG.-/*.1*.1*.1*/2+01+01+01+23-32-43.32-52-41,30+2/*50,4/,4/,3.+1-*0,+0,+-,*.,-.,---/--/+,.*+-()-))+.)/.)-.)/.)-.)-,*-.*+,*+,+)-,*,,*,-(-.)-.)-.)-.),,*,,,---.,-/.,/.,2.+2.+1,(2-)3.(6/)70(81):1*:1*;4,<5-<5-<5-=4/<3.<3.;2-;2-;2-;0,;0,;0.;0.<1/>0-A0)?1(;4*57*28,19.36-83-?-)E)&K($R)%Y.'`5,d<0k@0x@/@.…@0ŠA2@1‘>0‘;.‘9+’:,“=.“@0”C2“B/’A.>+<,•;0–:/’</Œ=.‡>-ƒ?,‚?,>+…@0‡?0‡>/ˆ<.‡;-†:,„;,‚:+z8*~@1w<,q8'w>-w>-u<+{@0{<-?3‡E9‰I?‡K@†MBƒOB‡NELI”SO”bY‘pa‰}g†‰n‡–y‡Ÿ‹¤†‹¦‡§ŠŠ§‰…£‡|Ÿt™zn–tgh`„^Y{XYtSZnRVfLNWDIK=A?3=829/-4*)3')3'+2&*0'****',((*))))*()-)*.(*1(+2).2).2(02(00)0/*0/*0/*./)-1++0-(/0";>)S\?nyW|‹d‹›t‰žs‹¢t‹§v‰§s‡ªrŽ´{—¾…œÁš¿Œ•¹‰Ž´ƒˆ®}ƒ«y‚©zƒª}‚§{‚¥}Ÿzv“tp‰lf}aYkUMZFHQ<CH4>C/.1(.1(.1(/2)01)12*12*12*44,44,63,63,63,52+41*30)7/,6.+6.+5-*2-*2-*0,+0,+.,-.,---/--/+,0*+/)*.)(--(..'..'..(,-(,-)*-)*-)(-,*-,*,-(-.)-.(-.(./)./*,.+,.----..,/.,/.*1-*1.)2-'3.(6/'70(90':1(<2);2)=4+<5+=6,=6,>5.=4-<3,;2+;2-;2-;0,;0,;0.;0.<1/>0-B/(C1'?2)<3*95,95,83-:1,?-)C+'G)'N+'T/)Y4,_:1d=.o@.w@,@/ˆ@2Ž?2‘=2’;1‘:0Œ:,Š;,Š>.‰@/ŠA.‹B/ŽA/‘?1“;/”:1<1Š>0„?/@.@.€?-ƒ@/„?/†=.…<-„;,ƒ;,‚:+<,w7+x<1r:+n9)s>,r=+q9(t;*|=.‚@2ˆF:‰I=‡K@…M@ƒOB‡NEIG”SQ’f]Œwf‚†m~“tž~‚¥„ˆ§‡Š§‰¨‹Ž§Šˆ¥‡ 
ƒt›|m™vd“i^‹`X‚ZX|VYwUXoRRaJMVCDH:@@6:724/,1++2)*/)+-)****)+*))))))+),-(,/)-0*.2(00)00)0/*1/*1-*1,+1,+02-12.-/+(,,$8:,SXBnwZ}‹g…•nƒ™k†žl†¢o…£o‚¥m‡¬vµ˜¹„–·‚‘³€Š¯{…ªw‚¨u‚¨y‚§{‚¥{¤|~{w’soˆkh|cYkUMZHDM<=D4:>//2)/2)03*03*12*23+23+23+55-55-74-85.74-63,63,52+81+70*50,4/+3.*2-)1-*0,)/.,/.,......--/,,.)*,**,.)-/)-/)-/)-.)-.*+.*+.*).-+.-).-)/.*./)./)/0*/0+//-///..,..,/.,/.*1-*1.)4/)4/)70(81);2)<3*>4*=5*?6->7-?6-?6-?6->5,=4-<3,;2-;2-;0,:/+:/-;0.<1/>0-@1*D1*G/+J.+K+,J*-G*,C++A.*=.'@/(C/&M/'W0)b3-e5+i9+n:,v<.=1‡=4‹;4:2Š:1‰<2‡=2„?0ƒ@0†A2‡B3‹A4ŽB4>1>/‹>.‡>-†?-ƒ?,„=+…<+ˆ<,‡;+‡;+†:*‚;)<){=(z=*|>/z</u:,u;-w?0w?0u=.v;-~@3‚B6ˆE<‰I?‰K@‡K@†MBˆME‘JH•TP–f\ve‡…l’r{€¤€…©…†©ˆ‹ªŠŽ©ŠŒ¥‡†Ÿ‚€™|x—wg–l_“eZŠ`Z‡^[ƒ^Y|[SqUPhPFYE@Q?8D60:/,3+)0)(-))+(.((0'(/&'/&'.(*/)+0*,0,--+.,,.+,.)-0)-0(-0&.0(-01/23/..*'**"57)PU?luZ{‰f‘k}”h™jŸmŸm} j€¥q‡¬vŠ¯y‰®x†«w‚©t€¦s~¦r~¥v¦y¤y¢z|›yv‘roˆkg{bXjRMZFBI9<@18:,14+14+14+25,34,34,45-45-66.66.96/96/96/96/85.74-92,92,61-61-50,4/+2.+2.+0/-10.1/0//////---,,.+++/*.0*,0*.0*,/+,/+*/+*/+*/.*/.*/.*0/+/0*01+01+01+00.00.00.01,0/+0/*2/*2/(61+73*:3+;4*=4+>5,@6,@6,A8/@9/A8/A8/@7.?6->5.=4-<3.;2-;0,;0,;0.;0.<1/=2.A4.E2,L.,R+.U(/U&.P'-H*,A/+;2):3)<2&G1&R0'^/)b0)g8.j:.t;0|=4ƒ<6‰;7‰:5ˆ;5ˆ?8…A8„B6‚B6…C7†B7‡A7‰A5ˆ@1‡@.‡@.†?-…>,†=,‡;+ˆ;+ˆ9*ˆ9*†9)ƒ:)~;({=(v>'w=)?1z6+x8,y;.u:,v<.z@2z@2~C5ƒE:†H=‡I>†J?†L@‡NC‰PEŒSJ[PŽiYŠt_ƒfŠl~–tŸz‚¦‚ƒ©„‡«‡ˆª‡‰¦‡†¡‚š||—xp–oh“hdŽfdee‹fc‡c]~_Zw[OgOH^I?P>6D30;-,4)).'*+%1'&3%%1%'0&'/&'.((.*),,,)+*'+*'+,&,,&,,$-,&+.'+,.*+1,)/*&.+"79+PUAhrWtƒb{gyfz•hœl|žlxiy k~¥p€§r~¥n}¤o{£o|¤p}¥s}¤u}¤w{ 
u|Ÿwy˜vrnm†he|`WkPM[DBF7<=/78*25,25,36-36-56.56.56.56.77/880;81;81;81;81:70:70<5/;4.94083/72.61-51.51.51.32.32032021/10.0.//.,1+-3*+1+-1++1++1++0,+0,)2.+2.+10+10+10+21,12*12,22022022012-21-21,41,50*;4.;4,=4->5,@6-A7-B8.C9/C;0C;0C;0C;0B90@7.?6->5,=4-<3,<3.;2-;2-<3.<3.=4/@51E3/L0/S,1X)1W(2R)/I,.A2-:6+66*97(B4'L1&V0'[.(d90h<3q<4z=8‚<:…;:†;8„:7:6€=7€@7B9ƒC:‚B9…A8„@5B0C.ƒB.ƒ@-…>,‡;+ˆ;+ˆ9*‰7)‡8)…9);({=(x>(t@(v=)€8*{.$€8,ƒ?2v4(s5(|@5}C7}E8F;‚I>ƒJ?‚L@„NB‡QE…WH|aLyiP~pV€v[€|aƒ†i…“r‡y‚¢}§€€ª‚€­„€ª„|¤yŸzz™wz’pykwiuitŒjrŠjm„gkd_qWYhQO[GFM;?A3:8+40%4+$5'&5'&2&&0(&.)&+*&+,'*,)',(',('+*)+*)+****+),-)**%"0+'50*86*@C2RZCcpTl}[tˆcqŠ`sbv—hvšjs™fs›gwŸjy¡lu jtŸjv¡ly£q{¥uz¤t{¢uxryœtw–tpŽll…gd{^UlOL[DBD6=;/86*47.47.47.58/67/67/780780991991<92=:3=:3=:3=:3<92=82>71<73;62:5194084173084184165165154243/21/40-3--5-+3--3.+3--3.+2.+2.+3/,30+21,21,21,32-23+23-34/34/34/45/54/85.94.95,=6.=6,?6-@8-C9/D:0E;1E;1F<2E=2E=2D<1C:1B90@7.?6-?6/>5.=4/=4/=4/=4/>50?61?61C52I43O13S.5S/3O/2J20C60<8-;9,<9*A8)H6(P2'V1([3)b3+k4/t72}7576‚6674~73}:4|;5}>5?6‚?7…>8„@5€C1~D0€A/@.ƒ>.…<-†:,‡9,…9+ƒ:+€;+~=+z=*x>*v?*{<*„4)‰2)ŸLD¦XN‰@7{9-‚F;J<yH:xJ;zL=yN>}O@€RC…UG^JrkNosP{vX…w\y`–€hžŠr ”z•–wŽœy‹£ˆ¨„©€¥|z 
y|švo„n‚mŽm~oznv‹lqˆki}aeuZ\hPS[DLM;EB1?7*:0&:-'9+(5*&2+%.+$+,&*-&(-&).(+-(-,(/+(2)*3)*6'*6((1(#:3)D>2HF7OR=[cKdqShyWmƒ\i„YjŠ[m‘ap”dn”an–bošetŸjrhpœgržkw£ry¤vy£uxŸsušqv˜st“qnŒjj…fb{]UlOL\BAC5=9.:3)58/58/58/690780891891891::2;;3>;4>;4?<5?<5?<5>;4?:4?:4>93>93=82<71<73;62:72;83;83:72:6385073063.50-7/,7/-7/,50-50,50,50,41,41,52-52+33+44,44,43.43/540651761:72;81=82=90?80?8.A8/B:/D:0F<0H<0G=1G=3G=3G=3G=3E;1D:0A8/@7.@70@70?6/?6/>71>71?82?82?82@93B94D95G96H96H94G:2C90C9/C9-E9+F9)G:)J9)Q7(Z0$d/'m2,w60‚85†;8Š<:ˆ=8‹B<†A:>6~;2;2;3‡<6ˆ?6ƒA3€A2€A2?1=0;/€</€</~<.{=.{=.y>.{=.{=.|=.ƒ9,1)¡;6ÌkeÙwªZQŒC:ˆLA~M>tJ:qN;oP<nR=sT@zWD\I}eMmsMnzR„y[–v_§o`³mc»pj»wn»ƒv±Œy¦–}ž”¢‹¡{„žy„švˆ’p‰o†‘o„”p€–r{•ru“or‘okŠhhƒbax[[kPT^ELP9DC/@:*>4+=2,:1*51(2/&./'-0',/&/0*2/*6,*9+*=)+A&+D%+A'(C5,KD4VQ>[YD^bIenQhxTg|Ui‚Xe‚Tf†Wj\n“`m”_n–aošesžio›hm™fpœkv rx¡uxžutšss—qt–ss’pm‹ii„ca|[TmML\AGH8D=3@7.6906906906908918919:29:2::2;;3>;4?<5@=6@=6@=6?<5A<6A<6@;5?:4>93>93=84=84<94<92<94<92<73:70:5194.91.92,91.92,91.72,72.72.61-52-52+52+44,44,44,55-54/54/761880<92=:1?;2@<1@9/@:.B:/C;.E;/F=.I=/J>0H>2H>4H>4G=3F<2D:0A8/@7.B92A81@70?80?82?82@93A:4=82>93A:4C<4E<5G=4J;4L;3K7.M7,N6*O8*O8(O8(O:'T7'f6(q5*}:1‰@9“D?™IBJDœLEŸPI–LAŒD8„<0ƒ6,…7+Œ:/=4ˆ>5†=4†=6ƒ=3‚<4€<1}=3y>0w?2u@0u@2y?1}=3;1‡834/•(#°=8ì|zý–‘Ãia•H>ˆJ=wH8mL9gQ:eU<fV=lX@t\D|`JgMxrNƒvSs[´l^Ç_\ÒUY×QZÖS[ãpsØyu˅{À~´”}ª•z¢’xžu›‰q›‹r˜Œr•sŽ‘t‰’s„’qq{Œlw†gs|amoWgbL^S?UF3O<.L3,M1-H1+B/(=.'9/&6/%5.$81)90)=.)@-)D*+G)+J'+G)'RB3YQ:d^FfeIglLkuSk{Vi~Uk…Xg„Tf†UlŽ[o”`p—br™dtœhrœjo™im—ipšlwtyŸvw›wr–rr”su”tr‘qlŠhi„ca|[TnKK^BMN>KB9F=48;27:17:17:19:2:;3;<4<=5<<4<<4?<5@=6A>7B?8B?8C@9E@:E@:D?9D?9C>8C>8B=7B=7@;5@<3@;5?;2A:4?;2A:4A:2@93@70@72?80>71>7/=60;60<71;60:7096/85.74-74-63,66.66.85.96/;60=90@91A:0B90C;0F<2H>2K?3L@2L@2L@2MA5L@4K?3J>2G=3F<2E;1D:0B90A8/?8.?8.>7/=90=90>:1;4,MD=K@:J92S?8R70N/*^;5Z3,^6.^3*[/$\0#e7*k=0q=0ˆJ=ŒG8•I<žLAžG>˜>5š=5C:¤MC¤QC¨VHªXJ¥OB˜@2—;.›?4’95:7Œ65…40…73ƒ?6|@6r>0rB4n@0o>0v:/82‰41-0˜')ÇGFçc_ÂECËXUàzu«WM…E9„VFmR=g[AhcFhdGhaDm_DzdL‹kRbJ²n[Ñn
hÚY]à;Kæ,Cð+Gò0Kö@WêIYä[eãorâ|zځ{؃~։Á~u¼‚v¹„v¶‡w¯ˆw«Šw§‹v¤‹w—l˜k•xh“oaŒbVƒTJzH?w@=t9;g/2d02_11N&$G&!J1,K70G4.D3,D1+E2,F.,D*)J.-O6/XI4`W:e`BeeCioKs}Xsƒ\m‚Wj„WkˆXnŽ]q“`t–cs˜bušdv›hq–jr™ms˜oq–mr”qw™vz™zx—xw”vqŽpj‡ii„eg‚a`{ZSnKM`DNO?QK?OI=7:17:17:17:19:2;<4<=5=>6>>6>>6@=6A>7A>7B?8B?8C@9E@:E@:E@:D?9D?9C>8C>8C?6C?6B>5D=5D=5C<4B;1B;3B;1B92A8/A81A81@91@91@91@91<71;60:7096/85.85.74-74-66.66.96/96-;7.=9.A:0B<0C;0C;.F<0H?0K?1L@0L@0L@0L@2L@4K?3J>2I<3F<2E;1D:0B90B90@9/?8.=90>:1>:1A:2C:1I81J.*U-+i65t88{;;„DBD?ƒH@„J?„H=JA˜OFŸPKŸOF‘C6‘B3˜B3D6Ÿ?3™7,œ7+ž</ C4›@.™@.ŸD2£F5§F6®H9µMB²LJ­IK¤@B˜89‘97Œ=8‚?6v>1n=.yH9„L?„@7‚-*‘*-²:CÒLUÚLKÞOKÈ>;À@=Ï`Y³YP„@3‚UBx`Hc\?]^?hiIrmOugJ}eK“cMºdWÕ_[äUYè@Mò,Cü&Bÿ%Gÿ&Hÿ-Kø2Kð=RîIYèP\ãQ[âR\ßWaÛbiÕflÓimÑmoÍqrÊtsÈwtÆxtÇ|yÈzxÆvwÆqtÅjoÅflÄ`jÃ_iÄ_m³S_¥KU™HOEI„FIk87FT1-K0)H1)I7-J91K:2M?6SF6[O5cZ9fb?ggCnrMx‚]z‰br‡^o‰\n‰\oŒ^qŽ^s“bu•cu•cu•fr’iv–qz˜vy–w~˜{ƒ‚„ž…ƒ›ƒ™y‘yq‰ok„gg‚c_zYSnKNaCPS@TQBRNB6906907:18;2:;3<=5>?7?@8@@8@@8B?8B?8C@9C@9C@9C@9EB;DA:DA:DA:EA8D@7D@7D@7HA9HA7H?6G>5F=4E=2D;2D<1C:1C;0C:1C:1C:1C:1B;1B;1<8/<8/<8/;7.96/85.85.74-96-96-96-:7.<8/=9.A:0B<0C;0D</G>/J>.L?/M@/M@/M@0M?2M?2L>3J>2I=1G;/E;1D:0C;0C;0A:0?;0?;0?;0?<3B;1UD<R3.i33@F¬KTÂT_Ë^dÅ^ašB>–H>”L>šNAªQI½TPÊORÄLK£A4›B0žA0£?0¥;-¢6)¢8*¤=.«F4 
=*˜7$œ9$¤=*«@.²C2¶D9¥11®9?¹DJ¼KO¸NP­KJ–A<€7.y9-u7*w3(„1+œ45¸<DÓGTåMYáGGÏ61Î:6½30ÇMHÆd[”J=‹[G}cJi`AedEspO~qQ€iJŒhN°hYâbaóRZóCPõ6Hý2Hÿ1Kÿ/Lÿ+Hÿ1Lû/Hõ1Iô4Kò5Kï2Hò2Kó7Oò@XïC[íE\ìI^ëNcéReçTgçUhãThâSgâPgâNhçNjëQmòSsóVuýg„ña|ã[sÔVkË[kÉdr¥RZr-0\&&W0+V;2O?2A;-89)=A0KO8VO2`X4e_=ffBptQ|†c~Œiv‡cwgsŒer‹dqŒctevewfwh}•sƒš}ˆŸƒŠ¡‡£–©•—ª—•¦–¡†™†yŒxn…kfb^wWSnKOdESXBWVDXVG58/58/6907:1:;3<=5>?7@A9AA9AA9C@9C@9DA:DA:DA:DA:EB;EB;EB;EB;FB9FB9FB9FB7JC9IB8I@7H@5G?4E=2D<1D</C;0C;0C;0C;0D<1D;2C<2C<2?80=90<8/<8/:7096/85.85.96-96-96-:8,<8->:.A;/B<.D</F=.I=-K?/M@/M@/M@/M@/M@0L>1K=2J<1H<0G;/D:0D:0D<1B<0@<1@<1@<1@>2@=4F<3Q9/Z+%‡;=½U^ÙUcæUfâT`ÊINŸ30–:/?.—A0­F=ÄJEÔAGË?@±B7¥F4¨B3«A3®>0¯?1±A3³E6±H5¨A.£='¤;&¨;'ª9'«6%­3(¶97º9=¼7<»6;¾=AÄIKÄTR¾XS™?6Š3)†,#61ÂGIÜOWàBPÙ3=Ý98Í.(Ñ82Ç82ÅD>Ç\R¦WHXDvV=veG{pPpQ‚gJŽdK©o[ÒthìSVûCOö<Iø9Hþ=Nÿ@Qþ;L÷4Eÿ?Sþ9Mú5Iü2Hü0Iþ.Hÿ/Kÿ5Rÿ2Qý4Tû6Tú7Wø9X÷<[õ>]ô?^öCcõAdó?dó?fø?hûBlÿEpÿHqÿOuÿQrþUvòSqæSmãbvÖftµXb|15g0-W2*N8+E>,=@+8D.>F.PK.]T5f_BnjMy|_‡q„‘u|ŠpyŒpzŽr{syŽox‹m{Œl’r‡–y•£Š›©’¢¯¥²¡ªµ§®¸­®¸¯ª´«Ÿ«Ÿ” ’’€q„pe|b\uWRmLPeFPZBUXCWZE47.58/58/7:19:2<=5>?7?@8AA9AA9C@9DA:DA:DA:DA:EB;FC<FC<FC:FC:GC:GC:IB8IB8KB9KC8JB7H@3H>2G=1F<0E<-D:.D:.D:.D:.C;.D<1D<1C=1@9/@9/=90<8/<8/;7.:5/:5/96-97+;7,;7,>7-@:.C;0D</F<0G>/J>.K@.M@/M@/M@-M@/N>/N>1K=0J</H<0G;/D:.C9-C;0B<0@<0@<0?=1@>2A?3I<3\5.u40¬LPÚ^håM\ã@QÖ6B¶%(¨1)›7'’:&–;&ª@2ÁE;Ô>?Ë;:°>3§C3ª@2¬>1±?4¸C9»G:¹I;ª?-¨?,¦=*¥:(©8&°;*·<-º<0ÊG?ÎDAÏ==Ï68Ô8;ÝADàLLÝROÃD=ÆKCÌSJÓTMÙKIÛ?Bà5=à16Ú3-Ù8.Ï2)Î92¾71³A7´\N•TBxN6€bF…jM„aE[C­fRÊqcämgèEJï:Cè9@æ=BæCHæEJçFKêGLòKSñALò9Gö5Fø1Fý/Fÿ2Jÿ8Rÿ9Uþ9Uþ9Uý:Xý<[ý>_ý?aýAdÿCiþDküClüCmüCmüCoüCoýBmÿGpÿAfÿGmÿMpñJhèNhå\pØbr»Zc‹?A^'$O*!TA2TO;IL7?C,QK3cXBskVzf‹x—œˆ“œ‹†“‚~ˆ–…›Š‹˜‡†‘‰’–¢©—±µ¦¸¼­½Àµ¿Â¹ÂýÄÅÀÁÃÀ¼¾»¯´®¡¨ ‹—‹x†uh{e\sWSlLPhHN[AUZCW\E47.47.47.58/891:;3;<4<=5??7@@8B?8C@9DA:EB;FC<FC<FC<GD=GD;GD;HD;IE<KD:KD:KC8KC8JB5IA4I?3H?0G>/G>/F=.F=.F<0E;/D</C;.C;0C;0A:0@9/>:1=90<8/<8/;60;7.:7.:8,;7,<8-?9-A;/C;.D=-G>/H?.K@.LA/M@/M@/M@-M@/N>/N>1K=0J</H:/F:.C9-B:-C;0A;/?;/?;/?=1@>2@@4L;1k92•FBÃY]ÚW_ÝCOÜ8CÒ38¼+(¬0$ 7"•9 •9 
¢<&´?.Ã:0¾8/§7+¡;-¤8,§7+¯:1¶A7ºE;·F8¬>/ª?/¦;+£5$©5&µ=/ÃE9ÊH;¼8+Î@6ßD?ëAAñ=@ñ8=ê37Ü./Û75áFAçRKåNGÜ=9Ø0/â/2ê65Ø0'Ý;.Ê,!Ï8/½3)¯6-Çg[¶m\VA‡[B‚V=‰S;©\JÍk^ÞgaãUTïFKî=Câ<>Ø>>ÒA>ÑFAÝOKêVVíNRíDKð:Gõ4E÷2Dø/Bû2Eþ7Lþ8Qý8Tû9Tü:Uÿ:[ÿ=`ÿ@dÿChþ;eý<hý>jü>lú?l÷=ló<lõ:gÿGqþ:`þ>cÿLnþLlõNjïUoâYmçqÁ`i•IKq:5[5*O8*OB1UM:\P@qeU…|m“œœ¤§žŸ¦ž“•“ž˜œ§¡¤¯©¤­¨¡¦¢¥§¢´µ°ÂÁ¼ËÈ¿ÐÍÄÔÑÊÓÐËÔÎÎÔÎÐÐËÏÈÆɽ½½¯±®—ž–‹}l}j^uYVoQTlLRaDU_DV`E47.47.47.47.67/7809:2:;3==5>>6A>7B?8DA:EB;FC<GD=GD=GD=GD;HE<IE<JF=LE;LE;LD9LD7MC7LB6NB4MA3MA3L@0JA0I@/I@1G>/E=0D</C;.C;0A:0A:0@91@91=90<8/<71;7.:7.:8,;7+<8,?9-A;/C;.D=-H?0H?.K@.LA/M@-M@-O@-N>.N>/M=0L</I;.H:-F:,C9-B:-A9.@:.>:.?;/?=1@>2??3N:1p1,¦KJ¿RUº=A¼/5Á/2¿0,¸2'©1 £9!œ=!–;™;Ÿ="¥;#£6"6'Ÿ7,¢6,¤4)«81¶A8·B9±A5°B5­A4§;.¥7(®:-»B7ÃD;ÃA4Â=*Ì>*Ù5)à)#ì""ù$(ÿ*/ÿ.1ì)'æ-(à2)ß4*à4*ã2*ç0*æ1*Ú3#Õ5%Ð2&Î6+Ä7.»?5Æ`TÐ~p¦eSVBˆN:¡XGÇeZÞc^åSTçDIôGKëABÝ?<Ñ@;È?7Ä?6ÑF?âLKîIMó?Hø8Gü6Gû5Fø3Dö6Eú;Kú9Nù7Qû6Rû6Rþ7Xÿ9]ÿ;aÿ=eÿ>jÿ?mÿAoÿBqÿBuþBtüBtùBpÿElúAaýFeÿJiýLiÿVqÿ^yò]sñj{åo}Ònv­]^ƒD?f5.gA6qUIwe[Š}u •«¤ž¯®ª´¶µ±µ¶§¯±°¸»µ¼Â½ÂÈ¿ÂÇÃÂÈÈÆËÓÎÔÜÖØáÙÖæÝØçÝÜåÛÜä×ÞãØàÞÔÜÖÏÖÉÄʹ¹¹¡¦ 
ˆ’‡tƒpey`\sWYqQYiL[hJ[hJ47036/36/36/56067/780891;;3<<4>>6??7C@9CC9GD=EE=GD=GD;HE<HF:IG;JF:KG;MG9LF8LF8ME:MD;MD;LC<LC<LC:MD5LC2KB3I@1G?2E=0D</D<1B;1A:0A:2@91=90<71<71<71;7.;7,;7,<8-=9.?;0B;1C=1F@2H@3JA2JA0LA/M@-O@-N?,L?/J>0L</K:0J9/I80E8/A8/>:/=;/;;/:<.<=/=>0A?0S9,…<5Â\XÇZW­73ª-)°1*­/#¬3"ª:$«A'ªG*¤E'¡B$¢@#¡>!ž8!Ÿ9*£;0¤:-¦7,¬<1µE:µE9¯A4¨</¨</©;,¬<.ºD8ÆLAÃD;»7+½6"Ì;&Ü:+æ4*ñ+(ø((ü&(ù$&ø**õ.)î2)ê4)æ4(ä2&ã1%à3%à;(Ð1Û>-Î6)Æ8,À@5±A5Ïm`¾hY¥RB¦N@ÊdXåhbæRRé@EóBJê=Aß;:Ô<9Ë@9À=3¶8,¹6,Ã81âHHè?Dï:Cö;Fù;G÷9Eö;Fú>LþBSÿ?Tü<Sü:Tÿ8Uÿ9Xÿ:^ÿ:bÿ>hÿ>lÿ>lÿ?ný>qú@rø@r÷AqöBiþLlÿTsôJgêA^ûTpÿhƒÿeö\vî^uægxÛq}Åpu§geŒ]W{ZQ–w§˜‘¼±«Ã»¸ÃÁÂÆÇËÄÇÎÀÃÌÊÊÖÌÊØÑËÙÕÏÝÝÕâãÛæèáéìãèîäãñçåòèçîäåíàçìßæèÝåâØàÐÉÐÀ¾Á©«¨–Žy…wkzgbv]`rXbpVboS`mQ01,12-34/5616727828938939:2:;3<>3>@5AA7BD7DD:DF;HH>GG=GH:GH8HI7KJ6LK6NL7NK8JG8GE9IFAMIJNIOGDMD?CNE<LC4JA2KB3LD7KC6F>3B:/D=3B;3@93?82>93=84;62:51B94B94@93>93=:5>:7=<8=<7@@8B?6D@5H@3JA0L?,N?*L@*G@-E?/I;0J70K61K63J88D:8@?:4:01<.9D39B/58#?=(bC/¼j\¶J>¥7*¤6)¦8)£5$¤6%¨=)¡8"¤;%¨?)¬?(¬>'¬9$«6"ª7$¢7'Ÿ8) 9*£<-©?/¬B2¬B2ª@0¨=+«=,­<*³>-½C4ÄF8À>1·1%Æ8*Ë7)Ð8*Õ9,Û7-á6.è3,í0,ñ.,ó-*ò/+ï2,ê5*ã7)Ü9(Ú;(Û:&Ó4Ï2Í7&Æ7'»6'ÀB4ÎTGÇNCÎSKß[Ví^ZîPQë>Bñ8>ö?Dé@CÞCAÓB?Æ?9º;2±;/¯;,±;-ÉK?ÔKCÛFBà=>é<@÷?Gü>Hø:DýCNûCOüCQÿDUý@Vû;Tÿ<ZÿCbÿFkÿAhù<fô9dô=køBrüHxÿKzÿNyþNtöQsïPnåLhçMgûXuÿf…ÿ_ƒòGiöWwæ`yÂ]k¼z~‡a^yn§‘„»§œÐúÛÖÐÛÛÙÛÜàÝÜäâÜêåØìê×íñÙïôÝñ÷âñõèòôêòóíïùôñøóïöñîôîîñèéìãæèßäãÝáÕÐÔËÉʺº¸£¥ Š‘‰xƒuq}oo|j}†sw€kt}h//-00.11/3315616727827829:2:;3;=2=?4?A4AC6BD7CE8FH=GG;GH:HI7HJ5LK6LL4MM5KJ6KI<KKCPPPWU`XWgUSiRO`NHJLC<G>7F=6F?7G@8E>6B;3C<6?:4?82<71=82>95<73;60<3.=4/=60<73<94<;7=<8>=9@?:B?8D@5H@3L@0L?,O>*L@*A?*@?+D</H:1J73G53D45>64>=97<59?3?A3C<,I9)\B3L;­QBª;*Ÿ1 
Ÿ6#¡:'8$9"£?(Ÿ;$¦=(­@+²?,´<+´9)¶7&³8(¨9(£9)£9)£<+¨?,ªA.ªA.ª?+¬?+ª9'±<*¿F5ÅG8Á?1»7*¿5(Ë7+Î6)Í9+Ï;-Ò<.×;.Þ7.æ3,í0,ñ.*ô.+ð1)è3(ß8(Õ:&Ó;$Ü<&Ø6!Ô7$Ó<)Ë:)À4%Â:,ÌF:ÐE>ØICçOLñQQòHIï<@ô:?÷BGåBCÚEAÐB>Ã>7·;1®:-©;*ª<+µD4ÅK>ÔNCÚG@â@>î>Aú?Fý@GøCJöCIúENüFRüCSù>Sû=VÿB^ÿFhÿCiú@gö@iøCnûGtüKyÿKzÿKzþItóKoñRrô[w÷^zúWvûMnûEkùEjíEhÙJfÆ_p°nrjKFr°–‰Ì²¥ãÓÆìãÚéèäèéëéæïêâñõâøûáúÿãûÿçýÿíýÿòüÿ÷üýùøýùöüùôû÷ôøôñöððòììïéëíçéäÞàÚÖÕÈÇŲ³® ™Ž”Šˆ…ˆƒ–…‰~†}--+..,/0+12-23.4505606718919:2:<1<>3>@3@B5BD7BD7GG;IG;JH;JI7KJ6MK6NL7NK8KI:NKBTSQ_^dihxll„klŠiiƒ^ZiYQ\OHPHAIE>EE?CC=?@::A;;>:7<73;60=82>:1>:1=9.84+95,;7.<90=:1>;2>;2><0C?4E?3G?2K?/M@0N>.O>,L?,:<$9>'=<*A;-A8/?6/<41940<94@;5G:4L3.W+(k/.†<= GC£;0 2!2 ¢9&¢<&9"œ8!¡=&¤;%¨>(¯@,µ@.¹>,»<+¾:+»<-±>,«<+©:)¨:)ª;*¬=,¬=*¬=*±>,­8&¸?.ÉK<ÊH8»6'º2$È:.Î6)Ò6)Ò9+Ó:,Ö:-Û9,â6,è3,ð0-ô.+ò/+í1(å3'Û6#Ó8"Ð8!Û9$Ú5!Ú9'Ú?-Ò<-È6)Å9,ËA6Æ;4Ì?8ØD@äJHéJGéCCì@@ëCBÜC>ÔD<ÌA:Â=4µ9-­7)©:)ª;*«7(ÀD8ÔNEÛJEà@@è>AöAHÿGOôAGòAG÷CLýGSþEUù@Rú>VþA]ÿEeÿDgþDküFoþItÿKxþM{ÿJzÿFwÿGvÿJsúOsÿZzÿa€ýVtïEbùJkýTsÚ@ZÊI]Ø{…·{{_B:|n¯‚Õ³§óÛÏúìã÷òîøöù÷ôýóëúùêýÿêÿÿëÿÿîÿÿôÿÿøÿÿüþÿýüÿýúÿýøÿû÷þùöýôõûòóúñôùðóïæçäÞÞÓÏξ½¹¬¯¨ ¦œ£™œ¤—£«œ¦•š¢“--+-.)./*/0+01,23-34.45/7808919;0;=2=?2?A4@B5BB6HF:JF:KH9LI8MJ7NL7OM8OL;KI=QPK]\amlzyy‘€ 
§}¥yu–qj‰d^zYTkQLbKGXD@N@=HA>E=;>986952:70<8/<8-;9-88.991::0>;2><0></>;,>;*E?/F?-H?.K@.M@0N>.N>.K@.?=(<>)>=+></=;/<90::29:4::2C:3J50T,*g)*ƒ159Aª=@¤8,¢7%¥:(¨?,§>)Ÿ9#ž8"¤;&¤:$«<(²?,¸?.½>-À<-Ä<.Â>1»@1³=/¯9+®8*®8*¯:)¯:)±9)´;*¹;,ÁB3ÊF9Æ>0¼2%À2&Í9-Ó5*Ø4*Ø6+Ú8+Ü8,ß7,æ3,ê2*ñ0+ô/)ñ.(ì0'ã1#Ù4!Ð5Í5Õ3×0Ø5$Û;+Ö=/Ð:,Í=2ÒD:Ã6-Ã6/Ê;5ØD@âMGäKFÞC>Õ<6Ð@7ËA6Å?4¾</´9*°8*®:+°<-«3%½?3ÎH?ØGBÜA?ä@AóFLüMRî?Dï>DôCKüHSþHUúCUú@WüA\þCbýDfþFlÿJrÿMxÿN{ÿLyÿIwÿEtÿO|ÿR}ýJq÷LnÿVuÿVs÷MhðFaîLdÐ>SádrþŸ¥ÑŽd92kI=¤znÐ¥œõÔËÿéãþóïÿüþÿûÿøòþúïÿýïÿÿïÿÿòÿÿõÿÿøþÿúúÿûøÿþùÿýøÿû÷þùõÿö÷þõöþó÷þôõöìíëãáÚÕÒÈÅÀ¹º²²µª±´©°¶ª¶¾±¯·ª«³¦10,0/*0/*0/*0/*21,32-44,77/88099/;;1==3??5@@4AA5HD8KE9MG9OI9PK8QL8QL9OL=NKBVTUeeowx„‡¨Œ¸Ž’ÂŽ‘ÄŒŒ¾†‚´zw¦ol—fd‹\[{SPmLJ`IFWDBM=<A97876285.86*55+56.581891::.=;.?<+A<(B<&G?*G?*J?+K@.L@0K?1K?1K?3L@4H>4E;2@91<928915<45<47=3<:.F3,W2,u87’@B¥=D¦89§;/¨=+ª?+«B-¨?*¤;&¤;%¨>(§8$­:'µ<)º=+¿;,Ã;-È</È>3Ä@4½=0¹9,·7*·7*¸8+¸8+·8)¹7)Æ@4ËC5Å9,¿1%Å2(Ê8+Ï5)Ú3*ß3)ß4*à6)ã5*æ4*ê2*ï0(ò/)ô/)ñ.(ë1%â2#Ù4!Ð5Ì4Ô1Ö/Õ2!Ø6'Ö9*Ô:.Ó?5ÕE<ÔE=Í@7È;2ËA7ÕKAÙOEÐF<Ä:/Ä<0Â=.¾<.º;,´9)³9*µ=-¸>/µ9-¼<1È?9ÑB>ØBAàDEìINõOSê@Cí>CòCJúISýKWûFWùBVúBZùA]úCbýEiÿKqÿNwÿOzÿMwþItÿJwÿS~ÿT}üIpùImÿStÿVq÷OhéE]ãG\çYiÿ–¡ÿµºòžœHCi.&¦kc̗ñÉÁÿâÝÿòîÿüûÿýÿùöýý÷ÿÿ÷ÿÿ÷ÿÿøÿÿúÿÿûþÿüüÿýúÿüøÿûõÿú÷ÿùöÿ÷øÿö÷ÿôöþôõüòóñéçäÜÚÕÐÌÍÊÃÈȾÉ˾ÉÍ¿ÈÐŽǾ·Á¸54/43.21,10+10+11)33+44,66.66.88.::0<<2>>4??3B@4IC7LD7OH8QJ8TL9SL:SL<QK?PKGYW\kjz}~œ‹Ž·“™Ë—×˜žÜ—˜Ø’’ÒŒŠÉ…„½~~²uv¤lk“ee‡[[wUShKJXCBJ==?:9787376246526556177/;9,?<+C>*E?'G?(G?(H@+HA/HB2GA5G@6G@8H=;H::C98=85664173.91.:05=275)A1$Z8.~C=”FB><40¦<.¨=+¨?*©@+¨?*©>*¬?*¯@,ª7$±8'¸;)¾:+Ã9,Ç9-Ì9/Ì<1É?4Å<2Â9/À7-Á8.Á9-À8,À6+Ä7-ÑC7Ð@5Ã0&Ã,#Ï8-Ô<1Ñ3(ß3)ä2(å3)è3*é2*ì1*ï0(ñ.(õ.)ô/)ð/&ë1%à3#×4!Ð5Í5×6"×4!Õ4"Ô4$Ó6'Ô8,Ô<1Ó?5ëXPáRJÓI?ËB8ÊF:ÌJ=ÅE8½>/¼:*º;(º;(·:(¶9'¶;+º?/¿A3ÁA6Á<3Å<6Î@<ÖDDÜHHãJLëLPèBFê@CðCIøJSýMXûKXùFYúG]õA\÷C`úEfþJmÿNuÿOxÿNvüKuÿQ{ÿRyþPuüNqÿVwÿ]{þTníE\÷TiîQb÷erÿ’œÿ‹“ò„‡Ç`c¦LK´ke̐ˆî½¶ÿÞØÿðìÿùøÿûýüúÿüûÿýúÿÿûÿÿûÿÿûýÿûûÿûøÿü÷ÿüöÿüöÿüùÿûøÿùøÿö÷þòôýññøììðæäæÜÚÞ×ÑÚÖÍÜÚÎÝÞÐÝáÓÛâÚÎØÐÄÍÈ::288055-33+22*22*33+33)55+66,77-99/;;1==3>>4A?3IC7MC7PG8SJ9VK9UL
;TK<SJAQKK[Xalm‚‚¥”Ä™ŸÙž¦ç ¨ð ¤îœ é˜›â”—Ú“”ÔŽÉ‡‰¼„¯xz pqcc}XVkNM[IGREDLBAI>>H:=D;:?:97<92>:.C>+D?)E@*E@*DA.DC1CC7AB:@C<@BA?<EA>ICALBELBGM?IK=IG=HBCI?@<0K9+fF7‚N@ŒJ<Ž?2“7(£>,¨=)©>*©>*ª@*®A,®?+°=*°7$¶9'½9*Â8+Æ8*Ê8+Ð9.Ò;2Î;3Ê:1É90É90Ì91Ì91Ë80É6,Ñ=3Ô=2Ñ9.Ë1'Î4*Ø<0Ù;0×3)ã2(ç1&é1'ì1(ï0(ñ.(ó.(ó,'÷.'ô/'ð1'é3&à5$Ø5"Ò7!Î8 Ô9%Ô7$Ñ6$Ð5#Ñ5&Ó7+Ñ7-Ï8/ãNGçWNæYPÚQGÌG>ÃC8½A5¸?.¶<'¶<%¶<%·:&·:&¹:)½>/ÃA4ÈC:Å?6É@:ÑFAÔIFÕGFÖHGÞJJåEGçACìCHõLSûPYúNZúK\úJ^öD\öC`øEeûIkÿNrÿPwÿPwýQwöVxñVvôUuûXwÿ\xÿ[uúRiñJ^ýWkõTdéM[õ]jßKYæTaåTaÖYaÀfeDŽ~嫧ÿÓÎÿìèÿóñÿøöÿýþúûýøüÿûüÿúûýûûûûúøû÷ôûøóÿûõÿüöÿüùÿüùÿø÷ÿóóüîîùëëïááèÝÛâÙÔáÚÒåÞÔëçÛïíÞïñãëòëÚåßÍØÔ==5;;188044*22*22(33)33)44*55+66,88.::0<<2>>4A?3JB7MD5RF8UI9WL:WK;VJ<SJAQKM[Wemm‡ƒª–Ê›£â¢¬ô¦¯ü©°þ¤«ù ¦ò¤ìž£çœ ß–›Õ”—̐’‡‰²yyŸji‰^^zWToRPfOMcMKaIG\DAR@=F<:;=:3?;/@=,BA-@B-AB2@D5?E;=D=;DA:CH=DTBH^LSfU\n\bp_cnabgd`afXUaJBiG=ƒSE”WE’K7?*˜=(§B.«@,«@,­@,°A-²B.°=*¯7&µ8&»7(Â8+Æ8,Ë7+Ï7,Ó9/Õ;1Ñ80Ï81Ð71Ñ82Ó:4Ó:2Ò80Ð6,ÜB8Ô7.Ï2)Õ7,Û=2Ý<2Ú9/Ü5,ä2(ë0'í1(ð/(ó.(ô-(ö,(õ+'÷.'ô/'î2'è4&à6%Ø7#Ó8$Ï8#Í8"Í8"Î7$Í6#Ð7'Ò9+Ñ7+Í5*Ë4+âMFód\ëaWÔNEÃ@6¹;/´9*¶=(·=%·=&¸;%¸9&»:'À<-Ä>2ÉC8ÈB9ÌE?ÓLFÓLHÍFCÌEBÕGEâHHæCDèEHñLPøSZøQ[÷O\ùN`ôH^ôF_õFcùJiþOpÿQuÿSwÿUyîUsíZv÷^zü]yùUpóMeøPgÿZlÿ[mÿcsïM\÷UdóN^üUgôK^äR_À^_»vqל–ùÊÄÿåàÿíêþôòÿÿýûÿþúÿÿøÿÿúÿþûýúûûùýùöüùôýøòÿúôÿú÷ÿú÷ÿöõûïï÷ééóåäêÜÛçÙÖãØÒæÝÔïçÜøòäýúéýÿñóúóàëçÒÝÙ@=4@>2>;2<:.:7.85,74+63*52)52)52)74+96-=:1@=4C?4H@5LC4RF8TH8VI8VJ:YM?YOFRILUQ_eez}¦Œ“Ê–Ÿâ©õ£°ÿ¨´ÿª³þ«³üª²úª±÷§¯ð§¬ì¦ªçŸ¡Ú˜™ÏŒŽ¿€°sržhe]Z…VT|XQzXRvVOnMH_C?M=;@=<8@@8=?2?C5BF8?E;;B;:C@>FH>KTDVlQf`q‹htŒsuŒxŠo_j‰Zb’WYœWR¥UL¤L>Ÿ@.œ9"Ÿ9"§<(ª=)¬?*¯@,±A-´?-¶=*¸;)º6'¾6(Ä6(É7*Ï7,Õ8/×9.Ú91Ö71×:3Ø93×82Ö71Õ7.Ú91Ü;3åD<Þ=3Ø7-á@6ü[Qÿmbÿ]Rç?4ä2(ì1(ñ2*ò/)ô*&ó($ö)&÷,(õ.)ò/)ì1(ã1%Ø1!Ò1Ò5$Ñ:'Ë:%Ê<&Ì;&Î;'Ð:)Ñ9+Ñ7+Ñ7-ÙA6Î7.ÜG@ôd\äWPÊA9Å@9µ5(·:&¶<$¹=%º<&¼;(¾;)Á<-Å=1Æ=3ÊD;ÖOIÝXQÓOJÀ?9À?:ÏHDÞHGáFDæJKîPQðQVîMUíKVñLZøQcöOcõKeôJeúMkÿQrÿStüStòWvîXuóYuû[uÿ\uÿYpÿWlýVhÿbsýWeÿ]kÿ[jñFVñDWÿQeîVcÆfgªicɐ‰å¶°öÓÏÿêæúïëúöóøüûùÿÿøÿÿ÷ÿýùþúþþüÿûøûöòýúóÿúôÿøõþöóÿ÷õÿöôöèçæØ×ãÕÔÞÐÍàÖÍïçÜüõåýøåÿüéÿÿñúÿøçðíØáÞB@4A?3?=1><0<:.:8,85,74+52)52)52)7
4+96-<90?<3B>5G?4KA5OF7SG7UH7VI8XL>WMDTKLWQ]eb}wz£‰Ç’œáš§õ¡®ý©µÿª¶þ­·ÿ®¸ÿ®·ü­¶û­´ø®³õ¦ªê¡¢â–˜Õ‹‹Ç~¸sr¬gfŸc^–_V‹`Vˆ]TUOqMIbEBSBAI@BA<=8AB<EF@CD>??=CABNLQWWcbg„pt—~ †}œ‘|›ž|—¤p‡£_t«Xh¶VaºRS¶ID°@4¬;)«;#¬>%¦7#§8%©9%¬9&®9'±8'´7%·5%»3%Á5(É7*Ñ:/Ù<3Û=4ß<3Þ;2à=6Þ<7Ý;6à=6äA:çD=çB<åA8ä@7åA8â?6Û9.Ó1&×5*éI=ÿ[Oñ@6í5+ê+#ì)#ø-)ý0-ü,*ö)&÷0-ó3.í6.ä6+Ü4'Ô2#Ó3%Ï6$È:$È<%É;%Ë:%Î8'Î7&Î5'Î4(Ó9/Ï5+Ô=6åPJåTOÛLFÌA<¶0%¼;(º<%»=&¼;&½:(¾9(Á9+Â:,Æ>2ÉC8ÓNEÛXPÒRIÀD<¾B:ËGBÛLFÞIEäLKëSRìSUéPSéNTëNWöUd÷TeùSiúRküRmÿSqÿSqþSsòMm÷Tsÿ[yÿ_{ÿ[týUlúSgúSeûVf÷R`ý[hý[hñO\öTcýXhßS^¶a^–]Tµ€zÖ©£ìÉÅúáÝøéæûöóùûøøÿþøÿÿöÿýùþúþþüÿüûýøõüùôýúóýøôþöóÿøõÿöôõêèéÛØäÖÓÛÐÊÞÔÊîæÙüõãÿúæÿýèÿÿïùþ÷çðëÙâßEA5D@4C?3A=1?;/=9-;7,;7,73(73(73*84+:6-<8/?:4@<3E>4IA6NE6RF6SF5UH7WJ:XKBVLKVPZa_wsuœ„‰À˜Û–¤ïž«ù¨´ü«µü®¸ÿ°ºÿ²»ÿ²»ÿ³ºÿ´¸ÿ®²ù©¬ñ £è˜˜ÞÓ‚Çwv¼rm°i`¡g]™cZ‘]W…TRwLLfCDV??I76;A=>JB@L@@N>?ZFHoV\€ds–r–£z¦­~¨±z¡·u›½p’Áb€ÀTnÁG\ÎKYÐIPÄ=:¹6,¶9'µ=%°<#®;&®;(°;)³;*µ<)º=+¾<,Á<-Ã7(È9+Ò;0Ù?5Þ@5á>5à<3Þ:1Û81Ø5.×4-Þ93æA;êF=éB:ã<4â;3Ù5+×5*Ù7*Ò2$Ë+Ó6'éG:õK>ò@4ì4,î-(ö,*ú,,û++ö*)ï)(ë.*è3,á5+Û5)Ö2&Ô2'Ð4%Ë8&È:&Ë:'Î;)Ð:)Ñ:)Ò9+Ô8+Ð4(Ö<2Ó:4Ó;6äOKìXV×EEÀ3,À;,¿<(¾;'¿:'¿:)À8(À8*¿7)Ä>2Æ@5ÏK?ÕUJÑSGÃG=¿C9ÆF;ÖKDØIAÝLGãROåSSáOPâMQãNTòXdöZhú[mýZoüVnùQkúPkÿPoüHkÿQtÿ[{ÿ[xÿWqüTküVjþ[lúYh÷Ygú`ló`jë^fîgnêgmÉ]] ]U{OD˜md¿˜‘ÛºµïÓÐòàÞýõóùù÷ùÿû÷ÿþöÿýúþýÿÿÿÿüþþøøúùôúúòûøñýøòÿùöÿ÷õøïêîãßåÚÔÛÑÈÜÒÆìåÕýöãÿüæÿþçÿÿì÷ýóéðéÝãßHD8GC7FB6D@4B>2@<0?;0>:/:6+95*84+95,:6-<8/>93?:4B>5GA5KC6NE4QF4SF3UH7VJ>WMKVOW^[pmp“…¹‹•Ó” 
è›©ô¦°ø¨²÷¬´ü®·ü°¸ÿ°¸ÿ±¸ÿ±¸ÿ®²ûª®÷¤¦ðŸé”–ጋׁ€Ì|zÅtm³mf©e_›^[VWƒPPrFF`A>Q<6BF9@S<B_?DlCGNR•^d©fw½kÈm˜Ëo˜ÈhÈ`ƒÈVwÆJfÄ<TÆ3FØ@OÞDLÑ;<Å8/Ã@.»A*°9!«6"«6$®5$°5%µ8&»9)À;,Å;.ÐA3Ô@4Ø@5Ü?6Þ=3Ý90Ü5-Ø4+Ò/(Ð-&Ó.(Ø4+à91â;3ß80Û4+Ü5,Ï+!Ì*×7)ßB3Ú>/Ð4%É.çE6óI<úH>ù<6ó,)ð$%ô&(ù//ï-+é0-ã2,Ý2+Ø1)Ô0'Ó/&Ò0%Ó7*Ò9)Ô;+Ö=-Ú>/Û?0Û?2Û?2Ñ5)æLBãJBÍ50ßGDðZYÞJJÓB?Ç;.Ã<)Â:*Á9)Â:*Á9)À8*½8)Á;/Á=0ÈF8ÎNAÍOAÃH9¾C4ÀB4ÎH=ÏF<ÔIBÚQIÜQNÙNK×KLÛLPêW_ò\güaoÿ`rúXmôNfôLeüMjÿOrÿRvÿVuÿToûUkü[mÿesÿiwÿfsúbnôblìeláfi×ijËge´g__ReH:~^S©ˆÊ«¦äÈÅíÙØýòðø÷õøýù÷ÿýöÿüúþýÿþÿÿýÿÿúúûúõúûóúúòýúóÿüøÿüöýöð÷îçêà×ÝÕÊÜÔÇëæÓýøäÿÿæÿþåþÿê÷ûíéðèàåÞKH9JG8IF7GD5EB3C@1B>2A=1<8-<8-;7.:6-;60<71>93?:4B>5EA6JD6MF4PE1RF0TG4UH8XMGVMRYVgkl‹}°ˆ’Í‘žâ™¥í¦®÷§¯÷ª°ú¬³û®´þ®´þ¯³ý¯³ýª®ø¨¬ö£§ñŸ£í™ç’”ߊŒ×††Ð€Æyv¹llªbeœ]a‘WZƒRQqNI`WFY_CQmBLFM–MT¬U]»ZaÉYgÙRpâRußTsÙPjÒJ`ËBTÆ8HÂ.<Ô8EãCKãGJÖ>;Ë>5ËG8ÂH3¶?)¶=,¶=,º=+½>-Â@0ÇB3ÎE5ÓE7×E8ÙB7Ø>2Ù;0Ù6-Ø4+Ù2)×3)Ô1(Õ4*Ù6-Û8/Ü8/Û7-Ü5,Ü5,Ó/%Ø6)×7)Ï2#Í2 Ñ:'Ô=*Ò;(Õ<*æD7öJ@üE?ù64õ*-õ),ö.0ö65í53â3.Ú1,×0*Õ0*Ø/*Ù0+ã80á90â:/â<0à=.ß=.Û>/Ú>1Ð6*ïWLøaXÐ;5Ð;7äPNÛGGäPNÍ=4È:,Æ8,Ä8+Ä:-Ä<.Á<-¾:+¼:*»<+ÀA0ÅG8ÆH9ÂD5½?0¼>/ÉG:ÈF9ÍH?ÓPFÖQLÓNIÒLKÕMOçZ`ï]güfrÿgwü]oñOdòLbýOjÿVwÿSuÿSp÷Skõ\nùhuûkuöfpùeqñ_jë_héjqÜrtÁie©e\œl^~gUYI9kXJ”|r¼Ÿ›ÝÁÀëÖÕýííûõõûûùùÿýøþþüýÿÿþÿÿüÿÿùüþÿùúýôùüóýþöÿÿøÿþøÿúôÿøððçÞãÛÎàÙÉîéÕÿúäÿÿæÿÿãýÿçõùêëñåæéÞNK:NK:LI:JG8HE6FC4EA5D@4@<1?;0=90<8/<71=82>93?:4B?8DB6HE6MF4NF1PE/UF1UH7YLCTKLXTbhi…|«ˆ’Ç’Ý—¤ê¥­ö§­ù«®û«±ý®±þ­±û­±û¬°ú¨¬ö¦ªô£§ð¡¥îž¢ëšžç•™â‘•Ü’׆‰Ìz€¾rx²mt¨koœigŒi^|x_u€WiŽP_¡PYºR[ÍT]ÕOVÛHRå@Qç@QáCQÜENÔCHÎ?AÉ99È44ãIIçIJßA@Ó84Ì92ÎA7ÇC6½?0ÂD5ÂD5ÄE4ÇE5ËF7ÐF9ÕG9×H:Õ>3Ô<1Ô8,Õ4*Õ2)×3*Û4+Ú6,Ú7.Ü;1ß<3ß<3Ü8.Ù5+Ù5+Ú6,Ñ/"Û;-Ú=.Í1"Å.Í8$Ð=)Ì9%Ì9'Ò9)à:.ñ>7ÿ@?ÿ<>ü46ñ-/é++ß+*×*&Ó+(×/,Ü41ä88è88ï75ð74í81è7/â6*Ü6(Ö4'Ñ5&Í5'æODÿlbÚG@Æ50ÔBBÑ?@êVV×B<Ñ=3Ê7-Æ6+Æ:-Å=/Â=.¿=-¹:)º=+½@.¿B0¿B0½@.»>,º=+ÂD5ÂD6ÇI=ÎPDÑRIÏPIÏNIÓOMç_aíbgúhrÿnzÿeuôUgôQfþUlÿXuÿWsúXoñ^nómvõv}ëmqÜY_ïbkïamëblîrz뇉́|¨tg–ygsmWON:_VGƒqg®•‘Û¿¾íÕÕûèêþôõüúûüýÿùýþýýÿÿýÿÿûÿÿùþÿÿûùÿõøþôýÿ÷ÿÿøÿÿøÿþõÿþôøñçìæØèâÒòðÛþüåÿÿãþÿàþÿæøûèðôåîðãQN=PM<NK:MJ9KH9IF7GC7FB6C?4B>3@<3>:1>93>93?:4?<5@@8CC9IE9LF6NF1
PE/UF/UH5WK?QIFUR[hh€}§ˆ’ѝٗ¡æ¢¨ô¥¨ù¨©û©¬ý¬­ý«®û«®ûª®ø§«õ¥©ò¡¨î §í §ëŸ¦ê¤è›¢æ–žß‘™ÚŠ”Ñ…È‚Œ¿ˆ´‚€§‡z˜m…˜cw¦XhºR]ÏMWßHQç@Hé<@è<:ã?6ÜD7ÕH7ÌI5ÅG1ÂD.Ä?.âTHßH?Ø;6Õ63×85Ö;9Ñ96Æ71¾:-»<+½;+¾:+À8*Â6)Ã4&Ä2%Í5(Î4(Ñ3(Ó2(Ö4)Ø4*Ú6,Ú8-Þ=3Û=2Û;/Ú:.Ú8-Ø6+×5(Õ3&Õ3&Ó3%Ï4"Ï6$Ñ<(Ð?*Ç9!¿1Ç9%Ê4%Ó1&ä6/ù;;ÿ>@ÿ8;õ26ê./ß--Ù-+×/.Ü43â88è:<ï8:ô23÷12ò0.í2-ä2(Ý1%Õ2#Î2#È2#Ï=.új_ãTLÇ82Ð@?Ë;;ãQQâJEÛB<Ð92È5-Æ8.Å;0Á=.¾<,¹<*º?-»A,»>*¹<(º;(»<)¼=*º?/º?0¿C7ÇK?ËOEÊNFÌMGÎNKæabéae÷gpÿnzÿix÷ZköUgýZoþZrù]rëapäkré}}æxw×_aðhrünzñerêlwö‘•ì¢ŸÃ•ˆ¡ŒyhlUHR:WVDth\¢ˆÙ¿¾ðÖ×÷ãåÿôöÿúüþþÿûüÿýüÿÿýÿÿúÿÿøýþÿû÷ÿô÷ýóüÿöÿÿøýýõÿýôÿÿôþúïóðáïìÙøößÿýäýýáýþßÿÿåýÿê÷úéö÷éTN>SM=RL<PJ:NH:LF8JD6JD8GA5E?3C<2B;1A:2A:2B;3@<3AA9CC9HF9LF6NF1QF0SG/UH5SI=PHETQZgh}}§‹“Ä’œ×˜¡æž¡ð¡¢õ¤¤ú¦¦ü§©ü¨ªý§¬ý¦¬ø¤ªô¢©ï §ë¢§é£¨ê¤©ë¥¨í¡¨î– å“ ä’ Ý“žÖ”œË˜–¾ž¯¨„ £e|³\mÂRbÏITÛAKã>Dê;@ë;;ê?7â@1ÚE1ÓH1ÉI0ÂF,¾B*Â?+áSEÜC;Ü:7ã==éCEåABÙ9;Ì65ËB8ÇE7ÇC6Æ@4Æ>2Ã9,Á5(À2$Å3&É2'Í3)Ó5*×4+Ù5,Ù5,Ù5,äB7Ý=1Ø6)Ù5)Ü8,Ý9-Ù6'Ó1"Ø8(Í2 Ê1Î9%Ì9%Å4É8#ÑC/Å6%Ë8(Õ7+Ü5,å4.í53ô;9ù@>ÿLKúHFðBAë?=é;<ç7:ä15å+0ó+.ö*-ó-,ï/,è1)à4(Ù6'Ó8&Æ0½.òdVê]SÎA:ÓEAÉ;9ßMMèSOßGBÑ<6É6/Å7-Æ9/Â:,¾;)½<)¿@-¾A-º=)·8%·8%»<)¼?+³8(²8)¸>1ÁG:ÅK>ÅI?ÅI?ÊKDä`\å_^ðehþouýjtó]iðXeø]k÷^póbqçeoàloë}÷ŽŠó†ƒèrr÷qxÿ{„ôaqÞWeô‰‘þ´³Ï¯¢›„]lOCS8QT?jbU›ˆØÀ¾ðØØõßáÿô÷ÿúýÿýÿüûÿýüÿÿþÿÿúÿýøüùÿúòÿññýñöÿóúÿöùüñýýóÿÿôÿÿóùöç÷òßüúåÿþåýüàþýßÿÿçÿÿñûýðüüðWP@VO?UN>TM=SL<QJ:PI9PH;KC6KC6IA4H@3G?2F>1F>1F@4EB9DD8HF7MH5PJ4RJ3UJ4SK8VNCRMJXU`hh€z}¦ˆŽÂ˜Ö—žä£¤ô¤¡ø¢¢ú¡¢ü ¤ÿ¡§ÿ¢ªÿ¡¬û¡«ó ©î ¨é¤§êª©í­ªñ¯¨õª©÷ž©ø•§ñ’¡â•ŸÔ¥£Ë³ž½¶…›¶hx¼LZÐDOÞ>Jå;Dæ;Aã=?ß??Þ@=á=>ß=;ÜD?É:2ÊD9ÉE9º1'ÚKCåJFèDCè>Aè;?å;>à<=Ú<;Ò=9ÎA:Å?4ÈD8ÑMAÌJ=¾<.¸6(¼8+¼6*À6)Ç4*Ë2*Î/)Ó0)Ù2,Ü5-åA5Ý:+Ù2"Ý3$ã9(æ<+Ý8%Ó2Î3Ê4Ç5Æ5 
Ç4"È5%Ì6'Î6(É1$Ì2&Ï5)Ð7)Ñ5(Ô4&×3'Û3&à2)å4.ë52ë33è.1æ,1ç,3ì-4õ.3ó+-î,*ë0+è6,â8+Ø7%Ð5#Ã.Á3æYHèZLÊ<2ÝNFÃ4.ØIEàPOÚKGÒC?Ë<6Æ7/Å7+Ç9+È<+Å<*À;(½:&»:%º;(º=)·=(µ<)²8)´<.¸>/¹?0»@1½?1À@3Ã@6îkaôohéb^ômjúppë`côhkÿqwÿmxùhuîemèchêehímlîqoîqmúxxÿ}ƒ÷[iûdwÝcpø¬®³«˜z‘t[sQIZ8LR8pnY£•ˆÉ¶¯êÕÒÿîíÿö÷ÿ÷øüøùûûýýþÿþÿÿþýÿùÿýíÿóåþéäûéíÿíøÿöûÿôýÿòÿýðÿýîÿûëÿúèÿøåþøâýùàýùàûúåþþôþÿúÿÿúXO@XO@WN?VM>TK<SJ;RI:RI:NE6MD5LC4JA2I@1I@1I@1H@3FD8GE9JG8NI6QK5SK4SK4SK8VNCSNKXU`hh‚z|£…‹¿•Ó“šÞŸžêŸðó›Ÿø¢ýž¥ýŸ©þ¡¬ûŸ«óŸªìŸ§æ¡¥å¥¤è¨¢ê¨ í£ ïž§ö™§î™¡ß¢›Î±˜¿½Œ©Âq„ÂTaÑDMá=Dê9?í8=é;=ã=?ÜA?ÛAAÝ<AÚ9?ÚACË;:ÌB?Ç@<À2.äPNäDFè?Dé:?æ7<à7:Û89Ó97Î:6ÒE>Æ=5Ã=4ËI<ÌL?ÁC5¶;,¶8)¹7)¼6*Ä6,É5+Ð3,Ö3.Þ52â92â>2Þ;,Ý6&ß5&á6%à6%Ú5"Ó4Ï9!Ê: Å9"Å:%É<*Ê;+Ë7+Ì4)Ò4+Ó5,Ñ7+Í7(È5#Ç4 Ê3 Ï4 Ø2$Ý1%ã0)æ/+æ,-å-/æ-2ç.3é+-ë--é0-å2+à4(Û5'Ô7$Ð9$Ò?+¸*ÓD3Î?1ØE;êWO×B>ÖB@åUTàRPÚKGÒC=Í=4É9.Ç8*Æ9(Ç<)Ã:'¿;&½<'¼=*º?-·@,´?-¯9+±;-¶>0¸>/¹>/¼>/ÀA2ÂC4ëi\ñndåbZðkfõolça`ðjiütxÿq{ÿoyöirë^déZ^îbcöpmûzuøvtÿw{øSdüZoâ]nõ¦©§¦’iŒkZtON`:PY:ttZ§žÔÄ·óàÚÿóïÿúùÿùøþúùûûùúþýùÿýùýüóÿ÷êÿòàÿèàüåèÿêòÿð÷ÿñüÿñÿÿðÿýìÿüìÿúèþõäùòßøòÜúôÞú÷äúúòûü÷þþüYPAYPAXO@WN?VM>UL=TK<TK<PG8OF7NE6MD5LC4LC4LC4KC6HD8HF9KH7OJ6QK5SK4VK5TL9VNASNKZWbiiy{¡„ˆ¸ŠÊ•Õ–™Ü˜˜à˜šç—œí™ ñ›£ôž§ö ©ôŸ©î¨æ›£á›¡Ýœ ßžášáš˜á“šÞ™šÚ¢•Í®Œ¾¾ªÆqÆYpÆCQÕBLß>Cã=?å==å<?â>?á>Aß>Cà=BÖ7;Õ?@Î@>ÉB>Á<7Ã83éXUßABä>Bå;>â8;Û78Ô65Î93Ë<4ÔG>Ä;1¼6+ÄB4ËL=ÅG8»=.µ8&¸6&¼7(Å7+Í6+Ô6-Û60å95æ=6Ö4)×5(Ù5)Ü6*Ü6(Ú7(Ú;(Õ>)Ã5½5¸6 
¸7"¼9'¼8)¼4(¾.#á?:â=7Ü>3Ô;-Ì9'È7"È7"Ì7!Ò7%Ô2#Ö0$Ù.&Û.*Û/-Û/-Ú.,Ú-)Þ1+Þ6-Ú6*Ô4$Ï4"Ï8%Ð=)ÓB/À2æUDÏ;/Ù@8ÝB=èJIïUSëUTåTQáPKÚJBÖC;Ï<2È9+Ä7&Ã7&¿8%»8$¸9&¹<(·>+³>,°=+®:+°<-²<.²=,µ=-¸>/½B3ÂD6Û[Pçg\âbYðlgôplåa_êefòkoàS\î^hõhqòekñ`eôegùroýyuøtrÿnuòI\øRhãZl¡œˆ`]UoHPe>Va?ux[«¤‘ßÒÂþíåÿ÷ðÿüøÿýùýþøûÿúûÿýùÿýùÿûñÿöæÿíÛýâÙ÷Ýßùàèûåîûçõüêüÿìüûéÿûèÿøæùðßòê×ïçÔóëØöðàüùðýü÷ÿþúZQBYPAYPAXO@WN?VM>VM>UL=SJ;RI:QH9PG8OF7OF7PG8OH8JF:KG;NH8QJ7SK6TL5VK5TL9VL@UMJZW`jh}xxš„±‡Á“͐•Ï“”Ô“–Ù”˜á•›ç—Ÿê™¡ê›£ë›¤å˜¢ß•ž×’›Ô“™Õ’–Õ’“֏Ð…„½„¶£}®¶t¢ÇfÐWxÓGaÔ<KÜAGßABÝB@ßA>à@@ã>Bæ=Dæ=Dâ?DÔ:<Ð>>ÏEBÆA<º61Ä=7éZVÜBBâ?Bæ=@á;=Ù99Ó97Í=5ËA7ÎD:Á9-¹3'¿;,ÅF7ÃF4¼=,·8%»7(Á9)É:,Ð:,Ù80à91æ:6é>7Ø1)×3*Ú3+Ý5,Þ6-Ý7+Ú:,Õ<,Ç8'Ã:'¿<*¾<,½;-¿;/Â<3Í:3ëC@í@:ã?6Ø</Í:(Ç9%Æ:#É;#Ï<(Î7$Î1"Ð0$Ô1(Õ2+Ò1)Î0'Ñ3(Ò4)Ð7)Î6(Ë5$È7"É;%Ì>(Ê<(Î=,ó]OßG<Ñ2,Ñ/,å??ùVWÿtrÿrnÿkeô_XåQGÔB5Ä5%¼/Ä8'¿8%»8&·:(µ<)µ=,±>,­<,¬;-¬<.®=-¯;,±<+´?.»A2¿E6ÉMAÞ_Váb[ñpkõsqä_`â]`ä]cÕIRæYbógpôgmöelúkoûssüvuùqsúdmðCWóJaåXk뒖•‚\zXQlCRi?ZhDw|\®ª‘æÛÉÿôçÿøïÿûñýýóüÿöùÿøùÿúöÿøñýóéûëØùÞÍòÑÊëÌÎëÍ×îÒÝïÕèòÚñöàóõàùöãûôâôëÚêáÐèÜÌíáÑñèÙüõíü÷ñþùó\P@[O?[O?ZN>YM=YM=XL<XL<VJ:VJ:UI9TH8TH8TH8UI9SJ;MG;LI:PJ:RK8UM8WL6WL6UM:UK?TMG[V\hevtt|~¥ƒ‡¶‰ÁÃŽ‘Ɛ’͐”Ô’•Ú“—Þ”˜ß“šÞ•Ü‘šÕ‹”ˇ‘ƃŒÃ‡Á}ƒÁ~·}tŸŽp”¦h¿]„ÕNváAcæ:Tè9HæAGàECÜGAÛHAßECæAEî<Hí=HãBGÔ>?Î@>ÎID¿?6²3*ÇB;àUPÚEAàBAâ?@à=>Ù>:ÔA:ÎD:ÌF;Å=1¿7)»6'¼9'¿=-½>+½<)¼;&À;*Ç;*Î<-Ó;-Û8/à8/å82ç:4æ95ã62á2/ä2.ç40ã5.Û3*Ò/&Ë1'É7*Ç:0Â8.À3,Â40Ë;:Û@>é9;ì63ã60×5*Ë5$Ä6"Ã8!Å:#É=&É8#Ê4#Í6%Ò9+Ô:.Ó9-Ï9+Ï=.È;*Å8'Ã8%Å:'Å='Ç<'Ç:&Í>-Ì9)×=1àB9Ô/+ã99ß12ß56Ô52Ö;6ÝD>äKCèQHèTHçUFãVEÉ@0Ã>-¾<,·<*¶>-³@.°?-®?.ª<-«=.¬=,«<+¬;+¯>.¶@2ºD6ÅLAÜbWàd\ïpjóqqâ`bå`eæ_föjuûoxöjsë`eð_fýlqÿyyÿzzúnq÷]iò@VôC]èUh䈍£”f€]YtI\vGgxNˆf²±•çáËÿúçÿüìÿþïüÿñûÿô÷ÿôôÿñëüêãñàØî×Åèǻ伹߸½ßºÅà¿ËãÃØæÌáëÒëïØòñÜ÷òßóêÙëßÏçÙÌêÜÏîâÖñçÞñèáòéâ[O?[O?ZN>ZN>YM=YM=YM=XL<XL<WK;VJ:VJ:VJ:VJ:WK;UL=OI;PJ<RK;UM:XM9XM7XM7XM9UL=TMEXTUc_mmkst“{}¤ƒ†±†‰´†Š·‰‹¾‹ÆŒÎŒÒŒÔÓŽ”Ò‰É‚‰¿{‚¶u|²ov­io©lj›€lˆ“cy­ZtÉPoâCcï6Uö4L÷8Gí@BäEAÜIAÚJBßGDçBHñ<Kï=KâCGÔBBËA>ÍID¸<2®4)ÉI@ÔKEÖB>Ø><Ú<;Ú<;Ø@;ÓD<ÌH<ÊH:¿7)À8(¾9(½8'»8&º9$¼;&À;(Ä;)É<*Ð=-×;,Û7-Þ6+ã5.ç60è21ç//é-.í
//ó55ó76í53ä2.×.)Ö3.Õ62Ñ30Î.0Ò/4Û6=é<Bí06î02å2.Ú3*Ð4'È7$Æ9%Ç<%Ä7#È7$Í7&Ò;*Ù=0Ü@3ÝA5ØB4ÒF5ÉB/À;(¾9&À;(Ä=)Ä;(Å8&Ì;*Í7(Ì0$æC<Ú1.í?>â/2Ù)+Ô1,Ò5.Ô70Ó9/Ñ9.Ì5*Ä2#¾1 ÍD4ÆA2»<+´9)®9(­:(«:*©:)ª<-«=.«=,«=,«<+¬=,°?/µA2ÉSGßf]ßd]èkgînoåbgíjpônuùpzüs}ömuîcjöelÿsxÿx|ÿquúipõWeø@Xó?ZèPeá~ƒ«–ƒxŽhmˆ[nŠYx‹^Œ˜p³·–ÞÝÁøóÝûùäÿÿíûÿïõÿïîÿëåûäÚñ×ÐãÍÅÞÁ°Ö­¨Ó¦¨Ñ¥®Ó¨µÕ®¼Ö±ÇÚºÑàÁàæÌçéÓïíØñêØîâÔéÛÐêÙÏëÚÒèÛÒäÚÐä×ÏZN>ZN>YM=YM=YM=XL<XL<XL<XL<WK;WK;VJ:VJ:WK;XL<VM>QI<RJ=SL<VN;XM9YN8YM7XM9WN?ULCVPP^Y`c`qigqpyy|¢}§‚²ƒ„º††Äˆ‡É‰ˆÌˆ‰Ê†ˆÅ‚„½{~³ux«pq§kl¢efžlaŒ`q•V^°N[ÑJ[ì@V÷6Ký2Eþ9Cò>?èC=ßHAÜIBàFFçBHñ<Kî>KÝADÔFBÇB;ÇH?³:/°7,ÎPDÈC:Î=8Î63Í2.Ï61Ñ>7ÏE;ÆG8ÂC4¾9(Á:'À;(¾9&¼9%¼:$½;%Á:&Ä9&Ë:'Ò;*×:)Ü6*ß5(ä3+è3,è**î*,õ-0û03ÿ26ÿ47ÿ47ü58ø7:õ8<ñ8=ñ8>ó8C÷7Fú5Gû5Dó*4ó,1ë20á4.Ø5,Ð8*Î;+Î=*È5%Ï7)Ø:.Þ>2á=3â>4â>4ßA6ØK:ÍH7ÉA1Ã<)Á:'Â9'Å9(Ç8(Æ2$Ò8,à?7êC=Ý1/Ý--é56æ66Ø3-Õ7.Ö8/×;/Õ;/Ñ;-Í;.È:,ÔL>ËG8¾?0´9*®6&«7(©8*¨8*«;-¬<.­>-­>-«<+ª;*¬=,®=-ÀL?ÙcYÝd\çlgîpqéhlôqwüvígp÷oyøqxójqþmtÿwÿu|ùhoùclõSbþB[ô<XéLaÜuy´›‡Œžx}™i{—d€–gžs¬²ŽÊË«ßßÃçêÏíöÛäóÖÙíÑÍæÈÄßÀ»Öµ³Ì®ªÈ¤˜Â”“ÂŽ˜Ä‘¡É—§Ë¬ËŸµÎ¦¾Ò­ÎÙ¹×ÝÁäãÎìåÓíáÓëÚÐæÓÌãÐÉÝÏÆØÌÀÕǾYM=YM=YM=XL<XL<XL<XL<XL<XL<WK;WK;WK;WK;WK;XL<YM=SJ;TK<UL;XM;YN:YN8ZN8YL9[O?WMCVNKZTX\Yd`]pig}ppŠvw•xxœ|{§~~²‚½…ƒÄ†ƒÈ†ƒÆ}¼~zµyv«vr¥un¢sjŸpgžza‰„QZ˜IE²FFÒGNíCNø:Fþ6@þ:>õ==íA=æD?áFBäDFçBIî>Kë?KÕ?@ÐGAÂB9ÂH=¯9-°:.ÐTHÃ>5Ç82Æ.)Ã*%Æ.)Ë;3ËB8ÁC4¼?-¿<*Ã<)À;(¿:'¿;&Á=(À<'À8"Ä6"É6$Ñ8&Ø8(Ý5(à4(å3)é2*õ31þ68ÿ9:ÿ58ÿ-1ý(,ý(,ý*0û,2õ*0ò&/ö(4ÿ,=ÿ*?ü 9ó/õ!/ð'-ë-/ã/.Ú1,Ó2(Ð4(Ï5)Ó5*Ü:/æ>5ê=6é;4è71ç60á90ÝI=ÖJ;ÒF7Ë=/Ã5'Á4#Å6(Í9+Í5*Ó6-æC<ß82Þ20Ïë99ã62Ô1(Ñ3(Ð4(Î4(Í5(Ê6(Æ7)Ã7(áYKÖREÊH;½?1¶</´<.²<0³=1¯9-¯;,±=.°<-¬;+«:*«:*«:*°</ÐZPÛbZêpmówwëmqóqyõrzýz‚ÿˆÿy€òiq÷eoÿpyÿt|ÿltø^höO`ÿB\ö9UéG\Ùosºž‰˜¨ƒ‚žm|šd—e‹œo 
¨³¹•Ç̬ÑÛ¹ËÙ¸ÀÔ±±Ë¨¤Âœ»•˜¶–²Œ±†‰´†µ~‹ºƒ•ÀŠ›ÃžÃ¨Å—°ÉŸ¾Î©ÉÒ³ØÚÂäßÌèßÐçÖÎàÍÇÚÇÁÓº˽´Å·®ZM<ZM<ZM<ZM<ZM<ZM<ZM<ZM<WJ9XK:XK:XK:YL;YL;YL;YM=XL>YM?YM=ZM<[N;[O9[O9\O<YM=WMAVMFWNOXQX[Ua^Zi_\ohe€mlŽroštr¤zv³‚}¿ˆ‚ÈŠƒÇ‡~½‰»‡z°ƒr¦†r§Œt¨‡l£†\‚HL¢B6´A:ÊC?Ý?>è:;ô<<ÿDCö=;ñ=<ì>=ê@AèBFéBJê@KãBJÏ?>Ã@8»?5´>2®=-°<-¾D7ÑLCÆ7/Ó;6Ì1-Ë2,ÓC:Ç?3³5&»>,¹6"À9%Ã<(Á:&À9%À9%Ã;%Æ;&Ë:'Ð9&Ô7&Õ2#Ø.Þ1#ë6+ô>3ô71õ4/÷0+÷,(ù'&ú&%û%'û%'û&*õ"(ø",ÿ'5ÿ(;ÿ"9ÿ4ý/ÿ'8ï#,ä#(ã+-Ü,,Ó*'Ò-)Ü5/á51ç51î64ñ54ò21ï/.í-,ä.*Û:2Ð9.Ð90êSJº#Â+ ÞE=È.$Ë.'Ü>5çD=à;5Ö/)Ù-)â51ä;4Ü>3Õ<.Í5(É3%Ê6*Í;.É;/Å9,êbVßYMÇC7¹7*º:/·9-±3'³5)¯1%¯1#®3$®4%¯7'¯:)±<+²>/°:.¹C9Ö]Uêpmðtvõw{úx€óq{þ|„ûx€÷tzût{ÿs}ÿr}ÿmxþhsü`mòJ[ÿ>Yü>Zã@UÓdj´•€’¡zj{™c}•c…–i“ už¦ž¨ƒ™§ƒ’£‰¢{€Ÿuxœpw›oyo s€¥r‚­uƒ³w‰¹}¼’½…—¾‡œ¾‹ ½¦»’®¼™½Ã§Î͸ÝÔÅàÒÉÜÇÂÓ¾»Ê»´¿²ª¸«£YL;YL;YL;YL;YL;YL;YL;YL;WJ9XK:XK:XK:YL;YL;YL;ZM<YL<ZM=ZM<ZM<[N;[O9[O9\O<[P>[OAXNEXOJYPQ\SX]V]^Xda\sgb‚lgnjœuo«zº†ɀ́¿}·u©‹mŸ—q¤ t§šjœ˜Vz=A§5*°6)Á=1Ð@7Ü?6ç>7ò@<ô<:ó;;ò;?ð=Aí@FèAHá@HØAFÅ;8¼<3´;0¯;.ª9)®:+¼B5ÎJ>×G?ÛB=Ô72Ð50ÔA9ÌB7¹:+±4"»8$Á:&Ä=)Ã<(Ã<(Ä;(Æ;&È:&Î8'Î5#Ó3#Ù3%á5'é9,î9.ï9.å0%æ0%ê.#î,#ô+$ø+&ý+*ÿ-.ú),÷$*ù#-ÿ&4ÿ%9ÿ5ÿ0ü.õ.ö.9ô2;ä+1Ú'+Ü.0Þ01Û+-ð9;ó6:õ47õ13ö/2÷/1ø23ó76Ú1,Ú<3Ö71ãD>âE>Í0)Ï2-Á"Ó4.Þ?9æD?à=6Ö3,Ö2)Ý6.Ü90Ò:,Ì9)È6'Ç5&È9+Ê<0È</Ä:/ìbWàXLÈ?5¹3(À:/Á;0¼6+Á8.¾4*¾4)½5)º4(·5'´6'±6&¯7'´<.»B7ÓZRçmjíqsõx|û|ƒ÷uþ|„úx€ùv|üu|ÿs~ÿp{ÿjuüdpû_lôI\ÿ=Yÿ>[éCYÒdg­vˆ—nx”at’\vŽ\}‘`ˆ•iŒ™nŠ–nƒ“lp‡]kˆ\h‰\fŒ[j_p–cv›hy 
i­r‚²t‡·y»}½‘½€•¼ƒš½‡š´…¡³‹¬·—¼¾¦ÍÆ´ÕǼÔÁ»Î»·Ä¶³¸­©°¥£YL;YL;YL;YL;YL;YL;YL;YL;WJ9XK:XK:XK:YL;YL;YL;ZM<YL<\L=\L<\L<]N;]N;[O9\O<_RA]QA\PDYOEYNJYOM[QP[QY\SfbYvf^…jb‘pk£}w³†}¾ˆ~»—…¿˜€´šv¦žpž­t¡ºw£´h–°Rt¸<D»5,¹5)¿;.ÈC4ÒD8ÚB7ä?9ð><õ;>÷:@õ<Bï@Gå@FÙ@CÎ@?¿:3¶:.¯9+«:*¨9(¬8)»@1ÍG<×D=Õ:5×84Ù<7ÛD;ÝOCÍI:³4#¾9&À9&Â;(Å>+Ç>+È=*É:)Ê9&Ï8'Ï2#Ó0!ß5(ë=2ò@6ð91è3*Þ4%Þ4%á1"å/"é-$í,%ð*'ñ+(ò,+î'*ò&/÷'4ü%7û4û2÷1ó$6ñ/:ñ6?ð8@ï;Dí<Dé6<â-4ï29ò09ó.7õ.5ô-2õ.1ó/0ì31Û/+à=8Ù53ß;9ÿliäB?Ñ/-Î/+Ù:6Þ?9àA;Ú<3Õ7.Ó5*Ó5*Ï6(Å6%Â7$Â6%Â9)Ä:-Ä<.Å;0Å;1ê`VÞTJÈ;2¿0(É:2Ï?6Î;3Ñ=3Ö=5×=5Ô=4Ï<2É;/Á9+»7(µ7(·;/»A6ÏTMãhcëooöy}ÿ€‡ú{‚û|ƒûyûx€þw~ÿtÿn{þfsú_mù[jôG[ÿ;Wÿ>YìFZÍ_`ev…Zh„Qf„Nl„Rs‡Vy‰\{ˆ]u„[oW`zM_P`„TeYl”_t›dzŸi|£j­pƒ±q‡µuŠ¸x‹¹yŒºzŽº}’¹€’°|”«}š¨„¨¬‘¹´ Å¹«Ê¹±È¶´¶¬«§¡¡—™YL;YL;YL;YL;YL;YL;YL;YL;WJ9XK:XK:XK:YL;YL;YL;\L<[K<\L<\L<\L<]N;]N;[O9\P:^Q>]P?[O?ZN@ZMD[NF\OIZNN^P_`TldY{f^‡oh›zu­ƒ{´…z²˜„·¤„³¯€ª·y ÆyÐt™Êd‹ÆNjÙEQÝB@ÓC;ÍD:ËG:ÌF:ÓE9ÜB:ì@>ô=Aù<C÷>FïBFâBDÒ@@Æ?;¾>5µ;.®:+«:*©:)­9*¼>0ÌC9Ô=6Ó40ß;9åA?ßE=çTJÞVH¾:+¿:)¾7$¾7$Ä;)É=,É<*Ë:)Î8)Í4&Ñ1#Ø2&ä8,ð?5ó>5í5-ã.%ß5(Ü6(ß3'ã1'æ.&é,(ë+(ë+*ê)*è',ê&0ò*7õ';ö"9ú ;ú$>û6Gâ(5å/;ÿP[ÿ`lÿO[ñ=Hñ9Eè*6í+6ð.7ó19ð16é/0á+(Ö)#Õ0*Ø61Ó1.äB?ÿspõVSÜ=:åGDÚ<9Ù<7Ô:2Ò8.Ð8-Ï7*Ì4&Ä3"¼5!¹6"»8&¾:+¿;,¾:-À:/Ä;1ãYOÚMDÊ70Å0)Õ<6ÜB:Ù<5Þ;4æ=8ç>7ä@7à?5Ø>2Ï=0Ç;,¿:+¹9,º<0ËOGßd_ëooø|~ÿ„ˆü}„÷x÷xúx€ÿx‚ÿtÿmzýerù^löUeòEYý8Tþ<WíG[ÅYYŠlRbsGZuB[xBbzHkNqTs‚Wq‚VlVk†Yk[n”atœg{¤l¨n‚©pƒ«o„®o„°qˆµtˆ¶u†·u†·uŠ¸wŽ¸y³{«z‘£{š£„««“¼³¢Å·®Å·´±«­ 
ž£•’™ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8YJ7ZK8ZK8ZK8[L9[L9[L9\M:\K;]L<]L<]L:]N;]N;]N9^O:\O<[N;[N=[N=\O?]OB]OB^OJ`OYbQdaTncY{jeŽur¡~x¨u£”}§«…ªÂ‰©Î‚žÙv’Þl†ÚYuÙE]òCTùEPïJNÞHGÏF>ÊD;ÏE;ÙD>çCBïACô?DõBHëEGÞDBÍB=Â?7¼@4µ=/¬;+ª;*©:'­8'º:-Ë>5àE@à<:ëB?éA>Ü:5ãJBæXLÓK=È@0½8'º2"À7%Ç;*É:*Ë7)Ï7)Í1$Õ3(ß7.é;2í:3í60ê/*ä+&á0*à1,ç1.ë20ò12õ26ö37ö5:ë,4ê,6ì-<ð/@ó-Dô)Fü+Kÿ4Qó8Iä2>ïAKÿ`jÿkwÿWdôDQô?Ní4Bì4@ì4>ç6<â88Ú72Í3)Æ/$Ê3*Å.%Ï81ñZSö]XñWUÛA?àHEÛC@Ó;6Í6/Í6+Í9-Í9+É6&Â5#¶8"±8#³:%¸=+¶;+µ7)»9,Ä>5ÙOE×G?Í6/Î1,ß>6åA8á81å61ê1/ë2/ë51æ81à90Ø:/Î:,Ç;,¼6*º8+ÈI@Ü_Yênlù}}ÿ†ˆû~‚óv|öw~úx€ÿx‚ÿs€ÿl{üdsù^nñRdóE\ý8Tþ=XîK\¾URz_BVg:Wr?[vCdzIl~Nr‚Uu„Yx‰]w_w”dušgx k{¦n«p‚«oƒ«o„ªm…­o‡¯q‡´s‡µt„¶q…·r†¸s‹¸wŽ¶zŒ­x¤v–¤€ª­’¾¹¥ÍÁµÏÄÀ»¹¾¨«´™›§ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8YJ7ZK8ZK8ZK8[L9[L9[L9\M:]J;^K<]L:]L:^M;]N;]N9^O:ZM:[N;[N;\O<_P=`Q>`P@`OEbNPbNZ_Pc_Upga…qo–ytœ|r—u•­‚Ê‹ Ú“ãsƒêctçPcè=Pø4Jÿ;PûERèCJÔ@@Ë@=ÏD?ÖE@âDCèBDîAEíCFåEEÙD@ÊA9¿?4¹?2°:,©8(¨9(¨9&­8'»9,É:2çJEè@?ê<;ã75Ø3/ÞA:íZPôh[ÙQAÆA0½5%À7'Ç9+Ç8(Ê6(Ð8+Ï1&Ù5+å:2é;4é3/ç.)è-(ë.,ò46õ49ø3<û2<ý1<þ0=ü0<ù1>ô/@ò1Bó3Hô4Mõ0Nô-Nþ2Wÿ=^å2EøNYÿakÿ_kþVcùQ`òFVç8Gñ@Pè9FÛ29Ð02È3-Â8-½;+º;(Â:,¿3&ØKAùjbÛLFáPKÌ;6Í<7ßKGÔA:Ê7/Ê8+Í;,Í<+Ë:'Ã:'´<$«;%¬<&±>+¯:)®4%¹9.ÉD;ÔG@×D=Ñ61Ö3.å<7ê<5æ3.ì1,ñ+*ó++ñ/-ì3.è7/à90Ø</Ñ=/Â6)º4)ÃC:×XQæieø|zÿ††ü€‚ôw{öyûyÿx‚ÿq~ÿixúbqø]mïPbõI_ý;Vý?YðO_¹RMoX8Re7\tBa|Il‚Qt…Xz‡\|‹`€‘e€—i~›k{ l|¤o|¨m~ªm«l‚ªlƒ©l…«n†®p‡±q…²o„³o‚´m„¶o†¸sŠ´u‡­t‹¥u˜¨¯¶—ÅîÖ;ÚÑÌÁÁÉ«±¿š ®ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8YJ7ZK8ZK8ZK8[L9[L9[L9\M:]J9^K:^K:]L:^M;^M;]N9^O:^O:^O:_P;`Q<aP<aP<aP<`N@cNKcNU`P]^Ujgc~rq‘zw–}sŽŽvŒ­’̆‘ß~‡îqyøbk÷PZ÷;Jþ)Eÿ2Nÿ;Rñ<MÞ=EÕ@DÓCCÓAAÜCEàBCáAAáCBÜD?ÒC;Æ@5¾>1·=0­9*§8%©:'«<)³;+À</Î>5ß=:ä88ã11ß//Þ52Û94åNCûm_ñgZ×RCÅ=/Ã:*È:,É7*Ì5*Ó9-Õ4*Û7.å:3è71è1-ç.+ì0.õ13ÿ3Aÿ0Bÿ-Cÿ*Aÿ&>ÿ":ü 9ö 
8÷'Aô*Dõ.Kõ/Pó+Pô)Tÿ2`ÿBhñDWÿ_jÿhr÷WcìJYðK[êCTØ1Bè@OÛ7BÇ,0¸('¯/$«8%¥?&¨@'·@,¹:)ÜZLà\PÌF=ÌC;À71Æ;6âUNÔE=È:0Ç8*É:*È9(Ç9%Ä='°>&¦<$¦<&ª=)©8&«3%»<3ÑLCÔG@ÚE?Ø95Ú3-ç83ë51ê/(ó0,ü,,ý+,ú.-ô1-í5-æ8/Þ<1×?2É9.¼4(¿<2ÎOFßb\öywÿ‡‡ÿƒ…ø{ù|‚ý{ƒÿwÿn~ýdvö]oôYkëL^ôH^û9Tú<VëM[±KFfR1Oe4\tBf~LuˆZ}Žaƒe…’g‡˜l‡žp‡¤t‚§s§r|¨m}©l©j©kƒªk…©l„ªm…­n‚¯l°l€²kµm„¸p‡´s†®rŒ¨wš¬„²¹šÇÅ®ÖͼØÑË»¿Ê¤¬¿’š­YJ7YJ7YJ7YJ7YJ7YJ7YJ7YJ7YJ7ZK8ZK8ZK8[L9[L9[L9]L:\K9^K:]L<]L:^M;]N;]N;\P:^R<`Q<aR=aR=aP<`O;_N:`M>gNIgNRbQ[`Whiguv”{|›y•{”©‚•Æ„Ú}…ïtwýklýYZûCKü1Gÿ4Qÿ<Tó<Nç?LãCKÝBFÖ=?ÛACÛ?@Û@>ÛA?ÖC<ÏB9Å?4¼>0´>0«:*¨9&«<)³@.»@1ÉA5×B;Ø63à74Þ/,á2/ä;4Ó0'Î5'éUGÿwié[OÐB6Ê<.Ê;-É7*Ê6*Ô:.Ù8.Þ7/ã62æ21é/0ì01ò46û4;ý&6ÿ"6ÿ"9ÿ#7ÿ 5ÿ5ý3ø4ï2í8ï$Aï(Gí%Ið(Mý3[ÿFhÿbsý`iôW`ðPZðP\îLYá?LÒ2<Ù>DË68º0-®/&¥6%¡=&›?&œ>$¨9&²8)ÑUIº:/Ã>7»4.»2,ÍB=ãXQÕH?Æ9/Ä6*Ä7&Ã4#Å4!Â;'±<(¦9$§8%ª9'¬4$­1%Â?5ÛRJØIAßHAÝ;6Û2-ç40ì1,î-(ù2/ü,,ü*-ø*,ó,)ë.(ã1'Ü4)Õ7+Ð>1½3&»9,ÇH?Ø]Vñwtÿ‰ˆÿ†ˆû€…ý€†ÿ}…ÿvÿj|þ_s÷XlõShêDZôC]õ7Sñ;TßMZ¢H@\L+Lb3To<a|GtŒZ€–e†™k‡šlˆ p‰¥t‹«y…¬w€©q{§l|¦g|¦f¦g¦g€¦i©k«l€­l~­g~°g€´jƒµlˆ·qˆ°q‹¬w™°„¬º—¿Å©ËʵÊÌÁ´½Äžªº‹—§[J6[J6[J6[J6[J6[J6[J6[J6\K7\K7\K7\K7\K7\K7\K7\K9[J:\K;\L=ZM<YL;YN:\Q=^S?^S?_R?^Q>aR?cRBbQAbO@bL?iMIfLMdPYf[lkk…v{›€…«‡‰°Ž„§¢‡¦¼ˆŸÎ‚ây~ðsoùkaÿ`\öKTð=Pé:Ië?MíCPç@Jä=Eã>Dá<BÜ<>Ú<;Õ=:Ï?7È>3À</¶;+§6$­B.«<)«8&½B3ÊF:Ì>4Ï60Ù72Û60Ý6.Ü6*Ü9*Ü;)×;%Ó6%Ù;0ïRIñZO×E8Æ7)Ì=-ÓA2Ò:-Ô3)ðGBà..î5:ò5;î,5ÿALï'2ù&/ÿ(0ÿ'1ù&û"*ÿ&.þ'.ð$î)÷+6é!.ì&7ñ-Aä":ÿWqç-Dÿ^lþ^hü[cùU\óPUéHMÚ>?Í84É>7ÄB5·?/¥8$™2—7!š;'œ;(§9,®7/ÂE?¿;7¼2/Ä64Ä51ÓD@ÛLFØKBÑD:Ç9-Ã3(Ç5(É5'Ä6(º<-±9)­/!©)²,!Á9-ÐC9×G>ãPFÝD<Ù80Û0)å/+ñ10ù11û11÷/1õ/0õ/0ó0.ð0+ë3+ä6+Û9,Ô>0¾2#¾?0·=0Õ^VèrnÿŽŒø„„û‡Šÿ„Œÿx…ÿhzÿ_wÿ[uþTnøLf÷Dað9Wó:Yå?WÚ\gŠ?9UG,L^4Xu?g†Kw–]}œc|f€¡j‚¥m§n¦m~§m|¥i{¥f{£e|£d|£d}¤e}¥i~§m~ªk¬k~­g~®d~±d‚²f†´k‰´oŠ°uŽ®|˜³ˆ§½™±Á¤²Á®¤³°‹˜žw„Š\K7\K7\K7\K7\K7\K7\K7\K7]L8]L8]L8]L8]L8]L8]L8]L:\K;]L<^N>\O>ZO=ZO=ZR?[S@\TA]QA\P@\NA_ND`OEbPFcOHgNJfMPcS]f^snoŽv}§}‡¸…‹¿œ•É¡‹¹ª~¡¸wÓy‚ê}zôvjõh^ø]aðNYëEQïHRöJVôFPí?Hê=Cã:?Þ:;Ù;:Ô<7Î@6Ç?3¿=/¶=,©<(©>*¨9&¯:)¿A3Ã=2Æ7/Ô=6Ò5.Õ4*×5*×5&Ø7%Ú9%Û;%Ú9%Ü5,æA;éLCàJ<Ð>/Æ7&Ê9(Ó;-Ü;1õIEä01ð6;õ6>ð.9ÿ?Mî&1ó
$*ö%(ú(+ü,,û++õ''ö((÷+*î#&ì%(ê%,ý;DÙ'î4Bû@Q×%5ÿjuÿcjü[aûX]óNRåBC×<8Ñ>6½7+¾C3¹I5©>*—0*Ž(’(œ-$°;4ÐSOÌHDÁ54Æ44Ð<:êVT×FAÕF>ÏA7Æ8.Å2(É5+Ï7,Ì8.Æ<2Â91Á4+Ã3+È3,Ï81×@7ÞE=âJ?Ý@7Ù6-Ü1*æ0,ñ10ø00ú00ô02ò01ó/0ó0.ñ/-í2+ç5+Þ8,Ö>0¾2#¼=.µ=/Ó^Uévqÿ‘÷……÷„‡ý‰ÿuƒÿfyÿ\vÿXtÿQnýIfû?`ó:Zñ@]áI^ÀR[{?7RF.L^6Yv@e‡Jr–Yu›^wby¡e|¥i}¦j|¥i{¤hz¤ey£d{¢c{¢c}¡a{¢cz£gz¦k|¨i}ªg|«c|¬`~¯`€±b‚°e†±j‡¯p†¬s‰ª{Ž­„©†Œ£‰v‡aooO]]\K7\K7\K7\K7\K7\K7\K7\K7]L8]L8]L8]L8]L8]L8]L8]L8]L:_N<`Q>^Q@[P>ZO=YP?ZQB[RC[QEZPG\OI]OL`QNeSSgUUgUUfW\f^mkhƒrw¡y‚¹ŠËˆÑ–‘ÔŸ‹È©‚±¹}ŸÏ‘䄆í~uñriúegõT\íGQïENöHRöFPòAKí>Eâ7=Þ8:Ø:9Ò=7Ì?5Ä@3½@.¶>-­>+¨=+©:)·C4ÅI=¾</º1'È:0Ë7-Ð6,Ó7+Õ8)Ø6'Ø7#Ú:$Ý:)Ü5,Ù4.ßA6çOBÛG9Ç4$È2#Ø?1á?4õHBç10ò59ø6?÷3?ÿDPø3=ï(-ê$%ë#%÷10ô.-í'&ë%$ô01ê()õ79å+0à(0é4=ÿR^î<Jÿ[fý]eñVZîOTîMRèEHÜ9:Õ74Ó>8À6,À>1¼B3²>/©8*¥3(Ÿ-#›&š%§0*ÅEBÁ;8»-,Å33ÖB@ó_]Ð?:ÑB:ÐB8Ê<2Ì9/Ô=4Ú@6ÛA9Û@;Ø=9Ý>;ãA?â><Þ:8ã@;ëIDàB9Ü;3Ø5,Ú1*ä1,î20õ20ö0/ó12ò01ó/0ó0.ñ/-í2+ç5+Þ8,×?1À4%¸9*´<.Î\Rëztÿ“ù‡‡ù‚†û~†þrÿewÿ[tÿTqÿNlÿFeú<^ó<[îEbßTg¡AEp<1QJ0Pb:]xCfˆLp”Wr˜[tš_wŸcy¢fz£gy£dx¢cw¡bw¡bxŸ`y a{Ÿay az¢fz£gz§f{¨cy©_zª^|­^~¯`„²g‰´mŠ²s†¬s„¦t€Ÿux’mo†lRdX@OL2A>]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8\L5^N7`O;`Q>\O>ZN>XO@XPEXOFZSM^TSaWXeY]gZak[ek^glaikeonl‚rtšw~´~‡Ê„‹Ù‹ŽßŒ‡×œ‹Ñ«ŠÁ³§¼xËz€Ü‚yðvþrqÿcföU[óLSóHPóDKò>Gí<Bä5:ß77Ø96Ò=6ËA6ÄB4¼A/µ@.«:(­>-¯>.»G8ËSEÅI=¹6,·1&Æ9/Ì9/Ò<.Ù=0Ú:*Ø7%Ù6#Þ8(Ý5,Ù2*ß>4êNAãK=Ñ;*Ð7'ßB3Ý6-î@9è0.ï13õ3;û7AÿNXÿKTý>Cð45á%$ë/.é-,î21æ*)è/-ñ99Ñå26Ü-2æ9?ïFMÿX`ñMT×<@ÚACÞBEÞ>@Þ:;Ü89Ø96Ò94É91Á80¼9/ÀA8ËOGÕVPÐNLÃC@²72´92ÍLGØNKâRQîZXèRQïZVÈ5.Í=4ÑA8Ï?4Ò>4ØA8ÝC9â@;è>>ç9;ì9=ñ>Bî;>è58ë=<öMHÝ82Ù6-Ø4+Û3*â4-ë40ð3/ò0.ó12ó/1õ/0õ/.ò/-î1+é4+ß7,Ù@2Â6'´5&²:,ÈVLî}wÿ“û‡ˆü†üz„ûo|ûbtþXpÿOlÿFfÿ@bø9[ó>]éKd×Zh„32e;-UO5VhBa|IiˆMq“Ws—Zr˜]vžby¡ex¢cv av auŸ`vž`wž_xŸ`{Ÿay ay¡cz¤ez§dz§bx¨^yª[z«Z|­\„³e‰´l‹´rŠ­s…¥sœpsfhbPbTCRK7F?]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9]N9^N7\L3]M3_O6^O:[N=WN?WODWPHVQN]WYg`hnfsphwpgxrfzqh{ol}oq†rw—u|ªyÀ‡Ò„ŠÞŠŒã“ŽâšŒ× 
„¿›užŸl¬nq¼vlÑuhñrlÿljÿgiþ^`úQVõFKï<Bé6:æ69á77Û96Ô=6Í@6ÅC3¼A/·?.¬8)´@3¯;.°</ÆREÓ]QÇMB¹;/¼:-À8,É:,Õ?1Û>/Ø6'Ú4&à6)ß2+á6/â>5âD8áE8ÜC3Ý@1á?0Û0&é61ê1.ï./ð-1ù6<ÿPXÿ\bÿZ^ÿORá.1â/2Þ,,öDDç55Û++Û-,ê>>Û33×34ÿcfÞ=BÊ+0Ê.2À')Ô::àBCÚ7:Ø24ß9;Ü89Í/.Ë30Í<9ÒGDÝSQëaaõilôeké^aÂA<ÅF?ã^Y÷igÿppÿkjãKJÖ>;Ç4-Î>5ÕE<ÓC8Ó?5Õ=2×:1Û30î5:ð,6ð,6ñ-7ï,4ë,3ò:<ýKIÞ2.Ú3+Ø4+Û4+á4-ç4-ì3.ñ1.ô02õ.1ö.0ö..ô.-ð0+é4+ß7,Ø?1Æ8*´2$±9+ÀNDí|vÿ’Žþ‡‰ÿ‰þv„øk{øatûUmýIfÿ=aÿ:_ø6YñB_ÝNbÁV^p/)\?-YW>]mHe}MkŠQq’Ys–\s™^wŸcy¡cx btž_tž^u^u^wž_xŸ`{Ÿaz¡by¡bz¤dz§bz¨`y§\y¨Z{ªZ|«[«^ƒ®f†¯k†ªm…¦qƒ rz•lq‰idwcZj]QaT^O:^O:^O:^O:^O:^O:^O:^O:]N9]N9]N9]N9]N9]N9]N9]N7^M3_N2_O6]N9ZN>XNBYRLYSS^Y_ebmrnyt‹{u‘yr‘wp‘tp‘ps’pyšt}¨v€µy‚Ã}…΃ˆÙŠ‹Û“Ù—‹Ë™†´ ƒ¡©ƒŽ«~yžkZšQ>ºNAÓPHáVQëWUðRQðJLí?Aå58è8:å99Þ:8Ø=8ÑA6ÈC4ÀA0º?/³9,¹@5«5)¢.!¼J?×g[ÖeWÉUH·=0µ5(¾4'Í;.Ø</Ø6)Ü4)â6,Þ0)ä70â;2Ü:-Ü?0âE4ãC3Þ8*ß1&é4-ï2.î.-ì*+ó25ÿEJÿUZÿ^`ÿ[^á8;á8;Ô,,òJJâ89×//Ú44Ì('Ê'(öXYÚ>AÔ:<Ð7:¸!Ä()Û;;æAEÝ47Ö,/Û14Ù03Ì'+Ñ34ãJLôbcùhköciõ_hù_k÷ajÔMJÓNGç\YçVSæNMáGEÎ31Í4/ÖA:ÞKCãPFÝJ@ØA6Õ;1Ô6-Ú.,ô19ú,9ø*7õ'3ô(3÷07ü>@ÿJIß0+Û2+Û4+Û4+á5+æ5-ì3.ð3/ô01ö.1ø-0ø..õ--ñ0+ê3+á6,Ù=0É;-³1#±7*¹D;êysÿ‹ÿˆŠÿ{‡þp€õgwõ^qøRjùEbÿ9^ÿ7]ù7ZïGaÏO^¡HJd4(VD.ZZ@^lIe|NkˆRs‘[t•\wš`xžcx bu^rœ\rœ\s›\s›\uœ]wž_zž`y ay¡bz¤b{¦_y§^x¦[x§Wy¨W{ªY}©\‚¬b…¬i†ªl‡¨qˆ¥uƒžs|”pu‰pm~kevd^O<^O<^O<^O<^O<^O<^O<^O<]N;]N;]N;]N;]N;]N;]N;]N7`O5aP4`P7\O<YPAZQJ\VV_Zaiguqo„|y–~ ~|£zw¢xu¢uv¤rx¨r~°w¶zƒ¼}…ÈȊŽÍ’“Ì—“Å –»«œ³»¦¯É­©Ä¥“ªˆl™fIœE1¬A1¶@4ÂB9ÖIBèNLíIHê@Aê<=è<<ã;:ß=:×@7ÏA5Æ>0À</º:/»?5®5,¦1'ºJ>ÎbUÓgZÓeVÃO@¸>/¸4'Æ8,Ó9-×5*Þ3+ä6/à2+â5.ß7.Ü8,à>1åC4â?0Ý3$å5(è0(ï2.ï--í-,ñ01ò67öBEöMPûX[éFIôQT×47éFIÜ89äABÚ78Î.0Ï12âHJÑ8:Í78·"$Ô;=Û89ã;;é<@ç6<à-3Ý*0á29ç<BïLQú^bÿlqÿipù_iôZf÷Ygö\fócbè[TçVS×?<Ì1/Ò41Ñ30ÞC>âKDæRHèTJßKAØ@5×:1Ù80á32ó.7ÿ0=ÿ/<ú*7û-9ÿ:BÿBEúBBà1,Ü3,Ü5,Ý5,à4*å4,ì4,ñ40ô01ö.1ø-0ø..õ--ñ0+ê3+á6,×;.Í>0µ1$±7*²;3çtoýŠ‡ÿ†Šÿvƒüi{òarò[n÷PjùBaÿ8^ÿ8_ù;_êLe¾NZ~86]=.LF.UX=ZeCcyKh„QrŽ[v”^wš`yŸdwŸarš[o™Yo™Wp™WqšXr›Yt[wž_xŸ`w 
^x£^y¤]y¥Zv¥Wu¤Tw¦Ux§V}©\‚¬b„«h„¨j„¥n…£qšmx‘ip„hj~cbv]_P=_P=_P=_P=_P=_P=_P=_P=^O<^O<^O<^O<^O<^O<^O<^O:bR9aQ7`Q<[O?ZPG\TR_Za`^lsq‡yx—€€¦‚®}}¯xx®wv¯vx³t|»w€Á{…ˆÁ…ŠÁŒ¿•—¾¹«§¸´­´¾³¯Æ¹©Ð¾¦ÕÁ Ñ»”Ьˆ¯oS®\F¤G5¨>0ÁG<ßVPïWTðNLè@@ç==æ<<â=;Ý>:Ö=5Ï;1Ç9/Á80À=5¶:2·>5ÀPDÅYLÅ^OÏeWÒdUÂL>º:-Ã9,Ð9.Ö3*Ý2+ä6/ä6/à3,Þ6-ã=1æB6åB3á:*Þ2$é4)å-#í0*ï/,ï20ð42æ.,â22æBCôX[óWZÿosãDHèGLÞ=BûZ_ÿptÝ>Bÿ}€ÿnpêQSÓ=>Å02Õ;=òHKí:>ï8=÷>Dò7@ç.6ô?Hÿ]gÿltÿjrþck÷]eù]hþ`nþ]löYbò\]âOHáIFÒ73Ð1.×52Ò0-Ø93ÞD<áJAáJA×@7Ò5,Õ4,Ü71æ74è&.ù0:þ2=÷+4ù08ÿ>Bý?Aë52à3-Þ6-Ý6-Ý5*á3*å2+ì4,ô41ö01ö.1ø-0ø..õ--ò/+ë3+â6,×9-Î?1µ1$±5)¯6.çplý‡…ÿ‡‹ÿoücwñ\oòYm÷OiûBbÿ9_ÿ<aõ@aäRg°QWd/'VE1FF,QU:T_?btJgQq‹[v’_x™bzžav^p™Wm–Tl–Tn—Uo˜Vp™Wr›Yuœ]v\vŸ[w Zw¢Zw£Xt£Us¢Ru¤Tv¥Ux¤W}§]~¥b~ c}›ez–crŒ]hW`vRZoPRgHYL;[N=^Q@^Q@]P?\O>]P?^Q@_RA\O>ZM<[N=^Q@`SB_RA`Q>cT?_P;]P@\RHZPNZSZd`orp†wx—y{¢~€°€·}ºwy¶qr²lo´rxÆv|Ê„ȈÅ•˜Á¤¤¼±°¸¼¹²ÊŲÑÌ®×ϪØϤÛТÜРÙ˚ØÁ•Ñ¨ˆ¶€f ^FÊyfÕtd¼G>ÖQLöb`èJIã?>á99â:9à;9Ú64Ø61Õ:5É91¿6.ÅB:¸<2¾I?ÄXK¶O@ÌeVÏdTÉXHÃH9Ã;/Ê6,×6,ß6/á4.Þ1+Ý2+ß7.ã=1à<0Ú7(Ü5%ã7)ä/$ç/%ì/)î1+î3.ë40ç4/â62Ö66åKMóX\ëOS×8=Õ6;øW]ÿ~„ÿ~ƒÿpuÿhlÿimÿgjó]_ïY[ú[_ÿVZÿCJú8@ú6@ö2>ì,7ë0;ð<GýS\ÿ`hÿhpÿemýalÿbmÿ`lü\dìRRÞE?Õ74Õ30Ö1/Ô/+×2.Û94àC<ÛC8×?4Ô<1×90Ú70ß82ä73ï6;ò5;ó49ö5:ô5:ñ56ê22ã1-Þ3,Ü5,Ý6-à5+ã2*è1)ï2,ô20÷12÷/2ù.1ù//ö..ó0,ì4,ã7-Þ@4Ë9,¼6*®0$·<5ÒXUÿ˜—þ{ÿlÿd{ö]rðPhôGcûBbÿ;bù9^ôMkÒQc‰==P,B:%BH.IM4W`AZjCaxJo‰Yw“`w•_s•Yp—Xo˜Vl•Sl•Qm–Ro˜TqšXr›Yt›\uœ[r›UsUtŸWu¡Vv¢Uu¡Tt SsŸRz¤Xy£[wž[t–Yl‹Rb~KWqBQk>?X0=V/:S,YL;[N=]P?^Q@]P?]P?]P?_RA`SB^Q@\O>]P?`SBaTC`SB^Q@dWF_QD\OF^TR`Z^c_mmkuv•vy¢y}­}€·x|¹quµkp²lo´mq»uwÎ{}҇ŠÑ–˜Ë§¦Å¸·¿ÇŸÒÏ°ÚÖ©àÛ¤äÞ¤ãÝ¡äÛ¢âÙ¢ÛўÔǚһ™»œ€¦|cċwÉ}m³UIÑ`XõqmìZZæJKâ>?æ>>é?@å;;à66Û64Õ<7È72Ê?8·7.¶=2¼L>´H;Æ\LÚo_ÒaQÄI:º4(Ã0&Ó5,Ü5-Ú/(à40â94ä=5à<2Û7-Ù5)Ý5(â5'ë6+ì4*ì1*é1)æ1*ã2*à3,Ú5/Ó86×>@ãJLòW[ù]aû\aü\dÿ`hòRZíMUïRYü_fÿjpÿgjóZ]ñNSì38ð+2ó+5õ-7ï'4ä ,å&5ì6BûPYþZcÿcjÿemÿemÿblüZeõQXäEBÜ=7Ù53Ý52ß63Û2/Ú2/Ù6/Ý?6Ø>4Ö<2Ô:0Ù80Ý82â94ç:6è88é69ë78í68î87ê65å3/Þ1+Ü4+Ü5,Ý7+à5+ã2(é1)ð0+õ20õ12ö/2÷/1÷//ö..ó0,ì4,ä6-Þ>2Ò>2Â8-µ2(¶71Ø[Yÿúszþeyÿ]uøVnöOiùFdù@`ú<`óAaäNg¹KVx;6K1 
;9":C(EG/OS8Vd@^sHlƒUt\v“]q“Wo“Um”Qi’Nj“Ok”Pm–Rn—Up™Wr™ZsšYsœVsUtžVsŸTsŸTržQpœOo›Pm—Mk“MhŒL`‚EWv=Lh5D^.>X)=V,>W-?X.XK:[N=]P?^Q@]P?]P?^Q@`SB`SB_RA_RA`SBaTCaTC_RA]P@bUL[PL[QP`Y`jdrqm„vu•y{¤w{«x}´v|¸ns³di­`g­im¶ruÄ|~Յ…Û–•Ù§¨Ø»¹ÏËÊÆÙ×¾äá´èã©íè¦îé¥ëæ¤çá¥áÚ¤×Ï Ëܶ°§›™ƒk§ƒm§o^›QDº]UÜlh÷usî`^äJJäABéAAë??è8:â66ß=:Ó;6ÒB:Á80·7,·?1­<.¶G6ÙjYÞj[Ù[MÇA5Å2(Ï5+Ù6-Ø1+Ý52çB>êG@ß<3Õ3(Õ1%Ú2'Ü0$æ4*è1)æ/)ä/(ã2,â7/á=4ßA8Å-(É32ÙCBîUWø]a÷[_óS[ðPZçGQèHRîNXú]dÿmsÿntóX^â=Cè/4ò-4ù1;ü3=ó+8ì(4ô5DÿJVôIRôPW÷X]û_cÿdiÿ`föQXèBFÝ86Ü71á85è<8ê=9æ95à40Ü5-Û:0×;/Ö:.Õ9-Ú7.à91æ;4é<8ã:7â96ä86æ87ç:6å84à5.Ú1*Ù5+Ú6,Þ8,à6)ä2(ë0)ñ0)ö0-ô22ó12ö01ö0/õ/.ò1,ì4,ä6-Ü:/ÚC8Æ9/º4+´0,á__ÿ„†÷ktþbwýVpúPmýKküCeô;]ñ<_éIeÓRdœCGk<4O>,==%9?%EE-FI.Q\:YkCg}Oq‰YrYpTlOiMgJh‘Ki’Lj“Ml•Qn—Uo—Xo˜Vt›XsUrœTq›QnšOl˜Mj–Ki”LdG`‡D[ARt8Ki3D`-?Y)>U'B\-D^/Ic4WJ9YL;\O>]P?]P?]P?_RA`SB_RA_RA`SBaTCaTC`SB^Q@\PDYNLZQV_X`gcrqm„ut”xz¡z|­v{²syµjr³bi­\c«`g¯lr¼v|ȈŠÝ”“㣣㵴àÆÅ×ÓÒÎßÝÄçä¹éæ¯íé¬ìè«çâªâܪÙÒ¨Ëߺµ—¤¤Œ™˜ƒ“ˆt—€n˜qb›cV²h_ÏrkúŠ†ôtqê\[åKKçDEê@Aì<?è:;ß:8Ú;5ÝF?ÒB:À:/¸:.¯5(§1#¾H:ÚbRéj[ÛSEÉ:,Ë3&Ô6+×4-×31ñMKüZUëMD×9.Ö4)Û4+Ý2(Þ-%ß-#Þ+$Þ-%Û0(Ù5+Ø:/Ö>3Á.&Ã3+Î:6ØDBÜFGÞEHãFMéLUîQZóV_õXaø[dÿhpÿnsòY^Ý:?é26ð/2ñ.4ð+4ê&0ë)4ø=HÿS^ë@HæAGçFKòQVÿZ^ÿW\óFLã68á53ã81ê;8ñ?;ò@>î<8æ93à70Û8/Ø:/×9.×9.Û8/á81æ93è;5â:7ß:6á96ã:5æ:6ä92Þ5.Ù2)Ú6,Ú8+Ü9*à6)ä2(ë0)ò/)ô1-ó32ò12ô01ö0/õ/.ò1,ì4,ä6-Ú8-ßG<È8/¾3,´-*ìfgÿy}öirÿcxüTnýJjÿHjþ>cò6Yè=]áQhÂXb‡A?eC7YN:GG/AC+JH1DD*KU3Sc>btJkƒSoŠUnRjMhHgHfHgJi’Lk”Pl•Qm•Vn—Ut›Xs›UpšRm—Oj”Lf‘IcŽFcŒF_†CZ@Vx<Qp7Mi6Lf6Ne7Ne7Ke5Ni6Sm=SG7VJ:YM=[O?[O?\P@]QA_SC^RB^RB_SC_SC^RB^RB^RB\RHULQ^Xfkgxtp‰xt•xwŸxzªv{²syµiq²`g«Zbª^f®iqºu}Å}„Ì’–àžžä­®æ»½äÉÊßÓÒ×ÚÛÍàÞÅäá¾æâ¼äß¹ÞضØѵÎƯ¾µ¤­§™—™‹ŒŽ€‰†w‰{nujšrh¨me¹nh䉄í€}ñtpðdcðXWïLMîDEë??â96à;5áB<ÝF=Ë=3Ã=2ÃA4¯1#«0!ÇI:ÚXJÕM?Ë=/Ì8*Ð6*Ï2)Ô51õVSÿjgú]XâE<Ü;1Þ:0á9.á3*â1)ß1*Û0(Ó/&Í/$Å.#¿0"Å9,½3&¿2(Ê;3ÕDAÙEEÛBGÚ@HßDLëOZñU`ñW_ù_gþgnñZ_àAEè:;ì65í36í38ð3:õ:CÿHRÿV^â7=Û59Ü6:é@CøKMüIMó<@ç02ç51é;4ï<8ò<8ñ;8í:6ç83â70ß;2Ü;1Ù;0Ú9/Ü8/â70å61æ72ä;8â:7å95ç:6è;7æ93ß6/Ú3*Ú6,Ú8+Ü9*Þ6)ã3&è0&ð/(ò1,ñ42ð22ó11ô1/ó0.ñ1,ì4,ä6-Ü8.ÝC9É6.¾1*½10öknÿrxùgrû^sùOlþBeÿ
>dÿ9`ô8[æFbØ[kªUZvD=bK;_YCQO:KI4PI6FD-FO0L\8ZlDe{Mm…Ql‰Ok‹JgŒGfŽGfFg‘Ih‘Kj“Ol•Qm•Vm–Tr™Vp˜Rm•Oi’LeŽHb‹E`‰C_ˆDZ@V}>St;Pn8Ok8Qk;Wn@YpBUp=Ur<Vr?PD4SG7WK;YM=YM=ZN>]QA^RB^RB_SC`TD_SC^RB^RBaUEaVP^Vaje{xt}yšzy¡wx¦ux­rx´iq°bi­[c«\d¬go¸u}ŀˆÐ†Ñ—œÜ ¥Ý¯³ã»¿åÄÈáÌÎÛÒÓ×Ö×ÑÙ×ËÙÖÇÖÐÄÐÉ¿Ìüù·³©ª£š›‰Š…wzsvvnulcleogŽbY›`X¿rlÖyrï{û{xÿqoýccõRSïGFïC?ç>7ß<5ßB9Ï;1ÑC9ÝUIÁ=0·5'¼:*Â=.Æ>.ÏB1ÖE4Õ?0Ï7*Î3/èMKú_]òWSßE=Ø:1Û8/ß8/ä91å9/å81à90Ú9/Ñ9,È9+Â9)°)·2!È@2ÒH=ÒE>Ë;:É78Ï:>Ï8AßHQìU^ðYbô]döagéTZÚACã?=ë?;ñAAúHHÿMRÿQVÿU[ÿW^Þ39Ù/2Ú03æ8:ô=Aø;?õ47ñ01ì51ì94ð95ì51ê1.é3/å3/á4.ã<4ß<3Ý<2Ü;1à91ã81ç51ç51ç98æ87è96ê86ë95ç83â5.Û3*Û7+Û9,Ý:+Þ6)ã3&ç1&ï0(ñ1,ð42í42ò21ó1/ò0.ñ1,ì4,æ5-à90Ø;2Ì70À/*ÐBAþosükröbnïQhõHfý;`ÿ5^ÿ5_û?bãMfÊ\g‹GFiE9\M:\WAWR>QJ7OG4ID.DJ.FV2Sc<]sEgKi†Li‰Hf‹EeFdŽDeGg‘Ii’Nj“Ok“Tl•So–Sm•Oj’LgIb‹E`‰E^‡C^‡E[‚CZ~AVw@Tr>Tp?VpA\sG^uGYtAWt>Vs=NB2QE5TH8WK;XL<YM=[O?]QAaUEaUEbVF`TD_SCaUEfZJg^YmfvtqŒzy™yyŸvw¥uwªpu¯jo¯`g«^f®_g¯em¶pxÀ}…͈Ø—Õ—¡ÔŸ§Õ©²Û²ºß¹¿ß½ÁÜÂÄÛÄÅ×ÅÂÓÄÀÏÁ»É»µÃ¸¯À´©º¥™­”Œ›ˆ†‹qqqrqmnfcwhc†oi‚`W”c\›[R¶f]Ösmízuÿ{wÿvrÿheûYVýQMôG@å>6æE=Ö<2ÚF<ê\PÄ:-Á<-¼7(¾6(Ç>.ÏC2ÑB1ÓB1ÕA3Ð83Ö>=ÜDAÚB=Õ<6Ô70×6.Ü8/ã;2å:2ã:3ß;2Û>5ÕA5ÐD5ÌG4®-»:$ÍJ6ÐK<Æ=3º/*¿10Ê9<ÕBJßLTëU^íZbï\dëZ_ÞMRÓ>@Î3.×6.à;7è@=ðCEóEGñBGì?Câ59ß26ã36î7;õ79õ13ú.1ú24î3.ï82ð72é0+ç+)ç0,å2-à3,ã<3à>3Þ=3ß<3â92å82é62ë52ê67ë46ì44í55î64ë52â4-Ü1)Ý7+Ü8,Ý:+ß7*á4&æ1&ë1%î2)î52ì42ð31ò2/ò0.ñ1,ì4,æ5-â;2Ñ3*Ò;4Æ2.çUVÿqvöckðZfåI_òEcÿ9`ÿ0[ÿ4aÿBhÛMc°OVj5/]F6WL8TO9YQ>TI7LA/ID.@F*@O.K[6Uk=`xDe‚Hg‡FeŠDbŠCc‹BcEeGfKh‘Mi‘Rj“Ql“Pj‘NgŽKd‹HaŠF_ˆF^‡E^†G_…H^G\}F\zF\xG^xIaxLczLZuBYv@Yv@IB2LE5QH9SJ;WK=XL>[O?]R@dWFcXFcXF_VE^UFaWMf]Ticewrˆyu˜wuœsržrr¤ru¬ko®af¨Za§^d®fl¸ms¿v|Ȁ†ÐŒ’Ü–›Ýš¡ÕŸ¨Ó¦­×­²Ü¯´Ü°´Ú±³Ù´´Ú²°Õ²®Ñ­§É¨¡Ã¨žÀ£™»–‹­‡—{xƒcadhdebZXi[Xzc]wVMcZ‰MC¡TJº`WÖkcïxrÿ€zÿzwÿniý`[õSNæGAìOHÜB:ÝF=éVL¹+¾2%¾5%Æ>.ÏF4Ê?,Á2!È7$ÙE7ÝF?Ò:5Ê2-Í6/Ó:2Ö<2Ü;3á=4ä<3á90Ý6-Õ4*Ï5)È9)Å<*Â?+ÔS>ÇF1½<)¾:+Â<1Ã:4Á63À43ÛLNÜMOàOTåTWçX\åVXÙMNÓD@É5+Ð6*Ø7-Ü5/à21ä14å05æ.6ì4<ì3;ñ6?ú8Aü3=ú+5ÿ)3ÿ07÷,0ø46ø46ð./ë*+é0.ç40â5/â;3ß<3ß>4à=4ä;4ç:4ê84í74ë33ë33ì23í42ì42ç4/à2+Ù1&Þ8,Ü9*Ý:+Þ8(à4&ã3$è2%ë3)í60ì51ï31ð3/ñ1,ï3*ë4,ä6+ã<3
Ì.%×>8Ì43ùceÿoxö\hîOaèD\øEeÿ;cÿ1[ÿ6aöGhÈK[“CFR,!VI8RM9NI5YQ>SK8F?-GG/=D%?K'GU1Rd:]sDc~Ie„HeˆFb‡Ba‰BbŠAcEdGfKgNhNj‘NhJeGcŠGaˆE`‡F`‡H`‡H^G]€F_}G_{H`{Ha{Ke}Mf~L[x@[z?\{BAB4FD7JF;NG=RH>VJ>ZM=]P=`Q<^R<^S?]VDaZPgb_nimrp~vq‘zuž{x¥yw¨pp¦gg£`a¡[^£_a«dg´opÀyz̄‚Ս‹ß•“盘域ݟ Ö¢¢Ø££Ù££×¥¢×£¡Ó¤ Ó¤Ð¢œÌ šÈ ˜Ç–—Ž¹Œƒ®ƒ|pn{hfic_^e[YkYUqVO|VM‰WL’RH¨ZP³YP¿ZRØkdêyq÷‚{ÿŠ…÷vqývsë`]ßPLØD@ïZTË80Ã3(Ä6(Â6%À7%À7$Ä9&Ê;*Ï>-Ô>/Ð8+Õ;/×?2Ó;.Í5'Í4&×9-â@5à:.ß9-Û7+Õ7+Ï7)È;)Å>*ÄA-À<-Å?3ÊD9ÍG<ÌC;Ä;3»2,µ,&Å<6ÏF@ÙOLßVPãYVáXR×NHÍC9Ì;(Ó;&Ú8)á5+ç/-ë*/ð(2ò(6ù,=ù+?ú+?û*?ý(>ÿ&>ÿ$<ÿ$:ÿ$8ÿ(9ÿ+:û+8ó+5ë.4ç55ä;8Ý82Û:2Ü;3Þ;4ã:3å82è62é62ë33ë33ë54é62ç51â5.Ý5,Ø4(Û9*Û:(Û:(Ü9&ß8%â7%ã6%æ4&ê5,ì5/ï4-ð5,ð4+í5)è6*ã7)Þ:.Ö8/Õ:5Ú>?ÿlvÿbqúThôH`ÿKhõ6Xÿ5\ÿ=dö>båOh®JT]#!C-=:'GF2ON:OJ7DC/=B.>G,BH$EL#HR-R^8ZkAawIdLdƒJa…E`ˆAa‰@cŒ@eDgHj‘Nk’Oi‘JgFeFc‹EcŠGdˆHb†H`„FbƒJbHa€GbGdHdHc~Ga}C[{<\=_‚B;?1>B4DD:IE<NE>QG=XJ=[N=_P;]Q9\T?^WG`[Uecfmkxqo…so”sptq omŸgfŸ``œ\]žZ]¢ce°ik¸trÅ}{υÖŒˆß“å—’䘕ؙ˜Òœ˜Ó™Ö™Öœ˜Õœ–Ò›•Ñ•Ð›“Ìš“É™’ƘÁ‘‰¸†«xšskzlcfe][d[VhYRmWLzVJ‰UHšVK¯]RºYRÀYRÑf^Ýpiávnë~wú‡„ÿ‡„övsífcâTRëZUÆ3+¿0"Ä6(Ã7&À7$À7$Ä9&Ç:(Í<+Ò<-Ò9)Ô8)Ó:*Ñ:)Ñ:)Ð9(Ô8)×7)Ý;.Ý9-Û9,Ö9*Ï9(Ç:&Á:$½:&¾6*Á8.Å<4È?5É@8Ç>4Ä;3À:/½4,Ä>3ÑH@ÕODÚQIÕODËB:Á7*É8#Ñ9"Û8'å4*í0.ó*0ù)6ý(:ÿ+?ÿ+Aÿ(@ÿ'?ÿ#=ÿ!<ÿ;ÿ9ÿ9ÿ!9ÿ%:þ);õ-8ï29ç79å;;Ü94Û94Ü;3Þ;4ã:3å82æ72é62ë33ë33ë52è62ä71ß7.Ú6*×5(Ù9)Ù:'Ú9%Ú9%Ý8$Þ8"à7"á6$å4*é4+ê4)ë5*ë5(ç5'ã6&ß8(Þ</Ò4+Ó52óTXÿesÿZmõG`ÿIgÿ@bÿ:^ý3Yù;_ñNkËL_ˆ9>R+$@4&69&9<)@A/DA.@?+:B+>G*=DJQ%Zb;dpJfvOezQd~Od‚LaƒFa†A`‰=b‹=eDgHkKl‘Lj’IhGeFdŒEf‹HeŠGd†Ib„GdƒHc‚IbEbEd€FeFcDaA]€>_„?b‡B4:,9=/??5DA8IB:ND;TH<WK;\O<[P<[RA]WKb^[gfknm{ppˆqo”nk˜jg–ca“\[”ZZ–[\\_¤hj´mp½wxÈ~ц„׊ˆÜŽŠßÜ‘Ò‘‘Í“’Γ‘Ð’Ñ‘Ð‘ŒÎ‘ŒÌ’ŒÊ‘‹Ç’ŠÃ’‹¿’Š»Œ…±‚{¥~s“zevs^cf[Yc\Tb\PeYIrWFƒSE¢YP»aYÉ\YÏYWÚd`ÞkfÜohâwoú‹„ÿ‰…ÿ…€ÿ|wömgîaXÃ5+¿3$Â9)À9&¾:%½9$Á8%Å:'É:)Í:*Ô;-Ò6)Î5'Ð8*Ó=.Ó=.Î6(Ê.!Ù9-Ø8,×7+Ô8+Ð:+É:)Á8%¾7$½5'¿5*Á7-Ä:/Ç=3É?4ËA7ËC7Å;1ÉA5ÎD:ÏG;ÑG=ÐH<Ê@6Ä6(Ì6%Ô7$Þ6)ç4-ð/0õ,4ú*7ý*;û(;ú':ú%;ù$:ù!8ù7û6ý3ÿ6ÿ 
6þ$5ú*8÷2<ð7=é9;ã;:Ü94Û94Ý:3ß:4ã:3å82è62ë52ì44ì44ë54è62ä71ß7.Ù5)×5(Ù9)Ù:'Ù:%Ú9%Û9$Ý8$Þ7$à6%ß5&á5)â5'ã6(â7&á7&Ý7'Ù7(Û=1Î1(Ö87ÿkqÿ]kýQgð=ZÿIjÿ7[ÿ=d÷5Zí?`äVp©@O_#%J/(:7(07%19$9<'@>)?=&>>&?B#?FW`1s~T}Œcvˆ^hSa}La€G`ƒC`…?`‰=cŒ>eDgHkMkMk“Li‘HhGgŒFgŒIf‹HfˆKd†Ie„Ic‚Gc€Fc€Fd‚FeƒGd€Eb€Bb‡DbŠDeG06(39-:;3>>6D?9KB;PF<SJ;VM<WN=XQA[WLa_`hgolmoo‹om”ih”`^VVŠSTŒVX•[^¡`d«ko¹qtÁz{ˁ‚Ò‡…؉‡ÚŒ‰ÚŠ×ŽÏŽŽÌÍŽ‹ÎŠÏ‹ˆÏŒ†ÎŒ†ÌŒ…É‹…ō…ÀŽ‡»Ž†µˆªy~pŠ€_pyY^jXVc\R]^N^[HjXD}SCžUL½\VÐZZÙYZâdeåkhßpiäwpé|uì{sý„|ÿ‡€ÿ{õkaÇ=2Â9)Â;(¿;&½;%¾:%À9%Ä9&Æ9'Ë8(×;.Ô6*Î5'Î8)Ð=-Ï>-Ê7'Æ0!Ñ5(Ñ5(Ò4(Ð7)Ï9*Ì;*Ç<)Ä;)Ã9,Ã9,Ã9.Ã9,Æ9/È</É<2Ê>1ÒE;ÒF9ÎA7É=0Ê=3ÎB5Í@6Ì=/Î5%×5&â6,ë40ñ.2õ,4ø*7ú*:÷&9÷&9÷&9÷&9ù&9û&:þ%:ÿ$8ÿ"7ý"6ú&5ù-9ö4=ð9>ç9:ß76Ü94Û94Ý:3ß:4ã:3å82è62ë52î45î45ì65é75ä71ß6/Ù5+×5(Ø8*Ö9(Ø9&Ø9$Ù8$Ú8#Ü7#Ü7$Ù6%Ù6%Ú7&Û8'Û8%Ù:'×:'Õ9*Ö:.Ð3,éIKÿoyÿVh÷E_ö=]ÿAfÿ3[ÿ8_÷?cçMiÅPc/:G@1*02%,7&4=*=B,@@(@<!C>!HE"T\-o}JŸp–©|‚˜jiƒT_{H]~E^ƒ@a†@b‹?fAhGj’Kl‘Nl‘Nl”Mj’IhGgŒFgŒIgŒIg‰LfˆKe„IdƒHdGd‚Ff„Hf„Ff‚EcAf‹FfŽHj’L,4'06*690;<6A>9E@:KD:OG<RJ=QK=SOCYVO`_dggsjk€klŠihb`VVˆOP†OQŒUY˜_b§ei²or¿rxÆ|Ђ…Ö†‡×‡ˆØ‰ˆÖŠŠÔŠ‹ÌŠ‹Ë‰ŠË‰ˆÎ‡…Ї„чƒÐ‰‚ÏŒ„ÍŠƒÆŒ„À†º†°‡£}x–mƒƒYg€SXmUQc]QZbMW_GbZCxUB’LB·TOÏTVÚSWå^bæfgákiåvoãtkàpeö}uÿ‚yÿ‡~ôl`È@2À8(À;(¿='¾<&¾<$Á:&Ä9&Ç8'Ì6'×9-Ø8,Ñ8*Ï9*É:)È;)Ç:(Ç8'Î8)Ð8*Ñ8*Ð7)Í7(Ë8(É<*Ê>-È<-È<-È</È<-Ç9-Æ8*Å7+Ä6(Î@4ÏA3Í=2È9+Ê7-Î</Ì9/Ë4)Ð/%Ù1(ä2.ì23ó.5ö-7ú,9ú+;û.?û.?ù/?ú0>ý.>ÿ/=ÿ.=ÿ.=ý)8û(7÷+7õ0:ñ7<ë8;á77Ù42Ü94Û94Ý:3á:4å95æ74è64ë54î45ñ56î66ê65ä71ß6/Ù5+Ö6(Õ8)Ô9'Õ8'Õ8%×8%Ø7#Ø7#Ø7#Õ8%Ô9%Ó;&Ó=%Ô=(Ó>(Ò=)Ò<-Ð6,Ù;8ÿ^dÿanÿOhô;ZÿBgþ5_ÿ5^ú0VóLjÚZq•?LY&+B**83/+1'.9+5A-:B*9<!?<RG'_Z2{…Q£l©½Š¦¾Œ‹¥uo‹ZaI^€D_„A`ˆ?eŽBi’Dl”Km•No”Qn“Po”Nl’IhGgŒFgŒIhJi‹NhŠMd†Jc…IdƒGdƒGf…Ig‡Hg…Gd„ChHi‘Jl”M*2%-5(2707:3;:5@=8EA8HD9NH<LH<NKBTSO]^cceteg~ef…a`ˆYZŠPR…LO†RS“[^£ei²jp¼sxÉw|΁ԃ…Ø…†Ø…†Öˆ‡Õ‰‰Ó†‡Ê…†Ç„„ʃƒËƒ€Ïƒ€Ñ…€Ò…€Ð…Ò‹„ÈŒ„ÀŒ…¸‹…«„yv‹{kxV`~PSnUPd^PZdLVcI_]DrWBOB´WPÎVXÚSYä\`èaeäfgèqmëyoãqfö{sòriþxmå]QÄ;+½6#À<'À<'¿=%Á=&Ä;(Ç:(Ê9(Î6(Ö6*Ø8,Ô;-Í:*Æ9'Â9&Ä;(É>+Ì=,Ó@0ÙA3Õ=/Î8)È5%Ç8'Ê=,Ç:)Ç;*É;-É<+È:,È9)Æ7)Ç5&È6)Í;,Ò>2Ò>0Õ>3Ô>0Í6+Ç-!Ð+%Ú-)ç10ï26ö1:ù/;ü/>û1?ù0@÷1>õ1
=ô0:ö.9÷.8ù+7ù+7û/:ø.:ô0:ð3:ì59å78Ý54Ø42Ü94Ü94Þ93á:4å95æ74è64ë54ñ56ò67ï77ë76å82ß6/Ù5,Ö6*Ó7*Ñ8(Ñ8(Ñ8&Ó8&Ô7$Ö7$Ó8$Ï8#Í;$Í=#Ì>&Í?'Í?)Í?+Ð<.Ð50êGHÿgpÿPbÿHeù7ZÿAjú/Zÿ7_ð4UáQjºUem19E(*C7754034/39/4=*2:"18ACh`9„‚RŸ®w¨À†µÎ—¬Ç’’®{v”`d…N_ƒE]…?a‰@gDl•Go—Np˜Qq–So”Qo”Nk‘HgŒFf‹Ef‹HhJi‹Ni‹Ne‡Kc…HdƒGe…Fg‡HhˆGg‡Fd‡CjIi“Km•N%0"*2%/4-36/672:95?<7B?8IE<HE<IHCQQQ[[c`asac|]`\^‡WW‰RS‰SU\] fh²psÂsxÊz~Õ|‚Ú€„ہ†Ú‚„ׂ…Öƒ…Òƒ…σ†É‚…ȁ„Ë‚‚̃€ÑƒӅ€Ô†Ó…Ò‹„È‹ƒ¾‰ƒ±‡£|‘uqufm}VYzQOlVKd_L]eMZdI``Dp\C‡TC¬\QÇ[YÓUXáZ^æ^bç`díkkózrërgöwnÝXOäZOÐD7Ã6%Æ;(Ã:'Â;%Ã<&Å='Ê<(Í:(Ð8*Ó7*Ñ0&Ô3)Ñ9,Í;,Å<*Â;(À;(Ã<)Ç;*ÓD4ÞJ<ÝG9Ò<.È4&È6'Ë<,Æ9(Æ9'È9)È9(È9)Ê9(Ê8)Ë8(Í7)Ñ;,Ö>1×?1Ú@4ÚA3Ö:.Ñ0&Û/+ä22í57ô5<ø3=ù1>ø0=ö0?ð.9î.9í/9ì/6ì-4ï,2ñ*1ñ*1õ2:ó4;ò5;ì59ä46à44Û62Û83Ü94Ü94Þ95â96æ87æ66è66ë46ò59ò59ð67ë76å84ß6/Ø5,Ó5)Ñ8*Ð9(Ï9(Ï9(Ð9(Ð9&Ñ8&Ð9$É9!Ç;"Ç=#Ç?%Ç?'È@*Ë>-Ï;/×96ûT[ÿ^nÿH`ÿ?aÿ:aÿ7dý4`û9^êFaÃN`ˆ?FQ12@64@::956>64;607:)3<!7CQZ-€‚P §q°Ä‡®Ê¯Ì’¤ÂŒ‘¯{x™bgˆO]C\„>`ˆ?gDm–Hp˜Op˜Qn•Rm”Qn“MjGf‹Ed‰CeŠGgŒIhŒNhŒNe‡Jd†Ic…Hd‡Gf‰GgŠFf‰Ed‰Ci‘Jj•Mn˜P#. (0#,2(.3,241553984<;6FC<DD<FGANPOWZa^`o^`wZ]~]_ˆZZŒWX[]šff¬qsÀz|Ï|€Ù€„߁†âƒ‡â‚†ßƒØ‚Ó€ÑƒÍ„‡Êƒ†É‚…̃ƒÍ„Ð†ƒÔˆƒÕ‹„ÔŒ„ÏŒƒÄ‰‚¶ˆª„€™|x‡pnsnbbuXTrTJiWIe`LaeL^dHcbFn^DzQ=[K´ZRÄTR×WXâWZéX]ïcdõqlîncöpgÐC:ÑA6Ä2%Ç4$ÔC0Ç9%Æ9%È;'Ê<(Î;)Ò9)Õ7+×7+Î-#Í1%Í7)Ë<,Æ?,Á>*½:&»6#½4"ÐC2âN@äN@×A3Ë7)Ê8)Í>.È;)Ç:(Ç8'Å6%È7&Ê7'Ë8(Ï9*Ò:,Ô;-Ò9+Ñ5(Õ7+Ú<0Ý=1Û7.ç85ð9;õ;@ø9Aø4@õ/<ñ-;ì,9ê/8é19é27ê37ì46î45ò46ò46î47ï6;ì8;é69â45Ý33Ü73Þ;6Ü94Ý84à85â96æ87æ66é56ë46ò59ó6:ð67ë76å84Þ71×6,Ð6*Î8*Ì9)Ë:)Ë:'Ë:)Ë:'Ì9'Ë:%Ä8!Â: Â<!Â>%Â@(Å>*È<-Í81Þ;>ÿ]fÿRfÿIeþ8\ÿ>hø-Zý=fç=Zã\m£LT]--F93BC><87>56C34A4.<;)<E&EV*`s<›`­½°Ê‹ªÈŠ¤Ãˆ›¹ƒ‹¬wx™df‡NY}?\<_‡>fCl•Gp˜Op˜Qn•Rl“Pm’LjGf‹Ed‰DeŠGgŒIhŒLhŒNe‡Jd‡Gc†Fd‡EgŠFgŒFg‹EeŠDj”Lk–Nn™Q(0!'/"(.$).'.0-2206729:4==5?@8DE?IKHORWUWdY[p\]{YX€YZŠ]^”ce¢mmµvwÇ~€×„ቌ뉌퉉酆₂Ú€Ó‚ÑƒƒÍ€ƒÆ€„ăƒÉƒƒË…‚φƒÐˆ„ÑŠ„·€Ãˆ€»‡€¬„}ž€y‹upwjfee^Vl\Ok[Kh]Ke_Ib`IcaHf`Fm_DlP8„V?¥]O¿aWÐYUÛQQíRX÷\`øieæYPÔD;Î7.Ñ7-×;/Ö=/Ó<+Î8'Í:&Í:&Ï:&Ò9)×7)Ù5+Ú6,Í/$Ë3&Ë9*É=,Å@-Á@+¾=(¼9%Á8&¾1 éWHßK=Â. 
Ï=.Ì=-Å8'È;)È;)Ç8'Æ7&Ç6%È5%Ê4%Ì4&Ð7)×;.Û=1Ü<0Ù7,Ù7,â>4ìC<õCCñ8=ð3:ó4<õ3>ð.9ë-9ì2=ê7=ä5:ã57ä67é99î:9ð86î64ï;<ë;;é99æ87á85ß74Ü73Ü73Ý84Þ95â88æ::è:;é9;ì8;î7;ó6:ó6:ï77ì87ç;7ß:4Õ8/Ï7*É7(Ç8(Æ9(Æ;(Æ:)Æ;(Ç:(Æ;(À8 Â>%½;!µ5»:$ÉD1Í?3É0+ÿ^eÿVfÿIbþ;[ø2Võ1Yú7aóCgåZoªFNm32K2+@@6>C<>96>./J03J2.@9&BI'Xn=}›_›¸v§ÄªÈˆ¼€—¶}“±{Ÿmd„RVw@Xz=]€<a‡>hŽCm“Fl”Kk“Li’Nj‘NhHeŠDa†Aa†Ad‰Ff‹HfŠJdˆHg‹Kg‹Kg‹KeŠGeŠEdŒEjIk“Jo›PoRr U)/!)/!(.$).'-/*02/450782;=2=?4AD;GIDLPSSV_WXjY[t[Z|]\†bb”jk¤ttº}̆†ÞŠ‹èŒŒì‹‹íˆˆè…„ကÖ}Ѐ́É„…ƃ‡Æ†‡È‰ˆÌ‰ˆÎŠ‡Î‹…Ë‹…ň€»‡°„|¡€x{s~tkljc]d^Rd_Ld_Kd^He]Hh]Gh^Ei_Fm_Dt_D~Z@‘XD«YKÇVNÜRPõRWþX\îUPãLEØ>6Õ7.Ù6-Û9.Ú:,Ö9(Ð7%Ð9&Ð9&Ñ8&Õ8)Ù5)Ü4+Ü5,Í1%È4&Æ9(Å>+Ã@,Á@+¾=(½8%Á8&Ê;+ãQBÜH:Ç5&É:*Æ:)Á5$Ê=+É<*Ê;*É:)Ë:)Ë8(Í7(Ï6(Ò6)Õ7+Ø8,Ö4)Ô0&Ø1(Þ7.æ;4÷CDñ8=í07í07ï/:í/9é1;é5>ã6:Û25Ø22ß:8îEB÷JF÷EAñ?;ç:6å97ä86á85à85à85à85à85ß74à85å99ç9:é9;é9;ë8;î7;ð69ï58í76è96ã:5Ü;3Ò8.Ê6*Æ8*Ã:(Â:*À;(À;*¿<(À;*À;(¼:$À@'¸:#²6¸;'¿;,È91×==ÿ]fÿPcþ@Zù8Wõ7Yô;]ó?dãIeª?Iƒ?>];2E;/8>07?2E<7Q>:N0.Q9/VK5bjCvŽZŒ°pœÁ{ Å—ºz™¸˜¶€ŒªxrŽ^WtDSq=\{@^~=b†>hŽCl’Ek“Ji“Kh‘Mh‘Mf‹FcˆBa†Ab‡BeŠGgŒIg‹KfŠJhŒLiŽKhJeGdŒEfŽEi‘Hl–Lo›PoRqŸT-1",0",/&,/(-/*/1,25.36-8<.:>0?C5EH?KMJQRVUUaUVh\Yt^]}edŒnn yy·ƒƒË‹ŒÞêŽìŽëŠ‰æ„„Ü€~Ñ~{Ê}ƀņƆˆÅŠŠÈ‹‹É‹‰È‹‡Ä‰„¾Šƒ·ˆ¬…{ 
uŽzotimne`h^Tb\L]`K]aJc^Hg\HkZFm[En\Fo^DwbGxY=…S:¢UCÆWLàUP÷NQýOQâ><Ü=7Û81Ý6.ß7.ß7,Ý7+Ø6'Ó6%Ñ9$Ñ8&Ó8&×7)Û5)Þ3+Ü5,Ê4&Ä6(À8(À=+Á@-Á@-¿<*À8(Â4&ÛI<ÚF:ÓA4Ì=/Ã7(Â:*¾7$Å:'Å:'Ç:(È9(Ê9(Ê7'Ë5&Í4&Õ9,Ô6*Ö4)Ö2(Ú3*Þ6-ã;2è;5ï;<í49é06ì18í4<ì4<è7?ç<BÛ78à@@ìNK÷XTýZUøQKîC<ä71à72à72à72ß82à93á:4â:7ä;8á77ä88å99ç9:é9<é9<ë8<ì8;î68í57é56æ87á96Ø93Ï8/Æ6+Â8+¾;)½;+º=)¹<*¹<(¹<*¹<(·:$»A*²9$¯6!¶;+³1$Ã4.ëORÿZfÿJ]õ;Sï7Sñ@\ïHdæIdÍN_78d>3Q?1DA06=+48'>6+K9/E-!R=*e^Aˆ]‘ªs•¼{•Ày•Ày‘³w›·„˜´ƒšm\uKGb5Mi8[x@_}?c„?hŒBk‘Dj’Ih’JeIfKcˆCc‡Ac†Bd‡CgŒIiŽKjŽNjŽNhLiMhLfIeGg‘Gk•Im™Lr¡Ss¤Uu¦W13%04%/2'/2)/1,02-14+25*8<-:>->B1CG8HKBMOLQRVSS]XVd[Ynbb~mm“ww«‚‚‹‹ÕŽá”‘ꐐ莋⇅؂Ì|Ã|¿½‚‚¾…†¿ˆ‰ÂŠ‰Â‹ˆ½ˆ„¶…­ƒ}£„zœuxm}qflmb`k^Ui[Nb[I]aJ]aJc^Hg\HmZIqZHr\Gu]ErX?wR8‰S;ªYDÍ\NâSMïEFñ>AÚ1.Ù4.Þ5.á6.á5+à6)Ý5(Ù8&Ó6#Ñ9$Ñ8&Ñ8&×7)Ù5)Þ3+Ù7,È6'½8'»8&»:'½>+À?,Á<+Ä8)Æ4'èTHÑ=1Ë9,ÏA3½4$¿<*¿<*À9&Ä9&Å9(Ç:(È9)É8'Ë5'Í4&Ò6*Ó3'Ô1(Û4+â92ç?6ê?8ê=7ç55é58í6:í6;è38ã28á6<á<@ñVTûc`ÿmhÿhcòUPãA<Þ71Ý4/Þ71Ý60Þ71Þ71ß82á83ã:7æ:8ã77å78æ89ç9:é9<é9<ê7;ë6;î5:ì57é77å97Þ;6Ö<4Ì9/Â8+¼8)¹:)¶;)´=)³<(±<(³<(³<(±:&´?+¬9&«8%²:,«+"Â42û`fþTaøG[ï<Rë=VçH^áObÑN^³RYsG>WG7JC0ED/AD/;?(97";3>2OF)kkEˆ”f•¬t‘´tŒ·r¹wœº„ž¸‰§}kZJ`:@V/Lc7Wr=_{@bƒ>g‹AjCh’Fe‘FbHcŽGa†Ac‡Ad‡Cf‰EhJkMm‘Qk’QiMiMh‘Kg‘Ig‘Gi“Gm˜Io›Lu¤Vu¦Ww¨Y78(58'57*56.56056057,68*;>+<?*?C,DG4HJ<LMEOOMQQQVUSYY[``lhh€ttš~~°‡…ÆŒŠÓ”‘à’‘áÚ‰‡Ðƒ€Ã€|¹|µ|³µ·„„¸ˆ†·‡„¯…¦z›w}t‡yozrfjm_\j\ShZMjZJf[Ia_Ja_Jd]Jh[Hl[Iq\Iu\Hz[G|V?…S<šS?´WEÍSFÚG?â96â30Ü0,Ý4-â5/â5.á3*ß3'Ü6(Û:(Ò7#Ð:"Ï:$Ð9$Ô9'×7'Û5)Ö8,Ã:(¸9&´7#µ8$»<)Á>,Å<,Æ7)Ï8-ô\QÖ>3È4(ÍA2¸3"¶9%¼?+¿:)Ã:(Ä;+Æ:)É:,Ë9*Î7,Ï7*Ï2)Ò1'Ô1*Ý6.å<7ê?8è;7ä73ç85í;;ï;<é69â25ã68çCDëMLÿyuÿqlñ`[ÝJCÐ71Ð3,Û83ç@:Þ93Ý90ß80Þ7/à72â62ã75å76å78æ68ç7:è8;è8;è8;è7=é6:î7;ë78è88ä;8Þ<7Õ<4Ê<2À:.¸9*´;*°;)¯<)¬<(«<(«;'ª;'ª;'«>*¤9'¦8'ª8-ª/(Ê@@ÿgoôN\óHZíDWäGZØN]ÈS[¶SV™XRcN=NM8JH3HF/FD+?B%=D#>H#KV.\g<v‚RŠ˜e¤l©n²r—¹}·‡‘¦}s‡bPdA>O/AR0Mb9Vl;_{Ac‚?i‹AjCg‘Ee‘FaGbFa†Ad‡Cf‰Eh‹GiŽKkMk’Qm”Sj‘Nj“Oj“Mi“Kh”Gj–GnšKožNu¦Ws¦Wt§X<=+<=-<<0;=2;<4;<4;=0;>->A,@D+BF-FJ1IL9MOAPPHRRHWXJZZN^^\ffpnn†yx ƒ€µ†„όюÓŒ‹Ïˆ†Åƒ€¹|z¬zw¦yv¥zxªzz®}®~©} 
~z•|t‰yq€tjrpfem`Zj\QjZKl[Kl[Ik\Ih]Kf^Kh]Kj]Lm]Mq^Mx]L€[I‘aM™VE¦O>µJ:ÅA5Î7.×0(Ý0)à5.á6.â5.á3*à2'Ý3$Ü6&Ù:'Ð8!Í;"Ì<$Í;$Ñ:'Ô9'Ø6)Ò:,À=)³:%®5 °6!¹:'Â=,È:,Ê6*Ø>4÷ZQãI?Í9-Ì@1·6#«4´='»:'¿7'À7'À7'Ä6(É7*Ë7+Ï7,Ù<3Û:2Ý:3ã<6ç>9é=9å84á40ç:6ê;8ê::æ87ê>>õOOÿccÿpmùhcâUNÇ<5¿0*É4.Ö;6ß=:à;7à;5ß;2à91à70â62ã62ä65å55ç79ç79è8;è8;è8;è8;ç6<ç7:ì8;ê88ç98ã;8Ü=7Ó?5É<2¿;.³8(¯:(¬<(ª;'¨;&¦<&¥;%¤;&£='Ÿ:&Ÿ:(Ÿ9*¡5)µ>8ÚTUúdmíKZîIZèI[ÛMYÇRX´VTŸYQˆaPOH5FN7KI2F@(;648=N"Kg6j‹Tv•\ƒšbˆ›cŠ›d¢j“¬r—°yƒ•kn}\P_@<K.8G*AP1O_:Wj<b{Be‚BiŠCjCg‘Ec‘F`F`ŽFa‰Cf‰EjIkŽJjLkMj‘Pl“Pk”Pl•Ol–Nj–Kj–Im™JqžMr¡Pz«Zx«\x«\CB0AB2BC5BB8BB:BB8AC5@C0CG0DH-GJ-JM2MO7OQ<SQDUSD[[CY[C\]Oaa_hguqqzy£~~²„ƒ¿…ƒÂ…„À„¸}{¬wtŸsn–om”omœqo tqžusšurspƒqkwoimlb`j`Wj\QjZKm\Jo\Ko]Iq\KpZLn[Mm]Nk^MlaOq`Nz_N…[Kž`Q¤PE­B8·;1Ä6,Ì2(Ô2'Ü4)à8/á6.á4-à4*à4(Ý6&Ø7%Ó8"Î9!Ê="É=$É=$Í<'Ð;'Ô8)Ï<,¿@-°;'©4 ¬3 ·8'Ã;-Ê:/Î5-×82íNHñTMÖB8ËA4½>-¦1±<(¼=,¿:+¿7)À6)Ä6*È8-Ï;1Ô;3áD=ãA<ã@;å=:æ=:è<:ç98å84æ95æ93å84è<:ôJJÿ\[ÿdböb^Ë=9À71¼3+Ã81ÒB:ÝF?Ü=9×4/à;5ß;2á:2á81ã73å84æ66é77é69é69ê7;ê7;è7=è7=æ7<ç7:ë7:é77å76à;7Ú=8Ð=5Æ<1½;-±8'­:'©:'¦;'¤;&£:% :$Ÿ;$ž=*“6$™<*™9)™3'ÁPJìjlí[eëO\èLZÝIWÍKS·SQ¤\PcP~jRIM6CK4HB,E9!@7DI!Tp=g“V}²n³n‚¥e|‘VŽX—bˆ–a{‹\WeAGT89F,6C)8E+=J.JX5Wh<d{EfƒCj‹Dk‘Fh’Fc‘F`F`ŽEc‹EiŒHmLmLkMjLiMj‘No˜Tp™SošRnšOo›NožNs¢Qw¦U|­\y¬]x«\GD3GD5GE8HF:HF:HF:FG9EF4FH2GI1KK1NN4PP8SR>VSDWTA[Z>[Z>ZYE^[Rdbgmj{vs|x~{¦€}ª„~ª‚}¦|vštonj…jfhd…gfˆkh‡lj‚khyifoiceha[f]Tg[MgZIk\In]Ip_Kq_Kp^Jo\Mo\No\Mp]Nu^N|]K†[K‘WIŸVG¢F9ª:/·7,È8/Ñ7-Ø6+Ý6-Ý6-Ý5,Þ3)ß4*Þ6)Ý7)×8%Ñ6"Î9#Ì<$Ê<&Ê<&Í<'Ð:)Ò9)Î=,¾A-¯<'¨3ª3¶9'À<-È</Ë7-Ì2*ßB;õ[SÝI?ÐD7ÈF6«2·>+ÉE6ÌB5É?2Ê>1Í?3ÔA7ÛD;àF>á?:â=9á96à74á53â64å97å<9â?8Þ=5Û94Þ?9èMHèQJÐ@8¶)"¶-%¾5-È>4Ï@8Õ=8Ù<7á<:æ>;á96á:4á83ä84ä84æ95è96é99é77ê88è8:è8:è8;ç9;æ8:å79è58å55á77Ý97Ö=7Ì>4Á;0·<-­:(¨;'¥<'¢;( 
;'ž;&:%š;'—@,Š5!—>.–9*–/&Í\Xöx{ßX_äXaÙQ[ÈKQ¶LN¨UO›`RŽhS€oUOV5?J(<<B>TR)jvD‚¡e“Â~‡¼t…·p{`k‚JsOˆ\t|SXb=;D'2<#4=(<E0<E09C*CQ0Uf<e|HgƒFkŒGl’Gh’Fe‘DbGaFeŒIjLo’Nn“NkKgIgIh’JrœTqRržQožNpŸOr¡Pw¦UyªYu¥Yr¥Zr¢XQK;QK;QK=QK=QK=QK=QK=QK;TO<TO<UP=VQ>XS@YSCZTD[UEa\H`[G_YKaZPe][icgogtsj{wnƒyp…|sˆ}t‡yprjyjboe]hb_h`_e^]b][\]YX^YU^ZQa[Od]Me]Jg\Hh\Fi]Gk\El\Ej^FicMqfTpYIsO?ŠVIœ\P£YL©ZK¥N=¤E3¦;+°8(À8,Ï8/Ù40Û2-Ý6.Ý6-Þ7.Þ7.Û7+Ù7*×7)Ô7(Ô8)Ò9)Ñ:)Ñ:)Ñ:)Ñ9+Ò9+Í;,º;*µ=,®9'¦1ª2!µ:*À<-Â8+È5+Ì5,Ó;0ÚC8ÜJ=ÔH9Â=,¶1 ÜPCØJ@ÓE;ÔD;ÙF>ÞG@ÝC;ß=8ß63æ87ê;8é:7å84à72à93Ü=7×G>ÍC8Ä:/»3'·1%¹5(¾<.Ã?2À:.Ç=2Ò?7Ú@8à=8æ:8ê88ì89å78ä79ä88ä88ä86ä84ä84ä84à40á51â62ä84å97å97å99æ89é;<æ::ß99Û<9Õ@:ÌB8À@5µ?1¨:)¢;(Ÿ<'š;'—;&•9$”8#9%B.‰<*…/"/&ºMHânnälnÌUY¼KM´MNªQM¡TN›XO–^Q•aS‰iPVY,9L6EQ`%{O™¯nž»xœ½x‘´p~ž_h€LZm@WeAT]@HP8>C/9>*>C/<@18?/:C28D.?N1Rd>\t@f‚EoKm“Hi’Fh’Fg’JgJiOk’Qn“Pl”Nk“Lj”Jk•Il˜IpLnLp Lq£Ns¥Pu¦Tx©Wzª^w¦blšZbŽOVO?VO?VO?VO?VO?VO?VO?VO?WP@WP@XQAYRB[TD\UE]VF]VFc\Jb[IaZJbZOd[Th^\lbcodhpekrgmtiotioqfllaef[_`XV]ZSYYOWWMWUIWSHXTH[UG\VFaZHc[Hc[Ff[Eh]Gi_Fj^FhaGbbJngTv\K~SC™WKªXM®RG¶QE­G8¬C0«<)±9(½9,É90Ô72×4/Û7.Û7-Û7-Ú8-Ú8-Ø8,×7+Ô6*Õ7+Õ7+Ô8+Ô8+Ò9+Ò9+Ò9+Í;,½;+µ=,­8&¥2¦3 ¯:(¹>,½;+Å9,Å5*É5)Ñ:/ÚF:ÞOAÝQ@ÜN@ÛG=×@9Ò;4Ò;4Ô=6Ø=8Û94Ü41ç77ë78í68ê65æ74â94á<6ÛB:ÉA5¾@2·9+°5&®5$±8'·>-¾@1Ä@1Ì@3Ö?6Ü=7ã:5é75î66ï79é6:æ8:æ8:å99å97ã:5ã:3ã:3á81â92ã:3ã:3ä;6ã:7ã:7â88ã9:à::Þ<:Ø?:ÐA;Ä?6·=0¬;+¢;*<)™=(—<'“;'‘:&‘9%‹:%8$~:'‰5*›=5½QNÕedÏ^`·MM«MK£SLœXO—[P–]R•_S—^S‹eN_^.Rf']r1yN—°l£¾{—²o† _iG_v@Sf9IY5CP6?H58>059+9;-?B1=?28>0;A37C/?M3Od=]uCf„Fp‘Ln”Il’Gj“Gi“Kh‘Kk’Ql“Tm”Ql”Nj”Jj”Hj–Gl™HmImGn Iq£Nu§Rw¨VvªXx¨^j–W_‹PV‚G[TD[TD[TD[TD[TD[TD[TD[TDZSCZSC[TD\UE]VF_XH_XH`YGd]Jc]Gc\Ic\Ld\Oe\Sf]Xg][h^\i_^j`_j`^h^\d[V`WR]VNYULVTHSQEQOBQMARO@TQBXRB\WD^WD_XEbZEe]Hg_Hj_IhbJbaLmfSx^O†XK 
\S¯VP±JC¶E=·A7¸>1¶;,¸9*¼8+Â:,É;/Î:.Ö8,Ù7*Ú8+Ú8+Ú8+Ø8*×7)Ô7(Õ8)Ô8)Ô8)Ô8)Ò9)Ò9)Ò9)Í;,½>-´?.¬9&¡2¡2©9%³>*¹>,Ã>/Ä8+Â3%Ç3'Î</ÖG9ÛN=àNAØ93Ø42Ò3/Ó40Ù:6à<:å=<è:;ì8;í57ë35é24ç55å97á<8Ø?7¾8,³9*°6'¬4$­5$°8'µ<+»=.ÈD5ÍC6ÕA5Ü>5á:4å63ì65ï79ê69é69é69ç77æ95å:3å:3ã;2ã;2â;3ã<4á<6à;7ß:6Þ97Ý98Ü87Û=<Ú@>ÒA<È?7¹;/­7+¢7' ;)›<(—<)“<(‘:&:%Ž7$ˆ:&z6!x5"‡9,žF<¶SN¾VS¶PN©JF¢MHœSJ˜ZO—_R•aT•aT•_U‰eOrqEzU“¦n£¸ª¿†œ´zyXXo9?R$@R(AQ-=L/8D.4=,39-58-9;-?B1<>17=/:@27C/>L2Od=^vDh†Hr“Np–Kn”Il•Il–Nj“Mm”Qn•Ro—Qm•Nk•Kj”Hk—HmšImImIp Lt£Rx©Wx©Xt¥VpŸW]‰JU~DNw=`YI`YI`YI`YI`YI`YI`YI`YI^WG^WG_XH`YIaZJb[Kb[Kc\Jd^Hd^Fe_Ie^Ke^Le]Pd\Qd[Rg^Ug^Uf]Tf]Te]Rd\Qc[Na[M\XLXVITREPN?OL=OL=QN=RO>UR?XS@ZUA]XD`[Gb]Gf`JfaKcbNkdRt]O‡[P§b[µZU¶FD¹<8Á<7Å:3Ä:0Â8-½8)¼;(½>+Ä?,Ñ9+Ø8*Ù7*Ù9+Ø8*Ö9*Õ8)Ô8)Ô8)Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Í<+¾?.´?.ª:&¡4œ3¢8"¬>'´?+»>,¿:+Â6'Å6(É7*Ì:-Ì=-Ò:/Ý54Þ22Ú00Û32Þ97ä<;é==ë;=ë7:ê37é05æ23å57á85Ù53Î70¶4'®6&¬7&«6%«8%¬9&±9(³:)ÃA1ÉA3ÓA4Ú@6à;5ä84ê65í76ì57ê67ê67é75ç83æ93å:2ã;2à90Þ:1Þ;4Þ;6Ý;6Ú;7Ù:7×98Õ;9Ô@<ÒC=ÇA8º<0¬6*£5&6%š;'—<)“<(;&;%‹9$Š8#…8$€=*v5!6'—E9©OGªMF¦KF¥PI›QH—UI“[N’^P’aS•aT–`V‹fSˆ†_˜¨w­½Ž¦¸ˆ¡tr…WN`6.@,=2A"8F,:F09B17=/9<1;=29:,>A0;=06<.:@26B.=K1Nc<`xFi‡Is”Or˜Mp–Ko˜Ln˜Pm—Op˜Rp˜Rp˜Qn–Mk•Ik–Gl˜In›JnLožMs¢Qx§Wz«Zv§XlœPd“KZ„ER{AMv<b[Kb[Kb[Kb[Kb[Kb[Kb[Kb[KaZJb[Kb[Kc\Lc\Ld]Me^Ne^Ld^Fe_Gf`HgaKhaNg`Mf_Mf_OibRhaQg`Ng`Nf_Lf_LgaKfaMa^M^[LXUFSP?OL;NK:LK9ML:ON:PO;RQ=UT@XWCZYD]\G^]IbaOf_OlVH…[Oªja½d^¿MLÇABÏ;;Ó97Ñ83Ë6/Á9+¹<(´B*»C*Ì;(Õ8'Ø8(Ö9(Ö9(Ö9(Õ8'Ô9'Ô9'Ò9'Ò9'Ò9'Ò9'Ñ:'Ñ:'Ë<+Á?/´?.ª;( 7"™5™7¢<%¬B,±<(¸;)Á9+Ç9-Ë9,Í9-Ð<0Ø;2è8:ë49æ37â46á46ß56à35á35è59è38ç48æ68ã9:Û75Ï1.À0(´6(­:(«:(«:(©:&©:&¬9&­8&·:(À<-Í?3×@7ß>6ã<6è96ë76í76í76í76ë84ê84è:3ç:3ä<3Þ:1Þ;2Ý<4Û>7Ù>9×>9×=;Ó=<Î@<ÉB<ÅB:¹=3¬8+£5(œ6'š;)–;(‘<(Ž<'Œ;&ˆ:$†8$‡6#ƒ8#‚A-r3!x3#“G9¢RG NCœND VK–VJ’XJŽZL\N]R“_T—aWŽgV‰f–¥z›ªh\lHDS22A$$2,:#.;'2=-7?0:@4;>3:<199-89+=@/;=05;-9?15A-=K1Mb;`xFjˆJt•Ps™Nr˜Mp™Mo™Qn˜PpšRq›QpšPn˜Lm—Km˜Io›LqNq 
Pt£Sx§W{ª\y§\pžUb’HX‡A]‡HU~DPy?d]Md]Md]Md]Md]Md]Md]Md]Md]Md]Md]Me^Ne^Nf_Of_Of_Mf`JgaIhbJhbLicMicMhaNhaNhaNhbLgaKf`Hf`HgaGhbHhbJe`L`]J[XEVS@QN;MJ9IH6HG5KJ6JL7KM8LO<OR?QTARUBSVCXYI]WIbPBzXL g^µe^ÃUTÖQRÙ@BÝ:;Û76Ô70Ç;.»>*°D(³A&É;'Ó8&Ô9'Õ:(Õ:(Õ:(Ô9'Ò9'Ò9'Ò9'Ò9'Ñ:'Ñ:'Ñ:'Ñ:'Ë<+À>.³>-ª;( :$—7”6™;!¦B*±C,¸?,¿;,Â6)Å3&Ë4)Ò;0ß=8è59í4:ë6;è:<å;<ã;;á;;à::ß58à67â9<à<=Û=<Ð85Ã2-µ/&¯:)ª=)ª=)¨=)¨=)¦<&§:%¨8$­5$·8'Ä<.Ï<2×=5Ý<4ã<6è;7ë84í74í74ë83ê93ç:3æ;3ã<3á>5Þ@7ÛA9ÙB;ÖA;Ô@<Ñ@=ÌA<Á@:º@5³=3©9- 6(š6&–9'•<*;'<'Š<(‡:&„9$ƒ8#ƒ6$6#{:&q2 |9(”O?£YLœRE•OC˜VH”ZL[MŒ[MŒ[M[P’^S˜_VŽgV}wWx„^p|XSa@9F*.<#/<(0=,1=/2<15=29?5>A8?A6;;/78*78(<?.:</5;-8>04@,<J0La:`xFjˆJt•Ps™Nr˜MqšNpšRo™Qq›Qq›Oq›OošKn™JošKržOu¡Rw¦Vx§Yz©[w¥ZqŸVf”L[ŠBVƒ@_ˆLW€FR{Ae^Ne^Ne^Ne^Ne^Ne^Ne^Ne^Ne^Ne^Nf_Of_Of_Og`Pg`Pg`NibOicMhbLhaNhaNhaNhaNhaNgaKgaKgaKf`Hf`FgbEgbEgaGf`Jc^J`[G\WCUS>PM:KJ6IH4FG5FI6FI6GK:HL;IM<GN<JN?NPBUQEXO@hRDZK–VJ¯RKÓYVÛIIà@@Þ88Ù72Î;1Á@-±B'±>#È;'Ð7%Ñ8&Ò9'Ó:(Ó:(Ò9'Ñ:'Ñ:'Ñ:'Ñ:'Ñ:'Ñ:'Ð;'Ð;'Ë<+¿=/´<.«<)£='—9 5“9 @'­C-¶A/¼=,À6)Â/%Æ/&Ð6.Ý:5ç48í5=ì;Aé@CäABßA@Û@>Ù?=Ì22Ï53Ò:9Ð<:Ç<7½81´5,«5)§<*¤=*¥>+¤@)¤@)¤>(¥<'§:%ª7$°7&º8*Â8+Ë8.Ó:2Û<6ã>8è94ê93ê93è:3ç:3æ;1ã<3à>3àB7ÜB8×C9ÒB9ÎA:È?9Ä=9¿>8°;2§;.¡7*œ6(—7'”9';'Œ;(‰<(ˆ=(„;(ƒ:'€9%8$6%}6$v3"w6$…B2—TDœXK”PCPD”YK’ZM[M[N\O’\R•\S—\VcUkbCV`;IS1<G)4@(2>*4?15?44>56@8<C;AG=DG<BD6=>.;:(78(<?.9;.4:,7=/4@,;I/La:`xFi‡It•Ps™Nr˜MqšNq›SpšPpœOpœMpœMn›Jn›JpœMt Qx¤W~ª]~ª_x¦]oUd‘L\‰D[ˆC\‰H`‰MY€IRyBg`Pg`Pg`Pg`Pg`Pg`Pg`Pg`Pf_Of_Of_Of_Og`Pg`Pg`Pg`PkdTjcSibRg`Nf_Mf_Mf_Mf_Lg`MgaKgaKgaIgaGgaGgaGf`FgaIf`Jd^H`[E\WCUS>ON:JL7EF4DG4CG6BI7BI9AJ9BK:CJ:HL>PQCRPAXM;dK7sD4•I<ÁXR×QNÞDBÞ:9Û75Ô;3Ê>/¸>)´7!È:&Ï8%Ð9(Ð;'Ò;*Ñ<(Ñ:)Ð;'Ñ:)Ð;'Ñ:)Ð;'Ð:)Ð;'Ð:)Ë<+¿;.µ;.­<*¤>(˜:!Ž4’8ž@&¡8"¯<)¾?.Æ</Ê7-Ï6.Ø;4â><ë>@î?DéCEâDCÕ@<É:4¿4/¸1+·0*º50½94º;4°91¨8-£9, :+ž=*ž?+ ?,Ÿ@* @* 
@*¢?*¤>(¨;'­:(²7(·5'¿5*È:0Ó>7ßB;ä=5ç<4ç<4å=4ä=4â>4à@4ÝA5ÛC8ÖC9ÑC9ÊA7Ã>7¼;5¶93¯:1¢7-˜7'’5&’7%‘:)Ž;)‰;'„9&†=*„=)=(;(}:'{8%{7$z5%w2"~9)ŒG8•RB’PBŠK<QD™_QXKZL’\P•\Q–]T–[S–YTŠ^QaX9EM&5?7B$=G/:F23>0-7,/905?6@F<EK?GI;AD1>=);:&67%;>-9;.4:,7=/4@,;I/K`9_wEi‡Is”Os™Nr˜MqšNq›SpšPpœMpLoœKn›Jn›JqNv¢Uz¦[‚®cªbv¤\h•PZ‡DVƒ@\‰FeOcŠQY€IRyBhaQhaQhaQhaQhaQhaQhaQhaQg`Pg`Pg`Pg`Pg`Pg`Pg`Pg_Rh`Uh_Vh_Vh`Uh`Uh`Sh`ShaQhaOhaNhaNhaNhbLhbLhbLhbLg_He_Ge_Ic^Ha\HZXCRQ=KM8HI7DG4@D3=D4>E5>G6>F7=E6EL<HL;AF2JI4[Q8aE/J6½j\Üg^àUPâGEãA>ß@:Õ>5É=0Æ=-Æ5$Ê4#Ì6'Ê7%Í7(Ì9'Ð:+Ð=+Ñ;,Ï<*Ñ;,Ñ;*Ñ;,Ñ;*Ñ;,Ì=-º2&¶8,°<-¥:(š7"‘5“7œ9"¬B,±<*¸6(À3)Ê6,Õ<4áD=çHDçCDæFHÛCBÉ:6º5.´8.®8.¦4)©9-§8-£9,ž8*š8+–:+‘;*‘;*–=+–>*™>+˜@*š?*›@+A, @* ;'¤9'¨7'®6&µ7)½9-Ä;1Ë;2Ú=4Þ=3à?5âA7àB7ÛA5Ø@3Ô@4×H:ÐF9Æ@5»;0°5-©2*¤1*ž3)™9-’;*:+Œ9'ˆ9(‡:(„;(ƒ<*~:'|9&z9%z9'z9'z9'w6$w4$w- Š@3—OA“N?ŽL>‘RC“UHRD—[P—[QTLTL—ZU”WR“VQ“hX\S2HP'AK&@K)>H-5A+1<+3>.1<.EPBX^PU\JLO:CG.@@&<<$9:(<?.:</7=/9?12>*9G-Nc<bzHjˆJs”Oq—Lp–Kp™MpšRpšPpLoŸKoŸKožMpŸNs¢Rz¦Y~ª_‚­ew¢[i”O_‰G\†D`ŠJdŽNg‘RcŠSZLSzEhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQh`Si`Yi`[i`[i`Yi`Yi`WiaViaTibRibPibRibPibPibPibPibOh`Kg_Hf`Je_Ic^J^YEUR?ON:IJ8EH5AE4>E5>E5>G6>F7=F5?H7=F3=G/FL2MI.S?&}P9»raÉcUÞ_XéUQåGDß@<ß@<Ø?9Í9/Í9+Í<+Î<-Î=,Í;,Ì;*Í9+Í:*Î:,Î;+Î:,Í:*Ï9+Ï9*Ï9+Ì:-É?5ÄA7¼B5®=-6#•2•2›2¥4"±6&Á;/ÐB8ÝF=äJBèKDèKFÝB@ÚDCÍB=»;2¯9-©=0£?/=-@/š>/–?.“=.>-Š?,„@-†?+<)‘;*‘<(’=)“>*”?+–?+—?+@.Ÿ>-¢;*¦8)©8*±8-´:-¼:-É;1Ï;1Ò>4ÕA5ÕC6ÔE7ÒD8ÎD7ÇA5Â@2¹=1¯9-§7,¡5+5,™7,‘;,‹<+‰<*ˆ;)†;(ƒ:)=*€=*|;'y:(y:(x9'x9(v9't7%v4$}3&D7—OA‘L=ŒJ<ŽPA‘SFQF’RI—WN•TN–WP›\W“VQTM‰bQ]W3Xc8^i?ZfBGU46C'4@(<H4LXDWcO]fQT\EHN4AE*=@#;;!8:%;>-9;.7=/9?12>*:H.Nc<bzHk‰Kt•Pr˜Mq—Lp™MpšRo›NpŸNoŸInžJnLq Pu¤V|¨]~©a{¦_ošUa‹I\†D_‰IcMdŽNbŒMb‰RY€KRyDhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQh`Ui`[i_]i_]i`[i`Yi`WiaViaTiaTibRiaTibRibRibRiaTibRjbOiaLh`KgaKe`La\HYVCSR>KL:GJ7CF5@D3?F6?F6>E5<E4;G35A+>H0GK2D>$U@%…[Cµr_³VGØcYîe_ëSPåGFéGEæDBØ;6Ë7+È9)É:,É:*È9+Ç8(È6)Ç5&Ê8+Ê8)Ê8+Ê8)Ë7+Ë7)Ë7+Ê7-Á2*¾5/¶6-¬3(¡0 /Ÿ1 
¦3!»@1ÇC7ÔG=àKDæMGçJEåFBÞC?ÖB>ÎC>Á@:±;1£<-žA0šE1–E2“E1C1ŽC0ˆA/„A.A,|B.~@+„;*ˆ9(‰:)‰:)Š<(‹=)<)<*•@,–=+˜;*›:*ž8)£9+¦:-«:,¸:.¼:-À<0Â>2Ä@3Ä@3ÃA3ÁB3µ:+¯9+ª9+£7*7+™7,–8,’:.‹<-ˆ<,…<+ƒ:)‚;):*~;*~=+|;)z;*y:)w:(u7(r7'q6&t5&†<1“F<•MAI<ˆH<ŒNA‘SH‘QG‘NF˜SL—TN—VP™\W‘ZSŒZSlY€~X}Š\~‹_ixOJX45E!;J+L[>cqWboU[eLMV;CI-?D&=@!:="79$:=,8:-7=/:@23?+:H.Mb;bzHk‰Ku–Qs™Nr˜Mp™MpšRo›Nr¡Pn IlžInŸMt£Sy¨Zz¨_z¨`ošUdŽL[„B[„BcŒJh‘OdK^†Hb‡QY~JRwChaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQhaQh`Ui`[i_]i_]i`[i`Yi`WiaViaTiaTibRibRiaTiaTiaTiaViaTkcPjbMjbMjbOhaNd_K_ZGYVCPO=KL:GH8CF5AE4AE6?C4<C1<H27C+DI2KG.M;#oL6˜dO§^M @2ÅSIçc^ï`\ëSRéKLèEHßA@Ë80Å9*Å7+Å7)Æ8,É:,Ê:/Ê;-Ð=3Ð>1Ñ>4Ñ?2Ò>4Ò>2Ó?5Ò?7Ó?;ÑB>ËD>ÅE<ÀF9ÁI;ÇM@ÎPDØRGÝPGáLEãJDãFAáD?àA=ÚA<ÔE?ÈC<º?7ª</Ÿ>.˜C/•G3‘H5F2ŠF3ˆD1ƒB0~A.{@.xA-z@,:*„8*…9+…9+†:*†:*†:*‡;+ˆ;)‹:)Œ9)8*“:*—;,š=.¡=.¨9.«9.®9/¯;.°:,¯9+®8*¬8)§6&¤6'ž7(š6'”7(8*:-;-‡;+ƒ<*‚:+:*~9)}:)|;)|:*z;*z;*x:+t9)r7'p7&p7&t6'‰C7‘I=K>ˆI:†H;‹PB“UJ—TK•PI›TN—RM”SM•\U_Ug]•j’”mƒ“dr‚UXi=BS)>N'O_:csOdsRZiJLY;AL.<E(=C'<@%;=%78&9<+79,6<.;A34@,:H.La:ayGjˆJt•Ps™Nr˜MqšNq›So›Pq OmŸJmžLr£Ry¨Z{©^w¥]rŸZa‹I\†D[„B^‡EeŽLh‘OdK_‡Ha†PX}IQvBhaQhaQhaQhaQhaQhaQhaQhaQibRibRibRibRibRibRibRiaTjaZja\jaZjaXjbWjbUjcSjcQjcQjcQjcQjcSjbUjbUjbWjbUlcRkcPkcPkbQjcQgbOb]J]ZGURAQP>LK9GH6EF4BE4@C2=B.<J1=G,KG.S>)d>+“ZI«_Rš>3ž3+¶?7×XRñjfõedéSTäIMçNPØIAÐF9Ì?5Ê>1Í@6ÒD8ÔF<ÕG;ÚJAÚJ?ÚJAÛK@ÝJBÞKAÞKCßJCéTPçSQãTPÜSM×RK×RKÚTKßTMÝMEáIDãEBâC@áB>âC?âE@ßGBØIEÉD?¹>7«<1Ÿ?1–C1’E3H4‰E2ˆC3†A1ƒ@0~?.|>/y>.{=.9-‚8+‚:,‚:,€;,<-<,<,9*‚9*…9)‡:*‰:+Š;,Ž=,’<-˜:.›9,ž9-Ÿ9+ 
9*¢8*¢8(¡7'ž7&›7'—7'“7(9*‹:)‰:+‡;-ƒ;,€;+:+}8)}8){8(z8(z8*x9*y:+x:-t9+q6(o5'q7)t9+‹I;ŒJ<ˆJ;„I9…K=ŒRD’VK—WN–QLšUP—SP’UP‘`YˆbW‚hY†zbmsOZi>IX-AR&HY-Wi?dvLi{SXjDL\8=L+8E':D+<D,;@,9<)78(8;*68+6<.<B45A-9G-J_8^vDh†Hs”Os™Nr˜MqšNq›So›PnMmžLp¡Ov§V|ª_y§^pXg”QYƒA[…E`‰GcŒJgŽKgŽKeŒId‹L`…OW|IPuBhaQhaQhaQhaQhaQhaQhaQhaQibRibRibRibRibRibRibRiaTjaXjaXjbWjbUjcSjcQjcPjcPjdNjdNjcPjcQjcSjbUjbUjbUlcRkbQkbQkbQkdRibPf_Mc^K\VFVSBQN=LK9IH6GF4DC1?B/:H.?H+NB*`=*ƒE:«XP´NJœ,(²;7¹>9ÒROñpküvuñefêY\î^^âUNÙODÑG=ÍC8ÑD;ÔG=ÖG?ÖH>ÙIAÙI@ÚJBÚJAÝJCÞKCÞKDàKEáGGàFFÜHFØGDÖHDÕGC×FAÚEAÝB>ãA?æBAèBBæB@âC?ßB=ÙA<ÚIFÉB>¹<6¬=2¢@3—A0B0D3‰A2†A1†>0…=/€</=/}=1=1€:0€:0€:0;.€<1=/=/=-=/=-<,€;+:+:*€8)8)‰:-9.:-’:,—;,™<+=->,˜8(–9(’9):(‰:)†:*ƒ:+‚:+:+~9*}8)|7({6'z7'z6)y7)u6'v8)w9,s8*o5'm5&q9,v<.ŠL?†K;„K:L:„Q@‹VH‘WL–VM“NI™TO–UQ“\Ud[z^PdWD`^EEO,>N'>O%J\2^qDiQcyKVl>M_5@R,6E$5D':F.<E28>06:,79+9<+68+6<.<B45A-7E+F[4Yq?d‚Dp‘Lq—Lr˜Mr›OrœTpœQmœLnŸNs¤Sx©Zz¨_s¡Yg”Q]ŠGX‚B\†FcŒJfMfJeGeGfL_„NV{HOtAhaQhaQhaQhaQhaQhaQhaQhaQibRibRibRibRibRibRibRibRjbUjbUjcSjcQjcPjdNjdLjdLjdLjdLjdNjdNjcQjcSjbUjbUlcTkbSjaRkbSlcTjcShaQf_OaZH\WDVQ>OL9LI6IF3FC0AC.:G+@F*U@+wF7¢OI²HH´7;¸36ÈBCÃ>?ÊJIßb^ïrpînmæabâZZÚQKÔKAÎE=ÌC9ÏD=ÐF<ÐC<Í@7ÑB<ÒC;ÓB=ÔD<×C?ØE>ØD@ÚEAÜ@AÝADÝCCÜFEßIHßIHâHFäFEæBCé@CëABëABæ@@á?=Ù>9Ô<7ÚFDÈ=:¹83®<2¦@4˜?/?.@0‰=/ˆ<.ˆ:.†9/†91„:1ƒ:3„;4€:2;2;2~<0~;2|<0|<0|=.{=0{=.{=.{<+z;,y:)z8(y7'9-‚:.…9,‡9,Œ:,‘;,“<+•<*‘8(9(:(Š;*‡;+ƒ<*€;+~;*~9*7)~6({6'z4(y5(y5*x6*s3't6)v8-r6+n4(m5(s;.x@1‰QBƒN<€M<€Q?…VFˆWHTI‘RI’OI–SM‘TOŽ\S‡eYl[INL5DJ.@N-JZ6WgB_qIezObyKUm=E\.AV+5J#/@ 
4B(;G3;C67<57:39;.:=,68+6<.<B44@,6D*CX1Um;`~@mŽIp–Kq—Lr›OrœTpœQnŸPo£Qr¥Vt§Zq¡Wi˜P_ŒIY†E]‡G_‰Ib‹IdKfJeGdŒEcŠG^„KUzGNs@haQhaQhaQhaQhaQhaQhaQhaQjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdRkdQkeOkeOkeMkeMkeMkeMkeOkdQkdRkdTkdTkdTnbTmaSl`RkbSlcTlcTibRg`Pd]M^YFXS@SN;OJ6LG3ID0CC+>G*DD([B,ˆTF­]V¨>>ª/2ËHMÄ?B½;=º:9ÃFDØ[YâecÛYYËGEÐIEÎE?ÌA<ÎC<ÑF?ÒG@ÐC<É?5ÎC<ÏD=ÑD=ÓD>ÕD?ØDBÚDCÜDCÞ?CÞ@AÞBCßEEàFFßECßA@Þ>>çACè@@ç=>å==ã==â@>áD?ßFAÖB@Å:7¶5/¯:1¦@4—>0=/‹?1…<-…;.†9/†91†91‡81ˆ94‡:4‚92;2;0;0~<0};/{;/{<-w9,w9*x:+y;,y;,y;,y;,z;,};/}9.~8,7)‚6(„6)†7(†7&ˆ9*ˆ9*‡:*…<+<,€=,~<,};+|8+{7*{5)y5(x4'w5)w5)u5)p2%s5(s7,q7+m5(n6)r=/xC5ˆUD‚Q@P@‚SCˆWI‹UIŒRGNE”SM“TMˆRHƒYM}cRc[DGK0<I+HW6YjFezScxOYnCQh<Mb7I^3:L&/@+:3?'<E4<B6:=69<5:</9=.59+6<.<B46?,5B(DT0Si:azAm‹Kp”Nq—NršQrœRqRqŸTs£Wu¥[r¡YlšRcK]‡EYƒCbŒL`ˆI_ˆFaŠHfJgIc‹E^…D^„KTyFMr?h_Pi`Qi`Qi`QjaRjaRjaRkbSjaRjaRjaRjaRjaRjaRjaRjaRjcSjcSjcSjcQjcQjcQjcQjcQkdRkdRkdRkdRkdTkdTkdTlcTk^Np`QqdTpdTlcTi`Qh`ShbRgaQebOa^KYWBVQ;TO9OI1GC(JF)I?$bJ2‘hV´xmºme·YW¹OOµCCºBCÁCDÆFEÌGHÐJIÑKJÓKKÎDBÏCBÓECÔFBÓF?ÐC<ÎD:ÎF:ÇC7ÇE8ÊE<ÌF=ÒD@ÖBBÙ@CÛ?CàAEàBCáCDßDBàECàECáFDãEBâC@äB@ãA?ã?=â@=â@=àA=ÜC>ÒC=Ä?8µ91¨8-Ÿ:.•=/Œ>1„?0?1?3€>2€<1ƒ:1†91ˆ81‡81…;0ƒ;-ƒ;-<-€;,€;,<,<,~:-~:-};-};-};-};-};-};/{9-|8-|8-}7+~8,~8,~8,~9*}7+}7+|8+{9+z8*x9*x9*v8)x8,v8+w7+u7*t6)s5*s5*q5*k1%p6*n4(k3&o7*j5'n9+}I;ƒSEƒSG„RG‡QG‹PHPI‘PJ’QK’SLŠTJ†ZMbPm^GVU9KV4M^:jXaxNYpFSj@Qf=Mb;GX4BQ06C%2?#0:"3;&;?.?A3??3<>339+2:+39+5;-7>.9A,;C,;F(Q`9_rDn‡PrRq”Pq™RrœRržSužX}¦b}¦dp™Wb‹I]†D]…F\„E^†GaˆId‹LeŒMeŒKb‰H_†E]„E^KSuBKm;h_Pi`Qi`Qi`QjaRjaRjaRkbSjaRjaRjaRjaRjaRjaRjaRjaRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTlcTqaRqaQobRpdTpgXohXlfXgdUgdUkhWmjWjhShcMc_F[U=RL2PK.F?#P@'kP;XGƒOB†F<’E?°WS¶TQºQN½PM¿NJÀMJÂLHÃLHÏSQÇGFÁ=;Ä=:ÎC@ÑF?ÌB8Æ=3ÈD7ÈF9ÊF:ÍG>ÓEAÖDDÛBEÝAEÞBEàBCáCBßDBàECßECàEAáFBßD@ßD@àC>ßB=ÞA<ÞA<àA=ÜC=ÒE<ÇB9º>4«;/Ÿ:.“;-‰=/>.@1|A3}?2=1;1†91ˆ81ˆ8/†:-„;,ƒ;,ƒ;,‚:+‚:+€;,:+:+:+~:-~:-~:-~:-};/};/|8-|8-|8-|8-{9-{9-|:.z:.z:.z:.y9-x:-x:-w9,w9,w9,t8-t8-s7,s7,r6+p6*p6*o5)j2%o7*m5(k3&l7)j5'n9+}I<ƒSGƒSI†QI‹PJPK“OL”QK’SL‰NFƒSG[KzdMqjPhkLcqNfxR]tJUnDOe>
Ka:J^9J[7FU6BQ4;E,6@(3;&6;';>-?@0>?1;=03;.1<.4:.5;-9=.;@,<A+;D)Q[9^mDkPpŒRo’Pp˜QsSržSwž[{¢cxŸ`k’S`‡H]„E]„E\ƒD_†GaˆIcŠKd‹Ld‹Lb‰J_†G]ƒF\IQs@Ik9g`PhaQhaQhaQibRibRibRjcSibRibRibRibRibRibRibRibRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTlcTpcSo_Pk]Pl`RndXogZkeYeaUfbVom^zwh~{jzgzuaojVcaJRU:DG,@>%LA+SA-S8']7*l>1n8.t8.~;3‡A9‘H?›QH¤WM¬YQ»^WºSN·GCºC?ÅCAÏEBÑC?Î?9ÏB9ÐC:ÏE;ÓF?ÕF@×FCÚDCÜCEÞBCÞBCßCDÞDDÞDBÞFCßECÞFCÝE@ÜD?ÛD=ÚC<ÚC<ÚC<ÜC=ÙD=ÔE=ÌF=ÁB9²=3¡9.‘8*‡9,<-@1~@1~?0=/;/„:/‡9/‡9/„:-ƒ;,ƒ;,‚:+‚:+‚:+:+:+~:-~:-~:-~:-};/};/};/};/{8/{8/{8/{8/{8/{8/y9/y9/z:0y;0y;0x:/x:/v:/u9.u9.t8.s7-r8-q7,p6+p6+o5*n5*j1&o6+j4(i3'l6*i3'm7+}G;†RG†RGŠQHŒQI‘RK’SL“TM‘VN‰WN†\P~^OsaKoiQnsUkvVevTPd?I_9DX3BV1BV3FW7ET7DR8?I0:D,6>)7<(9=,;>-;=/8<.5=22<16<26<09=.:>-<A+:C(MW5Yh?f|KlˆNmNp˜QtžRt Uz¡^wž_n•Vb‰J\ƒD[‚C\ƒDZB_†G`‡Hb‰JcŠKcŠKaˆI^…F]ƒFX}GMr?Di6g`PhaQhaQhaQibRibRibRjcSibRibRibRibRibRibRibRibRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTlcTqdToaTl`Ri_SiaTjdXieZhfZig[wuiˆ„x~•’Œ|ˆƒp{zf^cLLT<CG0DF0FD/E=*L<,UB3O8(R6(T4'V4(\6)`8,d<0m=1ŠLAœRI­VOµOK»ECÃAAÐDC×GFÑ@;Ô@<ÔD<ÕE=ÙEAÙEAÛFBÝEDÝCCÝCCÞDDÞDDÝEBÝEBÝEBÛFBÛF@ÙF?ØE=×D<×D<×D<×D<ÕE<ÔD<ÐF<ÈE;ºA6¦</•8)‹9+„<-?/@1~?0=/;/„:-‡9/‡9-„;,ƒ;,‚:+‚:+‚:+9*:+:+~:-~:-~:-~:-};/};/};/};/{8/{8/{8/{8/{8/x8.x8.w9.y;0y;0y;0v:/v:/u9.u9.u9.s7-s7-q7,p6+p6+o5*m4)m4)j1&m7+j4(h2&k5)h2&k5){E9ˆOFŠOGŒQIŽRJTLUMWN‹YNˆ^RƒbSv_MiZGc^HdhO\fKN]@EU8@Q1<L/;K.>N1BP6ES9GS;@L4>G29B-7?*7>,7>,8<-4:,5=23=46;46<29=/:>-;@*9B'IS1Ud;bxGi…KmNršSv 
Tv¢Wz¡^q˜Yd‹L[‚CY€A[‚C\ƒDZB^…F_†GaˆIaˆIaˆI_†G]„E\‚GW|HLq>Ch5g`PhaQhaQhaQibRibRibRjcSibRibRibRibRibRibRibRibRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTlcTrfXsgYqg[ldYhaWieZrnezwn€}v…Ÿœ“§¥™ª¦›¨¤˜¢œŽ˜•†x{hbgQMR<EJ4CH4@E1AD1DG4KL:HI7ED2A>-@:,?7*@6*F4&R5'oC8‹PHžOJ­IG½FHÊEHÑBDÕAAÖ@?ØC?ÙD@ÚE?ÚE?ÜD?ÜDAÞDDÞDDÝCCÜDCÜDCÜDCÚEAÙEAÙF?ÖF>ÕF>ÔE=ÒE;ÒE;ÓF<ÔF<Ó@8ÓC;ËE:¿C7­?2œ:-‘;,‰=-ƒ>.?/~?.=-€=-ƒ;,†:-†:,ƒ:+‚:+‚:+‚:+:+:+~9*~9*~:-~:-};/};/};/};/};/};/z:0y9/y9/y9/x8.w9.v8-v8-x:/v:/v:/u9.u9.t:.s9-s9-q7,q7,p6+p6+n5*m4)l3(l3(g1%l6*i3'f2%i5(f2%i5(|C8ŠNDNEPISKŽULŒXM‰YOƒ[OyYLt]MiZG\UBYXCY^GNX@?K39G.7E,4B+4B+7E.<H2@L6CO9@L6=I59E19B/7@-5>+4=*2:+3=42<45:35;18;09=.:?+9B'GQ/Ra8_uDh„Jo’PuVx¢Vw£XxŸ^k‘T]ƒFW}@YB\‚E\‚E[D^„G^„G_…H_…H_…H^„G]ƒF\‚GV{GKo?Bf6g`PhaQhaQhaQibRibRibRjcSibRibRibRibRibRibRibRibRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTkdTkbSndXnf[jcYhd[pmfƒ€yŠ¤£Ÿ²±¬ÁÀ»ÈžÇÄ»ÄÀ·¿¸®¸²¦¢œŽ…‚qedROP>DI5<E08D.8E18E17D07D28C3;C4>D8BE:GE9<4'M:,d?6~E>¡PO¿XYÊQVÉDGÒFGÖFFÙECÚFBÝEBÝE@ÝD?ÜC>ÞDDÞDDÜDCÜDCÜDCÛCBÙD@ØD@ÙEAÖF>ÔE=ÒE<ÐF;ÐF;ÏG;ÓF<Ó>7Õ@9ÎD:ÄD9³A6¤>0•<,‹<+…>,‚?.€>.>,€=,<,„;,„;,‚:+‚:+‚:+9*:+~9*~9*}:*~:-~:-};/};/};/};/};/{;/z:0x:/y9/w9.w9.v8-u7,s7,u9.t:.t8-s9-s9-q9,r8,r8,p6+p6+p6+n5*m4)l3(l3(i3'g1%k7*h4'f2%h4'e1$h4'{B7MD‘NF‘PJŽSK‹WL‡XN€ZMy\Ns\Lk[K_XFUT@PS@JR=@I64@,3<+1:)09(.9(0;*3>-7B19D3=H7<H4<H4:F28D04A-2>*0;+0:10943824927:18<.9=,8@(CL-O^7]sDhƒLp“SvžWx¢Vw¡Wp—Xc‰LW}BV|?YD[D[F[D]ƒH]ƒF^„I^„G]ƒH\‚E\‚G[HUzGJn>@d6f`PgaQgaQgaQhbRhbRhbRicShbRhbRhbRhbRhbRhbRhbRhbRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTkdTiaTicWjcYkg^spi…„ž™¯°«ÁÁ¿ÑÑÏààÞæåáæãÞáÞÙÛÖÐ×ÎÇÌ»±¶¤––†wvm\_ZGKM8>F/;D/=J6<I5;F5;C4;>3:;3983880<>1A=1L8/_81†IH¯^]Å`dÈWYÍQSÑOO×MKÙKIÞIEßFAáC@áB?ßCDÞDFÝCEÝCEÜBBÛCBÛCBØC?ÙEAÖE@ÔE=ÒE<ÐF<ÐF<ÏF<ÒE;×@9×@7ÑC9ÈD8ºD8«A3˜<-‹:)†=,ƒ?,€?->,€=,=*ƒ<*ƒ;,‚:+€;,:+:+~9*~9*}9,}9,~:/};/};/};/{;1{;1{;1{;1y:1w;1x90u9/t8.s7-s7-q7,r8-q8-r8-p7,p7,n8,o6+o6+o6-o6-n5,k5+j4*j4*i3)h4)f2'k7,h4)c1&e3(b0%e3(zA8ME’MF‘PJTK‡WMYLwZLo\Mk^N]XERQ?KN;BI79B/2;*08)06*06,06*.6).6'/7(2:+4<-9D4:E4<I7<I7:G35D/2?+0=,-7/-6116027058
/7:/9=.8@)?H+KY5[qBg‚Kp“SvžXv VsUgP]€FUx@Wz@[~F[~D[~F]€F_‚J_‚H_‚J^G^I]€F]€H]€JSwGGk=>b6f`PgaQgaQgaQhbRhbRhbRicShbRhbRhbRhbRhbRhbRhbRhbRjcSjcSjcSjcSjcSjcSjcSjcSkdTkdTkdTkdTkdTkdTkdTkcVoi]kg\lh_spi„ƒ~Ÿ ›¼¼ºÍÏÎÍÏÎÞàßñññøøöø÷õöòïðëèíâÜæÐÅÛÁ´Á¬› ‘~}u`^\EJN7AI1@J2@I4@G5BD7AA7B=7B:7>;67=3=@5B71H/*c74ŽRQ³ggÆnm¿[[ÆYVÍVRÕQMÛMKàHEäEBåBCßCDÝDFÜCEÝCEÜBBÜBBÛAAÚBAÙD@ØD@ÕD?ÓD<ÑD;ÐF<ÏF<ÒE;ÛB<Ù@8ÒB9ÊD9¿E8­A4š:*‰6$ˆ<,ƒ?,>->,€=*€=*=*=*€;,:+:+:+~9*}:*}9,|8+};/};/};/};/{;1{;1{;1z<1w;1v<1v:0t:/t8.r8-q7,p7,p7,p7,p7,m7+m7+m7+l6*l6*m7-l6,l6,k5+j4*h4)h4)g3(f2'i7,e3(c1&e3(a/$e3(wA7ŽMG‘NHŽRJŠUMƒYMzZMq[Mh\LWTCHI9?C2=D49B108)/7(6<047047058117-06,/7*08+19,6A38E4<I7=L9:I46E02A,0>-*5-,50/4.16/4707:/9=.8@+<E*IW4YoAfLp’UuœYtWo˜T`†IX{CRu=VyAZ}EY|DY|D[~F^I^I^I^I]€H]€H]€H\~KStGEh>>^5icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSibRjcSjcSjcSkdTkdTkdTleUleUleUleUleUkdTkdTkdTjdVhdYee]ttl‘’Œ®¯ªÃÅÂÙÛÚéíîñõöôø÷ûýüÿÿýÿÿýýüøúöóûòë÷áÔòØÇÞɴŵž©Ÿ„ˆ„gbbFEH-DH/BF/FE1GD5E<3C60D52F;9>?:>A:B:7G83bFCjgēç¬¦ÑŠ„Çtn¿^WÅTNÔPNÞLLçGIêGJÝDFÞIKÚDEÔ;=Ø>@àFHßEEÕ;;Ù??ÙA>×B>ÖB>ÓB=ÑB<ÏB9Ð@7ÜC=Ú@8Ò?7É@6½A5®@3<,Ž8'Š=-†?-‚=-€=,~=)<)<)€<):+~9*}9,}9,|8+|8+|8-z8,{9-{9-y9/y9/y9/z:0z:1y:1w=2v=2v<1s:/s9.q8-p7,m7+m7+m7+m7+l8+l8+l8+l8+l8+g3(k7,o;0n:/j6+g3(f2'e3(c1&h6+c1&]-!_/#^."b2&q?4‹OGPK‡PIRJzXLu^PeXHSM=BE2=D29B14?/2=-3;.2:-39/68357257227016/06,.6+.6)-8*0=,6C19H5:I45G11C-0>-(4*)2--2,.3,25.58-8<-8@+6?$DR/WlAg‚Or“Zv\r›Wj“QX~CVyCSv@Sv@VyCY|FY|FX{E]€J`ƒM`ƒM\I[~H]€J_‚L]MRrIDd?9W3icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSibRjcSjcSjcSkdTkdTkdTleUleUleUleUleUleUleUleUjdVee]noi…†£¥ 
À¿ÕÙØéíîõûûöúû÷ûüùýüýÿüþþüÿþúÿüùÿûóÿðàúêÑìÞÄ×αÀ»¤£„€„cflJLQ3FI.CC+GB/H>2G:2J84J<9>:7CB>NIF\RPyje£Ž‰Í²«çżÿ×Î벧̓zÀc\ÉXT×RSÞIKÚADÔDDÓEDØHGÜJJÚFFÓ=>Õ=<ÛAAØ>>Ø@?ÖA=ÖA=ÕA=ÒA<ÐA9ÑA9ÛB<Ù@8ÑA8ÊA7¾B8°@4ž</8*‹<-†?-‚=-€=,~=+~=)<)<+:+~9*}9,}9,|8+|8+{9-z8,|:.|:.z:0z:0z:0z:0z:1y:1v<1t;0t;0p:.q8-n8,m7+k7*l6*k7*k7*i8*k7*i8*i8*i8*j6+l8-n:/n:/k7,f4)e3(d2'b0%c3'^."]-!b2&]/"_1$o?5JB†QK†WQ}WNqUJdRDPI9>?/9@.5@/3@/2?.2=/3;.4:069268368557246116/16//7,/7,-8*/;-4A07F39H56H23E/3A0,8.,6.-2,-2+03*36+6:+6>)6>&DR1VkBh‚Ss”]vœ_p˜YfŽOV|CUxBRu?Ru?UxBWzDX{EWzDZ}G]€J^K[~HZ}G\I]€J[{LPnJD_@7R3gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSibRjcSjcSjcSkdTkdTkdTleUleUleUleUleUleUleUleUieYde_y{x—™–²¶µËÑÑãééóøû÷ÿÿûÿÿûÿÿûÿþüþûþÿúÿÿúÿÿúÿÿôýûäùøÚïðÑàäÃÑ׳½Åžž©‡’j^hEPW6CG,DC.HE4LD7J@7H?8C;8QLIia_wtœ’½²¬ØÎÅéÛÐüéÛÿôçÿäÙï«¢Áfa´DCÆEIÙSTÚVTÎMHÉC@ËDAÑFCÕEDÕCCØBCÖ>=Ö>=Õ?>Ö@?ÕA=ÓB=ÑB<ÒB:ØA:×@9ÑB:ÊD;¿D<°A6¡<0’:,‹<-‡>/‚=.€=,~=+~=+~=+~=+~;+}9,}9,}9,{9-{9-{9-z8,{;1{;1{;1{;1z;2z;2y:1w;1s:1q;1p:0n:/n8.l8-k7,h6+k7,i7,i7,h8,i7,h8,h8,h8,k90k90j8/j8/j8/i7.e3*b2(c3)`1'[,"_0&c4*Y-"\0%pD9}PJWO}XPoSH^J?OC7?<-46(1:)0;+0=,1=/2<14<15;169268368368338227027008-08--8*.:,2?.4C07F38G46G48F52>20:1/4-,2(-0'03(37(4;)6>'DQ3XlGk„Zv–dwdl”X_‡KUxBTtBRr@Rr@TtBVvDVvDVvDWwE[{I\|JZzHZzH]}K[{IXwKNgG?W=1I1gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSibRjcSjcSjcSkdTkdTkdTleUleUleUleUleUleUmfVmfVjfZikf‚ˆˆ£§¨¹¿¿ÍÕ×åíïòúüôüþùÿÿùÿÿúÿüúÿùüÿøýþöÿÿõÿÿó÷ýãóüÝîøÖåñÍÝéÃÎÚ²µÁ™ ¬„}‡bfmKLS4DH/HJ5KI:GE9D@7OGDia^Š‚€¢š—·¯¬ËÆÀÝÚÑéåÚùóçÿüïÿ÷ìÿÝÕú·±åÇcc®@?ÁPJÈSJÇPHÂG@ÇF@ÒMHÕJGÐ@?Ï>;Ò><Ó?=Ô@>ÓB=ÓB=ÒC=ÒA<Ô?9ÔA:ÑD;ÊE<¾E<°A8 
>3”:/‹</‡=0‚=.€=-~<,~=+~<,~<,~:-}9,}9,}9,{9-{9-{9-z8,|<2|<2{;1{;1z;2y:1y:1w;1q8/n:/m9.l8-k7,h6+h6+g5*i7,h8,h8,h8,h8,g9,h8,h8,k;1i7.g5,h6-j8/h8.d4*`0&c4*`1'].$`1'`4)V*^2'vND~\SuXPgNGWC:H<0@:.:;-7;,/7(.9)0;-2=/4<15;169058/69249249238138119.19.19,-8*.9+/<+2?.5B07F39H5:H78D64?117+,2&,/$-1#15&18&4<%ER8]pPqŠc{›lv›heŒWT{DQs@Qq?Pp>Pp>Qq?SsASsATtBUuCYyG[{IZzH[{I]}KYyGVsGH_C:M9,?,gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSibRjcSjcSjcSkdTkdTkdTleUkdTleUleUleUmfVmfVngWkg\pro…¤©¬·¿ÁÌÔ×ãíïñûüóýþ÷ÿÿøÿþúÿúúÿøûþóùûîøùëöùèõÿçóÿäðýßíøØèóÑÞèÅÊÒ­¶¾™›£~{‚aX^BGL5FK7DK9@F8@B7b]W…~x«¡ŸÀ¸µÊ¿ÕÐÌäáÚîîæùöíûôêþðåÿóéÿôìÿåß騤¶ic¥K@²PC¿YKÇ[OÃQG¾E=Á@:ÊE@Ê@=ÌA>ÏA=ÐB>ÐB>ÑC?ÐC<ÑB<Ò?8ÒB:ÏD=ÇG>»D<­A7Ÿ=4”<2‹;0‡=2‚<0€</~<.~<,~<.~<.~:-}9,|:.|:.{9-{9-y9/x8.|<2|<2z;2z;2y:1y:1x92v:2n8.k9.j8-i7,i7,h6+g5*f6*g7+f8+f8+f8+f8+d8+f8+f8+j:0g7-e5+e5+h8.h8.d4*`1'a2(b3)\0%\0%\0%V*a7+zVJqXQ_NFL?6@6,<6*:8,9;.9=/19,2:-4</4</5;/69.58-47.39/2:/2:/2:/19.19,19,/:,-8*-8(.9)/<+2?.5B18E4:G6;F67B24:,/5'.2$.2$04%/6$19$FR:buWvŽj{šqq•g\‚QJp=Mo=Nn=Oo>Oo>Oo>Oo>Pp?Qq@TtCXxGZzI[{J]}L]}LWwFQmDAT>3C6&6)gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdShbRjcSjcSjcSkdTkdTkdTleUkdTleUleUmfVmfVngWohXlh]jnmz‚…“›ž«µ·ÃÍÏÚåçë÷÷òþüóþúõÿúùÿùùÿöúþð÷ûìõöæñôáóüçóüçóûäóùßñõÚéìÍ×Ö·Ää°¯‘qdfNMP;DK9EN=EPBIQF{{qžš‘Ľ·×ÎÉÝÓÑåÛÙðëçú÷òïîéûûóÿÿöÿþóÿøîÿóéÿæÝÿ×Ëג‚·dR¢K8±TC½WI´E:¶@6ÈLDÄC=ÈC>ÊC=ÍD>ÍD>ÍD>ÌC;ÍB;ÐA;ÑB<ÌE?ÄE>¶A:©>6<5•<4‹;2‡=2‚<0€</~<.~<.~<.~<.~:-}9,|:.|:.{9-{9-y9/x8.{;1{;1y:1y:1x90w8/w81t91l8-j8-j8-i7,i7,g7+g7+g7+f8+f8+f8+f8+d8+d8+d8+d8+h8.f6,e5+f6,g7-g7-f7-d5+a2(d5+^2'[/$`4)`4)g=1tTIUG>C?695,86*99/79,48*37)4:.4:.4:.4:.58-47,46+36+28,19,19,08+08+08+.9+.9+.9)-8(-8(.9)/<+2?.4A06C2:E48C28?/4;+37)15'04%-4$07%HT>dw[tŒls‘keˆ^RwKBh9Ij;Ll;Mm<Mm<Ll;Ll;Mm<Nn=TtCXxGZzI[{J]}L\|KSsBLf?:K8.;1#.&feSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSgdShbRjcSjcSjcSkdTkdTkdTleUkdTkdTleUmfVngWohXohXmi^fjiluz„’¡¬°½ÈÌÒÝßäððïûùòýùóþöøÿ÷ùÿôûÿðùýìøùçö÷åõöèö÷éùöçûöãüôáöëÕæ×ÀÔƬ½±™›“|tmZXWENPBQWKYcZcmb•˜±±¥ÓÏÆçÞÙñãâøèèÿññÿöõÿýúüû÷÷úóöùîúüïÿÿóÿÿñÿûêÿåÐºoZ§VC¬SAµQB¸L@¿JAÂG?ÅF?ÈE=ÉF
>ÉD=ÉD=ÈC<ÈB9ÐC<ÏD=ÊE>¿C;³>7¥:2›:3”=4‹;2‡=4‚<2€<1~<0~<0~<0~<0};/|:.|:.|:.{9-{9-y9/x8.z:1z:1x90x90w81v70s7/r7/l8-j8-i9-h8,h8,h8,h8,f8+f8+f8+d8+d8+d8+d8+d8+d8+f6,g6/h70h70g80g80i:2k<4d8/i=4b6-`4+nC:vKBsH?kMCB9047.25,7:/8;247,14)25*58-47,37)37)35(35(46)37)/7(-8(-8(-8(-8(-8(-8(-8(.9).9)-8(-8(.9)/:*0;+1<+7C/7C/:A/9@.9=.59*15&,3#18&IU?cu[m„geƒ_WzRHmD>c7Gg8Li9Nk;Nk;Li9Kh8Mj:Ol<UrBXuE[xH\yI^{K\yIQn>F`95C2)4, )$feSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSfeSgdShbRicSicUjbUkcVkcVkdTleUkdTkdRkeUlfVmgYnh\mi^jidjpplvxŠ¤®°ÂÌÎÔÞàãíîñûúóü÷öýö÷þöùÿóüÿòûÿðüÿîüýíúøìýùîÿùíÿúêÿøèÿòßîßÈÝη¾²šŸ—€zua`_KVZK\dWkwk|†{¦¬ ½¿´ÙÖÍìåßøíëÿòòÿóóüòóý÷÷ùúõøý÷úÿùõÿôóûîõýîÿÿñÿùèÿôãü˺ºošRC§TF´TH°D8¾I?ÃG=ÇG>ÉF<ÈE=ÇD<ÆC;ÅB8ÌF;ÌH<ÆH<¹C9ª>2Ÿ:0•;2>3‰<2„>4=4€<1;0;0€:0;0};/|:.|:.|:.{9-{9-y9/x8.z:1y90x90w8/v7.u6-r6.q6.l8-i9-i9-i9-i9-h8,g9,g9,e7*e7*e7*e7*e7*e7*e7*e6,f5.j81l:3k:3i9/i:0l=3nB7kA5mE9c=2b>2xVJ…cWwWLbLA<902:/2:/5=24:0.4*06*6<025*25*04&04&04&15'37)28*.6),7),7),7),7)-8*-8*-8*.9+.9+-8*-8*,7)-8*-8*-8(4?.6A0;D3<E4;B26=-07',3#2;(JVBbqZg{`]vVOmIEe@@`9Ed8Kh:Mj<Mj<Kh:Jg9Kh:Nk=TqCWtFYvH[xJ]zNZwKMj>BZ81?2'0-(%heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVgdSheVhdXjdXkdZkeYldWmfVe`MjeQnkXmk\hfZed_qqo€ƒˆ’•Ÿ ª´¶ÁÉË×Üßèîîó÷øøúùüþûýÿúýÿùüÿöúýòøûðùýïúþðøúïøúïøøìûùêþüçøöÝèäÇÔÓµ»ºœ˜™zptY]eMbkVq~lƒ‘‚œ´¹²ËÌÆãâÝïîé÷óðû÷ôüøõû÷ôÿþúþÿúýÿúýÿúýÿùýÿùýÿùýÿùùûöÿÿúÿüöÿêäר  
`V™E;³ND¿MBÂF<ÄD9ÊH;ÎKAÎKAÉI>ÆH:ÅH6ÂI6ºJ6±H5¥F4šC2C3‹C5„B4B3€@4€>2;1ƒ9.…8.ƒ90~:/}:1|90{8/x8.w7-v6,v6,u7,v8-v8-w9.t8-r6+p4*m4)j8-g8.g8.g8.f7-f7-g7-f6,f6,f6,h8.h8.h8.g7-e5+e3*g2,j5/l7/k90i9-i;.i=0hA2eA1iJ8dI8u^N|kYˆxh‹~nQK=79,3;.2:-2:-19,19,19,19,17+06*06*06*/5)/5)/5)-5(.6+.6+.6+.6+.6+.6+.6+.6+.6+.6+-5*-5*-5*-5*.6+,6+0:/2>26@58C58C57B24?/3>-3?+DQ=WeN[kQPcGDW9@U6@X6Ic>Ie=Jf=Ie<He9If:Kh<Mj>SpDWtHZvM\xP\xRVrLFa>9P6$1($,.!)+heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVfeSgeVhdXhdYkd\kdZldYlfVidPjhQlkWjk[fh]jli{‚Ž“™¨°³¸ÂÃÎÖØßçéïóôøüýÿÿÿÿÿýýüøþýùþÿùýþöúýôøûòöüðöüðöùîöùî÷ùë÷úçøúäñôÙßâÃÌЯ´¸—‘–virUXbG_kUtm‹™ˆ›§›¼ÁºÒÓÎçèãòóî÷øóúûöûü÷úûöþÿúþÿúþÿúþÿúþÿúþÿúþÿúúÿþôÿÿóÿÿûÿþÿüúÿîèïÀ¸Á~užL@®L?¹K>ÂL@ÅK>ÁE9»?3¾@4ÁF7ÂG5ÁJ4»L8±J7£F4˜B1D4‹F7ƒD5C6€@6€=4€<1ƒ:1ƒ90‚90}:1{;1z:0y9/w9.v8-v8-u7,s7,s7,t8-t8-r8,q7+o5)m4)j8-h8.h8.g8.g7-g7-g7-g7-c3)d4*e5+g7-h8.i9/j8/j8/i70i70k90j:.j</i>.hA0gD1fG3jQ;fQ>ufQ~vcŠ…q„ƒqIL;39+2:-2:-19,19,08+08+08+08+/7*/7*/7*.6).6).6)-5(-4,-4,-4,-4,-4,-4,-4,-4,.5-.5--4,,3+,3+-4,.5-,6--7..:01=34@65A56B45B14A05B0BO;P^GQ_HHV=AO5BP6EU8Ia?Ie?Ke@Ie=Hb;Fb9Hc:Hd;RmDVrJ[uN[wQ[tTRmNB[=4J5$1*$,/"*-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVfeSgeVhdXhdYkd\kdZkeYlfVmhTkiRkjVijZjlauwtŽ’•§¬²¾ÆÉÒÜÞêòôõýÿüÿÿüÿÿÿÿÿþþüýüøÿþúÿÿúýþöøûòõøíñ÷ëñ÷ëóöëôøêõøçô÷âôöÞìïÔÛܽÆɪ®±’’—yuy`kpYt{iˆ‘€¨š¬¶«ÌÏÈÞßÚïðë÷øóûü÷ýþùþÿúüýøþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿüùÿÿùÿÿýÿþÿüùÿúôÿ÷íýÑÄן’M=£RA¨K:¥?0·G9ÎZKÏUH¼A2ÃD5ÅG8ÃK=¸H:©C5?3•A6‘D:‡A9„A8€@7~?6~>5}=4}=4}=4z;2w;1v<1u;0u;0t:/r9.r9.q8-q8-o9-o9-n8,m7+l6*k5)l8-j8/j8/h8.i7.h6-h6-h6-e5+e5+e5+f6,g7-h9/j:0j;1k<4i:2i:0h</i?1iB1iD2eF2eJ5jV>iXDujT…~k‘z~mCF528*19,19,08+08+/7*/7*/7*08+/7*/7*/7*.6).6).6)-5(-4,-4,-4,-4,-4,-4,-4,-4,.5--4,,3+,3+,3+,3+-4,,6-*4+*6,,8./;12>24@26C27D37D2?L8ES<BP9;I08F-=K2CS8I^?Hb=Kc?Jd=Ha:E_8F_8E_8QjCVpI\tP\vSZrRNgI=T8.A-%0*$,."*,heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVfeSgeVhdXhdYkd\kdZkeYlfVkiTkkSlkWkn]nqf~ƒž¢¥¹¾ÄÄÌÏÛåçóûýùÿÿúþÿúþýüüüúúøþýùÿþùþÿ÷úüñô÷ìîñæèîâçíßíñâïóâñôáðóÞðòÚèëÐØؼÇÇ«±±•ž¡†’|“€›Ÿ©­Ÿ¶¼²¿Ä½ÛÝØéêåö÷òûü÷üýøþÿúþÿúüýøþÿúþÿ
úþÿúþÿúþÿúþÿúþÿúþþüúúúÿÿÿÿþÿõñîûôìÿýñÿûíÿôäë°¶‚m“P= O<µXGµN?´D6ÀH:À@3ÇE8ÅI?¼F<®?6¡<4˜>6•B<Š?:…@;@:|@8z?9x?6x?8x?6v=4u<1s=3s=1r<2r<0p<1p</o;0o;.l:/l;-k9.j9+i7,i7,l7/l7/l7/l7/k6.i7.h6-h6-k;1j:0h8.f7-e6,d8-e9.f:/j?6f>4c=0e@0hC3iF3eF2aF1aK4hV>i]GrjS…ƒnyor_9=,17)08+08+08+/7*/7*/7*.6)08+/7*/7*/7*.6).6).6)-5(-4,-4,-4,-4,-4,-4,-4,-4,-4,,3+,3++2*+2*,3+,3++5,*4+)5+)5++7--9-0<03?15B15B19F4;H47E.2@)1?&7E,=M3DY:E];J_>Ia=J`<F^:H^:F^:RhDWoK`uT^vVZnRKbF9M4(9'$/)#+-!)+heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVgdSheVhdXhdYkd\kdZkeYlfVjhSjjRmlXnq`svkƒˆ„¢§ª¼ÃÉÍ×Ùåïñùÿÿùÿÿùýþúþýÿÿÿýýûÿÿûÿÿúüýõ÷ùîîñææêÜàæØÞåÕæêÙéîÚìïÚìðÙìîÖèèÎÚØ¿ËÉ°¿½¦¶´Ÿ±®›µ³¤¾¾²ÈÈÀÑÒÍÙÚÕèéäñòíúûöüýøüýøþÿúþÿúüýøþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿþúÿýýÿûþþúùúûöüÿøüÿöþÿóÿÿíÿþêõßÈ쇛bN“J7¤M:´P@»K=¾@4ÅB:ÅF?¾C<±>9¥9697˜=:‘=;Š?<ƒ?<|?:w@;tB9qB:qB8p>5r>3r=5r>3p>5o=2o=4o=2m=3m=1k<2j</i:0h:-h9/i9/n70p70o6/m6/m6/k6.i7.i7.k;1i:0h9/d8-c9-d:.e;/d<0cA5bB5bB3bE3cF4cH3aH2]I1]M4bV<iaJrnU‚„nƒˆqYaL29'08)08+08+08+/7*/7*/7*.6)08+/7*/7*/7*.6).6).6)-5(,3+,3+,3+,3+,3+,3+,3+,3+,3++2*+2**1)*1)+2*+2**4++5,*6,)5+)5+*6*,8,/;-1>-0=,3@.5B03@,0=)/=&3A*6F,=O5?S7EX<H]>I\>I^=J]=J_>SfFZoPbuY`tYYkSI\F5G1$5%"-'!*) )(heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVgdSheVhdXhdYkd\kdZkeYlfVigRjjRlnYnq`pvj€†‚œ¤¦·¾ÄÕßáí÷ùùÿÿùÿÿøüýüÿÿÿÿÿýþùÿÿúÿÿ÷úüñóõèéíßâæ×ÜãÓÛâÐãèÒæëÔéíÖéëÓëëÓèæÍÜÚÃÒÍ·Ò͹ÌƶÉõÍɾÖÑËÞÛÖéåäïîìòóîøùôýþùüýøüýøþÿúþÿúûü÷þÿúþÿúþÿúþÿúþÿúþÿúþÿúÿýúÿûúÿ÷øÿýüþÿýøÿûòÿ÷îÿòôÿõïöäÿÿìÿíØ˧‘¢kW¡ZFªXC­L;¸D7¿C9ÀE=»D<³@;ª=:¢:7š;9”<;Œ><…?=}@;wB:rC9oD;oE9o?5p>5p>5p>5o?5o?5o?5o?5n?5m>4j>3i=2h<1h<1h<1k;1n70q60p70o6/m6/l7/i7.h8.f7-d8-d:.b:.d<0d>1e?2cB3^B4^G7bK9dN9aL7`L4]K3\L3]Q7`Y=lhOssY€„mt|dEN92>*19*19,19,08+08+/7*/7*/7*08+/7*/7*/7*.6).6).6)-5(,3+,3+,3+,3+,3+,3+,3+,3++2*+2**1)*1)*1)*1)+2*+2*,6-*6,)5+(4*)5)*6*+7+-9+,8*/<+2?.3@.3@.2?+2?+3A*6E.8J0>P6BU9EX<GZ<J]?L_AReG[nRbtZ_qYVhRFWD2C0#1"#.("+* 
)(heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVgdSheVjdXjcYkd\kdZkeYlfVljUkkSjmXjn]kqey{”œž­¶»ÑÛÝéóõùÿÿ÷ÿÿöúûüÿÿÿÿÿøùôÿÿúþþöøúïðòåèìÞãçØßæÖßçÒäêÐæìÐèìÑçéÑèèÐæäÍàÛÇ×оÙÒÂØÐÃÜÓÊåÜ×íãáòéêùðñüø÷úùõýþùÿÿûýþùüýøÿÿûÿÿûüýøþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿýúÿø÷ÿùøÿþýüÿýõÿýëþøðÿýñÿûõÿøñøèÿüêÿûèôιªwbŽP9©[E±M=¸J;·I<µI=´G@¬C=¤>9™:4•<8Ž=9ˆ?9@9xB8rD7oE9oE9o?5p>7p>7p>7o>7o>7p?8p?8pA9o@8k?6j>5i=4i=4j>5m<5n70q60p70p70m6/l7/i9/g8.c9-a9-b</b>0d@2bA2aA2^C2WD3]N;eVAfW@bS<[O5[O5\R7\W:ZY;mmSrv[yicmT1?(5B.0;+2:-2:-19,19,08+08+08+08+/7*/7*/7*.6).6).6)-5(,3+,3+,3+,3+,3+,3+,3+,3++2**1))0()0()0()0(*1)+2*+5-*4,*4+*4+*4++5*,6+,7),7).9)2=-5@07B26A05@/3@,0?*4C,9H1=M3@P6DT9HX=K[@RbG[kQapY^mXSbODRA1?.!/"%0*$-*"+(heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVgdSheVjdXjcYkd\kdZkeYjgVmlWkmUilWelZem`qzu—˜§°µÊÔÖâìîõýÿôüþ÷ûüüÿÿÿÿýøùôÿþùýýó÷ùìïñãèìÝåéØãêØåíÖæìÐèïÐèíÏæéÎææÎåãÎáÚÈØÑÁ×ÍÁÜÒÉéÞÚøíëÿóõÿõöÿôøþøøþýûÿÿûÿÿûþÿúýþùÿÿûÿÿûýþùþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿþúÿþúÿþúüû÷øüûøÿÿôÿÿðÿÿëþúïþ÷ûÿøÿÿóÿõåÿòÞÿãÌ×­•¨pW¦XB¬Q>«N<¬O>¯OA¬L@¡C9™<4”=4Ž>5ˆ?6€@6zA6sC5pE5oE7o?5p>7p>7p>7q?8p?8p?8p?8rA:pA9n?7k?6l=5j>5k?6n=6o81p70n70m80j8/i9/h9/e;/d<0c?1bA2aA2_B2]B1[@/VA.SF3ZT>f^Gg`F`Y?YR6XS6[W:XW9WX9imRqw[u}eUbH%36C/1<,3;.2:-2:-19,19,19,19,08+/7*/7*/7*.6).6).6)-5(+2*+2*+2*+2*+2*+2*+2*+2**1)*1))0()0()0()0(*1)*1)(2*)3+)3**4++5,,6-,6+,6+,6+.9+2=/6A18C59D47B25@/.;)0?*4C.7F/;J3?O5DT:HX>QaGYhQ_nY[jWP^MAO@.<-!-#(1,&/,#,)heVheVheVheVgdUgdUgdUgdUifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZkhWpoZjnUinXjq_hsenys…‘‘ž©­»ÄÉÏÙÛçïòóûýúþÿûÿþþþüýþùÿÿúýýóøúíõ÷éòöåïôàêòÝçï×ïö×ëòÑêïÑéìÑææÎÞÛÈÜÕÅÜÔÉßÕÌçÙÖñãâøììþòôÿöúÿùýÿüýÿþüþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿþùþÿùþÿúýÿþûÿÿúÿÿøÿÿúÿÿüÿÿþÿúÿüóÿûïÿýìÿûäÿóÛÿéιf¥bH•O6ŸT=¨ZF£RAžK; 
M?=1‹?1†>0~?.w>-s@-sD2tF6p>5r=7s>8s>8u@:sA:r@9q?8r@9q@9q@9o@8p?8n?7n?7n=6j8/m80k90k;1j;1g=1d<0b>0`?0^>/bG6aG6T=+N9&L7$@1WQ;OO7KI0ML0[Z>gfHbaCUT6WX9]`AosXw€ccmT=J0,:#1>*1<,3;.3;.3;.2:-2:-2:-19,2:-19,19,08+08+/7*/7*/7*/6./6./6..5-.5-.5--4,-4,+2*+2*+2*+2**1)*1)*1)*1)+5-+5-*4,*4,*4++5,+5,,6-(2)*4).8-2=/7A6:E7=H:>I;5B04A/1>,/<(1?(7E.AO8HV?N\EUcL[hTXeSKXG:F8.:.(4*&/*%.)%.)heVheVheVheVgdUgdUgdUgdUifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWnmXimTjoYjs`itfo|u†”” ®±½ÈÌÒÛàéñôôüþúþÿûÿþþþüüý÷ÿÿøýýóùûîöùèóøäðõßéñÚæïÔéðÏåêÊäçÊææÌãáÌÞØÈÜÔÉÞÕÌåÚÖíßÞõççûïñÿôöÿ÷ûÿùýÿýýÿþúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿúûÿùûÿùýÿüýÿþþþÿÿýÿÿýÿÿüÿÿýÿÿýýÿûøÿýóÿÿñÿÿìÿúâÿòØÿìÐß²•§uZ‘[A–ZB›ZDšVAšSAŽG5‹G4‡F4‚E2}D1wC.qA-p?.r@5q?8t?9sA:t?9o=6p>7rA:p?8p?8o>7n?7m>6m>6m>6l=5k<2k<2j>3j@4h@4eA3cB3aA2cH7[A0\G4\J6N=)E6!F7">6KK3GK2CG,BE(JM0Z[<abC_`AWZ;cfGsw\rx\WaH7D*/;%7C/5=.5=05=04</4</4</3;.3;.4</4</3;.3;.2:-2:-2:-19,18007/07/07//6./6./6./6.,3+,3+,3++2*+2**1)*1))0((2*'1)'1)'1)'1((2))3*)3*+5,+5,,6-.8-2<37A6<F=?I>9F58E35B02?+1>*4B+:H1?M6JXAQ_HWdRUbPJVH;G90<2*6,(2*(1,'0+gdUgdUgdUgdUheVheVheVheVifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWmlWhlShpYiuaiugq~w‰——¥³¶¿ÊÎÔÝâêòõõýÿúþÿûÿþþþüüý÷ÿÿ÷üüðùûíöùèò÷ãîóÝçïØãìÑàçÆÜáÁÛÞÁÞÞÆÞÛÈÝ×ÉßÖÍãÚÕîââóçéúîðþôõÿ÷øÿøùÿúûÿüüÿþúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿúøÿúøÿüûÿüýÿþÿüÿÿûÿÿùÿÿùÿÿúÿÿûýÿúøÿþøÿÿôÿÿñüÿìüúãÿóÛÿøàÿíÓâÀ§«ƒj‡YA…S<’[F–]IŒS?F2u>)r>)s@+tD.vE4p>3l<2o=4sC9sA8n>4qA7wH>m>4m>4l=3j>3j>3i=2i=2h>2g?3f@3fB4dC4cC4aD4_D3[D2aL9TB.YJ5_S=RG1C;$D<%EA(AE,@I.?E):A"=B$LO0\_@fiJ^aBmpQuy^flPHP83=$4=(=F37?07?27?26>16>15=05=05=06>16>16>16>15=05=05=05=03:23:23:2291291180180180/6..5-.5-,3++2**1)*1))0((/('.''.''.'(/()0)*1**1*/6/-4-+2++2*.5.4;3<C<>H?>J<=K::I66E02A,0?(2A*3B+CR;IXAP_LO^KGUF:H91>4,8.+5-*4,)3+gdUgdUgdUgdUheVheVheVheVifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWkmWimThpYht`gsem|u‡—–¤³¶¾ÉÍÓÜáéñôóûýùýÿúþýþþüýþøþþöüüðùûí÷úçò÷áíòÛåíÕâèÌÛàÀÖÙºÖÖ¼×ÖÁÜÖÆÞ×ÍäÛÖëáß÷íîûðôÿõùÿùüÿúúÿúúÿüùÿþúÿþúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿúøÿüøÿþúÿþýÿþÿýþÿûþÿúþÿùþÿùüÿùù
ÿúùÿýùÿÿøþÿöúÿóúþïÿÿíÿüéÿñÝÿðÛÿðÚñÒ½³zxR;rI3rH0uH1yL5|O:xK6nA,d7$xM=pD7nB7qE8nB7f:-c9-g=/h>2f?0f>2f?0f>2e@0e?2d@2aA2_B2_D3^D3^D3ZE2WE1UD0WH3NB,\T=nhPb^ELJ1IG.IL1;D)=G,>G*9B#;@"DI)UX9adEjmNuxYtw\Z^C?D-4<$8@+<C18@17?27?27?26>16>16>16>17?27?27?27?27?27?27?27?26=56=55<45<44;34;34;34;329118007//6.-4,,3++2**1)*1*)0))0))0))0)*1*+2+,3,.5.,3,*1*)0),3,1817>79C:?K=?N;?N;<K67F/2A*0?(/>';J3AP9GVAHWDBP?9G81?2,9/+6.+5-*4,fcTfcTgdUgdUheVheVifWifWifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWkmWhnThrZfs_cqbixq‚•“¢±´½ÈÌÑÚßèðóòúüøüÿúþýÿÿýþÿùÿÿöÿÿòüÿîúýêöûåïôÝåîÓâèÌØݽÒÕ¶ÑÑ·ÓÒ½ÚÔÆßØÐéáÞòéêýô÷ÿ÷üÿûÿÿþÿÿþüþýùýþøþÿùþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿüúÿþúÿÿûÿþýÿþÿþüÿýúÿüúÿüùÿù÷ÿú÷ÿû÷ÿýøÿþúþÿûûÿúüÿø÷ôíÿþöÿýôÿýñÿûëÿ÷çùçÓéӾʹžµ™ƒ“t_vVAiH5kH5pM:sP=kF4gB0fA1jE3lH8nK8nJ:mL9bA0bA.bA0aB.aB0bC/bC1`C1\E3[F3ZH4YH4XG3TH2RG1PH1NH0MI0ecJ~~dpsXTX=IM2FL0;E*<F+>G*>G(@E'BG'JM.ST5vwXyz[nnTRU:@B,>B+>A.9=,8>07?27?27?26>16>16>15=06>16>16>17?27?27?27?27?28?78?77>67>66=56=56=56=54;34;33:229107//6..5--4,+2++2+*1**1**1**1*+1-,2.+1-+1-*0,+1--3//511731;2:H9;L9>O<=O9:L45G/1C)0B(3E-8J2=O9?P=<M;5F6.>1,:-*6,+5,*4+fcTfcTfcTgdUheVifWifWifWifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWkmWhnThrZer^ao`fwo‚•“ 
²´ÀËÏÓÜáéñôòúüøüÿûÿþÿÿýÿÿúÿÿöÿÿóÿÿñþÿìùþçòøÞæïÒâèÌÕÚ¼ÏÒµÌÌ´Î˸ÕÏÃÞ×Ñëãáöíðÿ÷üÿúþÿýÿÿþÿÿÿýüÿøýÿ÷ýÿ÷þÿùþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþþüýþÿûÿÿýþÿýÿþýÿúþÿùþÿ÷þÿõüþóüýõýýõüý÷ýýûþþþþþÿþþÿûùþÿûÿþúûúõòýøòÿþôÿþñÿþíÿúèÿòÝôãÏæѾѺ¨¸žƒrrax\Gz^IqU@aE0W;&W<'Z?*X=(_D/^E/_F0_F0_F0^H1^H1]I1\L5YM7YM7XM7UM6SM5QM4NL3MK2OR7ilQ|€elrVQZ=DM0=H*9F*9F*?H+BK,DI+CF'GH)LM.xwYsrTecJSS9KJ5HJ4DE3:=,9?18@38@37?27?26>16>16>16>16>16>16>17?27?27?27?29@89@89@88?78?77>67>67>67>66=56=55<44;33:2291180-4--4-+2+*1*)0))0))/+)/+)/+*0,,2.-3/.40.40-3/+5-2@/5G1;M7=O7=O79K17I/5G-2D,5G/9K5;M7:K94E3.?/,:-*6,*4+)3*ebSfcTfcTgdUheVifWifWjgXifWifWifWifWifWifWifWifWjdTkeWkeYkdZle]le[jfZjiWjlVhnThrZfs_`qahyq„—•¥·¹ÆÑ×Úãèíõøõýÿùýÿûÿþÿÿûþÿ÷ÿÿõÿÿòýÿïüÿê÷üåîôÚâëÎÝãÇÓÖ¹Ì̲ÇÅ°ÇijÏȾÚÒÏéàáöíòþ÷þÿùÿÿýÿÿþÿýÿúúÿøúÿõüÿöýÿùþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþþüÿýÿþýÿýþÿûÿþúÿúøÿ÷÷ÿó÷ÿóöÿòøÿóúÿ÷ûÿùýýûÿýÿÿüÿÿûÿûõÿýùÿÿýÿÿýÿÿýÿüýøúúò÷øêÿÿñÿÿïÿÿíÿþíÿüìÿõæÿíßüéØøãÎüæÎàʲ¤Žvt^FdP7aM4\H/]I0[J0\K1\K1]L2\L2\L2\N4YO6VP8VP8VP8TP7PN5NN4KN3LO4NT8`fJenQS^@DO1@K-9F(=J.<I-BK.FO0GL.FI*NO0YX:qmPjfI`\CXV=VT?SR=KJ8BC3;A3:B5:B59A49A48@38@38@36>16>17?27?28@38@38@39A49@89@89@89@88?78?78?77>68?78?78?77>67>66=56=56=538227105/.3-,1-+0,+0,+0,+0,,1--10-10.21-10,0/)3+*8'-@*5H2:N5;O6:N39M28L04H-5I08K5:M79L95H5.@0+<,*6*+5**4)bcQbcQcdRdeSefThgUigXigXhfWifWifWifWifWifWifWkeWjdTkeWkeYkdZle]jf[jfZjiWhjTglUhrZgt`aqdh|sˆšš©»¿ÍØÞßèíòúýøÿÿúÿÿûÿüýÿùýÿôùùíùúìùúè÷úåòößêîÕÞäÊ×ÛÀÒÒ¸ÌDZĿ¬Ä¾°ÊûÕÍÊæÝàóêïþ÷ÿÿúÿÿýÿÿÿÿýÿüúÿùúÿ÷ûÿøýÿúþÿúþþüþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿþúþÿúÿþúÿþüÿüÿÿýÿþþÿýÿþúÿúøÿ÷÷ÿõöÿõøÿöùÿöûÿøþÿúÿþúÿüþÿûÿÿúÿÿüÿÿüÿþüÿþýÿüýÿùûúùþøüÿú÷úñùûðúúîýùíÿùëÿúëÿýíÿþêüðØÿûáêܝt^P5PB'WI.WI.ZL1[M2YN2ZO3ZO3ZQ4ZQ4XQ5SO4SO6SO6QO6OO5KN3JM2HL1IO3IR5R[>OZ<>I);F&@K+?J*CN0DM0HQ2KR3IN0JM.YZ;jiKedH_^B]Y@][B][DXV?PN9KJ6@C2>B3>B3<B4;A3;A3;A5;A58>27?28@38@39A49A4:B5:B5:B79A69@89A68?78@58?77?49@89A69@89A69@89A69@88?77>66=64;318005./4..3--2,.3/-2.,2.+2++2++2*)3((5$#7'>!/F)6M18O3:N39M29M26J/7K09M4;O6:M76I50C0,=++7+,6++5*\fN\fN^fO`hQaiRejTgjWgjWfgUgfTheTgdSjdVjdVkeWlfXlfVjgXjfZjf[jf
]ieZhdXefTehSdlUenYan\aqfq„€•§«¶ÇÎÑÜàáéìï÷ùôýü÷ýùùþøüÿöûÿñõ÷éøùçö÷åñðÜîíÙëêÕàßÊÔÒ½ÖϼÉÁ®½´¥¼²¨Â·±ËÁÀÛÒÕêáæùòúü÷þþûÿþýÿýþÿùýüùþúúÿûýÿüýÿüýÿþýÿüþþüþþüþþüþþüÿþúþýùþýùþýùÿýøþýøÿþùÿþûÿýüÿüûþüýþþüþÿýþÿýúÿùøý÷ýÿúûüöýýõÿþøÿþøÿúõÿøòÿúøÿúÿþûÿþýÿþþþýÿþýÿüúÿûúÿûýÿþþþþÿþüÿþùÿýòÿýíÿýêÿþæøôÛþúßåßŎ‰lTM0WN1TK,SJ+VM.VM.TN.TM0TO1SO2SO2QP4ON2ON2PO3PP6MP5JN5GM3DM2HQ6EN1AJ-=F'=G%AK(EL*EM(PV4OU3JO/HK,RU8bbFbbFZZ>YY?ZZ@\ZA[[AZZ>WW;UT8SQ8FE0ED0CB.?@.>?/=?1=?2=A3:=28>28@38@37B47B48C38C57B47B46@56A36@56A37A67B47A67B47A67B48B79D6;E:;E:8B98B97A66A36>14</2:/07/-4-.5.-7.,8*-:(0@&3F&6L&9V*8W+:V.9T3:S6:P9<O9=P:9K3@R8DX<EY=BY<=T82I/)<(*6(*4))3(YgMYgM[hN]jP`jRblTfkUglVghVfgUhgUheTkeWkeWmeXlfXjgVjgVjfZjfZjf]ieZgeXefTgjWemVfo\erahxnwŠˆš¬°·ÈÏÐÙÞàèëï÷ùõþûøþúøÿ÷ûþóøüíöùèöùæôóßìë×éæÓåâÏÚ×ÄÐ˸ɾ¬Á´¤¹­Ÿ»®¥Á¶²ËÁÀÚÏÓåßãúóûú÷þþûÿþýÿüýÿûüþùýüúþýýÿþýÿþýÿþýÿþþþüþþüþþüþþüÿþúþýùÿüùÿüùÿýøÿýøÿþùÿþùÿýúÿüùýüúþýûÿÿÿÿÿÿýýýûûûÿþýÿþûÿüùÿøð÷ìæïåÛõçÞûñèÿûýþüÿþþþþþüýÿúýÿúúþýúþÿýþÿýþÿÿýÿÿþüÿþõÿþíÿþèÿÿãöôÛûûááàĉˆjQL/RM/RL,QK+TN,TN,TO/SP/SO2QP2PP4OO3ON2ON2OO3MP3MO7IO5HN4EN3FP5DO1AJ->H&AH'EM(IO+JP*MS-NT0U[7\b@_bCY\?TW:TT:UU;VV<WW=WW=WW;VW8VU7UT6PL3NI3IG2FC0CB0?@0??3>@3:=28>29?38@37B47B47D37D37B47B46A36A36A36A37B47B47B47B47B47B48C5:E7;F8<G9;G=;G;;G9:G6;D39B18@37?46=54>52<30>-3C)7M)AX.Ea0No:Ln;Ih<Db>@Y<;S;:M:9L8<N6DW;I]ALaBJbBE\?9R5.D-)5'(2''1&YgMYgM[hN]jPakSblTglVglVijXhiWjiWjgVlfXlfXmgYmgYjgVjgVjfZjfZie\ieZhfYefTgjWckTdmZfsbm}s|š¬°³ÄËÌÕÜÞæéï÷ùöÿþøþú÷þö÷úïôøéñôãîñÞéèÔàßËÜÙÆ×ÔÁÏÊ·ÇÀ®¿´¢½° 
¼°¢Ã¶­ËÀ¼ÕËÊâ×ÛêäèûôüûøÿþûÿþýÿüýÿûüþúþýúþýýÿþýÿþýÿþýÿþþþüþþüþþüþþüþýùþýùÿüùÿüùÿýøÿýøÿýøÿýøÿýúÿüùýüúýüúþþþÿÿÿþþþüüüÿþýÿþûÿýúúóëêßÙàÖÌëÝÔöìãÿûýþüÿþþþþþüýÿúýÿúúþýúþÿýþÿýþÿÿýÿÿþüÿþõÿþíÿþåÿþâøöÝûûáàßdžhOJ-QL.RL,RL,TN.TN.TO/SP/SO2QP2OO3OO3NM1ON2NN2MP3LN6JP6IO5FO4FP5DO1CL/BL*FM,IQ,OU1RX2QY0W_6iqJx€[pvTX]=KP2MP5NQ6NP8QQ9QQ7TR9UT8VU9WV:VR9TO9OM8KH5GF4BC3@@4>@3;>39?39?38@37B47B46C26C28C57B47B46A36A37B47B48C58C57B47B48C58C5:E7;F8<G9=I?=I==I;=J9?H7>G6<D7;C8=D<9C:6@73A08H.AW3Of<VrA^JZ|IUtHKiEB[>8P85H52E1<N6EX<MaEQfGPhHLcF?X;3I2.:,,6+*4)XfLYgM\iO]jPakScmUfnWhmWjmZjkYkjXkjXkhYnhZnhZnhZkhWkhWjfZieYie\ieZhfYfgUehU`hQajWeram}s{ŽŒ”¦ª¨¹ÀÆÑ×Ùàæìô÷ôýüöüøôûóòõêíñâçêÙáäÑÛÚÆÒѽÍÊ·ÈŲ½ª½¶¤Ã¸¦ÇºªÌÀ´ÖÉÀÞÓÏèÞÝóèìøòöýöþüùÿþûÿþýÿüýÿûüþúþýûÿþýÿþýÿþýÿþýÿþþþüþþüþþüþþüþýùþýùÿüùÿüùÿýøÿü÷ÿü÷þûöÿýúÿûøüûùüûùýýýþþþþþþýýýÿûúÿþûÿýúüõííâÜãÙÏðâÙüòéÿûûþüÿþþþþÿúýÿúýÿúúÿûúþÿüýÿýþÿÿýÿÿþüÿþõÿýìÿýäþýáúøßþþäáßƇ†jPK.RM/SL/SM-TN.UO/UP0TQ0TP3QP2NN2MM1NM1NM1MM1LO2LN6JP6JP6HQ6HR7FQ3FO2HR0LS2OW2V\8\d;grGs€T†“h›s„ŽikuRY`?RW9KO4KM5LL4LL4OM6QO6TR9VT;XS=VQ;RP;NK8JI7DE5BB6@B5;>39?39?38@37B47B46C26C28C58C57B47B47B47B48C58C58C58C58C58C59D6:E7;F8<G9<H><H<<H:<I8>G6=F5<D7<D9=D<8B96@73A09I/CY5Ri?ZvEZ{FWyFRqEGeA<U81I1.A.,?+6H0@S7J^BQfGQiIMdG?X;3I2/;-.8-+5*XfLYgM[iO^kQblTdnVfnWinXkn[klZlkYkjXliZliZnhZoi[liXkhWjfZieYhd[hdYhfYghVfiV`hQajWeral|rx‹‰Ÿ£Ÿ°·ÁÌÒÔÝâçñóñùûóù÷ð÷ðíðççëÝàâÔØÛÈÑмËʶÇŰþª½¸¤º³ Ê¾®ÓƶÜÐÄæÙÑíâÞõëêþó÷ÿúþþ÷ÿýúÿÿüÿþýÿüýÿûüþúþýûÿüýÿþýÿþýÿþýÿþþþüþþüþþüþþüýüøþýùÿýúÿýúÿýøÿü÷þûöýúõÿüùþú÷ûúøûúøüüüþþþþþþþþþÿûúÿüùÿûøÿøð÷ìæðæÜøêáÿõìÿûûþüýþþüþÿúýÿúýÿúúÿûúþÿüýÿüýÿÿýÿÿÿûÿþóÿýêÿüãüûßýûäÿÿèäâɊ‰mRM0TO1UN1UN1TN.UO/UP0UR1TP3QP2NO0LM.ML0ML0MM1KN1LN6JP6KQ7IR7KU:HS5JS6NX6RY8T\7\b>emD‘b’¦sž²š«~’¡x†•nq|ZZcDKO4KM5KJ5JI4LJ5PK5SN8UP:UP:TO9QO:OL9KJ8FG7DD8BD7<?4:@4:@49A46A36A35B15B19D68C57B47B47B47B48C59D68C58C58C59D69D6;F8<G9=H:;G=;G;<H:<I8>G6=F5<D7;C8:A96@75?64B19I/AW3Ne;Tp?St?RtANmACa=7P3,D,*=*)<(1C+;N2G[?OdEPhHLcF>W:3I2*6((2'%/$
\ No newline at end of file
diff --git a/testimages/testimgfst100.jpg b/testimages/testimgfst100.jpg
deleted file mode 100644
index 36d9b75..0000000
--- a/testimages/testimgfst100.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgfst1_2.ppm b/testimages/testimgfst1_2.ppm
deleted file mode 100644
index c17f310..0000000
--- a/testimages/testimgfst1_2.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-114 75
-255
-0/+10,32.63.83/83-:3-:3+72.61-61-72.61-61-61-61-4/+3.*3.*2-)2-)2-)2-)2-)/,%0-&2/(30)63,74-96/:70A;-E?1HB4JC3LE3LC2LD1PE3R?0fB2‚D5œE4²D5¾D7ÆF;ÊG?êCKëDKíCFó?B÷:>ó:?ä@?Ò>:µ@,´?+³@-°@,«@,¤A,œB*—A(‘@-‘>,‘>,>)”?+“>*•>*–?+“@8›?2®C1½D1¹A1¤G8xD6J7&:6584362/40-63.52-52-41,,1+.0+/0+2.+3.+3)(8,,G99sCCŽMK¥Z^¤p†Š©z‡¼ek›SGoNI`OL]jjr”ž¤˜•™ˆrsaWVDUQbUQb..,0/+21-52-52-61+61+81)61-50,50,61-61-61-61-61-3.*3.*2-)1,(4/+4/+4/+4/+2/(30)41*52+52+74-85.96/?9-B<0E?1G?2G@0H?.I@/MA1LC2[D2qD1ˆA/–;)¦<,³C5ºI;ÝADâDEìDDö?Aù:?ó<>áB>É@6±A-°@,¯?+®?,©@+¡A+™A)•A)?,?,>+<*>)>)‘?*”?+“A5žA2´C3ÂC4ÀF9£I>pD7F:*=85<7494173063.32-32-21,-2,-2,/1,10,40-5-+6,+>43`;2|F:PN’gxx£s¹Z_–NBrNCcMH_ON\ost†~…wjl^VWG[U_[U_,,*--+0/+10,30+30+4/)4/)3.+2-*2-*3.+4/,4/,4/,4/,3.+2-*1,)1,)4/,4/,4/,4/,41*41*41*52+52+63,74-96/<5+?9-A;/B:-C;.D;,F=.K?1FI6PH5aE0tA,‚;'’=)¡D2®M<ÏEBÖE@äB?ò<;ú9<ï=;ÖC;ºA0­@+­@,¬?+©>,¤?+ž?+–@)‘?)?,>+Ž=*Ž=*Ž=*<+Ž=,Ž?.”C2¥A2¼B5ÆA8¹D;›MCcC8:;-@85>63:5174/43.23-14-03,,3,,3,-2,/1,23.40-1-*50-N7)b=-qE<€ap|v¤ny»[`¢NF‚PAjJ@bFCXEHQRXVU\TQWKRXJ`XU`XU*,+,,*..,0/+/.*1.)1.)2-)1-*0,)0,)1-*/+(/+(/+(/+(1-*0,)0,)/+(1-*1-*1-*1-*30+30+30+30+41,52-74/850:3+<5-=6,>5,A9.C9-F<0K?3DE5LE3XC2f?.u<+ƒ:)”>-ŸF4»F<ÆH<ÚE>îA=ò::ç>9ÊD8©@*¦A-¦?,¦?,¤?- ?,š?,”?+>)>-Œ=,‹>,Š=+Š=-ˆ<,‰=/‹?1“E1¨B4¿C;ÃB<·PIš`UeUH>I;?80=6094.63,34.14-/4--4,-4--4--4-.3-/1,23./0+0/+</)K5([B;}l~ˆˆ¼z‚Íkn½OL“J=hD=_DBX<CMBLKLWOMYMR\QaZJaZJ,.-,.-,.---+/.*.-)/,'/,'-,*-,*-,*-,*-,*-,*-,*-,*-,*-,*.-+/.,/.,/.,/.,/.,0,)2.+3/,3/,2.+2.+40-62/81+;4.<5->5.>5,@6-C9/H;2G?4L@4T>3]9-m9,~8,Ž<.˜?1¦B3³E6ÉG:ÝD>çB>ÞE?¼D4ŸC, 
A-Ÿ@,Ÿ>+ž?-š=,•>+>+Œ>*Š=+‰<*ˆ<,…<+†=.…=/…?3ˆB6’G4ªH;¼E?»LE°f[—zjrtfM]PMI>A?3:7.33)34,03,-2,,1+-2,-2,-2,-2,02-02-12-12-7-6?42RID€|‹’”ʼnØqyÁbh¦TQpIKbLTa]klq‚zy‹m}p\j[a[Cc]E+/0+/0-/.-/.--+-,(,+',+',+)+*(+*(+*(,+),+),+),+),+)-,*-,*-,*.-+.-+/.,/.,0,)1-*2.+1-*0,)0,)2.+40-91.;30<41=4/=4-?4.A7.E80G:4J;4O:5U61c60u6-ˆ:.”<0šA3žA2®B5ÃE9ÑG=ÍH?±F6™B.šB.˜@,™>,š?-–=-‘>,Œ=,‰>+ˆ=*‡<)„;*ƒ;,;/=2ƒ@7…E;“K<£I@²LG®^U¦n“•z‹xao`PTCGI;=?224)01)./)./*/0+/0+/0+/0+/0+01,01,01,01,6+<711SUH‡‡Ž•²‡’¿€¶„š²‡–›|ŽŽv…€›Œ¬–Ž¦•€`q^ZV9`\?+/0+/0+/0-/...,--+-,(-,(,,,+++***)))+++++++++++++++*********+++,,,---.../+*0,+0,+.*)/+*/+*1-,3/.:20;31<41<20<3.=2.>3-B5/@72B94D95H70U7/h8.~<0Œ>1–?6“@2šC2¨D4µE9·G;§B6–@1“A,”?+”=,”=,’<-Ž=,‰<,…<+†;(„;*‚:+:+=1?5ƒD;…IAŽJ?JDªWQ¦uf™™}‰¨ˆ~šs~m\hRJVB=F58<.12*.-(/+(2,,0,)0,)0,)0,)/.*/.*/.*/.*7)841(Q\>~ŒrŽ›”Œ›ž¨ŸŸÆ«ŸÀ«žÂ¨´•‹°Ž´Š§ˆ}–yZnUST5UV7,03,01,01.0//10//-..,/.*...---+++*********************)))(((***+++---.../+*/+*.*),('0,+0,+2.-40/;23<42;31;1/;1/;0,<1-?2,:70983:94=82F3,Z6*q9*<+:3?3ŽB2“B/ž>.¨B6 @4•?2‘@+=+‘;*’<-<,‹<-‡;+„;*ƒ:'‚9(9*}9,~>2€A8„H@‰NHŒIAžPL«d^¥ˆvŽ¦„¯‰}ž†tfx^XgPHT@=C557,1.)1)'4*+2*(2*(0+(0+(0,)0,)0,)0,)9(232GW(vŠYŒ{Œ „°œÎ‰ Î§œË¡†µ‡„²ƒ†°‚‚¥}sjSgKIL/FI,.1(/2)12*12*44,63,52+30)7/,6.+2-*0,+.,---/+,0)*./(//(/.)-.*+-,*-.)./)./),.-.../.,2.+2-'6/'90'<2)<5+=6,=4-<3,<3.;0,;0.=20F/'97*.:,36-C+)Q'(Y2+]A3vB,†A1”>1“9.<,B/ŽC.Ž@,›82=1‚A/|B.ƒB0‡>/‡;-„;,x<1p;+s@-t<+€>0ŠH<‡MAQC˜LNm`zo}¢€Œ¦‰’¦‹‚£†kz`bX|VZnRQUDF?7:..6',2%,$-*'+*+),1(-3(03'10)1-*11-..-(HK8w‚`ˆ›nˆ£n†§p¶}»‡’´†«xƒ©zƒ¦|{˜ylƒiTcPCJ:;B203*14+34,34,66.96/85.74-92,61-4/+2.+0/-///..0*+-0*.0*./+,/+*/.*/.*/0*01+000//-0/+2/*61+92*=4+@6,?8.A8/?6-=4-<3.;0,;0.=20B4+O-,X&/L(,>1+;3(Q0'h.,i;,y=2‰<6‹;4‡>5„B4†B5ŠB6‹?/ˆ?.†?-‡;+‰:+‡:*<)w?(|<0v;-v>/x>0‚B6ŠG>‰K@ˆODšNN”l`ƒ‰m| 
|‚ª…‹ªŠ¡……•{a™j[ŽcZ‡`SvXE^H5G7)6,$/'4%(2&(1(+/+,+,.(..%/0$002,..+&DG6s~`|‘h›k}Ÿl€§r…ªt€§r~¦r~¥v}¢yx–tjeRdLAC58:,47.47.67/780991<92=:3<92>71:5194073073054232010.5,-3--3--2.+3/,21,32-23+33134/43.83-<5-?6-C9/D:0D<1D<1B90@7.>5.=4/=4/>50@72R/3\(4Q-1<8/4;+D6)Z/(`80q:59:ƒ98~:7}A9B;…@9~F/€B-†=,‹9+‰7)‚;)y?)qB(…2*’H=|:.E9|H;K>‚NAŠTHnoO{wZ†‚eŒ—w¤|}­ƒzª€uŸy„nl{Œlr…iesZUZFGA3</&9))3)',+&).('-)*,),*+1(+2*'?;0PUAhwXn‡_p‘br˜gqœgu jsŸjy¥ty£uwœss’pg€bNeHB>3:6+6906908919:2;;3?<5@=6?<5@;5?:4>93=84<94<94:7285091.91.72.72.52-52+44,55-540761<92?;2@9/C;0F<0J>0H>4H>4E;1A8/A81@70?82@93>93?<5A>7D=3F90I9,H;*F>)g3(z93Ž@<“D@‘HA‚?6‚90‹<5A5‚@4=2}=1y?1x@1|>1;1£3/å{¤TKL>nO;iU=sXCƒ`Lc€P“x]¾e_ÌbdÉv®–~– }‡wq‡•r~˜ss—qig_xXS]BBD.B5-:3)11'.1&41*<-*F',M$,ME2`[EfmNj|VhƒVi‹Yo–aq™dpœinšiw tu›tt–soŽle€_MgDLC:D;28;28;2:;3=>6==5A>7B?8C@9E@:E@:D?9C>8B>5C<4B;3B;3A81A81?80?80<71:7085.74-77/96/=90A:0C;0G=1L@2MA1MA5K?3G=3E;1B90@9/>:1?;2C<4I81[<7h=7o>7r>3y@5ƒI>ŽF7šG9?5ž>2¡I;£M<£G8¨D5 BC“99‡<7uA4lE4wC6‡10¬7@ÞPNÈFDÅb\‚M=h\BafFlgIjO¶eTâU[ú(Eÿ!Iÿ5SïQ`ågkàloÈuqÃzs¹€u²„u«s¨vm£d_›TR“HO>Bh86O0+J3-F3,J2.Q84]R2fc@qyRt†\m‰Yo]t—av™cr—nu—rz™y~›z”yl†ic~]NiFTNBQK?6907:1<=5?@8AA9C@9DA:DA:EB;EB;FB9EA8IB8I@7F>3D<1C;0D<1D;2C<2=90<8/96/85.96-:7.=9.B<0E=0J>.M@/M@/M?2L>3H<0E;1D<1@<1@<1@=4Q9/‘@FÕN_ÖR]™?7”H8¹LGÛ?J¡F3¦@1«;-­?0ªC0Ÿ;$ª=)±<+³6<¼?E¹HJ£LE~<.ƒ5+½BGá@RÚ83Ì73ÉTM•VEpbEopN€kL¥s\ÿKWý8Jÿ5Lÿ0Jÿ5Mý1Jÿ/Iÿ/Nÿ5Vÿ9ZüA`øEcõFeõCgüClÿHrÿXzïUqÛ\qªOXe/-O<.;@,9K3[Q.ieB|‚`}ŒkxŽjuixŽh~“lŸ…–¨’ ®¢°£”¢“wŠvaz\OjGVWEXYG58/690:;3>?7AA9DA:EB;FC<GD=GD;HD;KD:LD9JB5H>2G>/F=.E;/D</D<1A:0>:1<8/;60:8,<8-@:.D</G>/K@.NA0M@-N>1K=0G;/D:.B<0@<0@>2@@4u:4ÎU^ì<PÐ+2¤4&‘=#³?0×45¤@0ª</¶@6»G:ª?-¥:(¯:)ÁA4ÈB9Ý?>é:?ã?@ÑD=ÙSJÜ>=è08Ú6*Ð3*»80±bS€[A‚_C®cPÚg`ö=Eá<@×HDßQOêKOò9Gú1Dÿ4Jþ9Uü:Uÿ=]ÿBdþ?hüAlúAm÷@nÿ?hÿFkøKièZp¾Zdl71QA1FK4bUBƒ|j™›“’Ž›‘™¥›—“­¯¤Â¶ÉÉÁÍÉÈÆÄDZ³°‡“‡ey`RmLU^CW`E47.47.67/9:2==5A>7EB;GD=HE>HE<JF=MF<ME8ND8OC5NB2LC2JA2F>1D</B;1A:2=90<71;9-<8,A;/D=-I@/LA/NA.O@-N>1L</H:-C9-A;/?;/?=1@@4žCB¼HI¹//¶0%©9!C!™C"œ? 
Ÿ8/¥6-³>7´B8«?3§;.ºD:Á@:Ä>#ä2&ÿ $ÿ#)÷*'è3(å3'ë/&Ö7"Ô6*Ä;3ÃeYžaNŸ[HÜa\ñCLí?AÓ@9¾@4ÎA:òAIÿ4Gü6Gõ=Iý;Sý8Tÿ7Zÿ9cÿ>mÿ?pÿ@tùAuúGeÿLjþMjÿa|ïfxÏjr˜TQPHŽz³©§¼¼¾¸½Ã¼ÃÍÆÈÔÒÏÚãÙäìáÝíáãëÛåâÖàÇÂȘŸ˜n€j\uW^mN]lM11/442783893:;3=?4AC6DF9GI>HI9IK5MM3KJ6JIDRP[KI^OF7I@1IA4D<1B;3?82>95;63A62?74<;9===?@;EA6LA/O?(@B-J91N45F698?82E27A)QK3½L<¡6$Ÿ:& =&¡=&®?+³:)µ6%¡:)¡<*§B.©B/©>*´?-ÂD5¼8+Í6+Ë<,Ñ=/â7/ò/-õ/,ä7)Ó>(Ú8#Ñ9$Ã7&ÉE8ÕJCñSRöAFþ9BàDEÌA<²<0¨?,¿N>ÖI@ì?Aÿ<FøEKûEQû@SÿA[ÿEi÷>g÷CpüL{ÿKwíRrçYqÿYzÿHqôLpÀhvzmdº ‘ÝÕÊàäãäàîôÚõÿÞ÷ÿéöøôõûøó÷óðñëëéãåØÔÕ¶¶´Œ‘Š{…z„‹y~…s..,01,34/560891;=2?A4BD7IG;KI:NL7OM8NL@`_dttŒsv™jbwVO_JDPC>DA=>;83=90=9-96-<90><0></E?1K?/O?/P=,7A&<=+>:/;83?:4P20r*.¥:D¥7&¤9'¢<&Ÿ;$§='´A.½>-Á<-°?/«:*¬;+­<*³;*ÃE6Â=.Ã7*Ö6*Õ9,Ú:,æ3,ô.+ò/)à5$Î9!Ü3 Û:(Ñ;-Ì?5Å:3ØD@çIFå@>ÓC;Å?4²:,«<+µ=/ÕJCä@AüGNñ@FúFOýDTü@YÿDeÿFoÿLyýLzÿFxÿMuÿWxúPmøKiÖK`ӋŽdWGÁ—‹øâ×û÷öøõÿþíÿÿîÿÿöÿÿýüÿþùÿúöþõöýòöîåæÍÉÆ®±¨§­¡ª²£¤¬32-10+21,44,77/::0>>4AA5KE9PI9SL9RL<TPMqr†»’™ÐË€}´nm™\[{POaA@E:9497+3828:/?<+D>&I?&K@,JA2I?5O<6F93893/<24</M5+‰?>¨7=¨=+ªA,§>)©?)¬9&¹<*Ä:-Ì<1Ã?3½9-½9-¼8+Å=1Ê<0Æ2(Ï8-â1)â4)é4+ï0(ö/*ð/&Þ4#Ì7Ù2 ×4#Ö8,Ô@6ÛLDÌC9ÐL@ÅC6¿;,º;*µ:*¹?0½=2Ë@;ÛEFëLPí@DöGNýKYûE[ùB`þIlÿOxÿLvÿP|ÿOvÿTuùOiìMaÿz†ÿ›–KF¾vúÑËÿ÷ôþüÿýúÿÿùÿÿûþÿüùÿüöÿûøÿ÷øÿóõöìëàØÕÔÑÈÔÖÈËÕ;ÈÀ;;366.22*33)55+88.<<2??5LB6TH8WL:TJ>WRXwx—–œÖ¢«ö£¨ùœ ê—›ÛÅ€‚©hh„TSeKIVCET>=C>;2C>*CA*BC1?E;;DABASJM\MW`LXVLNCaJ:‰R>@,ª?+«@,¯B-°=*¶9'Â8+Ì8,Ô:0Î92Í81Ñ:3Î7.Ö>3Ò5,Ù;0Ù8.é1'í1(ô/)ö,(÷.'í3'Ü7$Ï9!Ñ9$Ï6$Ó7*Ñ7-ßJCç]SÈH=·?1¶>%¶<%¹:'Â>1ÈB9ÏF@ÓIGÓIGèDEïIMûQZúN^öE_úHhÿPtÿQxê[wö\xýVpùRfÿ[lóRaòO`óL^¼ql︳ÿíéÿüûùÿÿùÿÿüüúüùôÿûõÿüùÿôôøêêìÞÝäÛÔïéÝø÷åæñíÏÚÖA?3><0:8,85,52)63*;8/A>5KA5SG7WJ9ZNBUNUpp”˜Ù¬ÿ©µÿ¬¶þ«´ù«°ò ¢ß‹ŒÄtq¦_]]TƒVOqEAP??=>A6?G<;DAFOTMl‹i|š‚|–j| Xc¯RM«A1§;!¨;'¬=)±<*·:(½5'Ê8+×:1Ý:1Ü:5Ý;6ß<5á>7åA8Þ;2îLAóQFî6,ï,&ø+(ú*(ö1+é4)Ö3$Ò7%Å=%Ë='Ï9(Ð6*Ó9/ãNHÜOHÀ93·=%º<%½:(Ã;-É@6ÙTMÉJCÃF@âHFéOOíQUíMWøSd÷OhþQoÿRsíWtû\xÿZsýTgÿYgÿ[jøIZÿMcªg_ќ–ùÚ×ûóð÷ÿý÷ÿÿýÿüÿú÷ýúóÿ÷ôÿ÷õïáàáÓÐèÞÔþ÷åÿÿëïøõÖßÜGC7D@4@<0=9.95*84+;7.@;5G@6NE6TG4WJ:WNQhg†‡Í—§õ©³ø®¸ý²»ÿ²¹ÿ¬°ùž 
êŒ‹×yvÃod¨bZ“RRvBCU<:?QABdGK‡bi¯q¢¼u¡ÂfÈMlÒ>TØ?DÁ9+µ="¬9&°8'¸;)Á<-Ì=/Ù?5à=4ß80Ö3,Ø3-æ?7â;3Û4+Ö4'Ø8*Õ9*ñG8õ:3÷))û*-î.+â4-×3)Ó1&Ì;(Ï<*Õ>-Ø</Ù=1×>8êRQÔ@@Á<)À;(Á9)À8*Ä>2ÐNAÉMA¿E8ÕH?ÜOHßQOÞMPñYeý^púTlúPkÿKsÿWvÿVmÿ_pý_môbläfjÑcb}WJ¡{rßÀ½÷éè÷üøöÿþþÿÿÿúýúûóýúóÿú÷÷îéâØÏäÝÍÿûåÿÿæð÷ðÝäÝMJ9JG8FC4C?3>:/<8/<71?:4C@7KE5QF0WH3WLHb`vƒŠ¾”£è§­÷«±û­³ý®²ü©­÷¢¦ð™ç‘Û†…Ënr¯bh˜\YxkPcˆJUµQYÏS[ðBcåG^ÕAOÍ3=åAHÝ?>ÊB6ºE1¼A1ÁB1ÉD5ÔF8×@5×9.Ø4+Û4+Ø7-Ý:1Ü8.Ü5,Ö4'Ó6'Ì5"Ð=)Õ>-ðB9þ68ø-0ë20Ø/*×2.à42ê72æ:0à:,×:+ÝE8éTMÕA?ßJLÑ>4Ç7,Å;.À<-»<+ÀC1ÁD2¼?-ÅE8ÍMBÒQKÓOMê_dÿiuùZløRhÿRvýWoõlvífmôbmîhqÓ{yrb_]Hyg[Ë­«öàâüø÷ùÿÿÿÿÿÿùÿûÿ÷üÿöÿÿøÿüôïçÜëæÓÿýäÿÿâó÷éêîàQN=OL;KH9HD8D@5@<3?:4@;5AB<JG8QF0VG0SI?`^lƒŠ¶“¡Þ£¤÷¨©ü«¬üª­ú¦ªó¡¨ì¡©êž§è–Ÿà™Õˆ”ÄŒ†ª–i€¹SaàCL÷8?é@-ÖI-ÂK)¼E'ÝP?Ü:7â<>Ô59ÁB1Ã?0Å9,Å3&Í3'Ô3)Ø4*Ú6,Ý?4Ù9-Û9.Ö4'Ô4&Î5#Ì;&Ç;"Å:'Ü5,ù78þ7<ò;=ß:8á99å58ü+0ô.-ä2(Õ5%Æ5$îaWÎ@<ÕFHæIDÏ81Æ9/¿=-¹>,»A,¹:'¾=(¶<-ÀF9ÇMBÉLFæabújsýbrúYkú^sàkq㊄àwtþp|îaq󝞷šŠKbF`]N¾¤£öÜßÿ÷ûýþÿÿýÿÿ÷ÿøÿ÷øþôýþöÿÿöû÷ëöôßþþâÿÿáýþîûüìXO@WN?TK<RI:MD5KB3I@1I@1DD:KH7RJ3TL5UNFa`r…µ’›Þ£ŸôŸ üŸ¥ÿ ¬ÿŸ¬ò¢§ç«¥í¯¢ô•ªý’¢Ý¬¼¾lzÝ@Iõ6>ê<>ÕF@á:BÓ??ÊC?Ï@<éCGì9?â9<Ô<9Ë@9ÊE<ÅG9µ:+»9+Æ6-Ó1,à42á>-ß4#å8'Ú5!Í7Æ8 Ë8(Ï5)Ñ3*Î8*É8%Ñ6"à2'ì0.ì+0ê-4ó+.ì/+ã5*Ô7$Ä3ÜO>ÙKAÒA<ÞRQÒD@É90È9)Æ;(¾:%»>*¶?+³;-¸>/½?0ÃA3ñmaíhaïiföpoÿmzïdkòadõtoý{yÿVmõŽ}¦†SiB`dI¼¬Ÿûæãÿøøüúûûÿÿúþÿãÿíæÿê÷ÿóÿÿòÿýíÿøçüöàýùàüþûþÿýZQBXO@VM>UL=QH9OF7NE6OF7IG;OJ7TL5WL6VMDb_p}€«Š“Ζ—Ø–šä™ 
ðœ¥ðœ§å˜¡Ú˜œÛ—–ÜŒ’Ψ†¹Èl‘ÏG[ÝAEâ@>ã?@ä=DÜ;@ÑA@Â>9ÖKFâ?Bç:>Ø88Ì=5Ê@6¾:-ÆH9¹<*¼8)Ë9,Û81é<8Ó5)Ú6,Û7+Ö=+À9#¸;%º<-½9-ë?;Û=1È:&È:"Ð7%Ô0$Ø1+Ô1,Ø1)×7+Í6#Í<'Ê<(áM?×:5ïLMùecíZS×D:Ä5%Á8&º9&¶=*±>,­<.±=.¶>.¿D5Û]QéhbîjhédgåTaóho÷djûws÷qrýG`ñt„q’sQj@hoNÊ¿­ÿõíÿý÷ûÿùùÿûóýõØýÞÔòÖãõÝôùåûøåûòáíåÒòê×üùôÿýø\P@[O?ZN>YM=WK;VJ:UI9VJ:MI=SL:XM7XM7UMB_\etu”ƒ‡¶ŠŽ»È’א•Ù˜ÓƒÀy‚¹owµ„p‹¹[ì=dö5JæDBÖLAáEFö:KÛBDÌEA´:/ÐKDÜB@ß<=ÖA;ÊH;Â8+½8'½<)½<'Å<*Ò<-Þ7.æ81æ44ç10í53ß4-Î4,È91Ä22Ô>@ò14à5-È7$Â<#Æ;$Ë8&Õ=/Ô>0ÊC0¿:'Â>)Ä<&Ì;*Û=2â64â.1Ô72Ú@8ÝE:ÓD4ÉA1º=+°=+¬=,©>.«=,¬=,¶B3ÓZOæicëikìfmûozðgnûhnÿxxöelþ=Zîgx”ucOz‰`ÍÉ®ÿùæÿÿñõÿñèûçÖçÔ´ß´³Ù°ÁܹÔãÆêìÖôíÛëÝÐíÜÒìâÙëáØZN>YM=YM=XL<XL<WK;XL<YM=SK>VM<YN8ZN8XNBYSUb_rqpy|€±†„Ň†Ê½wy¬oo£gfŸŒWSÅISü8Lÿ4AðA<ÝJBäEIö;NÔEAÄE<¯;.ÉK?Ì73Ë0,ÎA8¾E4Â;(À;(¾<&¿;&Ç9%Õ:(à6)æ4*ö.1ÿ16ÿ-4ÿ.4ú17ô/8þ1Bý'?ö'/è0.Ô6+Í:*Ð8*á=3ä<3æ;3ÑL;ÉA1Á8&Ç:)Î6)ä?9Û++ì57Õ4*Õ7,Ð8+È9+ÖQB¾@2¯9+­<.¬<.®?.«<+¬=,ÆQGâhcîpsös{út}öpwÿnwÿpwõ[eÿ:ZëZkš¥…{œe‡šl´¸•ÚÝÀ×æǾٸ¬Ê¦£½šŒ½†–£Ç—³Ì¢ËÖ¶ãáÌêÜÑâÍÈÕǼ̾³ZM<ZM<ZM<ZM<XK:YL;YL;ZM<YM?[N=[O9\P:ZN>XOJZSZ_Yehe‚pmš|w¹‰‚È‹½ˆt©’t¨‰dš¡@0»?3Ý>8÷@<ö=;ð=@ëBIä@KÄ?8´>2¬=,ÆH<Ô?9Ñ4/ÏB8³:)¿8$Ã<(Â;'Æ;&Î9%Ö4%á4&ð:/î3*ô-&ü(%ÿ'(û$)ÿ$0ÿ"8ÿ1ö&3é*1Ú,-Ù0-í76ö45÷/1ö.0Ö90ßB;Ë1)Ï2+×82äA:Ø1+ã81Ô<.Ë5'Ì:-Ç;.æ^RÀ<0½:0·7,·3'´4'±7(°;*µ?3ße`òuyùwüz‚ùv|ÿr}ÿitôVdÿ:\æO`•œzwš`}“b’s’žx}–nq•gr˜g~ n±sŠº|’¾ƒ›¾ˆ¤¹½Á¨ÙÍÁ׿½Âµ¯µ¨¢YL;YL;YL;YL;XK:YL;YL;ZM<\L=]M=]N;\P:^Q@[OAZOI\QO`Tlf^‡upª…|»›„º¨y¥ÁvŸ¹\ˆÒ=9Ä@3ÈI8ÚC8õ=?ý9Cë@FÍA@»=1¬;+ª;*ÃC6Ö;6ß;9ßKAÄE4À9&Â;(É>+Ë:)Ð4%Ü4'ñ?5ì4,Ø9&ß4#é.%ì,'ë+*ð(2ø#7þ7ð-;ó;EüKUì7@ð,8÷.8õ.3ì,+Ú51Þ97ÿ]\Ú86Ü=9Ù<5Ñ7+Î5'¾7#À8(Á<-Ä;1áWMÅ5-Ó>7Õ<4à=6Û>5Ì=/½9*¸<0×\Uòvxþ†ùzüyÿp}ûcpóPaÿ7WäM\tW^HkPx…YrƒYfƒUj’^w h€¦mƒ¯p‰·vˆ¹wŽº{‘¯{œ§…º³¡Ê·±«¦ª™”˜[L9[L9[L9[L9ZK8[L9[L9\M:^K<^M;]N;^O:[N;\O<_O?`PAcO[bUomj•|v¢ž€¢Ï‡ämàKaÿ:MìGMÊE>ÑF?êBBô?FãEDÅB8·?1©:)©:'Â<1æB@ë=<ß=8æZMÉD3¾6&È;*Î8*Ó3'ç:3ì5/ê-)ç53ð45÷29÷4<ì1<î3Dó.Jÿ3Wë=GÿYdÿ[hñ@Pî8Gâ3:Ò72Á8(Æ3)åRJéTPÖA=ØC?Ë7-Í;,É8%¯=%²=)²8)À=3×H@Ñ4/æ=6æ5/ó-.í4/á90Ð<.¼6*ÐQJñusÿ„†õx~üz‚ÿn}ùapðOaÿ7WÞLWXh;[wDmPy†[}cy›iz¥m«n„ªm‡­p‡´s„¶o‡¹tŠ²v‘¦{¸¸ 
ÖȽ²¶Â™©ZK8ZK8ZK8ZK8ZK8[L9[L9\M:^K:_L;^M;^O:`Q<`Q:aQ:`P9gMN`Tbmm‰|y”ž~×€†üjmÿHNÿ)Mÿ9RÞAJÓCCÛBDÝB@ÔD<Â@3²<.¨;'°?-ËA6á77å.0Ý52æRFêeVÈ>1É:,Ð8-Ú7.å61é0-ò53ÿ'Bÿ$Aÿ=þ;ó"Aò*Nñ(Tÿ8jú]f÷[fîM]ß:KÜ:G»-,¥8$˜E%´;(ÑSEÄA7Â;5ÚOHÇ9-Æ9'Ç9#¥?&¥<'§6&ÈH?ÙHCÛ62ë40ò1*ÿ)-ú,,ê3+Ú:.Ä8+ÃC8élhÿ‡‡û~„ý{ƒÿgyóZlëJ\ÿ5TÓFNJ`/]xE|`‰”j‹žq‡©w}¨p|¨i¨i„¨kƒ­m±j·l†³r’¬}»¾£×˽ªµÉ‹–ª\K7\K7\K7\K7]L8]L8]L8]L8]K=[N>ZO=[S>^S?^Q@bPBbOAkLIgWbqv–Š·˜·¹‚™åywùlZñO\êDPñEQë=Fá;?×<:Ì?5½;-¦@*«:(ÅA5Ð92Ù6/Ú6*Û:&Ø:!ç@:æNCÉ<+Ï>-å@:ê16ó1<ù4Eü&(ý')ü('ø&%ñ%(ñ)3è(7÷:NÿdmýX_ïKLÔ=6¿B0«B,’3“4"§60ÇGDÄ44ÖB@ØICË>4Ç4*Î6+¹=1¶0%Ã5+×D:àI>Ù5,ì0.ÿ.1ò12ó/0ñ1,ã7+Ê8)¸>/Ýjcý‹Šø‡‹ÿmÿZvÿNjù=^ñ>\¨LOHV5^€Cvš]y¡e~§k}¦j{¥f|£d~¢b{¦n~ªk}­c²c†´kˆ°t”³‰¶–‚‘cqr]L8]L8]L8]L8^M9^M9^M9^M9^M9`Q>[P>YPAZRG^SOdVViY\k]]lj€w€¹„Ü–‹×°‚±Ñ}Šê~qÿbgòKSöFPñ=Fá7:Õ<7ÇA5¹@-©>,²A1ÄH<¾5+Ì8.Ö:-Ù8&Ü:%Ü3.äH<ÕB2Ó<+è@7ì23ù5?ÿDPò01ì**ï--ì,+é-.â-2ñAKûO[åOQçHLá=>Õ:6Ã:0¹?2»E;´94¨3,ËGCÕCCéSRÎ>6ÏA7Ò>4ÞA8á=<ç=>ç;;íEBÝ<4Ø4+é3/õ20ó12õ/0ò1,ä6+Ì:+³9*ÜkcÿŽù€‡ýjzÿVqÿGgú;]éMe€;6N\;c‚Gr–Yuay¢fx¢cv axŸ`| bz£g{¨ezª^}®]†´iŠ°s‚¢sq‹hKZU5D?^O:^O:^O:^O:]N9]N9]N9]N9^M1_O6ZN>YRJ^Y]qjzvn…vl‡nuˆt}¨{„Ï…Šæ•â´©w€¬m[ç\Wõ]\öLOí8=ç79Ú=8ÉD5ºC/³=1«7*ËYNÇQEº:-Ê;-Ú:,à6)â3.á?4ßC4ß?/ä3+ð0/ó04ÿQYÿZ\á46ç;;ß33Û32Ý:;ãGJÆ-0Ë32ß<?Û25Ô01Ô<;èZYôch÷_kÉJCê_\ô\[Ö<:Ó@8ÚJ?Õ>3×6.ú.:÷)6ô+5üBCÞ3,Ù5+ã5,ï4/ö/2ø-0õ/,æ5+Ð<.±6'Ôc[ÿŒÿx‡ôfvúNhÿ8_ú;]ÅO[`<,YdFg„Ns”[wbx btž^u^wž_| 
by£cz¨`y¨Z{ªY­`…¬k‡¥sz•lizj[l\_P=_P=_P=_P=^O<^O<^O<^O<bQ3_P;ZQH_Zarp†¥|z«yv«r~¸{…À„ŠÆ”–ɦŸÀ»¬±Ì¶¡²˜s®S>²B4ÚMFðJJë=>ä<;×@7Æ</½=4±8/ÂVIÍfWÂQA¾8,Õ7,â4-â4-ß9-äB3à9)è2'ñ/-ñ10ï56ñRV÷X\ãBGæFHäFGôZ\Ö@AÆ45ê=?î;?ç/7óBJÿagÿemú^küYjê]VÞFCÓ41×84âKBßKAÖ90Ý82ú*7ÿ-:ÿ4>ú>?ß4-Ü5,â4+ð5.÷/2ù-0ö/,ç5+Ó=/³5'ËVOþŠŠÿkî`pøIfÿ6_÷Dd’DDLH/V\@d~Nt]yœbtœ]n˜Vp™WsœZwž_x¡]y¤\u¤Tw¦U}©\‚¦fŸkubey]XlP[N=^Q@]P?^Q@_RA\O>`SB_RAbS>^RF^X\omƒx{¤}€·tx¸nq¶uyԉÍ§§¿ÃÀ¯ÖÒ¥ßٟß֝Ö̗ġƒ³yaÄdVæa\ëKKå;<ä::Þ63É83À=5¼L@½WHÐeUÀ@3Ï5+à3-á40â;2Ü8,à6'ê2(í0*é2,â5.Ø?AíRVéJOÿemÿckþahÿfiðZ\ÿ<Bù1;ñ)6í3@þXbÿfnÿcnüZgâGBÙ53Û2/Ú51ÜB8Õ=2Ú91ä84î7;ð69ð67ä2.Ü5,Þ8,ç2)ó2-÷03ù.1÷0-ê5,Ö>1¸6)ÅJEÿˆŠÿc{óWmùDcÿ:aßMdc9-9C(OQ9[pErŒ\v•\o–Uk”Pn—SqšXt›\tžVu¡Vt SržQsSm‘Q]zDKe6>W/<U-YL;]P?^Q@`SB`SB`SBaTC^Q@\QOaZaqm„xz£v{³ks´`g¯ou¿‡†ä¦¥ßÉÇÌãà³íé ìèŸâÛ¤ÊÙ¢¥Š—…oœgWÆibötrèLMí@Bì8;Û<8ÑA9¹;/­<,ÒaQÙWIÌ5*Ø4+ã><ìKCÖ6*Ý2(å0'ã.'ß4,Ü>3Å1-ÜFEìQUíMWîNXøXbÿlrçLRò.2ø/9ð(5þDQîGOñUYÿ^cíDKß82é<8î<:â70Ù;0Ö:.Ý90é:5ß<7â:7å95Û4,Ú8-Þ8*è1)÷0+ó23ö01ö0-ê5,Ü@4Á7-ÍIGûv{ÿ[tþLlû;`îCc°LTYF5;D)JE/Qa<j€RqŽVkMg‘Ii’Lm–Ro—XtœVq›Sk—Lg’J`‡DUw;Jf3G^0Hb2Pj:TH8YM=[O?^RB_SC`TD_SC`TD_Xhvr‹xwŸuz±jr³\d¬iqº~†Îšâµ·çÌÎÛÙÚÌàÜ¿ÛÕ»ÎŶ²§£†Žƒ}{ori¢i`Ù~yóvrø^^ñGHé>7àB9Ì>4ÄB5¹;,ÎI:Î?/Ò9+àBAû`\Ý@7Þ:0ä6-á4-Ó5*Ã5'º2"È>3ÓB?Ô=BßEOñWaøahãNRê;8ò;=üCIÿR[Ý49á8;øDGò46ê93ñ:6î73ä50ß<3Ú<1á81é62ã;8ç:6è94Ý5,Û9,Ü9*ç2'ò/)î53ó11ô1-ê5,Ý<2Å5-àTUûktõRmÿ;bÿ7`ÜSgLF[U?PN9NF3FT3]oEj…NiŒHeEg‘Ik”Pm•Vp˜Rj’LcŒF_ˆDY€ASt=Tn>[rFVs=Wt>OC3UI9XL<]QAcWGbVFaUEk_Ospwwtv©jo¯]e­em¶yÉ–Û›§Í«µÙµ»Ýº½Ü¼¹Ö´®Ê®¤¿˜‹§utyjfevgbƒaXZQÕphÿzuÿjgÿPJìE=ÜB8ÕG;¿7)Æ>.Ë>,ÒA.Ö=?Ò=9Ô;5Ü92å:2ß80Ò;0ÇB1¾B(ÅD/À:1Ä57ÛJQèU]êY`×HLÏ7,Ý;6ç=>ç:>ã6:ì8;ù25ÿ-1î71ï4/é-+å4.á=3ß>4å:3ì63ì46ï34í42à2+Ü8,Ý:+ã3&î2'ì63ð31ó2-ê5,Ù6-Í82ùgjò\gèJcÿ3_ÿ8e¼IVW@0PN7YL;LA/<I+N`8a|EeˆDbŠAcEfKi‘Rj‘NeŒI`‰G_‡H]ƒH^|H_yJezO[xB\yC?C5IF=RH?]M=`R8^VAc_\on|wqws¥gh¡^_¢eg´wuɊ†Ý˜’읜֡Ø¡Ø ›Õ ˜Ñ–ʘÁ…}¬lkpc^[oXR„VI¢XM¹[QÙqhîƒ{ø€êdcçSQÇ3)Ä7&À7$Ç:(Ñ>.Õ9*Ô=,Ï8'Ú:,à:.Ú8+Ë:'¿?&Ã:2ËB:É@8¼6-Ä>5×QHÛULÊD;Í<!ß8(ð,.û'6ÿ)@ÿ(@ÿ#@ÿ ?ÿ 
:ÿ'<ò.8æ::Û94Ý<4å:3è62ì23é62â7/Ø6)Ú;(Û:&ß9#â7#ë4.ï4+ï5)ä7'Ú:.ÝB@ÿdtùKfÿ?aÿ6^ëHet7679$CD2FC28G0FIX`;csNd€Oa†Cb‹=fŽEl‘Ni’FeFeŠGc…HdƒJc€FeGb~C\<a†A4:,??7KB;UI;ZO;\UEecfnoƒol—b`‘XY’]`£lpº|}͈†ÙŽŠÞÌ‘ÎŒÏŠÍ‰É‰¿Ž‡³}w^ge\W]^LvWE±ZSÔXXâheÞvmó„}ÿƒ}ùngÄ8+Á:'½;%Â9&É:)Ö8,Î6(Ñ>.Ë3%×7+Ô6*Í:*Â;'Á7,Ã9.Ç=2Ê@5ÌB7ÍC8ÎD9È>3Ò7#ä6-õ.3ü)8ú':ù%;û#:þ 7ÿ 7ü(7ó6=ä::Û94Þ;4å:3é62î45ê65â70Ø6)Ù9)Ù:%Û9$Þ7$Ý5(á7(ß8&Ù:'Ô8,÷X\þSfúBdÿ5^ö>b¸DWI*'-8'6>)B='EA&Wa/…”isˆ]`~J`…@cŒ>hGlPk“JhGgŒIfˆKe„IdGf„HeDc‹EhJ-5(7:3A>9KD:PJ<TQHccmhi…baQS†TX—ei²sxɀƒÔ†‡×Š‰Õˆ‰É††Ì†ƒÒ†ÓŒ„͍…¾‰ƒ§wt‰OZhZQRfKh\D§PIÜQXê_dãpmâvj÷|tørgÂ:*¿='¾>%Ä;(Ê9(Ú6,Ñ9+Æ;(Ç<)Ï<,Õ<.Í7(É<*È<-É;-È:,Æ7)Í>0Ï=0Ð<0Ê6*Õ.&ê22ø/9ü-=ú0@÷1>û/;ÿ-:ü,:õ0:ë7:Û64Û94ß:4æ95é54ó57í76ã81Ø6+Ô8)Ô9'×8%Ø7#Ñ:%Ð>%Ð@(Ð?,Ø=8ÿ^iþCbÿ:eÿ/ZÜRkc28841-7.4@*<=n^:“¨m¬Äla‚I_‡@i’Do—Np”Tn”KhGhJi‹Nd†Je„Hg‡Hg…Ei‘Jn–O&1#/4-664?<7GD;LMH\^k]a~[\ŒWX‘fh²v{Ï~ƒÞ‚†ß‚„ׂ„у‡ÇƒÍ„€Ô‡×Œ„ÏŠƒ¶‚~•nlqQOg[KYhKeaD“XHÉUUãX]íbgîujæaXÓE9É:)Ä<&Æ>(Î;)Õ7+Ò/&Í9+Â=*¾;'Ê>-âL>Ð:,Ì:+Ç:(Ç8'Ê9(Ì9)Ò:,Õ<.Ú>1×9-ç55õ8>ú2?ô.=í/9ë28î15ó/3ò5;í6:á55Ü94Ü94á96ç77é56ô5:ï77ã73Õ7,Ï9*Í:(Ï9(Ð9&Å;!Ä>#Ä@)Ê<.îIMÿPgÿ;cÿ2cõB`CI>=8:8;B449>(J\,›že¬ÊŠ¥Ä‹ƒ¤oa…H^†?j“Eq™Pn•Tl’If‹Ef‹HhŒNe‡Jd‡GgŠFg‹Ej•MošR(0!).'/1.783=?4FIBQT]Z\s[Z†cduwą†ãŠŒð‡‡ç€€Ö‚Ï‚†Å††Ìˆ…Ò‹…ω½„}ŸyrzfbYg_Lf^If`HhaEuZ?¦^OÕURýS\éZRÕ<4Ù8.Ø;,Ð9&Ð;'×7)ß4,Ì2&É<+ÂA,½<'Ã7&ãOAÉ7(Ç:)É<*É:)Ê7'Í5'Ô8+Ú:.Ø4*ä=4ö=Bñ2:ò0;ë1<ã6:ß76ïB>ô=9é;:å97à85Ý84Þ95æ::é9;í9<ô5:î87â;5Ð8-Ç:)Ä;)Ã<)Ä;)À<#¶8½?)Í:3ÿXeÿ>\ø4Zú;f®IOR:03C6G78S/1ON2r”XŸÃyŸ¾‚”²|l‰YWx?`„>k‘Dl”Ki’Pf‹Eb‡Bf‹HfŠJhŒLgŒIeFk“JoRs¡V/3$.1(/1.36-9=.AE6MOLUUa\Zrih~~¾ãíˆˆâ€}Ì~„†ÁŠŠÆ‹ˆÁˆ´…{žyn~od`h\NXeKd^HpYGq]Eq\?X?ÖWNþEKÜ94ß6/â6,Ý7)Ó8$Ò9'Ù5)á3,Ã7(½:(¿@-À;*ÔB5Ó?3È:,¿<*Å:'Ç:(Ê9(Í5'Ô6*×3)á90è=5í6:í4:ë3;å:@èMKù`[òPKå:3ß82ß82à93ä;8ä88ç9:é9<ë8<ï58ç77Ü:5È8-½9*·<*¶=*¶=*·=(­8$°5&ÜFGÿNaô9TëG`ÕJ_cF8ED07@+C5(O5$xxR‘³vŒÀu›·„…ŸrKd<Sm=a€=iBh’HcKdˆBeˆDiŽKlPiMh‘Kh’Hn™Jt¥Vx©Z:;)8:/8939;.>B+CG.KM?RQMXXNddlww‡…Æ‘ŽÛ‹ŠÐ‚~¹}y«}}³„‚³ƒ¢~vwmuna[k[LkZHb`Kh]Ko\Ky\J‰YC©R?ÑD:ã1-Þ5.ã5.à2'Û8'Ð:"Î<%Ô9'Ü6*¸>)±7"¾=*È9+æLBÕ=2Ã;+±=&Á9)Â9)È9+Î7,
Õ7.Ü71é=9å84é:7ì::ê=?÷YXúkeÔG@Ð71á<8ß;2à91ã73å76ç79è8;é9<è7=î79æ:8Ø>6Å=1³:)­=)©<'©<'¦=(¡:)©9.éX]ôI[éHZÆSZ ZRHR:JE/@<AR(`xFƒ”^¢j’µuŠœvUfF?N1Se;cBjBf’E`Gd‡CiŒHkMl“Rk”Pk•Mk—HrŸNv©Zwª[ED2DD8EE=CF5EI.JM.OQ9USF[]8]^Nkjzzy¥€ºƒ€·xvžol‹ki›om•olmgii_Uk[Ko]Ir]JuYNl_OkcP^M¢RG¸92Î4*Ù6'ß7.à4*à6'Ô8"Ê="Ç?%Í<'Õ9*±A+©4 ¾:+Î7.à=8èNFÇC4¨;$Å@1Ä<.Ë=1ØA8àA;á<8å97è88æ;4æ:6õPNáLH¾71Ã=4ÕB;Þ<9à<3â92æ95ê88ë7:ë8<è7=æ7<ê67â96Ô=6À</­:'¦;'¡;%Ÿ;$–;(•<*¬E<îelìN]ÑHR§ZPnP?Q9J9%WO+mž]x¼oy—W†ŠWwOBO39C+<F.O]9gEkEg“F^‘FiŒHn‘MkMiMqšTqRpŸOw§Swª[t§XTM=TM=TM=TM=VO?XQA[TD]VFb[IbZOh^\qflwkuznxtiqh]c^^VZZP[WL^XJd\Ig\Fj^Fm_EdhQwVE¢UK±UH¦J5¨=)Æ9/ß12Ü8.Ü8.Ú8-×7+Õ7+Ô8+Ò9+Ò9+·>-ª5#¬7%¾<,É6,Ó;0ÜJ=ÍD2ÛH@ÑA8ØC<Þ<7é77î87ä84ß>6ÅE8·9+±8'»@0Ç?1Ù?7è96ò67æ7<å8:ã:7ã:3á81ã:3æ:6å97è:<Þ<:ÎA:·?1¢=+™=(“;'‘9%€?+3+Ñ[[Ï[^¬QNšZP”^R™_TK`Vm'’®d˜³ly‘SXk>JV><B6>?/;?1:B3HY9a}Co“Ij“Gh’Jk‘Tm”Qj”Jk˜GnžHp¢Kv¨Sw«Yl˜][‡L^WG^WG^WG^WG]VF^WG`YIaZJd^Fd]Je]Pf]Vh_Zh_Zf]VaXOYWKRPCQN?TQ@ZUB_XEd^Hh`IdfQ~\P®[U¹C?À=5¾:-½9*Ä?.Ù7*Ú8+Ù9+Õ8)Ô8)Ô8)Ò9)Ò9)¸@0¦7#¡7!³>*Á<-Æ4'Ï=0ÔG6Þ44×31ß;9ê<=î5:ë14æ66Ü94³9*®6&­8&µ<+ÈC4Ø@5å95î66ë7:ê88ç:4æ;3â;2à;5Þ;6Ý98Ú<;ÒA<»;0¥7(›<(“<(:%8$v9$Ž?2´QL¬MIœRI“^P“bT–`V~[£µ“§rSi88J"<K.8A08;2=>.:>09A2GX8cEr–Ln—Km—Oo–So—Pk•ImšIoŸKt£RyªYnžRY‚HNw=c\Lc\Lc\Lc\Lc\Ld]Me^Nf_Of`FhbJicMhaNhaNgaKgaIhbHa^MXUDOL;KJ8NM9OQ<TVAXZE^\MsWK±f`ÎLNÜ9<Ø63¾=*«H)Ô7&Ö9(Ö9(Õ:(Ò9'Ò9'Ñ:'Ñ:'¹?0¥:&–8£A(´?+Ã9,Ê6*Ô=2ð3:é49ä79ã68ç48ç7:Ý98Ç/*¬;)«<)©<'ª:&·:(Î>3ß<5è96î64í74ê93æ;3à=4Ü?8Ø@;Ô>=ÅB:·=2¢6)™:(‘<(Š<&†9%…7#u;'†A1¡SG›SG’ZKŒ\N^SšaXew…aBQ4-:&0<.7?4=@7::0;<,9=/8@1FW7eGs—MqšNpšRq›QpšNn™JržOu¤Ty¨ZqŸV[ŠB[„JR{Af_Of_Of_Of_Of_Og`Pg`PhaQjcPibOg`Mg`MhbLgaIgbEgbEf`J`[EUS>LK7EH5EI8EL<EN=OOCXQA{QA¹SNâFGã77Ë=/«B%Ð7%Ñ:'Ò;(Ò;(Ñ:'Ñ:'Ð;'Ð;'¸<0¨=)9•>"¬?*À<-Ê3*Ù<5ñ8@ê@CÙ?=Í97Ä30È:6¸92©7,¢?*£@+£@)¤>(¬9&»7*Ë8.Ý@9ë:4é;4ç<2á?4ÝC9ÑC9Å>8¼;6¢:-˜7'’9'‰;'†=*ƒ<(~:'}6$y6%L<“QC“XJ[M‘]P•\S—ZUU]6=F'7C/2<34=8AF?DF8=<(:;+7;-6>/DU5d€Fs—MqšNq›SpLoœKpLx¤W€¬ar 
X]ŠG^‹H^…NSzChaQhaQhaQhaQhaQhaQhaQhaQi`[i`Yi`WiaTibPibPibOibOh`If`J`[GQP<FI6?F6>G6<G7BI9AK3QL/š`LÚ_WêHFà>9Í=2Ë8(Í:*Í:*Ï<,Ï<,Ï<,Ñ;,Ð:+¿:1¯>.•6 •7¬<(Â:.Ù@8çHBãEFÌ>:°:.£<-¢>/š=.=-ˆ?,”=*•>*—@,›@+¡>+¨9(´8,À<0Ø;2Ü@4ÛC6ÕC6ÎD7»=1¨6, 3,<,Š;*†;(=*|;'y:(x9'u6%†8,–N@ŽO@TF—XO”UN˜YT–YTR[.OY4;H.5@/GRBX_MGK2>? ;<,9=/7?0EV6f‚Hs—Mp™Mq›Sp JoŸKs¢R}©^y¤]bŒJ`ŠJeO^…PSzEhaQhaQhaQhaQibRibRibRibRj`_j`^jaZjbWjbUjbUjbUjbWjbMjbMd_KYVCKL:DG6?F6=D48F/BJ2PE)šfP»UGî_[ìIJå?AÈ:,Ç9+È9+È9+Í;.Î</Ï;/Ï;/É:6½=4®=-µA2ÎJ>ßJCæIDáB>ÐE@µ?5œA/“H3F2†C0}C/vD-‡:*‡:*‰<*Š=+<*“:*œ;+¢<.³9.¸<0»=/¸=.¬8)¢8*–8,’:.†=,ƒ<*:*}<*z;*x:+s8(q8'‘C9K>ŠL?“SI˜QK˜SN–]V”i`^m}P@P)O^=apSNY;@G(<? 9:*8<.8@1DU5f‚Ht˜NqšNq›Sp¢Ko Ny¨Zx¦^eM\…CgNb‹I^ƒORwChaQhaQhaQhaQibRibRibRibRja\jaXjbUjcQjcPjcQjbUjbWldQkbQjcQc^KWTCML:EF4AB28K-RB+‹M@§F?°;4ãb]ôfeìSXØNCÍC8ÑD:ÖH>ÚJAÛKBÞKCßLDçMMÝNJ×NHÛNGáFBæBAäB?ßD?ÒGD³>5™B1F2‡D3…@1~?0z?1‚:.;/€</€=-<,‚:+„;,‡;+’:.—;.œ;+Ÿ;+š9(’9)‰:)„;,€;,~9*{8(z8*x9*u:,o5'r:-ŒJ<„K:‡RB–WN˜QM—VRˆbWsfSQ`7EV*Zm@awIL^68G(<F.9=.9:*7;-9A2AR2a}Cr–LqšNq›SmžLu¦Uy§^g”QZ„DcŒJfJeG[€MPuBhaQhaQhaQhaQjcSjcSjcSjcSkdTkdQkeMkeKkeKkeMkdRkcVlcTkbSlcThaQaZHUP=NI6HC07J*mF5¶HIÅ3=É=@ÈLJäjgÙYZÔKCÍD<ÑF?ÎA:ÒC=ÔC>ØD@ÙEAà?DßEEàHGãCCê@Cí?Aä@?ÛB<Ó??µ82Ÿ?1Ž?0‰;.‰90‡81‡:4€;4~;2|<2{=0w<.x=-w<,x;)};/‚:,ˆ9*9(Ž8'Š;*=*~=+€8*}5'z4(y5*t4(s7,n6)t?1‡R@R?†XHSH—RMVOvaPBL1L[<asM\sELd47L#0@#:E77;:;<,7;-9A2=N.Yu;n’HqšNrœTo£Qs¦Yj™Q[ˆG`ŠJb‹IfŽHbŠAZLNs@i`QjaRjaRkbSjaRjaRjaRjaRjcSjcSjcSjcSkdTkdTkdTkdTq`PqdTneVicUgdSdbM]Y@QK1MD'lP:œdW¤LH»KJÈHIÐHHÓIGÓGHÏ?>ÓD>ÌD8ÅG8ÈH=ÔDCÜ?FáCDáCBàECáFBâDAáB>â@=ãA>ÊD;±;1˜</„?0}B4}?4„:1‹82…<-ƒ;,ƒ;,€;,:+~:-~:-};/}9.}9.|:.|:.z:.z:.x:-w9,u9.t8-r6+p6*m5(l4'l7)wB4UJˆQJ“OL–QLRJ`LedFXjD^xKPi?Mb;ET58B)5:&>?/@>11<.5;-:>->C,Zf@p‰Rr—RržQz¡buœ]`‡H]„EaˆId‹Ld‹L_†GXzGJl:haQibRibRjcSibRibRibRibRjcSjcSjcSjcSkdTkdTkdTkdTrbSk_QmeXieYmk^…‚sˆƒpwr^MW<CE-K@,Y=/`8,k;/yC7…M@¨SLµLHÈDBÔC@ÒB:ÔE=×FAÛEDßCDÞDDÞFCÞFCÛF@ÙD=ØC<ÙD=ÓF=¾B8œ:-…<-A2~?0ƒ;/‰90ƒ;,ƒ;,‚:+:+~:-~:-};/};/|90|90{8/y9/z<1y;0v:/v:/t8.r8-p6+n5*m4)j4(j4(u?3‡QGRJ’SL‘VNˆ\Qu^LiiO^mNG[8?S0CT4ES9?I17?*:>-9
;-3=46<2:>/<A*R^:hJp•Pu¡RxŸ`d‹L[‚C[‚C`‡Hb‰JaˆI^…FRwDDi6haQibRibRjcSibRibRibRibRjcSjcSjcSjcSkdTkdTkdTkdTqeWnf[kg^~w™˜“µ²«¸´©®¨œ‚oQVB=I5:G3<K68E39A2=A3O<-JCµMNÑFKÚACÜDAÝE@ÜD?ÞDDÝEDÜDCÙEA×G?ÓF=ÑG<ÐH<×@9ÈD8©?2Ž=,ƒ@/~?.<,‡;-ƒ;,‚:+:+:+~:-};/};/};/z:0y9/w9.v8-v:/u9.t:.s9-q7,p6+m4)l3(j4(g3&g3&q=0ME‘RKŒXM‚ZNs\L^WDRWA@L65A-2>*6B.=I5>J6:F26B.2>*2;65:38;0:?)KW3d|Hs˜Sx¤Un”WYB[D\‚E^„G_…H^„G\‚EQuEAe5gaQhbRhbRicShbRhbRhbRhbRjcSjcSjcSjcSkdTkdTkdTkdTlfZmi`‡†¶¶´ÐÒÑììêïëèãÞÚ×»°ž‹|c^JAI2=J6>F7=>6=966A3J70‰LKÅbeÌTU×PMáIFæB@ÞDFÞDFÝCCÛCBÙEAÔE=ÐF<ÎH=Ý@9ÏB8µD6‘:)†?-€?-€=*„=+€;,:+:+}9,};/};/{;1{;1w;1v:0t8.r8-q8-q8-n8,m7+o6-l6,j4*h4)i5*e3(d2'm;0•LFSKYMo\MYVEFJ99B12:+47017-/7*2:-9D4=J88G21@+-63160690:>-DO/ayGt™VsŸT`ƒKVyA[~F[~F_‚J_‚J^I]€HMqE=a5icSicSicSicSicSicSicSicSjcSkdTkdTleUleUleUleUleUgg_Œ‡ÃÅÂéíîõùúýÿþÿþüüøõþãÐÜʲ§ ƒefGGJ/HC0H92L76;@<LDAƒkgÖ¯¨à¤šÄf^×OOéCGØFGÚDEÛACÚ>?Ù??×B>ÓB=ÏB9ß@:ÏA7·A5–:+‡>->->*€=*:+}9,}9,{9-|:.z:0z:0z:1v=2t;0q8-n8,m7+l8+l8+j9+k7,o;0j6+e3(e3(_/#`0$g9,‹NI…TMrXKRL<;D14A02=/4:077557216//7,/;-6E27I32D.+4/.3-36-9=,>I+`wIvš]l—RX{ESv@WzDY|F^K^K\I]€JKiG7U3gdSgdSgdSgdSgdSgdSgdSgdSjcSkdTkdTleUleUleUmfVmfVpts©­¬Øàâôüþùÿÿûÿüþÿùÿÿø÷þÝçñÎÌØ°œ«€dnIFJ/JG6I@7SKI…}{µ°¬ßÝÑÿöèÿÚÐÖ{xÊLPÍRKÉHCÑGDÖDDÖ>=Ö@?ÕA=ÒC=Ø?9ÎD:¸C:š<0‰=/>.>,>,~:-}9,|:.{9-|<2{;1z;2z;2p:0n:/l8-i7,i7,h8,h8,h8,k90j8/j8/c3)b3)^/%\0%j>3}VOlOGMA58:,/:*1=/5=27:179449338119..9+2?.7F37H55?4.4*.1&48)>H/e{UxšgaŠPTtBRr@TtBUuCXxF[{I\|JXxFE[D-C.gdSgdSgdSgdSgdSgdSgdSgdSjcSkdTkdTleUleUmfVngWohXt|~¦®±ÔÞàñýýöÿýúÿùùýïö÷çñÿçïüàçñÏÆ̨”šxTZ@DM:AL>ƒ|v¹´ÙÑÎîíèö÷ïÿûðÿóê굯¼eT¶VF¼MBÃD=ÉB>ÎC>ÏD?ÎC<ÔA:ÊE>²A9™<4‰<2=0=/=/~:-|:.|:.y9/|<2z;2y:1x92k9.j8-i7,g7+g9,g9,e9,e9,h8.f6,h8.d5+c4*]1&]1&qG;WMD@9/99-6:,3;.4</69.57,2:/2:-19,/:,.9)/:*2?.8E49D34;+04&04%=F1m‚cr’iNuFMm<Nn=Nn=Pp?VvE[{J]}LQq@9F<$1'feSfeSfeSfeSfeSfeSfeSfeSjcSkdTkdTleUleUmfVohXpiYhqv’¡ÊÕÙé÷÷óþøùÿöûÿîúüçû÷îÿöêÿñáæѼ³¢ŽmfTVYNhsk«¯ 
áÚÒþêëÿóöÿûúõÿöõÿñûÿóÿÝÃʃm«R@¸L@ÂG?ÇG>ÈE=ÆC;ÒE>ÅD>«<5–<4‰<4=4=1=1};/|:.|:.y9/z:1x90v70s7/k9.i9-i9-g9,f8+d8+d8+d8+g6/j92h91m>6k?6e90|QHvKB4;33;039/36+47,35(35(46)-8(-8(-8(-8(.9)-8(.9)0;+7C-<C19=.04%>G4fz_YyTAg>Jg7Nk;Kh8Mj:VsC[xH\yIIf6/83 )$heVheVheVheVheVheVheVheVgfTieZkd\meXidNmlXghb~ˆŸ©ªÇÏÑëïðüüüÿþúþÿùùüó÷ýñ÷úñøúì÷ùáØÜ»¤ªˆbmOiw`ŒÅÆÁëìçøùôúûöþÿúþÿúþÿúþÿúíÿÿÿþüÛ®¨®VLÁH=ÊF:ÈF9ÄF:ÁJ2µL6œG2‰F5D4€@4„:/ˆ8/}:1|90x8.w7-v8-w9.s7,p4)h9/g8.g8.g7-e5+h8.i7.h6-k4/m80k;-hA0fJ5kYE€yfihV3;.2:-19,19,08+/7*/7*.6).5-.5-.5-.5-/6.-4,-4,/6./;14@67C54A0<I5UcLIW=ES9Ie?Jf>Hd;Jg;UqH[wOXsR<W8&.1"*-heVheVheVheVheVheVheVheVgfTieZkd\lfXkiRjlWsvo¤§®ÌÖØóûýüÿÿþþüÿþúýþöô÷ìíóçñõçóöãîñÖÑÒ³¤§ˆ€„m–†®¸­ÜÝØö÷òýþùþÿúþÿúþÿúþÿúþÿúûÿÿýüúÿûóÿÚÊ·zg¢K8¿QBÂH;ÇC6ÁH=¦?6•B:†A:@9{?7z>6w=2u<1t;0q;/p:.o;.m9,k7*m80j8/i7.i7.h8.g7-g8.i:0i=4g=1iB1fE2gQ:ocMŠˆs[^K19,19,08+/7*08+/7*/7*.6)-4,-4,-4,-4,-4,,3+,3+-4,*6,,8.1=16C2:G5?M66D+>L3F^<Jb>Ha:F_8UnG^vTTkN2I/%-/"*,heVheVheVheVheVheVheVheVheTieZkd\lfXkiRnp[zx­´ºÝçéùÿÿúþÿÿÿýÿÿúùûðèìÞÞåÕçìÖëïØêêÐÕÓºÆÁ­À½®ÐÍÆáàÞñòíüýøþÿúýþùþÿúþÿúþÿúþÿúÿùüÿÿý÷ÿúóÿòýüçɦ WD³P=Å@9ÃD>°=:ž::>=€@>tC<mE;q?6q?6o?5o?5m>4i=2h<1h<1q60p70m6/i7.i:0f:/c;/f>2`D6dI8cJ4_K3_S9okR~ƒlAJ519,19,08+/7*08+/7*/7*.6)-4,-4,-4,-4,,3++2*+2*,3++7-)5++7+.:,0=,4A/1>*4B+;M3DW;H[=K^>XkMbtZPbL+<)#,+ )(heVheVheVheVheVheVheVheVheTkdZkd\lfXllTinXovnž§¬Úäæ÷ÿÿûÿÿýýûÿÿ÷ôöéæêÛâé×æíÎçëÐçåÎÜÕÃÚÐÄêßÛøîïÿöùýþùÿÿûþÿúþÿúþÿúþÿúþÿúþÿúÿüúÿÿýñÿÿíÿÿóÿùÿÿíìȲ¬t[µN=³L=±I@ =7”<8…@9uC8mH8q?8q?8p?8p?8qB:k?6j>5k?6r71p70m80h9/c;/c?1aA2^A1WL8f[E^T9ZS7YX:nrWjt[0?(3;.2:-19,19,08+/7*/7*.6),3+,3+,3+,3++2*)0()0(+2**4,*4++5,,6+-8*4?/8C36A01@+8G0@P6HX>VfL_nYJXG(6'&/,#,)heVheVheVheVifWifWifWifWkeUle[le]khYlnViqZlxn’ 
£ÇÐÕîöùûÿÿþÿúÿÿ÷øúìò÷ãèðÙêñÏçêÍâàËÝÕÊçÙÖøêêÿõùÿûÿþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúýÿ÷ýÿüýþÿýþÿÿþÿÿüôÿÿìÿõÙÖ¡‚›^BŸWAžO>ŽA1„A0xA-qB.s>8t?9t?9r@9r@9p?8o>7m>6l:1m=3i?3d@2_B2_H6M;'F5!LN6FI.Z[<^_@\_@sy]KX>0>'5=04</3;.3;.3;.2:-19,19,07/07//6..5-,3++2*+2**1)*4,)3+)3*+5,+5,/9.7A6>H=7D23@.4B+AO8P^GYfTCOA,8.'0+&/*heVheVheVheVifWifWifWifWkeUle[le]khYkmUis[lyo–¥¨ÊÓØï÷úûÿÿþÿúþþôøûêðõßäìÔÜâÀÚÚÀÞØÊåÜ×õéëÿôøÿùúÿüüþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúöÿüûÿþÿûÿÿ÷ÿÿùÿÿûùÿÿôöÿîÿüãûåÍʨ‘eLƒQ:zF0xE0p@*sC9rB8o?5n?5k?4j>3i?3h>2cB3cC4`E4ZE2WF2aV@QK3HD+=G,<E(CH(_bCknOkoT<D,9B-8@37?26>16>17?27?26>16>15<44;33:23:207//6.,3+*1))0))0))0)+2+.5.+2+070<C<>L;;J73B+1@)BQ:L[H@N?/<2,6.*4,gdUgdUifWifWifWifWifWifWkeUle[le]khYjnUfrZeuj‘¥¦ÉÒ×îöùúþÿÿÿûÿÿôþÿîôùâãìÏÔ׸ÏÏ·ÜÕËïæçÿ÷üÿýÿþÿúýÿ÷þÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúúÿÿýÿþÿþúÿý÷ÿûõÿýøþþüûÿþþúùÿý÷ÿüïÿòáâкº¡‹˜{i…fTrQ>iH5bC/dE1`D/aE0`E0`E0[J6ZK6VK5QK3NL3qtY`fJCL/:G+@I,CH(MN/xwY__EEG1>A.8@38@37?26>17?27?27?28@39@88?77>67>66=54;329107/-4-+2+*1*+1-+1-,2..400626G4=O9:L44F,5G/=O98I7,</+5,)3*fcTgdUifWjgXifWifWifWifWkeUle[le]khYimTgs[ewk–ª«ÔÝäóûþûÿÿþÿùýýñûþëðöÜÜåÈÏϳůÓÌÆîåêÿùÿÿþÿûÿùùÿôþÿúþÿúþÿúþÿúþÿúþÿúþÿúþÿúÿûÿýþÿ÷ÿ÷óÿòõÿóùÿøÿýÿÿúÿÿûÿÿýÿüýÿøþôþÿóÿÿïÿûìÿöéÿîÔò˜bQ7ZJ0[K1[M2\N3[P4UQ8TP7PP6KN3LR6YdFCP2=J,?L0GP3HM-[Z<ieH^ZAXVALI8;C6;C6:B59A47?28@39A4:B5:A99@89@88?79@89@88?78?76;5271.3/-2.-2.-10-10-10(;%6J1;O49M16J/:M78K8-?/,6+*4)YgM]jPblTglVghVifUkeWmeXjgVkg[jf]hfWfkUdp\m~x§ºÁÚâåòûúùÿùûÿñ÷úçóòÞêçÔØÕÂÌ¿®½±¥Ç¼ºâÙÞûöýþýÿüýÿúþýýÿþýÿþþþüþþüÿþúÿüùÿþùÿÿúÿýúþýûÿÿÿýýýÿýüÿüöýóêÿòçþüÿÿÿýþÿûúþýþÿÿÿýÿÿþòÿÿäùù߶µ™SN0QK+UO-TO/QP2PP4ON2PP4KO6EN3FP5@I*CJ(HN(PT1RV5]]AZZ@XX>ZZ@XY:UT5LG1FD/A?0>@3;>38@38C57D38C56A36A38C58C58C59D6<G99E;8E48A05=0292/;/1D&;U&Be-@_6<T<;N;?Q7H]>D\<0H.*4)(2'YgM]jPblThmWjkYkjXmgYnhZkhWjfZie\hfWdiSdp\u†€¢µ¼ÓÚàñùû÷þ÷ó÷éëîÛÞÝÉÒϼÅÀ­Âµ¤É½±ÚÏÍïæëýøÿþýÿüýÿûÿþýÿþýÿþþþüþþüÿþúÿýúÿýøÿýøÿýúýüúÿÿÿþþþÿþýÿúôçÝÔõçÜþüÿÿÿýþÿûúþýýþÿÿþÿÿþòÿýàüüâµ´˜QL.RL,UO/UP0RQ3OO3ON2NN2KO6HQ6GQ6EN/KR0U[5`k@}ˆ`ovTQV8MO7OO7SQ8VU9WR<OM8GE6@B5;>39A47B47D38C57B47B48C58C58C5:E7<G9=I?=J9?H7<D7<C<5A5=P2Tn?Z}EOnE:R:1D1>P6OdEOgG9Q7/9.+5*YgM\
jPdnVhpYlo\mlZliZoi[liXieYie\igXejTdp\s„~•¨¯ÊÓÚëõ÷ñ÷óéìáÜßÎÎ͹ÆÁ­¼·£ÐóãÖÍóèæÿ÷üÿúÿþýÿüýÿûÿüýÿþýÿþþþüþþüþýùÿýúÿýøýúõÿüùûúøýýýÿÿÿÿýüÿûõöìãýïäþüÿÿÿûþÿúúþýüýÿÿþÿÿþïÿûÞÿÿê¹·žTO2UN1UO/VQ1RQ3MN/ML0MM1KO6JS8JT9NW8U\:djDŒ£mš°žtgrPKO6KJ5OJ6SN:TO9PN9KI:DF9=@59A47B45B19D68C58C59D69D69D6;F8=H:=I?=J9>G6<D7:A:6B6=P2Oi:Tw?Ji@2J2*=*6H.K`ANfF9Q7*4)$.#
\ No newline at end of file
diff --git a/testimages/testimgfst1_4.ppm b/testimages/testimgfst1_4.ppm
deleted file mode 100644
index c4eda85..0000000
--- a/testimages/testimgfst1_4.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-57 38
-255
-0/-51.72.92,80-80-80-80-5-*4,)5-*5-*1.'41*63,96/B<0H@3JA2MA1YB4†C3«?2ÀF;æCHñAD÷:@Ù?=³@-°?-¦?,˜@*‘>.=+“=,”?+™?6¸B4±E9_>/;5572/62/41,.0-0/-5,-=13yD@œ`l±XT†PGbopuŽ‘†bcSYSa,,,0/-2.+3.*2-*2-*2-*2-*2-*1,)3.+3.+41,41,52-850<5-@7.C9/I=1JG6fA/„;*¢E6ÊE>è@?ò;=ÁB3ª?-¨?, ?.”?+>-Œ=,‹<-Œ=.žB3ÂA;ªPHPF<@6494043.02--3/02/21/3/.P5*sTY|~»YXžJ>fDCUMVQQYNaXQ,-/---.-+/+(.*).*)/+*/+*/+*0,+1-,1-,1-*2.+1-*40-:2/=4/?4.E80J=5V93r7/’;1¦A5ÇD:ÙF?ªC2@.=-–=-Œ=,‰<,‡;-„<0†B7žH;¶OJ¡}mhseKI=77-01+/0+.0-//-00.00.:/7jhiŒ’Äv‚¶hoiyx‚—ˆj{k`Z@,03./1....-+-+,+)*+)*+)*+)**(),*+/-./+*.*)0,+3/.;23<21<1/@2/?82A83X6-;.’>4™A3­C6?3’?-“;-<.‡;-…9)9+>5…IA–KE¨nc¥…|yZfR?E710+2)*1++1++1-,1-,6,-cqMŽœ–º–ŸÆ§Š²‰«†g~bOO312,32-74/72.7/,3--/-.,+0/(//)+.-+/0*///2.+70*=3*?6->5.<1-<1/D1+B0.D,*\4,x>0:3‹?2A3<1ƒ>.ˆ<.€;+w;0v>/†C:†MB–\X~•x‹¨Š€^‹bWpS>D:.-+-)*1(-/*0+,10,+^cL„šl†¨uŽ°}‚¨w~žy`r\?A4671872=:5?:6?74;629527327./50-41,43.540;62@7.G:1G=4C90?61?61D65M34A9.K8*m61‰==„A;„=7B1…;.<,{>,¨KFŠG>tM<VFxwX§xhž˜~…¡{†pxpasYGD39.*./)1-*=&,H@5clQmŠ^r˜erœjw tt“qZsUD;4893=<7B?:D?;FA;F?9F?7D;4B92B92<7194.96/@9/F<0M@0M?4H;2B90?;2\:8œEK„@7¤EAœD6¥=2¤E3ª@2©<A—D@y=2¶:DÔ@>ªVLjeG…kPæOVÿ+Iý;UôL]äWgØclÐ`lÐQfÁL_DKR3.E=2cZ9yƒ`sŒdx“f„Ž¤{‘|YrRURC671;:5B?:GB>HC=LE=MC9L@4H>2D<1A:2<71<8-C;0K?/O?/M=0F:.A:0A?3¨EHÌ17 <%°=+¥:0¶A8¨</»?5Ô<1÷.4â=7æ42Ö6*¾NC‘]HÖ[Vè>AÑFAô>Jü5Hþ9Uÿ;`ÿ?mûArÿFjùTr»WalLA‹~u©«¨«³µ¾¼¿ÚÒÏÙÏק©¦`w][eJ10.762<<4BB8JH<NL7PPHb`xWNQH?@A96=82>71>;6F?5P?-A>-D65>;4h;5«;-¡:'«>*»9+©;,¬>-·?/Á;/Ñ7+Þ7.ô.+Ú7&Ù6#Ë=1ÚFBñADÒA>­<.ÇG<ó@FøDMýAYÿBjýIxÿKwùVuóJkžnnÓ»¯îëòþäýÿóüýøõöíðÒÎ͗“•›74/52+:70@=4OE9TK<ecq–œÖ””Ú||®`^vECH<;@B<,G@-EB;I?G@IHLB8’B;ª?-­>+¸9*Í9/È91Ç9/Î:0Ñ7-è1)ð/*ò/)Ö7"Ö5#Ó9/ÜMEÅE:»<)»<-ÈA;ÛIJïDJüK]úEdÿNwûUyýSnù\mß`iٞšÿööýüÿÿûúÿûøÿñôêßÝäâÕÐÙÔC?4<8/84+>93MC7WJ:a\p”žæ¬¶þ®µûŸ ãvtµbXGDWD@=[OS‰sš§f„¾IR³<(­:(º;,Ì9/Ý:3Ú72ä=7Ý90äD8ñ91ú**ì1,Ö2&Ê<(Ó:,Ú?:×EE½<'Á9+ÎH?ÅIAßKGæOTøWiþQoÿTwÿXoý\kíWc§unóÞÛ÷ÿýÿüýýúõûðîåØÐÿüèæìèPJ<IC7B;3A:4GC8SG1[T[‹–Ωªü¬¯ü¥©óšžå†‹Íuw 
’UgØHQåDIÇA8àB?Ï>9Â@2Ë=1Ô7.Ù5,Û:0Ú6,Ô4&Ì9%Û;/ü47ç55á55ò21Ü6*ßK?ÖDEÔ=6Ã;/½@.½>-ÃE9ÎMHôdmúXmû[sëtxôftȉ‚baOßÂÄýûþÿúÿúÿöÿþ÷ôîàÿÿãööêXO@TK<NE6LC4JF:VK5]V]‡ŒÃœì¥ø¦ç¤žè˜™ÚÁnˆä=Dâ?BÙ=AÌB?è=CØ:9È?7¿A3Ä7-ß63Ý7+Ý7'Ã8#Æ8,Ú91Ë8&Ý2(à-0å/,Õ8'ÔC2ÝD?æTTÍ:0Á:'·>-´<.¾@2éf^ïijóboöllþbo¶‹„[iFãÑÇÿüùúþýßýåôýìÿøè÷ïÚþýùZN>YM=WK;WK;SK>YL9ZRPsr”„†¶ŒÐ…‡Àqs¬¤Ymù7OáFAí>KÒDBÁB9Ô97ÌC9Á9)¾;'Í:*â6,ó04ô02á34æ2>í/1Ë8(Ñ9+Ý<2ÊB2Å9(×:1ã03Ö71Ò>2ÇC6¯;,¬<.®=-Ú_Xïlr÷nxÿpwûNb½~yxa×ÖºãñØÃÚ¾¢ÍŸ»Ô­äãÎêÙÑßÑÈZM<ZM<XK:ZM<\L=^O<[OC]RVh`…‚yº—|³¥iœ½>5ÝC9ö<?âAG¸<2¸@2×96ËC7Á:'È;)Õ5'ì7.ç2'ô*&õ&.ÿ8ò.:ç6<ó08ô-0Ü:5Ý>:Ý;8×4+Ç8(Ç:0ÔJ@È;2Ë80¼:,ÊOH÷xüyÿkxÿE`¶mdp‹X„hpczŸl‡µw’»¤´Î½µ¯¥¤ZK8ZK8ZK8\M:^K<_N<_P=aP>cQ_uq–º€–òZiÿ9PÓCCæAEÑB<¯;,»=/ç9:âH@ÏG9Ì8,ß6/í1/ü,<ý(<ð,Dü/XùS_ñIZÛ4>´;*ÍE9ÓD@ÑA9Ê9(«<(¹=1Ø?:ë51ù-.Þ7.Å@7øz{üz‚ýdvùA[•VEiP‚‘h~¥p©m†¬oƒµn­yÉ° §¹]L8]L8]L8]L8^M=[O?^RFfSOkZd|ƒ¹§‡¸çwsôP[ñALÝ:;Ã?2­>-Ç<5Õ7.Ú9%äA:Ñ=/ê86û7Eö*-õ)*ì)/ó=LôT\ß@=º?0§8-¹>9×CCÐA9Ò91Ï:6Ý>:Ü;3ò00ô02ë2-Â:,î{xþw…ÿQnôAaqM?jŒPz¢f{£g}¡c}¦j}­a‰²p‰¥[gg_N<_N<_N<_N<`N6\SNql‚xqšt|ª‡‹Ò¥•Â¶Œ|ÏSIëGHä:;È@2´91ÉYMÃ?3Ý6-â70á?0ì1,ú9<ôNRå<?åBEÓ:=à:>ã4;ðW\ù_kàRNÞ@?ÜG@Ø93û+9ü7>Ý4-ê5.ù.2î1-Â8+çtqûj{ÿAcÓCZWQ9m‰Vwbrš[wž_y¤_x§Y‚ªd~™las]\O>^Q@_RA`SBaTNnjƒwy²mq¸ŒÜÆüäÞ¢Øѝ¬xÃe[ìRTè:;Í<7¸G9ÐVIÕ4,å>8Ü6*è0(ã60ÚACóS[úZdø]cú2<ô1?ùU^üXaá<8â64Ù<3â94è8:å63Ý6-ð0+÷03ñ1.Ì9/ädeþWsû?b”EHCG0bwLp’Vj“Oq˜YtœVo™Og‹KNh8G^4UH8]P@bUEdWGpk‰sv«cj²|ƒË¥«ÛÆÇÙËÅDz¦°yxt‹hbÐoiübbîE@ÐB8Ã>/Ð=-âFGÜ;3ã60Ì:-Â=,Ë;;åMYèSYã:7öCIä7;ù59î73é30ß<3è64ê67æ40Ü8,ì1(ñ33ï2.Ò80ò_gûBdêCaaK=RG5LZ7g…IeFj’Sj‘N`‰G[|E]tHZuB@@6TG>^SAjgrol›`` 
qrŽŠá˜—Ó™“Ó˜Ë‹‚±n_drWL¸YSãtmözxÛKCÁ8&É:*Õ9,Ò9+Ú8-Ç<)Å;1Æ<2ÎD:ÏF<Ù7(ø*6ÿ'?ÿ!<ÿ#9í6;Ý:5ç85ì44Þ6-Ú9'Þ7$ç5+ã7)áB>ÿQjÿ:a˜;F8<+BA-_h=g~RcˆBkKjIfˆKdIeFcˆC/4-A<8OJDabwYYfh³~€×…†Ö††Ì†Õ„Å}xxUS^bG¹SQèagìqi×K>Ã<(Ì9)Ô6+Ä;)Ô@2Í9+É:,Ê8+Ò;0Ò:/é24ù/=ó1<ø/9õ1;â66Þ95é77ò59Ý6.Ò9)Ô9'Ê<$Ì?-÷MVÿ:dÜB\F678:-de;¥¾…s‘]fŒCq–SjIi‹Nf…IhˆGl–N-0'34/@B7TT`b_ŠƒÔ‹‰ê~͇‡ÇŠƒÆ„zŸndbd_Km\Hˆ[DêQTàA=Û7-Ñ8&Ý5,Æ7)À=+ÔB5Ç9+Ç:)Ì6(×6,â92ð5<ì2=êFGïB>ä86á96å99ë8<î68Ö92¿:)½<)¸:$Ç=3ÿG^î@akC;><0[J8²o•±~YuEe‰Ah‘KeˆDhŒLiŽKj’It¢WA?2??5EG/RPC]]Qzw¢ˆ…Èyv£xt§yqˆob\n[Ll\Mu^N¤L@Ø7/á4.Ý5(Ì<$Õ9*³:'Æ:-âE>¸=+Ã;-Ï;1Þ;6ç98é:7ïLMÔIDÙ;8á:4æ66ê7;è7=è8:Í=4®;)¥:&ž:*ÌOMçHZ£]SHG3R^6v˜\‡˜bWdHGT6i†Dc‘HiŒHlPn—QržOv©ZZQBZQB[RC_VGd[Jj_]qeij^`ZWNYSGaYFi^Hp_O¯RK³?0Ë80Û7-Ù7,Õ7+Ô8+²:*±9(Ê7-ÓD6Ù;8à<:í49â96¹;-´;*Ð@5í76é6:å95â92ã99Ü<<¸<2›<*‘8&†:,ÁUUŸVO˜_Vp„I„œ`Rd:>D6<>1AM7jˆHl”Kn•Tl–Jq¡Mu¦W]†Lf]Nf]Nf]Ng^OiaLiaNiaLhaG`[HOL;JK9ORA^TH­UQß;<¹@+Ô8)Õ9*Ò9)Ñ:)°<-™;"º<-Ñ80ï7?Ü9:Ø58Á61§>+§<(¼:,Ý;6î85ç<4Ú@8Í=<®<1–9(‰<(ƒ8%‚?/™SGZN–]VcmJ6A08?7>>29;.>J4m‹Kq™Pq›OsžOz¦[bJXHi`Qi`Qi`Qi`Qj_]j`WjaRjaRiaL]XEFI8?E7AI4wT>ÛVQàA>Ë9*Ì:+Ï;-Ñ;-¾;3¤;(ÈB7âE@ÍB?¡@0”A1ƒ@-Ž;+“=,œ;+¬:/É<2É?4¶</6-‰<,‚:+|:*u7(‘D:PF˜UO—]YdoE@M1U^I@C(9;.>J4nŒLq™Pp Lz¦[h‘OdŒMY~Ji`Qi`QkbSkbSlbXldQldQlbVnbThaQWQAHE4MG/¬BBÉIHæ^`ÒG@ÒE>×FAÛGCáGIÝIGçACáB?Ä>;—A2‰=0‚<2;3=1};-€;,Š:/•9,‘:)„;,€8*{5)v6,q9,‡M?UH—SPn]MP`;YoA;L*:@49;.<H2g…EršQr£Rj—R`ˆIeGWyGi`QkbSjaRjaRkbSkbSlcTlcTqaTldYpm^lfPSK4yK>”A=©HBÀHGÐB>ÎE=ÙCDáCDàECÞC?ßB=ÄA9‘:0?3‡81„;,‚:,9-~:/}90|90z:0x:/t8.p6+l3(p:.ˆQJ“RN‚[LbiJNd>HW89A,=>04<1<?,arFršSs™\]ƒFbˆK`†IOq?haQjcSibRibRkbSkbSlcTlcTnf[‹ˆƒÃ¾ÎÉ҆xHM9<G6=>6V?7·PSØJIàEAÞDFÜDCÖE@ÑG=ÔA: 
>1‚?.ƒ;,‚:,9-~:/}:1y:1v7.s9.r8-p5-l3*i3)k7,’OI€ZO\UE@G74:,6>/<G65A-284:</Vg=tœU`ƒI\E`ƒI^GIj=icSicSicSicSkbSmdUmdUmdU„„‚ßãäúþÿÿýúîç˞¢NQ6K>6XSP¼©£ì¶¬ÚUXÓIGØBCØ@?ÓB=Õ@:©>4…=.€=,9-|8-}:1{;2s:1m7-l8-j8-l7/g5,a1'd5+~QKSG94?/5;168516/1=/7F307046)Q`?o•ZUuCWwE\|J\|J>V<gdSgdSgdSgdSkbSmdUmdUpgX†“Þéëöÿúùûí÷úéåßŃ~hQYL´°§ôææûúõüëáՇsºK@ÉD?ÊC=ÎC>¤=8†<3=1~:/{8/{;2w81j8/i7.f7-d8-i70i81e6.qE<A>767/58-57,08+08+.9+2=-9B146(TbIWzPNk=Ol>ZwIVsE,6.heVheVheVheVhdXmd[khU€ƒÉÑÔúúüÿþúõøïõ÷êååɋt˜ˆááßûûùþþüþþüúÿÿç·­ºTFÃI>ÀH8™C6A7;3z;2u9/s9.o6+j81i70h6/h70j92j@2jUBtq^39/17-17-/5+/4./4..3-.3-.:04@2BO;AO6Ic@Hb;YsNF_B&+/heVheVheVheVjdXmd[mlXŽ””ëóöþþþûûóäèÙèìÓâÝÇÓËÀìãäúúøþþüþþüþþüÿüýôÿýñêØ»xe½G=¨?<Š?<qE<q?8p?8n?7i=4q62l71f;2c?3bM<_O8fbIV`H39/17-17-/5+-2,-2,,1+,1+*4,-7,2=-5@/;J3HX=]mS<J9%++heVheVifWifWkeYmf\knY‹ÜãéþþþüüðíòÜâåÈáÙÎôèêÿøüþþüþþüþþüþþüûÿüÿûÿÿüýÿýêß·ž¦lX„E3uB/t?9q?8o>7m>6i>5cA5_J9L?,DH/XX<jmR=G/7=36<26<25;1382271/4.,1+*1*+2+.5.9@89F57D0N[I8D8+1-gdUifWifWifWldYmf\hpY}ŠàçíþþþþÿñêðÖÏͶãÛÙÿúÿþÿúþþüþþüþþüþþüþýÿûÿ÷ýÿ÷ÿýÿÿüÿÿûòèÝËÊ´¦¨xaH2^H1^J2XM9QL6Z\DJS8@J/NN2ieLJI5:@69?58>49?5:?99>88=75:4051,1--10/325G19K39K53D4-4,\hPfkUjhYnfYkg[ieZemXŒ›žäìïùüóìíÛØÓÀÅ·ªÝÒÖþøÿüýÿþþþÿþüÿýúÿýùÿüûÿýþÿüùøêáÿýÿýÿüþýÿÿþëØÖ½RK.UO/QP4PN5IO5EN1KQ/agC]`CTT<VU9OJ6CA49?58C59A69A6:B7=E:<F;:B54>5AU0Kk<:M:EX<?V:-4,[iPgoXlm]nhZkg[ieZemX…”—ÚãêìñëÖÕû©ÚÌÁøíñÿüÿüþýþþþÿþüÿýúÿúöþúùÿýþÿüùûíäÿýþýÿüþþÿÿûçÝÛÆTM1VO2PO1MK2LR8LU8^dBªsxˆaKM7SL:SN:IG:;A76A3;C8;C8;C8>F;>H=<D77A8H\7PpA/B/AT8E\@)0(
\ No newline at end of file
diff --git a/testimages/testimgfst1_8.ppm b/testimages/testimgfst1_8.ppm
deleted file mode 100644
index 11dca13..0000000
--- a/testimages/testimgfst1_8.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-29 19
-255
-0/-50,4/,4/,2-*3.+30+74/B:/J>0dC2¥A2âBBá=;®?,œ@+>-=-¬B5‚F;:5232./1.5//uKLkj¤SNdci_\VX-.0.-+,*+,*+,*+.,-0,+2.-<20A4.H94w9.¦A5³C7—>,Ž=,„;,„A8¤TK„‰sGK<0/+/.,0/-PMF‰›§~˜‹wwWV:45/:728302012,.10,32.?5,B8/>3/F21K4,€;4ˆ?6†>0<-‡C8M@xc‹ m†fFJ<1,)1+-NO?y™h¥vk†g@>29:4C@;GB<I?5D<1<71=9.L?/J<1@<1š>AŸ?1§?4¬@3Â::¾:8ÃG?•bMé?Hù>QïKcçLlÔJanE?„‚m–£±¹¬wˆuX[F43/==3OI9kllg‡IDJ>;6H@3C?<a?6¨=+º;,¹;/Å;0â4+å3'Ô8+ÛFB½=0ØDB÷ESþHpýRtÚ]kèÎÍÿóýþôóÎËÆ°·¯EA5=82OF7wy ­°ý––Üih”‚KQ¸WhÈA=¼=.Ö8/Ý82Ø:.ð50â3.Û7-ÚEAÅ;.ÃC6ÖKHøYmû_têhp¶˜þüÿýúõ÷ðÞëîçYM?SG9SJ9lj€’•ÜŽÏ½fˆå@DÍB?Ø=;Á=0Ô7.é3/Ô51×6,Ú6-Ò9+Û<6ÖA:»=.³=/èfdøksÛns£¨ŠéñäÌçÈñè×îçá[K;[K;^M=^PEqgŒºp“Û?@ä@A·=0Ú?:É=,ã5,õ,2ù'@ð@KÝ34×?:Ó:2½;-Ø=8×6.à`]þp|ÑRYx‹^zŸkˆ´u²¸œ£¢ª^M;^M;]P@kalxy§¹ˆ›éJNÔ<7¼C8Ó9-Þ>2ó6:ñ7:æ9?å@DÒKHÓECÖ?8é6:æ5/ò00×XQÿ\t£IKr•[xŸ`z¨`…¦oYi^[N>aTDlhqvº°°ÒÎÇ­ukçWVÑB:ÎB5à=8Þ6-×CAðV^ñ8>ôGMæ74á:4è66å4*ò21ÝMLøFhcG;bzJj“Ok”N\yCPj;A>5^Z[ccŸ€ÔŒÑ‹ƒ´n\RÎa^å`YÇ:(Ï9*Ñ;,È:0Ð@7í04ú)<ò09â96ç53Ø9&Ù9)öGX°;NEG1w\iŽHiŒJf„HhJ780JKCom”„€Ê€|¯sgkm^K¼LAß8/Ô8)¿:+Í?3É:,à93ì<?âC@â96è8;Þ95´;*º@3ÞK[PI6y‹YcxQgŒFiŒJl–Nv¦Z`WHbYJh_PkaWXUFXUB‹WLÅ=1Ø8*Ò9)ª;(Ê<0à::Ú65¯<*Õ;3è96Û<8·;39(˜H=–\QdsHBI7=D2mLo™OpžSW€Hi`QjaRkaWkbSg^MHI9kJ9ÛONÎ@6Õ@9ÈA;ÝB>²@6ˆ?0ˆ<.’:,¬<0š:,9+v8+ŒNAXPTb>CK4;B0n‘MqŸTdKTyFjaRjaRkbSlcTumb›—ŒjZJnC<¦GC×FCÞDDÙD>±?5‚<0‚:,~:/y9/u9.p6+l6*ŒSL`XE?M4:A/6<0g„Jd‡Ma„JIj=gdSgdSlcTneV²·ºûýøÚØ¿[ZH¯¤¢ï½¶ÎWOÐB>¼?9‚<0}9.z:1m9.h8,i70g80RD;4:.28.2?.39-ZtMSp@[xH1B2heVheVkeYy{pëïòõöîèêÔµ¶¨õõóþþüöîëËq°D:€?9u<5o:2l71f:1eE6gbN28.06,.3--2,/90;G3EZ;NbG%*-heVifWlfZtqîòõô÷æÝÚÉý÷÷þþüþþüþþþÿþùáÆ»«†t{UHeD5]F6TN8JN3VXB8>47=36;5271-2.1628G2=K:+2+akSliZjfZw‚zéïï×ÔÃÜÑÍýüÿþþþÿüùþüýýôïþþþÿýö˜’xTO2LN6OV5r~VRP9KH79A4:B7<D9=E8=L5AZ:CW<*1)
\ No newline at end of file
diff --git a/testimages/testimggray.jpg b/testimages/testimggray.jpg
deleted file mode 100644
index 95505a2..0000000
--- a/testimages/testimggray.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgint.ppm b/testimages/testimgint.ppm
deleted file mode 100644
index 59c50a8..0000000
--- a/testimages/testimgint.ppm
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgint1_2.ppm b/testimages/testimgint1_2.ppm
deleted file mode 100644
index 391cd8a..0000000
--- a/testimages/testimgint1_2.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-114 75
-255
-0/-10.51.62/83/:3-:3-:3-91.80-80-91.80-80-80-80-6.+5-*5-*4,)4,)4,)4,)4,)/,%0-&2/(30)63,74-96/:70A;/E?3IA4JB5MD5NB2NB2PD4T>1gA4‚D5œD6²D7¿C7ÈE=ÊG?ìBKëDKíCFó?Bø9@ó:?å??Ò>:µ@.´?-´?-°?-«@,¤A,A*—A*’?/‘>.‘>,’<+”?+”=*•>+–?,”?8›?2®C3¾C3º@1¦F8yD6J7(:6584371150-72/61.61-50,.0-//-0/-2.-5,-3)*:+.G8;sCCLK¥Z_¥o‡‹~ª{†¼gjSGoPH`PK_jjr”ž¤˜–˜ŠrsaWVDUQbUQb/.,0/-40-51.61-61-81+81+80.7/-7/-80.80.80.80.80.5-+5-+4,*3+)6.,6.,6.,6.,2/(30)41*52+52+74-85.96/?9-B<0F>3G?2H?0J>0K?/MA1LC2[D4rC1ˆA/—:)§;.´B7¼G=ÝAEãCEìDDö?Cû:?ô;@áB?Ê?8³@-²?,¯>,®?,©@-£@+›@+–@)‘>.‘>.=+<*=+’<+“=,”?+”@6 @4¶B5ÂB5ÀF;£I>rD7G9,=77<6694183062/51.52-21,-2./1.00.10.5//5,-6,-?35a:3~E<PN“fzx£t~¹Z_—NBrNCeOFaPN\osv€…„yjl^VWI\T_\T_,,,.-+0/-3/,3/,4/+4/+6/)3.+2-*2-*3.+4/,4/,4/,4/,3.+2-*1,)1,)4/,4/,4/,4/,41,41,41,52-52-63.74/961<5+?8.B:/B:/D:.F:.H<.K?1GH6RG5aD2u@.‚;)“<+£C3°L=ÏEBÖEBäB@ò;=ú9<ð<=×B;º@1­@,­@,¬>-©>,¥>-ž?+—?+“>)?.>-Ž=,Ž=,Ž=,<+Ž=,?.”B4¥A2½A7ÇA8ºC;›MCcC8<:.A75>63<4183/43/23.23.02-,2.-2./1.00.32040/2,,5//P5*c<-qE<€aq|v¦ox»\_¤OE‚PAlK?cFBYEHQRXVU\TQWKTWL`XV`XV+++,,,/.,0/-1-*1-*2-)2-)2,,1++1++2,,0**0**0**0**2,,1++1++0**2,,2,,2,,2,,30+30+30+30+41,52-74/850:3+<5->5.>5,B8.E8/H<0K?3DE5LE5YB2h>.v;+…9)”>/¡E6¼F<ÇG>ÜD?î@?ò::ç>;ÊD8ª?+§@-¦?,¦?.¥>-¢>.š?-”?+>)>-Œ=,Œ=.‹<-Š=-‰;.‰=/‹?1“E1¨B4ÁB;ÃB=¸OK›_WeTJ>H=@72=6094.63.43.13./4..3--3/-3/.3/02/00.2200/-0/-=.)K5*]B;~k~Š‡¼zÏkn½OL“J=iF<aDBZ>BMBLMLWQNXOR\QaYLaYL,-/,-/---.-+/.,0,)/+(/+(/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+1-,1-,1-,1-,1-,0,)2.+3/,3/,2.+2.+40-62/80-;4.=4/>5.?4.B5-E80H;2G>5M?6U=3^8/n8.~8,;0š>1§A3´D8ÉF<ÞC>çB@ÞE?½C4¡B, A- ?.Ÿ>-Ÿ>.š=,–=-=+Œ>*‹<+‰<,‰<,‡;-†</…=1‡?3ˆB6“F4ªH=¼E?»KG²e]™ylstfO\RMI>B>5:7.33+34.03,/1,,1+-2.-2./1./1.02/11/11/11/8,6?42SHD‚{’”ljÙrxÂbg§TQpJJdLTa]klq‚|zŠo}p\j]a[Cc]E-.0-.0-.0....-+-,*.*'.*',*++)*+)*+)*,*+,*+,*+,*+,*+-+,-+,-+,.,-.,-/-./-.0,+1-,2.-1-,0,+0,+2.-40/91/;30=31=4/>3/A4.C60E82H94L:6O:5U61e60v6-‰90”<0œ@5 
@2®B5ÃE9ÑF?ÍH?³E8šB.šB.š?-™>,š>/–=-“=.Œ=,Š=-‰<*‡;+„;,ƒ;-;/=2ƒ@7‡D<”J=¥H@³KH°]U¦o“•|‹xao`QSEGI;=?224)01)./)./*0/-//-//-//-//-00.00.00.00.7*>713SUJ‚†‰”²‰‘À€¸…™´‡–|ŽwŒ…‚šŒ’«˜Ž¥‘”€`q_ZV9`\?+/2+/2-.0.......-+-,*-,*-+,,*++)**(),*+,*+,*+,*+,*++)*+)*+)*,*+-+,.,-/-./+*0,+0,+.*)/+*/+*1-,3/.:12;31=32<20=20?1.@2/B5/A62B94D95I62W61i7.;0Œ>1–?6“?4šB4¨D5µE9¸F<¨B6—?1•@,”>-”=,•<.“;-<.‰<,‡;+†:*†:*ƒ:+9-€<1?6ƒD=‡HAI?JDªWS¨tgš˜Š§ˆ€šs~n\hRLUB>E58;022*0-(0**2,,1++1++0,+0,+1-,1-,/.,/.,9(:41*R[>‹sš”Œ› Ž¨Ÿ¡Å«ŸÀ«ŸÁ©³—‹°‘³’Š§ˆ~•yZnUTS7VU9,03,03./1./1000////.,/.,/-..,-,*++)*+)*+)*+)*+)*+)*+)**())'(+)*,*+.,-/-./+*/+*.*),('0,+0,+2.-40/;23<34<21;10<1/=/.>0-?1.:70983:94?82F3,[5*q9,<,‘95?3ŽB2”A1ž>0¨A8 ?6–>4‘@-=+‘;*’<-‘;.;-ˆ:-„;,…9)‚9(9*~8,€=4€A8„H@‰NHHAžPL«d`§ˆv¥„€®‰}ž†thw`YgPHSB=C566.2-)1()4*+2)*2)*2)*0**1++0,+0,+0,+9(241 IV*w‰Yœ{ „°ƒžÌ‹¡Î§Ê¡‡´‰†±„ˆ¯ƒ„¤}sŒlSgKIL/FI,/0*01+12,21,43.63.52-4/+7/,6.,2-*0,+.,---/,+0)*./(//(//)-.*+/+*.-)./*./)--/.../.,2.+4,)7.):/)<2)=4+>5.=4-<3.=2.;0.=/.?10F/':6+.:,45-C++Q'(Y2+]A3vB,†A1”=3•9.<.ŽA/B0?,›83<2‚@0~A/„A1‰=0ˆ:.†:-x<1p;-s@/u;-€>2‹G<‡MAQCšKNm`|Œq~¡Œ¦‰“¥„¢†kzbbY{V[mSRUDG>7<..6',4%,&,,)*,-(,2'/4'03'10(3/)31-..-(HK8xb‰šnŠ¢pˆ¦p’µ}»‰“³†«xƒ©zƒ¦~}˜ynkTcPEI:=A212,23-34.43.65096185083/91.80-4/,3--2./0.1..0+*/2(00*.0*./+,1-,/.*0/*10+0000./0/-3.+81+:1*>4+@6-@7.A8/?6/=4-=2.;0.;0.=20D3,P,.X&/N&.?0-<2)Q0)i-,i;,{<3‰;7‹:6ˆ>5…A6†B5ŠB6Œ?/ˆ?.‡>-ˆ;+‹9+‡:*<)x>(~;2v:/v>/y>0‚B8ŠG>‰JAˆODšNN”l`ƒ‰o| |‚ª‡Œ©‹Ž †…•{c˜j]c\†bTuXE^H6F9*6,%/'5%(4%*2'+/*.,,.)-0&.0%/12,..+&DF8s~`~j›k}Ÿm€¦s‡©v¦s~¦t~¥x}¢yx•vjgRdNBB699-560560671872983<94=:5=84>63<419418307327323121/05,/5,/3--3--3/,40-32-32-43143162/83/<5/?6-C90F90E;2E;2C90A60?40>3/>31?42@72R/5\(4Q,3<714;+F6)Z/)b70s979:„8:~:7}A9B;…@;~E1‚A/ˆ<.‹9-‰7+ƒ:+y?+sA*…2*”G?|90E9}G;K>ƒMCŠTJnoP|wZ‡gŒ—w£~~­ƒ{©‚vžy„nŒn{‹nr…iesZVYHH@5=/&9))3)',+').('-)*,+.*+1(+3)'?;2QTAivZp†`qdr˜gs›iu 
ktžlz¤ty¢vw›ut’pg€bPdID=5<5-782782893:94;:5?<7@=8@;7B:7A96>95=84=84=85;63941:0/91/91/72.61-52-63.74/540952<94A:4A8/D:0H<0J>2J=4J=4E;2B8/A81@72@72A83>95?<7A>7E<5G90J8,H;+F>+g2*z93?>”C@‘HBƒ?6‚92‹<5A5ƒ?4‚<2=1y?1y?1}=1ƒ:1£31å{¦SMK?nO=jT?sXCƒ`Mc€R“x]¿daÎadÉ~x®–~– ~ˆœws‰”t~˜st–qjŒi`xXS]DDC.C4-;2+11'/0(41*>,,G',N#,OC3`[EgkPk{WhƒXi‹Yo•bq™dpœko™kw vu›vt–upne€_MfFLC<D;49:49:4:;5>=8=<7A>9B?:D?;E@<E@:D?9E>8D=7C<6B;5B;3A81A83@72?82<71;6085.74/96196/=82A:0C;0I=1L@2N@3N@7K>5I<3E;2B90@91@91A:4D;4I81[<7h=7p>7s<5z?7„H>F7œE;Ÿ>5ž=4¢I;£M>£G8¨C7¡AE“9;‡<9uA6lE6wC8‰02¬6BßOOÊEFÅb]‚M?h\BbeFmfIiO·dVäU[ú'Gÿ!Iÿ4TñP`çemâkoÈuqÃztº€u²„w¬€uªum£ca›ST“HO>Bi78Q/-J3-G2-J2.S74]R2gbBrxTu†\nˆ[o^u•cw˜cs–nu—tz™z€šz”{m†id}_OiFUMBRJ?782893<=7@?:A@;C@;DA<E@<FA;FA;FA;G@8JA:I@7G=4E;1D:0E;2D;2D;4?80<71:5/94.96/;7.?8.C;0F<0J>0M@0M@/O>4L>3H;2E;1D;2B;1@<3A=4Q81’?GÕMaÖR]›>7”H:»KGÛ?J¢E3§@1«;/®>0ªC0Ÿ:&ª=)³;+³6>¼?GºGL£KG;0„4-½AIâ?TÚ85Î65ÉSO•VGqaGppN‚kL¥s\ÿJWý8Jÿ5Nÿ/Jÿ5Oý1Jÿ.Kÿ/Nÿ5Xÿ9ZüA`øEc÷EgöBgüClÿGsÿXzñTsÛ\qªNYf.-Q;0<?,9K5]P0idD|a}Œkxlwiyh€‘mŸ…˜§’¡­Ÿ£¯¥”¢•x‰vby]OjIXWEZYG671782:;5?>9A@;DA<EB=GB>GD=HC=HD;KD:LC:KA7J>2I=1F<0E;/E;1D;2A:0@91<71;60;7.<8-@:.D</G>/L?/NA0O?/N=3M<2H:/D:0C;0@<1@>2B?6v94ÎT_í;QÐ*4¤4&’<#´>0×47¥?1ª</¶@6»G:¬>/¥:(¯9+ÁA6ÊA9Þ>>é:?ä>@ÑD=ÙSJÜ>=é08Ú6*Ñ2,»72³aS€ZC‚^D®bRÜg`÷<Gâ<@×HDßQOëJPò9Gú0Fÿ4Lþ9Uþ9Wÿ=_ÿAfþ?hþ@lú@o÷@pÿ?hÿFkùJkèZp¾Zdm63RA1FK4dUB„{jš›•’š’š¤›—“­¯¤Â¶ËÈÃÍÉÈÈÃDZ³²ˆ’‡exbSlNU]EW_G560560671:94=<7A>9EB=HC?HE>ID>JF=MF<ME:ND8OC5OA4LC4JA2G=1D<1C:1A:2?82<71<8-<8-A;/D</K?1M@/P@0O@-O=1L</H:/C9/B:/?;0?=1@@4 BC¼HK¹//¸/'©9#ŸB#›B"œ>"Ÿ8/¥6/´=9¶A:«?3§;/ºD:Ã?:Ä>%ä2(ÿ &ÿ"+÷))è3*å3)ë.(Ö7$Ô6*Å:3ÅeYžaN¡YJÜa\ñCLî=CÔ?;¿?4Î@<òAIÿ4Gü5Hõ<Jý;Uþ7Tÿ7Zÿ9cÿ=mÿ?rÿ@tùAuúGeÿKjþLlÿa|ïfzÏjr˜TSPH~z´¨¨½»¾¸¼Å¼ÂÎÆÈÕÔÎÜãØæìáßïàãëÛåãÕâÇÂșž˜nl\uW_lN^kM201542875984;:5>>6BB8EE9HH@HI;KJ5MM5KJ8JIERP]KI_OF7I?3J@6D;2C:5?74@86;55A62?75>:;===@?;G@6M@/O?(BA/J91P37H599>83D48@)QK3½L>£5& 9& 
=(¢<&®?+³:)·5'£9)¢;*§B0©B/«>*´?.ÂD5¼8+Í6+Í;.Ó=/â7/ò/-ö/,æ6)Ô=(Ü7#Ñ9$Ã7&ÊD9×JCòRRøAFþ9BàDGÎ@>²<0¨>.¿N>ÖIBí?Aÿ<FøDMýEQû@SÿA]ÿEiø=høBrýK{ÿKwïQrèXsÿYzÿGsõKpÀhvzmeº ‘ÝÕÊâãåæßïôÚõÿÝùÿèøùó÷ûøóøóðóêíëâçÙÓÕ·¶´Œ‘Š}…z„‹{~…u/.,10.43/650991<<4@@6CC9IF=LI:NL7OM8NL@`_ettŒuu™jbwVOaKCRD=EB<><73=90=9.96/<90>;2?;0F>1K?/P?/P=,8@(>=+@91<73A96Q20t).¥9F¦6(¤9'¢;( :$§<(µ@.½>/Ã;/²>1«:,¬;+¯;,³;+ÄE6Ä<0Ã6,×4+Ö8-Û9.æ3,ô.+ô/)á4&Ï9!Ý2 Û9*Ñ;-Ì?5Ç:3ØD@çIHå??ÕA=Å?6²:,¬;+µ=/×JCä@AüGPñ@FûEQýDVü?[ÿCgÿFoÿLyÿK{ÿExÿMvÿWxúOoùJkØJbӋŽfVGÁ—‹øáÙû÷öøõÿþìÿÿîÿÿõÿÿýþÿýùÿù÷ÿõöýòöîåèÍÉƯ°ª§­¡ª²¥¤¬Ÿ32.10,21,43.77/::2>>4AA7LD9PI9SL:SL<TPOsq†»“˜Ò’Í€}¶ol›^Z{QNaB@E<9497+574891?<+D>(I>(K@,L@2I?6O<6F859830;35;1O4-Š>@¨6>ª<+«@.§>+«>*¬9&º;,Ä:-Î;1Ä>5½8/½9-½7,Æ<1Ê<2Æ2(Ð7/â1)ä3+é4+ð/*ø.*ð/(Þ4#Í7Û1 Ù3%Ö8-Ö?6ÛLDÌC;ÐKBÆA8¿;,¼:*µ:*º?0½=2Ë@;ÝDFìKQí@DöFPÿJYüD\úAaÿHnÿOzÿKxÿP|ÿNwÿTuûNjîLcÿzˆÿšž–KH¿~xüÑËÿööþüÿÿùÿÿùÿÿûÿÿüùÿûøÿúùÿöøÿó÷÷ëíàØÕÔÑÈÕÕÉËÕ;ÈÀ=:385.41*52+74-:70>;2A>5LB6TH:WK;TJ@XQXxw—–œÖ¢«ö£¨ùœŸì—šÝŽÅ‚«hh„UReKIWEDV?<E?:4C>+E@*BC1?D=;DCC@UJM^NW`LXXLNCaJ:‰R>Ž@,ª?-­@,°A.±<*¶8)Â8+Î7.Ö92Ð83Ï72Ñ:3Ï60×=5Ò5,Ù;2Ù80ë0'ï0(ô/)÷,(÷-)í2)Ý6&Ï9!Ñ9$Ñ6$Ó7*Ñ7-ßJCç\UÈH=¸>3·=&¸;%»9)Â>1ÈB9ÏEBÓIGÓIIèDEïIMûQ\úM`öE_ûHhÿOvÿPyëZwö[zýVrúQhÿ[lóRbòO`óL^¾pl︵ÿìéÿüûùþÿúþÿýûüýøõÿú÷ÿûúÿôôøéììÞÞåÚÔïéÝùöçæðïÏÙØB>5?;2;7.95,62)73*<71B=7KA7SG7WJ9[MBWMVpp”˜ÙŸ«ÿ«´ÿ­µÿ¬³û¬¯ò ¢ßŒ‹Åtq¨`[‘]TƒVOqEAP@?=?@8@E>;DCFOTMki|œ‚|˜i~ Xc°QO«A1§:#©:'­<*±<*·9*½5'Ë7-Ø:1Ý:3Ü:7Þ;6ß<7â=7å@:ß;2ïKBóQFî6,ï,&ú*(û)(÷0-é4+Ø2$Ó6%Ç<'Ë=)Ð8*Ò6*Ó91ãNHÜNJÀ95·=%º<&¾9(Ä:/Ê@6ÚSMÊICÃF@âHFéOOíQUíMYøRføNhÿPoÿQuíWtü\xÿZtÿSiÿXiÿ[jùH\ÿMcªgaқ˜ûÙ×ûóñ÷ÿýøÿÿýÿüÿù÷þùõÿ÷ôÿööïáááÓÐèÞÕþ÷çÿÿëïø÷ÖßÞGC7D@4@<1=9.95,84+;60@;5H?6NE6VG4YI<XMQhg‡‡Í™¦õª²ú¯·ÿ³ºÿ´¸ÿ­¯ù 
ŸëŠ×yvÃpc©bZ“SQxBCW>9@QADdGL‰ai°p¢¼t¢ÂfÈLnÓ>TØ>FÂ8-µ=$­8&±8'¹:+Ã;-Î;1Ù?5à=6ß80×2,Ø3-æ?9ã:3Û4,×3)Ø8,Ö9*ñG:ö93÷))û*-ï--â4-Ù2*Ô0'Ì;*Ñ;*Ö=-Ø</Ù<3Ø=9êRQÕ?AÁ<)Â:*Á9)À8*Ä>3ÐMCËMA¿E:ÕHAÜNJßQPßLRñYeý^rúTlúPkÿJuÿVxÿUoÿ_pý_mõbläfjÓbd}WL¡zs´øèè÷ûú÷ÿÿþÿÿÿùýûûóýúõÿú÷øíëâ×ÑåÛÏÿúçÿÿæñöðÞãÝOI;LF8HB6E>4@91>7/>71A:4C@9LD7RE2WH5WLHc_vƒŠ¾”£è©¬ù­°ý¯²ÿ®±þ©¬ù¢¥ò™œéŽÝ‡„Ëpq±dh˜\XykPc‰JUµQYÏS[ðBcæF`×@QÍ3?åAHÞ>>ËA7ºE3¼A1ÁB3ËC5ÕE:Ø@5×90Ø4+Û4,Ø7/Ý:1Þ7/Ü5,×3)Ó6'Í4$Ò=)Õ>-òA9þ68ù,1ë12Ø/,Ù1.à44ê72ç92à:.×:+ÝE8éTMÖ@AàINÒ>4Ç7,Å;.Á<-½;-ÁB3ÁC4¼>/ÇD:ÏLDÔPKÔNMë^dÿiuùZnøRjÿRxýVpõkxîeoõaoîhsÓ{yŸqb_\Iyg[Ë­«öàâüøùùÿÿÿþÿÿùÿûÿ÷ýþøÿÿúÿüöïæÝìåÕÿýäÿÿäôöëëíâSM=QK=MG9JD8F?5B;3A:4B:7AB<JG8QF0VG0UH?a]nƒŠ¶“¡Þ¥¢ùª¨ü«¬þ«­ú¦ªô£§î¡¨ìŸ§è–Ÿâ™Õ‰“ƍ…ª˜h‚¹RcàCN÷8?é?.ØH.ÂK+¼D)ÞO?Ý97â<@Õ4:ÃA3Ä>2Å9,Æ2&Ï2)Ô3)Ø4+Ü5-Ý?4Ù8.Ü8.×3)Ô4&Î5%Í:&È:$Å:'Ý4-ú68ÿ6<ò;=á98á99å58ý*1ô./ä1*Õ5'Ç3%ïaWÎ@>ÕFHæHEÏ81Æ9/¿=/»>,½@.¹:'¿<*·;/ÀF9ÇLDËKHç`dûitýbrúXmû]tâjs䉆âvvÿo~ïaqõœž·šŒMaH`]NÀ¤£øÛàÿ÷ûýþÿÿýÿÿ÷ÿúÿ÷øþôþþöÿÿöý÷ëøóàÿýäÿÿâýþðûüîXO@WN?TK<RI:MD5KB3I@1I@1FC:MG7RJ3VK7VMHb_t…µ’›Þ£ŸöŸ üŸ¥ÿ ¬ÿŸ¬ò¢§é«¥ï°¡ô—©ý’¢Þ¬¼¿kzÝ@K÷5@ë;>ÖEBá:BÓ>@ÊC?Ð>>ëBGì8Aâ9<Õ;9Ë@;ËE<ÅG;µ:+¼8,È5-Ô0.á32ã=-ß3%ç7(Û4!Î6È7"Ë8(Ñ5)Ó1,Ï7*Ê7%Ñ6$à2'í0.ì+0ê-4õ*.î.-ã5*Ô7&Å2 
ÝN>ÚJBÔ@>àQSÓD@É90È9)Æ;(¾9&»>*·>-´:/¹=1½?1ÃA3ñmaíhaïiföpqÿl|ðckó`fõtoý{{ÿVmö€~¥†ThCadI½«Ÿýåãÿ÷úüúýûÿÿüýÿäÿíæÿê÷ÿóÿÿòÿüïÿøçýõàþøàýýýÿÿÿ\PBZN@XL>WK=SG9QE7PD6QE7JF=OI9VK5WL8VMFb_p}€«Œ’Η–Ú—™æšŸñœ¥òœ§ç˜¡Üš›Û—•Þ‘Ϊ…»Èl“ÐG[ÝAEâ@>ã?@ä=DÞ:AÑAAÂ>9ÖKHâ?Bç:>Ú78Í<7Ê@6¿9.ÇG:¹;,½8)Ì8.Ü71ê;8Õ4*Ú6,Ý7+Ö=-Á8%¸;'»<-¾8/ì?;Û=2È:&È:$Ð7%Ô0&Ø1+Õ0.Ù0+Ø6+Î5%Í<'Ì;(ãLA×:5ðLMúdcîYUØD:Æ4'Ã7&º9&¶=,²=,¯;.²<.¶>.¿D5Ü\QëgbïihêcgåTcógp÷djüvuøprýG`ñt„s’sQj@hoP˾­ÿõíÿý÷ûÿúùÿûõüõØýÞÕòÖãõÝôùåý÷çûòáïãÓôé×üùôÿýø\PB[OAZN@YM?WK=VJ<UI;VJ<OH>SL<XM7YL9VLCa[gut–„†¶‹½Ê’ב”Ù‘—Ó…ŒÂy‚¹owµ„p‹ºZí=dø4LæDBÖLBáEHö9MÜBDÎDAµ91ÐKDÝABà<=ÖA=ËG;Â8+½8)½<)½<)Å<*Ó;.Þ7.è71æ44è02í55à3/Î4,É91Ä22Ö=Bò14à5.É6&Ã;%Ç:&Í7(Õ=0Ô=2ÊB2Á:'Ä=)Ä;(Ì;*Û=4â66â-2Ô72Ü?8ÝE:ÕC6ÉA1º<-±<+¬<.«=0«=.­<,¶B3ÓZOèheëikìfmûo|ðgoýgpÿwzöelþ=Zïfx“wd€P|ˆbÍÉ®ÿùçÿÿñõÿñéúçÖçÔ´ß´´Ø²ÂܹÕâÆêìÖôíÛëÝÐíÜÒìâÙëáØ[N>ZM=ZM=YL<YL<XK;YL<ZM=TJ@XL<ZM:ZM:XNB[RWc^tsoy|€€²†ƒÆˆ…Ê‚€¿xx®oo¥gf VSÅITü8Lÿ4AðA>ÞIBåDJö;PÔDCÅD>°:.ÊJAÌ73Í/,Ï@8¾D5Ã:(Â;(¿:'Á:&Ç8'Ö9*à5+æ4*ø-1ÿ07ÿ,4ÿ.4ú17ô/9þ0Dý'?ö'/é/0Ô6+Í:*Ñ8*á=3æ;3æ;3ÓK;ÉA1Á8(È9+Ï5+ä?9Ý++í57Õ4*Õ7,Ð8-È8-ØPD¾@2±9+¯;.­<.®>0«<+­<.ÆQGãgeðotör}ûs}÷pwÿnyÿpwõ[gÿ9ZìYlš¥‡{œgˆ™m´¸•ÚÝÀ×æɿظ®É¨£½š¼†—Á£Ç—³Ì¤ÌÕ¸åàÍìÛÓäÌÊ×ƾνµZM=ZM=ZM=ZM=XK;YL<YL<ZM=ZL?[N>]N;\O<ZN@YNJ\RZ`Xgid‚rlœ}v¹‰‚È‹¿ˆs«’t¨‰cœ£?0½>5ß=8÷?=ø<;ð=@ëAJä?MÅ>8¶=2­<.ÆH<Ö>9Ñ30ÐB8³:)¿8%Ã<)Ã:'Ç:(Î8'×4%ã2(ð91ï3*ô-(ý''ÿ'(ü$)ÿ#2ÿ :ÿ3ö&3é*1Ú,-Ù//í68ö37÷/2ö.0Ö92ßB;Í0)Ï2-Ù72å@<Ù0+ã73Õ<.Ì4'Ì:-È:0æ]SÀ;2½:0¹6,·2)´4'²7(²:,µ?5ßebóuyùwýz‚ûu~ÿq~ÿitöUeÿ9\æO`•œzx™b“b”œu“x}–nq•is—iŸp±uŒ¹~’¾ƒ›½Š¤¹¾À¨ÚÌÃ׿½Ã´¯¶§¢[K;[K;[K;[K;ZJ:[K;[K;\L<]K=^M=]N;^O<^Q@[OC\OI\QOaSlh]‡wo«†|»›„¼¨y¥Áv¡»[‰Ô<9Å?4ÈI8ÚC8ö<?ý9Cë@HÏ@B»<3®:+ª;*ÅC6Ø:7à;9áJCÆD6À9&Ã:(É=,Ë:)Ð4'Ü4)ó>5î3,Ø9&ß3%é.%í,'ì*+ð(2ú"9ÿ9ð-;ô:GþJUî6Bð,:ù-9õ.3ì+,Ü41Þ97ÿ]\Ú86Ü=:Ù<5Ó6-Ð4'¿6$À8(Ã;/Ä;1ãVOÅ5-Õ>7Õ<4á<8Û>7Î;1¾8,º<0×\Wóuxþ†ùzüyÿpûcróPcÿ6YäM\v~W`~HmPy„Zs‚YfƒUj’^w 
h‚¥mƒ¯r‰·wŠ¸wº{‘¯}œ§…»²£Ë¶±¬¦ªš”˜[K;[K;[K;[K;ZJ:[K;[K;\L<^K<_L=^M;^O<]N;^N>_O@`PAdN[bUqoi•}v¢ž€¢Ð†ŸämƒâJcÿ:MìGMËD>ÑF?êADô?FäDDÅB:¸>1©:)ª9)Â<1çB@ì<>ß=8æZMËC5¿6&È:,Î8*Ô1(ç:3ì51ê-)è45ð37÷29÷4<ì1<î3Fó.Jÿ2Yë<IÿYfÿZjò?Rð7Gã2:Ò72Á8(Ç2+æQKéTPØ@?ØC?Ë6/Î:,É8%°<%´<+³8)Á<3ØGBÒ3/è<8ç4/ó-.ï31á90Ò;0¼6*ÐQJòtuÿƒ‡öw~ýz‚ÿmù`rñNaÿ7YÞLWXg<[wDnRz…[~ezšiz¥oªp†ªm‡­r‰³s„¶q‡¹t‹±v“¥}¹·¢Øǽ²µÄ™œ«ZK8ZK8ZK8ZK8ZK8[L9[L9\M:`J<_L=^M;^O<`Q<aP<aP<aO9gMPaSdmm‰}x–ž}Ž×€†üjmÿGNÿ'Oÿ9Rà@JÓBEÛBDßABÖB>Ã?3²<.©:)°?-ËA7á77å.2Þ52æRHìdVÈ>1É9.Ñ7-Û60å61é0.ô43ÿ&Cÿ#Bÿ>þ;ó"Aò*Nò'Tÿ7jü\høZhîM]à9KÞ9I¼,,¦7&˜E%µ:*ÑSEÅ@9Ä:7ÜNJÇ9/Ç8(Ç9%¥?(¦;'©5(ÈH?ÛGCÝ52ë40ò1,ÿ(-ú,,ë2-Ú:.Å7+ÅB:éljÿ‡‰ü}„þz…ÿgyóZlëI^ÿ5VÕEOL_1]wG|a‰”lŒsˆ¨w}¨p~§kƒ§i…¦m„¬m±j‚¶nˆ²s”«}»¾£×˽ªµÉ‹–ª]K7]K7]K7]K7^L8^L8^L8^L8]K?]M>[N=]R@^SA`P@cPBdNCmKIgWbqv–‰¸™Œ¸»šåyyùl[ñO\ëCPóDQì<Gã:?×<:Ì?6¾:-¦?,­9*Æ@7Ð83Ù61Ü6*Û:&Ø:!ç?<çMCÉ<+Ï=.å@:ê16ó0>ù4Eü%*ý')ý''ø&'ò$(ñ)3è(7÷:NÿcmýX_ðJLÔ=6¿A2«B-“2”3"§60ÇGDÄ44ÖBBØICË>4Ç4*Ï5-¹=1¶0'Ä4,ØC<áI>Û4,í0.ÿ.3ò12ó/0ò0.ä6-Ê8)¹=1ÝjeþŠ‹ø‡‹ÿlÿZvÿNkù=^ñ>^¨LOHV5^€Dvš]z e~§k}¦j|¤h|¢e¡d}¦n~ªk}¬d²cˆ³k‰¯t”³‰¶˜„‘epr]L8]L8]L8]L8^M9^M9^M9^M9_L;aP>\O>YPA[QH^SOeUViY\k]]mi‚y¹…ŒÜ—ŠÙ°‚³Ñ}Šì~qÿaiòKS÷EQñ=Fã6:Ö;7ÇA5¹@/«=,´@3ÆG>¿4-Î70×9.Ù7(Þ9&Þ2.åG<ÕB2Ó;-ê?8î15ù5?ÿDRó/3í)+ð,-í++é-.ä-2òAKýN[åNSèGLá=>Õ:6Ã:0¹?4½D<´94©2,ÌFE×BDëSRÎ=8ÏA7Ò>4ÞA:á=>ç=>è:<íEBÝ<4Ú3+ê3/÷12ô02õ/0ó0.ä6-Ì:+´8,ÝjeÿŽŽû‰þi|ÿVqÿFhú;]éMe€:8P[=c‚Is”[vœaz¢fx¡ewŸaxŸ`| bz£i{¨g{©`~­_ˆ³kŠ°s„¡uq‹hKZU5D?_N<_N<_N<_N<^M;^M;^M;^M;`L3_O8ZN>YRL_X_qjzwm†vk‰otˆt}¨{„Ï…Šè–Œãž€´©w‚¬m[ç\Wö\\öLOí8?ç7:Û<8ËC7ºC/³=1«6,ÍXNÉPE¼9/Ì9/Û9.á5)â3.â>4ßC4à>/ä3+ð/0ô/6ÿQYÿZ^á46è:;ß33Û33Þ9=åFJÇ,2Ë24ß<?Ü17Ô/3Ô;=éX[õbj÷_kÊIDì^\õ[[×<:Ô?9ÜIAÖ=5×6.ú.:÷)6ô+5þAEÞ2.Û4,ã5.ï4/ö/2ø-0õ/.ç4-Ð<.±5)Õb]ÿÿx‡ôfvúMiÿ7_ü:_ÇN]`<.ZcFg„Nu“]yœbx btž_u^wž_| 
by£c|§by¨Z{ªZ­b‡«m‡¤t{”mizj[l\_P=_P=_P=_P=^O<^O<^O<^O<dP5`O;[QH_Zarpˆ€~¥|z«zu­s}¸|„„ŠÆ••É¦ŸÁ»¬³Ì¶¡²˜s¯R@²B4ÚLHðJLì<?ä<;Ø?7È</¿<4²7/ÄUJÏeWÂQCÀ8,×6.ä3-ä2.à8/äB3à9)è1)ñ/-ñ01ï56ñRWøW]åBGèEJåDIõY\Ö@BÈ35ê=?ï:Aè.9óBLÿ`iÿeoú^kþYjê]VÞFCÕ23×85ãJDáJAÖ92ß82ü)8ÿ-:ÿ4@û=?à3-Ü5,ä3+ð50÷/2ù-0ö..é4-Ó=/³5)ËUQÿ‰‹ÿk€ð^qøIfÿ5a÷Dd’DDMG1V\Bf~Nt]z›bu›^o˜Vp™Wt›\y_x¡]y¤\w£Vy¥V~¨^‚¦f‚žkubey]XlP]M=`P@_O?`P@aQA^N>bRBaQAcR@^QH^W^om…y{¤}¸tx¸nq¸uxՋŒÏ§§ÁÃÀ±ØÑ¥áØ¡ß֟Ö̙ġƒµyaÄdVæ`]ëKMç:<æ::Þ63É83Á<5½K@¾WHÒdUÂ@3Ñ4-à3-â30â;3Ü8,á5)ê2(ï/*é2.ã4/Ù>BîRVéIQÿemÿckÿ`hÿekòY\ÿ<Dú0<ñ)6í3@ÿXbÿepÿcnüZiâGBÙ53Û11Ú51ÞA:Ö<4Û81ä84î7;ð6;ð69ä20Ü5-ß7,ç2+ô1-÷03ú.1÷//ê5.×=1¹4+ÅIGÿ‡‹ÿc{ôVmùDeÿ8cßMdc8/9C(QQ9]oGt‹]v•\q•Wk”Pn—Sr™Xt›\uWv Xt SržQsUnS]yFMd6@V/>T-YL<]P@^QA`SC`SC`SCaTD^QA\QOcYbqm„xz£v{³mr¶`f°ouÁ‰…樤áÉÇÌäà³íé îçŸâۤʛ¤¤Š—…oœgYÆiböttèLOî?Dì8;Û<8Ó@9¹;/­<.Ô`QÙWIÌ5*Ø4+å=<íJE×4+Ý2(å0'ä-'ß4,Ü>3Å1-ÞFEíQUíMWïMZøXbÿktçLRô-4ú.9ñ'5ÿCQðFOóTYÿ]díDKà72ê;8î<:â62Û:2×90ß82ë97ß;9ã:7æ95Ü3,Ú8-ß7,é1)÷0-ô23÷/1ö0/ë4.Ý?4Á6/ÎHIüu|ÿ[vÿLlý;`ðBc°LV[F5;C+JE1Qa=jTqŽVkOhJi’Lm–Tp—XtœVršSl–Nh’J`‡DUw;Kf3G^0Hb3Pj;UH8ZM=\O?_RB`SCaTD`SCaTD_Xjvrxw¡wy²jqµ\d­iqº…Ïœœâµ·çÎÍÝÛÙÌàÜ¿ÜÕ»Îĸ²§¥†Žƒ~zqri¢ibÛ~yõutø^^ñGHé=9â@;Í=4ÄB5º:-ÎI:Ð>/Ò9+áACû`\Ý@9à91æ5/â4-Ó5*Ä5'º2"Ê=3ÓAAÔ=DàDOñWaú`jäMTì:8ò;?ýBKÿR[Þ3;â8;øDGô38ë83ñ97ï64ä50ß<3Ü;1ã73ë54ä;8ç98è96ß4-Û9.Þ8*ç2)ô/)ð43ó11ô1/ë4.Ý<2Å4/âSWüjuõRmÿ:bÿ7bÜRiLF\TARM9NF3FT3]oGj…PiŒJfŽGg‘Ik”Rn•Vr—Tj‘NcŒH_ˆFYBUs=Tn?\qFWr?Xs@OC3UI9XL<]QAcWGbVFaUEk_Otoxvtv©jo¯]e­em¶yÉŽ•Û¦Ï¬´Ù¶ºÝ¼½Ü¼¹Ö¶­Ê°£Á™Š©utykegvgd…`ZZRÖohÿzuÿigÿPJíD=ÜB:ÕG=¿7+Ç>.Ì=-Ó@.×=?Ô<;Ô;5Ü92æ93à70Ò;0ÇB3¾B(ÅD1Â91Ä57ÜIQèU_êY`ØGLÏ7,Þ:8é<@ç:@ã6:ì8;ù25ÿ,1î73ï31é-+å4.á=4à=4å95í55ì46ï26ï34â0,Ü8,Ý:+ã3&ð1)í53ð31ó1/ë4.Ù6-Í84ùfló[héIeÿ1aÿ7e¼IXW@0RM9[K;M@0>I+O_8c{GeˆFbŠAcEfMi‘SlPeŒK`‰G_‡H_‚H^|HaxLezQ\wB]xC@B7JE?TG?^M=aQ:`UCd^^pn|wqŸws¦hg¡^^¤gf´wuɊ…ߘ’쟛֡Ø¢œØ ›Õ 
˜Ñž•Ê™Ã†|®lkqe\]oXR…UK¢XM»ZQÛphï‚{ú€êdcçSQÉ2+Ä6(À7%Ç:)Ó=/Õ9,Ô<.Ï7)Ú:.á90Û7-Ë:)À>(Ã:2ËB:É@8¾5-Æ=5×QHÛULÊD;Ï:"á7*ð,0û'6ÿ(@ÿ'Bÿ"@ÿ?ÿ:ÿ'<ó.8ç9:Û94Þ;4å95é62ì25é54â70Ù5)Û9*Ü9&ß8%â7#ì3.ð3-ï5)æ6)Û9.ÝABÿdtùJgÿ?bÿ6`ìGgv6678&EC4HB48G2FH Y_;csOd€Oa†Cb‹?hŽElPk‘HgŒGf‰Ie„Ie‚JdHeGd}C\<a†A69.??7KB;VH;[N;\TGechpnƒqk—b`‘XX”]`¥mo¼|}ψ†ÙŽŠÞ‘Î‘Ð‘‹Ñ‰Ï‘ˆÉ‘ˆÁŽ†µ~v]if[Y_]NvVG±ZSÔXZãggàuoôƒ}ÿ‚ùngÅ7+Á:'¾:%Â9&Ë9*Ø8,Î6(Ñ>.Ë3%×7+Ö6*Í:*Â;'Á7-Ã9/Ç=3Ì?6ÎA8ÏB9ÐC:Ê=4Ò7%ä6/õ.5ý(:û&<ù%<ü":ÿ9ÿ 7ü'9ô5=æ::Û94Þ;4æ95é62ð37ì65ã60Ù5+Ù9)Ú9'Ý8%à7$ß5(á7(ß8(Ú8)Ô7.øW\ÿQhûBdÿ5_÷=b¸DYK)'-8(6=+C<)F@(W`1‡”isˆ_`~J`…@cŒ@jIlPm“JhHh‹KfˆLe„KdGgƒHeFeŠEjJ.4(893B=:LC<QI<UPJdboii…baRR†VW—ei³uxɀ‚Õ†‡×Š‰×ˆ‰É‡…·‚Òˆ€ÓŒ„ύ…¾‰ƒ§xsŠO\hYRSeKh\D§OKÝPYê_fãooäul÷|tørgÃ:*¿='¿='Ä;(Ê8)Ú6-Ñ9,Ç:)È;*Ñ;-Õ;/Í7)Ê;+É;/É;/É9.Æ6+Ï<2Ï<2Ð<2Ê6,Ö-&ê24ø.:ý-=ú0@ù0@û.=ÿ,;þ+:õ0:ë6;Ý54Û94ß:6æ95ë54ó49î66ä71Ø6+Õ8)Õ8'×8%Ø7%Ñ:%Ò='Ð@(Ñ>.Ø=8ÿ]kÿBbÿ9eÿ/[ÝQkc28933/6.4@,=<p^:“¨o­Äma‚Ka†Ai’Fq–Pp”Vn”KhHiŒLi‹Of…Je„Ih†Hg…Ei‘Kn–P(0#/4.764?<7GD;MLJ\^k_`~][ŒWX‘fh²xzÑ‚ß„…ჄׄƒÑƒ†ÉƒƒÍ„€Õ‰×Œ„Ï‹ƒ¶ƒ~•pkr‚PQhZMYhKeaF•WJÉUVãX_îagïtlæaXÔD9É:*Æ;&È=(Î;+Õ7+Ó/&Ï9+Â=*¾;'Ë>-âL>Ð:,Í9+È9)Ç8(Ê8)Î8*Ò:-Õ;/Ú>2×9.è47ö7?ú2?ö-=í/;ë28ï05ó/3ò5;í6:á55Ý86Ü86á96ç77ë46ö5:ï79ã73×6,Ð8*Ï9*Ï9(Ð9(Æ:!Ä>#Ä@)Ë<.îIMÿOhÿ;cÿ2döAb’BK>=8<7;C369>(K[,že¬ÊŠ¦Ã‹…£qbƒJ`…?l’Es˜Rp”Vl‘Kf‹FgŠJi‹Ne‡Jd‡GgŠHgŠFk•MpšR(0!).(/10783=>6FHCQT]Z\u[Yˆdcwvą†ãŒ‹ñ‡‡é€€Ø‚Ñ‚†Æ††Î‰…ÒŒ„ÑŠ€½†|¡yq|ha[i^Lh]Kg_Hj`EuYA¦^OÖTTþR^êZRÕ<6Ù80Ø;,Ð9&Ñ:)Ø6)à3,Î1(É;-ÂA.½<)Ã7(ãOCÊ6*È9+Ê;*É:)Ê7'Í5'Ô8+Û9.Ø4*ä=4ö=Cñ1<ó/=ì1<ä5:ß77ðA>ô<:ê::å97à85Ý86à87æ::ë8;í9<ô5:î79â:7Ð8-Ç9+Ä;+Ä;)Ä;)Â;%¸7!¿>+Î93ÿWeÿ>\ú3\ú;f°HQT905C6G7:U.1QM2t“XŸÃ{Ÿ¾‚”²~l‰YYwA`ƒ?k‘Dl”Mj‘Pg‹Ec†BgŠJfŠLhŒNgŒIgŒFm“JqRu¡V/3$.1*00.45/:<.BD7MONUUa\Zrjh|¿ŒãŽïŠ‡â€}̀}……ËŠÆ‹‡Â‰´…{žyn~odbh\NXeKe]JpYIr\Gq\A‘WAÖWPþEMÜ94á51ã5.Þ6)Ô7$Ô9'Û5)ã2,Ã7(¾9(Á@-Â:*ÔB5Õ>3È:,¿<*Å9(Ç:)Ê8)Í5(Ô6+×3*á81é<6í6:î3:ì3;ç:@êLMù`[óPKæ95ß82ß82á85ä;8ä88è8;ë8<ë8<ñ48é77Ü:5È8-½9*¹<*·<*·<*¹<(¯8$°4(ÜFHÿNaô9VëGbÕJ_cE:ED08?-C5(O5&zwT‘³vÀuœ·„…žtMc<Sm=a?jŽDh’HeKd‡CeˆFjMlPiOi‘Kh’Hn˜Lt¤Xx¨\:;+99/8939;0>A,CG0LM?RQMXXNecn
xvž‡…Æ‘ŽÛŒˆÒ‚~¹}y«|µ…³„~¤~vxmun`]k[NkZJb_Lh\Lq[M{\J‹XEªQAÑD;ã1/à40ã5.à2)Ý7'Ð9$Î;'Ô8)Ý5,¸=+³6$¿;,È8-èKDÕ<4Ã;-³<(Á9)Ä8)È9+Ï7,Õ7.Þ71é=9æ74ë97ì9<ë=?øXZúkgÔG@Ð72á<8á:2á83ã73æ66é69ê7;ë8>è7=ï79ç9:Ø=8Æ<1´9)­<*ª;(©<(§<*¡:)ª8.éX_ôI\éG\ÆSZ YSIQ<KD1@<!AR(awF…“`¢j’´wŠ›wVeH?N1Se=eBjŽDg‘E`Gf†CiŒJlOn’Rl“Pk•Mk—JrŸNx©Zyª[FC2FD8EE=DE7FI.JM.QP;VRF\\8]^Pkj|zx§ƒºƒ€·zt ol‹lgql•pk‚ofkk^Uk[Ko\Kr]LuYNn^OmbP^O¢RI¹82Ð3*Û5)á6/à3,à6)Ô7$Ê="Ç?%Í<)Õ9,±A+«4 ¾:+Î7.à=8êMFÇC4©;$Å@1Å;.Ë=1ØA8àA;ã;8æ87è88ç:4ç:6õOOãKHÀ71Ã<6ÖA=ß;:â;3â94æ95ê88ë7:ì7<è7=ç6<ê69â88Ô=6À</®9(¦;'¡:' 9&—:(—<*­E<îelîM_ÓGR§ZR‚mR?Q;K8'XM-o]x¼oy—Y‡‰Xw€QDN39B-<E0O]:gEmEg“F^Gk‹Hn‘OlOkOr™VrœTržOw¦Uwª[t§XUL=UL=UL=UL=WN?YPA\SD^UFcZKcYOi^\relxku{mzuhri\e^]X\YR[WL_WJd[Jh[Hl]Fm_EdhQyUG¢UM²TH¨I5¨=)Ç9/à02Þ7.Ü8.Û7-Ø6+×7+Õ7+Ô8+Ô8+·=.ª5$®6%¾<.É6,Ô:0ÝI=ÍD4ÛHAÑA8ØC<ß;9é77î79å76ß>6ÇD:·9+±7(½?0Ç?3Û>7è88ó57æ7>æ8:å97ã:5á81å95æ:8æ89è:<Þ;<ÎA:·>3£<+™<*•:'“8%>-‘3+ÒZ\ÑZ^¬QNšZP–]R™^VK_ Vm)“­d˜³ly‘UYj@LV><B6>?1<>3;A3IX9c|Eq’Kl’Gh’Jm‘Tm”Sk“Jl—GpHq¡Mv§UyªYn—]]†L_VG_VG_VG_VG^UF_VGaXIbYJe]Fe]Jf\Rg\Vi^\i^Zg\VbXOYWKSODQMAVPB[TB_XEe]Hh`KdeS€\P¯ZWºBAÀ=5¾:.½9*Æ>0Ú6,Ú8-Ú8-×7+Õ7+Ô8+Ô8+Ò9+¹?0§6$£6!³>,Á<-Æ3)Ð<0ÕF8Þ45Ø22à::ê<=ï5:ë16æ68Ü86´9*¯5&­8'µ<+ÈC4Ø?7æ95ï56ë7:ê88è96æ;4ã:3â;5ß:8Þ88Ú<;Ô@<»;2¥7(œ;*”;)9&Ž7$v9&>2¶PL®LIœQK•\Q•aV˜_V~[£µ“§tUh:8J$=J09?18;4=>0;=2:@2HW8e~Gt•Np–Km—Oq•Uo—Ql•In™IqžMt£Ryª[nžRY‚HNw=d[Ld[Ld[Ld[Ld[Le\Mf]Ng^Og`FiaJjbOi`Oi`Oh`Mh`KiaJc]MZTDOL;LI8NM;QP>TUCXYG^\OsWL³eaÎLNÝ8>Ø63¾=*«H+Ö6(Ö9*Ö9*Õ9*Ô8)Ò9)Ò9)Ò9)¹?2§:&—7¤@(¶>-Ã9,Ê6,Ô=2ñ1<é4;å6;ä5:ç4:ç6<Ý9:Ç/,¬;)«<)ª;(¬9&¸9*Ð=3à;7è96ï56î64ê93æ;3á=4Ý>8Ù?=Ô>=ÆA:·<4¤6)™9)‘<(Œ;(‡9%…6%w:'ˆ@2¡SI›RI“YMŒ\N^SšaX‚ey„bCP6-:(1<.8>4>?7::0;<.:<19?1GV7g€Iu–Os™NpšRršQqšNn™JsžOw£Ty§\qŸW\‰D\ƒLSzCg^Og^Og^Og^Og^Oh_Ph_Pi`QkbQjaPh_Nh`MiaLh`IhaGhaEf_LaZGWR?LK9FG5EI8EL<EN=QOCYPA{QC¹SNäEIä67Ì=/­A'Ð7'Ò9)Ó:*Ó:*Ñ:)Ñ:)Ñ:)Ñ:)º<0¨=+’8—=#¬?*Á;/Ê3*Ù<5ñ8@ê@CÚ>?Í97Æ22È:8¹82«6,£>,£@+£@+¦=(¬9'»7*Ì70Ý@;ì95ë:4è;4â>4ÝC9ÒB:Å>:¾:8¢:-š6'’9)‹:'ˆ=*ƒ<*€9'}6$z5&L<“QC“XJZN‘]R•\U™ZUU]8=F)9B12<46<8CF?EF8=<(:;-8:/7=/ET5fHu–Os™Nq›SqœLp›KqœMx¤W€«ct
ŸX_‰G`ŠJ^…NSzCi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qj_[j_Yj`Wj`TjaRjaPjaPjbOh`Kg_J`[GQP<FI8AE6>F7<G7DH:AJ5RK1š`LÛ^XëGHà>;Í=2Ë8(Í:*Í:*Ï<,Ñ;,Ñ;,Ñ;,Ð:+¿:3±=0–6 –6®;)Ã9.Ú?:éGDäDFÌ>:°:0¥;.£=1œ<.=-ˆ?,•<*•>+˜@,œ?-¢=+©8*¶8,À;2Ø;2Ü?6ÜB8ÖB6ÎD9»<3ª5. 3,<,Œ;*†:*ƒ<*}:){:(z8(w5%†8,–NBŽNBSH—XO•TN˜YT–YVR[0OY6=G.5@0IQDZ^MHJ2>? ;<.:<18>0FU6hJu–Or˜Mq›SrŸJqžMu¡R}¨`y¤_cŒJa‰JfŽP_„PTyEi`Qi`Qi`Qi`QjaRjaRjaRjaRk_ak__k`ZkaWkaUkaUkaWkaXlaMjbOe^KYVCMK<DF8AE6=C59E1DI2RD*œeP¼TIî^]íHLç>AÈ:,È9+È9+Ê8+Í;.Ï;/Ï;/Ï;/É:6½=4°</µA2ÎJ>ßJDçHDáB>ÐEBµ?5œA/”G3F4‡B2B0wC.ˆ9,‡:*‰<,‹<+<,•9*ž9-£;.´8.º<0»=/¸=.­7)¢8+˜8,“90†=.ƒ;,:*~;+z;,x:+s8*r7)’B9J>ŠL?“SJ˜QM™RP–\X•i`‚Ž`o|QAO+O^?boSOX=AE*<?"9:,9;09?1ET5hJv—Ps™Nq›Sq¡MpŸNy§\z¥^fM\„EhPcŠI^ƒPRwDi`Qi`Qi`Qi`QjaRjaRjaRjaRk`\kaXkaUkbQkbQkbSkaWkaXnbRkbQkbQd]MYSCNK<GE6CA2:J/RA-‹MB¨E?°:6ãa_õdgíRXØNDÏB9ÑD;ÖG?ÚJBÝJCÞKDàKEçMOÝML×NHÜMGáFDæBAäB@áC@ÒGD³>5šA1ŽE2ˆC3…@1€>0z?1‚90ƒ:1;/€</€;,‚:+†:,ˆ:-“9.˜:.œ:- :,š9)’9)‰:+„;,‚:,€8*|6*{7*x8,w9,o5)r:-I<„J<‡RD–WNšPM—VR‰aWueUQ`9EV,Zm@cuKL^89F(=E09=.9:,8:/:@2BQ2c|Et•Ns™Nq›SmžLu¦Wy§_i“Q[ƒDd‹LfJgŒG]MRtBi`Qi`Qi`Qi`QkbSkbSkbSkbSlbVlcRldOleKleKldOlcRlbVnbTkbSlcTi`QaZJVO?NH8HB27J*mF5¸GKÆ3=É<BÈLJäjiÙX\ÔKEÎC>ÑFAÎ@<ÒC?ÔC@ØDBÙECà?DßEGàHGãCEì?Cí>Cå??ÛB=Ó>@µ82Ÿ?3>0Š:/‹80ˆ73‡:4€;4:3|<2{=2y;.z</y;,x:+~:/ƒ9.Š8,9*Ž8'Š;*ƒ<*~=+€8,}5)|4(z4*v3*u7,o5*t>2‰QB€Q?†XHSH—RMUQw`PBL3L[<arN]sELd47L#1?%<D97;:;<.8:/:@2>M.[t=p‘Js™NrœTq¢Qu¥Yk˜S]‡Ga‰KcŠIfŽHd‰C\~LPr@i`QjaRjaRkbSjaRjaRjaRjaRkbSkbSkbSkbSlcTlcTlcTlcTq_QscVndXicUgdUfaN]XBQK1NB(lP;cW¤LH¼ILÉFKÐHJÓIIÓGJÏ??ÔC>ÌC9ÆG8ÈH=ÔDCÞ>FâBDáCDâDCãEDâDAáB?â@=ä@>ÌC=±:2™;1†>2}B4~>4„93‹82…<-„;,ƒ;-‚:,9-9-~:/~:/}90}90}90|90|90z:0y9/w9.w8/t8.r6,p6+n4)l3(n5*y@5ƒTLˆQJ“OL—PNRJ€_NgcFYiE^wMRi?Ma<FS59A)69&>?1@>23;05;/;=/?C,ZfBpˆTr—RržQz 
cu›^`†I]ƒFa‡JdŠMdŠM_…HXzHJl:i`QjaRjaRkbSjaRjaRjaRjaRkbSkbSkbSkbSlcTlcTlcTlcTrbSl^QndZkdZnj_…‚sˆ‚rxq^NV>CE/L?.Y=/`8.l:1yC9…LA¨SL·KHÉCBÖB@Ô@<ÔE?×FCÛEFßCDßCDßEEÞFCÝEBÙD>ÚC<ÛD=ÔE?¾B:ž9/‡;-€A2~>2„:1‰90„;,„;,‚:,9-9-~:/~:/~:/}90}90{8/{8/{;1y;0x:/x:/t8.s7-q5+o5*m4)l3(j4(u?3‰PIŽQL’SN‘VPŠ[Qu^LiiO^mNG[8AR2DS6FS9?I19=,;=/9;-5<46<2;=0=@+R^:h€Lp•Pu¡TxžadŠM[D[D`†IbˆKa‡J^„GTvDFh6haQibRibRjcSibRibRibRibRkbSkbSkbSkbSlcTlcTlcTlcTqeYoe[mf^~y›˜“µ²«¸´«®§€qQUD?H5:G3=J89D3:@2=A3Q;.ƒJCµMNÒEKÛ@DÝCCÝEBÜD?ÞDFÞDFÜDCÚDC×FAÔE=ÑG=ÑG=×@9ÉC:ª>2Ž<.ƒ@0€>.ƒ;-‡;.ƒ;-‚:,9+9-~:/~:/}:1}:1z:1y90x8/v7.x90u9/u9/s9.q6.p5-n3+l3*j4*h2(g3(q=2‘LG‘RKVO‚ZPs\N_VERWC@L65A-2>*6B.?H5@I6<E28A04=,4:65:49;0;>+KV4d|Hs˜Sx¤Wp“Y[~D]€F^G`ƒIa„J`ƒI^GQuEAe5haQibRibRjcSibRibRibRibRkbSkbSkbSkbSlcTlcTlcTlcTmd[mi`‰†¶¶´ÑÑÑíëìïëèåÝÛغ°ž‹}c^KCH4>J6>F7=>8>966@5K61‹KLÅbeÌTUÙOMâHFçAAÞDFÞDFÝCEÜBBÚDCÔE?ÐF<ÐG=Þ?9ÏB9·B8’9+†>/>-<,„=+‚:,9+9-}9.~:/}:1}:1}:1y:1x90t8.r8-r8-q8-p7,o6+o6-n5,l3*i3)i5*g3(d2'm;0•KHSKƒXOo\M[UEFJ;9A22:-47036/06,2:-9D4=J89F41@+/53162780;>-DO/ayGt˜XsŸT`ƒKVyA[~F[~F_‚J_‚J^I]€HOoF?_6icSicSicSicSicSicSicSicSkbSlcTlcTmdUmdUmdUmdUmdUifaŒˆÃÅÄéíîõùüþþÿÿþüýøõþãÐÞʲ§ 
„efGGJ/HC0J92N66=?<LDBƒkiÖ¯ªà¤œÄe_ÙOOéBIÚEGÚDFÛACÚ>AÚ>?ÙA@ÓB=ÐA;ß@:Ï@8·A7—9-‰=/>-€=,=*9-~8,}9.|8-|90|90z:1z:1w<4u:2q8/p7.m7-l8-l8-l8-l5.p92j5-g2*e3*_/%`0&h8.NI…TMrXKRL<;D34A04<14:0775664350/6.0;-7D39H52D.-3/.3-45-:<.>H-`wKv™_l—RYzETu@XyDZ{F_€K_€K]~I^JMhG9T3icSicSicSicSicSicSicSicSkbSlcTlcTmdUmdUmdUneVneVrts©­®Úßâôüþûÿÿýÿþÿþúÿÿø÷þßçñÎÌØ°žª‚dnKGJ/JG6I@7TJK†|{µ°¬ßÜÓÿõêÿÚÐØzzÌKPÎQMÉHCÒFEØDDÖ=?Ø@?ÕA?ÓB=Ù>9ÐC<¹B:š<2Š<0>.=-€=-9-}9.}9.{8/~;2{;2{;2z;4p:0o9/l8-k7,i7,i7,h8,h8,m80l7/j8/d2)c3)^/%^/%l=3}VQlOGMA58:,/:*2=/5=27:179668338119..9+2?.7E49H57>6.4*/1&57)>H0e{UxšhaŠPVtBTr@VtBWuCZxF]{I^|JZxFGZF/B.gdSgdSgdSgdSgdSgdSgdSgdSkbSlcTlcTmdUmdUneVofWpgXv{¦®±ÔÞàòüþ÷ÿÿûÿúúüñö÷éñÿçïüàçñÏÆ̨•˜yTZ@EL:AL>ƒ|v¸¶ÚÐÏðìéö÷ïÿúðÿóê굯¼eT·UH½MBÃD>ËA>ÐB@ÑC?ÎC>Ô@<ËD@³@;™<4Š<2=2=1=/~:/}9.|90{8/|<3{;2y:3x92m80l7/i7.h6-h8.g8.g8.g8.i70g5.h70e4-c4,]1(]1(rF=YLFA81;8/69.4:.5;/69.57,39/39/19.19,.9+/:,3>.9D4;C44:,13&13%=F1m‚cr’iNuFOl<Pm=Pm=Ro?XuE]zJ_|LSp@:F<%1'gdSgdSgdSgdSgdSgdSgdSgdSkbSlcTlcTmdUmdUneVpgXqhYipx“œ£ÊÕÙëöøóþúùÿöûÿîúüçûöðÿöìÿñáæѾ´¢ŽneVWYNisk«¯¡âÙÒþêìÿòöÿûú÷þööÿóûÿóÿÝÃ˃m«RBºK@ÂG?ÉE@ÈE=ÆC;ÒD@ÇC?«;7˜;6‰<4‚<4<3=1~:/}9.|90{8/z:1x92v70u6/k90j8/i9/h8.f7-f7-d8-d8-g6/j92i81m>6m>6e90}QHvKB5:349239/36-57,35*35*46)-8(-8(-8(/7(08)/7*08+2:-9B-<C1:<.13%>G4fz_YxVAg>Jg9Nk=Kh:Mj<VsE[xJ\yKIf8175"(&heVheVheVheVheVheVheVheVheVkdZlc\meXidPmlXghc€€ŠŸ©«ÉÎÑíîðýûüÿþüÿþùùüó÷ýóøùóøúì÷ùáØÜ»¤ªˆclOivbŽœÅÅÃëëéøøöúúøþþüþþüþþüþþüîÿÿÿýüÝ­©¯UMÁH=ÊF:ÉD;ÅE<ÂI4¶K7F3ŠE5C6‚@4„:1Š7/~92|91z7/w7.w7.w8/u6-p4*i81h70h70g6/f4-i70i70h6/k4/m80k;/j@0hI7kXGxgjgV4:039/28.28.17-06,06,/5+/4./4./4./4.05/.3-.3-05//:24@67C74A0<I7UcLIW>ES9Jd?Ke>Id;Kf=VqH\vQYrR=V8(-1$)-heVheVheVheVheVheVheVheVheVkdZlc\meXmhTlkWtup¤§°ÌÖØõúýþÿÿÿýþÿþúþýøô÷îíóçñõçóöåððØÑÒ³¦¦Š€„m•‡°·¯ÜÜÚööôýýûþþüþþüþþüþþüþþüüÿÿÿûúÿûóÿÚʸyg£J:¿QDÂH=ÇC7ÁH=¦?6–A:‡@:@9{?7z>6w<4v;3t;2s:1p:0p:0m9.k7,m82l71k60i70i70g6/h70j92i=4h<1iB3fE2iP<obOŠ‡t\]K28.28.17-06,17-06,06,/5+.3-.3-.3-.3-.3--2,-2,.3-*5-,8.1=16B4:G5?L86D->L3F^>Jb@H`<F_8UmI^vVTkO2I/',0$)-heVheVheVheVheVheVheVheVheVkdZlc\meXkiTnp[|~y
¯³¼ÝçéûÿÿüýÿÿþÿÿÿûúúòèëàÞäÖçìÖëïØêêÒ×ÓºÆÁ­Â¼°ÐÍÈáàÞññïüüúþþüýýûþþüþþüþþüþþüÿùüÿÿýøÿûóÿôýüèÊ¥’¢WD³O?Å@9ÃD>°<<ž::>=€@>tC>mE=q?6q?6p>5o?5m>4k<2h<1h<1q62p62m61i70j92f:1e:1f>4aD6dI8cJ6_K3aR;ojT‚mAJ528.28.17-06,17-06,06,/5+.3-.3-.3-.3--2,,1+,1+-2,,6.*4+,6-/:,1<,5@/2>*5A-;M3DV<H[=K^@XkMbtZPbL+<*%*-"'*heVheVheVheVheVheVheVheVjdVkdZlc\lfXmkVjmXovož§®ÚãèùþÿýþÿþýûÿþùõõëæêÛâé×æìÐèëÐçåÎÜÕÃÚÐÆêßÛùíïÿõûýýûÿÿýþþüþþüþþüþþüþþüþþüÿûúÿÿýóÿÿîÿÿóÿúÿÿïìȲ­s[µN?µK>³H@¢<8”<8…@9uC8nF:q?8q?8q?8p?8qB:m>6j>5k?6r73p62m82i81e:1c?3aA4^A3XK8g[E_S;ZS7[W<oqYjt\1?(4:039/28.28.17-06,06,/5+-2,-2,-2,-2,,1+*/)*/),1+*4,*4,+5,,6+-7,4?18C36A12?-9F2AO8IW>WeN`mYKXG)5)(..%++heVheVheVheVifWifWifWifWkeWle[md]mgYnmXkpZmwo”Ÿ¥ÇÐÕðõùýþÿÿþüÿÿ÷ùùíòöåèðÛëðÐçêÍäßÌÞÔÊçÙØøéìÿõùÿúÿþþüþþüþþüþþüþþüþþüþþüþþüýÿùýÿþýþÿþýÿÿýÿÿüöÿþìÿôÚן„›]DŸWAžO>ŽA1…@1y@/rA0t=8u>9t?9t?9r@9p?8o>7n=6l:3m<5j>5e?4aA4aG8M:)F5#LN6HH.[Z>_^@\_Bsy_MW?0>'6<25;14:04:04:039/28.28.16016005//4.-2,,1+,1++0*,3,+2++2+-4,-4,1809A6@H=8C34?.5A-BN8Q]IZeUDNC-7.)/+(.*heVheVheVheVifWifWifWifWkeWmd[md]khYkmWjr[lyp–¥ªÊÓÚñöúýþÿÿþúþþôùúêðõßäìÔÝáÀÚÚÀÞØÊæÛÙõéíÿôúÿùüÿüüþþüþþüþþüþþüþþüþþüþþüþþüöÿüýÿþÿúÿÿ÷ÿÿøÿÿúúÿÿöøÿðÿüãýåÍʨ’dM„P:zF0xE0p@,tB9sA8o?5n?5m>4j>3j>3i=2eA5dB6aD6[D4WF4aVBQK5HC->G,>D(DG*abCmmQkoV>C-9B-9?58>47=37=38>48>47=37=36;55:449349316005/-2,+0**/)*/)*/),1+/4.,1+160=B<?L;<I74A-2@)CP<MZIAMA0<2.5.,3,gdUgdUifWifWifWifWifWifWldWmd[md]khYjnWhrZeuk’¤¨ÉÒÙðõûüýÿÿÿûÿÿöÿÿðôùâãìÑÖ׸ÐιÜÕÍðåéÿöþÿýÿÿþüþÿùþþüþþüþþüþþüþþüþþüþþüþþüúÿÿýþÿÿþüÿýùÿûõÿýøþþüýÿþþúùÿýùÿûñÿòáäϼº¡˜{i†eTtP@iH7cB1dE3bC1aD2aD2`E2[J8[J8WJ7RJ5PK5qs[aeLEK1<F+@I,CH*MN/xw[`^GGF2?@.9?59?58>47=38>48>48>49?5:?99>88=78=77<65:4382160.3/,1-+0,,1-,1--10/321548G4?N9<K46E.7F/?N;:H9.</-4,+2*fcTgdUifWjgXifWifWifWifWldWmd[md]khYimVgs[fvl–©­ÔÝäõúÿýþÿÿþúýýñüýëðõÞÜåÈÐϳǯÓÌÆîåêÿùÿÿþÿýÿúûÿöþþüþþüþþüþþüþþüþþüþþüþþüÿúÿþþÿ÷ÿùôÿòõÿóûÿúÿüÿÿúÿÿúÿÿüÿüýÿúýöÿÿóÿÿïÿûìÿöëÿíÕű™bP8[I1[K2[M3\N4]O5VP:TO9QO8KM5MQ8ZcHEO4=J.AK0GP3IL/[Z<ieJ^YCZUBLI8<B8<B8;A7:@68>49?5:@6;A7;@::?9:?99>8:?9:?99>89>86;7273.3/-10-10-10-12-12):'7I3<N4:L27I1;M79J8.>1.5-,3+ZgM]
iQckThkXghVifWkeWmeZjgVkg\jf]hfYgjUdp\n}x¨¹ÃÚáçôùüúÿùûÿñøùéóòÞêçÔÚÕÂÌ¿®½±¥Ç¼ºâØàûõÿÿüÿüýÿüýÿþþþþþþþþþÿþüÿþüÿüùÿýúÿþúÿýüÿüýÿþÿþüýÿýüÿûøÿòêÿòéÿûÿÿþÿþÿûüýÿþþÿÿýÿÿþòÿþäúø߸´™SN0QK+UO/TO1SO2QP4QM2PP6KO6EM5GP5@I*CJ(HN*RS1TU5]]AZZBYW@[Y@YX<UT6MG1HC0B?0??5;>58@58C57D3:B78@58@5:B7:B7:B7;C8>F;9D<9D68A06<2284/;/1D&=T&Be-B^6=S><M;?Q9I\>FZ>1H.,3+*1)ZgM]iQckThmYjkYliZmgYog\khWjf[ie\hfYehSdp\v…€£´¾ÓÚàóøûøýùóöëìíÝÞÝÉÒϼÅÀ­Âµ¥É¼³ÚÏÍïåíý÷ÿÿüÿüýÿýÿþþþþþþþþþþÿþüÿþüÿýúÿüùÿüøÿýüÿûüÿþÿÿýþÿþýÿùöéÜÔõçÞÿûÿÿÿýþÿûüþýýþÿÿþÿÿþòÿüâýûä·³šQL/RL,UO/UP2TP3PO3QM2NN4KO6HP8HQ6EN/KR0U[7`kA‡`ovUQU:OO7PN7SQ8XT9XR<QL9HE6AA7;>59A67B47D3:B79A69A6:B7:B7:B7<D9>F;=H@>I;?H7=C9<B>5A5=P2Vm?Z}EQmE;Q<2C1>P8PcEQeI:Q7180-4,YgM]iQdnVjo[mn\mk\nhZoi]liXieZie\igZfiTdp\tƒ~–§±ÊÓÜìô÷ñ÷óéìáÝÞÎÏ̹ÆÁ®½¶¤ÐóãÖÍóççÿöþÿùÿÿüÿüýÿýÿþþþþþþþþþþÿþüþýûÿýúÿüùþùõÿüûýùúþüýÿþÿÿýüÿú÷øëãýïæÿûÿÿÿýþÿûüþýüýÿÿþÿÿýñÿûÞÿÿ껶 TN4UN1UO/VQ3TP3NM1OK0MM3KO6JR:KT9NW8U\:djFŒ£mœ°ŽvhqRKO6KJ5OJ6TM:UO9RM:LI:EE;=@79A67B45B1;C8:B7:B7;C8;C8;C8=E:?G<=H@>I;>G6=C9:@<6B6=P2Qh:Tw?Lh@3I4+<*6H0L_APdH:Q7,3+&-%
\ No newline at end of file
diff --git a/testimages/testimgint1_4.ppm b/testimages/testimgint1_4.ppm
deleted file mode 100644
index c4eda85..0000000
--- a/testimages/testimgint1_4.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-57 38
-255
-0/-51.72.92,80-80-80-80-5-*4,)5-*5-*1.'41*63,96/B<0H@3JA2MA1YB4†C3«?2ÀF;æCHñAD÷:@Ù?=³@-°?-¦?,˜@*‘>.=+“=,”?+™?6¸B4±E9_>/;5572/62/41,.0-0/-5,-=13yD@œ`l±XT†PGbopuŽ‘†bcSYSa,,,0/-2.+3.*2-*2-*2-*2-*2-*1,)3.+3.+41,41,52-850<5-@7.C9/I=1JG6fA/„;*¢E6ÊE>è@?ò;=ÁB3ª?-¨?, ?.”?+>-Œ=,‹<-Œ=.žB3ÂA;ªPHPF<@6494043.02--3/02/21/3/.P5*sTY|~»YXžJ>fDCUMVQQYNaXQ,-/---.-+/+(.*).*)/+*/+*/+*0,+1-,1-,1-*2.+1-*40-:2/=4/?4.E80J=5V93r7/’;1¦A5ÇD:ÙF?ªC2@.=-–=-Œ=,‰<,‡;-„<0†B7žH;¶OJ¡}mhseKI=77-01+/0+.0-//-00.00.:/7jhiŒ’Äv‚¶hoiyx‚—ˆj{k`Z@,03./1....-+-+,+)*+)*+)*+)**(),*+/-./+*.*)0,+3/.;23<21<1/@2/?82A83X6-;.’>4™A3­C6?3’?-“;-<.‡;-…9)9+>5…IA–KE¨nc¥…|yZfR?E710+2)*1++1++1-,1-,6,-cqMŽœ–º–ŸÆ§Š²‰«†g~bOO312,32-74/72.7/,3--/-.,+0/(//)+.-+/0*///2.+70*=3*?6->5.<1-<1/D1+B0.D,*\4,x>0:3‹?2A3<1ƒ>.ˆ<.€;+w;0v>/†C:†MB–\X~•x‹¨Š€^‹bWpS>D:.-+-)*1(-/*0+,10,+^cL„šl†¨uŽ°}‚¨w~žy`r\?A4671872=:5?:6?74;629527327./50-41,43.540;62@7.G:1G=4C90?61?61D65M34A9.K8*m61‰==„A;„=7B1…;.<,{>,¨KFŠG>tM<VFxwX§xhž˜~…¡{†pxpasYGD39.*./)1-*=&,H@5clQmŠ^r˜erœjw tt“qZsUD;4893=<7B?:D?;FA;F?9F?7D;4B92B92<7194.96/@9/F<0M@0M?4H;2B90?;2\:8œEK„@7¤EAœD6¥=2¤E3ª@2©<A—D@y=2¶:DÔ@>ªVLjeG…kPæOVÿ+Iý;UôL]äWgØclÐ`lÐQfÁL_DKR3.E=2cZ9yƒ`sŒdx“f„Ž¤{‘|YrRURC671;:5B?:GB>HC=LE=MC9L@4H>2D<1A:2<71<8-C;0K?/O?/M=0F:.A:0A?3¨EHÌ17 <%°=+¥:0¶A8¨</»?5Ô<1÷.4â=7æ42Ö6*¾NC‘]HÖ[Vè>AÑFAô>Jü5Hþ9Uÿ;`ÿ?mûArÿFjùTr»WalLA‹~u©«¨«³µ¾¼¿ÚÒÏÙÏק©¦`w][eJ10.762<<4BB8JH<NL7PPHb`xWNQH?@A96=82>71>;6F?5P?-A>-D65>;4h;5«;-¡:'«>*»9+©;,¬>-·?/Á;/Ñ7+Þ7.ô.+Ú7&Ù6#Ë=1ÚFBñADÒA>­<.ÇG<ó@FøDMýAYÿBjýIxÿKwùVuóJkžnnÓ»¯îëòþäýÿóüýøõöíðÒÎ͗“•›74/52+:70@=4OE9TK<ecq–œÖ””Ú||®`^vECH<;@B<,G@-EB;I?G@IHLB8’B;ª?-­>+¸9*Í9/È91Ç9/Î:0Ñ7-è1)ð/*ò/)Ö7"Ö5#Ó9/ÜMEÅE:»<)»<-ÈA;ÛIJïDJüK]úEdÿNwûUyýSnù\mß`iٞšÿööýüÿÿûúÿûøÿñôêßÝäâÕÐÙÔC?4<8/84+>93MC7WJ:a\p”žæ¬¶þ®µûŸ ãvtµbXGDWD@=[OS‰sš§f„¾IR³<(­:(º;,Ì9/Ý:3Ú72ä=7Ý90äD8ñ91ú**ì1,Ö2&Ê<(Ó:,Ú?:×EE½<'Á9+ÎH?ÅIAßKGæOTøWiþQoÿTwÿXoý\kíWc§unóÞÛ÷ÿýÿüýýúõûðîåØÐÿüèæìèPJ<IC7B;3A:4GC8SG1[T[‹–Ωªü¬¯ü¥©óšžå†‹Íuw 
’UgØHQåDIÇA8àB?Ï>9Â@2Ë=1Ô7.Ù5,Û:0Ú6,Ô4&Ì9%Û;/ü47ç55á55ò21Ü6*ßK?ÖDEÔ=6Ã;/½@.½>-ÃE9ÎMHôdmúXmû[sëtxôftȉ‚baOßÂÄýûþÿúÿúÿöÿþ÷ôîàÿÿãööêXO@TK<NE6LC4JF:VK5]V]‡ŒÃœì¥ø¦ç¤žè˜™ÚÁnˆä=Dâ?BÙ=AÌB?è=CØ:9È?7¿A3Ä7-ß63Ý7+Ý7'Ã8#Æ8,Ú91Ë8&Ý2(à-0å/,Õ8'ÔC2ÝD?æTTÍ:0Á:'·>-´<.¾@2éf^ïijóboöllþbo¶‹„[iFãÑÇÿüùúþýßýåôýìÿøè÷ïÚþýùZN>YM=WK;WK;SK>YL9ZRPsr”„†¶ŒÐ…‡Àqs¬¤Ymù7OáFAí>KÒDBÁB9Ô97ÌC9Á9)¾;'Í:*â6,ó04ô02á34æ2>í/1Ë8(Ñ9+Ý<2ÊB2Å9(×:1ã03Ö71Ò>2ÇC6¯;,¬<.®=-Ú_Xïlr÷nxÿpwûNb½~yxa×ÖºãñØÃÚ¾¢ÍŸ»Ô­äãÎêÙÑßÑÈZM<ZM<XK:ZM<\L=^O<[OC]RVh`…‚yº—|³¥iœ½>5ÝC9ö<?âAG¸<2¸@2×96ËC7Á:'È;)Õ5'ì7.ç2'ô*&õ&.ÿ8ò.:ç6<ó08ô-0Ü:5Ý>:Ý;8×4+Ç8(Ç:0ÔJ@È;2Ë80¼:,ÊOH÷xüyÿkxÿE`¶mdp‹X„hpczŸl‡µw’»¤´Î½µ¯¥¤ZK8ZK8ZK8\M:^K<_N<_P=aP>cQ_uq–º€–òZiÿ9PÓCCæAEÑB<¯;,»=/ç9:âH@ÏG9Ì8,ß6/í1/ü,<ý(<ð,Dü/XùS_ñIZÛ4>´;*ÍE9ÓD@ÑA9Ê9(«<(¹=1Ø?:ë51ù-.Þ7.Å@7øz{üz‚ýdvùA[•VEiP‚‘h~¥p©m†¬oƒµn­yÉ° §¹]L8]L8]L8]L8^M=[O?^RFfSOkZd|ƒ¹§‡¸çwsôP[ñALÝ:;Ã?2­>-Ç<5Õ7.Ú9%äA:Ñ=/ê86û7Eö*-õ)*ì)/ó=LôT\ß@=º?0§8-¹>9×CCÐA9Ò91Ï:6Ý>:Ü;3ò00ô02ë2-Â:,î{xþw…ÿQnôAaqM?jŒPz¢f{£g}¡c}¦j}­a‰²p‰¥[gg_N<_N<_N<_N<`N6\SNql‚xqšt|ª‡‹Ò¥•Â¶Œ|ÏSIëGHä:;È@2´91ÉYMÃ?3Ý6-â70á?0ì1,ú9<ôNRå<?åBEÓ:=à:>ã4;ðW\ù_kàRNÞ@?ÜG@Ø93û+9ü7>Ý4-ê5.ù.2î1-Â8+çtqûj{ÿAcÓCZWQ9m‰Vwbrš[wž_y¤_x§Y‚ªd~™las]\O>^Q@_RA`SBaTNnjƒwy²mq¸ŒÜÆüäÞ¢Øѝ¬xÃe[ìRTè:;Í<7¸G9ÐVIÕ4,å>8Ü6*è0(ã60ÚACóS[úZdø]cú2<ô1?ùU^üXaá<8â64Ù<3â94è8:å63Ý6-ð0+÷03ñ1.Ì9/ädeþWsû?b”EHCG0bwLp’Vj“Oq˜YtœVo™Og‹KNh8G^4UH8]P@bUEdWGpk‰sv«cj²|ƒË¥«ÛÆÇÙËÅDz¦°yxt‹hbÐoiübbîE@ÐB8Ã>/Ð=-âFGÜ;3ã60Ì:-Â=,Ë;;åMYèSYã:7öCIä7;ù59î73é30ß<3è64ê67æ40Ü8,ì1(ñ33ï2.Ò80ò_gûBdêCaaK=RG5LZ7g…IeFj’Sj‘N`‰G[|E]tHZuB@@6TG>^SAjgrol›`` 
qrŽŠá˜—Ó™“Ó˜Ë‹‚±n_drWL¸YSãtmözxÛKCÁ8&É:*Õ9,Ò9+Ú8-Ç<)Å;1Æ<2ÎD:ÏF<Ù7(ø*6ÿ'?ÿ!<ÿ#9í6;Ý:5ç85ì44Þ6-Ú9'Þ7$ç5+ã7)áB>ÿQjÿ:a˜;F8<+BA-_h=g~RcˆBkKjIfˆKdIeFcˆC/4-A<8OJDabwYYfh³~€×…†Ö††Ì†Õ„Å}xxUS^bG¹SQèagìqi×K>Ã<(Ì9)Ô6+Ä;)Ô@2Í9+É:,Ê8+Ò;0Ò:/é24ù/=ó1<ø/9õ1;â66Þ95é77ò59Ý6.Ò9)Ô9'Ê<$Ì?-÷MVÿ:dÜB\F678:-de;¥¾…s‘]fŒCq–SjIi‹Nf…IhˆGl–N-0'34/@B7TT`b_ŠƒÔ‹‰ê~͇‡ÇŠƒÆ„zŸndbd_Km\Hˆ[DêQTàA=Û7-Ñ8&Ý5,Æ7)À=+ÔB5Ç9+Ç:)Ì6(×6,â92ð5<ì2=êFGïB>ä86á96å99ë8<î68Ö92¿:)½<)¸:$Ç=3ÿG^î@akC;><0[J8²o•±~YuEe‰Ah‘KeˆDhŒLiŽKj’It¢WA?2??5EG/RPC]]Qzw¢ˆ…Èyv£xt§yqˆob\n[Ll\Mu^N¤L@Ø7/á4.Ý5(Ì<$Õ9*³:'Æ:-âE>¸=+Ã;-Ï;1Þ;6ç98é:7ïLMÔIDÙ;8á:4æ66ê7;è7=è8:Í=4®;)¥:&ž:*ÌOMçHZ£]SHG3R^6v˜\‡˜bWdHGT6i†Dc‘HiŒHlPn—QržOv©ZZQBZQB[RC_VGd[Jj_]qeij^`ZWNYSGaYFi^Hp_O¯RK³?0Ë80Û7-Ù7,Õ7+Ô8+²:*±9(Ê7-ÓD6Ù;8à<:í49â96¹;-´;*Ð@5í76é6:å95â92ã99Ü<<¸<2›<*‘8&†:,ÁUUŸVO˜_Vp„I„œ`Rd:>D6<>1AM7jˆHl”Kn•Tl–Jq¡Mu¦W]†Lf]Nf]Nf]Ng^OiaLiaNiaLhaG`[HOL;JK9ORA^TH­UQß;<¹@+Ô8)Õ9*Ò9)Ñ:)°<-™;"º<-Ñ80ï7?Ü9:Ø58Á61§>+§<(¼:,Ý;6î85ç<4Ú@8Í=<®<1–9(‰<(ƒ8%‚?/™SGZN–]VcmJ6A08?7>>29;.>J4m‹Kq™Pq›OsžOz¦[bJXHi`Qi`Qi`Qi`Qj_]j`WjaRjaRiaL]XEFI8?E7AI4wT>ÛVQàA>Ë9*Ì:+Ï;-Ñ;-¾;3¤;(ÈB7âE@ÍB?¡@0”A1ƒ@-Ž;+“=,œ;+¬:/É<2É?4¶</6-‰<,‚:+|:*u7(‘D:PF˜UO—]YdoE@M1U^I@C(9;.>J4nŒLq™Pp Lz¦[h‘OdŒMY~Ji`Qi`QkbSkbSlbXldQldQlbVnbThaQWQAHE4MG/¬BBÉIHæ^`ÒG@ÒE>×FAÛGCáGIÝIGçACáB?Ä>;—A2‰=0‚<2;3=1};-€;,Š:/•9,‘:)„;,€8*{5)v6,q9,‡M?UH—SPn]MP`;YoA;L*:@49;.<H2g…EršQr£Rj—R`ˆIeGWyGi`QkbSjaRjaRkbSkbSlcTlcTqaTldYpm^lfPSK4yK>”A=©HBÀHGÐB>ÎE=ÙCDáCDàECÞC?ßB=ÄA9‘:0?3‡81„;,‚:,9-~:/}90|90z:0x:/t8.p6+l3(p:.ˆQJ“RN‚[LbiJNd>HW89A,=>04<1<?,arFršSs™\]ƒFbˆK`†IOq?haQjcSibRibRkbSkbSlcTlcTnf[‹ˆƒÃ¾ÎÉ҆xHM9<G6=>6V?7·PSØJIàEAÞDFÜDCÖE@ÑG=ÔA: 
>1‚?.ƒ;,‚:,9-~:/}:1y:1v7.s9.r8-p5-l3*i3)k7,’OI€ZO\UE@G74:,6>/<G65A-284:</Vg=tœU`ƒI\E`ƒI^GIj=icSicSicSicSkbSmdUmdUmdU„„‚ßãäúþÿÿýúîç˞¢NQ6K>6XSP¼©£ì¶¬ÚUXÓIGØBCØ@?ÓB=Õ@:©>4…=.€=,9-|8-}:1{;2s:1m7-l8-j8-l7/g5,a1'd5+~QKSG94?/5;168516/1=/7F307046)Q`?o•ZUuCWwE\|J\|J>V<gdSgdSgdSgdSkbSmdUmdUpgX†“Þéëöÿúùûí÷úéåßŃ~hQYL´°§ôææûúõüëáՇsºK@ÉD?ÊC=ÎC>¤=8†<3=1~:/{8/{;2w81j8/i7.f7-d8-i70i81e6.qE<A>767/58-57,08+08+.9+2=-9B146(TbIWzPNk=Ol>ZwIVsE,6.heVheVheVheVhdXmd[khU€ƒÉÑÔúúüÿþúõøïõ÷êååɋt˜ˆááßûûùþþüþþüúÿÿç·­ºTFÃI>ÀH8™C6A7;3z;2u9/s9.o6+j81i70h6/h70j92j@2jUBtq^39/17-17-/5+/4./4..3-.3-.:04@2BO;AO6Ic@Hb;YsNF_B&+/heVheVheVheVjdXmd[mlXŽ””ëóöþþþûûóäèÙèìÓâÝÇÓËÀìãäúúøþþüþþüþþüÿüýôÿýñêØ»xe½G=¨?<Š?<qE<q?8p?8n?7i=4q62l71f;2c?3bM<_O8fbIV`H39/17-17-/5+-2,-2,,1+,1+*4,-7,2=-5@/;J3HX=]mS<J9%++heVheVifWifWkeYmf\knY‹ÜãéþþþüüðíòÜâåÈáÙÎôèêÿøüþþüþþüþþüþþüûÿüÿûÿÿüýÿýêß·ž¦lX„E3uB/t?9q?8o>7m>6i>5cA5_J9L?,DH/XX<jmR=G/7=36<26<25;1382271/4.,1+*1*+2+.5.9@89F57D0N[I8D8+1-gdUifWifWifWldYmf\hpY}ŠàçíþþþþÿñêðÖÏͶãÛÙÿúÿþÿúþþüþþüþþüþþüþýÿûÿ÷ýÿ÷ÿýÿÿüÿÿûòèÝËÊ´¦¨xaH2^H1^J2XM9QL6Z\DJS8@J/NN2ieLJI5:@69?58>49?5:?99>88=75:4051,1--10/325G19K39K53D4-4,\hPfkUjhYnfYkg[ieZemXŒ›žäìïùüóìíÛØÓÀÅ·ªÝÒÖþøÿüýÿþþþÿþüÿýúÿýùÿüûÿýþÿüùøêáÿýÿýÿüþýÿÿþëØÖ½RK.UO/QP4PN5IO5EN1KQ/agC]`CTT<VU9OJ6CA49?58C59A69A6:B7=E:<F;:B54>5AU0Kk<:M:EX<?V:-4,[iPgoXlm]nhZkg[ieZemX…”—ÚãêìñëÖÕû©ÚÌÁøíñÿüÿüþýþþþÿþüÿýúÿúöþúùÿýþÿüùûíäÿýþýÿüþþÿÿûçÝÛÆTM1VO2PO1MK2LR8LU8^dBªsxˆaKM7SL:SN:IG:;A76A3;C8;C8;C8>F;>H=<D77A8H\7PpA/B/AT8E\@)0(
\ No newline at end of file
diff --git a/testimages/testimgint1_8.ppm b/testimages/testimgint1_8.ppm
deleted file mode 100644
index 11dca13..0000000
--- a/testimages/testimgint1_8.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-29 19
-255
-0/-50,4/,4/,2-*3.+30+74/B:/J>0dC2¥A2âBBá=;®?,œ@+>-=-¬B5‚F;:5232./1.5//uKLkj¤SNdci_\VX-.0.-+,*+,*+,*+.,-0,+2.-<20A4.H94w9.¦A5³C7—>,Ž=,„;,„A8¤TK„‰sGK<0/+/.,0/-PMF‰›§~˜‹wwWV:45/:728302012,.10,32.?5,B8/>3/F21K4,€;4ˆ?6†>0<-‡C8M@xc‹ m†fFJ<1,)1+-NO?y™h¥vk†g@>29:4C@;GB<I?5D<1<71=9.L?/J<1@<1š>AŸ?1§?4¬@3Â::¾:8ÃG?•bMé?Hù>QïKcçLlÔJanE?„‚m–£±¹¬wˆuX[F43/==3OI9kllg‡IDJ>;6H@3C?<a?6¨=+º;,¹;/Å;0â4+å3'Ô8+ÛFB½=0ØDB÷ESþHpýRtÚ]kèÎÍÿóýþôóÎËÆ°·¯EA5=82OF7wy ­°ý––Üih”‚KQ¸WhÈA=¼=.Ö8/Ý82Ø:.ð50â3.Û7-ÚEAÅ;.ÃC6ÖKHøYmû_têhp¶˜þüÿýúõ÷ðÞëîçYM?SG9SJ9lj€’•ÜŽÏ½fˆå@DÍB?Ø=;Á=0Ô7.é3/Ô51×6,Ú6-Ò9+Û<6ÖA:»=.³=/èfdøksÛns£¨ŠéñäÌçÈñè×îçá[K;[K;^M=^PEqgŒºp“Û?@ä@A·=0Ú?:É=,ã5,õ,2ù'@ð@KÝ34×?:Ó:2½;-Ø=8×6.à`]þp|ÑRYx‹^zŸkˆ´u²¸œ£¢ª^M;^M;]P@kalxy§¹ˆ›éJNÔ<7¼C8Ó9-Þ>2ó6:ñ7:æ9?å@DÒKHÓECÖ?8é6:æ5/ò00×XQÿ\t£IKr•[xŸ`z¨`…¦oYi^[N>aTDlhqvº°°ÒÎÇ­ukçWVÑB:ÎB5à=8Þ6-×CAðV^ñ8>ôGMæ74á:4è66å4*ò21ÝMLøFhcG;bzJj“Ok”N\yCPj;A>5^Z[ccŸ€ÔŒÑ‹ƒ´n\RÎa^å`YÇ:(Ï9*Ñ;,È:0Ð@7í04ú)<ò09â96ç53Ø9&Ù9)öGX°;NEG1w\iŽHiŒJf„HhJ780JKCom”„€Ê€|¯sgkm^K¼LAß8/Ô8)¿:+Í?3É:,à93ì<?âC@â96è8;Þ95´;*º@3ÞK[PI6y‹YcxQgŒFiŒJl–Nv¦Z`WHbYJh_PkaWXUFXUB‹WLÅ=1Ø8*Ò9)ª;(Ê<0à::Ú65¯<*Õ;3è96Û<8·;39(˜H=–\QdsHBI7=D2mLo™OpžSW€Hi`QjaRkaWkbSg^MHI9kJ9ÛONÎ@6Õ@9ÈA;ÝB>²@6ˆ?0ˆ<.’:,¬<0š:,9+v8+ŒNAXPTb>CK4;B0n‘MqŸTdKTyFjaRjaRkbSlcTumb›—ŒjZJnC<¦GC×FCÞDDÙD>±?5‚<0‚:,~:/y9/u9.p6+l6*ŒSL`XE?M4:A/6<0g„Jd‡Ma„JIj=gdSgdSlcTneV²·ºûýøÚØ¿[ZH¯¤¢ï½¶ÎWOÐB>¼?9‚<0}9.z:1m9.h8,i70g80RD;4:.28.2?.39-ZtMSp@[xH1B2heVheVkeYy{pëïòõöîèêÔµ¶¨õõóþþüöîëËq°D:€?9u<5o:2l71f:1eE6gbN28.06,.3--2,/90;G3EZ;NbG%*-heVifWlfZtqîòõô÷æÝÚÉý÷÷þþüþþüþþþÿþùáÆ»«†t{UHeD5]F6TN8JN3VXB8>47=36;5271-2.1628G2=K:+2+akSliZjfZw‚zéïï×ÔÃÜÑÍýüÿþþþÿüùþüýýôïþþþÿýö˜’xTO2LN6OV5r~VRP9KH79A4:B7<D9=E8=L5AZ:CW<*1)
\ No newline at end of file
diff --git a/testimages/testimgp.jpg b/testimages/testimgp.jpg
deleted file mode 100644
index 968a90e..0000000
--- a/testimages/testimgp.jpg
+++ /dev/null
Binary files differ
diff --git a/testimages/testimgpa.jpg b/testimages/testimgpa.jpg
deleted file mode 100644
index 815a691..0000000
--- a/testimages/testimgpa.jpg
+++ /dev/null
Binary files differ
diff --git a/tjbench.c b/tjbench.c
index 26a1972..3c10330 100644
--- a/tjbench.c
+++ b/tjbench.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2009-2012 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
 #include <math.h>
 #include <errno.h>
 #include <cdjpeg.h>
@@ -138,7 +139,7 @@
 		if(yuv==YUVDECODE)
 		{
 			if(tjDecompressToYUV(handle, jpegbuf[0], jpegsize[0], dstbuf, flags)==-1)
-			_throwtj("executing tjDecompressToYUV()");
+				_throwtj("executing tjDecompressToYUV()");
 		}
 		else for(row=0, dstptr=dstbuf; row<ntilesh; row++, dstptr+=pitch*tileh)
 		{
@@ -216,7 +217,7 @@
 						dstbuf[bindex]=abs(dstbuf[bindex]-y);
 					}
 				}
-			}		
+			}
 			else
 			{
 				for(row=0; row<h; row++)
@@ -238,7 +239,7 @@
 }
 
 
-void dotestyuv(unsigned char *srcbuf, int w, int h, int subsamp,
+int dotestyuv(unsigned char *srcbuf, int w, int h, int subsamp,
 	char *filename)
 {
 	char tempstr[1024], tempstr2[80];
@@ -310,11 +311,11 @@
 	if(file) {fclose(file);  file=NULL;}
 	if(dstbuf) {free(dstbuf);  dstbuf=NULL;}
 	if(handle) {tjDestroy(handle);  handle=NULL;}
-	return;
+	return retval;
 }
 
 
-void dotest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
+int dotest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
 	char *filename)
 {
 	char tempstr[1024], tempstr2[80];
@@ -325,7 +326,7 @@
 	unsigned long *jpegsize=NULL;
 	int ps=tjPixelSize[pf], ntilesw=1, ntilesh=1, pitch=w*ps;
 
-	if(yuv==YUVENCODE) {dotestyuv(srcbuf, w, h, subsamp, filename);  return;}
+	if(yuv==YUVENCODE) return dotestyuv(srcbuf, w, h, subsamp, filename);
 
 	if((tmpbuf=(unsigned char *)malloc(pitch*h)) == NULL)
 		_throwunix("allocating temporary image buffer");
@@ -352,7 +353,7 @@
 		if((flags&TJFLAG_NOREALLOC)!=0)
 			for(i=0; i<ntilesw*ntilesh; i++)
 			{
-				if((jpegbuf[i]=(unsigned char *)malloc(tjBufSize(tilew, tileh,
+				if((jpegbuf[i]=(unsigned char *)tjAlloc(tjBufSize(tilew, tileh,
 					subsamp)))==NULL)
 					_throwunix("allocating JPEG tiles");
 			}
@@ -435,7 +436,7 @@
 
 		for(i=0; i<ntilesw*ntilesh; i++)
 		{
-			if(jpegbuf[i]) free(jpegbuf[i]);  jpegbuf[i]=NULL;
+			if(jpegbuf[i]) tjFree(jpegbuf[i]);  jpegbuf[i]=NULL;
 		}
 		free(jpegbuf);  jpegbuf=NULL;
 		free(jpegsize);  jpegsize=NULL;
@@ -449,18 +450,18 @@
 	{
 		for(i=0; i<ntilesw*ntilesh; i++)
 		{
-			if(jpegbuf[i]) free(jpegbuf[i]);  jpegbuf[i]=NULL;
+			if(jpegbuf[i]) tjFree(jpegbuf[i]);  jpegbuf[i]=NULL;
 		}
 		free(jpegbuf);  jpegbuf=NULL;
 	}
 	if(jpegsize) {free(jpegsize);  jpegsize=NULL;}
 	if(tmpbuf) {free(tmpbuf);  tmpbuf=NULL;}
 	if(handle) {tjDestroy(handle);  handle=NULL;}
-	return;
+	return retval;
 }
 
 
-void dodecomptest(char *filename)
+int dodecomptest(char *filename)
 {
 	FILE *file=NULL;  tjhandle handle=NULL;
 	unsigned char **jpegbuf=NULL, *srcbuf=NULL;
@@ -475,7 +476,7 @@
 
 	if((file=fopen(filename, "rb"))==NULL)
 		_throwunix("opening file");
-	if(fseek(file, 0, SEEK_END)<0 || (srcsize=ftell(file))<0)
+	if(fseek(file, 0, SEEK_END)<0 || (srcsize=ftell(file))==(unsigned long)-1)
 		_throwunix("determining file size");
 	if((srcbuf=(unsigned char *)malloc(srcsize))==NULL)
 		_throwunix("allocating memory");
@@ -520,10 +521,10 @@
 			_throwunix("allocating JPEG size array");
 		memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh);
 
-		if((flags&TJFLAG_NOREALLOC)!=0)
+		if((flags&TJFLAG_NOREALLOC)!=0 || !dotile)
 			for(i=0; i<ntilesw*ntilesh; i++)
 			{
-				if((jpegbuf[i]=(unsigned char *)malloc(tjBufSize(tilew, tileh,
+				if((jpegbuf[i]=(unsigned char *)tjAlloc(tjBufSize(tilew, tileh,
 					subsamp)))==NULL)
 					_throwunix("allocating JPEG tiles");
 			}
@@ -582,7 +583,7 @@
 					t[tile].customFilter=customFilter;
 					if(t[tile].options&TJXOPT_NOOUTPUT && jpegbuf[tile])
 					{
-						free(jpegbuf[tile]);  jpegbuf[tile]=NULL;
+						tjFree(jpegbuf[tile]);  jpegbuf[tile]=NULL;
 					}
 				}
 			}
@@ -637,7 +638,7 @@
 
 		for(i=0; i<ntilesw*ntilesh; i++)
 		{
-			free(jpegbuf[i]);  jpegbuf[i]=NULL;
+			tjFree(jpegbuf[i]);  jpegbuf[i]=NULL;
 		}
 		free(jpegbuf);  jpegbuf=NULL;
 		if(jpegsize) {free(jpegsize);  jpegsize=NULL;}
@@ -651,7 +652,7 @@
 	{
 		for(i=0; i<ntilesw*ntilesh; i++)
 		{
-			if(jpegbuf[i]) free(jpegbuf[i]);  jpegbuf[i]=NULL;
+			if(jpegbuf[i]) tjFree(jpegbuf[i]);  jpegbuf[i]=NULL;
 		}
 		free(jpegbuf);  jpegbuf=NULL;
 	}
@@ -659,7 +660,7 @@
 	if(srcbuf) {free(srcbuf);  srcbuf=NULL;}
 	if(t) {free(t);  t=NULL;}
 	if(handle) {tjDestroy(handle);  handle=NULL;}
-	return;
+	return retval;
 }
 
 
@@ -686,6 +687,9 @@
 	printf("     codec\n");
 	printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n");
 	printf("     underlying codec\n");
+	printf("-subsamp <s> = When testing JPEG compression, this option specifies the level\n");
+	printf("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, or GRAY).\n");
+	printf("     The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence.\n");
 	printf("-quiet = Output results in tabular rather than verbose format\n");
 	printf("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG\n");
 	printf("-yuvdecode = Decode JPEG image to planar YUV rather than RGB\n");
@@ -700,6 +704,7 @@
 			if(i!=nsf-1) printf(", ");
 			if(i==nsf-2) printf("or ");
 		}
+		if(i%8==0 && i!=0) printf("\n     ");
 	}
 	printf(")\n");
 	printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n");
@@ -718,7 +723,7 @@
 {
 	unsigned char *srcbuf=NULL;  int w, h, i, j;
 	int minqual=-1, maxqual=-1;  char *temp;
-	int minarg=2;  int retval=0;
+	int minarg=2, retval=0, subsamp=-1;
 
 	if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0)
 		_throwtj("executing tjGetScalingFactors()");
@@ -825,7 +830,8 @@
 				{
 					for(j=0; j<nsf; j++)
 					{
-						if(temp1==scalingfactors[j].num && temp2==scalingfactors[j].denom)
+						if((double)temp1/(double)temp2
+							== (double)scalingfactors[j].num/(double)scalingfactors[j].denom)
 						{
 							sf=scalingfactors[j];
 							match=1;  break;
@@ -854,6 +860,22 @@
 			if(!strcmp(argv[i], "-?")) usage(argv[0]);
 			if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC);
 			if(!strcasecmp(argv[i], "-bmp")) ext="bmp";
+			if(!strcasecmp(argv[i], "-subsamp") && i<argc-1)
+			{
+				i++;
+				if(toupper(argv[i][0])=='G') subsamp=TJSAMP_GRAY;
+				else
+				{
+					int temp=atoi(argv[i]);
+					switch(temp)
+					{
+						case 444:  subsamp=TJSAMP_444;  break;
+						case 422:  subsamp=TJSAMP_422;  break;
+						case 440:  subsamp=TJSAMP_440;  break;
+						case 420:  subsamp=TJSAMP_420;  break;
+					}
+				}
+			}
 		}
 	}
 
@@ -871,6 +893,13 @@
 		dotile=0;
 	}
 
+	if((flags&TJFLAG_NOREALLOC)==0 && dotile)
+	{
+		printf("Disabling tiled compression/decompression tests, because those tests do not\n");
+		printf("work when dynamic JPEG buffer allocation is enabled.\n\n");
+		dotile=0;
+	}
+
 	if(!decomponly)
 	{
 		if(loadbmp(argv[1], &srcbuf, &w, &h, pf, (flags&TJFLAG_BOTTOMUP)!=0)==-1)
@@ -893,18 +922,27 @@
 		printf("\n");
 		goto bailout;
 	}
-	for(i=maxqual; i>=minqual; i--)
-		dotest(srcbuf, w, h, TJ_GRAYSCALE, i, argv[1]);
-	printf("\n");
-	for(i=maxqual; i>=minqual; i--)
-		dotest(srcbuf, w, h, TJ_420, i, argv[1]);
-	printf("\n");
-	for(i=maxqual; i>=minqual; i--)
-		dotest(srcbuf, w, h, TJ_422, i, argv[1]);
-	printf("\n");
-	for(i=maxqual; i>=minqual; i--)
-		dotest(srcbuf, w, h, TJ_444, i, argv[1]);
-	printf("\n");
+	if(subsamp>=0 && subsamp<TJ_NUMSAMP)
+	{
+		for(i=maxqual; i>=minqual; i--)
+			dotest(srcbuf, w, h, subsamp, i, argv[1]);
+		printf("\n");
+	}
+	else
+	{
+		for(i=maxqual; i>=minqual; i--)
+			dotest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1]);
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			dotest(srcbuf, w, h, TJSAMP_420, i, argv[1]);
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			dotest(srcbuf, w, h, TJSAMP_422, i, argv[1]);
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			dotest(srcbuf, w, h, TJSAMP_444, i, argv[1]);
+		printf("\n");
+	}
 
 	bailout:
 	if(srcbuf) free(srcbuf);
diff --git a/tjbenchtest.in b/tjbenchtest.in
index bea12f1..2ae345f 100755
--- a/tjbenchtest.in
+++ b/tjbenchtest.in
@@ -22,9 +22,11 @@
 EXT=bmp
 IMAGES="vgl_5674_0098.${EXT} vgl_6434_0018a.${EXT} vgl_6548_0026a.${EXT} nightshot_iso_100.${EXT}"
 IMGDIR=@srcdir@/testimages
-OUTDIR=__tjbenchtest_output
+OUTDIR=`mktemp -d /tmp/__tjbenchtest_output.XXXXXX`
 EXEDIR=.
 BMPARG=
+ALLOC=0
+ALLOCARG=
 if [ "$EXT" = "bmp" ]; then BMPARG=-bmp; fi
 
 if [ -d $OUTDIR ]; then
@@ -34,6 +36,13 @@
 
 exec >$EXEDIR/tjbenchtest.log
 
+if [ $# -gt 0 ]; then
+	if [ "$1" = "-alloc" ]; then
+		ALLOCARG=-alloc
+		ALLOC=1
+	fi
+fi
+
 # Standard tests
 for image in $IMAGES; do
 
@@ -60,7 +69,7 @@
 
 	# Compression
 	for dct in accurate fast; do
-		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -${dct}dct
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -${dct}dct $ALLOCARG
 		for samp in GRAY 420 422 444; do
 			runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
 		done
@@ -73,49 +82,70 @@
 		fi
 
 		# Tiled compression & decompression
-		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 ${dctarg}
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 ${dctarg} $ALLOCARG
 		for samp in GRAY 444; do
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
-				rm $i
-			done
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+					rm $i
+				done
+			fi
 		done
-		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -fastupsample ${dctarg} $ALLOCARG
 		for samp in 420 422; do
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
-				rm $i
-			done
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+					rm $i
+				done
+			fi
 		done
 
 		# Tiled decompression
 		for samp in GRAY 444; do
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 ${dctarg}
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
-				rm $i
-			done
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 ${dctarg} $ALLOCARG
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+					rm $i
+				done
+			fi
 		done
 		for samp in 420 422; do
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
-				rm $i
-			done
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -fastupsample ${dctarg} $ALLOCARG
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+					rm $i
+				done
+			fi
 		done
 	done
 
 	# Scaled decompression
-	for scale in 2 4 8; do
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
 		for samp in GRAY 420 422 444; do
-			$EXEDIR/djpeg -rgb -scale 1/${scale} $BMPARG $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_1_${scale}_djpeg.${EXT}
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale 1/${scale} -quiet -benchtime 0.01
-			runme cmp $OUTDIR/${basename}_${samp}_Q95_1_${scale}.${EXT} $OUTDIR/${basename}_${samp}_1_${scale}_djpeg.${EXT}
-			rm $OUTDIR/${basename}_${samp}_Q95_1_${scale}.${EXT}
+			$EXEDIR/djpeg -rgb -scale ${scalearg} $BMPARG $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT}
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale ${scalearg} -quiet -benchtime 0.01 $ALLOCARG
+			runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT}
+			rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT}
 		done
 	done
 
@@ -132,44 +162,60 @@
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444; do
 			$EXEDIR/djpeg -rgb $BMPARG $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
-				rm $i
-			done
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 $ALLOCARG
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+					rm $i
+				done
+			fi
 		done
 		for samp in 420 422; do
 			$EXEDIR/djpeg -nosmooth -rgb $BMPARG $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -fastupsample
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
-				rm $i
-			done
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -fastupsample $ALLOCARG
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+					rm $i
+				done
+			fi
 		done
 	done
 
 	# Grayscale transform
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444 422 420; do
-			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -grayscale
-			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
-				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
-				runme cmp $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT}
-				rm $i
-			done
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -grayscale $ALLOCARG
+			if [ $ALLOC = 1 ]; then
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT}
+			else
+				for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+					$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+					runme cmp $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT}
+					rm $i
+				done
+			fi
 		done
 	done
 
 	# Transforms with scaling
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444 422 420; do
-			for scale in 2 4 8; do
-				$EXEDIR/djpeg -rgb -scale 1/${scale} $BMPARG $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_1_${scale}_jpegtran.${EXT}
-				runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale 1/${scale} -quiet -benchtime 0.01
-				runme cmp $OUTDIR/${basename}_${samp}_Q95_1_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_1_${scale}_jpegtran.${EXT}
-				rm $OUTDIR/${basename}_${samp}_Q95_1_${scale}.${EXT}
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				$EXEDIR/djpeg -rgb -scale ${scalearg} $BMPARG $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT}
+				runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 $ALLOCARG
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT}
 			done
 		done
 	done
diff --git a/tjbenchtest.java.in b/tjbenchtest.java.in
new file mode 100755
index 0000000..44cdf4a
--- /dev/null
+++ b/tjbenchtest.java.in
@@ -0,0 +1,179 @@
+#!/bin/bash
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+onexit()
+{
+	if [ -d $OUTDIR ]; then
+		rm -rf $OUTDIR
+	fi
+}
+
+runme()
+{
+	echo \*\*\* $*
+	$*
+}
+
+IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp"
+IMGDIR=@srcdir@/testimages
+OUTDIR=`mktemp -d /tmp/__tjbenchtest_java_output.XXXXXX`
+EXEDIR=.
+JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs"
+
+if [ -d $OUTDIR ]; then
+	rm -rf $OUTDIR
+fi
+mkdir -p $OUTDIR
+
+exec >$EXEDIR/tjbenchtest-java.log
+
+# Standard tests
+for image in $IMAGES; do
+
+	cp $IMGDIR/$image $OUTDIR
+	basename=`basename $image .bmp`
+	$EXEDIR/cjpeg -quality 95 -dct fast -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_accurate_cjpeg.jpg
+	for samp in GRAY 420 422 444; do
+		$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_djpeg.bmp
+		$EXEDIR/djpeg -dct fast -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_djpeg.bmp
+		$EXEDIR/djpeg -dct int -rgb -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_djpeg.bmp
+	done
+	for samp in 420 422; do
+		$EXEDIR/djpeg -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp
+		$EXEDIR/djpeg -dct fast -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp
+		$EXEDIR/djpeg -dct int -nosmooth -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp
+	done
+
+	# Compression
+	for dct in accurate fast; do
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -${dct}dct
+		for samp in GRAY 420 422 444; do
+			runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
+		done
+	done
+
+	for dct in fast accurate default; do
+		dctarg=-${dct}dct
+		if [ "${dct}" = "default" ]; then
+			dctarg=
+		fi
+
+		# Tiled compression & decompression
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 ${dctarg}
+		for samp in GRAY 444; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
+				rm $i
+			done
+		done
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
+		for samp in 420 422; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp
+				rm $i
+			done
+		done
+
+		# Tiled decompression
+		for samp in GRAY 444; do
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 ${dctarg}
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
+				rm $i
+			done
+		done
+		for samp in 420 422; do
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp $i -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp
+				rm $i
+			done
+		done
+	done
+
+	# Scaled decompression
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
+		for samp in GRAY 420 422 444; do
+			$EXEDIR/djpeg -rgb -scale ${scalearg} -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
+		done
+	done
+
+	# Transforms
+	for samp in GRAY 420 422 444; do
+		$EXEDIR/jpegtran -flip horizontal -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg
+		$EXEDIR/jpegtran -flip vertical -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg
+		$EXEDIR/jpegtran -transpose -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg
+		$EXEDIR/jpegtran -transverse -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg
+		$EXEDIR/jpegtran -rotate 90 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg
+		$EXEDIR/jpegtran -rotate 180 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg
+		$EXEDIR/jpegtran -rotate 270 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg
+	done
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444; do
+			$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+		for samp in 420 422; do
+			$EXEDIR/djpeg -nosmooth -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -fastupsample
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+	done
+
+	# Grayscale transform
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -grayscale
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+	done
+
+	# Transforms with scaling
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				$EXEDIR/djpeg -rgb -scale ${scalearg} -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01
+				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
+			done
+		done
+	done
+
+done
+
+echo SUCCESS!
diff --git a/tjexampletest.in b/tjexampletest.in
index 430088e..40b342e 100755
--- a/tjexampletest.in
+++ b/tjexampletest.in
@@ -84,12 +84,13 @@
 	done
 
 	# Scaled decompression
-	for scale in 2 4 8; do
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
 		for samp in GRAY 420 422 444; do
-			$EXEDIR/djpeg -rgb -bmp -scale 1/${scale} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_1_${scale}_djpeg.bmp
-			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_1_${scale}.bmp -scale 1/${scale}
-			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_1_${scale}.bmp $OUTDIR/${basename}_${samp}_1_${scale}_djpeg.bmp
-			rm $OUTDIR/${basename}_${samp}_1_${scale}.bmp
+			$EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg}
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_${scale}.bmp
 		done
 	done
 
@@ -134,11 +135,12 @@
 	# Transforms with scaling
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444 422 420; do
-			for scale in 2 4 8; do
-				$EXEDIR/djpeg -rgb -bmp -scale 1/${scale} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_1_${scale}_jpegtran.bmp
-				runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_1_${scale}.bmp -$xform -scale 1/${scale} -crop 16,16,70x60
-				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_1_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_1_${scale}_jpegtran.bmp
-				rm $OUTDIR/${basename}_${samp}_${xform}_1_${scale}.bmp
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				$EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 16,16,70x60
+				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp
 			done
 		done
 	done
diff --git a/tjunittest.c b/tjunittest.c
index a61d266..4ec19c4 100644
--- a/tjunittest.c
+++ b/tjunittest.c
@@ -219,7 +219,6 @@
 	bailout:
 	if(retval==0)
 	{
-		printf("\n");
 		for(row=0; row<h; row++)
 		{
 			for(col=0; col<w; col++)
@@ -312,7 +311,6 @@
 				printf("%.3d ", buf[ypitch*ph + uvpitch*ch + (uvpitch*row+col)]);
 			printf("\n");
 		}
-		printf("\n");
 	}
 
 	return retval;
@@ -405,7 +403,7 @@
 	if(yuv==YUVENCODE) return;
 
 	if(yuv==YUVDECODE)
-		printf("JPEG -> YUV %s ... ", subName[subsamp]);
+		printf("JPEG -> YUV %s ... ", subNameLong[subsamp]);
 	else
 	{
 		printf("JPEG -> %s %s ", pixFormatStr[pf],
@@ -475,7 +473,7 @@
 			sf1);
 
 	bailout:
-	printf("\n");
+	return;
 }
 
 
@@ -515,10 +513,15 @@
 			decompTest(dhandle, dstBuf, size, w, h, pf, basename, subsamp,
 				flags);
 			if(pf>=TJPF_RGBX && pf<=TJPF_XRGB)
+			{
+				printf("\n");
 				decompTest(dhandle, dstBuf, size, w, h, pf+(TJPF_RGBA-TJPF_RGBX),
 					basename, subsamp, flags);
+			}
+			printf("\n");
 		}
 	}
+	printf("--------------------\n\n");
 
 	bailout:
 	if(chandle) tjDestroy(chandle);
@@ -610,7 +613,7 @@
 
 	bailout:
 	if(srcBuf) free(srcBuf);
-	if(dstBuf) free(dstBuf);
+	if(dstBuf) tjFree(dstBuf);
 	if(handle) tjDestroy(handle);
 }
 
@@ -647,7 +650,7 @@
 	bufSizeTest();
 	if(doyuv)
 	{
-		printf("\n\n");
+		printf("\n--------------------\n\n");
 		yuv=YUVDECODE;
 		doTest(48, 48, _onlyRGB, 1, TJSAMP_444, "test_yuv0");
 		doTest(35, 39, _onlyRGB, 1, TJSAMP_444, "test_yuv1");
diff --git a/transupp.c b/transupp.c
index ef13721..72ef8d8 100644
--- a/transupp.c
+++ b/transupp.c
@@ -2,8 +2,8 @@
  * transupp.c
  *
  * This file was part of the Independent JPEG Group's software:
- * Copyright (C) 1997-2009, Thomas G. Lane, Guido Vollbeding.
- * Modifications:
+ * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
  * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -21,9 +21,9 @@
 
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "transupp.h"		/* My own external interface */
+#include "transupp.h"           /* My own external interface */
 #include "jpegcomp.h"
-#include <ctype.h>		/* to declare isdigit() */
+#include <ctype.h>              /* to declare isdigit() */
 
 
 #if JPEG_LIB_VERSION >= 70
@@ -89,9 +89,9 @@
 
 LOCAL(void)
 do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	 jvirt_barray_ptr *src_coef_arrays,
-	 jvirt_barray_ptr *dst_coef_arrays)
+         JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+         jvirt_barray_ptr *src_coef_arrays,
+         jvirt_barray_ptr *dst_coef_arrays)
 /* Crop.  This is only used when no rotate/flip is requested with the crop. */
 {
   JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
@@ -107,18 +107,18 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+         dst_blk_y + y_crop_blocks,
+         (JDIMENSION) compptr->v_samp_factor, FALSE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			dst_buffer[offset_y],
-			compptr->width_in_blocks);
+        jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                        dst_buffer[offset_y],
+                        compptr->width_in_blocks);
       }
     }
   }
@@ -127,8 +127,8 @@
 
 LOCAL(void)
 do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		   JDIMENSION x_crop_offset,
-		   jvirt_barray_ptr *src_coef_arrays)
+                   JDIMENSION x_crop_offset,
+                   jvirt_barray_ptr *src_coef_arrays)
 /* Horizontal flip; done in-place, so no separate dest array is required.
  * NB: this only works when y_crop_offset is zero.
  */
@@ -153,39 +153,39 @@
     comp_width = MCU_cols * compptr->h_samp_factor;
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     for (blk_y = 0; blk_y < compptr->height_in_blocks;
-	 blk_y += compptr->v_samp_factor) {
+         blk_y += compptr->v_samp_factor) {
       buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	/* Do the mirroring */
-	for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
-	  ptr1 = buffer[offset_y][blk_x];
-	  ptr2 = buffer[offset_y][comp_width - blk_x - 1];
-	  /* this unrolled loop doesn't need to know which row it's on... */
-	  for (k = 0; k < DCTSIZE2; k += 2) {
-	    temp1 = *ptr1;	/* swap even column */
-	    temp2 = *ptr2;
-	    *ptr1++ = temp2;
-	    *ptr2++ = temp1;
-	    temp1 = *ptr1;	/* swap odd column with sign change */
-	    temp2 = *ptr2;
-	    *ptr1++ = -temp2;
-	    *ptr2++ = -temp1;
-	  }
-	}
-	if (x_crop_blocks > 0) {
-	  /* Now left-justify the portion of the data to be kept.
-	   * We can't use a single jcopy_block_row() call because that routine
-	   * depends on memcpy(), whose behavior is unspecified for overlapping
-	   * source and destination areas.  Sigh.
-	   */
-	  for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
-	    jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
-			    buffer[offset_y] + blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
+        /* Do the mirroring */
+        for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
+          ptr1 = buffer[offset_y][blk_x];
+          ptr2 = buffer[offset_y][comp_width - blk_x - 1];
+          /* this unrolled loop doesn't need to know which row it's on... */
+          for (k = 0; k < DCTSIZE2; k += 2) {
+            temp1 = *ptr1;      /* swap even column */
+            temp2 = *ptr2;
+            *ptr1++ = temp2;
+            *ptr2++ = temp1;
+            temp1 = *ptr1;      /* swap odd column with sign change */
+            temp2 = *ptr2;
+            *ptr1++ = -temp2;
+            *ptr2++ = -temp1;
+          }
+        }
+        if (x_crop_blocks > 0) {
+          /* Now left-justify the portion of the data to be kept.
+           * We can't use a single jcopy_block_row() call because that routine
+           * depends on memcpy(), whose behavior is undefined for overlapping
+           * source and destination areas.  Sigh.
+           */
+          for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+            jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+                            buffer[offset_y] + blk_x,
+                            (JDIMENSION) 1);
+          }
+        }
       }
     }
   }
@@ -194,9 +194,9 @@
 
 LOCAL(void)
 do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
 /* Horizontal flip in general cropping case */
 {
   JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
@@ -220,34 +220,34 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+         dst_blk_y + y_crop_blocks,
+         (JDIMENSION) compptr->v_samp_factor, FALSE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	src_row_ptr = src_buffer[offset_y];
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Do the mirrorable blocks */
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	    /* this unrolled loop doesn't need to know which row it's on... */
-	    for (k = 0; k < DCTSIZE2; k += 2) {
-	      *dst_ptr++ = *src_ptr++;	 /* copy even column */
-	      *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	    }
-	  } else {
-	    /* Copy last partial block(s) verbatim */
-	    jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			    dst_row_ptr + dst_blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
+        dst_row_ptr = dst_buffer[offset_y];
+        src_row_ptr = src_buffer[offset_y];
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Do the mirrorable blocks */
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+            /* this unrolled loop doesn't need to know which row it's on... */
+            for (k = 0; k < DCTSIZE2; k += 2) {
+              *dst_ptr++ = *src_ptr++;   /* copy even column */
+              *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
+            }
+          } else {
+            /* Copy last partial block(s) verbatim */
+            jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                            dst_row_ptr + dst_blk_x,
+                            (JDIMENSION) 1);
+          }
+        }
       }
     }
   }
@@ -256,9 +256,9 @@
 
 LOCAL(void)
 do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
 /* Vertical flip */
 {
   JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
@@ -285,49 +285,49 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Row is within the mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION) compptr->v_samp_factor,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       } else {
-	/* Bottom-edge blocks will be copied verbatim. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Bottom-edge blocks will be copied verbatim. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       }
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  src_row_ptr += x_crop_blocks;
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	       dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      /* copy even row */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = *src_ptr++;
-	      /* copy odd row with sign change */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	} else {
-	  /* Just copy row verbatim. */
-	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			  dst_buffer[offset_y],
-			  compptr->width_in_blocks);
-	}
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          dst_row_ptr = dst_buffer[offset_y];
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          src_row_ptr += x_crop_blocks;
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[dst_blk_x];
+            for (i = 0; i < DCTSIZE; i += 2) {
+              /* copy even row */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = *src_ptr++;
+              /* copy odd row with sign change */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = - *src_ptr++;
+            }
+          }
+        } else {
+          /* Just copy row verbatim. */
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y],
+                          compptr->width_in_blocks);
+        }
       }
     }
   }
@@ -336,9 +336,9 @@
 
 LOCAL(void)
 do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	      JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	      jvirt_barray_ptr *src_coef_arrays,
-	      jvirt_barray_ptr *dst_coef_arrays)
+              JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+              jvirt_barray_ptr *src_coef_arrays,
+              jvirt_barray_ptr *dst_coef_arrays)
 /* Transpose source into destination */
 {
   JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
@@ -357,25 +357,25 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
-	    for (i = 0; i < DCTSIZE; i++)
-	      for (j = 0; j < DCTSIZE; j++)
-		dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION) compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
+            for (i = 0; i < DCTSIZE; i++)
+              for (j = 0; j < DCTSIZE; j++)
+                dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+          }
+        }
       }
     }
   }
@@ -384,9 +384,9 @@
 
 LOCAL(void)
 do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
 /* 90 degree rotation is equivalent to
  *   1. Transposing the image;
  *   2. Horizontal mirroring.
@@ -413,50 +413,50 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    /* Edge blocks are transposed but not mirrored. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		i++;
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION) compptr->h_samp_factor,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          } else {
+            /* Edge blocks are transposed but not mirrored. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                i++;
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+            }
+          }
+        }
       }
     }
   }
@@ -465,9 +465,9 @@
 
 LOCAL(void)
 do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
+            JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+            jvirt_barray_ptr *src_coef_arrays,
+            jvirt_barray_ptr *dst_coef_arrays)
 /* 270 degree rotation is equivalent to
  *   1. Horizontal mirroring;
  *   2. Transposing the image.
@@ -494,40 +494,40 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[offset_x]
-		[comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++) {
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  j++;
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION) compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[offset_x]
+                [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++) {
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  j++;
+                  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                }
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+            }
+          }
+        }
       }
     }
   }
@@ -536,9 +536,9 @@
 
 LOCAL(void)
 do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
+            JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+            jvirt_barray_ptr *src_coef_arrays,
+            jvirt_barray_ptr *dst_coef_arrays)
 /* 180 degree rotation is equivalent to
  *   1. Vertical mirroring;
  *   2. Horizontal mirroring.
@@ -565,77 +565,77 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the vertically mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Row is within the vertically mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION) compptr->v_samp_factor,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       } else {
-	/* Bottom-edge rows are only mirrored horizontally. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Bottom-edge rows are only mirrored horizontally. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       }
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored both ways. */
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		/* For even row, negate every odd column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = *src_ptr++;
-		  *dst_ptr++ = - *src_ptr++;
-		}
-		/* For odd row, negate every even column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = - *src_ptr++;
-		  *dst_ptr++ = *src_ptr++;
-		}
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only mirrored vertically. */
-	      src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = *src_ptr++;
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = - *src_ptr++;
-	      }
-	    }
-	  }
-	} else {
-	  /* Remaining rows are just mirrored horizontally. */
-	  src_row_ptr = src_buffer[offset_y];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored. */
-	      dst_ptr = dst_row_ptr[dst_blk_x];
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE2; i += 2) {
-		*dst_ptr++ = *src_ptr++;
-		*dst_ptr++ = - *src_ptr++;
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only copied. */
-	      jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			      dst_row_ptr + dst_blk_x,
-			      (JDIMENSION) 1);
-	    }
-	  }
-	}
+        dst_row_ptr = dst_buffer[offset_y];
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored both ways. */
+              src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                /* For even row, negate every odd column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = *src_ptr++;
+                  *dst_ptr++ = - *src_ptr++;
+                }
+                /* For odd row, negate every even column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = - *src_ptr++;
+                  *dst_ptr++ = *src_ptr++;
+                }
+              }
+            } else {
+              /* Any remaining right-edge blocks are only mirrored vertically. */
+              src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = *src_ptr++;
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = - *src_ptr++;
+              }
+            }
+          }
+        } else {
+          /* Remaining rows are just mirrored horizontally. */
+          src_row_ptr = src_buffer[offset_y];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored. */
+              dst_ptr = dst_row_ptr[dst_blk_x];
+              src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE2; i += 2) {
+                *dst_ptr++ = *src_ptr++;
+                *dst_ptr++ = - *src_ptr++;
+              }
+            } else {
+              /* Any remaining right-edge blocks are only copied. */
+              jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                              dst_row_ptr + dst_blk_x,
+                              (JDIMENSION) 1);
+            }
+          }
+        }
       }
     }
   }
@@ -644,9 +644,9 @@
 
 LOCAL(void)
 do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	       JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	       jvirt_barray_ptr *src_coef_arrays,
-	       jvirt_barray_ptr *dst_coef_arrays)
+               JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+               jvirt_barray_ptr *src_coef_arrays,
+               jvirt_barray_ptr *dst_coef_arrays)
 /* Transverse transpose is equivalent to
  *   1. 180 degree rotation;
  *   2. Transposition;
@@ -676,81 +676,81 @@
     x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Block is within the mirrorable area. */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      } else {
-		/* Right-edge blocks are mirrored in y only */
-		src_ptr = src_buffer[offset_x]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      }
-	    } else {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Bottom-edge blocks are mirrored in x only */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      } else {
-		/* At lower right corner, just transpose, no mirroring */
-		src_ptr = src_buffer[offset_x]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++)
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	      }
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION) compptr->h_samp_factor,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          } else {
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Block is within the mirrorable area. */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                  }
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  }
+                }
+              } else {
+                /* Right-edge blocks are mirrored in y only */
+                src_ptr = src_buffer[offset_x]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                  }
+                }
+              }
+            } else {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Bottom-edge blocks are mirrored in x only */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                }
+              } else {
+                /* At lower right corner, just transpose, no mirroring */
+                src_ptr = src_buffer[offset_x]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++)
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+              }
+            }
+          }
+        }
       }
     }
   }
@@ -773,7 +773,7 @@
   }
   *result = val;
   if (ptr == *strptr)
-    return FALSE;		/* oops, no digits */
+    return FALSE;               /* oops, no digits */
   *strptr = ptr;
   return TRUE;
 }
@@ -783,7 +783,7 @@
  * The routine returns TRUE if the spec string is valid, FALSE if not.
  *
  * The crop spec string should have the format
- *	<width>x<height>{+-}<xoffset>{+-}<yoffset>
+ *      <width>[f]x<height>[f]{+-}<xoffset>{+-}<yoffset>
  * where width, height, xoffset, and yoffset are unsigned integers.
  * Each of the elements can be omitted to indicate a default value.
  * (A weakness of this style is that it is not possible to omit xoffset
@@ -805,14 +805,22 @@
     /* fetch width */
     if (! jt_read_integer(&spec, &info->crop_width))
       return FALSE;
-    info->crop_width_set = JCROP_POS;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_width_set = JCROP_FORCE;
+    } else
+      info->crop_width_set = JCROP_POS;
   }
-  if (*spec == 'x' || *spec == 'X') {	
+  if (*spec == 'x' || *spec == 'X') {
     /* fetch height */
     spec++;
     if (! jt_read_integer(&spec, &info->crop_height))
       return FALSE;
-    info->crop_height_set = JCROP_POS;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_height_set = JCROP_FORCE;
+    } else
+      info->crop_height_set = JCROP_POS;
   }
   if (*spec == '+' || *spec == '-') {
     /* fetch xoffset */
@@ -880,7 +888,7 @@
 
 GLOBAL(boolean)
 jtransform_request_workspace (j_decompress_ptr srcinfo,
-			      jpeg_transform_info *info)
+                              jpeg_transform_info *info)
 {
   jvirt_barray_ptr *coef_arrays;
   boolean need_workspace, transpose_it;
@@ -913,18 +921,18 @@
   if (info->perfect) {
     if (info->num_components == 1) {
       if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->_min_DCT_h_scaled_size,
-	  srcinfo->_min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
+          srcinfo->output_height,
+          srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
     } else {
       if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size,
-	  srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
+          srcinfo->output_height,
+          srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
     }
   }
 
@@ -945,9 +953,9 @@
       info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size;
     } else {
       info->iMCU_sample_width =
-	srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
       info->iMCU_sample_height =
-	srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
     }
     break;
   default:
@@ -958,9 +966,9 @@
       info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size;
     } else {
       info->iMCU_sample_width =
-	srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
       info->iMCU_sample_height =
-	srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
     }
     break;
   }
@@ -971,11 +979,11 @@
   if (info->crop) {
     /* Insert default values for unset crop parameters */
     if (info->crop_xoffset_set == JCROP_UNSET)
-      info->crop_xoffset = 0;	/* default to +0 */
+      info->crop_xoffset = 0;   /* default to +0 */
     if (info->crop_yoffset_set == JCROP_UNSET)
-      info->crop_yoffset = 0;	/* default to +0 */
+      info->crop_yoffset = 0;   /* default to +0 */
     if (info->crop_xoffset >= info->output_width ||
-	info->crop_yoffset >= info->output_height)
+        info->crop_yoffset >= info->output_height)
       ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
     if (info->crop_width_set == JCROP_UNSET)
       info->crop_width = info->output_width - info->crop_xoffset;
@@ -983,9 +991,9 @@
       info->crop_height = info->output_height - info->crop_yoffset;
     /* Ensure parameters are valid */
     if (info->crop_width <= 0 || info->crop_width > info->output_width ||
-	info->crop_height <= 0 || info->crop_height > info->output_height ||
-	info->crop_xoffset > info->output_width - info->crop_width ||
-	info->crop_yoffset > info->output_height - info->crop_height)
+        info->crop_height <= 0 || info->crop_height > info->output_height ||
+        info->crop_xoffset > info->output_width - info->crop_width ||
+        info->crop_yoffset > info->output_height - info->crop_height)
       ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
     /* Convert negative crop offsets into regular offsets */
     if (info->crop_xoffset_set == JCROP_NEG)
@@ -997,10 +1005,16 @@
     else
       yoffset = info->crop_yoffset;
     /* Now adjust so that upper left corner falls at an iMCU boundary */
-    info->output_width =
-      info->crop_width + (xoffset % info->iMCU_sample_width);
-    info->output_height =
-      info->crop_height + (yoffset % info->iMCU_sample_height);
+    if (info->crop_width_set == JCROP_FORCE)
+      info->output_width = info->crop_width;
+    else
+      info->output_width =
+        info->crop_width + (xoffset % info->iMCU_sample_width);
+    if (info->crop_height_set == JCROP_FORCE)
+      info->output_height = info->crop_height;
+    else
+      info->output_height =
+        info->crop_height + (yoffset % info->iMCU_sample_height);
     /* Save x/y offsets measured in iMCUs */
     info->x_crop_offset = xoffset / info->iMCU_sample_width;
     info->y_crop_offset = yoffset / info->iMCU_sample_height;
@@ -1079,30 +1093,30 @@
   if (need_workspace) {
     coef_arrays = (jvirt_barray_ptr *)
       (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
-		SIZEOF(jvirt_barray_ptr) * info->num_components);
+                SIZEOF(jvirt_barray_ptr) * info->num_components);
     width_in_iMCUs = (JDIMENSION)
       jdiv_round_up((long) info->output_width,
-		    (long) info->iMCU_sample_width);
+                    (long) info->iMCU_sample_width);
     height_in_iMCUs = (JDIMENSION)
       jdiv_round_up((long) info->output_height,
-		    (long) info->iMCU_sample_height);
+                    (long) info->iMCU_sample_height);
     for (ci = 0; ci < info->num_components; ci++) {
       compptr = srcinfo->comp_info + ci;
       if (info->num_components == 1) {
-	/* we're going to force samp factors to 1x1 in this case */
-	h_samp_factor = v_samp_factor = 1;
+        /* we're going to force samp factors to 1x1 in this case */
+        h_samp_factor = v_samp_factor = 1;
       } else if (transpose_it) {
-	h_samp_factor = compptr->v_samp_factor;
-	v_samp_factor = compptr->h_samp_factor;
+        h_samp_factor = compptr->v_samp_factor;
+        v_samp_factor = compptr->h_samp_factor;
       } else {
-	h_samp_factor = compptr->h_samp_factor;
-	v_samp_factor = compptr->v_samp_factor;
+        h_samp_factor = compptr->h_samp_factor;
+        v_samp_factor = compptr->v_samp_factor;
       }
       width_in_blocks = width_in_iMCUs * h_samp_factor;
       height_in_blocks = height_in_iMCUs * v_samp_factor;
       coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
-	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
-	 width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
+        ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
+         width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
     }
     info->workspace_coef_arrays = coef_arrays;
   } else
@@ -1146,11 +1160,11 @@
     qtblptr = dstinfo->quant_tbl_ptrs[tblno];
     if (qtblptr != NULL) {
       for (i = 0; i < DCTSIZE; i++) {
-	for (j = 0; j < i; j++) {
-	  qtemp = qtblptr->quantval[i*DCTSIZE+j];
-	  qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
-	  qtblptr->quantval[j*DCTSIZE+i] = qtemp;
-	}
+        for (j = 0; j < i; j++) {
+          qtemp = qtblptr->quantval[i*DCTSIZE+j];
+          qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
+          qtblptr->quantval[j*DCTSIZE+i] = qtemp;
+        }
       }
     }
   }
@@ -1162,9 +1176,10 @@
  * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
  */
 
+#if JPEG_LIB_VERSION >= 70
 LOCAL(void)
 adjust_exif_parameters (JOCTET FAR * data, unsigned int length,
-			JDIMENSION new_width, JDIMENSION new_height)
+                        JDIMENSION new_width, JDIMENSION new_height)
 {
   boolean is_motorola; /* Flag for byte order */
   unsigned int number_of_tags, tagnum;
@@ -1281,36 +1296,37 @@
     }
     if (tagnum == 0xA002 || tagnum == 0xA003) {
       if (tagnum == 0xA002)
-	new_value = new_width; /* ExifImageWidth Tag */
+        new_value = new_width; /* ExifImageWidth Tag */
       else
-	new_value = new_height; /* ExifImageHeight Tag */
+        new_value = new_height; /* ExifImageHeight Tag */
       if (is_motorola) {
-	data[offset+2] = 0; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 4;
-	data[offset+4] = 0; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 1;
-	data[offset+8] = 0;
-	data[offset+9] = 0;
-	data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+11] = (JOCTET)(new_value & 0xFF);
+        data[offset+2] = 0; /* Format = unsigned long (4 octets) */
+        data[offset+3] = 4;
+        data[offset+4] = 0; /* Number Of Components = 1 */
+        data[offset+5] = 0;
+        data[offset+6] = 0;
+        data[offset+7] = 1;
+        data[offset+8] = 0;
+        data[offset+9] = 0;
+        data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset+11] = (JOCTET)(new_value & 0xFF);
       } else {
-	data[offset+2] = 4; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 0;
-	data[offset+4] = 1; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 0;
-	data[offset+8] = (JOCTET)(new_value & 0xFF);
-	data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+10] = 0;
-	data[offset+11] = 0;
+        data[offset+2] = 4; /* Format = unsigned long (4 octets) */
+        data[offset+3] = 0;
+        data[offset+4] = 1; /* Number Of Components = 1 */
+        data[offset+5] = 0;
+        data[offset+6] = 0;
+        data[offset+7] = 0;
+        data[offset+8] = (JOCTET)(new_value & 0xFF);
+        data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset+10] = 0;
+        data[offset+11] = 0;
       }
     }
     offset += 12;
   } while (--number_of_tags);
 }
+#endif
 
 
 /* Adjust output image parameters as needed.
@@ -1326,9 +1342,9 @@
 
 GLOBAL(jvirt_barray_ptr *)
 jtransform_adjust_parameters (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
+                              j_compress_ptr dstinfo,
+                              jvirt_barray_ptr *src_coef_arrays,
+                              jpeg_transform_info *info)
 {
   /* If force-to-grayscale is requested, adjust destination parameters */
   if (info->force_grayscale) {
@@ -1338,11 +1354,11 @@
      * isn't worth extra code space.  But we check it to avoid crashing.)
      */
     if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
-	  dstinfo->num_components == 3) ||
-	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
-	  dstinfo->num_components == 1)) &&
-	srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
-	srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+          dstinfo->num_components == 3) ||
+         (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+          dstinfo->num_components == 1)) &&
+        srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+        srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
       /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
        * properly.  Among other things, it sets the target h_samp_factor &
        * v_samp_factor to 1, which typically won't match the source.
@@ -1407,11 +1423,11 @@
 #if JPEG_LIB_VERSION >= 70
     /* Adjust Exif image parameters */
     if (dstinfo->jpeg_width != srcinfo->image_width ||
-	dstinfo->jpeg_height != srcinfo->image_height)
+        dstinfo->jpeg_height != srcinfo->image_height)
       /* Align data segment to start of TIFF structure for parsing */
       adjust_exif_parameters(srcinfo->marker_list->data + 6,
-	srcinfo->marker_list->data_length - 6,
-	dstinfo->jpeg_width, dstinfo->jpeg_height);
+        srcinfo->marker_list->data_length - 6,
+        dstinfo->jpeg_width, dstinfo->jpeg_height);
 #endif
   }
 
@@ -1433,9 +1449,9 @@
 
 GLOBAL(void)
 jtransform_execute_transform (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
+                              j_compress_ptr dstinfo,
+                              jvirt_barray_ptr *src_coef_arrays,
+                              jpeg_transform_info *info)
 {
   jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
 
@@ -1446,39 +1462,39 @@
   case JXFORM_NONE:
     if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
       do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_FLIP_H:
     if (info->y_crop_offset != 0 || info->slow_hflip)
       do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		src_coef_arrays, dst_coef_arrays);
+                src_coef_arrays, dst_coef_arrays);
     else
       do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
-			src_coef_arrays);
+                        src_coef_arrays);
     break;
   case JXFORM_FLIP_V:
     do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_TRANSPOSE:
     do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		 src_coef_arrays, dst_coef_arrays);
+                 src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_TRANSVERSE:
     do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		  src_coef_arrays, dst_coef_arrays);
+                  src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_90:
     do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_180:
     do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
+               src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_270:
     do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
+               src_coef_arrays, dst_coef_arrays);
     break;
   }
 }
@@ -1506,8 +1522,8 @@
 
 GLOBAL(boolean)
 jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
-			     int MCU_width, int MCU_height,
-			     JXFORM_CODE transform)
+                             int MCU_width, int MCU_height,
+                             JXFORM_CODE transform)
 {
   boolean result = TRUE; /* initialize TRUE */
 
@@ -1570,7 +1586,7 @@
 
 GLOBAL(void)
 jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		       JCOPY_OPTION option)
+                       JCOPY_OPTION option)
 {
   jpeg_saved_marker_ptr marker;
 
@@ -1581,34 +1597,34 @@
    */
   for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
     if (dstinfo->write_JFIF_header &&
-	marker->marker == JPEG_APP0 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x4A &&
-	GETJOCTET(marker->data[1]) == 0x46 &&
-	GETJOCTET(marker->data[2]) == 0x49 &&
-	GETJOCTET(marker->data[3]) == 0x46 &&
-	GETJOCTET(marker->data[4]) == 0)
-      continue;			/* reject duplicate JFIF */
+        marker->marker == JPEG_APP0 &&
+        marker->data_length >= 5 &&
+        GETJOCTET(marker->data[0]) == 0x4A &&
+        GETJOCTET(marker->data[1]) == 0x46 &&
+        GETJOCTET(marker->data[2]) == 0x49 &&
+        GETJOCTET(marker->data[3]) == 0x46 &&
+        GETJOCTET(marker->data[4]) == 0)
+      continue;                 /* reject duplicate JFIF */
     if (dstinfo->write_Adobe_marker &&
-	marker->marker == JPEG_APP0+14 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x41 &&
-	GETJOCTET(marker->data[1]) == 0x64 &&
-	GETJOCTET(marker->data[2]) == 0x6F &&
-	GETJOCTET(marker->data[3]) == 0x62 &&
-	GETJOCTET(marker->data[4]) == 0x65)
-      continue;			/* reject duplicate Adobe */
+        marker->marker == JPEG_APP0+14 &&
+        marker->data_length >= 5 &&
+        GETJOCTET(marker->data[0]) == 0x41 &&
+        GETJOCTET(marker->data[1]) == 0x64 &&
+        GETJOCTET(marker->data[2]) == 0x6F &&
+        GETJOCTET(marker->data[3]) == 0x62 &&
+        GETJOCTET(marker->data[4]) == 0x65)
+      continue;                 /* reject duplicate Adobe */
 #ifdef NEED_FAR_POINTERS
     /* We could use jpeg_write_marker if the data weren't FAR... */
     {
       unsigned int i;
       jpeg_write_m_header(dstinfo, marker->marker, marker->data_length);
       for (i = 0; i < marker->data_length; i++)
-	jpeg_write_m_byte(dstinfo, marker->data[i]);
+        jpeg_write_m_byte(dstinfo, marker->data[i]);
     }
 #else
     jpeg_write_marker(dstinfo, marker->marker,
-		      marker->data, marker->data_length);
+                      marker->data, marker->data_length);
 #endif
   }
 }
diff --git a/transupp.h b/transupp.h
index 122d448..5b0e1bb 100644
--- a/transupp.h
+++ b/transupp.h
@@ -1,7 +1,7 @@
 /*
  * transupp.h
  *
- * Copyright (C) 1997-2009, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -19,7 +19,7 @@
 
 /* If you happen not to want the image transform support, disable it here */
 #ifndef TRANSFORMS_SUPPORTED
-#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
+#define TRANSFORMS_SUPPORTED 1          /* 0 disables transform code */
 #endif
 
 /*
@@ -57,6 +57,7 @@
  * corner up and/or left to make it so, simultaneously increasing the region
  * dimensions to keep the lower right crop corner unchanged.  (Thus, the
  * output image covers at least the requested region, but may cover more.)
+ * The adjustment of the region dimensions may be optionally disabled.
  *
  * We also provide a lossless-resize option, which is kind of a lossless-crop
  * operation in the DCT coefficient block domain - it discards higher-order
@@ -79,13 +80,13 @@
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_parse_crop_spec	jTrParCrop
-#define jtransform_request_workspace	jTrRequest
-#define jtransform_adjust_parameters	jTrAdjust
-#define jtransform_execute_transform	jTrExec
-#define jtransform_perfect_transform	jTrPerfect
-#define jcopy_markers_setup		jCMrkSetup
-#define jcopy_markers_execute		jCMrkExec
+#define jtransform_parse_crop_spec      jTrParCrop
+#define jtransform_request_workspace    jTrRequest
+#define jtransform_adjust_parameters    jTrAdjust
+#define jtransform_execute_transform    jTrExec
+#define jtransform_perfect_transform    jTrPerfect
+#define jcopy_markers_setup             jCMrkSetup
+#define jcopy_markers_execute           jCMrkExec
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
@@ -94,25 +95,27 @@
  */
 
 typedef enum {
-	JXFORM_NONE,		/* no transformation */
-	JXFORM_FLIP_H,		/* horizontal flip */
-	JXFORM_FLIP_V,		/* vertical flip */
-	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
-	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
-	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
-	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
+  JXFORM_NONE,            /* no transformation */
+  JXFORM_FLIP_H,          /* horizontal flip */
+  JXFORM_FLIP_V,          /* vertical flip */
+  JXFORM_TRANSPOSE,       /* transpose across UL-to-LR axis */
+  JXFORM_TRANSVERSE,      /* transpose across UR-to-LL axis */
+  JXFORM_ROT_90,          /* 90-degree clockwise rotation */
+  JXFORM_ROT_180,         /* 180-degree rotation */
+  JXFORM_ROT_270          /* 270-degree clockwise (or 90 ccw) */
 } JXFORM_CODE;
 
 /*
  * Codes for crop parameters, which can individually be unspecified,
- * positive, or negative.  (Negative width or height makes no sense, though.)
+ * positive or negative for xoffset or yoffset,
+ * positive or forced for width or height.
  */
 
 typedef enum {
-	JCROP_UNSET,
-	JCROP_POS,
-	JCROP_NEG
+  JCROP_UNSET,
+  JCROP_POS,
+  JCROP_NEG,
+  JCROP_FORCE
 } JCROP_CODE;
 
 /*
@@ -123,11 +126,11 @@
 
 typedef struct {
   /* Options: set by caller */
-  JXFORM_CODE transform;	/* image transform operator */
-  boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
-  boolean trim;			/* if TRUE, trim partial MCUs as needed */
-  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop source image */
+  JXFORM_CODE transform;        /* image transform operator */
+  boolean perfect;              /* if TRUE, fail if partial MCUs are requested */
+  boolean trim;                 /* if TRUE, trim partial MCUs as needed */
+  boolean force_grayscale;      /* if TRUE, convert color image to grayscale */
+  boolean crop;                 /* if TRUE, crop source image */
   boolean slow_hflip;  /* For best performance, the JXFORM_FLIP_H transform
                           normally modifies the source coefficients in place.
                           Setting this to TRUE will instead use a slower,
@@ -139,23 +142,23 @@
   /* Crop parameters: application need not set these unless crop is TRUE.
    * These can be filled in by jtransform_parse_crop_spec().
    */
-  JDIMENSION crop_width;	/* Width of selected region */
-  JCROP_CODE crop_width_set;
-  JDIMENSION crop_height;	/* Height of selected region */
-  JCROP_CODE crop_height_set;
-  JDIMENSION crop_xoffset;	/* X offset of selected region */
-  JCROP_CODE crop_xoffset_set;	/* (negative measures from right edge) */
-  JDIMENSION crop_yoffset;	/* Y offset of selected region */
-  JCROP_CODE crop_yoffset_set;	/* (negative measures from bottom edge) */
+  JDIMENSION crop_width;        /* Width of selected region */
+  JCROP_CODE crop_width_set;    /* (forced disables adjustment) */
+  JDIMENSION crop_height;       /* Height of selected region */
+  JCROP_CODE crop_height_set;   /* (forced disables adjustment) */
+  JDIMENSION crop_xoffset;      /* X offset of selected region */
+  JCROP_CODE crop_xoffset_set;  /* (negative measures from right edge) */
+  JDIMENSION crop_yoffset;      /* Y offset of selected region */
+  JCROP_CODE crop_yoffset_set;  /* (negative measures from bottom edge) */
 
   /* Internal workspace: caller should not touch these */
-  int num_components;		/* # of components in workspace */
+  int num_components;           /* # of components in workspace */
   jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
-  JDIMENSION output_width;	/* cropped destination dimensions */
+  JDIMENSION output_width;      /* cropped destination dimensions */
   JDIMENSION output_height;
-  JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
+  JDIMENSION x_crop_offset;     /* destination crop offsets measured in iMCUs */
   JDIMENSION y_crop_offset;
-  int iMCU_sample_width;	/* destination iMCU size */
+  int iMCU_sample_width;        /* destination iMCU size */
   int iMCU_sample_height;
 } jpeg_transform_info;
 
@@ -164,34 +167,34 @@
 
 /* Parse a crop specification (written in X11 geometry style) */
 EXTERN(boolean) jtransform_parse_crop_spec
-	JPP((jpeg_transform_info *info, const char *spec));
+        JPP((jpeg_transform_info *info, const char *spec));
 /* Request any required workspace */
 EXTERN(boolean) jtransform_request_workspace
-	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
+        JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
 /* Adjust output image parameters */
 EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
+        JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+             jvirt_barray_ptr *src_coef_arrays,
+             jpeg_transform_info *info));
 /* Execute the actual transformation, if any */
 EXTERN(void) jtransform_execute_transform
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
+        JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+             jvirt_barray_ptr *src_coef_arrays,
+             jpeg_transform_info *info));
 /* Determine whether lossless transformation is perfectly
  * possible for a specified image and transformation.
  */
 EXTERN(boolean) jtransform_perfect_transform
-	JPP((JDIMENSION image_width, JDIMENSION image_height,
-	     int MCU_width, int MCU_height,
-	     JXFORM_CODE transform));
+        JPP((JDIMENSION image_width, JDIMENSION image_height,
+             int MCU_width, int MCU_height,
+             JXFORM_CODE transform));
 
 /* jtransform_execute_transform used to be called
  * jtransform_execute_transformation, but some compilers complain about
  * routine names that long.  This macro is here to avoid breaking any
  * old source code that uses the original name...
  */
-#define jtransform_execute_transformation	jtransform_execute_transform
+#define jtransform_execute_transformation       jtransform_execute_transform
 
 #endif /* TRANSFORMS_SUPPORTED */
 
@@ -201,17 +204,17 @@
  */
 
 typedef enum {
-	JCOPYOPT_NONE,		/* copy no optional markers */
-	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
-	JCOPYOPT_ALL		/* copy all optional markers */
+  JCOPYOPT_NONE,          /* copy no optional markers */
+  JCOPYOPT_COMMENTS,      /* copy only comment (COM) markers */
+  JCOPYOPT_ALL            /* copy all optional markers */
 } JCOPY_OPTION;
 
-#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
+#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS     /* recommended default */
 
 /* Setup decompression object to save desired markers in memory */
 EXTERN(void) jcopy_markers_setup
-	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
+        JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
 /* Copy markers saved in the given source object to the destination object */
 EXTERN(void) jcopy_markers_execute
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     JCOPY_OPTION option));
+        JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+             JCOPY_OPTION option));
diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c
index 1ff9bba..77ca28d 100644
--- a/turbojpeg-jni.c
+++ b/turbojpeg-jni.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -29,6 +29,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include "turbojpeg.h"
+#ifdef WIN32
+#include "tjutil.h"
+#endif
 #include <jni.h>
 #include "java/org_libjpegturbo_turbojpeg_TJCompressor.h"
 #include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h"
@@ -41,15 +44,20 @@
 	goto bailout;  \
 }
 
-#define bailif0(f) {if(!(f)) goto bailout;}
+#define bailif0(f) {if(!(f)) {  \
+	char temps[80];  \
+	snprintf(temps, 80, "Unexpected NULL condition in line %d", __LINE__);  \
+	_throw(temps);  \
+}}
 
 #define gethandle()  \
 	jclass _cls=(*env)->GetObjectClass(env, obj);  \
 	jfieldID _fid;  \
 	if(!_cls) goto bailout;  \
 	bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J"));  \
-	handle=(tjhandle)(jlong)(*env)->GetLongField(env, obj, _fid);  \
+	handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid);  \
 
+/* TurboJPEG 1.2.x: TJ::bufSize() */
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
 	(JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp)
 {
@@ -60,6 +68,7 @@
 	return retval;
 }
 
+/* TurboJPEG 1.2.x: TJ::bufSizeYUV() */
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV
 	(JNIEnv *env, jclass cls, jint width, jint height, jint subsamp)
 {
@@ -70,6 +79,7 @@
 	return retval;
 }
 
+/* TurboJPEG 1.2.x: TJCompressor::init() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
 	(JNIEnv *env, jobject obj)
 {
@@ -82,101 +92,111 @@
 
 	bailif0(cls=(*env)->GetObjectClass(env, obj));
 	bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
-	(*env)->SetLongField(env, obj, fid, (jlong)handle);
+	(*env)->SetLongField(env, obj, fid, (size_t)handle);
 
 	bailout:
 	return;
 }
 
+static jint TJCompressor_compress
+	(JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y,
+		jint width, jint pitch, jint height, jint pf, jbyteArray dst,
+		jint jpegSubsamp, jint jpegQual, jint flags)
+{	/* Common body of the compress() JNI entry points; returns jpegSize. */
+	tjhandle handle=0;
+	unsigned long jpegSize=0;
+	jsize arraySize=0, actualPitch;	/* both are measured in bytes */
+	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
+
+	gethandle();	/* macro: loads this object's "handle" field into handle */
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
+		|| pitch<0)
+		_throw("Invalid argument in compress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;	/* 0 = width*pixel size */
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];	/* bytes through the end of the last region row */
+	if((*env)->GetArrayLength(env, src)*srcElementSize<arraySize)	/* element count scaled to bytes */
+		_throw("Source buffer is not large enough");
+	jpegSize=tjBufSize(width, height, jpegSubsamp);
+	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjCompress2(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width,	/* source pointer starts at pixel (x, y) */
+		pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, jpegQual,
+		flags|TJFLAG_NOREALLOC)==-1)
+		_throw(tjGetErrorStr());
+
+	bailout:	/* arrays are released in the reverse order of acquisition */
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return (jint)jpegSize;
+}
+
+/* TurboJPEG 1.3.x: TJCompressor::compress() byte source */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width,
+		jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp,
+		jint jpegQual, jint flags)
+{
+	return TJCompressor_compress(env, obj, src, 1, x, y, width, pitch, height,
+		pf, dst, jpegSubsamp, jpegQual, flags);
+}
+
+/* TurboJPEG 1.2.x: TJCompressor::compress() byte source */
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII
 	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
 		jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual,
 		jint flags)
 {
-	tjhandle handle=0;
-	unsigned long jpegSize=0;  jsize arraySize=0;
-	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
-
-	gethandle();
-
-	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
-		|| pitch<0)
-		_throw("Invalid argument in compress()");
-	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
-		_throw("Mismatch between Java and C API");
-
-	arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height;
-	if((*env)->GetArrayLength(env, src)<arraySize)
-		_throw("Source buffer is not large enough");
-	jpegSize=tjBufSize(width, height, jpegSubsamp);
-	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
-		_throw("Destination buffer is not large enough");
-
-	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
-	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
-
-	if(tjCompress2(handle, srcBuf, width, pitch, height, pf, &jpegBuf,
-		&jpegSize, jpegSubsamp, jpegQual, flags|TJFLAG_NOREALLOC)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-		jpegBuf=srcBuf=NULL;
-		_throw(tjGetErrorStr());
-	}
-
-	bailout:
-	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
-	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-	return (jint)jpegSize;
+	return TJCompressor_compress(env, obj, src, 1, 0, 0, width, pitch, height,
+		pf, dst, jpegSubsamp, jpegQual, flags);
 }
 
-JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII
-	(JNIEnv *env, jobject obj, jintArray src, jint width, jint pitch,
-		jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual,
-		jint flags)
+/* TurboJPEG 1.3.x: TJCompressor::compress() int source */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width,
+		jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp,
+		jint jpegQual, jint flags)
 {
-	tjhandle handle=0;
-	unsigned long jpegSize=0;  jsize arraySize=0;
-	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
-
-	gethandle();
-
-	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
-		|| pitch<0)
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
 		_throw("Invalid argument in compress()");
-	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
-		_throw("Mismatch between Java and C API");
 	if(tjPixelSize[pf]!=sizeof(jint))
 		_throw("Pixel format must be 32-bit when compressing from an integer buffer.");
 
-	arraySize=(pitch==0)? width*height:pitch*height;
-	if((*env)->GetArrayLength(env, src)<arraySize)
-		_throw("Source buffer is not large enough");
-	jpegSize=tjBufSize(width, height, jpegSubsamp);
-	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
-		_throw("Destination buffer is not large enough");
-
-	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
-	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
-
-	if(tjCompress2(handle, srcBuf, width, pitch*sizeof(jint), height, pf,
-		&jpegBuf, &jpegSize, jpegSubsamp, jpegQual, flags|TJFLAG_NOREALLOC)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-		jpegBuf=srcBuf=NULL;
-		_throw(tjGetErrorStr());
-	}
+	return TJCompressor_compress(env, obj, src, sizeof(jint), x, y, width,
+		stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags);
 
 	bailout:
-	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
-	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-	return (jint)jpegSize;
+	return 0;
 }
 
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
-	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
-		jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+/* TurboJPEG 1.2.x: TJCompressor::compress() int source */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII
+	(JNIEnv *env, jobject obj, jintArray src, jint width, jint stride,
+		jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual,
+		jint flags)
+{
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
+		_throw("Invalid argument in compress()");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when compressing from an integer buffer.");
+
+	return TJCompressor_compress(env, obj, src, sizeof(jint), 0, 0, width,
+		stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags);
+
+	bailout:
+	return 0;
+}
+
+static void TJCompressor_encodeYUV
+	(JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint width,
+		jint pitch, jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
 {
 	tjhandle handle=0;
 	jsize arraySize=0;
@@ -191,7 +211,7 @@
 		_throw("Mismatch between Java and C API");
 
 	arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height;
-	if((*env)->GetArrayLength(env, src)<arraySize)
+	if((*env)->GetArrayLength(env, src)*srcElementSize<arraySize)
 		_throw("Source buffer is not large enough");
 	if((*env)->GetArrayLength(env, dst)
 		<(jsize)tjBufSizeYUV(width, height, subsamp))
@@ -202,12 +222,7 @@
 
 	if(tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp,
 		flags)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-		dstBuf=srcBuf=NULL;
 		_throw(tjGetErrorStr());
-	}
 
 	bailout:
 	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
@@ -215,49 +230,33 @@
 	return;
 }
 
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
-	(JNIEnv *env, jobject obj, jintArray src, jint width, jint pitch,
+/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() byte source */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
 		jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
 {
-	tjhandle handle=0;
-	jsize arraySize=0;
-	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+	TJCompressor_encodeYUV(env, obj, src, 1, width, pitch, height, pf, dst,
+		subsamp, flags);
+}
 
-	gethandle();
-
-	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
-		|| pitch<0)
-		_throw("Invalid argument in compress()");
-	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
-		_throw("Mismatch between Java and C API");
+/* TurboJPEG 1.2.x: TJCompressor::encodeYUV() int source */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
+	(JNIEnv *env, jobject obj, jintArray src, jint width, jint stride,
+		jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+{
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
+		_throw("Invalid argument in encodeYUV()");
 	if(tjPixelSize[pf]!=sizeof(jint))
 		_throw("Pixel format must be 32-bit when encoding from an integer buffer.");
 
-	arraySize=(pitch==0)? width*height:pitch*height;
-	if((*env)->GetArrayLength(env, src)<arraySize)
-		_throw("Source buffer is not large enough");
-	if((*env)->GetArrayLength(env, dst)
-		<(jsize)tjBufSizeYUV(width, height, subsamp))
-		_throw("Destination buffer is not large enough");
-
-	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
-	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
-
-	if(tjEncodeYUV2(handle, srcBuf, width, pitch*sizeof(jint), height, pf,
-		dstBuf, subsamp, flags)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
-		dstBuf=srcBuf=NULL;
-		_throw(tjGetErrorStr());
-	}
+	TJCompressor_encodeYUV(env, obj, src, sizeof(jint), width,
+		stride*sizeof(jint), height, pf, dst, subsamp, flags);
 
 	bailout:
-	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
 	return;
 }
 
+/* TurboJPEG 1.2.x: TJCompressor::destroy() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
 	(JNIEnv *env, jobject obj)
 {
@@ -272,6 +271,7 @@
 	return;
 }
 
+/* TurboJPEG 1.2.x: TJDecompressor::init() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
 	(JNIEnv *env, jobject obj)
 {
@@ -283,16 +283,17 @@
 
 	bailif0(cls=(*env)->GetObjectClass(env, obj));
 	bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
-	(*env)->SetLongField(env, obj, fid, (jlong)handle);
+	(*env)->SetLongField(env, obj, fid, (size_t)handle);
 
 	bailout:
 	return;
 }
 
+/* TurboJPEG 1.2.x: TJ::getScalingFactors() */
 JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors
 	(JNIEnv *env, jclass cls)
 {
-  jclass sfcls=NULL;  jfieldID fid=0;
+	jclass sfcls=NULL;  jfieldID fid=0;
 	tjscalingfactor *sf=NULL;  int n=0, i;
 	jobject sfobj=NULL;
 	jobjectArray sfjava=NULL;
@@ -317,6 +318,7 @@
 	return sfjava;
 }
 
+/* TurboJPEG 1.2.x: TJDecompressor::decompressHeader() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
 	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize)
 {
@@ -331,12 +333,10 @@
 
 	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
 
-	if(tjDecompressHeader2(handle, jpegBuf, (unsigned long)jpegSize, 
+	if(tjDecompressHeader2(handle, jpegBuf, (unsigned long)jpegSize,
 		&width, &height, &jpegSubsamp)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
 		_throw(tjGetErrorStr());
-	}
+
 	(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);  jpegBuf=NULL;
 
 	bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I"));
@@ -347,89 +347,101 @@
 	(*env)->SetIntField(env, obj, _fid, height);
 
 	bailout:
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
 	return;
 }
 
+static void TJDecompressor_decompress
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst,
+		jint dstElementSize, jint x, jint y, jint width, jint pitch, jint height,
+		jint pf, jint flags)
+{	/* Common body of the decompress() JNI entry points. */
+	tjhandle handle=0;
+	jsize arraySize=0, actualPitch;	/* both are measured in bytes */
+	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
+
+	gethandle();	/* macro: loads this object's "handle" field into handle */
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
+		_throw("Invalid argument in decompress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+
+	if((*env)->GetArrayLength(env, src)<jpegSize)
+		_throw("Source buffer is not large enough");
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;	/* 0 = width*pixel size */
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];	/* bytes through the end of the last region row */
+	if((*env)->GetArrayLength(env, dst)*dstElementSize<arraySize)	/* element count scaled to bytes */
+		_throw("Destination buffer is not large enough");
+
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize,
+		&dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf,	/* output pointer starts at pixel (x, y) */
+		flags)==-1)
+		_throw(tjGetErrorStr());
+
+	bailout:	/* arrays are released in the reverse order of acquisition */
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+	return;
+}
+
+/* TurboJPEG 1.3.x: TJDecompressor::decompress() byte destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+		jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags)
+{
+	TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, x, y, width,
+		pitch, height, pf, flags);
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::decompress() byte destination */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII
 	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
 		jint width, jint pitch, jint height, jint pf, jint flags)
 {
-	tjhandle handle=0;
-	jsize arraySize=0;
-	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
-
-	gethandle();
-
-	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
-		_throw("Invalid argument in decompress()");
-	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
-		_throw("Mismatch between Java and C API");
-
-	if((*env)->GetArrayLength(env, src)<jpegSize)
-		_throw("Source buffer is not large enough");
-	arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height;
-	if((*env)->GetArrayLength(env, dst)<arraySize)
-		_throw("Destination buffer is not large enough");
-
-	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
-	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
-
-	if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, width,
-		pitch, height, pf, flags)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
-		dstBuf=jpegBuf=NULL;
-		_throw(tjGetErrorStr());
-	}
-
-	bailout:
-	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
-	return;
+	TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 0, 0, width,
+		pitch, height, pf, flags);
 }
 
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII
+/* TurboJPEG 1.3.x: TJDecompressor::decompress() int destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII
 	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst,
-		jint width, jint pitch, jint height, jint pf, jint flags)
+		jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags)
 {
-	tjhandle handle=0;
-	jsize arraySize=0;
-	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
-
-	gethandle();
-
 	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
 		_throw("Invalid argument in decompress()");
-	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
-		_throw("Mismatch between Java and C API");
 	if(tjPixelSize[pf]!=sizeof(jint))
 		_throw("Pixel format must be 32-bit when decompressing to an integer buffer.");
 
-	if((*env)->GetArrayLength(env, src)<jpegSize)
-		_throw("Source buffer is not large enough");
-	arraySize=(pitch==0)? width*height:pitch*height;
-	if((*env)->GetArrayLength(env, dst)<arraySize)
-		_throw("Destination buffer is not large enough");
-
-	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
-	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
-
-	if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, width,
-		pitch*sizeof(jint), height, pf, flags)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
-		dstBuf=jpegBuf=NULL;
-		_throw(tjGetErrorStr());
-	}
+	TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), x, y,
+		width, stride*sizeof(jint), height, pf, flags);
 
 	bailout:
-	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
 	return;
 }
 
+/* TurboJPEG 1.2.x: TJDecompressor::decompress() int destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst,
+		jint width, jint stride, jint height, jint pf, jint flags)
+{
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF)
+		_throw("Invalid argument in decompress()");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when decompressing to an integer buffer.");
+
+	TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 0, 0,
+		width, stride*sizeof(jint), height, pf, flags);
+
+	bailout:
+	return;
+
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::decompressToYUV() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV
 	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
 		jint flags)
@@ -457,12 +469,7 @@
 
 	if(tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf,
 		flags)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
-		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
-		dstBuf=jpegBuf=NULL;
 		_throw(tjGetErrorStr());
-	}
 
 	bailout:
 	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
@@ -470,6 +477,7 @@
 	return;
 }
 
+/* TurboJPEG 1.2.x: TJTransformer::init() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init
 	(JNIEnv *env, jobject obj)
 {
@@ -481,7 +489,7 @@
 
 	bailif0(cls=(*env)->GetObjectClass(env, obj));
 	bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
-	(*env)->SetLongField(env, obj, fid, (jlong)handle);
+	(*env)->SetLongField(env, obj, fid, (size_t)handle);
 
 	bailout:
 	return;
@@ -501,20 +509,20 @@
 	JNICustomFilterParams *params=(JNICustomFilterParams *)transform->data;
 	JNIEnv *env=params->env;
 	jobject tobj=params->tobj, cfobj=params->cfobj;
-  jobject arrayRegionObj, planeRegionObj, bufobj, borobj;
+	jobject arrayRegionObj, planeRegionObj, bufobj, borobj;
 	jclass cls;  jmethodID mid;  jfieldID fid;
 
 	bailif0(bufobj=(*env)->NewDirectByteBuffer(env, coeffs,
 		sizeof(short)*arrayRegion.w*arrayRegion.h));
 	bailif0(cls=(*env)->FindClass(env, "java/nio/ByteOrder"));
-  bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder",
+	bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder",
 		"()Ljava/nio/ByteOrder;"));
 	bailif0(borobj=(*env)->CallStaticObjectMethod(env, cls, mid));
 	bailif0(cls=(*env)->GetObjectClass(env, bufobj));
 	bailif0(mid=(*env)->GetMethodID(env, cls, "order",
 		"(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;"));
 	(*env)->CallObjectMethod(env, bufobj, mid, borobj);
-  bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer",
+	bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer",
 		"()Ljava/nio/ShortBuffer;"));
 	bailif0(bufobj=(*env)->CallObjectMethod(env, bufobj, mid));
 
@@ -551,6 +559,7 @@
 	return -1;
 }
 
+/* TurboJPEG 1.2.x: TJTransformer::transform() */
 JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform
 	(JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize,
 		jobjectArray dstobjs, jobjectArray tobjs, jint flags)
@@ -628,7 +637,6 @@
 		}
 	}
 
-	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
 	for(i=0; i<n; i++)
 	{
 		int w=jpegWidth, h=jpegHeight;
@@ -638,28 +646,29 @@
 		if((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i])
 			<tjBufSize(w, h, jpegSubsamp))
 			_throw("Destination buffer is not large enough");
-		bailif0(dstBufs[i]=(*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
 	}
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
+	for(i=0; i<n; i++)
+		bailif0(dstBufs[i]=(*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
 
 	if(tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t,
 		flags|TJFLAG_NOREALLOC)==-1)
-	{
-		(*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
-		jpegBuf=NULL;
-		for(i=0; i<n; i++)
-		{
-			(*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0);
-			dstBufs[i]=NULL;
-		}
 		_throw(tjGetErrorStr());
+
+	for(i=0; i<n; i++)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0);
+		dstBufs[i]=NULL;
 	}
+	(*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
+	jpegBuf=NULL;
 
 	jdstSizes=(*env)->NewIntArray(env, n);
 	bailif0(dstSizesi=(*env)->GetIntArrayElements(env, jdstSizes, 0));
 	for(i=0; i<n; i++) dstSizesi[i]=(int)dstSizes[i];
 
 	bailout:
-	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
+	if(dstSizesi) (*env)->ReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0);
 	if(dstBufs)
 	{
 		for(i=0; i<n; i++)
@@ -669,13 +678,14 @@
 		}
 		free(dstBufs);
 	}
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
 	if(jdstBufs) free(jdstBufs);
 	if(dstSizes) free(dstSizes);
-	if(dstSizesi) (*env)->ReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0);
 	if(t) free(t);
 	return jdstSizes;
 }
 
+/* TurboJPEG 1.2.x: TJDecompressor::destroy() */
 JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
 	(JNIEnv *env, jobject obj)
 {
diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni
index 9c046ce..115f076 100755
--- a/turbojpeg-mapfile.jni
+++ b/turbojpeg-mapfile.jni
@@ -49,7 +49,16 @@
 		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII;
 		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII;
 		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV;
-		Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;		
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;
 		Java_org_libjpegturbo_turbojpeg_TJTransformer_init;
 		Java_org_libjpegturbo_turbojpeg_TJTransformer_transform;
 } TURBOJPEG_1.1;
+
+TURBOJPEG_1.3
+{
+	global:
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII;
+} TURBOJPEG_1.2;
diff --git a/turbojpeg.c b/turbojpeg.c
index a507565..33ae875 100644
--- a/turbojpeg.c
+++ b/turbojpeg.c
@@ -26,7 +26,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* TurboJPEG/OSS:  this implements the TurboJPEG API using libjpeg-turbo */
+/* TurboJPEG/LJT:  this implements the TurboJPEG API using libjpeg or
+   libjpeg-turbo */
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -92,10 +93,22 @@
 	JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270
 };
 
-#define NUMSF 4
+#define NUMSF 16
 static const tjscalingfactor sf[NUMSF]={
+	{2, 1},
+	{15, 8},
+	{7, 4},
+	{13, 8},
+	{3, 2},
+	{11, 8},
+	{5, 4},
+	{9, 8},
 	{1, 1},
+	{7, 8},
+	{3, 4},
+	{5, 8},
 	{1, 2},
+	{3, 8},
 	{1, 4},
 	{1, 8}
 };
@@ -107,6 +120,16 @@
 	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
 		return -1;}  \
 	cinfo=&this->cinfo;  dinfo=&this->dinfo;
+#define getcinstance(handle) tjinstance *this=(tjinstance *)handle;  \
+	j_compress_ptr cinfo=NULL;  \
+	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
+		return -1;}  \
+	cinfo=&this->cinfo;
+#define getdinstance(handle) tjinstance *this=(tjinstance *)handle;  \
+	j_decompress_ptr dinfo=NULL;  \
+	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
+		return -1;}  \
+	dinfo=&this->dinfo;
 
 static int getPixelFormat(int pixelSize, int flags)
 {
@@ -160,12 +183,17 @@
 			cinfo->in_color_space=JCS_EXT_XBGR;  break;
 		#else
 		case TJPF_RGB:
-			if(RGB_RED==0 && RGB_GREEN==1 && RGB_BLUE==2 && RGB_PIXELSIZE==3)
-			{
-				cinfo->in_color_space=JCS_RGB;  break;
-			}
-		default:
-			_throw("Unsupported pixel format");
+		case TJPF_BGR:
+		case TJPF_RGBX:
+		case TJPF_BGRX:
+		case TJPF_XRGB:
+		case TJPF_XBGR:
+		case TJPF_RGBA:
+		case TJPF_BGRA:
+		case TJPF_ARGB:
+		case TJPF_ABGR:
+			cinfo->in_color_space=JCS_RGB;  pixelFormat=TJPF_RGB;
+			break;
 		#endif
 	}
 
@@ -189,9 +217,6 @@
 	cinfo->comp_info[1].v_samp_factor=1;
 	cinfo->comp_info[2].v_samp_factor=1;
 
-	#if JCS_EXTENSIONS!=1
-	bailout:
-	#endif
 	return retval;
 }
 
@@ -229,10 +254,16 @@
 		#endif
 		#else
 		case TJPF_RGB:
-			if(RGB_RED==0 && RGB_GREEN==1 && RGB_BLUE==2 && RGB_PIXELSIZE==3)
-			{
-				dinfo->out_color_space=JCS_RGB;  break;
-			}
+		case TJPF_BGR:
+		case TJPF_RGBX:
+		case TJPF_BGRX:
+		case TJPF_XRGB:
+		case TJPF_XBGR:
+		case TJPF_RGBA:
+		case TJPF_BGRA:
+		case TJPF_ARGB:
+		case TJPF_ABGR:
+			dinfo->out_color_space=JCS_RGB;  break;
 		#endif
 		default:
 			_throw("Unsupported pixel format");
@@ -248,6 +279,14 @@
 static int getSubsamp(j_decompress_ptr dinfo)
 {
 	int retval=-1, i, k;
+
+	/* The sampling factors actually have no meaning with grayscale JPEG files,
+	   and in fact it's possible to generate grayscale JPEGs with sampling
+	   factors > 1 (even though those sampling factors are ignored by the
+	   decompressor.)  Thus, we need to treat grayscale as a special case. */
+	if(dinfo->num_components==1 && dinfo->jpeg_color_space==JCS_GRAYSCALE)
+		return TJSAMP_GRAY;
+
 	for(i=0; i<NUMSUBOPT; i++)
 	{
 		if(dinfo->num_components==pixelsize[i])
@@ -273,6 +312,149 @@
 }
 
 
+#ifndef JCS_EXTENSIONS
+
+/* Conversion functions to emulate the colorspace extensions.  This allows the
+   TurboJPEG wrapper to be used with libjpeg */
+
+#define TORGB(PS, ROFFSET, GOFFSET, BOFFSET) {  \
+	int rowPad=pitch-width*PS;  \
+	while(height--)  \
+	{  \
+		unsigned char *endOfRow=src+width*PS;  \
+		while(src<endOfRow)  \
+		{  \
+			dst[RGB_RED]=src[ROFFSET];  \
+			dst[RGB_GREEN]=src[GOFFSET];  \
+			dst[RGB_BLUE]=src[BOFFSET];  \
+			dst+=RGB_PIXELSIZE;  src+=PS;  \
+		}  \
+		src+=rowPad;  \
+	}  \
+}
+
+static unsigned char *toRGB(unsigned char *src, int width, int pitch,
+	int height, int pixelFormat, unsigned char *dst)
+{
+	unsigned char *retval=src;
+	switch(pixelFormat)
+	{
+		case TJPF_RGB:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=3
+			retval=dst;  TORGB(3, 0, 1, 2);
+			#endif
+			break;
+		case TJPF_BGR:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=3
+			retval=dst;  TORGB(3, 2, 1, 0);
+			#endif
+			break;
+		case TJPF_RGBX:
+		case TJPF_RGBA:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 0, 1, 2);
+			#endif
+			break;
+		case TJPF_BGRX:
+		case TJPF_BGRA:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 2, 1, 0);
+			#endif
+			break;
+		case TJPF_XRGB:
+		case TJPF_ARGB:
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 1, 2, 3);
+			#endif
+			break;
+		case TJPF_XBGR:
+		case TJPF_ABGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 3, 2, 1);
+			#endif
+			break;
+	}
+	return retval;
+}
+
+#define FROMRGB(PS, ROFFSET, GOFFSET, BOFFSET, SETALPHA) {  \
+	int rowPad=pitch-width*PS;  \
+	while(height--)  \
+	{  \
+		unsigned char *endOfRow=dst+width*PS;  \
+		while(dst<endOfRow)  \
+		{  \
+			dst[ROFFSET]=src[RGB_RED];  \
+			dst[GOFFSET]=src[RGB_GREEN];  \
+			dst[BOFFSET]=src[RGB_BLUE];  \
+			SETALPHA  \
+			dst+=PS;  src+=RGB_PIXELSIZE;  \
+		}  \
+		dst+=rowPad;  \
+	}  \
+}
+
+static void fromRGB(unsigned char *src, unsigned char *dst, int width,
+	int pitch, int height, int pixelFormat)
+{
+	switch(pixelFormat)
+	{
+		case TJPF_RGB:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=3
+			FROMRGB(3, 0, 1, 2,);
+			#endif
+			break;
+		case TJPF_BGR:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=3
+			FROMRGB(3, 2, 1, 0,);
+			#endif
+			break;
+		case TJPF_RGBX:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 0, 1, 2,);
+			#endif
+			break;
+		case TJPF_RGBA:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 0, 1, 2, dst[3]=0xFF;);
+			#endif
+			break;
+		case TJPF_BGRX:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 2, 1, 0,);
+			#endif
+			break;
+		case TJPF_BGRA:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 2, 1, 0, dst[3]=0xFF;);  return;
+			#endif
+			break;
+		case TJPF_XRGB:
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 1, 2, 3,);  return;
+			#endif
+			break;
+		case TJPF_ARGB:
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 1, 2, 3, dst[0]=0xFF;);  return;
+			#endif
+			break;
+		case TJPF_XBGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 3, 2, 1,);  return;
+			#endif
+			break;
+		case TJPF_ABGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 3, 2, 1, dst[0]=0xFF;);  return;
+			#endif
+			break;
+	}
+}
+
+#endif
+
+
 /* General API functions */
 
 DLLEXPORT char* DLLCALL tjGetErrorStr(void)
@@ -355,9 +537,9 @@
 	if(width<1 || height<1 || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT)
 		_throw("tjBufSize(): Invalid argument");
 
-	// This allows for rare corner cases in which a JPEG image can actually be
-	// larger than the uncompressed input (we wouldn't mention it if it hadn't
-	// happened before.)
+	/* This allows for rare corner cases in which a JPEG image can actually be
+	   larger than the uncompressed input (we wouldn't mention it if it hadn't
+	   happened before.) */
 	mcuw=tjMCUWidth[jpegSubsamp];
 	mcuh=tjMCUHeight[jpegSubsamp];
 	chromasf=jpegSubsamp==TJSAMP_GRAY? 0: 4*64/(mcuw*mcuh);
@@ -367,16 +549,15 @@
 	return retval;
 }
 
-
 DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height)
 {
 	unsigned long retval=0;
 	if(width<1 || height<1)
 		_throw("TJBUFSIZE(): Invalid argument");
 
-	// This allows for rare corner cases in which a JPEG image can actually be
-	// larger than the uncompressed input (we wouldn't mention it if it hadn't
-	// happened before.)
+	/* This allows for rare corner cases in which a JPEG image can actually be
+	   larger than the uncompressed input (we wouldn't mention it if it hadn't
+	   happened before.) */
 	retval=PAD(width, 16) * PAD(height, 16) * 6 + 2048;
 
 	bailout:
@@ -413,8 +594,11 @@
 	unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
 {
 	int i, retval=0, alloc=1;  JSAMPROW *row_pointer=NULL;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	#endif
 
-	getinstance(handle)
+	getcinstance(handle)
 	if((this->init&COMPRESS)==0)
 		_throw("tjCompress2(): Instance has not been initialized for compression");
 
@@ -432,6 +616,16 @@
 
 	if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
 
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY)
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
+		if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure");
+		srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
+		pitch=width*RGB_PIXELSIZE;
+	}
+	#endif
+
 	cinfo->image_width=width;
 	cinfo->image_height=height;
 
@@ -464,6 +658,9 @@
 
 	bailout:
 	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
 	if(row_pointer) free(row_pointer);
 	return retval;
 }
@@ -502,10 +699,11 @@
 	JSAMPLE *ptr=dstBuf;
 	unsigned long yuvsize=0;
 	jpeg_component_info *compptr;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	#endif
 
-	getinstance(handle);
-	if((this->init&COMPRESS)==0)
-		_throw("tjEncodeYUV2(): Instance has not been initialized for compression");
+	getcinstance(handle);
 
 	for(i=0; i<MAX_COMPONENTS; i++)
 	{
@@ -513,6 +711,9 @@
 		tmpbuf2[i]=NULL;  _tmpbuf2[i]=NULL;  outbuf[i]=NULL;
 	}
 
+	if((this->init&COMPRESS)==0)
+		_throw("tjEncodeYUV2(): Instance has not been initialized for compression");
+
 	if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
 		|| pixelFormat>=TJ_NUMPF || dstBuf==NULL || subsamp<0
 		|| subsamp>=NUMSUBOPT)
@@ -527,6 +728,16 @@
 
 	if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
 
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY)
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
+		if(!rgbBuf) _throw("tjEncodeYUV2(): Memory allocation failure");
+		srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
+		pitch=width*RGB_PIXELSIZE;
+	}
+	#endif
+
 	cinfo->image_width=width;
 	cinfo->image_height=height;
 
@@ -619,6 +830,9 @@
 
 	bailout:
 	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
 	if(row_pointer) free(row_pointer);
 	for(i=0; i<MAX_COMPONENTS; i++)
 	{
@@ -685,7 +899,7 @@
 {
 	int retval=0;
 
-	getinstance(handle);
+	getdinstance(handle);
 	if((this->init&DECOMPRESS)==0)
 		_throw("tjDecompressHeader2(): Instance has not been initialized for decompression");
 
@@ -746,8 +960,12 @@
 {
 	int i, retval=0;  JSAMPROW *row_pointer=NULL;
 	int jpegwidth, jpegheight, scaledw, scaledh;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	unsigned char *_dstBuf=NULL;  int _pitch=0;
+	#endif
 
-	getinstance(handle);
+	getdinstance(handle);
 	if((this->init&DECOMPRESS)==0)
 		_throw("tjDecompress2(): Instance has not been initialized for decompression");
 
@@ -783,7 +1001,7 @@
 		scaledw=TJSCALED(jpegwidth, sf[i]);
 		scaledh=TJSCALED(jpegheight, sf[i]);
 		if(scaledw<=width && scaledh<=height)
-				break;
+			break;
 	}
 	if(scaledw>width || scaledh>height)
 		_throw("tjDecompress2(): Could not scale down to desired image dimensions");
@@ -793,6 +1011,21 @@
 
 	jpeg_start_decompress(dinfo);
 	if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat];
+
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY &&
+		(RGB_RED!=tjRedOffset[pixelFormat] ||
+			RGB_GREEN!=tjGreenOffset[pixelFormat] ||
+			RGB_BLUE!=tjBlueOffset[pixelFormat] ||
+			RGB_PIXELSIZE!=tjPixelSize[pixelFormat]))
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*3);
+		if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure");
+		_pitch=pitch;  pitch=width*3;
+		_dstBuf=dstBuf;  dstBuf=rgbBuf;
+	}
+	#endif
+
 	if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)
 		*dinfo->output_height))==NULL)
 		_throw("tjDecompress2(): Memory allocation failure");
@@ -809,8 +1042,15 @@
 	}
 	jpeg_finish_decompress(dinfo);
 
+	#ifndef JCS_EXTENSIONS
+	fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat);
+	#endif
+
 	bailout:
 	if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
 	if(row_pointer) free(row_pointer);
 	return retval;
 }
@@ -836,15 +1076,16 @@
 		tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
 	JSAMPLE *_tmpbuf=NULL, *ptr=dstBuf;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
 
-	getinstance(handle);
-	if((this->init&DECOMPRESS)==0)
-		_throw("tjDecompressToYUV(): Instance has not been initialized for decompression");
+	getdinstance(handle);
 
 	for(i=0; i<MAX_COMPONENTS; i++)
 	{
 		tmpbuf[i]=NULL;  outbuf[i]=NULL;
 	}
 
+	if((this->init&DECOMPRESS)==0)
+		_throw("tjDecompressToYUV(): Instance has not been initialized for decompression");
+
 	if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL)
 		_throw("tjDecompressToYUV(): Invalid argument");
 
diff --git a/turbojpeg.h b/turbojpeg.h
index 7610221..951112b 100644
--- a/turbojpeg.h
+++ b/turbojpeg.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2009-2012 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2013 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -59,6 +59,10 @@
  * perceptible loss of image clarity (the human eye is more sensitive to small
  * changes in brightness than small changes in color.)  This is called
  * "chrominance subsampling".
+ * <p>
+ * @note Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
  */
 enum TJSAMP
 {
@@ -85,6 +89,7 @@
   /**
    * 4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
    * chrominance component for every 1x2 block of pixels in the source image.
+   * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
    */
   TJSAMP_440
 };
@@ -246,10 +251,11 @@
  */
 #define TJFLAG_FORCESSE3     128
 /**
- * When decompressing, use the fastest chrominance upsampling algorithm
- * available in the underlying codec.  The default is to use smooth upsampling,
- * which creates a smooth transition between neighboring chrominance components
- * in order to reduce upsampling artifacts in the decompressed image.
+ * When decompressing an image that was compressed using chrominance
+ * subsampling, use the fastest chrominance upsampling algorithm available in
+ * the underlying codec.  The default is to use smooth upsampling, which
+ * creates a smooth transition between neighboring chrominance components in
+ * order to reduce upsampling artifacts in the decompressed image.
  */
 #define TJFLAG_FASTUPSAMPLE  256
 /**
@@ -262,26 +268,26 @@
 #define TJFLAG_NOREALLOC     1024
 /**
  * Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
- * default if this flag is not specified is implementation-specific.  The
- * libjpeg implementation, for example, uses the fast algorithm by default when
- * compressing, because this has been shown to have only a very slight effect
- * on accuracy, but it uses the accurate algorithm when decompressing, because
- * this has been shown to have a larger effect.
+ * default if this flag is not specified is implementation-specific.  For
+ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ * algorithm by default when compressing, because this has been shown to have
+ * only a very slight effect on accuracy, but it uses the accurate algorithm
+ * when decompressing, because this has been shown to have a larger effect.
  */
 #define TJFLAG_FASTDCT       2048
 /**
  * Use the most accurate DCT/IDCT algorithm available in the underlying codec.
- * The default if this flag is not specified is implementation-specific.  The
- * libjpeg implementation, for example, uses the fast algorithm by default when
- * compressing, because this has been shown to have only a very slight effect
- * on accuracy, but it uses the accurate algorithm when decompressing, because
- * this has been shown to have a larger effect.
+ * The default if this flag is not specified is implementation-specific.  For
+ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ * algorithm by default when compressing, because this has been shown to have
+ * only a very slight effect on accuracy, but it uses the accurate algorithm
+ * when decompressing, because this has been shown to have a larger effect.
  */
 #define TJFLAG_ACCURATEDCT   4096
 
 
 /**
- * Number of transform operations
+ * The number of transform operations
  */
 #define TJ_NUMXOP 8
 
@@ -439,8 +445,8 @@
   /**
    * A callback function that can be used to modify the DCT coefficients
    * after they are losslessly transformed but before they are transcoded to a
-   * new JPEG file.  This allows for custom filters or other transformations to
-   * be applied in the frequency domain.
+   * new JPEG image.  This allows for custom filters or other transformations
+   * to be applied in the frequency domain.
    *
    * @param coeffs pointer to an array of transformed DCT coefficients.  (NOTE:
    *        this pointer is not guaranteed to be valid once the callback
@@ -459,7 +465,7 @@
    *        0, 1, and 2 in typical JPEG images.)
    * @param transformID ID number of the transformed image to which
    *        <tt>coeffs</tt> belongs.  This is the same as the index of the
-   *        transform in the transforms array that was passed to
+   *        transform in the <tt>transforms</tt> array that was passed to
    *        #tjTransform().
    * @param transform a pointer to a #tjtransform structure that specifies the
    *        parameters and/or cropping region for this transform
@@ -485,7 +491,7 @@
 /**
  * Compute the scaled value of <tt>dimension</tt> using the given scaling
  * factor.  This macro performs the integer equivalent of <tt>ceil(dimension *
- * scalingFactor)</tt>. 
+ * scalingFactor)</tt>.
  */
 #define TJSCALED(dimension, scalingFactor) ((dimension * scalingFactor.num \
   + scalingFactor.denom - 1) / scalingFactor.denom)
@@ -562,7 +568,7 @@
  * the given parameters.  The number of bytes returned by this function is
  * larger than the size of the uncompressed source image.  The reason for this
  * is that the JPEG format uses 16-bit coefficients, and it is thus possible
- * for a very high-quality JPEG image with very high frequency content to
+ * for a very high-quality JPEG image with very high-frequency content to
  * expand rather than compress when converted to the JPEG format.  Such images
  * represent a very rare corner case, but since there is no way to predict the
  * size of a JPEG image prior to compression, the corner case has to be
@@ -602,13 +608,17 @@
  * uses the accelerated color conversion routines in TurboJPEG's underlying
  * codec to produce a planar YUV image that is suitable for X Video.
  * Specifically, if the chrominance components are subsampled along the
- * horizontal dimension, then the width of the luminance plane is padded to 2
- * in the output image (same goes for the height of the luminance plane, if the
- * chrominance components are subsampled along the vertical dimension.)  Also,
- * each line of each plane in the output image is padded to 4 bytes.  Although
- * this will work with any subsampling option, it is really only useful in
- * combination with TJ_420, which produces an image compatible with the I420
- * (AKA "YUV420P") format.
+ * horizontal dimension, then the width of the luminance plane is padded to the
+ * nearest multiple of 2 in the output image (same goes for the height of the
+ * luminance plane, if the chrominance components are subsampled along the
+ * vertical dimension.)  Also, each line of each plane in the output image is
+ * padded to the nearest multiple of 4 bytes.  Although this will work with any
+ * subsampling option, it is really only useful in combination with TJ_420,
+ * which produces an image compatible with the I420 (AKA "YUV420P") format.
+ * <p>
+ * @note Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
  *
  * @param handle a handle to a TurboJPEG compressor or transformer instance
  * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
@@ -694,14 +704,15 @@
  *        image.  This buffer should normally be <tt>pitch * scaledHeight</tt>
  *        bytes in size, where <tt>scaledHeight</tt> can be determined by
  *        calling #TJSCALED() with the JPEG image height and one of the scaling
- *        factors returned by #tjGetScalingFactors().  The dstBuf pointer may
- *        also be used to decompress into a specific region of a larger buffer.
+ *        factors returned by #tjGetScalingFactors().  The <tt>dstBuf</tt>
+ *        pointer may also be used to decompress into a specific region of a
+ *        larger buffer.
  * @param width desired width (in pixels) of the destination image.  If this is
- *        smaller than the width of the JPEG image being decompressed, then
+ *        different than the width of the JPEG image being decompressed, then
  *        TurboJPEG will use scaling in the JPEG decompressor to generate the
  *        largest possible image that will fit within the desired width.  If
- *        width is set to 0, then only the height will be considered when
- *        determining the scaled image size.
+ *        <tt>width</tt> is set to 0, then only the height will be considered
+ *        when determining the scaled image size.
  * @param pitch bytes per line of the destination image.  Normally, this is
  *        <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt> if the decompressed
  *        image is unpadded, else <tt>#TJPAD(scaledWidth *
@@ -711,14 +722,14 @@
  *        calling #TJSCALED() with the JPEG image width and one of the scaling
  *        factors returned by #tjGetScalingFactors().)  You can also be clever
  *        and use the pitch parameter to skip lines, etc.  Setting this
- *        parameter to 0 is the equivalent of setting it to <tt>scaledWidth
- *        * #tjPixelSize[pixelFormat]</tt>.
+ *        parameter to 0 is the equivalent of setting it to <tt>scaledWidth *
+ *        #tjPixelSize[pixelFormat]</tt>.
  * @param height desired height (in pixels) of the destination image.  If this
- *        is smaller than the height of the JPEG image being decompressed, then
- *        TurboJPEG will use scaling in the JPEG decompressor to generate the
- *        largest possible image that will fit within the desired height.  If
- *        height is set to 0, then only the width will be considered when
- *        determining the scaled image size.
+ *        is different than the height of the JPEG image being decompressed,
+ *        then TurboJPEG will use scaling in the JPEG decompressor to generate
+ *        the largest possible image that will fit within the desired height.
+ *        If <tt>height</tt> is set to 0, then only the width will be
+ *        considered when determining the scaled image size.
  * @param pixelFormat pixel format of the destination image (see @ref
  *        TJPF "Pixel formats".)
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
@@ -735,16 +746,20 @@
  * Decompress a JPEG image to a YUV planar image.  This function performs JPEG
  * decompression but leaves out the color conversion step, so a planar YUV
  * image is generated instead of an RGB image.  The padding of the planes in
- * this image is the same as the images generated by #tjEncodeYUV2().  Note
- * that, if the width or height of the image is not an even multiple of the MCU
- * block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer
- * copy will be performed within TurboJPEG.
+ * this image is the same as in the images generated by #tjEncodeYUV2().  If
+ * the width or height of the image is not an exact multiple of the MCU block
+ * size (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer copy
+ * will be performed within TurboJPEG.
+ * <p>
+ * @note Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
  * @param jpegSize size of the JPEG image (in bytes)
  * @param dstBuf pointer to an image buffer that will receive the YUV image.
- *        Use #tjBufSizeYUV to determine the appropriate size for this buffer
+ *        Use #tjBufSizeYUV() to determine the appropriate size for this buffer
  *        based on the image width, height, and level of subsampling.
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
  *        "flags".
@@ -771,12 +786,12 @@
  * to another without altering the values of the coefficients.  While this is
  * typically faster than decompressing the image, transforming it, and
  * re-compressing it, lossless transforms are not free.  Each lossless
- * transform requires reading and Huffman decoding all of the coefficients in
- * the source image, regardless of the size of the destination image.  Thus,
- * this function provides a means of generating multiple transformed images
- * from the same source or of applying multiple transformations simultaneously,
- * in order to eliminate the need to read the source coefficients multiple
- * times.
+ * transform requires reading and performing Huffman decoding on all of the
+ * coefficients in the source image, regardless of the size of the destination
+ * image.  Thus, this function provides a means of generating multiple
+ * transformed images from the same source or applying multiple
+ * transformations simultaneously, in order to eliminate the need to read the
+ * source coefficients multiple times.
  *
  * @param handle a handle to a TurboJPEG transformer instance
  * @param jpegBuf pointer to a buffer containing the JPEG image to transform
@@ -792,9 +807,9 @@
  *        -# set <tt>dstBufs[i]</tt> to NULL to tell TurboJPEG to allocate the
  *        buffer for you, or
  *        -# pre-allocate the buffer to a "worst case" size determined by
- *        calling #tjBufSize() with the cropped width and height.  This should
- *        ensure that the buffer never has to be re-allocated (setting
- *        #TJFLAG_NOREALLOC guarantees this.)
+ *        calling #tjBufSize() with the transformed or cropped width and
+ *        height.  This should ensure that the buffer never has to be
+ *        re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
  *        .
  *        If you choose option 1, <tt>dstSizes[i]</tt> should be set to
  *        the size of your pre-allocated buffer.  In any case, unless you have
@@ -806,7 +821,7 @@
  *        <tt>dstSizes[i]</tt> should be set to the size of the buffer.  Upon
  *        return, <tt>dstSizes[i]</tt> will contain the size of the JPEG image
  *        (in bytes.)
- * @param transforms pointer to an array of n tjtransform structures, each of
+ * @param transforms pointer to an array of n #tjtransform structures, each of
  *        which specifies the transform parameters and/or cropping region for
  *        the corresponding transformed output image.
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
@@ -837,7 +852,7 @@
  * (re)allocation (by setting #TJFLAG_NOREALLOC.)
  *
  * @param bytes the number of bytes to allocate
- * 
+ *
  * @return a pointer to a newly-allocated buffer with the specified number of
  *         bytes
  *
diff --git a/usage.txt b/usage.txt
index 157972b..ef8e6d0 100644
--- a/usage.txt
+++ b/usage.txt
@@ -17,7 +17,7 @@
 
 These programs implement JPEG image encoding, decoding, and transcoding.
 JPEG (pronounced "jay-peg") is a standardized compression method for
-full-color and gray-scale images.
+full-color and grayscale images.
 
 
 GENERAL USAGE
@@ -26,31 +26,31 @@
 and djpeg to decompress a JPEG file back into a conventional image format.
 
 On Unix-like systems, you say:
-	cjpeg [switches] [imagefile] >jpegfile
+        cjpeg [switches] [imagefile] >jpegfile
 or
-	djpeg [switches] [jpegfile]  >imagefile
+        djpeg [switches] [jpegfile]  >imagefile
 The programs read the specified input file, or standard input if none is
 named.  They always write to standard output (with trace/error messages to
 standard error).  These conventions are handy for piping images between
 programs.
 
 On most non-Unix systems, you say:
-	cjpeg [switches] imagefile jpegfile
+        cjpeg [switches] imagefile jpegfile
 or
-	djpeg [switches] jpegfile  imagefile
+        djpeg [switches] jpegfile  imagefile
 i.e., both the input and output files are named on the command line.  This
 style is a little more foolproof, and it loses no functionality if you don't
 have pipes.  (You can get this style on Unix too, if you prefer, by defining
 TWO_FILE_COMMANDLINE when you compile the programs; see install.txt.)
 
 You can also say:
-	cjpeg [switches] -outfile jpegfile  imagefile
+        cjpeg [switches] -outfile jpegfile  imagefile
 or
-	djpeg [switches] -outfile imagefile  jpegfile
+        djpeg [switches] -outfile imagefile  jpegfile
 This syntax works on all systems, so it is useful for scripts.
 
 The currently supported image file formats are: PPM (PBMPLUS color format),
-PGM (PBMPLUS gray-scale format), BMP, Targa, and RLE (Utah Raster Toolkit
+PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster Toolkit
 format).  (RLE is supported only if the URT library is available.)
 cjpeg recognizes the input image format automatically, with the exception
 of some Targa-format files.  You have to tell djpeg which format to generate.
@@ -69,31 +69,35 @@
 
 The basic command line switches for cjpeg are:
 
-	-quality N[,...]  Scale quantization tables to adjust image quality.
-			Quality is 0 (worst) to 100 (best); default is 75.
-			(See below for more info.)
+        -quality N[,...]  Scale quantization tables to adjust image quality.
+                          Quality is 0 (worst) to 100 (best); default is 75.
+                          (See below for more info.)
 
-	-grayscale	Create monochrome JPEG file from color input.
-			Be sure to use this switch when compressing a grayscale
-			BMP file, because cjpeg isn't bright enough to notice
-			whether a BMP file uses only shades of gray.  By
-			saying -grayscale, you'll get a smaller JPEG file that
-			takes less time to process.
+        -grayscale      Create monochrome JPEG file from color input.
+                        Be sure to use this switch when compressing a grayscale
+                        BMP file, because cjpeg isn't bright enough to notice
+                        whether a BMP file uses only shades of gray.  By
+                        saying -grayscale, you'll get a smaller JPEG file that
+                        takes less time to process.
 
-	-optimize	Perform optimization of entropy encoding parameters.
-			Without this, default encoding parameters are used.
-			-optimize usually makes the JPEG file a little smaller,
-			but cjpeg runs somewhat slower and needs much more
-			memory.  Image quality and speed of decompression are
-			unaffected by -optimize.
+        -rgb            Create RGB JPEG file.
+                        Using this switch suppresses the conversion from RGB
+                        colorspace input to the default YCbCr JPEG colorspace.
 
-	-progressive	Create progressive JPEG file (see below).
+        -optimize       Perform optimization of entropy encoding parameters.
+                        Without this, default encoding parameters are used.
+                        -optimize usually makes the JPEG file a little smaller,
+                        but cjpeg runs somewhat slower and needs much more
+                        memory.  Image quality and speed of decompression are
+                        unaffected by -optimize.
 
-	-targa		Input file is Targa format.  Targa files that contain
-			an "identification" field will not be automatically
-			recognized by cjpeg; for such files you must specify
-			-targa to make cjpeg treat the input as Targa format.
-			For most Targa files, you won't need this switch.
+        -progressive    Create progressive JPEG file (see below).
+
+        -targa          Input file is Targa format.  Targa files that contain
+                        an "identification" field will not be automatically
+                        recognized by cjpeg; for such files you must specify
+                        -targa to make cjpeg treat the input as Targa format.
+                        For most Targa files, you won't need this switch.
 
 The -quality switch lets you trade off compressed file size against quality of
 the reconstructed image: the higher the quality setting, the larger the JPEG
@@ -160,38 +164,57 @@
 
 Switches for advanced users:
 
-	-arithmetic	Use arithmetic coding.  CAUTION: arithmetic coded JPEG
-			is not yet widely implemented, so many decoders will
-			be unable to view an arithmetic coded JPEG file at
-			all.
+        -arithmetic     Use arithmetic coding.  CAUTION: arithmetic coded JPEG
+                        is not yet widely implemented, so many decoders will
+                        be unable to view an arithmetic coded JPEG file at
+                        all.
 
-	-dct int	Use integer DCT method (default).
-	-dct fast	Use fast integer DCT (less accurate).
-	-dct float	Use floating-point DCT method.
-			The float method is very slightly more accurate than
-			the int method, but is much slower unless your machine
-			has very fast floating-point hardware.  Also note that
-			results of the floating-point method may vary slightly
-			across machines, while the integer methods should give
-			the same results everywhere.  The fast integer method
-			is much less accurate than the other two.
+        -dct int        Use integer DCT method (default).
+        -dct fast       Use fast integer DCT (less accurate).
+                        In libjpeg-turbo, the fast method is generally about
+                        5-15% faster than the int method when using the
+                        x86/x86-64 SIMD extensions (results may vary with other
+                        SIMD implementations, or when using libjpeg-turbo
+                        without SIMD extensions.)  For quality levels of 90 and
+                        below, there should be little or no perceptible
+                        difference between the two algorithms.  For quality
+                        levels above 90, however, the difference between
+                        the fast and the int methods becomes more pronounced.
+                        With quality=97, for instance, the fast method incurs
+                        generally about a 1-3 dB loss (in PSNR) relative to
+                        the int method, but this can be larger for some images.
+                        Do not use the fast method with quality levels above
+                        97.  The algorithm often degenerates at quality=98 and
+                        above and can actually produce a more lossy image than
+                        if lower quality levels had been used.  Also, in
+                        libjpeg-turbo, the fast method is not fully accelerated
+                        for quality levels above 97, so it will be slower than
+                        the int method.
+        -dct float      Use floating-point DCT method.
+                        The float method is mainly a legacy feature.  It does
+                        not produce significantly more accurate results than
+                        the int method, and it is much slower.  The float
+                        method may also give different results on different
+                        machines due to varying roundoff behavior, whereas the
+                        integer methods should give the same results on all
+                        machines.
 
-	-restart N	Emit a JPEG restart marker every N MCU rows, or every
-			N MCU blocks if "B" is attached to the number.
-			-restart 0 (the default) means no restart markers.
+        -restart N      Emit a JPEG restart marker every N MCU rows, or every
+                        N MCU blocks if "B" is attached to the number.
+                        -restart 0 (the default) means no restart markers.
 
-	-smooth N	Smooth the input image to eliminate dithering noise.
-			N, ranging from 1 to 100, indicates the strength of
-			smoothing.  0 (the default) means no smoothing.
+        -smooth N       Smooth the input image to eliminate dithering noise.
+                        N, ranging from 1 to 100, indicates the strength of
+                        smoothing.  0 (the default) means no smoothing.
 
-	-maxmemory N	Set limit for amount of memory to use in processing
-			large images.  Value is in thousands of bytes, or
-			millions of bytes if "M" is attached to the number.
-			For example, -max 4m selects 4000000 bytes.  If more
-			space is needed, temporary files will be used.
+        -maxmemory N    Set limit for amount of memory to use in processing
+                        large images.  Value is in thousands of bytes, or
+                        millions of bytes if "M" is attached to the number.
+                        For example, -max 4m selects 4000000 bytes.  If more
+                        space is needed, temporary files will be used.
 
-	-verbose	Enable debug printout.  More -v's give more printout.
-	or  -debug	Also, version information is printed at startup.
+        -verbose        Enable debug printout.  More -v's give more printout.
+        or  -debug      Also, version information is printed at startup.
 
 The -restart option inserts extra markers that allow a JPEG decoder to
 resynchronize after a transmission error.  Without restart markers, any damage
@@ -209,22 +232,22 @@
 
 Switches for wizards:
 
-	-baseline	Force baseline-compatible quantization tables to be
-			generated.  This clamps quantization values to 8 bits
-			even at low quality settings.  (This switch is poorly
-			named, since it does not ensure that the output is
-			actually baseline JPEG.  For example, you can use
-			-baseline and -progressive together.)
+        -baseline       Force baseline-compatible quantization tables to be
+                        generated.  This clamps quantization values to 8 bits
+                        even at low quality settings.  (This switch is poorly
+                        named, since it does not ensure that the output is
+                        actually baseline JPEG.  For example, you can use
+                        -baseline and -progressive together.)
 
-	-qtables file	Use the quantization tables given in the specified
-			text file.
+        -qtables file   Use the quantization tables given in the specified
+                        text file.
 
-	-qslots N[,...] Select which quantization table to use for each color
-			component.
+        -qslots N[,...] Select which quantization table to use for each color
+                        component.
 
-	-sample HxV[,...]  Set JPEG sampling factors for each color component.
+        -sample HxV[,...]  Set JPEG sampling factors for each color component.
 
-	-scans file	Use the scan script given in the specified text file.
+        -scans file     Use the scan script given in the specified text file.
 
 The "wizard" switches are intended for experimentation with JPEG.  If you
 don't know what you are doing, DON'T USE THEM.  These switches are documented
@@ -235,105 +258,126 @@
 
 The basic command line switches for djpeg are:
 
-	-colors N	Reduce image to at most N colors.  This reduces the
-	or -quantize N	number of colors used in the output image, so that it
-			can be displayed on a colormapped display or stored in
-			a colormapped file format.  For example, if you have
-			an 8-bit display, you'd need to reduce to 256 or fewer
-			colors.  (-colors is the recommended name, -quantize
-			is provided only for backwards compatibility.)
+        -colors N       Reduce image to at most N colors.  This reduces the
+        or -quantize N  number of colors used in the output image, so that it
+                        can be displayed on a colormapped display or stored in
+                        a colormapped file format.  For example, if you have
+                        an 8-bit display, you'd need to reduce to 256 or fewer
+                        colors.  (-colors is the recommended name, -quantize
+                        is provided only for backwards compatibility.)
 
-	-fast		Select recommended processing options for fast, low
-			quality output.  (The default options are chosen for
-			highest quality output.)  Currently, this is equivalent
-			to "-dct fast -nosmooth -onepass -dither ordered".
+        -fast           Select recommended processing options for fast, low
+                        quality output.  (The default options are chosen for
+                        highest quality output.)  Currently, this is equivalent
+                        to "-dct fast -nosmooth -onepass -dither ordered".
 
-	-grayscale	Force gray-scale output even if JPEG file is color.
-			Useful for viewing on monochrome displays; also,
-			djpeg runs noticeably faster in this mode.
+        -grayscale      Force grayscale output even if JPEG file is color.
+                        Useful for viewing on monochrome displays; also,
+                        djpeg runs noticeably faster in this mode.
 
-	-scale M/N	Scale the output image by a factor M/N.  Currently
-			the scale factor must be 1/1, 1/2, 1/4, or 1/8.
-			Scaling is handy if the image is larger than your
-			screen; also, djpeg runs much faster when scaling
-			down the output.
+        -scale M/N      Scale the output image by a factor M/N.  Currently
+                        the scale factor must be M/8, where M is an integer
+                        between 1 and 16 inclusive, or any reduced fraction
+                        thereof (such as 1/2, 3/4, etc.)  Scaling is handy if
+                        the image is larger than your screen; also, djpeg runs
+                        much faster when scaling down the output.
 
-	-bmp		Select BMP output format (Windows flavor).  8-bit
-			colormapped format is emitted if -colors or -grayscale
-			is specified, or if the JPEG file is gray-scale;
-			otherwise, 24-bit full-color format is emitted.
+        -bmp            Select BMP output format (Windows flavor).  8-bit
+                        colormapped format is emitted if -colors or -grayscale
+                        is specified, or if the JPEG file is grayscale;
+                        otherwise, 24-bit full-color format is emitted.
 
-	-gif		Select GIF output format.  Since GIF does not support
-			more than 256 colors, -colors 256 is assumed (unless
-			you specify a smaller number of colors).  If you
-			specify -fast, the default number of colors is 216.
+        -gif            Select GIF output format.  Since GIF does not support
+                        more than 256 colors, -colors 256 is assumed (unless
+                        you specify a smaller number of colors).  If you
+                        specify -fast, the default number of colors is 216.
 
-	-os2		Select BMP output format (OS/2 1.x flavor).  8-bit
-			colormapped format is emitted if -colors or -grayscale
-			is specified, or if the JPEG file is gray-scale;
-			otherwise, 24-bit full-color format is emitted.
+        -os2            Select BMP output format (OS/2 1.x flavor).  8-bit
+                        colormapped format is emitted if -colors or -grayscale
+                        is specified, or if the JPEG file is grayscale;
+                        otherwise, 24-bit full-color format is emitted.
 
-	-pnm		Select PBMPLUS (PPM/PGM) output format (this is the
-			default format).  PGM is emitted if the JPEG file is
-			gray-scale or if -grayscale is specified; otherwise
-			PPM is emitted.
+        -pnm            Select PBMPLUS (PPM/PGM) output format (this is the
+                        default format).  PGM is emitted if the JPEG file is
+                        grayscale or if -grayscale is specified; otherwise
+                        PPM is emitted.
 
-	-rle		Select RLE output format.  (Requires URT library.)
+        -rle            Select RLE output format.  (Requires URT library.)
 
-	-targa		Select Targa output format.  Gray-scale format is
-			emitted if the JPEG file is gray-scale or if
-			-grayscale is specified; otherwise, colormapped format
-			is emitted if -colors is specified; otherwise, 24-bit
-			full-color format is emitted.
+        -targa          Select Targa output format.  Grayscale format is
+                        emitted if the JPEG file is grayscale or if
+                        -grayscale is specified; otherwise, colormapped format
+                        is emitted if -colors is specified; otherwise, 24-bit
+                        full-color format is emitted.
 
 Switches for advanced users:
 
-	-dct int	Use integer DCT method (default).
-	-dct fast	Use fast integer DCT (less accurate).
-	-dct float	Use floating-point DCT method.
-			The float method is very slightly more accurate than
-			the int method, but is much slower unless your machine
-			has very fast floating-point hardware.  Also note that
-			results of the floating-point method may vary slightly
-			across machines, while the integer methods should give
-			the same results everywhere.  The fast integer method
-			is much less accurate than the other two.
+        -dct int        Use integer DCT method (default).
+        -dct fast       Use fast integer DCT (less accurate).
+                        In libjpeg-turbo, the fast method is generally about
+                        5-15% faster than the int method when using the
+                        x86/x86-64 SIMD extensions (results may vary with other
+                        SIMD implementations, or when using libjpeg-turbo
+                        without SIMD extensions.)  If the JPEG image was
+                        compressed using a quality level of 85 or below, then
+                        there should be little or no perceptible difference
+                        between the two algorithms.  When decompressing images
+                        that were compressed using quality levels above 85,
+                        however, the difference between the fast and int
+                        methods becomes more pronounced.  With images
+                        compressed using quality=97, for instance, the fast
+                        method incurs generally about a 4-6 dB loss (in PSNR)
+                        relative to the int method, but this can be larger for
+                        some images.  If you can avoid it, do not use the fast
+                        method when decompressing images that were compressed
+                        using quality levels above 97.  The algorithm often
+                        degenerates for such images and can actually produce
+                        a more lossy output image than if the JPEG image had
+                        been compressed using lower quality levels.
+        -dct float      Use floating-point DCT method.
+                        The float method is mainly a legacy feature.  It does
+                        not produce significantly more accurate results than
+                        the int method, and it is much slower.  The float
+                        method may also give different results on different
+                        machines due to varying roundoff behavior, whereas the
+                        integer methods should give the same results on all
+                        machines.
 
-	-dither fs	Use Floyd-Steinberg dithering in color quantization.
-	-dither ordered	Use ordered dithering in color quantization.
-	-dither none	Do not use dithering in color quantization.
-			By default, Floyd-Steinberg dithering is applied when
-			quantizing colors; this is slow but usually produces
-			the best results.  Ordered dither is a compromise
-			between speed and quality; no dithering is fast but
-			usually looks awful.  Note that these switches have
-			no effect unless color quantization is being done.
-			Ordered dither is only available in -onepass mode.
+        -dither fs      Use Floyd-Steinberg dithering in color quantization.
+        -dither ordered Use ordered dithering in color quantization.
+        -dither none    Do not use dithering in color quantization.
+                        By default, Floyd-Steinberg dithering is applied when
+                        quantizing colors; this is slow but usually produces
+                        the best results.  Ordered dither is a compromise
+                        between speed and quality; no dithering is fast but
+                        usually looks awful.  Note that these switches have
+                        no effect unless color quantization is being done.
+                        Ordered dither is only available in -onepass mode.
 
-	-map FILE	Quantize to the colors used in the specified image
-			file.  This is useful for producing multiple files
-			with identical color maps, or for forcing a predefined
-			set of colors to be used.  The FILE must be a GIF
-			or PPM file.  This option overrides -colors and
-			-onepass.
+        -map FILE       Quantize to the colors used in the specified image
+                        file.  This is useful for producing multiple files
+                        with identical color maps, or for forcing a predefined
+                        set of colors to be used.  The FILE must be a GIF
+                        or PPM file.  This option overrides -colors and
+                        -onepass.
 
-	-nosmooth	Use a faster, lower-quality upsampling routine.
+        -nosmooth       Use a faster, lower-quality upsampling routine.
 
-	-onepass	Use one-pass instead of two-pass color quantization.
-			The one-pass method is faster and needs less memory,
-			but it produces a lower-quality image.  -onepass is
-			ignored unless you also say -colors N.  Also,
-			the one-pass method is always used for gray-scale
-			output (the two-pass method is no improvement then).
+        -onepass        Use one-pass instead of two-pass color quantization.
+                        The one-pass method is faster and needs less memory,
+                        but it produces a lower-quality image.  -onepass is
+                        ignored unless you also say -colors N.  Also,
+                        the one-pass method is always used for grayscale
+                        output (the two-pass method is no improvement then).
 
-	-maxmemory N	Set limit for amount of memory to use in processing
-			large images.  Value is in thousands of bytes, or
-			millions of bytes if "M" is attached to the number.
-			For example, -max 4m selects 4000000 bytes.  If more
-			space is needed, temporary files will be used.
+        -maxmemory N    Set limit for amount of memory to use in processing
+                        large images.  Value is in thousands of bytes, or
+                        millions of bytes if "M" is attached to the number.
+                        For example, -max 4m selects 4000000 bytes.  If more
+                        space is needed, temporary files will be used.
 
-	-verbose	Enable debug printout.  More -v's give more printout.
-	or  -debug	Also, version information is printed at startup.
+        -verbose        Enable debug printout.  More -v's give more printout.
+        or  -debug      Also, version information is printed at startup.
 
 
 HINTS FOR CJPEG
@@ -376,12 +420,6 @@
 much lower quality than the default behavior.  "-dither none" may give
 acceptable results in two-pass mode, but is seldom tolerable in one-pass mode.
 
-If you are fortunate enough to have very fast floating point hardware,
-"-dct float" may be even faster than "-dct fast".  But on most machines
-"-dct float" is slower than "-dct int"; in this case it is not worth using,
-because its theoretical accuracy advantage is too small to be significant
-in practice.
-
 Two-pass color quantization requires a good deal of memory; on MS-DOS machines
 it may run out of memory even with -maxmemory 0.  In that case you can still
 decompress, with some loss of image quality, by specifying -onepass for
@@ -441,31 +479,31 @@
 
 jpegtran uses a command line syntax similar to cjpeg or djpeg.
 On Unix-like systems, you say:
-	jpegtran [switches] [inputfile] >outputfile
+        jpegtran [switches] [inputfile] >outputfile
 On most non-Unix systems, you say:
-	jpegtran [switches] inputfile outputfile
+        jpegtran [switches] inputfile outputfile
 where both the input and output files are JPEG files.
 
 To specify the coded JPEG representation used in the output file,
 jpegtran accepts a subset of the switches recognized by cjpeg:
-	-optimize	Perform optimization of entropy encoding parameters.
-	-progressive	Create progressive JPEG file.
-	-arithmetic	Use arithmetic coding.
-	-restart N	Emit a JPEG restart marker every N MCU rows, or every
-			N MCU blocks if "B" is attached to the number.
-	-scans file	Use the scan script given in the specified text file.
+        -optimize       Perform optimization of entropy encoding parameters.
+        -progressive    Create progressive JPEG file.
+        -arithmetic     Use arithmetic coding.
+        -restart N      Emit a JPEG restart marker every N MCU rows, or every
+                        N MCU blocks if "B" is attached to the number.
+        -scans file     Use the scan script given in the specified text file.
 See the previous discussion of cjpeg for more details about these switches.
 If you specify none of these switches, you get a plain baseline-JPEG output
 file.  The quality setting and so forth are determined by the input file.
 
 The image can be losslessly transformed by giving one of these switches:
-	-flip horizontal	Mirror image horizontally (left-right).
-	-flip vertical		Mirror image vertically (top-bottom).
-	-rotate 90		Rotate image 90 degrees clockwise.
-	-rotate 180		Rotate image 180 degrees.
-	-rotate 270		Rotate image 270 degrees clockwise (or 90 ccw).
-	-transpose		Transpose image (across UL-to-LR axis).
-	-transverse		Transverse transpose (across UR-to-LL axis).
+        -flip horizontal        Mirror image horizontally (left-right).
+        -flip vertical          Mirror image vertically (top-bottom).
+        -rotate 90              Rotate image 90 degrees clockwise.
+        -rotate 180             Rotate image 180 degrees.
+        -rotate 270             Rotate image 270 degrees clockwise (or 90 ccw).
+        -transpose              Transpose image (across UL-to-LR axis).
+        -transverse             Transverse transpose (across UR-to-LL axis).
 
 The transpose transformation has no restrictions regarding image dimensions.
 The other transformations operate rather oddly if the image dimensions are not
@@ -486,7 +524,7 @@
 For practical use, you may prefer to discard any untransformable edge pixels
 rather than having a strange-looking strip along the right and/or bottom edges
 of a transformed image.  To do this, add the -trim switch:
-	-trim		Drop non-transformable edge blocks.
+        -trim           Drop non-transformable edge blocks.
 Obviously, a transformation with -trim is not reversible, so strictly speaking
 jpegtran with this switch is not lossless.  Also, the expected mathematical
 equivalences between the transformations no longer hold.  For example,
@@ -494,8 +532,8 @@
 "-rot 180 -trim" trims both edges.
 
 If you are only interested in perfect transformations, add the -perfect switch:
-	-perfect	Fail with an error if the transformation is not
-			perfect.
+        -perfect        Fail with an error if the transformation is not
+                        perfect.
 For example, you may want to do
   jpegtran -rot 90 -perfect foo.jpg || djpeg foo.jpg | pnmflip -r90 | cjpeg
 to do a perfect rotation, if available, or an approximated one if not.
@@ -508,12 +546,12 @@
 nearest iMCU boundary (the lower right corner is unchanged.)
 
 The image can be losslessly cropped by giving the switch:
-	-crop WxH+X+Y	Crop to a rectangular region of width W and height H,
-			starting at point X,Y.
+        -crop WxH+X+Y   Crop to a rectangular region of width W and height H,
+                        starting at point X,Y.
 
 Other not-strictly-lossless transformation switches are:
 
-	-grayscale	Force grayscale output.
+        -grayscale      Force grayscale output.
 This option discards the chrominance channels if the input image is YCbCr
 (ie, a standard color JPEG), resulting in a grayscale JPEG file.  The
 luminance channel is preserved exactly, so this is a better method of reducing
@@ -525,24 +563,24 @@
 
 jpegtran also recognizes these switches that control what to do with "extra"
 markers, such as comment blocks:
-	-copy none	Copy no extra markers from source file.  This setting
-			suppresses all comments and other excess baggage
-			present in the source file.
-	-copy comments	Copy only comment markers.  This setting copies
-			comments from the source file but discards
-			any other data that is inessential for image display.
-	-copy all	Copy all extra markers.  This setting preserves
-			miscellaneous markers found in the source file, such
-			as JFIF thumbnails, Exif data, and Photoshop settings.
-			In some files, these extra markers can be sizable.
+        -copy none      Copy no extra markers from source file.  This setting
+                        suppresses all comments and other excess baggage
+                        present in the source file.
+        -copy comments  Copy only comment markers.  This setting copies
+                        comments from the source file but discards
+                        any other data that is inessential for image display.
+        -copy all       Copy all extra markers.  This setting preserves
+                        miscellaneous markers found in the source file, such
+                        as JFIF thumbnails, Exif data, and Photoshop settings.
+                        In some files, these extra markers can be sizable.
 The default behavior is -copy comments.  (Note: in IJG releases v6 and v6a,
 jpegtran always did the equivalent of -copy none.)
 
 Additional switches recognized by jpegtran are:
-	-outfile filename
-	-maxmemory N
-	-verbose
-	-debug
+        -outfile filename
+        -maxmemory N
+        -verbose
+        -debug
 These work the same as in cjpeg or djpeg.
 
 
@@ -561,7 +599,7 @@
 
 rdjpgcom searches a JPEG file and prints the contents of any COM blocks on
 standard output.  The command line syntax is
-	rdjpgcom [-raw] [-verbose] [inputfilename]
+        rdjpgcom [-raw] [-verbose] [inputfilename]
 The switch "-raw" (or just "-r") causes rdjpgcom to output non-printable
 characters in JPEG comments.  These characters are normally escaped for
 security reasons.
@@ -579,18 +617,18 @@
 
 The command line syntax for wrjpgcom is similar to cjpeg's.  On Unix-like
 systems, it is
-	wrjpgcom [switches] [inputfilename]
+        wrjpgcom [switches] [inputfilename]
 The output file is written to standard output.  The input file comes from
 the named file, or from standard input if no input file is named.
 
 On most non-Unix systems, the syntax is
-	wrjpgcom [switches] inputfilename outputfilename
+        wrjpgcom [switches] inputfilename outputfilename
 where both input and output file names must be given explicitly.
 
 wrjpgcom understands three switches:
-	-replace		 Delete any existing COM blocks from the file.
-	-comment "Comment text"	 Supply new COM text on command line.
-	-cfile name		 Read text for new COM block from named file.
+        -replace                 Delete any existing COM blocks from the file.
+        -comment "Comment text"  Supply new COM text on command line.
+        -cfile name              Read text for new COM block from named file.
 (Switch names can be abbreviated.)  If you have only one line of comment text
 to add, you can provide it on the command line with -comment.  The comment
 text must be surrounded with quotes so that it is treated as a single
diff --git a/win/jconfig.h.in b/win/jconfig.h.in
index be4b5c3..516ca59 100644
--- a/win/jconfig.h.in
+++ b/win/jconfig.h.in
@@ -5,13 +5,14 @@
 #define LIBJPEG_TURBO_VERSION @VERSION@
 #cmakedefine C_ARITH_CODING_SUPPORTED
 #cmakedefine D_ARITH_CODING_SUPPORTED
+#cmakedefine MEM_SRCDST_SUPPORTED
 
 #define HAVE_PROTOTYPES
 #define HAVE_UNSIGNED_CHAR
 #define HAVE_UNSIGNED_SHORT
 /* #define void char */
 /* #define const */
-#undef CHAR_IS_UNSIGNED
+#undef __CHAR_UNSIGNED__
 #define HAVE_STDDEF_H
 #define HAVE_STDLIB_H
 #undef NEED_BSD_STRINGS
diff --git a/win/config.h.in b/win/jconfigint.h.in
similarity index 80%
rename from win/config.h.in
rename to win/jconfigint.h.in
index ff556c4..2131bf5 100644
--- a/win/config.h.in
+++ b/win/jconfigint.h.in
@@ -1,10 +1,10 @@
-#define VERSION "@VERSION@"
-#define BUILD "@BUILD@"
-#define PACKAGE_NAME "@CMAKE_PROJECT_NAME@"
+#define VERSION "@VERSION@"
+#define BUILD "@BUILD@"
+#define PACKAGE_NAME "@CMAKE_PROJECT_NAME@"
 
 #ifndef INLINE
 #if defined(__GNUC__)
-#define INLINE __attribute__((always_inline))
+#define INLINE inline __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #define INLINE __forceinline
 #else
diff --git a/win/jpeg62-memsrcdst.def b/win/jpeg62-memsrcdst.def
new file mode 100755
index 0000000..4511c8e
--- /dev/null
+++ b/win/jpeg62-memsrcdst.def
@@ -0,0 +1,104 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_output_dimensions @ 38 ; 
+	jpeg_consume_input @ 39 ; 
+	jpeg_copy_critical_parameters @ 40 ; 
+	jpeg_default_colorspace @ 41 ; 
+	jpeg_destroy @ 42 ; 
+	jpeg_destroy_compress @ 43 ; 
+	jpeg_destroy_decompress @ 44 ; 
+	jpeg_fdct_float @ 45 ; 
+	jpeg_fdct_ifast @ 46 ; 
+	jpeg_fdct_islow @ 47 ; 
+	jpeg_fill_bit_buffer @ 48 ; 
+	jpeg_finish_compress @ 49 ; 
+	jpeg_finish_decompress @ 50 ; 
+	jpeg_finish_output @ 51 ; 
+	jpeg_free_large @ 52 ; 
+	jpeg_free_small @ 53 ; 
+	jpeg_gen_optimal_table @ 54 ; 
+	jpeg_get_large @ 55 ; 
+	jpeg_get_small @ 56 ; 
+	jpeg_has_multiple_scans @ 57 ; 
+	jpeg_huff_decode @ 58 ; 
+	jpeg_idct_1x1 @ 59 ; 
+	jpeg_idct_2x2 @ 60 ; 
+	jpeg_idct_4x4 @ 61 ; 
+	jpeg_idct_float @ 62 ; 
+	jpeg_idct_ifast @ 63 ; 
+	jpeg_idct_islow @ 64 ; 
+	jpeg_input_complete @ 65 ; 
+	jpeg_make_c_derived_tbl @ 66 ; 
+	jpeg_make_d_derived_tbl @ 67 ; 
+	jpeg_mem_available @ 68 ; 
+	jpeg_mem_init @ 69 ; 
+	jpeg_mem_term @ 70 ; 
+	jpeg_new_colormap @ 71 ; 
+	jpeg_open_backing_store @ 72 ; 
+	jpeg_quality_scaling @ 73 ; 
+	jpeg_read_coefficients @ 74 ; 
+	jpeg_read_header @ 75 ; 
+	jpeg_read_raw_data @ 76 ; 
+	jpeg_read_scanlines @ 77 ; 
+	jpeg_resync_to_restart @ 78 ; 
+	jpeg_save_markers @ 79 ; 
+	jpeg_set_colorspace @ 80 ; 
+	jpeg_set_defaults @ 81 ; 
+	jpeg_set_linear_quality @ 82 ; 
+	jpeg_set_marker_processor @ 83 ; 
+	jpeg_set_quality @ 84 ; 
+	jpeg_simple_progression @ 85 ; 
+	jpeg_start_compress @ 86 ; 
+	jpeg_start_decompress @ 87 ; 
+	jpeg_start_output @ 88 ; 
+	jpeg_std_error @ 89 ; 
+	jpeg_stdio_dest @ 90 ; 
+	jpeg_stdio_src @ 91 ; 
+	jpeg_suppress_tables @ 92 ; 
+	jpeg_write_coefficients @ 93 ; 
+	jpeg_write_m_byte @ 94 ; 
+	jpeg_write_m_header @ 95 ; 
+	jpeg_write_marker @ 96 ; 
+	jpeg_write_raw_data @ 97 ; 
+	jpeg_write_scanlines @ 98 ; 
+	jpeg_write_tables @ 99 ; 
+	jround_up @ 100 ; 
+	jzero_far @ 101 ; 
+	jpeg_mem_dest @ 102 ; 
+	jpeg_mem_src @ 103 ; 
diff --git a/win/jpeg7-memsrcdst.def b/win/jpeg7-memsrcdst.def
new file mode 100644
index 0000000..8c9f517
--- /dev/null
+++ b/win/jpeg7-memsrcdst.def
@@ -0,0 +1,106 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_jpeg_dimensions @ 38 ; 
+	jpeg_calc_output_dimensions @ 39 ; 
+	jpeg_consume_input @ 40 ; 
+	jpeg_copy_critical_parameters @ 41 ; 
+	jpeg_default_colorspace @ 42 ; 
+	jpeg_default_qtables @ 43 ;
+	jpeg_destroy @ 44 ; 
+	jpeg_destroy_compress @ 45 ; 
+	jpeg_destroy_decompress @ 46 ; 
+	jpeg_fdct_float @ 47 ; 
+	jpeg_fdct_ifast @ 48 ; 
+	jpeg_fdct_islow @ 49 ; 
+	jpeg_fill_bit_buffer @ 50 ; 
+	jpeg_finish_compress @ 51 ; 
+	jpeg_finish_decompress @ 52 ; 
+	jpeg_finish_output @ 53 ; 
+	jpeg_free_large @ 54 ; 
+	jpeg_free_small @ 55 ; 
+	jpeg_gen_optimal_table @ 56 ; 
+	jpeg_get_large @ 57 ; 
+	jpeg_get_small @ 58 ; 
+	jpeg_has_multiple_scans @ 59 ; 
+	jpeg_huff_decode @ 60 ; 
+	jpeg_idct_1x1 @ 61 ; 
+	jpeg_idct_2x2 @ 62 ; 
+	jpeg_idct_4x4 @ 63 ; 
+	jpeg_idct_float @ 64 ; 
+	jpeg_idct_ifast @ 65 ; 
+	jpeg_idct_islow @ 66 ; 
+	jpeg_input_complete @ 67 ; 
+	jpeg_make_c_derived_tbl @ 68 ; 
+	jpeg_make_d_derived_tbl @ 69 ; 
+	jpeg_mem_available @ 70 ; 
+	jpeg_mem_init @ 71 ; 
+	jpeg_mem_term @ 72 ; 
+	jpeg_new_colormap @ 73 ; 
+	jpeg_open_backing_store @ 74 ; 
+	jpeg_quality_scaling @ 75 ; 
+	jpeg_read_coefficients @ 76 ; 
+	jpeg_read_header @ 77 ; 
+	jpeg_read_raw_data @ 78 ; 
+	jpeg_read_scanlines @ 79 ; 
+	jpeg_resync_to_restart @ 80 ; 
+	jpeg_save_markers @ 81 ; 
+	jpeg_set_colorspace @ 82 ; 
+	jpeg_set_defaults @ 83 ; 
+	jpeg_set_linear_quality @ 84 ; 
+	jpeg_set_marker_processor @ 85 ; 
+	jpeg_set_quality @ 86 ; 
+	jpeg_simple_progression @ 87 ; 
+	jpeg_start_compress @ 88 ; 
+	jpeg_start_decompress @ 89 ; 
+	jpeg_start_output @ 90 ; 
+	jpeg_std_error @ 91 ; 
+	jpeg_stdio_dest @ 92 ; 
+	jpeg_stdio_src @ 93 ; 
+	jpeg_suppress_tables @ 94 ; 
+	jpeg_write_coefficients @ 95 ; 
+	jpeg_write_m_byte @ 96 ; 
+	jpeg_write_m_header @ 97 ; 
+	jpeg_write_marker @ 98 ; 
+	jpeg_write_raw_data @ 99 ; 
+	jpeg_write_scanlines @ 100 ; 
+	jpeg_write_tables @ 101 ; 
+	jround_up @ 102 ; 
+	jzero_far @ 103 ; 
+	jpeg_mem_dest @ 104 ; 
+	jpeg_mem_src @ 105 ; 
diff --git a/wizard.txt b/wizard.txt
index 54170b2..ede721e 100644
--- a/wizard.txt
+++ b/wizard.txt
@@ -30,7 +30,7 @@
 You can substitute a different set of quantization values by using the
 -qtables switch:
 
-	-qtables file	Use the quantization tables given in the named file.
+        -qtables file   Use the quantization tables given in the named file.
 
 The specified file should be a text file containing decimal quantization
 values.  The file should contain one to four tables, each of 64 elements.
@@ -43,27 +43,27 @@
 with '#' and extends to the end of the line.  Here is an example file that
 duplicates the default quantization tables:
 
-	# Quantization tables given in JPEG spec, section K.1
+        # Quantization tables given in JPEG spec, section K.1
 
-	# This is table 0 (the luminance table):
-	  16  11  10  16  24  40  51  61
-	  12  12  14  19  26  58  60  55
-	  14  13  16  24  40  57  69  56
-	  14  17  22  29  51  87  80  62
-	  18  22  37  56  68 109 103  77
-	  24  35  55  64  81 104 113  92
-	  49  64  78  87 103 121 120 101
-	  72  92  95  98 112 100 103  99
+        # This is table 0 (the luminance table):
+          16  11  10  16  24  40  51  61
+          12  12  14  19  26  58  60  55
+          14  13  16  24  40  57  69  56
+          14  17  22  29  51  87  80  62
+          18  22  37  56  68 109 103  77
+          24  35  55  64  81 104 113  92
+          49  64  78  87 103 121 120 101
+          72  92  95  98 112 100 103  99
 
-	# This is table 1 (the chrominance table):
-	  17  18  24  47  99  99  99  99
-	  18  21  26  66  99  99  99  99
-	  24  26  56  99  99  99  99  99
-	  47  66  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
+        # This is table 1 (the chrominance table):
+          17  18  24  47  99  99  99  99
+          18  21  26  66  99  99  99  99
+          24  26  56  99  99  99  99  99
+          47  66  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
 
 If the -qtables switch is used without -quality, then the specified tables
 are used exactly as-is.  If both -qtables and -quality are used, then the
@@ -75,8 +75,8 @@
 table 1 for chrominance components.  To override this choice, use the -qslots
 switch:
 
-	-qslots N[,...]		Select which quantization table to use for
-				each color component.
+        -qslots N[,...]         Select which quantization table to use for
+                                each color component.
 
 The -qslots switch specifies a quantization table number for each color
 component, in the order in which the components appear in the JPEG SOF marker.
@@ -93,8 +93,8 @@
 compressing YCbCr data, and no downsampling for all other color spaces.
 You can override this default with the -sample switch:
 
-	-sample HxV[,...]	Set JPEG sampling factors for each color
-				component.
+        -sample HxV[,...]       Set JPEG sampling factors for each color
+                                component.
 
 The -sample switch specifies the JPEG sampling factors for each color
 component, in the order in which they appear in the JPEG SOF marker.
@@ -119,7 +119,7 @@
 files or progressive JPEG files with custom progression parameters by using
 the -scans switch:
 
-	-scans file	Use the scan sequence given in the named file.
+        -scans file     Use the scan sequence given in the named file.
 
 The specified file should be a text file containing a "scan script".
 The script specifies the contents and ordering of the scans to be emitted.
@@ -138,10 +138,10 @@
 positional indexes.)
 
 The progression parameters for each scan are:
-	Ss	Zigzag index of first coefficient included in scan
-	Se	Zigzag index of last coefficient included in scan
-	Ah	Zero for first scan of a coefficient, else Al of prior scan
-	Al	Successive approximation low bit position for scan
+        Ss      Zigzag index of first coefficient included in scan
+        Se      Zigzag index of last coefficient included in scan
+        Ah      Zero for first scan of a coefficient, else Al of prior scan
+        Al      Successive approximation low bit position for scan
 If the progression parameters are omitted, the values 0,63,0,0 are used,
 producing a sequential JPEG file.  cjpeg automatically determines whether
 the script represents a progressive or sequential file, by observing whether
@@ -156,52 +156,52 @@
 legibility, commas or dashes can be placed between values.  (Actually, any
 single punctuation character other than ':' or ';' can be inserted.)  For
 example, the following two scan definitions are equivalent:
-	0 1 2: 0 63 0 0;
-	0,1,2 : 0-63, 0,0 ;
+        0 1 2: 0 63 0 0;
+        0,1,2 : 0-63, 0,0 ;
 
 Here is an example of a scan script that generates a partially interleaved
 sequential JPEG file:
 
-	0;			# Y only in first scan
-	1 2;			# Cb and Cr in second scan
+        0;                      # Y only in first scan
+        1 2;                    # Cb and Cr in second scan
 
 Here is an example of a progressive scan script using only spectral selection
 (no successive approximation):
 
-	# Interleaved DC scan for Y,Cb,Cr:
-	0,1,2: 0-0,   0, 0 ;
-	# AC scans:
-	0:     1-2,   0, 0 ;	# First two Y AC coefficients
-	0:     3-5,   0, 0 ;	# Three more
-	1:     1-63,  0, 0 ;	# All AC coefficients for Cb
-	2:     1-63,  0, 0 ;	# All AC coefficients for Cr
-	0:     6-9,   0, 0 ;	# More Y coefficients
-	0:     10-63, 0, 0 ;	# Remaining Y coefficients
+        # Interleaved DC scan for Y,Cb,Cr:
+        0,1,2: 0-0,   0, 0 ;
+        # AC scans:
+        0:     1-2,   0, 0 ;    # First two Y AC coefficients
+        0:     3-5,   0, 0 ;    # Three more
+        1:     1-63,  0, 0 ;    # All AC coefficients for Cb
+        2:     1-63,  0, 0 ;    # All AC coefficients for Cr
+        0:     6-9,   0, 0 ;    # More Y coefficients
+        0:     10-63, 0, 0 ;    # Remaining Y coefficients
 
 Here is an example of a successive-approximation script.  This is equivalent
 to the default script used by "cjpeg -progressive" for YCbCr images:
 
-	# Initial DC scan for Y,Cb,Cr (lowest bit not sent)
-	0,1,2: 0-0,   0, 1 ;
-	# First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits:
-	0:     1-5,   0, 2 ;
-	# Send all Cr,Cb AC coefficients, minus lowest bit:
-	# (chroma data is usually too small to be worth subdividing further;
-	#  but note we send Cr first since eye is least sensitive to Cb)
-	2:     1-63,  0, 1 ;
-	1:     1-63,  0, 1 ;
-	# Send remaining Y AC coefficients, minus 2 lowest bits:
-	0:     6-63,  0, 2 ;
-	# Send next-to-lowest bit of all Y AC coefficients:
-	0:     1-63,  2, 1 ;
-	# At this point we've sent all but the lowest bit of all coefficients.
-	# Send lowest bit of DC coefficients
-	0,1,2: 0-0,   1, 0 ;
-	# Send lowest bit of AC coefficients
-	2:     1-63,  1, 0 ;
-	1:     1-63,  1, 0 ;
-	# Y AC lowest bit scan is last; it's usually the largest scan
-	0:     1-63,  1, 0 ;
+        # Initial DC scan for Y,Cb,Cr (lowest bit not sent)
+        0,1,2: 0-0,   0, 1 ;
+        # First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits:
+        0:     1-5,   0, 2 ;
+        # Send all Cr,Cb AC coefficients, minus lowest bit:
+        # (chroma data is usually too small to be worth subdividing further;
+        #  but note we send Cr first since eye is least sensitive to Cb)
+        2:     1-63,  0, 1 ;
+        1:     1-63,  0, 1 ;
+        # Send remaining Y AC coefficients, minus 2 lowest bits:
+        0:     6-63,  0, 2 ;
+        # Send next-to-lowest bit of all Y AC coefficients:
+        0:     1-63,  2, 1 ;
+        # At this point we've sent all but the lowest bit of all coefficients.
+        # Send lowest bit of DC coefficients
+        0,1,2: 0-0,   1, 0 ;
+        # Send lowest bit of AC coefficients
+        2:     1-63,  1, 0 ;
+        1:     1-63,  1, 0 ;
+        # Y AC lowest bit scan is last; it's usually the largest scan
+        0:     1-63,  1, 0 ;
 
 It may be worth pointing out that this script is tuned for quality settings
 of around 50 to 75.  For lower quality settings, you'd probably want to use
diff --git a/wrbmp.c b/wrbmp.c
index 3283b0f..b8e213b 100644
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -17,7 +17,7 @@
  * This code contributed by James Arthur Boucher.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef BMP_SUPPORTED
 
@@ -42,15 +42,15 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  boolean is_os2;		/* saves the OS2 format request flag */
+  boolean is_os2;               /* saves the OS2 format request flag */
 
-  jvirt_sarray_ptr whole_image;	/* needed to reverse row order */
-  JDIMENSION data_width;	/* JSAMPLEs per row */
-  JDIMENSION row_width;		/* physical width of one row in the BMP file */
-  int pad_bytes;		/* number of padding bytes needed per row */
-  JDIMENSION cur_output_row;	/* next row# to write to virtual array */
+  jvirt_sarray_ptr whole_image; /* needed to reverse row order */
+  JDIMENSION data_width;        /* JSAMPLEs per row */
+  JDIMENSION row_width;         /* physical width of one row in the BMP file */
+  int pad_bytes;                /* number of padding bytes needed per row */
+  JDIMENSION cur_output_row;    /* next row# to write to virtual array */
 } bmp_dest_struct;
 
 typedef bmp_dest_struct * bmp_dest_ptr;
@@ -58,8 +58,8 @@
 
 /* Forward declarations */
 LOCAL(void) write_colormap
-	JPP((j_decompress_ptr cinfo, bmp_dest_ptr dest,
-	     int map_colors, int map_entry_size));
+        JPP((j_decompress_ptr cinfo, bmp_dest_ptr dest,
+             int map_colors, int map_entry_size));
 
 
 /*
@@ -69,7 +69,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 /* This version is for writing 24-bit pixels */
 {
   bmp_dest_ptr dest = (bmp_dest_ptr) dinfo;
@@ -90,7 +90,7 @@
   inptr = dest->pub.buffer[0];
   outptr = image_ptr[0];
   for (col = cinfo->output_width; col > 0; col--) {
-    outptr[2] = *inptr++;	/* can omit GETJSAMPLE() safely */
+    outptr[2] = *inptr++;       /* can omit GETJSAMPLE() safely */
     outptr[1] = *inptr++;
     outptr[0] = *inptr++;
     outptr += 3;
@@ -104,7 +104,7 @@
 
 METHODDEF(void)
 put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	       JDIMENSION rows_supplied)
+               JDIMENSION rows_supplied)
 /* This version is for grayscale OR quantized color output */
 {
   bmp_dest_ptr dest = (bmp_dest_ptr) dinfo;
@@ -123,7 +123,7 @@
   inptr = dest->pub.buffer[0];
   outptr = image_ptr[0];
   for (col = cinfo->output_width; col > 0; col--) {
-    *outptr++ = *inptr++;	/* can omit GETJSAMPLE() safely */
+    *outptr++ = *inptr++;       /* can omit GETJSAMPLE() safely */
   }
 
   /* Zero out the pad bytes. */
@@ -160,13 +160,13 @@
   char bmpfileheader[14];
   char bmpinfoheader[40];
 #define PUT_2B(array,offset,value)  \
-	(array[offset] = (char) ((value) & 0xFF), \
-	 array[offset+1] = (char) (((value) >> 8) & 0xFF))
+        (array[offset] = (char) ((value) & 0xFF), \
+         array[offset+1] = (char) (((value) >> 8) & 0xFF))
 #define PUT_4B(array,offset,value)  \
-	(array[offset] = (char) ((value) & 0xFF), \
-	 array[offset+1] = (char) (((value) >> 8) & 0xFF), \
-	 array[offset+2] = (char) (((value) >> 16) & 0xFF), \
-	 array[offset+3] = (char) (((value) >> 24) & 0xFF))
+        (array[offset] = (char) ((value) & 0xFF), \
+         array[offset+1] = (char) (((value) >> 8) & 0xFF), \
+         array[offset+2] = (char) (((value) >> 16) & 0xFF), \
+         array[offset+3] = (char) (((value) >> 24) & 0xFF))
   INT32 headersize, bfSize;
   int bits_per_pixel, cmap_entries;
 
@@ -189,23 +189,23 @@
   /* File size */
   headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */
   bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
-  
+
   /* Set unused fields of header to 0 */
   MEMZERO(bmpfileheader, SIZEOF(bmpfileheader));
   MEMZERO(bmpinfoheader, SIZEOF(bmpinfoheader));
 
   /* Fill the file header */
-  bmpfileheader[0] = 0x42;	/* first 2 bytes are ASCII 'B', 'M' */
+  bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
   bmpfileheader[1] = 0x4D;
   PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */
   /* we leave bfReserved1 & bfReserved2 = 0 */
   PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */
 
   /* Fill the info header (Microsoft calls this a BITMAPINFOHEADER) */
-  PUT_2B(bmpinfoheader, 0, 40);	/* biSize */
+  PUT_2B(bmpinfoheader, 0, 40); /* biSize */
   PUT_4B(bmpinfoheader, 4, cinfo->output_width); /* biWidth */
   PUT_4B(bmpinfoheader, 8, cinfo->output_height); /* biHeight */
-  PUT_2B(bmpinfoheader, 12, 1);	/* biPlanes - must be 1 */
+  PUT_2B(bmpinfoheader, 12, 1); /* biPlanes - must be 1 */
   PUT_2B(bmpinfoheader, 14, bits_per_pixel); /* biBitCount */
   /* we leave biCompression = 0, for none */
   /* we leave biSizeImage = 0; this is correct for uncompressed data */
@@ -254,23 +254,23 @@
   /* File size */
   headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */
   bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
-  
+
   /* Set unused fields of header to 0 */
   MEMZERO(bmpfileheader, SIZEOF(bmpfileheader));
   MEMZERO(bmpcoreheader, SIZEOF(bmpcoreheader));
 
   /* Fill the file header */
-  bmpfileheader[0] = 0x42;	/* first 2 bytes are ASCII 'B', 'M' */
+  bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
   bmpfileheader[1] = 0x4D;
   PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */
   /* we leave bfReserved1 & bfReserved2 = 0 */
   PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */
 
   /* Fill the info header (Microsoft calls this a BITMAPCOREHEADER) */
-  PUT_2B(bmpcoreheader, 0, 12);	/* bcSize */
+  PUT_2B(bmpcoreheader, 0, 12); /* bcSize */
   PUT_2B(bmpcoreheader, 4, cinfo->output_width); /* bcWidth */
   PUT_2B(bmpcoreheader, 6, cinfo->output_height); /* bcHeight */
-  PUT_2B(bmpcoreheader, 8, 1);	/* bcPlanes - must be 1 */
+  PUT_2B(bmpcoreheader, 8, 1);  /* bcPlanes - must be 1 */
   PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */
 
   if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14)
@@ -290,7 +290,7 @@
 
 LOCAL(void)
 write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest,
-		int map_colors, int map_entry_size)
+                int map_colors, int map_entry_size)
 {
   JSAMPARRAY colormap = cinfo->colormap;
   int num_colors = cinfo->actual_number_of_colors;
@@ -301,20 +301,20 @@
     if (cinfo->out_color_components == 3) {
       /* Normal case with RGB colormap */
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(colormap[2][i]), outfile);
-	putc(GETJSAMPLE(colormap[1][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	if (map_entry_size == 4)
-	  putc(0, outfile);
+        putc(GETJSAMPLE(colormap[2][i]), outfile);
+        putc(GETJSAMPLE(colormap[1][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        if (map_entry_size == 4)
+          putc(0, outfile);
       }
     } else {
       /* Grayscale colormap (only happens with grayscale quantization) */
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	if (map_entry_size == 4)
-	  putc(0, outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        if (map_entry_size == 4)
+          putc(0, outfile);
       }
     }
   } else {
@@ -324,10 +324,10 @@
       putc(i, outfile);
       putc(i, outfile);
       if (map_entry_size == 4)
-	putc(0, outfile);
+        putc(0, outfile);
     }
   }
-  /* Pad colormap with zeros to ensure specified number of colormap entries */ 
+  /* Pad colormap with zeros to ensure specified number of colormap entries */
   if (i > map_colors)
     ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, i);
   for (; i < map_colors; i++) {
@@ -395,7 +395,7 @@
   /* Create module interface object, fill in method pointers */
   dest = (bmp_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(bmp_dest_struct));
+                                  SIZEOF(bmp_dest_struct));
   dest->pub.start_output = start_output_bmp;
   dest->pub.finish_output = finish_output_bmp;
   dest->is_os2 = is_os2;
diff --git a/wrgif.c b/wrgif.c
index 5fe8328..193f7b5 100644
--- a/wrgif.c
+++ b/wrgif.c
@@ -37,7 +37,7 @@
  *    CompuServe Incorporated."
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef GIF_SUPPORTED
 
@@ -45,31 +45,31 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  j_decompress_ptr cinfo;	/* back link saves passing separate parm */
+  j_decompress_ptr cinfo;       /* back link saves passing separate parm */
 
   /* State for packing variable-width codes into a bitstream */
-  int n_bits;			/* current number of bits/code */
-  int maxcode;			/* maximum code, given n_bits */
-  INT32 cur_accum;		/* holds bits not yet output */
-  int cur_bits;			/* # of bits in cur_accum */
+  int n_bits;                   /* current number of bits/code */
+  int maxcode;                  /* maximum code, given n_bits */
+  INT32 cur_accum;              /* holds bits not yet output */
+  int cur_bits;                 /* # of bits in cur_accum */
 
   /* State for GIF code assignment */
-  int ClearCode;		/* clear code (doesn't change) */
-  int EOFCode;			/* EOF code (ditto) */
-  int code_counter;		/* counts output symbols */
+  int ClearCode;                /* clear code (doesn't change) */
+  int EOFCode;                  /* EOF code (ditto) */
+  int code_counter;             /* counts output symbols */
 
   /* GIF data packet construction buffer */
-  int bytesinpkt;		/* # of bytes in current packet */
-  char packetbuf[256];		/* workspace for accumulating packet */
+  int bytesinpkt;               /* # of bytes in current packet */
+  char packetbuf[256];          /* workspace for accumulating packet */
 
 } gif_dest_struct;
 
 typedef gif_dest_struct * gif_dest_ptr;
 
 /* Largest value that will fit in N bits */
-#define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
+#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
 
 
 /*
@@ -81,10 +81,10 @@
 flush_packet (gif_dest_ptr dinfo)
 /* flush any accumulated data */
 {
-  if (dinfo->bytesinpkt > 0) {	/* never write zero-length packet */
+  if (dinfo->bytesinpkt > 0) {  /* never write zero-length packet */
     dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++;
     if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt)
-	!= (size_t) dinfo->bytesinpkt)
+        != (size_t) dinfo->bytesinpkt)
       ERREXIT(dinfo->cinfo, JERR_FILE_WRITE);
     dinfo->bytesinpkt = 0;
   }
@@ -93,10 +93,10 @@
 
 /* Add a character to current packet; flush to disk if necessary */
 #define CHAR_OUT(dinfo,c)  \
-	{ (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c);  \
-	    if ((dinfo)->bytesinpkt >= 255)  \
-	      flush_packet(dinfo);  \
-	}
+        { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c);  \
+            if ((dinfo)->bytesinpkt >= 255)  \
+              flush_packet(dinfo);  \
+        }
 
 
 /* Routine to convert variable-width codes into a byte stream */
@@ -173,7 +173,7 @@
     dinfo->code_counter++;
   } else {
     output(dinfo, dinfo->ClearCode);
-    dinfo->code_counter = dinfo->ClearCode + 2;	/* reset the counter */
+    dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */
   }
 }
 
@@ -218,7 +218,7 @@
 LOCAL(void)
 emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap)
 /* Output the GIF file header, including color map */
-/* If colormap==NULL, synthesize a gray-scale colormap */
+/* If colormap==NULL, synthesize a grayscale colormap */
 {
   int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte;
   int cshift = dinfo->cinfo->data_precision - 8;
@@ -248,9 +248,9 @@
   /* Write the Logical Screen Descriptor */
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_width);
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
-  FlagByte = 0x80;		/* Yes, there is a global color table */
+  FlagByte = 0x80;              /* Yes, there is a global color table */
   FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */
-  FlagByte |= (BitsPerPixel-1);	/* size of global color table */
+  FlagByte |= (BitsPerPixel-1); /* size of global color table */
   putc(FlagByte, dinfo->pub.output_file);
   putc(0, dinfo->pub.output_file); /* Background color index */
   putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */
@@ -260,18 +260,18 @@
   for (i=0; i < ColorMapSize; i++) {
     if (i < num_colors) {
       if (colormap != NULL) {
-	if (dinfo->cinfo->out_color_space == JCS_RGB) {
-	  /* Normal case: RGB color map */
-	  putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file);
-	  putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file);
-	  putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file);
-	} else {
-	  /* Grayscale "color map": possible if quantizing grayscale image */
-	  put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift);
-	}
+        if (dinfo->cinfo->out_color_space == JCS_RGB) {
+          /* Normal case: RGB color map */
+          putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file);
+          putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file);
+          putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file);
+        } else {
+          /* Grayscale "color map": possible if quantizing grayscale image */
+          put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift);
+        }
       } else {
-	/* Create a gray-scale map of num_colors values, range 0..255 */
-	put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1));
+        /* Create a grayscale map of num_colors values, range 0..255 */
+        put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1));
       }
     } else {
       /* fill out the map to a power of 2 */
@@ -280,7 +280,7 @@
   }
   /* Write image separator and Image Descriptor */
   putc(',', dinfo->pub.output_file); /* separator */
-  put_word(dinfo, 0);		/* left/top offset */
+  put_word(dinfo, 0);           /* left/top offset */
   put_word(dinfo, 0);
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
@@ -317,7 +317,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 {
   gif_dest_ptr dest = (gif_dest_ptr) dinfo;
   register JSAMPROW ptr;
@@ -364,8 +364,8 @@
   /* Create module interface object, fill in method pointers */
   dest = (gif_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(gif_dest_struct));
-  dest->cinfo = cinfo;		/* make back link for subroutines */
+                                  SIZEOF(gif_dest_struct));
+  dest->cinfo = cinfo;          /* make back link for subroutines */
   dest->pub.start_output = start_output_gif;
   dest->pub.put_pixel_rows = put_pixel_rows;
   dest->pub.finish_output = finish_output_gif;
diff --git a/wrjpgcom.c b/wrjpgcom.c
index 8c04b05..a1b2fb1 100644
--- a/wrjpgcom.c
+++ b/wrjpgcom.c
@@ -1,8 +1,10 @@
 /*
  * wrjpgcom.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, D. R. Commander
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a very simple stand-alone application that inserts
@@ -11,48 +13,48 @@
  * JPEG markers.
  */
 
-#define JPEG_CJPEG_DJPEG	/* to get the command-line config symbols */
-#include "jinclude.h"		/* get auto-config symbols, <stdio.h> */
+#define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
+#include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc() */
 extern void * malloc ();
 #endif
-#include <ctype.h>		/* to declare isupper(), tolower() */
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
+#include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#define WRITE_BINARY    "w"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
+#ifdef VMS                      /* VMS is very nonstandard */
+#define READ_BINARY     "rb", "ctx=stm"
+#define WRITE_BINARY    "wb", "ctx=stm"
+#else                           /* standard ANSI-compliant case */
+#define READ_BINARY     "rb"
+#define WRITE_BINARY    "wb"
 #endif
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
 #ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
+#define EXIT_SUCCESS  1         /* VMS is very nonstandard */
 #else
 #define EXIT_SUCCESS  0
 #endif
@@ -63,7 +65,7 @@
  */
 
 #ifndef MAX_COM_LENGTH
-#define MAX_COM_LENGTH 65000L	/* must be <= 65533 in any case */
+#define MAX_COM_LENGTH 65000L   /* must be <= 65533 in any case */
 #endif
 
 
@@ -72,12 +74,12 @@
  * To reuse this code in another application, you might need to change these.
  */
 
-static FILE * infile;		/* input JPEG file */
+static FILE * infile;           /* input JPEG file */
 
 /* Return next input byte, or EOF if no more */
 #define NEXTBYTE()  getc(infile)
 
-static FILE * outfile;		/* output JPEG file */
+static FILE * outfile;          /* output JPEG file */
 
 /* Emit an output byte */
 #define PUTBYTE(x)  putc((x), outfile)
@@ -154,11 +156,11 @@
  * in this program.  (See jdmarker.c for a more complete list.)
  */
 
-#define M_SOF0  0xC0		/* Start Of Frame N */
-#define M_SOF1  0xC1		/* N indicates which compression process */
-#define M_SOF2  0xC2		/* Only SOF0-SOF2 are now in common use */
+#define M_SOF0  0xC0            /* Start Of Frame N */
+#define M_SOF1  0xC1            /* N indicates which compression process */
+#define M_SOF2  0xC2            /* Only SOF0-SOF2 are now in common use */
 #define M_SOF3  0xC3
-#define M_SOF5  0xC5		/* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF5  0xC5            /* NB: codes C4 and CC are NOT SOF markers */
 #define M_SOF6  0xC6
 #define M_SOF7  0xC7
 #define M_SOF9  0xC9
@@ -167,10 +169,10 @@
 #define M_SOF13 0xCD
 #define M_SOF14 0xCE
 #define M_SOF15 0xCF
-#define M_SOI   0xD8		/* Start Of Image (beginning of datastream) */
-#define M_EOI   0xD9		/* End Of Image (end of datastream) */
-#define M_SOS   0xDA		/* Start Of Scan (begins compressed data) */
-#define M_COM   0xFE		/* COMment */
+#define M_SOI   0xD8            /* Start Of Image (beginning of datastream) */
+#define M_EOI   0xD9            /* End Of Image (end of datastream) */
+#define M_SOS   0xDA            /* Start Of Scan (begins compressed data) */
+#define M_COM   0xFE            /* COMment */
 
 
 /*
@@ -302,40 +304,40 @@
       /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
        * treated as SOFn.  C4 in particular is actually DHT.
        */
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
-    case M_SOF2:		/* Progressive, Huffman */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_SOF9:		/* Extended sequential, arithmetic */
-    case M_SOF10:		/* Progressive, arithmetic */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_SOF9:                /* Extended sequential, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       return marker;
 
-    case M_SOS:			/* should not see compressed data before SOF */
+    case M_SOS:                 /* should not see compressed data before SOF */
       ERREXIT("SOS without prior SOFn");
       break;
 
-    case M_EOI:			/* in case it's a tables-only JPEG stream */
+    case M_EOI:                 /* in case it's a tables-only JPEG stream */
       return marker;
 
-    case M_COM:			/* Existing COM: conditionally discard */
+    case M_COM:                 /* Existing COM: conditionally discard */
       if (keep_COM) {
-	write_marker(marker);
-	copy_variable();
+        write_marker(marker);
+        copy_variable();
       } else {
-	skip_variable();
+        skip_variable();
       }
       break;
 
-    default:			/* Anything else just gets copied */
+    default:                    /* Anything else just gets copied */
       write_marker(marker);
-      copy_variable();		/* we assume it has a parameter count... */
+      copy_variable();          /* we assume it has a parameter count... */
       break;
     }
   } /* end loop */
@@ -344,7 +346,7 @@
 
 /* Command line parsing code */
 
-static const char * progname;	/* program name for error messages */
+static const char * progname;   /* program name for error messages */
 
 
 static void
@@ -370,7 +372,7 @@
   fprintf(stderr, "If you do not give either -comment or -cfile on the command line,\n");
   fprintf(stderr, "then the comment text is read from standard input.\n");
   fprintf(stderr, "It can be multiple lines, up to %u characters total.\n",
-	  (unsigned int) MAX_COM_LENGTH);
+          (unsigned int) MAX_COM_LENGTH);
 #ifndef TWO_FILE_COMMANDLINE
   fprintf(stderr, "You must specify an input JPEG file name when supplying\n");
   fprintf(stderr, "comment text from standard input.\n");
@@ -391,17 +393,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return 0;			/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return 0;                 /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return 0;			/* no good */
-    nmatched++;			/* count matched characters */
+      return 0;                 /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return 0;
-  return 1;			/* A-OK */
+  return 1;                     /* A-OK */
 }
 
 
@@ -427,21 +429,21 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "wrjpgcom";	/* in case C library doesn't provide it */
+    progname = "wrjpgcom";      /* in case C library doesn't provide it */
 
   /* Parse switches, if any */
   for (argn = 1; argn < argc; argn++) {
     arg = argv[argn];
     if (arg[0] != '-')
-      break;			/* not switch, must be file name */
-    arg++;			/* advance over '-' */
+      break;                    /* not switch, must be file name */
+    arg++;                      /* advance over '-' */
     if (keymatch(arg, "replace", 1)) {
       keep_COM = 0;
     } else if (keymatch(arg, "cfile", 2)) {
       if (++argn >= argc) usage();
       if ((comment_file = fopen(argv[argn], "r")) == NULL) {
-	fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
-	exit(EXIT_FAILURE);
+        fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+        exit(EXIT_FAILURE);
       }
     } else if (keymatch(arg, "comment", 1)) {
       if (++argn >= argc) usage();
@@ -450,21 +452,36 @@
        * under MS-DOG and must parse out the quoted string ourselves.  Sigh.
        */
       if (comment_arg[0] == '"') {
-	comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH);
-	if (comment_arg == NULL)
-	  ERREXIT("Insufficient memory");
-	strcpy(comment_arg, argv[argn]+1);
-	for (;;) {
-	  comment_length = (unsigned int) strlen(comment_arg);
-	  if (comment_length > 0 && comment_arg[comment_length-1] == '"') {
-	    comment_arg[comment_length-1] = '\0'; /* zap terminating quote */
-	    break;
-	  }
-	  if (++argn >= argc)
-	    ERREXIT("Missing ending quote mark");
-	  strcat(comment_arg, " ");
-	  strcat(comment_arg, argv[argn]);
-	}
+        comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH);
+        if (comment_arg == NULL)
+          ERREXIT("Insufficient memory");
+        if (strlen(argv[argn]) + 2 >= (size_t) MAX_COM_LENGTH) {
+          fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                  (unsigned int) MAX_COM_LENGTH);
+          exit(EXIT_FAILURE);
+        }
+        strcpy(comment_arg, argv[argn]+1);
+        for (;;) {
+          comment_length = (unsigned int) strlen(comment_arg);
+          if (comment_length > 0 && comment_arg[comment_length-1] == '"') {
+            comment_arg[comment_length-1] = '\0'; /* zap terminating quote */
+            break;
+          }
+          if (++argn >= argc)
+            ERREXIT("Missing ending quote mark");
+          if (strlen(comment_arg) + strlen(argv[argn]) + 2 >=
+              (size_t) MAX_COM_LENGTH) {
+            fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                    (unsigned int) MAX_COM_LENGTH);
+            exit(EXIT_FAILURE);
+          }
+          strcat(comment_arg, " ");
+          strcat(comment_arg, argv[argn]);
+        }
+      } else if (strlen(argv[argn]) >= (size_t) MAX_COM_LENGTH) {
+        fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                (unsigned int) MAX_COM_LENGTH);
+        exit(EXIT_FAILURE);
       }
       comment_length = (unsigned int) strlen(comment_arg);
     } else
@@ -488,10 +505,10 @@
     }
   } else {
     /* default input file is stdin */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
     setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
     if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
       fprintf(stderr, "%s: can't open stdin\n", progname);
       exit(EXIT_FAILURE);
@@ -506,7 +523,7 @@
   /* Must have explicit output file name */
   if (argn != argc-2) {
     fprintf(stderr, "%s: must name one input and one output file\n",
-	    progname);
+            progname);
     usage();
   }
   if ((outfile = fopen(argv[argn+1], WRITE_BINARY)) == NULL) {
@@ -520,10 +537,10 @@
     usage();
   }
   /* default output file is stdout */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdout), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((outfile = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) {
     fprintf(stderr, "%s: can't open stdout\n", progname);
     exit(EXIT_FAILURE);
@@ -545,9 +562,9 @@
     src_file = (comment_file != NULL ? comment_file : stdin);
     while ((c = getc(src_file)) != EOF) {
       if (comment_length >= (unsigned int) MAX_COM_LENGTH) {
-	fprintf(stderr, "Comment text may not exceed %u bytes\n",
-		(unsigned int) MAX_COM_LENGTH);
-	exit(EXIT_FAILURE);
+        fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                (unsigned int) MAX_COM_LENGTH);
+        exit(EXIT_FAILURE);
       }
       comment_arg[comment_length++] = (char) c;
     }
@@ -579,5 +596,5 @@
 
   /* All done. */
   exit(EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/wrppm.c b/wrppm.c
index 68e0c85..6b7ebf4 100644
--- a/wrppm.c
+++ b/wrppm.c
@@ -16,7 +16,7 @@
  * an ordinary stdio stream.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef PPM_SUPPORTED
 
@@ -42,11 +42,11 @@
 #define PPM_MAXVAL 255
 #else
 /* The word-per-sample format always puts the MSB first. */
-#define PUTPPMSAMPLE(ptr,v)			\
-	{ register int val_ = v;		\
-	  *ptr++ = (char) ((val_ >> 8) & 0xFF);	\
-	  *ptr++ = (char) (val_ & 0xFF);	\
-	}
+#define PUTPPMSAMPLE(ptr,v)                     \
+        { register int val_ = v;                \
+          *ptr++ = (char) ((val_ >> 8) & 0xFF); \
+          *ptr++ = (char) (val_ & 0xFF);        \
+        }
 #define BYTESPERSAMPLE 2
 #define PPM_MAXVAL ((1<<BITS_IN_JSAMPLE)-1)
 #endif
@@ -67,13 +67,13 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
   /* Usually these two pointers point to the same place: */
-  char *iobuffer;		/* fwrite's I/O buffer */
-  JSAMPROW pixrow;		/* decompressor output buffer */
-  size_t buffer_width;		/* width of I/O buffer */
-  JDIMENSION samples_per_row;	/* JSAMPLEs per output row */
+  char *iobuffer;               /* fwrite's I/O buffer */
+  JSAMPROW pixrow;              /* decompressor output buffer */
+  size_t buffer_width;          /* width of I/O buffer */
+  JDIMENSION samples_per_row;   /* JSAMPLEs per output row */
 } ppm_dest_struct;
 
 typedef ppm_dest_struct * ppm_dest_ptr;
@@ -89,7 +89,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
 
@@ -104,7 +104,7 @@
 
 METHODDEF(void)
 copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		 JDIMENSION rows_supplied)
+                 JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -127,7 +127,7 @@
 
 METHODDEF(void)
 put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		  JDIMENSION rows_supplied)
+                  JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -152,7 +152,7 @@
 
 METHODDEF(void)
 put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		   JDIMENSION rows_supplied)
+                   JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -183,14 +183,14 @@
   case JCS_GRAYSCALE:
     /* emit header for raw PGM format */
     fprintf(dest->pub.output_file, "P5\n%ld %ld\n%d\n",
-	    (long) cinfo->output_width, (long) cinfo->output_height,
-	    PPM_MAXVAL);
+            (long) cinfo->output_width, (long) cinfo->output_height,
+            PPM_MAXVAL);
     break;
   case JCS_RGB:
     /* emit header for raw PPM format */
     fprintf(dest->pub.output_file, "P6\n%ld %ld\n%d\n",
-	    (long) cinfo->output_width, (long) cinfo->output_height,
-	    PPM_MAXVAL);
+            (long) cinfo->output_width, (long) cinfo->output_height,
+            PPM_MAXVAL);
     break;
   default:
     ERREXIT(cinfo, JERR_PPM_COLORSPACE);
@@ -224,7 +224,7 @@
   /* Create module interface object, fill in method pointers */
   dest = (ppm_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(ppm_dest_struct));
+                                  SIZEOF(ppm_dest_struct));
   dest->pub.start_output = start_output_ppm;
   dest->pub.finish_output = finish_output_ppm;
 
diff --git a/wrrle.c b/wrrle.c
index a4e7337..4fdf372 100644
--- a/wrrle.c
+++ b/wrrle.c
@@ -16,7 +16,7 @@
  * with updates from Robert Hutchinson.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef RLE_SUPPORTED
 
@@ -47,15 +47,15 @@
  * though not all of the entries need be used.
  */
 
-#define CMAPBITS	8
-#define CMAPLENGTH	(1<<(CMAPBITS))
+#define CMAPBITS        8
+#define CMAPLENGTH      (1<<(CMAPBITS))
 
 typedef struct {
   struct djpeg_dest_struct pub; /* public fields */
 
-  jvirt_sarray_ptr image;	/* virtual array to store the output image */
-  rle_map *colormap;	 	/* RLE-style color map, or NULL if none */
-  rle_pixel **rle_row;		/* To pass rows to rle_putrow() */
+  jvirt_sarray_ptr image;       /* virtual array to store the output image */
+  rle_map *colormap;            /* RLE-style color map, or NULL if none */
+  rle_pixel **rle_row;          /* To pass rows to rle_putrow() */
 
 } rle_dest_struct;
 
@@ -64,7 +64,7 @@
 /* Forward declarations */
 METHODDEF(void) rle_put_pixel_rows
     JPP((j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	 JDIMENSION rows_supplied));
+         JDIMENSION rows_supplied));
 
 
 /*
@@ -97,8 +97,8 @@
    */
 
   if (cinfo->output_width > 32767 || cinfo->output_height > 32767)
-    ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width, 
-	     cinfo->output_height);
+    ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width,
+             cinfo->output_height);
 
   if (cinfo->out_color_space != JCS_GRAYSCALE &&
       cinfo->out_color_space != JCS_RGB)
@@ -151,7 +151,7 @@
 
 METHODDEF(void)
 rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		    JDIMENSION rows_supplied)
+                    JDIMENSION rows_supplied)
 {
   rle_dest_ptr dest = (rle_dest_ptr) dinfo;
 
@@ -172,7 +172,7 @@
 finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
 {
   rle_dest_ptr dest = (rle_dest_ptr) dinfo;
-  rle_hdr header;		/* Output file information */
+  rle_hdr header;               /* Output file information */
   rle_pixel **rle_row, *red, *green, *blue;
   JSAMPROW output_row;
   char cmapcomment[80];
@@ -223,7 +223,7 @@
     for (row = cinfo->output_height-1; row >= 0; row--) {
       rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray)
         ((j_common_ptr) cinfo, dest->image,
-	 (JDIMENSION) row, (JDIMENSION) 1, FALSE);
+         (JDIMENSION) row, (JDIMENSION) 1, FALSE);
       rle_putrow(rle_row, (int) cinfo->output_width, &header);
 #ifdef PROGRESS_REPORT
       if (progress != NULL) {
@@ -237,7 +237,7 @@
       rle_row = (rle_pixel **) dest->rle_row;
       output_row = * (*cinfo->mem->access_virt_sarray)
         ((j_common_ptr) cinfo, dest->image,
-	 (JDIMENSION) row, (JDIMENSION) 1, FALSE);
+         (JDIMENSION) row, (JDIMENSION) 1, FALSE);
       red = rle_row[0];
       green = rle_row[1];
       blue = rle_row[2];
diff --git a/wrtarga.c b/wrtarga.c
index cf104d2..ffdce63 100644
--- a/wrtarga.c
+++ b/wrtarga.c
@@ -14,7 +14,7 @@
  * Based on code contributed by Lee Daniel Crocker.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef TARGA_SUPPORTED
 
@@ -41,10 +41,10 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  char *iobuffer;		/* physical I/O buffer */
-  JDIMENSION buffer_width;	/* width of one row */
+  char *iobuffer;               /* physical I/O buffer */
+  JDIMENSION buffer_width;      /* width of one row, in chars */
 } tga_dest_struct;
 
 typedef tga_dest_struct * tga_dest_ptr;
@@ -60,27 +60,27 @@
   MEMZERO(targaheader, SIZEOF(targaheader));
 
   if (num_colors > 0) {
-    targaheader[1] = 1;		/* color map type 1 */
+    targaheader[1] = 1;         /* color map type 1 */
     targaheader[5] = (char) (num_colors & 0xFF);
     targaheader[6] = (char) (num_colors >> 8);
-    targaheader[7] = 24;	/* 24 bits per cmap entry */
+    targaheader[7] = 24;        /* 24 bits per cmap entry */
   }
 
   targaheader[12] = (char) (cinfo->output_width & 0xFF);
   targaheader[13] = (char) (cinfo->output_width >> 8);
   targaheader[14] = (char) (cinfo->output_height & 0xFF);
   targaheader[15] = (char) (cinfo->output_height >> 8);
-  targaheader[17] = 0x20;	/* Top-down, non-interlaced */
+  targaheader[17] = 0x20;       /* Top-down, non-interlaced */
 
   if (cinfo->out_color_space == JCS_GRAYSCALE) {
-    targaheader[2] = 3;		/* image type = uncompressed gray-scale */
-    targaheader[16] = 8;	/* bits per pixel */
-  } else {			/* must be RGB */
+    targaheader[2] = 3;         /* image type = uncompressed grayscale */
+    targaheader[16] = 8;        /* bits per pixel */
+  } else {                      /* must be RGB */
     if (num_colors > 0) {
-      targaheader[2] = 1;	/* image type = colormapped RGB */
+      targaheader[2] = 1;       /* image type = colormapped RGB */
       targaheader[16] = 8;
     } else {
-      targaheader[2] = 2;	/* image type = uncompressed RGB */
+      targaheader[2] = 2;       /* image type = uncompressed RGB */
       targaheader[16] = 24;
     }
   }
@@ -97,7 +97,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 /* used for unquantized full-color output */
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
@@ -118,7 +118,7 @@
 
 METHODDEF(void)
 put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	       JDIMENSION rows_supplied)
+               JDIMENSION rows_supplied)
 /* used for grayscale OR quantized color output */
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
@@ -142,7 +142,7 @@
 
 METHODDEF(void)
 put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		   JDIMENSION rows_supplied)
+                   JDIMENSION rows_supplied)
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
   register JSAMPROW inptr;
@@ -183,14 +183,14 @@
       /* We only support 8-bit colormap indexes, so only 256 colors */
       num_colors = cinfo->actual_number_of_colors;
       if (num_colors > 256)
-	ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors);
+        ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors);
       write_header(cinfo, dinfo, num_colors);
       /* Write the colormap.  Note Targa uses BGR byte order */
       outfile = dest->pub.output_file;
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile);
-	putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile);
-	putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile);
       }
       dest->pub.put_pixel_rows = put_gray_rows;
     } else {
@@ -229,7 +229,7 @@
   /* Create module interface object, fill in method pointers */
   dest = (tga_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(tga_dest_struct));
+                                  SIZEOF(tga_dest_struct));
   dest->pub.start_output = start_output_tga;
   dest->pub.finish_output = finish_output_tga;
 
@@ -240,7 +240,7 @@
   dest->buffer_width = cinfo->output_width * cinfo->output_components;
   dest->iobuffer = (char *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(size_t) (dest->buffer_width * SIZEOF(char)));
+                                (size_t) (dest->buffer_width * SIZEOF(char)));
 
   /* Create decompressor output buffer. */
   dest->pub.buffer = (*cinfo->mem->alloc_sarray)