I'm not sure why, but hoisting the fast-path eligibility check in decode_mcu() into a local flag (usefast) was necessary in order to return the 32-bit performance to the 0.0.90 baseline.


git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@165 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/jdhuff.c b/jdhuff.c
index 0551179..9710142 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -765,21 +765,25 @@
 decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int usefast = 1;
 
   /* Process restart marker if needed; may have to suspend */
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
 	return FALSE;
+    usefast = 0;
   }
 
+  if (cinfo->src->bytes_in_buffer < BUFSIZE * cinfo->blocks_in_MCU)
+    usefast = 0;
+
   /* If we've run out of data, just leave the MCU set to zeroes.
    * This way, we return uniform gray for the remainder of the segment.
    */
   if (! entropy->pub.insufficient_data) {
 
-    if (cinfo->src->bytes_in_buffer >= BUFSIZE * cinfo->blocks_in_MCU
-      && !cinfo->restart_interval) {
+    if (usefast) {
       if (!decode_mcu_fast(cinfo, MCU_data)) return FALSE;
     }
     else {