Fixed a regression caused by a bug in the 32-bit strict memory access code in jdmrgss2.asm (contributed by Chromium to stop valgrind from whining whenever the output buffer size was not evenly divisible by 16 bytes). On Linux/x86, this regression caused incorrect pixels on the right-hand side of images whose rows were not 16-byte aligned, whenever fancy upsampling was used and the image was decompressed to a 32-bit (RGBX, etc.) buffer.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.0.x@843 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm
index 36e2582..ba3de35 100644
--- a/simd/jdmrgss2-64.asm
+++ b/simd/jdmrgss2-64.asm
@@ -12,7 +12,7 @@
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ for
+; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm
index 6a0dbd9..a00e539 100644
--- a/simd/jdmrgss2.asm
+++ b/simd/jdmrgss2.asm
@@ -478,9 +478,9 @@
cmp ecx, byte SIZEOF_XMMWORD/8
jb short .column_st7
movq MMWORD [edi], xmmA
- add edi, byte SIZEOF_XMMWORD/2
+ add edi, byte SIZEOF_XMMWORD/8*4
sub ecx, byte SIZEOF_XMMWORD/8
- psrldq xmmA, 64
+ psrldq xmmA, SIZEOF_XMMWORD/8*4
.column_st7:
; Store one pixel (4 bytes) of xmmA to the output when it has enough
; space.