Win/x64: Fix improper callee save of xmm8-xmm11

The x86-64 SIMD accelerations for Huffman encoding used incorrect stack math to save xmm8-xmm11 on Windows. This caused TJBench to always report 1 Mpixel/sec for the compression performance, and it likely would have caused other application issues as well.

commit: 056536f6605596d0246fd2b75a1ada2ac03b5bbe [log] [tgz]
author: DRC <information@libjpeg-turbo.org> Mon Feb 29 17:21:02 2016 -0600
committer: DRC <information@libjpeg-turbo.org> Mon Feb 29 17:46:34 2016 -0600
tree: 6e7f1c0a00c47484902edb42f5dbe0d3180ca8f3
parent: 7c202f76e7fda45c9ab7d7680046c0304a8fb2ad [diff]
diff --git a/simd/jchuff-sse2-64.asm b/simd/jchuff-sse2-64.asm
index d22efc3..84eaeeb 100644
--- a/simd/jchuff-sse2-64.asm
+++ b/simd/jchuff-sse2-64.asm

@@ -196,11 +196,11 @@
         lea     rsp, [t2]
         collect_args
 %ifdef WIN64
-        sub     rsp, 4*SIZEOF_XMMWORD
-        movaps  XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm8
+        movaps  XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8
         movaps  XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9
-        movaps  XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm10
-        movaps  XMMWORD [rsp-0*SIZEOF_XMMWORD], xmm11
+        movaps  XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10
+        movaps  XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11
+        sub     rsp, 4*SIZEOF_XMMWORD
 %endif
         push rbx
 
@@ -344,10 +344,10 @@
 
         pop rbx
 %ifdef WIN64
-        movaps  xmm8, XMMWORD [rsp-3*SIZEOF_XMMWORD]
-        movaps  xmm9, XMMWORD [rsp-2*SIZEOF_XMMWORD]
-        movaps  xmm10, XMMWORD [rsp-1*SIZEOF_XMMWORD]
-        movaps  xmm11, XMMWORD [rsp-0*SIZEOF_XMMWORD]
+        movaps  xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD]
+        movaps  xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD]
+        movaps  xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD]
+        movaps  xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD]
         add     rsp, 4*SIZEOF_XMMWORD
 %endif
         uncollect_args
commit	056536f6605596d0246fd2b75a1ada2ac03b5bbe	[log] [tgz]
author	DRC <information@libjpeg-turbo.org>	Mon Feb 29 17:21:02 2016 -0600
committer	DRC <information@libjpeg-turbo.org>	Mon Feb 29 17:46:34 2016 -0600
tree	6e7f1c0a00c47484902edb42f5dbe0d3180ca8f3
parent	7c202f76e7fda45c9ab7d7680046c0304a8fb2ad [diff]