x86: Fix "short jump is out of range" w/ NASM<2.04
diff --git a/ChangeLog.md b/ChangeLog.md
index d694787..2aaa50c 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -64,6 +64,10 @@
 `-warmup` option is now used to specify the amount of warmup time rather than
 the number of warmup iterations.
 
+11. Fixed an error (`short jump is out of range`) that occurred when assembling
+the 32-bit x86 SIMD extensions with NASM versions prior to 2.04.  This was a
+regression introduced by 1.5 beta1[12].
+
 
 1.5.1
 =====
diff --git a/simd/jchuff-sse2.asm b/simd/jchuff-sse2.asm
index 36d1f2d..b81db75 100644
--- a/simd/jchuff-sse2.asm
+++ b/simd/jchuff-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2016, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2017, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -288,13 +288,13 @@
 
 .BLOOP:
         bsf ecx, edx  ; r = __builtin_ctzl(index);
-        jz .ELOOP
+        jz near .ELOOP
         lea esi, [esi+ecx*2]  ; k += r;
         shr edx, cl  ; index >>= r;
         mov DWORD [esp+temp3], edx
 .BRLOOP:
         cmp ecx, 16  ; while (r > 15) {
-        jl .ERLOOP
+        jl near .ERLOOP
         sub ecx, 16 ; r -= 16;
         mov DWORD [esp+temp], ecx
         mov   eax, INT [ebp + 240 * 4]  ; code_0xf0 = actbl->ehufco[0xf0];
@@ -348,7 +348,7 @@
         sub eax, esi
         shr eax, 1
         bsf ecx, edx  ; r = __builtin_ctzl(index);
-        jz .ELOOP2
+        jz near .ELOOP2
         shr edx, cl  ; index >>= r;
         add ecx, eax
         lea esi, [esi+ecx*2]  ; k += r;
@@ -356,13 +356,13 @@
         jmp .BRLOOP2
 .BLOOP2:
         bsf ecx, edx  ; r = __builtin_ctzl(index);
-        jz .ELOOP2
+        jz near .ELOOP2
         lea esi, [esi+ecx*2]  ; k += r;
         shr edx, cl  ; index >>= r;
         mov DWORD [esp+temp3], edx
 .BRLOOP2:
         cmp ecx, 16  ; while (r > 15) {
-        jl .ERLOOP2
+        jl near .ERLOOP2
         sub ecx, 16  ; r -= 16;
         mov DWORD [esp+temp], ecx
         mov   eax, INT [ebp + 240 * 4]  ; code_0xf0 = actbl->ehufco[0xf0];