fix vpextrw reg, xmm, imm
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index b1546ea..9944b0d 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1315,7 +1315,8 @@
printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n");
printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n");
- printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }\n");
+ // According to Intel's manual, VEX.W1 is ignored in 64-bit mode, so VEX.W = 0 would always suffice, but this follows yasm's encoding.
+ printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }\n");
printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n");
printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n");
diff --git a/readme.txt b/readme.txt
index 7dabbc9..9150167 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.99
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.991
-----------------------------------------------------------------------------
◎概要
@@ -61,6 +61,16 @@
(注意) dword, word, byteはクラス変数です.従ってたとえばunsigned intの
つもりでdwordをtypedefしないでください.
+・AVX
+
+大抵の3オペランド形式の命令はデスティネーションを省略した形で呼び出すことができます.
+FMAについては簡略表記を導入するか検討中です(アイデア募集中).
+
+vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
+vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
+
+vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
+
・ラベル
L(文字列);
@@ -199,6 +209,7 @@
-----------------------------------------------------------------------------
◎履歴
+2011/02/07 ver 2.991 beta fix vpextrw reg, xmm, imm
2011/02/04 ver 2.99 beta support AVX
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rtdscp
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
diff --git a/readme_e.txt b/readme_e.txt
index 8488830..8bceced 100644
--- a/readme_e.txt
+++ b/readme_e.txt
@@ -1,5 +1,5 @@
- Xbyak 2.99 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+ Xbyak 2.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
-----------------------------------------------------------------------------
<Abstract>
@@ -57,6 +57,13 @@
NB. dword, word and byte are class members, then don't use dword as
unsigned int, for example.
+AVX
+
+You can omit the destination operand for almost all 3-operand mnemonics.
+
+vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
+vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
+
Label
L("L1");
@@ -148,6 +155,7 @@
-----------------------------------------------------------------------------
<History>
+2011/Feb/07 ver 2.991 beta fix vpextrw reg, xmm, imm
2011/Feb/04 ver 2.99 beta support AVX
2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
@@ -190,5 +198,5 @@
MITSUNARI Shigeo(herumi at nifty dot com)
---
-$Revision: 1.54 $
-$Date: 2011/02/04 03:27:59 $
+$Revision: 1.56 $
+$Date: 2011/02/07 06:18:26 $
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 6f700b9..9fa4b12 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -1653,7 +1653,6 @@
}
}
put("vpextrb", REG32e|MEM, XMM, IMM);
- put("vpextrw", REG32e, XMM, IMM);
put("vpextrd", REG32|MEM, XMM, IMM);
for (int i = 0; i < 3; i++) {
@@ -1748,7 +1747,7 @@
put("vpcmpgtq", XMM, XMM | MEM);
put("vpcmpgtq", XMM, XMM, XMM | MEM);
- put("vpextrw", MEM, XMM, IMM); // nasm iw wrong?
+ put("vpextrw", REG32e | MEM, XMM, IMM); // nasm is wrong?
#endif
}
public:
diff --git a/test/nm.cpp b/test/nm.cpp
index 02916e5..cbe1a71 100644
--- a/test/nm.cpp
+++ b/test/nm.cpp
@@ -1,1273 +1,133 @@
void gen0()
{
-vaddpd (xmm3, xmm7); dump();
-vaddpd (xmm2, ptr[eax+ecx+3]); dump();
-vaddpd (xmm2, xmm3, xmm5); dump();
-vaddpd (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vaddpd (ymm3, ptr[eax+ecx+3]); dump();
-vaddpd (ymm5, ymm3); dump();
-vaddpd (ymm3, ymm7, ptr[eax+ecx+3]); dump();
-vaddpd (ymm3, ymm0, ymm7); dump();
-vaddps (xmm0, xmm1); dump();
-vaddps (xmm6, ptr[eax+ecx+3]); dump();
-vaddps (xmm6, xmm0, xmm4); dump();
-vaddps (xmm0, xmm3, ptr[eax+ecx+3]); dump();
-vaddps (ymm3, ptr[eax+ecx+3]); dump();
-vaddps (ymm7, ymm3); dump();
-vaddps (ymm6, ymm7, ptr[eax+ecx+3]); dump();
-vaddps (ymm4, ymm6, ymm7); dump();
-vaddsd (xmm3, xmm1); dump();
-vaddsd (xmm7, ptr[eax+ecx+3]); dump();
-vaddsd (xmm3, xmm1, xmm6); dump();
-vaddsd (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vaddss (xmm2, xmm4); dump();
-vaddss (xmm5, ptr[eax+ecx+3]); dump();
-vaddss (xmm4, xmm4, xmm6); dump();
-vaddss (xmm6, xmm5, ptr[eax+ecx+3]); dump();
-vsubpd (xmm5, xmm0); dump();
-vsubpd (xmm4, ptr[eax+ecx+3]); dump();
-vsubpd (xmm2, xmm1, xmm2); dump();
-vsubpd (xmm1, xmm3, ptr[eax+ecx+3]); dump();
-vsubpd (ymm6, ptr[eax+ecx+3]); dump();
-vsubpd (ymm0, ymm6); dump();
-vsubpd (ymm6, ymm3, ptr[eax+ecx+3]); dump();
-vsubpd (ymm3, ymm6, ymm2); dump();
-vsubps (xmm0, xmm6); dump();
-vsubps (xmm6, ptr[eax+ecx+3]); dump();
-vsubps (xmm7, xmm0, xmm0); dump();
-vsubps (xmm6, xmm6, ptr[eax+ecx+3]); dump();
-vsubps (ymm6, ptr[eax+ecx+3]); dump();
-vsubps (ymm4, ymm1); dump();
-vsubps (ymm7, ymm6, ptr[eax+ecx+3]); dump();
-vsubps (ymm1, ymm5, ymm4); dump();
-vsubsd (xmm0, xmm2); dump();
-vsubsd (xmm1, ptr[eax+ecx+3]); dump();
-vsubsd (xmm6, xmm6, xmm3); dump();
-vsubsd (xmm4, xmm7, ptr[eax+ecx+3]); dump();
-vsubss (xmm6, xmm6); dump();
-vsubss (xmm6, ptr[eax+ecx+3]); dump();
-vsubss (xmm2, xmm4, xmm2); dump();
-vsubss (xmm2, xmm0, ptr[eax+ecx+3]); dump();
-vmulpd (xmm6, xmm4); dump();
-vmulpd (xmm2, ptr[eax+ecx+3]); dump();
-vmulpd (xmm5, xmm1, xmm7); dump();
-vmulpd (xmm4, xmm1, ptr[eax+ecx+3]); dump();
-vmulpd (ymm6, ptr[eax+ecx+3]); dump();
-vmulpd (ymm5, ymm6); dump();
-vmulpd (ymm7, ymm3, ptr[eax+ecx+3]); dump();
-vmulpd (ymm2, ymm6, ymm2); dump();
-vmulps (xmm0, xmm5); dump();
-vmulps (xmm6, ptr[eax+ecx+3]); dump();
-vmulps (xmm6, xmm0, xmm3); dump();
-vmulps (xmm1, xmm3, ptr[eax+ecx+3]); dump();
-vmulps (ymm7, ptr[eax+ecx+3]); dump();
-vmulps (ymm7, ymm0); dump();
-vmulps (ymm2, ymm3, ptr[eax+ecx+3]); dump();
-vmulps (ymm0, ymm7, ymm2); dump();
-vmulsd (xmm3, xmm6); dump();
-vmulsd (xmm0, ptr[eax+ecx+3]); dump();
-vmulsd (xmm4, xmm7, xmm2); dump();
-vmulsd (xmm5, xmm2, ptr[eax+ecx+3]); dump();
-vmulss (xmm7, xmm1); dump();
-vmulss (xmm1, ptr[eax+ecx+3]); dump();
-vmulss (xmm0, xmm0, xmm5); dump();
-vmulss (xmm2, xmm1, ptr[eax+ecx+3]); dump();
-vdivpd (xmm4, xmm6); dump();
-vdivpd (xmm7, ptr[eax+ecx+3]); dump();
-vdivpd (xmm7, xmm0, xmm7); dump();
-vdivpd (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vdivpd (ymm0, ptr[eax+ecx+3]); dump();
-vdivpd (ymm3, ymm0); dump();
-vdivpd (ymm0, ymm6, ptr[eax+ecx+3]); dump();
-vdivpd (ymm2, ymm3, ymm1); dump();
-vdivps (xmm0, xmm1); dump();
-vdivps (xmm2, ptr[eax+ecx+3]); dump();
-vdivps (xmm5, xmm0, xmm0); dump();
-vdivps (xmm2, xmm2, ptr[eax+ecx+3]); dump();
-vdivps (ymm0, ptr[eax+ecx+3]); dump();
-vdivps (ymm6, ymm6); dump();
-vdivps (ymm1, ymm4, ptr[eax+ecx+3]); dump();
-vdivps (ymm6, ymm2, ymm7); dump();
-vdivsd (xmm3, xmm2); dump();
-vdivsd (xmm1, ptr[eax+ecx+3]); dump();
-vdivsd (xmm0, xmm1, xmm3); dump();
-vdivsd (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vdivss (xmm0, xmm0); dump();
-vdivss (xmm3, ptr[eax+ecx+3]); dump();
-vdivss (xmm0, xmm7, xmm7); dump();
-vdivss (xmm1, xmm1, ptr[eax+ecx+3]); dump();
-vmaxpd (xmm1, xmm0); dump();
-vmaxpd (xmm5, ptr[eax+ecx+3]); dump();
-vmaxpd (xmm7, xmm4, xmm2); dump();
-vmaxpd (xmm4, xmm3, ptr[eax+ecx+3]); dump();
-vmaxpd (ymm3, ptr[eax+ecx+3]); dump();
-vmaxpd (ymm5, ymm3); dump();
-vmaxpd (ymm4, ymm5, ptr[eax+ecx+3]); dump();
-vmaxpd (ymm5, ymm5, ymm4); dump();
-vmaxps (xmm1, xmm3); dump();
-vmaxps (xmm4, ptr[eax+ecx+3]); dump();
-vmaxps (xmm2, xmm3, xmm2); dump();
-vmaxps (xmm1, xmm5, ptr[eax+ecx+3]); dump();
-vmaxps (ymm6, ptr[eax+ecx+3]); dump();
-vmaxps (ymm2, ymm4); dump();
-vmaxps (ymm2, ymm7, ptr[eax+ecx+3]); dump();
-vmaxps (ymm3, ymm3, ymm2); dump();
-vmaxsd (xmm7, xmm4); dump();
-vmaxsd (xmm1, ptr[eax+ecx+3]); dump();
-vmaxsd (xmm0, xmm3, xmm0); dump();
-vmaxsd (xmm4, xmm0, ptr[eax+ecx+3]); dump();
-vmaxss (xmm6, xmm7); dump();
-vmaxss (xmm5, ptr[eax+ecx+3]); dump();
-vmaxss (xmm3, xmm7, xmm6); dump();
-vmaxss (xmm3, xmm6, ptr[eax+ecx+3]); dump();
-vminpd (xmm4, xmm5); dump();
-vminpd (xmm6, ptr[eax+ecx+3]); dump();
-vminpd (xmm0, xmm7, xmm4); dump();
-vminpd (xmm6, xmm2, ptr[eax+ecx+3]); dump();
-vminpd (ymm1, ptr[eax+ecx+3]); dump();
-vminpd (ymm4, ymm0); dump();
-vminpd (ymm0, ymm4, ptr[eax+ecx+3]); dump();
-vminpd (ymm7, ymm7, ymm1); dump();
-vminps (xmm3, xmm6); dump();
-vminps (xmm7, ptr[eax+ecx+3]); dump();
-vminps (xmm7, xmm0, xmm4); dump();
-vminps (xmm4, xmm3, ptr[eax+ecx+3]); dump();
-vminps (ymm6, ptr[eax+ecx+3]); dump();
-vminps (ymm7, ymm2); dump();
-vminps (ymm5, ymm5, ptr[eax+ecx+3]); dump();
-vminps (ymm2, ymm3, ymm0); dump();
-vminsd (xmm7, xmm2); dump();
-vminsd (xmm5, ptr[eax+ecx+3]); dump();
-vminsd (xmm2, xmm6, xmm6); dump();
-vminsd (xmm4, xmm5, ptr[eax+ecx+3]); dump();
-vminss (xmm5, xmm7); dump();
-vminss (xmm2, ptr[eax+ecx+3]); dump();
-vminss (xmm5, xmm0, xmm5); dump();
-vminss (xmm2, xmm4, ptr[eax+ecx+3]); dump();
-vandpd (xmm3, xmm1); dump();
-vandpd (xmm0, ptr[eax+ecx+3]); dump();
-vandpd (xmm2, xmm3, xmm5); dump();
-vandpd (xmm2, xmm5, ptr[eax+ecx+3]); dump();
-vandpd (ymm2, ptr[eax+ecx+3]); dump();
-vandpd (ymm2, ymm5); dump();
-vandpd (ymm7, ymm5, ptr[eax+ecx+3]); dump();
-vandpd (ymm3, ymm1, ymm2); dump();
-vandps (xmm7, xmm5); dump();
-vandps (xmm1, ptr[eax+ecx+3]); dump();
-vandps (xmm0, xmm5, xmm2); dump();
-vandps (xmm4, xmm5, ptr[eax+ecx+3]); dump();
-vandps (ymm4, ptr[eax+ecx+3]); dump();
-vandps (ymm6, ymm2); dump();
-vandps (ymm7, ymm6, ptr[eax+ecx+3]); dump();
-vandps (ymm5, ymm0, ymm1); dump();
-vandnpd (xmm3, xmm0); dump();
-vandnpd (xmm5, ptr[eax+ecx+3]); dump();
-vandnpd (xmm6, xmm2, xmm0); dump();
-vandnpd (xmm2, xmm6, ptr[eax+ecx+3]); dump();
-vandnpd (ymm3, ptr[eax+ecx+3]); dump();
-vandnpd (ymm6, ymm2); dump();
-vandnpd (ymm6, ymm7, ptr[eax+ecx+3]); dump();
-vandnpd (ymm6, ymm3, ymm2); dump();
-vandnps (xmm3, xmm6); dump();
-vandnps (xmm4, ptr[eax+ecx+3]); dump();
-vandnps (xmm0, xmm3, xmm5); dump();
-vandnps (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vandnps (ymm6, ptr[eax+ecx+3]); dump();
-vandnps (ymm4, ymm3); dump();
-vandnps (ymm5, ymm0, ptr[eax+ecx+3]); dump();
-vandnps (ymm6, ymm1, ymm2); dump();
-vorpd (xmm7, xmm4); dump();
-vorpd (xmm6, ptr[eax+ecx+3]); dump();
-vorpd (xmm5, xmm4, xmm0); dump();
-vorpd (xmm2, xmm3, ptr[eax+ecx+3]); dump();
-vorpd (ymm5, ptr[eax+ecx+3]); dump();
-vorpd (ymm0, ymm7); dump();
-vorpd (ymm4, ymm0, ptr[eax+ecx+3]); dump();
-vorpd (ymm2, ymm5, ymm0); dump();
-vorps (xmm6, xmm4); dump();
-vorps (xmm0, ptr[eax+ecx+3]); dump();
-vorps (xmm5, xmm4, xmm1); dump();
-vorps (xmm2, xmm3, ptr[eax+ecx+3]); dump();
-vorps (ymm2, ptr[eax+ecx+3]); dump();
-vorps (ymm6, ymm0); dump();
-vorps (ymm4, ymm3, ptr[eax+ecx+3]); dump();
-vorps (ymm2, ymm3, ymm6); dump();
-vxorpd (xmm4, xmm0); dump();
-vxorpd (xmm3, ptr[eax+ecx+3]); dump();
-vxorpd (xmm6, xmm5, xmm3); dump();
-vxorpd (xmm7, xmm7, ptr[eax+ecx+3]); dump();
-vxorpd (ymm5, ptr[eax+ecx+3]); dump();
-vxorpd (ymm3, ymm6); dump();
-vxorpd (ymm4, ymm7, ptr[eax+ecx+3]); dump();
-vxorpd (ymm4, ymm7, ymm4); dump();
-vxorps (xmm7, xmm1); dump();
-vxorps (xmm4, ptr[eax+ecx+3]); dump();
-vxorps (xmm0, xmm7, xmm7); dump();
-vxorps (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vxorps (ymm7, ptr[eax+ecx+3]); dump();
-vxorps (ymm1, ymm2); dump();
-vxorps (ymm1, ymm7, ptr[eax+ecx+3]); dump();
-vxorps (ymm4, ymm6, ymm5); dump();
-vaddsubpd (xmm4, xmm4); dump();
-vaddsubpd (xmm1, ptr[eax+ecx+3]); dump();
-vaddsubpd (xmm0, xmm3, xmm5); dump();
-vaddsubpd (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vaddsubpd (ymm2, ptr[eax+ecx+3]); dump();
-vaddsubpd (ymm3, ymm5); dump();
-vaddsubpd (ymm1, ymm4, ptr[eax+ecx+3]); dump();
-vaddsubpd (ymm5, ymm5, ymm4); dump();
-vaddsubps (xmm6, xmm5); dump();
-vaddsubps (xmm0, ptr[eax+ecx+3]); dump();
-vaddsubps (xmm6, xmm7, xmm5); dump();
-vaddsubps (xmm4, xmm4, ptr[eax+ecx+3]); dump();
-vaddsubps (ymm7, ptr[eax+ecx+3]); dump();
-vaddsubps (ymm4, ymm6); dump();
-vaddsubps (ymm4, ymm2, ptr[eax+ecx+3]); dump();
-vaddsubps (ymm7, ymm0, ymm7); dump();
-vhaddpd (xmm1, xmm3); dump();
-vhaddpd (xmm2, ptr[eax+ecx+3]); dump();
-vhaddpd (xmm1, xmm3, xmm7); dump();
-vhaddpd (xmm4, xmm7, ptr[eax+ecx+3]); dump();
-vhaddpd (ymm6, ptr[eax+ecx+3]); dump();
-vhaddpd (ymm0, ymm6); dump();
-vhaddpd (ymm2, ymm5, ptr[eax+ecx+3]); dump();
-vhaddpd (ymm7, ymm1, ymm0); dump();
-vhaddps (xmm7, xmm1); dump();
-vhaddps (xmm6, ptr[eax+ecx+3]); dump();
-vhaddps (xmm0, xmm0, xmm3); dump();
-vhaddps (xmm0, xmm6, ptr[eax+ecx+3]); dump();
-vhaddps (ymm0, ptr[eax+ecx+3]); dump();
-vhaddps (ymm0, ymm2); dump();
-vhaddps (ymm3, ymm7, ptr[eax+ecx+3]); dump();
-vhaddps (ymm2, ymm4, ymm2); dump();
-vhsubpd (xmm6, xmm2); dump();
-vhsubpd (xmm6, ptr[eax+ecx+3]); dump();
-vhsubpd (xmm7, xmm0, xmm2); dump();
-vhsubpd (xmm2, xmm6, ptr[eax+ecx+3]); dump();
-vhsubpd (ymm3, ptr[eax+ecx+3]); dump();
-vhsubpd (ymm0, ymm7); dump();
-vhsubpd (ymm7, ymm6, ptr[eax+ecx+3]); dump();
-vhsubpd (ymm0, ymm7, ymm4); dump();
-vhsubps (xmm5, xmm5); dump();
-vhsubps (xmm2, ptr[eax+ecx+3]); dump();
-vhsubps (xmm6, xmm5, xmm1); dump();
-vhsubps (xmm0, xmm6, ptr[eax+ecx+3]); dump();
-vhsubps (ymm3, ptr[eax+ecx+3]); dump();
-vhsubps (ymm5, ymm2); dump();
-vhsubps (ymm7, ymm2, ptr[eax+ecx+3]); dump();
-vhsubps (ymm1, ymm5, ymm7); dump();
-vextractps (ptr[eax+ecx+3], xmm6, 4); dump();
-vextractps (edx, xmm6, 4); dump();
-vextractps (eax, xmm2, 4); dump();
-vldmxcsr (ptr[eax+ecx+3]); dump();
-vstmxcsr (ptr[eax+ecx+3]); dump();
-vmaskmovdqu (xmm4, xmm2); dump();
-vmovd (xmm1, ptr[eax+ecx+3]); dump();
-vmovd (xmm6, ebp); dump();
-vmovd (xmm7, eax); dump();
-vmovd (ptr[eax+ecx+3], xmm2); dump();
-vmovd (edx, xmm7); dump();
-vmovd (eax, xmm1); dump();
-vmovhlps (xmm4, xmm1); dump();
-vmovhlps (xmm2, xmm5, xmm4); dump();
-vmovlhps (xmm7, xmm3); dump();
-vmovlhps (xmm3, xmm0, xmm5); dump();
-vmovhpd (xmm7, xmm7, ptr[eax+ecx+3]); dump();
-vmovhpd (xmm2, ptr[eax+ecx+3]); dump();
-vmovhpd (ptr[eax+ecx+3], xmm7); dump();
-vmovhps (xmm3, xmm4, ptr[eax+ecx+3]); dump();
-vmovhps (xmm1, ptr[eax+ecx+3]); dump();
-vmovhps (ptr[eax+ecx+3], xmm6); dump();
-vmovlpd (xmm0, xmm6, ptr[eax+ecx+3]); dump();
-vmovlpd (xmm2, ptr[eax+ecx+3]); dump();
-vmovlpd (ptr[eax+ecx+3], xmm1); dump();
-vmovlps (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vmovlps (xmm2, ptr[eax+ecx+3]); dump();
-vmovlps (ptr[eax+ecx+3], xmm6); dump();
-vmovmskpd (edi, xmm2); dump();
-vmovmskpd (edx, ymm6); dump();
-vmovmskpd (eax, xmm5); dump();
-vmovmskpd (eax, ymm5); dump();
-vmovmskps (edi, xmm5); dump();
-vmovmskps (edi, ymm2); dump();
-vmovmskps (eax, xmm5); dump();
-vmovmskps (eax, ymm5); dump();
-vmovntdq (ptr[eax+ecx+3], xmm2); dump();
-vmovntdq (ptr[eax+ecx+3], ymm5); dump();
-vmovntpd (ptr[eax+ecx+3], xmm4); dump();
-vmovntpd (ptr[eax+ecx+3], ymm3); dump();
-vmovntps (ptr[eax+ecx+3], xmm4); dump();
-vmovntps (ptr[eax+ecx+3], ymm2); dump();
-vmovntdqa (xmm0, ptr[eax+ecx+3]); dump();
-vmovsd (xmm6, xmm7, xmm6); dump();
-vmovsd (xmm0, xmm3); dump();
-vmovsd (xmm5, ptr[eax+ecx+3]); dump();
-vmovsd (ptr[eax+ecx+3], xmm2); dump();
-vmovss (xmm4, xmm5, xmm1); dump();
-vmovss (xmm4, xmm7); dump();
-vmovss (xmm6, ptr[eax+ecx+3]); dump();
-vmovss (ptr[eax+ecx+3], xmm1); dump();
-vpextrb (ptr[eax+ecx+3], xmm6, 4); dump();
-vpextrb (ecx, xmm0, 4); dump();
-vpextrb (eax, xmm0, 4); dump();
-vpextrw (edi, xmm3, 4); dump();
-vpextrw (eax, xmm2, 4); dump();
-vpextrd (ptr[eax+ecx+3], xmm2, 4); dump();
-vpextrd (edi, xmm6, 4); dump();
-vpextrd (eax, xmm2, 4); dump();
-vpinsrb (xmm0, xmm6, ptr[eax+ecx+3], 4); dump();
-vpinsrb (xmm2, xmm2, edi, 4); dump();
-vpinsrb (xmm6, xmm3, eax, 4); dump();
-vpinsrb (xmm5, ptr[eax+ecx+3], 4); dump();
-vpinsrb (xmm2, ecx, 4); dump();
-vpinsrb (xmm5, eax, 4); dump();
-vpinsrw (xmm6, xmm6, ptr[eax+ecx+3], 4); dump();
-vpinsrw (xmm2, xmm2, esi, 4); dump();
-vpinsrw (xmm3, xmm3, eax, 4); dump();
-vpinsrw (xmm3, ptr[eax+ecx+3], 4); dump();
-vpinsrw (xmm6, ebp, 4); dump();
-vpinsrw (xmm4, eax, 4); dump();
-vpinsrd (xmm2, xmm2, ptr[eax+ecx+3], 4); dump();
-vpinsrd (xmm3, xmm5, edx, 4); dump();
-vpinsrd (xmm0, xmm3, eax, 4); dump();
-vpinsrd (xmm0, ptr[eax+ecx+3], 4); dump();
-vpinsrd (xmm0, ecx, 4); dump();
-vpinsrd (xmm3, eax, 4); dump();
-vpmovmskb (edx, xmm5); dump();
-vpmovmskb (eax, xmm3); dump();
-vblendvpd (xmm1, xmm1, xmm5, xmm3); dump();
-vblendvpd (xmm6, xmm0, ptr[eax+ecx+3], xmm1); dump();
-vblendvpd (xmm2, xmm5, xmm4); dump();
-vblendvpd (xmm7, ptr[eax+ecx+3], xmm3); dump();
-vblendvpd (ymm6, ymm0, ptr[eax+ecx+3], ymm2); dump();
-vblendvpd (ymm0, ymm1, ymm2, ymm7); dump();
-vblendvpd (ymm1, ptr[eax+ecx+3], ymm0); dump();
-vblendvpd (ymm3, ymm6, ymm2); dump();
-vblendvps (xmm0, xmm3, xmm5, xmm7); dump();
-vblendvps (xmm5, xmm1, ptr[eax+ecx+3], xmm4); dump();
-vblendvps (xmm1, xmm0, xmm5); dump();
-vblendvps (xmm4, ptr[eax+ecx+3], xmm0); dump();
-vblendvps (ymm2, ymm0, ptr[eax+ecx+3], ymm1); dump();
-vblendvps (ymm0, ymm6, ymm3, ymm6); dump();
-vblendvps (ymm7, ptr[eax+ecx+3], ymm3); dump();
-vblendvps (ymm1, ymm0, ymm4); dump();
-vpblendvb (xmm2, xmm4, xmm5, xmm1); dump();
-vpblendvb (xmm0, xmm1, ptr[eax+ecx+3], xmm4); dump();
-vpblendvb (xmm2, xmm3, xmm0); dump();
-vpblendvb (xmm4, ptr[eax+ecx+3], xmm4); dump();
-vcvtss2si (ecx, xmm5); dump();
-vcvtss2si (ebx, ptr[eax+ecx+3]); dump();
-vcvtss2si (eax, xmm4); dump();
-vcvtss2si (eax, ptr[eax+ecx+3]); dump();
-vcvttss2si (ebx, xmm3); dump();
-vcvttss2si (edx, ptr[eax+ecx+3]); dump();
-vcvttss2si (eax, xmm3); dump();
-vcvttss2si (eax, ptr[eax+ecx+3]); dump();
-vcvtsd2si (ecx, xmm3); dump();
-vcvtsd2si (ecx, ptr[eax+ecx+3]); dump();
-vcvtsd2si (eax, xmm7); dump();
-vcvtsd2si (eax, ptr[eax+ecx+3]); dump();
-vcvttsd2si (esp, xmm7); dump();
-vcvttsd2si (ecx, ptr[eax+ecx+3]); dump();
-vcvttsd2si (eax, xmm1); dump();
-vcvttsd2si (eax, ptr[eax+ecx+3]); dump();
-vcvtsi2ss (xmm4, xmm1, ptr[eax+ecx+3]); dump();
-vcvtsi2ss (xmm6, xmm3, edi); dump();
-vcvtsi2ss (xmm1, xmm3, eax); dump();
-vcvtsi2ss (xmm1, ptr[eax+ecx+3]); dump();
-vcvtsi2ss (xmm7, ebp); dump();
-vcvtsi2ss (xmm4, eax); dump();
-vcvtsi2sd (xmm3, xmm6, ptr[eax+ecx+3]); dump();
-vcvtsi2sd (xmm6, xmm1, esp); dump();
-vcvtsi2sd (xmm3, xmm6, eax); dump();
-vcvtsi2sd (xmm3, ptr[eax+ecx+3]); dump();
-vcvtsi2sd (xmm6, esp); dump();
-vcvtsi2sd (xmm3, eax); dump();
-vcvtps2pd (xmm2, xmm0); dump();
-vcvtps2pd (xmm4, ptr[eax+ecx+3]); dump();
-vcvtps2pd (ymm4, xmm6); dump();
-vcvtps2pd (ymm1, ptr[eax+ecx+3]); dump();
-vcvtdq2pd (xmm2, xmm0); dump();
-vcvtdq2pd (xmm5, ptr[eax+ecx+3]); dump();
-vcvtdq2pd (ymm5, xmm1); dump();
-vcvtdq2pd (ymm2, ptr[eax+ecx+3]); dump();
-vcvtpd2ps (xmm5, xmm0); dump();
-vcvtpd2ps (xmm2, ptr[eax+ecx+3]); dump();
-vcvtpd2ps (xmm2, ymm3); dump();
-vcvtpd2dq (xmm5, xmm7); dump();
-vcvtpd2dq (xmm0, ptr[eax+ecx+3]); dump();
-vcvtpd2dq (xmm3, ymm7); dump();
-vcvttpd2dq (xmm2, xmm7); dump();
-vcvttpd2dq (xmm3, ptr[eax+ecx+3]); dump();
-vcvttpd2dq (xmm5, ymm6); dump();
-vaesenc (xmm5, xmm2); dump();
-vaesenc (xmm4, ptr[eax+ecx+3]); dump();
-vaesenc (xmm6, xmm6, xmm3); dump();
-vaesenc (xmm3, xmm4, ptr[eax+ecx+3]); dump();
-vaesenclast (xmm3, xmm6); dump();
-vaesenclast (xmm0, ptr[eax+ecx+3]); dump();
-vaesenclast (xmm5, xmm1, xmm0); dump();
-vaesenclast (xmm6, xmm5, ptr[eax+ecx+3]); dump();
-vaesdec (xmm5, xmm5); dump();
-vaesdec (xmm3, ptr[eax+ecx+3]); dump();
-vaesdec (xmm3, xmm4, xmm6); dump();
-vaesdec (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vaesdeclast (xmm2, xmm5); dump();
-vaesdeclast (xmm3, ptr[eax+ecx+3]); dump();
-vaesdeclast (xmm0, xmm2, xmm5); dump();
-vaesdeclast (xmm3, xmm6, ptr[eax+ecx+3]); dump();
-vcvtsd2ss (xmm0, xmm0); dump();
-vcvtsd2ss (xmm6, ptr[eax+ecx+3]); dump();
-vcvtsd2ss (xmm7, xmm2, xmm0); dump();
-vcvtsd2ss (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vcvtss2sd (xmm4, xmm4); dump();
-vcvtss2sd (xmm1, ptr[eax+ecx+3]); dump();
-vcvtss2sd (xmm3, xmm4, xmm4); dump();
-vcvtss2sd (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vpacksswb (xmm0, xmm3); dump();
-vpacksswb (xmm7, ptr[eax+ecx+3]); dump();
-vpacksswb (xmm1, xmm7, xmm7); dump();
-vpacksswb (xmm4, xmm1, ptr[eax+ecx+3]); dump();
-vpackssdw (xmm4, xmm1); dump();
-vpackssdw (xmm4, ptr[eax+ecx+3]); dump();
-vpackssdw (xmm0, xmm3, xmm4); dump();
-vpackssdw (xmm6, xmm1, ptr[eax+ecx+3]); dump();
-vpackuswb (xmm4, xmm5); dump();
-vpackuswb (xmm7, ptr[eax+ecx+3]); dump();
-vpackuswb (xmm6, xmm0, xmm7); dump();
-vpackuswb (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vpackusdw (xmm5, xmm0); dump();
-vpackusdw (xmm0, ptr[eax+ecx+3]); dump();
-vpackusdw (xmm3, xmm1, xmm0); dump();
-vpackusdw (xmm2, xmm4, ptr[eax+ecx+3]); dump();
-vpaddb (xmm4, xmm1); dump();
-vpaddb (xmm6, ptr[eax+ecx+3]); dump();
-vpaddb (xmm2, xmm5, xmm7); dump();
-vpaddb (xmm7, xmm4, ptr[eax+ecx+3]); dump();
-vpaddw (xmm7, xmm0); dump();
-vpaddw (xmm4, ptr[eax+ecx+3]); dump();
-vpaddw (xmm3, xmm3, xmm5); dump();
-vpaddw (xmm1, xmm2, ptr[eax+ecx+3]); dump();
-vpaddd (xmm5, xmm5); dump();
-vpaddd (xmm3, ptr[eax+ecx+3]); dump();
-vpaddd (xmm0, xmm6, xmm5); dump();
-vpaddd (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vpaddq (xmm5, xmm1); dump();
-vpaddq (xmm0, ptr[eax+ecx+3]); dump();
-vpaddq (xmm0, xmm3, xmm2); dump();
-vpaddq (xmm6, xmm0, ptr[eax+ecx+3]); dump();
-vpaddsb (xmm5, xmm0); dump();
-vpaddsb (xmm5, ptr[eax+ecx+3]); dump();
-vpaddsb (xmm5, xmm0, xmm3); dump();
-vpaddsb (xmm2, xmm7, ptr[eax+ecx+3]); dump();
-vpaddsw (xmm5, xmm5); dump();
-vpaddsw (xmm3, ptr[eax+ecx+3]); dump();
-vpaddsw (xmm1, xmm1, xmm5); dump();
-vpaddsw (xmm4, xmm7, ptr[eax+ecx+3]); dump();
-vpaddusb (xmm4, xmm5); dump();
-vpaddusb (xmm4, ptr[eax+ecx+3]); dump();
-vpaddusb (xmm1, xmm3, xmm6); dump();
-vpaddusb (xmm1, xmm1, ptr[eax+ecx+3]); dump();
-vpaddusw (xmm0, xmm4); dump();
-vpaddusw (xmm5, ptr[eax+ecx+3]); dump();
-vpaddusw (xmm7, xmm3, xmm3); dump();
-vpaddusw (xmm0, xmm3, ptr[eax+ecx+3]); dump();
-vpand (xmm4, xmm7); dump();
-vpand (xmm1, ptr[eax+ecx+3]); dump();
-vpand (xmm5, xmm0, xmm6); dump();
-vpand (xmm1, xmm5, ptr[eax+ecx+3]); dump();
-vpandn (xmm3, xmm2); dump();
-vpandn (xmm3, ptr[eax+ecx+3]); dump();
-vpandn (xmm7, xmm3, xmm3); dump();
-vpandn (xmm6, xmm4, ptr[eax+ecx+3]); dump();
-vpavgb (xmm4, xmm2); dump();
-vpavgb (xmm7, ptr[eax+ecx+3]); dump();
-vpavgb (xmm0, xmm4, xmm3); dump();
-vpavgb (xmm1, xmm4, ptr[eax+ecx+3]); dump();
-vpavgw (xmm5, xmm6); dump();
-vpavgw (xmm4, ptr[eax+ecx+3]); dump();
-vpavgw (xmm6, xmm7, xmm6); dump();
-vpavgw (xmm3, xmm0, ptr[eax+ecx+3]); dump();
-vpcmpeqb (xmm1, xmm2); dump();
-vpcmpeqb (xmm4, ptr[eax+ecx+3]); dump();
-vpcmpeqb (xmm2, xmm1, xmm4); dump();
-vpcmpeqb (xmm3, xmm6, ptr[eax+ecx+3]); dump();
-vpcmpeqw (xmm6, xmm0); dump();
-vpcmpeqw (xmm2, ptr[eax+ecx+3]); dump();
-vpcmpeqw (xmm0, xmm0, xmm7); dump();
-vpcmpeqw (xmm3, xmm2, ptr[eax+ecx+3]); dump();
-vpcmpeqd (xmm0, xmm7); dump();
-vpcmpeqd (xmm1, ptr[eax+ecx+3]); dump();
-vpcmpeqd (xmm2, xmm5, xmm7); dump();
-vpcmpeqd (xmm6, xmm0, ptr[eax+ecx+3]); dump();
-vpcmpgtb (xmm0, xmm0); dump();
-vpcmpgtb (xmm2, ptr[eax+ecx+3]); dump();
-vpcmpgtb (xmm4, xmm3, xmm4); dump();
-vpcmpgtb (xmm1, xmm7, ptr[eax+ecx+3]); dump();
-vpcmpgtw (xmm0, xmm3); dump();
-vpcmpgtw (xmm7, ptr[eax+ecx+3]); dump();
-vpcmpgtw (xmm6, xmm7, xmm1); dump();
-vpcmpgtw (xmm4, xmm2, ptr[eax+ecx+3]); dump();
-vpcmpgtd (xmm6, xmm1); dump();
-vpcmpgtd (xmm1, ptr[eax+ecx+3]); dump();
-vpcmpgtd (xmm1, xmm4, xmm0); dump();
-vpcmpgtd (xmm1, xmm3, ptr[eax+ecx+3]); dump();
-vphaddw (xmm7, xmm4); dump();
-vphaddw (xmm0, ptr[eax+ecx+3]); dump();
-vphaddw (xmm5, xmm2, xmm3); dump();
-vphaddw (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vphaddd (xmm1, xmm2); dump();
-vphaddd (xmm3, ptr[eax+ecx+3]); dump();
-vphaddd (xmm1, xmm2, xmm0); dump();
-vphaddd (xmm2, xmm6, ptr[eax+ecx+3]); dump();
-vphaddsw (xmm6, xmm3); dump();
-vphaddsw (xmm7, ptr[eax+ecx+3]); dump();
-vphaddsw (xmm0, xmm1, xmm2); dump();
-vphaddsw (xmm1, xmm2, ptr[eax+ecx+3]); dump();
-vphsubw (xmm1, xmm7); dump();
-vphsubw (xmm6, ptr[eax+ecx+3]); dump();
-vphsubw (xmm7, xmm3, xmm1); dump();
-vphsubw (xmm3, xmm3, ptr[eax+ecx+3]); dump();
-vphsubd (xmm2, xmm2); dump();
-vphsubd (xmm4, ptr[eax+ecx+3]); dump();
-vphsubd (xmm0, xmm5, xmm0); dump();
-vphsubd (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vphsubsw (xmm7, xmm4); dump();
-vphsubsw (xmm0, ptr[eax+ecx+3]); dump();
-vphsubsw (xmm2, xmm7, xmm0); dump();
-vphsubsw (xmm3, xmm6, ptr[eax+ecx+3]); dump();
-vpmaddwd (xmm7, xmm5); dump();
-vpmaddwd (xmm0, ptr[eax+ecx+3]); dump();
-vpmaddwd (xmm5, xmm6, xmm7); dump();
-vpmaddwd (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vpmaddubsw (xmm2, xmm1); dump();
-vpmaddubsw (xmm3, ptr[eax+ecx+3]); dump();
-vpmaddubsw (xmm0, xmm4, xmm4); dump();
-vpmaddubsw (xmm6, xmm3, ptr[eax+ecx+3]); dump();
-vpmaxsb (xmm4, xmm0); dump();
-vpmaxsb (xmm7, ptr[eax+ecx+3]); dump();
-vpmaxsb (xmm0, xmm0, xmm0); dump();
-vpmaxsb (xmm6, xmm3, ptr[eax+ecx+3]); dump();
-vpmaxsw (xmm7, xmm1); dump();
-vpmaxsw (xmm4, ptr[eax+ecx+3]); dump();
-vpmaxsw (xmm5, xmm2, xmm3); dump();
-vpmaxsw (xmm2, xmm1, ptr[eax+ecx+3]); dump();
-vpmaxsd (xmm1, xmm3); dump();
-vpmaxsd (xmm5, ptr[eax+ecx+3]); dump();
-vpmaxsd (xmm0, xmm5, xmm6); dump();
-vpmaxsd (xmm4, xmm2, ptr[eax+ecx+3]); dump();
-vpmaxub (xmm7, xmm6); dump();
-vpmaxub (xmm0, ptr[eax+ecx+3]); dump();
-vpmaxub (xmm0, xmm0, xmm3); dump();
-vpmaxub (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vpmaxuw (xmm7, xmm4); dump();
-vpmaxuw (xmm6, ptr[eax+ecx+3]); dump();
-vpmaxuw (xmm2, xmm1, xmm4); dump();
-vpmaxuw (xmm4, xmm1, ptr[eax+ecx+3]); dump();
-vpmaxud (xmm0, xmm2); dump();
-vpmaxud (xmm5, ptr[eax+ecx+3]); dump();
-vpmaxud (xmm5, xmm6, xmm0); dump();
-vpmaxud (xmm3, xmm3, ptr[eax+ecx+3]); dump();
-vpminsb (xmm2, xmm0); dump();
-vpminsb (xmm7, ptr[eax+ecx+3]); dump();
-vpminsb (xmm3, xmm7, xmm3); dump();
-vpminsb (xmm6, xmm6, ptr[eax+ecx+3]); dump();
-vpminsw (xmm3, xmm4); dump();
-vpminsw (xmm5, ptr[eax+ecx+3]); dump();
-vpminsw (xmm6, xmm6, xmm1); dump();
-vpminsw (xmm1, xmm2, ptr[eax+ecx+3]); dump();
-vpminsd (xmm3, xmm0); dump();
-vpminsd (xmm4, ptr[eax+ecx+3]); dump();
-vpminsd (xmm6, xmm1, xmm0); dump();
-vpminsd (xmm4, xmm4, ptr[eax+ecx+3]); dump();
-vpminub (xmm5, xmm5); dump();
-vpminub (xmm0, ptr[eax+ecx+3]); dump();
-vpminub (xmm6, xmm3, xmm1); dump();
-vpminub (xmm7, xmm2, ptr[eax+ecx+3]); dump();
-vpminuw (xmm0, xmm6); dump();
-vpminuw (xmm3, ptr[eax+ecx+3]); dump();
-vpminuw (xmm6, xmm0, xmm2); dump();
-vpminuw (xmm5, xmm2, ptr[eax+ecx+3]); dump();
-vpminud (xmm6, xmm6); dump();
-vpminud (xmm4, ptr[eax+ecx+3]); dump();
-vpminud (xmm5, xmm1, xmm3); dump();
-vpminud (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vpmulhuw (xmm5, xmm1); dump();
-vpmulhuw (xmm2, ptr[eax+ecx+3]); dump();
-vpmulhuw (xmm5, xmm0, xmm1); dump();
-vpmulhuw (xmm6, xmm0, ptr[eax+ecx+3]); dump();
-vpmulhrsw (xmm4, xmm7); dump();
-vpmulhrsw (xmm0, ptr[eax+ecx+3]); dump();
-vpmulhrsw (xmm4, xmm4, xmm0); dump();
-vpmulhrsw (xmm4, xmm2, ptr[eax+ecx+3]); dump();
-vpmulhw (xmm0, xmm0); dump();
-vpmulhw (xmm5, ptr[eax+ecx+3]); dump();
-vpmulhw (xmm3, xmm3, xmm4); dump();
-vpmulhw (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vpmullw (xmm1, xmm4); dump();
-vpmullw (xmm2, ptr[eax+ecx+3]); dump();
-vpmullw (xmm7, xmm5, xmm3); dump();
-vpmullw (xmm1, xmm2, ptr[eax+ecx+3]); dump();
-vpmulld (xmm4, xmm6); dump();
-vpmulld (xmm1, ptr[eax+ecx+3]); dump();
-vpmulld (xmm5, xmm5, xmm7); dump();
-vpmulld (xmm3, xmm0, ptr[eax+ecx+3]); dump();
-vpmuludq (xmm5, xmm1); dump();
-vpmuludq (xmm0, ptr[eax+ecx+3]); dump();
-vpmuludq (xmm4, xmm1, xmm1); dump();
-vpmuludq (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vpmuldq (xmm7, xmm6); dump();
-vpmuldq (xmm2, ptr[eax+ecx+3]); dump();
-vpmuldq (xmm0, xmm2, xmm5); dump();
-vpmuldq (xmm1, xmm3, ptr[eax+ecx+3]); dump();
-vpor (xmm1, xmm5); dump();
-vpor (xmm4, ptr[eax+ecx+3]); dump();
-vpor (xmm6, xmm7, xmm6); dump();
-vpor (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vpsadbw (xmm5, xmm6); dump();
-vpsadbw (xmm5, ptr[eax+ecx+3]); dump();
-vpsadbw (xmm6, xmm0, xmm5); dump();
-vpsadbw (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vpsignb (xmm6, xmm6); dump();
-vpsignb (xmm0, ptr[eax+ecx+3]); dump();
-vpsignb (xmm4, xmm7, xmm0); dump();
-vpsignb (xmm7, xmm7, ptr[eax+ecx+3]); dump();
-vpsignw (xmm5, xmm0); dump();
-vpsignw (xmm7, ptr[eax+ecx+3]); dump();
-vpsignw (xmm3, xmm6, xmm0); dump();
-vpsignw (xmm7, xmm1, ptr[eax+ecx+3]); dump();
-vpsignd (xmm4, xmm4); dump();
-vpsignd (xmm2, ptr[eax+ecx+3]); dump();
-vpsignd (xmm0, xmm7, xmm0); dump();
-vpsignd (xmm5, xmm7, ptr[eax+ecx+3]); dump();
-vpsllw (xmm7, xmm3); dump();
-vpsllw (xmm3, ptr[eax+ecx+3]); dump();
-vpsllw (xmm2, xmm0, xmm5); dump();
-vpsllw (xmm2, xmm0, ptr[eax+ecx+3]); dump();
-vpslld (xmm4, xmm3); dump();
-vpslld (xmm2, ptr[eax+ecx+3]); dump();
-vpslld (xmm4, xmm4, xmm0); dump();
-vpslld (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vpsllq (xmm1, xmm0); dump();
-vpsllq (xmm3, ptr[eax+ecx+3]); dump();
-vpsllq (xmm0, xmm3, xmm3); dump();
-vpsllq (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vpsraw (xmm3, xmm4); dump();
-vpsraw (xmm2, ptr[eax+ecx+3]); dump();
-vpsraw (xmm1, xmm4, xmm5); dump();
-vpsraw (xmm6, xmm6, ptr[eax+ecx+3]); dump();
-vpsrad (xmm0, xmm3); dump();
-vpsrad (xmm6, ptr[eax+ecx+3]); dump();
-vpsrad (xmm6, xmm3, xmm5); dump();
-vpsrad (xmm4, xmm0, ptr[eax+ecx+3]); dump();
-vpsrlw (xmm2, xmm7); dump();
-vpsrlw (xmm7, ptr[eax+ecx+3]); dump();
-vpsrlw (xmm1, xmm5, xmm7); dump();
-vpsrlw (xmm2, xmm1, ptr[eax+ecx+3]); dump();
-vpsrld (xmm3, xmm4); dump();
-vpsrld (xmm3, ptr[eax+ecx+3]); dump();
-vpsrld (xmm1, xmm3, xmm7); dump();
-vpsrld (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vpsrlq (xmm7, xmm0); dump();
-vpsrlq (xmm6, ptr[eax+ecx+3]); dump();
-vpsrlq (xmm2, xmm2, xmm7); dump();
-vpsrlq (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vpsubb (xmm5, xmm7); dump();
-vpsubb (xmm5, ptr[eax+ecx+3]); dump();
-vpsubb (xmm3, xmm7, xmm0); dump();
-vpsubb (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vpsubw (xmm0, xmm2); dump();
-vpsubw (xmm4, ptr[eax+ecx+3]); dump();
-vpsubw (xmm3, xmm5, xmm4); dump();
-vpsubw (xmm3, xmm2, ptr[eax+ecx+3]); dump();
-vpsubd (xmm6, xmm1); dump();
-vpsubd (xmm3, ptr[eax+ecx+3]); dump();
-vpsubd (xmm6, xmm3, xmm6); dump();
-vpsubd (xmm5, xmm1, ptr[eax+ecx+3]); dump();
-vpsubq (xmm2, xmm6); dump();
-vpsubq (xmm1, ptr[eax+ecx+3]); dump();
-vpsubq (xmm1, xmm5, xmm4); dump();
-vpsubq (xmm7, xmm0, ptr[eax+ecx+3]); dump();
-vpsubsb (xmm0, xmm1); dump();
-vpsubsb (xmm4, ptr[eax+ecx+3]); dump();
-vpsubsb (xmm0, xmm6, xmm0); dump();
-vpsubsb (xmm5, xmm7, ptr[eax+ecx+3]); dump();
-vpsubsw (xmm0, xmm6); dump();
-vpsubsw (xmm4, ptr[eax+ecx+3]); dump();
-vpsubsw (xmm2, xmm2, xmm5); dump();
-vpsubsw (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vpsubusb (xmm3, xmm0); dump();
-vpsubusb (xmm4, ptr[eax+ecx+3]); dump();
-vpsubusb (xmm6, xmm5, xmm7); dump();
-vpsubusb (xmm0, xmm4, ptr[eax+ecx+3]); dump();
-vpsubusw (xmm3, xmm2); dump();
-vpsubusw (xmm7, ptr[eax+ecx+3]); dump();
-vpsubusw (xmm5, xmm0, xmm3); dump();
-vpsubusw (xmm6, xmm3, ptr[eax+ecx+3]); dump();
-vpunpckhbw (xmm0, xmm4); dump();
-vpunpckhbw (xmm7, ptr[eax+ecx+3]); dump();
-vpunpckhbw (xmm6, xmm2, xmm3); dump();
-vpunpckhbw (xmm3, xmm7, ptr[eax+ecx+3]); dump();
-vpunpckhwd (xmm1, xmm0); dump();
-vpunpckhwd (xmm0, ptr[eax+ecx+3]); dump();
-vpunpckhwd (xmm2, xmm4, xmm0); dump();
-vpunpckhwd (xmm2, xmm3, ptr[eax+ecx+3]); dump();
-vpunpckhdq (xmm2, xmm6); dump();
-vpunpckhdq (xmm3, ptr[eax+ecx+3]); dump();
-vpunpckhdq (xmm1, xmm1, xmm5); dump();
-vpunpckhdq (xmm3, xmm5, ptr[eax+ecx+3]); dump();
-vpunpckhqdq (xmm4, xmm4); dump();
-vpunpckhqdq (xmm1, ptr[eax+ecx+3]); dump();
-vpunpckhqdq (xmm7, xmm7, xmm4); dump();
-vpunpckhqdq (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vpunpcklbw (xmm2, xmm2); dump();
-vpunpcklbw (xmm4, ptr[eax+ecx+3]); dump();
-vpunpcklbw (xmm5, xmm7, xmm7); dump();
-vpunpcklbw (xmm4, xmm1, ptr[eax+ecx+3]); dump();
-vpunpcklwd (xmm6, xmm1); dump();
-vpunpcklwd (xmm6, ptr[eax+ecx+3]); dump();
-vpunpcklwd (xmm6, xmm6, xmm2); dump();
-vpunpcklwd (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vpunpckldq (xmm0, xmm6); dump();
-vpunpckldq (xmm2, ptr[eax+ecx+3]); dump();
-vpunpckldq (xmm2, xmm1, xmm6); dump();
-vpunpckldq (xmm4, xmm2, ptr[eax+ecx+3]); dump();
-vpunpcklqdq (xmm0, xmm5); dump();
-vpunpcklqdq (xmm0, ptr[eax+ecx+3]); dump();
-vpunpcklqdq (xmm6, xmm3, xmm1); dump();
-vpunpcklqdq (xmm5, xmm7, ptr[eax+ecx+3]); dump();
-vpxor (xmm6, xmm6); dump();
-vpxor (xmm1, ptr[eax+ecx+3]); dump();
-vpxor (xmm7, xmm7, xmm1); dump();
-vpxor (xmm1, xmm1, ptr[eax+ecx+3]); dump();
-vsqrtsd (xmm7, xmm1); dump();
-vsqrtsd (xmm5, ptr[eax+ecx+3]); dump();
-vsqrtsd (xmm3, xmm7, xmm0); dump();
-vsqrtsd (xmm2, xmm1, ptr[eax+ecx+3]); dump();
-vsqrtss (xmm0, xmm1); dump();
-vsqrtss (xmm6, ptr[eax+ecx+3]); dump();
-vsqrtss (xmm4, xmm5, xmm3); dump();
-vsqrtss (xmm5, xmm0, ptr[eax+ecx+3]); dump();
-vunpckhpd (xmm6, xmm5); dump();
-vunpckhpd (xmm7, ptr[eax+ecx+3]); dump();
-vunpckhpd (xmm0, xmm5, xmm5); dump();
-vunpckhpd (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vunpckhpd (ymm1, ptr[eax+ecx+3]); dump();
-vunpckhpd (ymm7, ymm6); dump();
-vunpckhpd (ymm3, ymm2, ptr[eax+ecx+3]); dump();
-vunpckhpd (ymm1, ymm2, ymm6); dump();
-vunpckhps (xmm3, xmm0); dump();
-vunpckhps (xmm5, ptr[eax+ecx+3]); dump();
-vunpckhps (xmm6, xmm2, xmm4); dump();
-vunpckhps (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vunpckhps (ymm1, ptr[eax+ecx+3]); dump();
-vunpckhps (ymm0, ymm7); dump();
-vunpckhps (ymm4, ymm0, ptr[eax+ecx+3]); dump();
-vunpckhps (ymm1, ymm6, ymm1); dump();
-vunpcklpd (xmm1, xmm5); dump();
-vunpcklpd (xmm4, ptr[eax+ecx+3]); dump();
-vunpcklpd (xmm4, xmm5, xmm0); dump();
-vunpcklpd (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vunpcklpd (ymm2, ptr[eax+ecx+3]); dump();
-vunpcklpd (ymm2, ymm0); dump();
-vunpcklpd (ymm0, ymm6, ptr[eax+ecx+3]); dump();
-vunpcklpd (ymm5, ymm7, ymm0); dump();
-vunpcklps (xmm0, xmm0); dump();
-vunpcklps (xmm6, ptr[eax+ecx+3]); dump();
-vunpcklps (xmm0, xmm2, xmm0); dump();
-vunpcklps (xmm4, xmm4, ptr[eax+ecx+3]); dump();
-vunpcklps (ymm5, ptr[eax+ecx+3]); dump();
-vunpcklps (ymm2, ymm0); dump();
-vunpcklps (ymm1, ymm5, ptr[eax+ecx+3]); dump();
-vunpcklps (ymm2, ymm4, ymm1); dump();
-vblendpd (xmm1, xmm5, xmm2, 4); dump();
-vblendpd (xmm5, xmm2, ptr[eax+ecx+3], 4); dump();
-vblendpd (xmm5, xmm7, 4); dump();
-vblendpd (xmm4, ptr[eax+ecx+3], 4); dump();
-vblendpd (ymm3, ymm7, ptr[eax+ecx+3], 4); dump();
-vblendpd (ymm3, ymm2, ymm6, 4); dump();
-vblendpd (ymm5, ptr[eax+ecx+3], 4); dump();
-vblendpd (ymm7, ymm4, 4); dump();
-vblendps (xmm0, xmm0, xmm4, 4); dump();
-vblendps (xmm5, xmm4, ptr[eax+ecx+3], 4); dump();
-vblendps (xmm4, xmm2, 4); dump();
-vblendps (xmm0, ptr[eax+ecx+3], 4); dump();
-vblendps (ymm2, ymm1, ptr[eax+ecx+3], 4); dump();
-vblendps (ymm0, ymm3, ymm4, 4); dump();
-vblendps (ymm3, ptr[eax+ecx+3], 4); dump();
-vblendps (ymm1, ymm5, 4); dump();
-vdppd (xmm6, xmm2, xmm0, 4); dump();
-vdppd (xmm0, xmm4, ptr[eax+ecx+3], 4); dump();
-vdppd (xmm6, xmm6, 4); dump();
-vdppd (xmm2, ptr[eax+ecx+3], 4); dump();
-vdpps (xmm2, xmm1, xmm3, 4); dump();
-vdpps (xmm3, xmm5, ptr[eax+ecx+3], 4); dump();
-vdpps (xmm0, xmm2, 4); dump();
-vdpps (xmm2, ptr[eax+ecx+3], 4); dump();
-vdpps (ymm3, ymm7, ptr[eax+ecx+3], 4); dump();
-vdpps (ymm2, ymm2, ymm3, 4); dump();
-vdpps (ymm4, ptr[eax+ecx+3], 4); dump();
-vdpps (ymm2, ymm5, 4); dump();
-vmpsadbw (xmm3, xmm4, xmm5, 4); dump();
-vmpsadbw (xmm4, xmm6, ptr[eax+ecx+3], 4); dump();
-vmpsadbw (xmm4, xmm1, 4); dump();
-vmpsadbw (xmm3, ptr[eax+ecx+3], 4); dump();
-vpblendw (xmm5, xmm2, xmm5, 4); dump();
-vpblendw (xmm1, xmm7, ptr[eax+ecx+3], 4); dump();
-vpblendw (xmm0, xmm0, 4); dump();
-vpblendw (xmm5, ptr[eax+ecx+3], 4); dump();
-vroundsd (xmm5, xmm4, xmm7, 4); dump();
-vroundsd (xmm6, xmm5, ptr[eax+ecx+3], 4); dump();
-vroundsd (xmm2, xmm6, 4); dump();
-vroundsd (xmm6, ptr[eax+ecx+3], 4); dump();
-vroundss (xmm7, xmm6, xmm6, 4); dump();
-vroundss (xmm0, xmm3, ptr[eax+ecx+3], 4); dump();
-vroundss (xmm3, xmm7, 4); dump();
-vroundss (xmm2, ptr[eax+ecx+3], 4); dump();
-vpclmulqdq (xmm5, xmm7, xmm1, 4); dump();
-vpclmulqdq (xmm1, xmm0, ptr[eax+ecx+3], 4); dump();
-vpclmulqdq (xmm3, xmm5, 4); dump();
-vpclmulqdq (xmm5, ptr[eax+ecx+3], 4); dump();
-vcmppd (xmm2, xmm0, xmm5, 4); dump();
-vcmppd (xmm7, xmm1, ptr[eax+ecx+3], 4); dump();
-vcmppd (xmm5, xmm5, 4); dump();
-vcmppd (xmm7, ptr[eax+ecx+3], 4); dump();
-vcmppd (ymm1, ymm3, ptr[eax+ecx+3], 4); dump();
-vcmppd (ymm0, ymm5, ymm7, 4); dump();
-vcmppd (ymm6, ptr[eax+ecx+3], 4); dump();
-vcmppd (ymm2, ymm6, 4); dump();
-vcmpps (xmm5, xmm2, xmm0, 4); dump();
-vcmpps (xmm1, xmm3, ptr[eax+ecx+3], 4); dump();
-vcmpps (xmm5, xmm3, 4); dump();
-vcmpps (xmm0, ptr[eax+ecx+3], 4); dump();
-vcmpps (ymm0, ymm3, ptr[eax+ecx+3], 4); dump();
-vcmpps (ymm5, ymm5, ymm6, 4); dump();
-vcmpps (ymm0, ptr[eax+ecx+3], 4); dump();
-vcmpps (ymm4, ymm4, 4); dump();
-vcmpsd (xmm6, xmm2, xmm7, 4); dump();
-vcmpsd (xmm3, xmm7, ptr[eax+ecx+3], 4); dump();
-vcmpsd (xmm1, xmm3, 4); dump();
-vcmpsd (xmm6, ptr[eax+ecx+3], 4); dump();
-vcmpss (xmm4, xmm5, xmm0, 4); dump();
-vcmpss (xmm6, xmm2, ptr[eax+ecx+3], 4); dump();
-vcmpss (xmm5, xmm1, 4); dump();
-vcmpss (xmm6, ptr[eax+ecx+3], 4); dump();
-vinsertps (xmm2, xmm0, xmm7, 4); dump();
-vinsertps (xmm0, xmm6, ptr[eax+ecx+3], 4); dump();
-vinsertps (xmm4, xmm2, 4); dump();
-vinsertps (xmm5, ptr[eax+ecx+3], 4); dump();
-vpalignr (xmm1, xmm2, xmm5, 4); dump();
-vpalignr (xmm5, xmm4, ptr[eax+ecx+3], 4); dump();
-vpalignr (xmm6, xmm4, 4); dump();
-vpalignr (xmm1, ptr[eax+ecx+3], 4); dump();
-vshufpd (xmm7, xmm7, xmm7, 4); dump();
-vshufpd (xmm0, xmm4, ptr[eax+ecx+3], 4); dump();
-vshufpd (xmm1, xmm0, 4); dump();
-vshufpd (xmm5, ptr[eax+ecx+3], 4); dump();
-vshufpd (ymm1, ymm6, ptr[eax+ecx+3], 4); dump();
-vshufpd (ymm6, ymm3, ymm6, 4); dump();
-vshufpd (ymm7, ptr[eax+ecx+3], 4); dump();
-vshufpd (ymm3, ymm2, 4); dump();
-vshufps (xmm5, xmm5, xmm0, 4); dump();
-vshufps (xmm3, xmm6, ptr[eax+ecx+3], 4); dump();
-vshufps (xmm0, xmm7, 4); dump();
-vshufps (xmm1, ptr[eax+ecx+3], 4); dump();
-vshufps (ymm7, ymm6, ptr[eax+ecx+3], 4); dump();
-vshufps (ymm5, ymm4, ymm1, 4); dump();
-vshufps (ymm5, ptr[eax+ecx+3], 4); dump();
-vshufps (ymm3, ymm4, 4); dump();
-vroundpd (xmm6, xmm4, 4); dump();
-vroundpd (xmm2, ptr[eax+ecx+3], 4); dump();
-vroundpd (ymm5, ptr[eax+ecx+3], 4); dump();
-vroundpd (ymm7, ymm2, 4); dump();
-vroundps (xmm5, xmm2, 4); dump();
-vroundps (xmm3, ptr[eax+ecx+3], 4); dump();
-vroundps (ymm2, ptr[eax+ecx+3], 4); dump();
-vroundps (ymm5, ymm1, 4); dump();
-vpcmpestri (xmm6, xmm7, 4); dump();
-vpcmpestri (xmm2, ptr[eax+ecx+3], 4); dump();
-vpcmpestrm (xmm7, xmm7, 4); dump();
-vpcmpestrm (xmm1, ptr[eax+ecx+3], 4); dump();
-vpcmpistri (xmm1, xmm5, 4); dump();
-vpcmpistri (xmm7, ptr[eax+ecx+3], 4); dump();
-vpcmpistrm (xmm4, xmm1, 4); dump();
-vpcmpistrm (xmm4, ptr[eax+ecx+3], 4); dump();
-vpermilpd (xmm0, xmm4, 4); dump();
-vpermilpd (xmm5, ptr[eax+ecx+3], 4); dump();
-vpermilpd (ymm0, ptr[eax+ecx+3], 4); dump();
-vpermilpd (ymm2, ymm7, 4); dump();
-vpermilps (xmm6, xmm7, 4); dump();
-vpermilps (xmm4, ptr[eax+ecx+3], 4); dump();
-vpermilps (ymm7, ptr[eax+ecx+3], 4); dump();
-vpermilps (ymm7, ymm4, 4); dump();
-vaeskeygenassist (xmm7, xmm6, 4); dump();
-vaeskeygenassist (xmm7, ptr[eax+ecx+3], 4); dump();
-vpshufd (xmm4, xmm2, 4); dump();
-vpshufd (xmm3, ptr[eax+ecx+3], 4); dump();
-vpshufhw (xmm7, xmm0, 4); dump();
-vpshufhw (xmm6, ptr[eax+ecx+3], 4); dump();
-vpshuflw (xmm0, xmm6, 4); dump();
-vpshuflw (xmm0, ptr[eax+ecx+3], 4); dump();
-vpermilpd (xmm3, xmm0, xmm1); dump();
-vpermilpd (xmm1, xmm5, ptr[eax+ecx+3]); dump();
-vpermilpd (ymm5, ymm3, ptr[eax+ecx+3]); dump();
-vpermilpd (ymm7, ymm2, ymm4); dump();
-vpermilps (xmm7, xmm5, xmm7); dump();
-vpermilps (xmm3, xmm0, ptr[eax+ecx+3]); dump();
-vpermilps (ymm6, ymm6, ptr[eax+ecx+3]); dump();
-vpermilps (ymm0, ymm4, ymm7); dump();
-vpshufb (xmm0, xmm5, xmm3); dump();
-vpshufb (xmm6, xmm4, ptr[eax+ecx+3]); dump();
-vaesimc (xmm0, xmm5); dump();
-vaesimc (xmm3, ptr[eax+ecx+3]); dump();
-vtestps (xmm6, xmm1); dump();
-vtestps (xmm4, ptr[eax+ecx+3]); dump();
-vtestps (ymm2, ptr[eax+ecx+3]); dump();
-vtestps (ymm1, ymm4); dump();
-vtestpd (xmm2, xmm0); dump();
-vtestpd (xmm2, ptr[eax+ecx+3]); dump();
-vtestpd (ymm2, ptr[eax+ecx+3]); dump();
-vtestpd (ymm5, ymm7); dump();
-vcomisd (xmm7, xmm2); dump();
-vcomisd (xmm6, ptr[eax+ecx+3]); dump();
-vcomiss (xmm7, xmm6); dump();
-vcomiss (xmm7, ptr[eax+ecx+3]); dump();
-vcvtdq2ps (xmm4, xmm2); dump();
-vcvtdq2ps (xmm6, ptr[eax+ecx+3]); dump();
-vcvtdq2ps (ymm7, ptr[eax+ecx+3]); dump();
-vcvtdq2ps (ymm5, ymm4); dump();
-vcvtps2dq (xmm6, xmm2); dump();
-vcvtps2dq (xmm6, ptr[eax+ecx+3]); dump();
-vcvtps2dq (ymm2, ptr[eax+ecx+3]); dump();
-vcvtps2dq (ymm3, ymm2); dump();
-vcvttps2dq (xmm4, xmm6); dump();
-vcvttps2dq (xmm4, ptr[eax+ecx+3]); dump();
-vcvttps2dq (ymm4, ptr[eax+ecx+3]); dump();
-vcvttps2dq (ymm4, ymm4); dump();
-vmovapd (xmm0, xmm3); dump();
-vmovapd (xmm6, ptr[eax+ecx+3]); dump();
-vmovapd (ymm1, ptr[eax+ecx+3]); dump();
-vmovapd (ymm0, ymm4); dump();
-vmovaps (xmm4, xmm4); dump();
-vmovaps (xmm7, ptr[eax+ecx+3]); dump();
-vmovaps (ymm4, ptr[eax+ecx+3]); dump();
-vmovaps (ymm6, ymm0); dump();
-vmovddup (xmm5, xmm4); dump();
-vmovddup (xmm2, ptr[eax+ecx+3]); dump();
-vmovddup (ymm0, ptr[eax+ecx+3]); dump();
-vmovddup (ymm0, ymm4); dump();
-vmovdqa (xmm1, xmm5); dump();
-vmovdqa (xmm7, ptr[eax+ecx+3]); dump();
-vmovdqa (ymm3, ptr[eax+ecx+3]); dump();
-vmovdqa (ymm0, ymm2); dump();
-vmovdqu (xmm5, xmm5); dump();
-vmovdqu (xmm0, ptr[eax+ecx+3]); dump();
-vmovdqu (ymm7, ptr[eax+ecx+3]); dump();
-vmovdqu (ymm1, ymm3); dump();
-vmovupd (xmm3, xmm6); dump();
-vmovupd (xmm7, ptr[eax+ecx+3]); dump();
-vmovupd (ymm4, ptr[eax+ecx+3]); dump();
-vmovupd (ymm6, ymm1); dump();
-vmovups (xmm0, xmm6); dump();
-vmovups (xmm7, ptr[eax+ecx+3]); dump();
-vmovups (ymm4, ptr[eax+ecx+3]); dump();
-vmovups (ymm0, ymm4); dump();
-vpabsb (xmm3, xmm0); dump();
-vpabsb (xmm0, ptr[eax+ecx+3]); dump();
-vpabsw (xmm5, xmm1); dump();
-vpabsw (xmm6, ptr[eax+ecx+3]); dump();
-vpabsd (xmm4, xmm5); dump();
-vpabsd (xmm6, ptr[eax+ecx+3]); dump();
-vphminposuw (xmm4, xmm5); dump();
-vphminposuw (xmm7, ptr[eax+ecx+3]); dump();
-vpmovsxbw (xmm4, xmm4); dump();
-vpmovsxbw (xmm0, ptr[eax+ecx+3]); dump();
-vpmovsxbd (xmm0, xmm5); dump();
-vpmovsxbd (xmm6, ptr[eax+ecx+3]); dump();
-vpmovsxbq (xmm3, xmm3); dump();
-vpmovsxbq (xmm2, ptr[eax+ecx+3]); dump();
-vpmovsxwd (xmm4, xmm3); dump();
-vpmovsxwd (xmm7, ptr[eax+ecx+3]); dump();
-vpmovsxwq (xmm4, xmm0); dump();
-vpmovsxwq (xmm0, ptr[eax+ecx+3]); dump();
-vpmovsxdq (xmm0, xmm7); dump();
-vpmovsxdq (xmm0, ptr[eax+ecx+3]); dump();
-vpmovzxbw (xmm5, xmm5); dump();
-vpmovzxbw (xmm1, ptr[eax+ecx+3]); dump();
-vpmovzxbd (xmm2, xmm6); dump();
-vpmovzxbd (xmm5, ptr[eax+ecx+3]); dump();
-vpmovzxbq (xmm1, xmm5); dump();
-vpmovzxbq (xmm3, ptr[eax+ecx+3]); dump();
-vpmovzxwd (xmm0, xmm1); dump();
-vpmovzxwd (xmm4, ptr[eax+ecx+3]); dump();
-vpmovzxwq (xmm3, xmm6); dump();
-vpmovzxwq (xmm7, ptr[eax+ecx+3]); dump();
-vpmovzxdq (xmm1, xmm7); dump();
-vpmovzxdq (xmm4, ptr[eax+ecx+3]); dump();
-vptest (xmm5, xmm2); dump();
-vptest (xmm5, ptr[eax+ecx+3]); dump();
-vrcpps (xmm2, xmm4); dump();
-vrcpps (xmm2, ptr[eax+ecx+3]); dump();
-vrcpps (ymm1, ptr[eax+ecx+3]); dump();
-vrcpps (ymm3, ymm2); dump();
-vrcpss (xmm6, xmm0); dump();
-vrcpss (xmm7, ptr[eax+ecx+3]); dump();
-vrsqrtps (xmm4, xmm0); dump();
-vrsqrtps (xmm7, ptr[eax+ecx+3]); dump();
-vrsqrtps (ymm0, ptr[eax+ecx+3]); dump();
-vrsqrtps (ymm7, ymm4); dump();
-vrsqrtss (xmm7, xmm5); dump();
-vrsqrtss (xmm2, ptr[eax+ecx+3]); dump();
-vsqrtpd (xmm2, xmm4); dump();
-vsqrtpd (xmm5, ptr[eax+ecx+3]); dump();
-vsqrtpd (ymm6, ptr[eax+ecx+3]); dump();
-vsqrtpd (ymm5, ymm7); dump();
-vsqrtps (xmm6, xmm6); dump();
-vsqrtps (xmm2, ptr[eax+ecx+3]); dump();
-vsqrtps (ymm7, ptr[eax+ecx+3]); dump();
-vsqrtps (ymm2, ymm5); dump();
-vucomisd (xmm4, xmm7); dump();
-vucomisd (xmm3, ptr[eax+ecx+3]); dump();
-vucomiss (xmm6, xmm7); dump();
-vucomiss (xmm1, ptr[eax+ecx+3]); dump();
-vmovapd (ptr[eax+ecx+3], xmm6); dump();
-vmovapd (ptr[eax+ecx+3], ymm0); dump();
-vmovaps (ptr[eax+ecx+3], xmm3); dump();
-vmovaps (ptr[eax+ecx+3], ymm3); dump();
-vmovdqa (ptr[eax+ecx+3], xmm5); dump();
-vmovdqa (ptr[eax+ecx+3], ymm4); dump();
-vmovdqu (ptr[eax+ecx+3], xmm6); dump();
-vmovdqu (ptr[eax+ecx+3], ymm6); dump();
-vmovupd (ptr[eax+ecx+3], xmm4); dump();
-vmovupd (ptr[eax+ecx+3], ymm2); dump();
-vmovups (ptr[eax+ecx+3], xmm4); dump();
-vmovups (ptr[eax+ecx+3], ymm4); dump();
-vpslldq (xmm2, xmm1, 4); dump();
-vpslldq (xmm0, 4); dump();
-vpsrldq (xmm3, xmm7, 4); dump();
-vpsrldq (xmm2, 4); dump();
-vpsllw (xmm0, xmm3, 4); dump();
-vpsllw (xmm0, 4); dump();
-vpslld (xmm1, xmm6, 4); dump();
-vpslld (xmm3, 4); dump();
-vpsllq (xmm0, xmm4, 4); dump();
-vpsllq (xmm5, 4); dump();
-vpsraw (xmm4, xmm5, 4); dump();
-vpsraw (xmm1, 4); dump();
-vpsrad (xmm5, xmm1, 4); dump();
-vpsrad (xmm0, 4); dump();
-vpsrlw (xmm4, xmm1, 4); dump();
-vpsrlw (xmm5, 4); dump();
-vpsrld (xmm1, xmm3, 4); dump();
-vpsrld (xmm3, 4); dump();
-vpsrlq (xmm5, xmm2, 4); dump();
-vpsrlq (xmm4, 4); dump();
-vfmadd132pd (xmm4, xmm5, xmm2); dump();
-vfmadd132pd (xmm3, xmm0, ptr[eax+ecx+3]); dump();
-vfmadd132pd (ymm6, ymm5, ptr[eax+ecx+3]); dump();
-vfmadd132pd (ymm5, ymm1, ymm0); dump();
-vfmadd132ps (xmm7, xmm7, xmm0); dump();
-vfmadd132ps (xmm2, xmm4, ptr[eax+ecx+3]); dump();
-vfmadd132ps (ymm2, ymm7, ptr[eax+ecx+3]); dump();
-vfmadd132ps (ymm3, ymm3, ymm0); dump();
-vfmadd213pd (xmm7, xmm5, xmm4); dump();
-vfmadd213pd (xmm0, xmm6, ptr[eax+ecx+3]); dump();
-vfmadd213pd (ymm6, ymm0, ptr[eax+ecx+3]); dump();
-vfmadd213pd (ymm1, ymm2, ymm4); dump();
-vfmadd213ps (xmm3, xmm1, xmm2); dump();
-vfmadd213ps (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vfmadd213ps (ymm5, ymm6, ptr[eax+ecx+3]); dump();
-vfmadd213ps (ymm4, ymm0, ymm3); dump();
-vfmadd231pd (xmm3, xmm0, xmm2); dump();
-vfmadd231pd (xmm6, xmm4, ptr[eax+ecx+3]); dump();
-vfmadd231pd (ymm4, ymm1, ptr[eax+ecx+3]); dump();
-vfmadd231pd (ymm7, ymm2, ymm6); dump();
-vfmadd231ps (xmm6, xmm0, xmm7); dump();
-vfmadd231ps (xmm2, xmm7, ptr[eax+ecx+3]); dump();
-vfmadd231ps (ymm4, ymm0, ptr[eax+ecx+3]); dump();
-vfmadd231ps (ymm4, ymm6, ymm7); dump();
-vfmadd132sd (xmm6, xmm4, xmm3); dump();
-vfmadd132sd (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vfmadd132ss (xmm6, xmm2, xmm4); dump();
-vfmadd132ss (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vfmadd213sd (xmm2, xmm1, xmm0); dump();
-vfmadd213sd (xmm4, xmm6, ptr[eax+ecx+3]); dump();
-vfmadd213ss (xmm2, xmm4, xmm6); dump();
-vfmadd213ss (xmm0, xmm5, ptr[eax+ecx+3]); dump();
-vfmadd231sd (xmm0, xmm2, xmm5); dump();
-vfmadd231sd (xmm0, xmm7, ptr[eax+ecx+3]); dump();
-vfmadd231ss (xmm0, xmm5, xmm6); dump();
-vfmadd231ss (xmm2, xmm1, ptr[eax+ecx+3]); dump();
-vfmaddsub132pd (xmm3, xmm0, xmm5); dump();
-vfmaddsub132pd (xmm6, xmm7, ptr[eax+ecx+3]); dump();
-vfmaddsub132pd (ymm4, ymm7, ptr[eax+ecx+3]); dump();
-vfmaddsub132pd (ymm3, ymm4, ymm0); dump();
-vfmaddsub132ps (xmm3, xmm5, xmm5); dump();
-vfmaddsub132ps (xmm0, xmm5, ptr[eax+ecx+3]); dump();
-vfmaddsub132ps (ymm5, ymm5, ptr[eax+ecx+3]); dump();
-vfmaddsub132ps (ymm2, ymm6, ymm2); dump();
-vfmaddsub213pd (xmm6, xmm4, xmm6); dump();
-vfmaddsub213pd (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vfmaddsub213pd (ymm0, ymm2, ptr[eax+ecx+3]); dump();
-vfmaddsub213pd (ymm4, ymm0, ymm2); dump();
-vfmaddsub213ps (xmm7, xmm2, xmm2); dump();
-vfmaddsub213ps (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vfmaddsub213ps (ymm0, ymm0, ptr[eax+ecx+3]); dump();
-vfmaddsub213ps (ymm3, ymm0, ymm1); dump();
-vfmaddsub231pd (xmm4, xmm5, xmm4); dump();
-vfmaddsub231pd (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vfmaddsub231pd (ymm3, ymm5, ptr[eax+ecx+3]); dump();
-vfmaddsub231pd (ymm7, ymm0, ymm3); dump();
-vfmaddsub231ps (xmm7, xmm1, xmm3); dump();
-vfmaddsub231ps (xmm3, xmm5, ptr[eax+ecx+3]); dump();
-vfmaddsub231ps (ymm6, ymm3, ptr[eax+ecx+3]); dump();
-vfmaddsub231ps (ymm0, ymm2, ymm2); dump();
-vfmsubadd132pd (xmm5, xmm0, xmm0); dump();
-vfmsubadd132pd (xmm7, xmm4, ptr[eax+ecx+3]); dump();
-vfmsubadd132pd (ymm0, ymm1, ptr[eax+ecx+3]); dump();
-vfmsubadd132pd (ymm2, ymm7, ymm5); dump();
-vfmsubadd132ps (xmm4, xmm2, xmm2); dump();
-vfmsubadd132ps (xmm7, xmm0, ptr[eax+ecx+3]); dump();
-vfmsubadd132ps (ymm0, ymm5, ptr[eax+ecx+3]); dump();
-vfmsubadd132ps (ymm3, ymm0, ymm6); dump();
-vfmsubadd213pd (xmm5, xmm7, xmm7); dump();
-vfmsubadd213pd (xmm1, xmm5, ptr[eax+ecx+3]); dump();
-vfmsubadd213pd (ymm2, ymm3, ptr[eax+ecx+3]); dump();
-vfmsubadd213pd (ymm2, ymm3, ymm5); dump();
-vfmsubadd213ps (xmm2, xmm2, xmm4); dump();
-vfmsubadd213ps (xmm7, xmm4, ptr[eax+ecx+3]); dump();
-vfmsubadd213ps (ymm4, ymm7, ptr[eax+ecx+3]); dump();
-vfmsubadd213ps (ymm5, ymm7, ymm5); dump();
-vfmsubadd231pd (xmm6, xmm5, xmm1); dump();
-vfmsubadd231pd (xmm6, xmm1, ptr[eax+ecx+3]); dump();
-vfmsubadd231pd (ymm2, ymm4, ptr[eax+ecx+3]); dump();
-vfmsubadd231pd (ymm1, ymm3, ymm4); dump();
-vfmsubadd231ps (xmm1, xmm5, xmm0); dump();
-vfmsubadd231ps (xmm7, xmm6, ptr[eax+ecx+3]); dump();
-vfmsubadd231ps (ymm3, ymm0, ptr[eax+ecx+3]); dump();
-vfmsubadd231ps (ymm7, ymm5, ymm0); dump();
-vfmsub132pd (xmm2, xmm0, xmm0); dump();
-vfmsub132pd (xmm5, xmm5, ptr[eax+ecx+3]); dump();
-vfmsub132pd (ymm1, ymm1, ptr[eax+ecx+3]); dump();
-vfmsub132pd (ymm2, ymm0, ymm2); dump();
-vfmsub132ps (xmm4, xmm0, xmm6); dump();
-vfmsub132ps (xmm3, xmm2, ptr[eax+ecx+3]); dump();
-vfmsub132ps (ymm0, ymm1, ptr[eax+ecx+3]); dump();
-vfmsub132ps (ymm1, ymm1, ymm1); dump();
-vfmsub213pd (xmm3, xmm5, xmm7); dump();
-vfmsub213pd (xmm7, xmm2, ptr[eax+ecx+3]); dump();
-vfmsub213pd (ymm5, ymm3, ptr[eax+ecx+3]); dump();
-vfmsub213pd (ymm4, ymm0, ymm0); dump();
-vfmsub213ps (xmm2, xmm2, xmm2); dump();
-vfmsub213ps (xmm3, xmm4, ptr[eax+ecx+3]); dump();
-vfmsub213ps (ymm3, ymm7, ptr[eax+ecx+3]); dump();
-vfmsub213ps (ymm7, ymm2, ymm2); dump();
-vfmsub231pd (xmm6, xmm1, xmm2); dump();
-vfmsub231pd (xmm1, xmm0, ptr[eax+ecx+3]); dump();
-vfmsub231pd (ymm0, ymm3, ptr[eax+ecx+3]); dump();
-vfmsub231pd (ymm6, ymm0, ymm0); dump();
-vfmsub231ps (xmm3, xmm6, xmm3); dump();
-vfmsub231ps (xmm6, xmm3, ptr[eax+ecx+3]); dump();
-vfmsub231ps (ymm7, ymm3, ptr[eax+ecx+3]); dump();
-vfmsub231ps (ymm0, ymm4, ymm0); dump();
-vfmsub132sd (xmm6, xmm7, xmm7); dump();
-vfmsub132sd (xmm6, xmm6, ptr[eax+ecx+3]); dump();
-vfmsub132ss (xmm6, xmm4, xmm7); dump();
-vfmsub132ss (xmm1, xmm4, ptr[eax+ecx+3]); dump();
-vfmsub213sd (xmm3, xmm3, xmm1); dump();
-vfmsub213sd (xmm0, xmm1, ptr[eax+ecx+3]); dump();
-vfmsub213ss (xmm0, xmm5, xmm7); dump();
-vfmsub213ss (xmm1, xmm4, ptr[eax+ecx+3]); dump();
-vfmsub231sd (xmm3, xmm2, xmm3); dump();
-vfmsub231sd (xmm7, xmm0, ptr[eax+ecx+3]); dump();
-vfmsub231ss (xmm0, xmm6, xmm6); dump();
-vfmsub231ss (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vfnmadd132pd (xmm4, xmm2, xmm5); dump();
-vfnmadd132pd (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vfnmadd132pd (ymm1, ymm2, ptr[eax+ecx+3]); dump();
-vfnmadd132pd (ymm1, ymm2, ymm7); dump();
-vfnmadd132ps (xmm2, xmm0, xmm2); dump();
-vfnmadd132ps (xmm1, xmm7, ptr[eax+ecx+3]); dump();
-vfnmadd132ps (ymm1, ymm3, ptr[eax+ecx+3]); dump();
-vfnmadd132ps (ymm2, ymm2, ymm4); dump();
-vfnmadd213pd (xmm1, xmm2, xmm1); dump();
-vfnmadd213pd (xmm6, xmm6, ptr[eax+ecx+3]); dump();
-vfnmadd213pd (ymm1, ymm0, ptr[eax+ecx+3]); dump();
-vfnmadd213pd (ymm5, ymm1, ymm6); dump();
-vfnmadd213ps (xmm7, xmm3, xmm1); dump();
-vfnmadd213ps (xmm2, xmm5, ptr[eax+ecx+3]); dump();
-vfnmadd213ps (ymm0, ymm7, ptr[eax+ecx+3]); dump();
-vfnmadd213ps (ymm0, ymm6, ymm1); dump();
-vfnmadd231pd (xmm5, xmm0, xmm5); dump();
-vfnmadd231pd (xmm0, xmm0, ptr[eax+ecx+3]); dump();
-vfnmadd231pd (ymm5, ymm2, ptr[eax+ecx+3]); dump();
-vfnmadd231pd (ymm4, ymm7, ymm4); dump();
-vfnmadd231ps (xmm2, xmm6, xmm4); dump();
-vfnmadd231ps (xmm5, xmm3, ptr[eax+ecx+3]); dump();
-vfnmadd231ps (ymm4, ymm1, ptr[eax+ecx+3]); dump();
-vfnmadd231ps (ymm3, ymm2, ymm4); dump();
-vfnmadd132sd (xmm4, xmm2, xmm4); dump();
-vfnmadd132sd (xmm4, xmm2, ptr[eax+ecx+3]); dump();
-vfnmadd132ss (xmm4, xmm6, xmm1); dump();
-vfnmadd132ss (xmm1, xmm6, ptr[eax+ecx+3]); dump();
-vfnmadd213sd (xmm7, xmm1, xmm2); dump();
-vfnmadd213sd (xmm5, xmm6, ptr[eax+ecx+3]); dump();
-vfnmadd213ss (xmm0, xmm6, xmm4); dump();
-vfnmadd213ss (xmm2, xmm4, ptr[eax+ecx+3]); dump();
-vfnmadd231sd (xmm7, xmm6, xmm7); dump();
-vfnmadd231sd (xmm0, xmm1, ptr[eax+ecx+3]); dump();
-vfnmadd231ss (xmm4, xmm4, xmm4); dump();
-vfnmadd231ss (xmm6, xmm0, ptr[eax+ecx+3]); dump();
-vfnmsub132pd (xmm7, xmm5, xmm1); dump();
-vfnmsub132pd (xmm6, xmm4, ptr[eax+ecx+3]); dump();
-vfnmsub132pd (ymm6, ymm1, ptr[eax+ecx+3]); dump();
-vfnmsub132pd (ymm7, ymm5, ymm4); dump();
-vfnmsub132ps (xmm6, xmm3, xmm0); dump();
-vfnmsub132ps (xmm2, xmm5, ptr[eax+ecx+3]); dump();
-vfnmsub132ps (ymm0, ymm5, ptr[eax+ecx+3]); dump();
-vfnmsub132ps (ymm7, ymm3, ymm0); dump();
-vfnmsub213pd (xmm1, xmm6, xmm2); dump();
-vfnmsub213pd (xmm3, xmm4, ptr[eax+ecx+3]); dump();
-vfnmsub213pd (ymm0, ymm2, ptr[eax+ecx+3]); dump();
-vfnmsub213pd (ymm2, ymm1, ymm1); dump();
-vfnmsub213ps (xmm6, xmm7, xmm3); dump();
-vfnmsub213ps (xmm5, xmm4, ptr[eax+ecx+3]); dump();
-vfnmsub213ps (ymm7, ymm3, ptr[eax+ecx+3]); dump();
-vfnmsub213ps (ymm6, ymm4, ymm5); dump();
-vfnmsub231pd (xmm6, xmm2, xmm2); dump();
-vfnmsub231pd (xmm5, xmm2, ptr[eax+ecx+3]); dump();
-vfnmsub231pd (ymm6, ymm1, ptr[eax+ecx+3]); dump();
-vfnmsub231pd (ymm0, ymm5, ymm5); dump();
-vfnmsub231ps (xmm2, xmm4, xmm7); dump();
-vfnmsub231ps (xmm6, xmm4, ptr[eax+ecx+3]); dump();
-vfnmsub231ps (ymm7, ymm2, ptr[eax+ecx+3]); dump();
-vfnmsub231ps (ymm0, ymm5, ymm1); dump();
-vfnmsub132sd (xmm7, xmm5, xmm4); dump();
-vfnmsub132sd (xmm6, xmm1, ptr[eax+ecx+3]); dump();
-vfnmsub132ss (xmm3, xmm0, xmm1); dump();
-vfnmsub132ss (xmm4, xmm6, ptr[eax+ecx+3]); dump();
-vfnmsub213sd (xmm0, xmm2, xmm3); dump();
-vfnmsub213sd (xmm3, xmm1, ptr[eax+ecx+3]); dump();
-vfnmsub213ss (xmm0, xmm1, xmm5); dump();
-vfnmsub213ss (xmm6, xmm0, ptr[eax+ecx+3]); dump();
-vfnmsub231sd (xmm4, xmm0, xmm3); dump();
-vfnmsub231sd (xmm1, xmm6, ptr[eax+ecx+3]); dump();
-vfnmsub231ss (xmm3, xmm3, xmm2); dump();
-vfnmsub231ss (xmm6, xmm2, ptr[eax+ecx+3]); dump();
-vmaskmovps (xmm4, xmm3, ptr[eax+ecx+3]); dump();
-vmaskmovps (ymm4, ymm7, ptr[eax+ecx+3]); dump();
-vmaskmovpd (ymm5, ymm5, ptr[eax+ecx+3]); dump();
-vmaskmovpd (xmm1, xmm1, ptr[eax+ecx+3]); dump();
-vmaskmovps (ptr[eax+ecx+3], xmm7, xmm4); dump();
-vmaskmovpd (ptr[eax+ecx+3], xmm5, xmm2); dump();
-vbroadcastf128 (ymm2, ptr[eax+ecx+3]); dump();
-vbroadcastsd (ymm0, ptr[eax+ecx+3]); dump();
-vbroadcastss (xmm2, ptr[eax+ecx+3]); dump();
-vbroadcastss (ymm3, ptr[eax+ecx+3]); dump();
-vinsertf128 (ymm1, ymm6, xmm1, 4); dump();
-vinsertf128 (ymm3, ymm4, ptr[eax+ecx+3], 4); dump();
-vperm2f128 (ymm0, ymm2, ptr[eax+ecx+3], 4); dump();
-vperm2f128 (ymm0, ymm0, ymm5, 4); dump();
+vextractf128 (xmm3, ymm7, 4); dump();
+vextractf128 (xmm4, ymm2, 4); dump();
+vextractf128 (ptr[eax+ecx+3], ymm0, 4); dump();
+vextractf128 (ptr[eax+ecx+3], ymm3, 4); dump();
+vextractf128 (ptr[rip - 0x13456], ymm3, 4); dump();
+vextractf128 (ptr[rdx+r15+0x12], ymm1, 4); dump();
+vextractf128 (xmm8, ymm7, 4); dump();
+vextractf128 (xmm9, ymm6, 4); dump();
+vmaskmovps (ptr[eax+ecx+3], ymm0, ymm4); dump();
+vmaskmovps (ptr[eax+ecx+3], ymm3, ymm7); dump();
+vmaskmovps (ptr[eax+ecx+3], ymm4, ymm7); dump();
+vmaskmovps (ptr[eax+ecx+3], ymm6, ymm7); dump();
+vmaskmovps (ptr[rip - 0x13456], ymm4, ymm6); dump();
+vmaskmovps (ptr[rdx+r15+0x12], ymm3, ymm1); dump();
+vmaskmovps (ptr[rip - 0x13456], ymm1, ymm3); dump();
+vmaskmovps (ptr[rdx+r15+0x12], ymm6, ymm5); dump();
+vmaskmovpd (ptr[eax+ecx+3], ymm6, ymm2); dump();
+vmaskmovpd (ptr[eax+ecx+3], ymm5, ymm0); dump();
+vmaskmovpd (ptr[eax+ecx+3], ymm4, ymm6); dump();
+vmaskmovpd (ptr[eax+ecx+3], ymm5, ymm0); dump();
+vmaskmovpd (ptr[rip - 0x13456], ymm0, ymm4); dump();
+vmaskmovpd (ptr[rdx+r15+0x12], ymm2, ymm1); dump();
+vmaskmovpd (ptr[rip - 0x13456], ymm1, ymm3); dump();
+vmaskmovpd (ptr[rdx+r15+0x12], ymm6, ymm5); dump();
+vlddqu (xmm0, ptr[eax+ecx+3]); dump();
+vlddqu (xmm6, ptr[rip - 0x13456]); dump();
+vlddqu (ymm6, ptr[eax+ecx+3]); dump();
+vlddqu (ymm6, ptr[rdx+r15+0x12]); dump();
+vlddqu (xmm8, ptr[eax+ecx+3]); dump();
+vlddqu (xmm14, ptr[rip - 0x13456]); dump();
+vlddqu (ymm7, ptr[eax+ecx+3]); dump();
+vlddqu (ymm0, ptr[rdx+r15+0x12]); dump();
+vmovshdup (xmm6, xmm5); dump();
+vmovshdup (xmm6, ptr[eax+ecx+3]); dump();
+vmovshdup (xmm4, ptr[rip - 0x13456]); dump();
+vmovshdup (xmm7, xmm14); dump();
+vmovshdup (xmm8, xmm1); dump();
+vmovshdup (xmm13, ptr[eax+ecx+3]); dump();
+vmovshdup (xmm8, ptr[rdx+r15+0x12]); dump();
+vmovshdup (xmm9, xmm9); dump();
+vmovshdup (ymm6, ptr[eax+ecx+3]); dump();
+vmovshdup (ymm3, ymm4); dump();
+vmovshdup (ymm7, ptr[rip - 0x13456]); dump();
+vmovshdup (ymm6, ymm6); dump();
+vmovshdup (ymm6, ptr[eax+ecx+3]); dump();
+vmovshdup (ymm2, ymm4); dump();
+vmovshdup (ymm2, ptr[rdx+r15+0x12]); dump();
+vmovshdup (ymm0, ymm0); dump();
+vmovsldup (xmm6, xmm4); dump();
+vmovsldup (xmm2, ptr[eax+ecx+3]); dump();
+vmovsldup (xmm5, ptr[rip - 0x13456]); dump();
+vmovsldup (xmm7, xmm12); dump();
+vmovsldup (xmm9, xmm3); dump();
+vmovsldup (xmm14, ptr[eax+ecx+3]); dump();
+vmovsldup (xmm13, ptr[rdx+r15+0x12]); dump();
+vmovsldup (xmm15, xmm11); dump();
+vmovsldup (ymm2, ptr[eax+ecx+3]); dump();
+vmovsldup (ymm6, ymm2); dump();
+vmovsldup (ymm0, ptr[rip - 0x13456]); dump();
+vmovsldup (ymm6, ymm7); dump();
+vmovsldup (ymm6, ptr[eax+ecx+3]); dump();
+vmovsldup (ymm3, ymm1); dump();
+vmovsldup (ymm3, ptr[rdx+r15+0x12]); dump();
+vmovsldup (ymm7, ymm1); dump();
+vpcmpeqq (xmm7, xmm0); dump();
+vpcmpeqq (xmm2, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm0, ptr[rip - 0x13456]); dump();
+vpcmpeqq (xmm7, xmm10); dump();
+vpcmpeqq (xmm11, xmm6); dump();
+vpcmpeqq (xmm8, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm12, ptr[rdx+r15+0x12]); dump();
+vpcmpeqq (xmm10, xmm13); dump();
+vpcmpeqq (xmm2, xmm6, xmm7); dump();
+vpcmpeqq (xmm1, xmm1, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm0, xmm0, ptr[rip - 0x13456]); dump();
+vpcmpeqq (xmm2, xmm1, xmm8); dump();
+vpcmpeqq (xmm4, xmm14, xmm7); dump();
+vpcmpeqq (xmm4, xmm15, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm7, xmm8, ptr[rdx+r15+0x12]); dump();
+vpcmpeqq (xmm7, xmm8, xmm11); dump();
+vpcmpeqq (xmm11, xmm0, xmm0); dump();
+vpcmpeqq (xmm14, xmm0, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm11, xmm1, ptr[rip - 0x13456]); dump();
+vpcmpeqq (xmm9, xmm2, xmm10); dump();
+vpcmpeqq (xmm13, xmm8, xmm0); dump();
+vpcmpeqq (xmm10, xmm10, ptr[eax+ecx+3]); dump();
+vpcmpeqq (xmm8, xmm13, ptr[rdx+r15+0x12]); dump();
+vpcmpeqq (xmm14, xmm9, xmm12); dump();
+vpcmpgtq (xmm7, xmm6); dump();
+vpcmpgtq (xmm2, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm3, ptr[rip - 0x13456]); dump();
+vpcmpgtq (xmm1, xmm15); dump();
+vpcmpgtq (xmm8, xmm1); dump();
+vpcmpgtq (xmm11, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm9, ptr[rdx+r15+0x12]); dump();
+vpcmpgtq (xmm8, xmm8); dump();
+vpcmpgtq (xmm3, xmm0, xmm0); dump();
+vpcmpgtq (xmm7, xmm7, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm1, xmm7, ptr[rip - 0x13456]); dump();
+vpcmpgtq (xmm0, xmm5, xmm14); dump();
+vpcmpgtq (xmm7, xmm12, xmm2); dump();
+vpcmpgtq (xmm4, xmm11, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm3, xmm15, ptr[rdx+r15+0x12]); dump();
+vpcmpgtq (xmm3, xmm12, xmm13); dump();
+vpcmpgtq (xmm12, xmm5, xmm5); dump();
+vpcmpgtq (xmm12, xmm1, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm12, xmm3, ptr[rip - 0x13456]); dump();
+vpcmpgtq (xmm11, xmm2, xmm9); dump();
+vpcmpgtq (xmm13, xmm9, xmm6); dump();
+vpcmpgtq (xmm12, xmm10, ptr[eax+ecx+3]); dump();
+vpcmpgtq (xmm10, xmm15, ptr[rdx+r15+0x12]); dump();
+vpcmpgtq (xmm11, xmm11, xmm10); dump();
+vpextrw (ptr[eax+ecx+3], xmm4, 4); dump();
+vpextrw (ptr[eax+ecx+3], xmm8, 4); dump();
+vpextrw (ecx, xmm4, 4); dump();
+vpextrw (ebp, xmm14, 4); dump();
+vpextrw (eax, xmm3, 4); dump();
+vpextrw (eax, xmm14, 4); dump();
+vpextrw (ptr[rip - 0x13456], xmm1, 4); dump();
+vpextrw (ptr[rdx+r15+0x12], xmm14, 4); dump();
+vpextrw (r8d, xmm7, 4); dump();
+vpextrw (r14d, xmm10, 4); dump();
+vpextrw (rdx, xmm7, 4); dump();
+vpextrw (rcx, xmm8, 4); dump();
+vpextrw (r11, xmm7, 4); dump();
+vpextrw (r9, xmm11, 4); dump();
+vpextrw (rax, xmm6, 4); dump();
+vpextrw (rax, xmm12, 4); dump();
}
void gen()
{
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index f4822f9..dfa9588 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -4,9 +4,9 @@
@file xbyak.h
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
@author herumi
- @version $Revision: 1.238 $
+ @version $Revision: 1.239 $
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
- @date $Date: 2011/02/04 03:46:09 $
+ @date $Date: 2011/02/07 06:09:35 $
@note modified new BSD license
http://www.opensource.org/licenses/bsd-license.php
*/
@@ -56,7 +56,7 @@
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x2990, /* 0xABCD = A.BC(D) */
+ VERSION = 0x2991, /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 5a6e334..88aadd0 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "2.99"; }
+const char *getVersionString() const { return "2.991"; }
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
@@ -943,7 +943,7 @@
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
-void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }
+void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 65a949f..598f45b 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -44,6 +44,10 @@
#endif
#endif
+#ifdef _MSC_VER
+extern "C" unsigned __int64 __xgetbv(int);
+#endif
+
namespace Xbyak { namespace util {
/**
@@ -64,6 +68,16 @@
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
+ static inline uint64 getXfeature() // Returns the 64-bit result of XGETBV for index 0.
+ {
+#ifdef _MSC_VER
+ return __xgetbv(0); // MSVC build: delegate to the __xgetbv routine with index 0
+#else
+ unsigned int eax, edx; // receive the two 32-bit halves produced by the instruction
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); // input: 0 in the c register; outputs: a and d registers
+ return ((uint64)edx << 32) | eax; // edx forms the upper 32 bits, eax the lower 32 bits
+#endif
+ }
enum Type {
NONE = 0,
tMMX = 1 << 0,
@@ -121,10 +135,15 @@
if (data[2] & (1U << 25)) type_ |= tAESNI;
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
- // QQQ
- // should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetvb
- if (data[2] & (1U << 28)) type_ |= tAVX;
- if (data[2] & (1U << 12)) type_ |= tFMA;
+
+ if (type_ & tOSXSACE) {
+ // check XFEATURE_ENABLED_MASK[2:1] = '11b'
+ uint64 bv = getXfeature();
+ if ((bv & 6) == 6) {
+ if (data[2] & (1U << 28)) type_ |= tAVX;
+ if (data[2] & (1U << 12)) type_ |= tFMA;
+ }
+ }
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX;