Add EVEX (AVX-512) encoding support for vmovapd, vmovaps, vmovupd, vmovups
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 53ed4f1..52674c2 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1204,15 +1204,15 @@
{ 0x5B, "cvtdq2ps", T_0F | T_YMM, false },
{ 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM, false },
{ 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false },
- { 0x28, "movapd", T_0F | T_66 | T_YMM, false },
- { 0x28, "movaps", T_0F | T_YMM, false },
+ { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
+ { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false },
{ 0x12, "movddup", T_0F | T_F2 | T_YMM, false },
{ 0x6F, "movdqa", T_0F | T_66 | T_YMM, false },
{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false },
{ 0x16, "movshdup", T_0F | T_F3 | T_YMM, false },
{ 0x12, "movsldup", T_0F | T_F3 | T_YMM, false },
- { 0x10, "movupd", T_0F | T_66 | T_YMM, false },
- { 0x10, "movups", T_0F | T_YMM, false },
+ { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
+ { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false },
{ 0x1C, "pabsb", T_0F38 | T_66 | T_YMM, false },
{ 0x1D, "pabsw", T_0F38 | T_66 | T_YMM, false },
@@ -1261,12 +1261,12 @@
const char *name;
int type;
} tbl[] = {
- { 0x29, "movapd", T_0F | T_66 | T_YMM},
- { 0x29, "movaps", T_0F | T_YMM},
+ { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1},
+ { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0},
{ 0x7F, "movdqa", T_0F | T_66 | T_YMM},
{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM},
- { 0x11, "movupd", T_0F | T_66 | T_YMM},
- { 0x11, "movups", T_0F | T_YMM},
+ { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1},
+ { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0},
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 8f068f0..2446a84 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2553,14 +2553,19 @@
}
void putBroadcastSub(int disp)
{
+#ifdef XBYAK64 // choose the base register name printed in the operands below
+ const char *a = "rax";
+#else
+ const char *a = "eax";
+#endif
if (isXbyak_) {
- printf("vaddpd(zmm0, zmm1, ptr_b[rax+%d]);dump();\n", disp);
- printf("vaddpd(ymm0, ymm1, ptr_b[rax+%d]);dump();\n", disp);
- printf("vaddpd(xmm0, xmm1, ptr_b[rax+%d]);dump();\n", disp);
+ printf("vaddpd(zmm0, zmm1, ptr_b[%s+%d]);dump();\n", a, disp); // isXbyak_ branch: call-style text with a ptr_b[base+disp] operand, then dump()
+ printf("vaddpd(ymm0, ymm1, ptr_b[%s+%d]);dump();\n", a, disp);
+ printf("vaddpd(xmm0, xmm1, ptr_b[%s+%d]);dump();\n", a, disp);
} else {
- printf("vaddpd zmm0, zmm1, [rax+%d]{1to8}\n", disp);
- printf("vaddpd ymm0, ymm1, [rax+%d]{1to4}\n", disp);
- printf("vaddpd xmm0, xmm1, [rax+%d]{1to2}\n", disp);
+ printf("vaddpd zmm0, zmm1, [%s+%d]{1to8}\n", a, disp); // other branch: plain instruction text with a {1toN} suffix on the memory operand
+ printf("vaddpd ymm0, ymm1, [%s+%d]{1to4}\n", a, disp);
+ printf("vaddpd xmm0, xmm1, [%s+%d]{1to2}\n", a, disp);
}
}
void putBroadcast()
@@ -2569,12 +2574,29 @@
putBroadcastSub(i);
}
}
+ void putAVX512_M_X() // for each mnemonic in tbl, calls put() twice: (MEM, ZMM) and (ZMM, MEM)
+ {
+ const char *tbl[] = { // mnemonics exercised by the loop below
+ "vmovapd",
+ "vmovaps",
+// "vmovdqa", // disabled entry; NOTE(review): presumably because vmovdqa is not flagged T_EVEX -- confirm
+// "vmovdqu", // disabled entry; NOTE(review): presumably because vmovdqu is not flagged T_EVEX -- confirm
+ "vmovupd",
+ "vmovups",
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const char *name = tbl[i];
+ put(name, MEM, ZMM); // memory operand first, ZMM operand second
+ put(name, ZMM, MEM); // ZMM operand first, memory operand second
+ }
+ }
void putAVX512()
{
putOpmask();
putCombi();
putCmpK();
putBroadcast();
+ putAVX512_M_X(); // MEM/ZMM operand cases for vmovapd, vmovaps, vmovupd, vmovups
}
#endif
};
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 0f2336c..592e0e2 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -978,15 +978,15 @@
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x5B); }
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x5B); }
void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x5B); }
-void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x28); }
-void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x28); }
+void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x28); }
+void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F2 | T_YMM, 0x12); }
void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x6F); }
void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x6F); }
void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x16); }
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x12); }
-void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x10); }
-void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x10); }
+void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x10); }
+void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1C); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1D); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1E); }
@@ -1013,12 +1013,12 @@
void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x51); }
void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2E); }
-void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_66 | T_YMM, 0x29); }
-void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_YMM, 0x29); }
+void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x29); }
+void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); }
void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_66 | T_YMM, 0x7F); }
void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_F3 | T_YMM, 0x7F); }
-void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_66 | T_YMM, 0x11); }
-void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_YMM, 0x11); }
+void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x11); }
+void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); }
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_YMM, 0xD0); }
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_YMM, 0xD0); }
void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_YMM, 0x7C); }