add EVEX (AVX-512) encoding for vpabs{b,w,d} and add vpabsq
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index b13cf37..648e588 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -134,6 +134,7 @@
std::string type = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
+ puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EVEX | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
}
void putM_X()
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index d41509d..3825e68 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1214,9 +1214,9 @@
{ 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
{ 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false },
- { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM, false },
- { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM, false },
- { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM, false },
+ { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false },
+ { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false },
+ { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false },
{ 0x41, "phminposuw", T_0F38 | T_66, false },
{ 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM, false },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 94a66ab..79162d0 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2667,7 +2667,7 @@
}
#endif
}
- void put512_X_MX()
+ void put512_X_XM()
{
const struct Tbl {
const char *name;
@@ -2680,6 +2680,10 @@
{ "vmovdqu16", true },
{ "vmovdqu32", true },
{ "vmovdqu64", true },
+ { "vpabsb", false },
+ { "vpabsw", false },
+ { "vpabsd", false },
+ { "vpabsq", false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
@@ -2700,6 +2704,9 @@
put("vsqrtps", YMM_KZ, M_1to8);
put("vsqrtps", ZMM_KZ, M_1to16);
put("vsqrtps", ZMM_KZ, ZMM_ER);
+
+ put("vpabsd", ZMM_KZ, M_1to16);
+ put("vpabsq", ZMM_KZ, M_1to8);
}
void put512_X_X_XM()
{
@@ -2755,7 +2762,7 @@
putBroadcast();
putAVX512_M_X();
put_vmov();
- put512_X_MX();
+ put512_X_XM();
put512_X_X_XM();
put512_X3_I();
}
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index f521ae1..b79cc3c 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -74,6 +74,7 @@
void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_F2 | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_F3 | T_EW0 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_F3 | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
+void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EVEX | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_0F | T_F2 | T_EW0 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 860161e..1c8e5a5 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -987,9 +987,9 @@
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_EW0 | T_YMM | T_EVEX, 0x12); }
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x10); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
-void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1C); }
-void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1D); }
-void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1E); }
+void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x1C); }
+void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x1D); }
+void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); }
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66, 0x41); }
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x20); }
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x21); }