add EVEX encoding (T_EVEX) for vpacksswb, vpackssdw, vpackuswb, vpackusdw
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 3825e68..08f36a5 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1045,10 +1045,10 @@
{ 0x5A, "cvtsd2ss", T_0F | T_F2, false, true },
{ 0x5A, "cvtss2sd", T_0F | T_F3, false, true },
{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true },
- { 0x63, "packsswb", T_0F | T_66 | T_YMM, false, true },
- { 0x6B, "packssdw", T_0F | T_66 | T_YMM, false, true },
- { 0x67, "packuswb", T_0F | T_66 | T_YMM, false, true },
- { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM, false, true },
+ { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true },
+ { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
+ { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true },
+ { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
{ 0xFC, "paddb", T_0F | T_66 | T_YMM, false, true },
{ 0xFD, "paddw", T_0F | T_66 | T_YMM, false, true },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 79162d0..9fa919c 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2726,6 +2726,37 @@
put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
}
}
+ void put512_X3() // calls put() once per row of tbl; the whole body is compiled only when XBYAK64 is defined
+ {
+#ifdef XBYAK64
+ const struct Tbl {
+ const char *name; // mnemonic string, passed as the 1st argument of put()
+ uint64_t x1; // flags passed as the 2nd argument of put() (presumably the destination operand kinds - confirm)
+ uint64_t x2; // flags passed as the 3rd argument of put() (presumably the first source operand kinds - confirm)
+ uint64_t xm; // flags passed as the 4th argument of put(); every row ORs a register flag with a memory flag
+ } tbl[] = {
+ { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM }, // vpacksswb/vpackuswb rows use _MEM for the memory form
+ { "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
+ { "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
+
+ { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 }, // vpackssdw/vpackusdw rows use M_1to4/8/16 instead of _MEM (presumably a {1toN} broadcast memory operand - confirm)
+ { "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 },
+ { "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
+
+ { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 },
+ { "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 },
+ { "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
+
+ { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
+ { "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { // visit every table row in declaration order
+ const Tbl& p = tbl[i];
+ put(p.name, p.x1, p.x2, p.xm); // forward the row's four fields unchanged
+ }
+#endif
+ }
void put512_X3_I()
{
const struct Tbl {
@@ -2764,6 +2795,7 @@
put_vmov();
put512_X_XM();
put512_X_X_XM();
+ put512_X3();
put512_X3_I();
}
#endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 1c8e5a5..55a221f 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -777,13 +777,13 @@
void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); }
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); }
-void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x63); }
+void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x63); }
void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); }
-void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x6B); }
+void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6B); }
void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); }
-void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x67); }
+void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x67); }
void vpackuswb(const Xmm& x, const Operand& op) { vpackuswb(x, x, op); }
-void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x2B); }
+void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x2B); }
void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); }
void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xFC); }
void vpaddb(const Xmm& x, const Operand& op) { vpaddb(x, x, op); }