Add EVEX (AVX-512) encodings and tests for vunpckhpd, vunpckhps, vunpcklpd, vunpcklps
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 1db9164..88fa9f1 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1165,11 +1165,11 @@
{ 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X, false, true },
{ 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X, false, true },
- { 0x15, "unpckhpd", T_0F | T_66 | T_YMM, false, true },
- { 0x15, "unpckhps", T_0F | T_YMM, false, true },
+ { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true },
+ { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
- { 0x14, "unpcklpd", T_0F | T_66 | T_YMM, false, true },
- { 0x14, "unpcklps", T_0F | T_YMM, false, true },
+ { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true },
+ { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index c89e568..34a24e1 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2553,10 +2553,10 @@
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- put(p->name, K, XMM, XMM | MEM, IMM);
+ put(p->name, K, _XMM, _XMM | MEM, IMM);
if (!p->supportYMM) continue;
- put(p->name, K, YMM, YMM | MEM, IMM);
- put(p->name, K, ZMM, ZMM | MEM, IMM);
+ put(p->name, K, _YMM, _YMM | MEM, IMM);
+ put(p->name, K, _ZMM, _ZMM | MEM, IMM);
}
}
put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
@@ -2683,13 +2683,13 @@
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- put(p.name, XMM|XMM_KZ, XMM|MEM);
- put(p.name, YMM|YMM_KZ, YMM|MEM);
- put(p.name, ZMM|ZMM_KZ, ZMM|MEM);
+ put(p.name, _XMM|XMM_KZ, _XMM|MEM);
+ put(p.name, _YMM|YMM_KZ, _YMM|MEM);
+ put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
if (!p.M_X) continue;
- put(p.name, MEM, XMM);
- put(p.name, MEM, YMM);
- put(p.name, MEM, ZMM);
+ put(p.name, MEM, _XMM);
+ put(p.name, MEM, _YMM);
+ put(p.name, MEM, _ZMM);
}
put("vsqrtpd", XMM_KZ, M_1to2);
put("vsqrtpd", YMM_KZ, M_1to4);
@@ -2705,13 +2705,18 @@
{
const struct Tbl {
const char *name;
+ uint64_t mem;
} tbl[] = {
- { "vsqrtsd" },
- { "vsqrtss" },
+ { "vsqrtsd", MEM },
+ { "vsqrtss", MEM },
+ { "vunpckhpd", M_1to2 },
+ { "vunpckhps", M_1to4 },
+ { "vunpcklpd", M_1to2 },
+ { "vunpcklps", M_1to4 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- put(p.name, XMM_KZ, XMM, XMM|MEM);
+ put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
}
}
void putAVX512()
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 1969722..bb0fce7 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -954,13 +954,13 @@
void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); }
void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X, 0x51); }
void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); }
-void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x15); }
+void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x15); }
void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); }
-void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0x15); }
+void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x15); }
void vunpckhps(const Xmm& x, const Operand& op) { vunpckhps(x, x, op); }
-void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x14); }
+void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x14); }
void vunpcklpd(const Xmm& x, const Operand& op) { vunpcklpd(x, x, op); }
-void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0x14); }
+void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x14); }
void vunpcklps(const Xmm& x, const Operand& op) { vunpcklps(x, x, op); }
void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_0F3A | T_66, 0xDF, imm); }
void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_0F3A | T_66 | T_YMM, 0x09, imm); }