add vcomi{sd,ss}
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index e7b2883..726b46b 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -15,9 +15,13 @@
T_EW1 = 1 << 14,
T_YMM = 1 << 15,
T_EVEX = 1 << 16,
- T_ER = 1 << 17,
- T_SAE = 1 << 18,
- T_MUST_EVEX = 1 << 19
+ T_ER_X = 1 << 17, // xmm{er}
+ T_ER_Y = 1 << 18, // ymm{er}
+ T_ER_Z = 1 << 19, // zmm{er}
+ T_SAE_X = 1 << 20, // xmm{sae}
+ T_SAE_Y = 1 << 21, // ymm{sae}
+ T_SAE_Z = 1 << 22, // zmm{sae}
+ T_MUST_EVEX = 1 << 23
};
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
@@ -81,13 +85,29 @@
if (!str.empty()) str += " | ";
str += "T_EVEX";
}
- if (type & T_ER) {
+ if (type & T_ER_X) {
if (!str.empty()) str += " | ";
- str += "T_ER";
+ str += "T_ER_X";
}
- if (type & T_SAE) {
+ if (type & T_ER_Y) {
if (!str.empty()) str += " | ";
- str += "T_SAE";
+ str += "T_ER_Y";
+ }
+ if (type & T_ER_Z) {
+ if (!str.empty()) str += " | ";
+ str += "T_ER_Z";
+ }
+ if (type & T_SAE_X) {
+ if (!str.empty()) str += " | ";
+ str += "T_SAE_X";
+ }
+ if (type & T_SAE_Y) {
+ if (!str.empty()) str += " | ";
+ str += "T_SAE_Y";
+ }
+ if (type & T_SAE_Z) {
+ if (!str.empty()) str += " | ";
+ str += "T_SAE_Z";
}
if (type & T_MUST_EVEX) {
if (!str.empty()) str += " | ";
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 7d93f57..8ab0cc2 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -101,10 +101,10 @@
int type;
bool hasIMM;
} tbl[] = {
- { 0xC2, "cmppd", T_0F | T_EVEX | T_MUST_EVEX | T_EW1 | T_SAE | T_YMM | T_66, true },
- { 0xC2, "cmpps", T_0F | T_EVEX | T_MUST_EVEX | T_EW0 | T_SAE | T_YMM, true },
- { 0xC2, "cmpsd", T_0F | T_EVEX | T_MUST_EVEX | T_EW1 | T_SAE | T_F2, true },
- { 0xC2, "cmpss", T_0F | T_EVEX | T_MUST_EVEX | T_EW0 | T_SAE | T_F3, true },
+ { 0xC2, "cmppd", T_0F | T_EVEX | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true },
+ { 0xC2, "cmpps", T_0F | T_EVEX | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true },
+ { 0xC2, "cmpsd", T_0F | T_EVEX | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2, true },
+ { 0xC2, "cmpss", T_0F | T_EVEX | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 4b19075..3d5e7f0 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1003,11 +1003,11 @@
};
for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x%02X); }\n", p->name, p->code);
- printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x%02X); }\n", p->name, p->code);
+ printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);
+ printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);
if (p->only_pd_ps) continue;
- printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x%02X); }\n", p->name, p->code);
- printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x%02X); }\n", p->name, p->code);
+ printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);
+ printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);
}
}
// (x, x, x/m[, imm]) or (y, y, y/m[, imm])
@@ -1199,8 +1199,8 @@
{ 0x62, "pcmpistrm", T_0F3A | T_66, true },
{ 0x0E, "testps", T_0F38 | T_66 | T_YMM, false },
{ 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false },
- { 0x2F, "comisd", T_0F | T_66, false },
- { 0x2F, "comiss", T_0F, false },
+ { 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X, false },
+ { 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X, false },
{ 0x5B, "cvtdq2ps", T_0F | T_YMM, false },
{ 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM, false },
{ 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index acc4b15..d22aba2 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -89,7 +89,11 @@
#endif
const uint64 K2 = 1ULL << 47;
const uint64 ZMM_SAE = 1ULL << 48;
-const uint64 ZMM_ER = 1ULL << 49; // max value
+const uint64 ZMM_ER = 1ULL << 49;
+#ifdef XBYAK64
+const uint64 _XMM3 = 1ULL << 50;
+#endif
+const uint64 XMM_SAE = 1ULL << 51; // max value
const uint64 NOPARA = 1ULL << (bitEnd - 1);
@@ -193,6 +197,13 @@
};
return tbl[idx];
}
+ case _XMM3:
+ {
+ static const char tbl[][6] = {
+ "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23"
+ };
+ return tbl[idx];
+ }
case _YMM2:
{
static const char tbl[][6] = {
@@ -356,11 +367,15 @@
case K2:
return isXbyak_ ? "k3 | k5" : "k3{k5}";
#ifdef XBYAK64
+ case XMM_SAE:
+ return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
case ZMM_SAE:
return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
case ZMM_ER:
return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
#else
+ case XMM_SAE:
+ return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
case ZMM_SAE:
return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
case ZMM_ER:
@@ -2498,23 +2513,31 @@
}
void putCmpK()
{
- const struct Tbl {
- const char *name;
- bool supportYMM;
- } tbl[] = {
- { "vcmppd", true },
- { "vcmpps", true },
- { "vcmpsd", false },
- { "vcmpss", false },
- };
- for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
- const Tbl *p = &tbl[i];
- put(p->name, K, XMM, XMM | MEM, IMM);
- if (!p->supportYMM) continue;
- put(p->name, K, YMM, YMM | MEM, IMM);
- put(p->name, K, ZMM, ZMM | MEM, IMM);
+ {
+ const struct Tbl {
+ const char *name;
+ bool supportYMM;
+ } tbl[] = {
+ { "vcmppd", true },
+ { "vcmpps", true },
+ { "vcmpsd", false },
+ { "vcmpss", false },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ put(p->name, K, XMM, XMM | MEM, IMM);
+ if (!p->supportYMM) continue;
+ put(p->name, K, YMM, YMM | MEM, IMM);
+ put(p->name, K, ZMM, ZMM | MEM, IMM);
+ }
}
put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
+#ifdef XBYAK64
+ put("vcomisd", _XMM3, XMM | MEM);
+ put("vcomisd", XMM, XMM_SAE);
+ put("vcomiss", _XMM3, XMM | MEM);
+ put("vcomiss", XMM, XMM_SAE);
+#endif
}
void putAVX512()
{
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 513520c..535c1ae 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1368,9 +1368,13 @@
T_EW1 = 1 << 14,
T_YMM = 1 << 15,
T_EVEX = 1 << 16,
- T_ER = 1 << 17,
- T_SAE = 1 << 18,
- T_MUST_EVEX = 1 << 19
+ T_ER_X = 1 << 17, // xmm{er}
+ T_ER_Y = 1 << 18, // ymm{er}
+ T_ER_Z = 1 << 19, // zmm{er}
+ T_SAE_X = 1 << 20, // xmm{sae}
+ T_SAE_Y = 1 << 21, // ymm{sae}
+ T_SAE_Z = 1 << 22, // zmm{sae}
+ T_MUST_EVEX = 1 << 23
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{
@@ -1390,6 +1394,16 @@
db(code);
}
int Max(int a, int b) const { return a > b ? a : b; }
+ void verifySAE(const Reg& r, int type) const
+ {
+ if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
+ throw Error(ERR_SAE_IS_INVALID);
+ }
+ void verifyER(const Reg& r, int type) const
+ {
+ if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
+ throw Error(ERR_ER_IS_INVALID);
+ }
void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{
if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID);
@@ -1409,8 +1423,8 @@
int LL;
int rounding = base.getRounding();
if (rounding) {
- if (!base.isZMM() || (rounding == inner::T_SAE && !(type & T_SAE))) throw Error(ERR_SAE_IS_INVALID);
- if (!base.isZMM() || (rounding != inner::T_SAE && !(type & T_ER))) throw Error(ERR_ER_IS_INVALID);
+ if (rounding == inner::T_SAE) verifySAE(base, type);
+ if (rounding != inner::T_SAE) verifyER(base, type);
LL = rounding - 1;
b = true;
} else {
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 9f31568..b0b06e7 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -64,8 +64,8 @@
void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
-void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
-void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
-void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
-void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
+void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
#endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 4e9ebba..af06495 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -704,38 +704,38 @@
void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); }
void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
-void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x58); }
-void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x58); }
-void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x58); }
-void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x58); }
-void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x5C); }
-void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x5C); }
-void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x5C); }
-void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x5C); }
-void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x59); }
-void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x59); }
-void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x59); }
-void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x59); }
-void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x5E); }
-void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x5E); }
-void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x5E); }
-void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x5E); }
-void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x5F); }
-void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x5F); }
-void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x5F); }
-void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x5F); }
-void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x5D); }
-void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x5D); }
-void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER, 0x5D); }
-void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER, 0x5D); }
-void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x54); }
-void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x54); }
-void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x55); }
-void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x55); }
-void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x56); }
-void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x56); }
-void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER, 0x57); }
-void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER, 0x57); }
+void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x58); }
+void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x58); }
+void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x58); }
+void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x58); }
+void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5C); }
+void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5C); }
+void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5C); }
+void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5C); }
+void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x59); }
+void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x59); }
+void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x59); }
+void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x59); }
+void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5E); }
+void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5E); }
+void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5E); }
+void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5E); }
+void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5F); }
+void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5F); }
+void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5F); }
+void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5F); }
+void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5D); }
+void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5D); }
+void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5D); }
+void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5D); }
+void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x54); }
+void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x54); }
+void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x55); }
+void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x55); }
+void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x56); }
+void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x56); }
+void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x57); }
+void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x57); }
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0D, imm); }
void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); }
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0C, imm); }
@@ -973,8 +973,8 @@
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_0F3A | T_66, 0x62, imm); }
void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x0E); }
void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x0F); }
-void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66, 0x2F); }
-void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F, 0x2F); }
+void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_EVEX | T_SAE_X, 0x2F); }
+void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x5B); }
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x5B); }
void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x5B); }