add EVEX encoding and mem{k} opmask support for vmovsd, vmovss
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index 745a0f8..b205c90 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -24,6 +24,7 @@
T_MUST_EVEX = 1 << 23,
T_B32 = 1 << 24, // m32bcst
T_B64 = 1 << 25, // m64bcst
+ T_M_K = 1 << 26, // mem{k}
T_XXX
};
@@ -124,5 +125,9 @@
if (!str.empty()) str += " | ";
str += "T_B64";
}
+ if (type & T_M_K) {
+ if (!str.empty()) str += " | ";
+ str += "T_M_K";
+ }
return str;
}
\ No newline at end of file
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 46e4ed5..6370009 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1541,10 +1541,12 @@
// vmovsd, vmovss
for (int i = 0; i < 2; i++) {
char c1 = i == 0 ? 'd' : 's';
- char c2 = i == 0 ? '2' : '3';
- printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F%c, 0x10); }\n", c1, c2);
- printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F%c, 0x10); }\n", c1, c2);
- printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F%c, 0x11); }\n", c1, c2);
+ int type = T_0F | T_EVEX;
+ type |= i == 0 ? T_F2 | T_EW1 : T_F3 | T_EW0;
+ std::string s = type2String(type);
+ printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str());
+ printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str());
+ printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str());
}
}
// cvt
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index a65030e..7babd15 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -97,12 +97,13 @@
#ifdef XBYAK64
const uint64 XMM_KZ = 1ULL << 52;
const uint64 YMM_KZ = 1ULL << 53;
-const uint64 ZMM_KZ = 1ULL << 54; // max value
+const uint64 ZMM_KZ = 1ULL << 54;
#else
const uint64 XMM_KZ = 0;
const uint64 YMM_KZ = 0;
const uint64 ZMM_KZ = 0;
#endif
+const uint64 MEM_K = 1ULL << 55; // max value
const uint64 NOPARA = 1ULL << (bitEnd - 1);
@@ -388,6 +389,8 @@
return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}";
case ZMM_KZ:
return isXbyak_ ? "zmm7|k1" : "zmm7{k1}";
+ case MEM_K:
+ return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}";
#else
case XMM_SAE:
return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
@@ -395,6 +398,8 @@
return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
case ZMM_ER:
return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}";
+ case MEM_K:
+ return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
#endif
}
return 0;
@@ -2616,6 +2621,13 @@
put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
+
+ put("vmovsd", XMM_KZ, _XMM3, _XMM3);
+ put("vmovsd", XMM_KZ, MEM);
+ put("vmovsd", MEM_K, XMM);
+ put("vmovss", XMM_KZ, _XMM3, _XMM3);
+ put("vmovss", XMM_KZ, MEM);
+ put("vmovss", MEM_K, XMM);
{
const char tbl[][16] = {
"vmovhpd",
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 93173ab..4e545c5 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -172,6 +172,7 @@
ERR_SAE_IS_INVALID,
ERR_ER_IS_INVALID,
ERR_INVALID_BROADCAST,
+ ERR_INVALID_OPMASK_WITH_MEMORY,
ERR_INTERNAL
};
@@ -229,6 +230,7 @@
"sae(suppress all exceptions) is invalid",
"er(embedded rounding) is invalid",
"invalid broadcast",
+ "invalid opmask with memory",
"internal error",
};
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
@@ -546,7 +548,7 @@
template<class T>
T operator|(const T& x, const Opmask& k)
{
- if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM | Operand::OPMASK)) throw Error(ERR_BAD_COMBINATION);
+ if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM | Operand::OPMASK | Operand::MEM)) throw Error(ERR_BAD_COMBINATION);
T r(x);
r.setOpmaskIdx(k.getIdx());
return r;
@@ -955,7 +957,7 @@
}
#ifdef XBYAK64
explicit Address(size_t disp)
- : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false) { }
+ : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(M_rip), permitVsib_(false), broadcast_(broadcast) { }
#endif
@@ -1364,6 +1366,7 @@
T_MUST_EVEX = 1 << 23,
T_B32 = 1 << 24, // m32bcst
T_B64 = 1 << 25, // m64bcst
+ T_M_K = 1 << 26, // mem{k}
T_XXX
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@@ -1401,7 +1404,7 @@
T_RZ_SAE = 4,
T_SAE = 5,
};
- void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false)
+ void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0)
{
if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID);
int w = (type & T_EW1) ? 1 : 0;
@@ -1431,7 +1434,7 @@
}
bool Vp = !(v ? v->isExtIdx2() : 0);
bool z = reg.hasZero();
- int aaa = reg.getOpmaskIdx();
+ if (aaa == 0) aaa = reg.getOpmaskIdx();
db(0x62);
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
@@ -1756,7 +1759,9 @@
if (BIT == 64 && addr.is32bit()) db(0x67);
int disp8N = 0;
bool x = addr.getRegExp().getIndex().isExtIdx();
- if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast()) {
+ if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
+ int aaa = addr.getOpmaskIdx(); // opmask index attached to the memory operand (0 = none)
+ if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); // logical AND: any nonzero opmask on memory is rejected unless the type permits mem{k}; bitwise '&' against the 0/1 result of '!' only tested bit 0 and let k2/k4/k6 through
bool b = false;
if (addr.isBroadcast()) {
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
@@ -1765,7 +1770,7 @@
} else {
disp8N = 1;
}
- evex(r, base, p1, type, code, x, b);
+ evex(r, base, p1, type, code, x, b, aaa);
} else {
vex(r, base, p1, type, code, x);
}
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 89d8715..51f04cf 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1467,12 +1467,12 @@
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }
-void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F2, 0x10); }
-void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2, 0x10); }
-void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2, 0x11); }
-void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F3, 0x10); }
-void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3, 0x10); }
-void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3, 0x11); }
+void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F2 | T_EW1 | T_EVEX, 0x10); }
+void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2 | T_EW1 | T_EVEX, 0x10); }
+void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2 | T_EW1 | T_EVEX | T_M_K, 0x11); }
+void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F3 | T_EW0 | T_EVEX, 0x10); }
+void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3 | T_EW0 | T_EVEX, 0x10); }
+void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3 | T_EW0 | T_EVEX | T_M_K, 0x11); }
void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0, 0x2D); }
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0, 0x2C); }
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0, 0x2D); }