paddpd zmm2, zmm3, ptr [rax]
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index bc661cb..0c0a9b3 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -78,8 +78,13 @@
const uint64 XMM = _XMM | _XMM2;
const uint64 YMM = _YMM | _YMM2;
const uint64 K = 1ULL << 43;
-const uint64 _ZMM = 1ULL << 44; // max value
+const uint64 _ZMM = 1ULL << 44;
+const uint64 _ZMM2 = 1ULL << 45; // max value
+#ifdef XBYAK64
+const uint64 ZMM = _ZMM | _ZMM2;
+#else
const uint64 ZMM = _ZMM;
+#endif
const uint64 NOPARA = 1ULL << (bitEnd - 1);
@@ -190,6 +195,13 @@
};
return tbl[idx];
}
+ case _ZMM2:
+ {
+ static const char tbl[][6] = {
+ "zmm8", "zmm9", "zmm10", "zmm11", "zmm28", "zmm29", "zmm30", "zmm31",
+ };
+ return tbl[idx];
+ }
#endif
case _MEM:
return isXbyak_ ? "ptr[eax+ecx+3]" : "[eax+ecx+3]";
@@ -1373,7 +1385,7 @@
put(p, YMM, YMM | MEM);
put(p, YMM, YMM, YMM | MEM);
if (!tbl[i].supportZMM) continue;
- put(p, ZMM, ZMM, ZMM); // QQQ
+ put(p, ZMM, ZMM, ZMM | MEM);
}
}
}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index b996d49..5f9d6ce 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1330,7 +1330,7 @@
T_EW1 = 1 << 14,
T_SUPPORT_YMM = 1 << 15
};
- void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x)
+ void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{
int w = (type & T_W1) ? 1 : 0;
bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
@@ -1347,12 +1347,30 @@
}
db(code);
}
- void evex(bool R, bool X, bool B, bool Rp, int mm, bool W, int vvvv, int pp, bool z, int LL, bool b, bool Vp, int aaa)
+ void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code) // emits byte 0x62, three payload bytes built from reg/base/v/type, then the opcode byte 'code'
{
+ int w = (type & T_EW1) ? 1 : 0; // W bit is driven by the T_EW1 flag only
+ // bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
+ uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; // 2-bit selector: T_0F=1, T_0F38=2, T_0F3A=3, none=0
+ uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; // 2-bit selector: T_66=1, T_F3=2, T_F2=3, none=0
+
+ int idx = v ? v->getIdx() : 0; // v may be null; index 0 is used in that case
+ uint32 vvvv = ~idx; // stored complemented; only the low 4 bits are emitted below
+
+ bool R = !reg.isExtIdx(); // R, X, B and Rp are emitted as the negation of the extension flags
+ bool X = !base.isExtIdx2(); // NOTE(review): derived from base only; unlike vex() there is no index-register input here - confirm for memory operands
+ bool B = !base.isExtIdx();
+ bool Rp = !reg.isExtIdx2();
+ int LL = reg.isZMM() ? 2 : reg.isYMM() ? 1 : 0; // 2 for ZMM, 1 for YMM, 0 otherwise; chosen from reg alone
+ bool b = false; // always emitted as 0 by this function
+ bool Vp = !(v ? v->isExtIdx2() : 0); // negated like R/X/B; true when v is null
+ bool z = reg.hasZero(); // z and aaa are both read from reg
+ int aaa = reg.getOpmaskIdx(); // only the low 3 bits are emitted below
db(0x62);
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
- db((W ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
+ db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); // bit 2 is always set (the literal 4)
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
+ db(code); // opcode byte is now written here; the caller emits ModRM or the address bytes afterwards
}
void setModRM(int mod, int r1, int r2)
{
@@ -1659,50 +1677,26 @@
{
if (op2.isMEM()) {
const Address& addr = static_cast<const Address&>(op2);
+ const Reg& base = addr.getRegExp().getBase();
if (BIT == 64 && addr.is32bit()) db(0x67);
- bool x = addr.getRegExp().getIndex().isExtIdx();
- vex(r, addr.getRegExp().getBase(), p1, type, code, x);
+ if (r.hasEvex() || (p1 && p1->hasEvex()) /*|| base.hasEvex()*/) {
+ evex(r, base, p1, type, code);
+ } else {
+ bool x = addr.getRegExp().getIndex().isExtIdx();
+ vex(r, base, p1, type, code, x);
+ }
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0);
} else {
- const Reg& r3 = static_cast<const Reg&>(op2);
- if (r.hasEvex() || (p1 && p1->hasEvex()) || r3.hasEvex()) {
- assert(p1); // QQQ
- opEvex(r, static_cast<const Reg&>(*p1), r3, type, code);
+ const Reg& base = static_cast<const Reg&>(op2);
+ if (r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
+ evex(r, base, p1, type, code);
} else {
- bool x = false;
- vex(r, r3, p1, type, code, x);
- setModRM(3, r.getIdx(), r3.getIdx());
+ vex(r, base, p1, type, code);
}
+ setModRM(3, r.getIdx(), base.getIdx());
}
if (imm8 != NONE) db(imm8);
}
- void opEvex(const Reg& x1, const Reg& x2, const Reg& x3, int type, int code)
- {
- int w = (type & T_EW1) ? 1 : 0;
- // bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : x1.isYMM();
- uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
- uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
-
- int idx = x2.getIdx();
- uint32 vvvv = ~idx;
-
- bool R = !x1.isExtIdx();
- bool X = !x3.isExtIdx2();
- bool B = !x3.isExtIdx();
- bool Rp = !x1.isExtIdx2();
- int LL = x1.isZMM() ? 2 : x1.isYMM() ? 1 : 0;
- bool b = false;
- bool Vp = !x2.isExtIdx2();
- bool z = x1.hasZero();
- int aaa = x1.getOpmaskIdx();
- evex(R, X, B, Rp, mm, w == 1, vvvv, pp, z, LL, b, Vp, aaa);
- db(code);
- setModRM(3, x1.getIdx(), x3.getIdx());
-
- // opVex(x1, &x2, &x3, T_0F | T_66, 0x58, NONE);
- // opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66, 0x58, true);
- }
-public:
// (r, r, r/m) if isR_R_RM
// (r, r/m, r)
void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE)