Add AVX-512 vcmp{pd,ps,sd,ss} with opmask destination
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index 1fd253f..e7b2883 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -14,7 +14,10 @@
T_EW0 = 1 << 13,
T_EW1 = 1 << 14,
T_YMM = 1 << 15,
- T_EVEX = 1 << 16
+ T_EVEX = 1 << 16,
+ T_ER = 1 << 17,
+ T_SAE = 1 << 18,
+ T_MUST_EVEX = 1 << 19
};
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
@@ -78,5 +81,17 @@
if (!str.empty()) str += " | ";
str += "T_EVEX";
}
+ if (type & T_ER) {
+ if (!str.empty()) str += " | ";
+ str += "T_ER";
+ }
+ if (type & T_SAE) {
+ if (!str.empty()) str += " | ";
+ str += "T_SAE";
+ }
+ if (type & T_MUST_EVEX) {
+ if (!str.empty()) str += " | ";
+ str += "T_MUST_EVEX";
+ }
return str;
}
\ No newline at end of file
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 4b19075..7f86fa9 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1664,6 +1664,27 @@
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
}
}
+	// AVX-512: compare into an opmask register
+	// prints vcmp{pd,ps,sd,ss}(k, x, op[, imm]) wrappers that forward to opAVX_K_X_XM
+	{
+		const struct Entry {
+			uint8 opcode; // opcode byte, printed as 0x%02X
+			const char *mnemonic; // name without the leading 'v'
+			int flags; // T_* bits, rendered through type2String()
+			bool takesImm; // true: add a trailing "uint8 imm" parameter and forward it
+		} entries[] = {
+			{ 0xC2, "cmppd", T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, true },
+			{ 0xC2, "cmpps", T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, true },
+			{ 0xC2, "cmpsd", T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE | T_MUST_EVEX, true },
+			{ 0xC2, "cmpss", T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE | T_MUST_EVEX, true },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(entries); i++) {
+			const Entry& e = entries[i];
+			const char *immParam = e.takesImm ? ", uint8 imm" : "";
+			const char *immArg = e.takesImm ? ", imm" : "";
+			printf("void v%s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n", e.mnemonic, immParam, type2String(e.flags).c_str(), e.opcode, immArg);
+		}
+	}
}
int main()
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index fdfd2c9..acc4b15 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -82,11 +82,14 @@
const uint64 _ZMM2 = 1ULL << 45;
#ifdef XBYAK64
const uint64 ZMM = _ZMM | _ZMM2;
-const uint64 _YMM3 = 1ULL << 46; // max value
+const uint64 _YMM3 = 1ULL << 46;
#else
const uint64 ZMM = _ZMM;
const uint64 _YMM3 = 0;
#endif
+const uint64 K2 = 1ULL << 47;
+const uint64 ZMM_SAE = 1ULL << 48;
+const uint64 ZMM_ER = 1ULL << 49; // max value
const uint64 NOPARA = 1ULL << (bitEnd - 1);
@@ -350,6 +353,19 @@
};
return kTbl[idx % 7];
}
+ case K2:
+ return isXbyak_ ? "k3 | k5" : "k3{k5}";
+#ifdef XBYAK64
+ case ZMM_SAE:
+ return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
+ case ZMM_ER:
+ return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
+#else
+ case ZMM_SAE:
+ return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
+ case ZMM_ER:
+ return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}";
+#endif
}
return 0;
}
@@ -2480,10 +2496,31 @@
}
}
}
+	void putCmpK() // test patterns for vcmp{pd,ps,sd,ss} writing to an opmask operand
+	{
+		const struct Entry {
+			const char *mnemonic; // instruction name handed to put()
+			bool hasWideForms; // true: also emit the YMM and ZMM operand forms
+		} entries[] = {
+			{ "vcmppd", true },
+			{ "vcmpps", true },
+			{ "vcmpsd", false },
+			{ "vcmpss", false },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(entries); i++) {
+			put(entries[i].mnemonic, K, XMM, XMM | MEM, IMM); // XMM form is emitted for every entry
+			if (entries[i].hasWideForms) {
+				put(entries[i].mnemonic, K, YMM, YMM | MEM, IMM);
+				put(entries[i].mnemonic, K, ZMM, ZMM | MEM, IMM);
+			}
+		}
+		put("vcmppd", K2, ZMM, ZMM_SAE, IMM); // masked destination (K2) combined with an {sae} source (ZMM_SAE)
+	}
void putAVX512()
{
putOpmask();
putCombi();
+ putCmpK(); // vcmp{pd,ps,sd,ss} with an opmask destination
}
#endif
};
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 324aa62..513520c 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -339,7 +339,7 @@
static const uint8 EXT8BIT = 0x80;
unsigned int idx_:8; // 0..31, EXT8BIT = 1 if spl/bpl/sil/dil
unsigned int kind_:8;
- unsigned int bit_:9;
+ unsigned int bit_:10;
protected:
unsigned int zero_:1;
unsigned int mask_:3;
@@ -372,7 +372,7 @@
Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
: idx_(static_cast<uint8>(idx | (ext8bit ? EXT8BIT : 0)))
, kind_(static_cast<uint8>(kind))
- , bit_(static_cast<uint16>(bit))
+ , bit_(bit)
, zero_(0), mask_(0), rounding_(0)
{
assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
@@ -516,10 +516,6 @@
explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
};
-struct Opmask : public Reg {
- explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
-};
-
struct EvexModifierRounding {
explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
int rounding;
@@ -559,10 +555,14 @@
Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
};
+struct Opmask : public Reg { // opmask register operand: a Reg of kind Operand::OPMASK, 64 bits wide
+ explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} // idx selects the register; explicit blocks implicit int -> Opmask conversion
+};
+
template<class T>
T operator|(const T& x, const Opmask& k)
{
- if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM)) throw Error(ERR_BAD_COMBINATION);
+ if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM | Operand::OPMASK)) throw Error(ERR_BAD_COMBINATION);
T r(x);
r.setOpmaskIdx(k.getIdx());
return r;
@@ -1369,7 +1369,8 @@
T_YMM = 1 << 15,
T_EVEX = 1 << 16,
T_ER = 1 << 17,
- T_SAE = 1 << 18
+ T_SAE = 1 << 18,
+ T_MUST_EVEX = 1 << 19
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{
@@ -1388,6 +1389,7 @@
}
db(code);
}
+	int Max(int a, int b) const { return (b < a) ? a : b; } // larger of two ints; evex() uses it to find the widest operand bit size
void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{
if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID);
@@ -1404,17 +1406,16 @@
bool B = !base.isExtIdx();
bool Rp = !reg.isExtIdx2();
bool b = false;
- int LL = 2;
- if (reg.isZMM()) {
- int rounding = base.getRounding();
- if (rounding) {
- if (rounding == inner::T_SAE && !(type & T_SAE)) throw Error(ERR_SAE_IS_INVALID);
- if (rounding != inner::T_SAE && !(type & T_ER)) throw Error(ERR_ER_IS_INVALID);
- LL = rounding - 1;
- b = true;
- }
+ int LL;
+ int rounding = base.getRounding();
+ if (rounding) {
+ if (!base.isZMM() || (rounding == inner::T_SAE && !(type & T_SAE))) throw Error(ERR_SAE_IS_INVALID);
+ if (!base.isZMM() || (rounding != inner::T_SAE && !(type & T_ER))) throw Error(ERR_ER_IS_INVALID);
+ LL = rounding - 1;
+ b = true;
} else {
- LL = reg.isYMM() ? 1 : 0;
+ int bit = Max(Max(reg.getBit(), base.getBit()), (v ? v->getBit() : 0));
+ LL = (bit == 512) ? 2 : (bit == 256) ? 1 : 0;
}
bool Vp = !(v ? v->isExtIdx2() : 0);
bool z = reg.hasZero();
@@ -1734,7 +1735,7 @@
if (BIT == 64 && addr.is32bit()) db(0x67);
bool disp32 = false;
bool x = addr.getRegExp().getIndex().isExtIdx();
- if (r.hasEvex() || (p1 && p1->hasEvex()) /*|| base.hasEvex()*/) {
+ if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex())) {
evex(r, base, p1, type, code, x);
disp32 = true;
} else {
@@ -1743,7 +1744,7 @@
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp32);
} else {
const Reg& base = static_cast<const Reg&>(op2);
- if (r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
+ if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
evex(r, base, p1, type, code);
} else {
vex(r, base, p1, type, code);
@@ -1780,6 +1781,11 @@
if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION);
opVex(x1, x2, *op, type, code0, imm8);
}
+	void opAVX_K_X_XM(const Opmask& k1, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE) // encode "op k1, x2, op3[, imm8]" via opVex
+	{
+		if (!(op3.isMEM() || x2.getKind() == op3.getKind())) throw Error(ERR_BAD_COMBINATION); // a register op3 must be the same kind as x2; memory is always accepted
+		opVex(k1, &x2, op3, type, code0, imm8); // k1 is the first operand, x2 is passed by pointer as the second
+	}
// if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op
void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, int imm8 = NONE)
{
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 4e9ebba..123071a 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1519,3 +1519,7 @@
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
+void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); } // vcmp* with opmask destination: these four lines mirror the AVX-512 table in gen/gen_code.cpp - keep both in sync
+void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
+void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }
+void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE | T_MUST_EVEX, 0xC2, imm); }