add vcvtpd2uqq; add AVX-512 (EVEX) forms of vcvtph2ps, vcvtps2ph
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index b7a34b4..7622626 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -139,6 +139,7 @@
{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
+ { 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 9881eb3..a83f45a 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1584,17 +1584,18 @@
printf("void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F3; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }\n");
printf("void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F2; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }\n");
- printf("void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }\n");
- printf("void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!(op.isMEM() || (x.is(Operand::XMM | Operand::YMM) && op.isXMM()) || (x.isZMM() && op.isYMM()))) throw Error(ERR_BAD_COMBINATION);"
- "opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }\n");
+ printf("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }\n");
+ printf("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }\n");
puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }");
puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
printf("void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, cvtIdx0(op), op, T_0F | T_66 | T_YMM, 0xE6); }\n");
- printf("void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F38 | T_66 | T_W0, 0x13); }\n");
- printf("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0, 0x1d, imm); }\n");
+ printf("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }\n");
+ printf("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op);"
+ "int type = T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y;"
+ "if (op.isYMM()) { Xmm x1 = static_cast<const Xmm&>(op), x2 = x; x2.swapAttr(x1); opVex(x2, 0, x1, type, 0x1D, imm); } else { opVex(x, 0, op, type, 0x1D, imm); } }\n");
}
// x64
{
diff --git a/test/make_512.cpp b/test/make_512.cpp
index bba0afd..b5c90be 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -9,7 +9,7 @@
const int bitEnd = 64;
-const uint64 MMX = 1ULL << 0;
+const uint64 YMM_SAE = 1ULL << 0;
const uint64 _XMM = 1ULL << 1;
const uint64 _MEM = 1ULL << 2;
const uint64 _REG32 = 1ULL << 3;
@@ -178,13 +178,6 @@
return "st2";
}
switch (type) {
- case MMX:
- {
- static const char MmxTbl[][4] = {
- "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
- };
- return MmxTbl[idx];
- }
case _XMM:
{
static const char tbl[][6] = {
@@ -393,6 +386,8 @@
#ifdef XBYAK64
case XMM_SAE:
return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
+ case YMM_SAE:
+ return isXbyak_ ? "ymm25 | T_sae" : "ymm25, {sae}";
case ZMM_SAE:
return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
case XMM_ER:
@@ -410,6 +405,8 @@
#else
case XMM_SAE:
return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
+ case YMM_SAE:
+ return isXbyak_ ? "ymm5 | T_sae" : "ymm5, {sae}";
case ZMM_SAE:
return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
case XMM_ER:
@@ -1427,22 +1424,31 @@
put("vcvtpd2udq", _XMM | _XMM3, _XMM | M_xword | M_1to2);
put("vcvtpd2udq", _XMM | _XMM3, _YMM | M_yword | MY_1to4);
put("vcvtpd2udq", YMM | YMM_KZ, ZMM | _MEM | M_1to8);
+
+ put("vcvtpd2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
+ put("vcvtpd2uqq", YMM_KZ, _YMM | _MEM | M_1to4);
+ put("vcvtpd2uqq", ZMM_KZ, _ZMM | _MEM | M_1to8);
+
+ put("vcvtph2ps", XMM_KZ, _XMM | _MEM);
+ put("vcvtph2ps", YMM_KZ, _XMM | _MEM);
+ put("vcvtph2ps", ZMM_KZ, _YMM | _MEM | YMM_SAE);
+
+ put("vcvtps2ph", _XMM | _MEM, _XMM, IMM8);
+ put("vcvtps2ph", _XMM | _MEM, _YMM, IMM8);
+ put("vcvtps2ph", _YMM | YMM_KZ | _MEM, _ZMM, IMM8);
+ put("vcvtps2ph", _YMM | YMM_KZ, ZMM_SAE, IMM8);
#endif
}
void putMin()
{
#ifdef XBYAK64
- put("vpcmpeqb", K, _XMM3|_YMM, _MEM);
- put("vpcmpeqw", K, _XMM3|_YMM, _MEM);
- put("vpcmpeqd", K, _XMM3|_YMM, _MEM);
- put("vpcmpeqq", K, _XMM3|_YMM, _MEM);
+ put512_cvt();
#endif
}
void putAVX512()
{
#ifdef MIN_TEST
-// putMin();
- put512_cvt();
+ putMin();
#else
putOpmask();
separateFunc();
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 99bf96c..dc55461 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -407,6 +407,14 @@
kind_ = kind;
bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
}
+ // swap zero_, mask_, rounding_
+ void swapAttr(Operand& rhs)
+ {
+ int t;
+ t = zero_; zero_ = rhs.zero_; rhs.zero_ = t;
+ t = mask_; mask_ = rhs.mask_; rhs.mask_ = t;
+ t = rounding_; rounding_ = rhs.rounding_; rhs.rounding_ = t;
+ }
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
{
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
@@ -1853,9 +1861,19 @@
// use static_cast to avoid calling unintentional copy constructor on gcc
opAVX_X_X_XM(x, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8);
}
- void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+ // (x, x/m), (y, x/m128), (z, y/m)
+ void checkCvt1(const Operand& x, const Operand& op) const
+ {
+ if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION);
+ }
+ // (x, x/m), (x, y/m256), (y, z/m)
+ void checkCvt2(const Xmm& x, const Operand& op) const
{
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION);
+ }
+ void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+ {
+ checkCvt2(x, op);
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
}
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index c9ecc9b..3925525 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -83,6 +83,7 @@
void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
+void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 108769c..6c1856c 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1480,13 +1480,13 @@
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0, 0x2C); }
void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F3; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }
void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F2; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }
-void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }
-void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!(op.isMEM() || (x.is(Operand::XMM | Operand::YMM) && op.isXMM()) || (x.isZMM() && op.isYMM()))) throw Error(ERR_BAD_COMBINATION);opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
+void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }
+void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, cvtIdx0(op), op, T_0F | T_66 | T_YMM, 0xE6); }
-void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F38 | T_66 | T_W0, 0x13); }
-void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0, 0x1d, imm); }
+void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
+void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op);int type = T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y;if (op.isYMM()) { Xmm x1 = static_cast<const Xmm&>(op), x2 = x; x2.swapAttr(x1); opVex(x2, 0, x1, type, 0x1D, imm); } else { opVex(x, 0, op, type, 0x1D, imm); } }
#ifdef XBYAK64
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }