add vpmov*
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index d724edb..6625e4e 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -461,6 +461,43 @@
puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
}
+void putMov()
+{
+ puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }");
+ puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }");
+ puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }");
+ puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }");
+
+ puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }");
+ puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }");
+ puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }");
+ puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }");
+
+ puts("void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x32, false); }");
+ puts("void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x22, false); }");
+ puts("void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x12, false); }");
+
+ puts("void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x34, false); }");
+ puts("void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x24, false); }");
+ puts("void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x14, false); }");
+
+ puts("void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x35, true); }");
+ puts("void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x25, true); }");
+ puts("void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x15, true); }");
+
+ puts("void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x31, false); }");
+ puts("void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x21, false); }");
+ puts("void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x11, false); }");
+
+ puts("void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x33, true); }");
+ puts("void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x23, true); }");
+ puts("void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x13, true); }");
+
+ puts("void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x30, true); }");
+ puts("void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x20, true); }");
+ puts("void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x10, true); }");
+}
+
int main()
{
puts("#ifndef XBYAK_DISABLE_AVX512");
@@ -478,5 +515,6 @@
putCvt();
putGather();
putShuff();
+ putMov();
puts("#endif");
}
diff --git a/test/make_512.cpp b/test/make_512.cpp
index 9a40304..55f2f57 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -1752,6 +1752,57 @@
put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
}
+ void putMov()
+ {
+ put("vpmovm2b", _XMM | _YMM | _ZMM, K);
+ put("vpmovm2w", _XMM | _YMM | _ZMM, K);
+ put("vpmovm2d", _XMM | _YMM | _ZMM, K);
+ put("vpmovm2q", _XMM | _YMM | _ZMM, K);
+
+ put("vpmovb2m", K, _XMM | _YMM | _ZMM);
+ put("vpmovw2m", K, _XMM | _YMM | _ZMM);
+ put("vpmovd2m", K, _XMM | _YMM | _ZMM);
+ put("vpmovq2m", K, _XMM | _YMM | _ZMM);
+
+ put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+
+ put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+
+ put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovqd", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovsqd", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovusqd", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+ put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
+
+ put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovdw", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovsdw", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovusdw", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovwb", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovswb", YMM_KZ | _MEM, _ZMM);
+
+ put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
+ put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
+ }
void putMin()
{
#ifdef XBYAK64
@@ -1807,6 +1858,8 @@
putShuff();
separateFunc();
putMisc2();
+ separateFunc();
+ putMov();
#endif
}
};
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 156000f..3e9ae03 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1936,6 +1936,19 @@
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
+ /*
+ xx_xy_yz ; mode = true
+ xx_xy_xz ; mode = false
+ */
+ void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, int mode)
+ {
+ if (mode) {
+ if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) throw Error(ERR_BAD_COMBINATION);
+ } else {
+ if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION);
+ }
+ opVex(x, 0, op, type, code);
+ }
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index e22bd05..88a6959 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -228,4 +228,30 @@
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
+void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
+void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
+void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
+void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
+void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
+void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
+void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
+void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
+void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x32, false); }
+void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x22, false); }
+void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x12, false); }
+void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x34, false); }
+void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x24, false); }
+void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x14, false); }
+void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x35, true); }
+void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x25, true); }
+void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x15, true); }
+void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x31, false); }
+void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x21, false); }
+void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x11, false); }
+void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x33, true); }
+void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x23, true); }
+void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x13, true); }
+void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x30, true); }
+void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x20, true); }
+void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x10, true); }
#endif