add vcvtph2ps, vcvtps2ph
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index b7a34b4..7622626 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -139,6 +139,7 @@
 		{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z  },
 		{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z  },
 		{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
+		{ 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl *p = &tbl[i];
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 9881eb3..a83f45a 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1584,17 +1584,18 @@
 		printf("void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F3; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }\n");
 		printf("void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F2; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }\n");
 
-		printf("void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }\n");
-		printf("void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!(op.isMEM() || (x.is(Operand::XMM | Operand::YMM) && op.isXMM()) || (x.isZMM() && op.isYMM()))) throw Error(ERR_BAD_COMBINATION);"
-			"opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }\n");
+		printf("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }\n");
+		printf("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }\n");
 
 		puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }");
 		puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
 
 		printf("void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, cvtIdx0(op), op, T_0F | T_66 | T_YMM, 0xE6); }\n");
 
-		printf("void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F38 | T_66 | T_W0, 0x13); }\n");
-		printf("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0, 0x1d, imm); }\n");
+		printf("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }\n");
+		printf("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op);"
+			"int type = T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y;"
+			"if (op.isYMM()) { Xmm x1 = static_cast<const Xmm&>(op), x2 = x; x2.swapAttr(x1); opVex(x2, 0, x1, type, 0x1D, imm); } else { opVex(x, 0, op, type, 0x1D, imm); } }\n");
 	}
 	// x64
 	{
diff --git a/test/make_512.cpp b/test/make_512.cpp
index bba0afd..b5c90be 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -9,7 +9,7 @@
 
 const int bitEnd = 64;
 
-const uint64 MMX = 1ULL << 0;
+const uint64 YMM_SAE = 1ULL << 0;
 const uint64 _XMM = 1ULL << 1;
 const uint64 _MEM = 1ULL << 2;
 const uint64 _REG32 = 1ULL << 3;
@@ -178,13 +178,6 @@
 			return "st2";
 		}
 		switch (type) {
-		case MMX:
-			{
-				static const char MmxTbl[][4] = {
-					"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
-				};
-				return MmxTbl[idx];
-			}
 		case _XMM:
 			{
 				static const char tbl[][6] = {
@@ -393,6 +386,8 @@
 #ifdef XBYAK64
 		case XMM_SAE:
 			return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
+		case YMM_SAE:
+			return isXbyak_ ? "ymm25 | T_sae" : "ymm25, {sae}";
 		case ZMM_SAE:
 			return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
 		case XMM_ER:
@@ -410,6 +405,8 @@
 #else
 		case XMM_SAE:
 			return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
+		case YMM_SAE:
+			return isXbyak_ ? "ymm5 | T_sae" : "ymm5, {sae}";
 		case ZMM_SAE:
 			return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
 		case XMM_ER:
@@ -1427,22 +1424,31 @@
 		put("vcvtpd2udq", _XMM | _XMM3, _XMM | M_xword | M_1to2);
 		put("vcvtpd2udq", _XMM | _XMM3, _YMM | M_yword | MY_1to4);
 		put("vcvtpd2udq", YMM | YMM_KZ, ZMM | _MEM | M_1to8);
+
+		put("vcvtpd2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
+		put("vcvtpd2uqq", YMM_KZ, _YMM | _MEM | M_1to4);
+		put("vcvtpd2uqq", ZMM_KZ, _ZMM | _MEM | M_1to8);
+
+		put("vcvtph2ps", XMM_KZ, _XMM | _MEM);
+		put("vcvtph2ps", YMM_KZ, _XMM | _MEM);
+		put("vcvtph2ps", ZMM_KZ, _YMM | _MEM | YMM_SAE);
+
+		put("vcvtps2ph", _XMM | _MEM, _XMM, IMM8);
+		put("vcvtps2ph", _XMM | _MEM, _YMM, IMM8);
+		put("vcvtps2ph", _YMM | YMM_KZ | _MEM, _ZMM, IMM8);
+		put("vcvtps2ph", _YMM | YMM_KZ, ZMM_SAE, IMM8);
 #endif
 	}
 	void putMin()
 	{
 #ifdef XBYAK64
-		put("vpcmpeqb", K, _XMM3|_YMM, _MEM);
-		put("vpcmpeqw", K, _XMM3|_YMM, _MEM);
-		put("vpcmpeqd", K, _XMM3|_YMM, _MEM);
-		put("vpcmpeqq", K, _XMM3|_YMM, _MEM);
+		put512_cvt();
 #endif
 	}
 	void putAVX512()
 	{
 #ifdef MIN_TEST
-//		putMin();
-		put512_cvt();
+		putMin();
 #else
 		putOpmask();
 		separateFunc();
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 99bf96c..dc55461 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -407,6 +407,14 @@
 		kind_ = kind;
 		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
 	}
+	// swap zero_, mask_, rounding_
+	void swapAttr(Operand& rhs)
+	{
+		int t;
+		t = zero_; zero_ = rhs.zero_; rhs.zero_ = t;
+		t = mask_; mask_ = rhs.mask_; rhs.mask_ = t;
+		t = rounding_; rounding_ = rhs.rounding_; rhs.rounding_ = t;
+	}
 	void setOpmaskIdx(int idx, bool ignore_idx0 = false)
 	{
 		if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
@@ -1853,9 +1861,19 @@
 		// use static_cast to avoid calling unintentional copy constructor on gcc
 		opAVX_X_X_XM(x, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8);
 	}
-	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+	// (x, x/m), (y, x/m256), (z, y/m)
+	void checkCvt1(const Operand& x, const Operand& op) const
+	{
+		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION);
+	}
+	// (x, x/m), (x, y/m256), (y, z/m)
+	void checkCvt2(const Xmm& x, const Operand& op) const
 	{
 		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION);
+	}
+	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+	{
+		checkCvt2(x, op);
 		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
 		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
 	}
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index c9ecc9b..3925525 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -83,6 +83,7 @@
 void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
+void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
 void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
 void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
 void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 108769c..6c1856c 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1480,13 +1480,13 @@
 void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0, 0x2C); }
 void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F3; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }
 void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); int type = T_0F | T_F2; if (!op1.isMEM() && !op2.isMEM()) type |= (op1.isREG(32) || op2.isREG(32)) ? T_W0 : T_W1; opAVX_X_X_XMcvt(x, false, op1, op2, op2.isREG(), Operand::XMM, type, 0x2A); }
-void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }
-void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!(op.isMEM() || (x.is(Operand::XMM | Operand::YMM) && op.isXMM()) || (x.isZMM() && op.isYMM()))) throw Error(ERR_BAD_COMBINATION);opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
+void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_YMM, 0x5A); }
+void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opAVX_X_X_XMcvt(x, false, cvtIdx0(x), op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
 void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
 void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
 void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, cvtIdx0(op), op, T_0F | T_66 | T_YMM, 0xE6); }
-void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F38 | T_66 | T_W0, 0x13); }
-void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0, 0x1d, imm); }
+void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
+void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op);int type = T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y;if (op.isYMM()) { Xmm x1 = static_cast<const Xmm&>(op), x2 = x; x2.swapAttr(x1); opVex(x2, 0, x1, type, 0x1D, imm); } else { opVex(x, 0, op, type, 0x1D, imm); } }
 #ifdef XBYAK64
 void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
 void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }