add EVEX (AVX-512) encoding support for vunpckhpd, vunpckhps, vunpcklpd, vunpcklps
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 1db9164..88fa9f1 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1165,11 +1165,11 @@
 			{ 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X, false, true },
 			{ 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X, false, true },
 
-			{ 0x15, "unpckhpd", T_0F | T_66 | T_YMM, false, true },
-			{ 0x15, "unpckhps", T_0F | T_YMM, false, true },
+			{ 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true },
+			{ 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
 
-			{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM, false, true },
-			{ 0x14, "unpcklps", T_0F | T_YMM, false, true },
+			{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true },
+			{ 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index c89e568..34a24e1 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2553,10 +2553,10 @@
 			};
 			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 				const Tbl *p = &tbl[i];
-				put(p->name, K, XMM, XMM | MEM, IMM);
+				put(p->name, K, _XMM, _XMM | MEM, IMM);
 				if (!p->supportYMM) continue;
-				put(p->name, K, YMM, YMM | MEM, IMM);
-				put(p->name, K, ZMM, ZMM | MEM, IMM);
+				put(p->name, K, _YMM, _YMM | MEM, IMM);
+				put(p->name, K, _ZMM, _ZMM | MEM, IMM);
 			}
 		}
 		put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
@@ -2683,13 +2683,13 @@
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
-			put(p.name, XMM|XMM_KZ, XMM|MEM);
-			put(p.name, YMM|YMM_KZ, YMM|MEM);
-			put(p.name, ZMM|ZMM_KZ, ZMM|MEM);
+			put(p.name, _XMM|XMM_KZ, _XMM|MEM);
+			put(p.name, _YMM|YMM_KZ, _YMM|MEM);
+			put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
 			if (!p.M_X) continue;
-			put(p.name, MEM, XMM);
-			put(p.name, MEM, YMM);
-			put(p.name, MEM, ZMM);
+			put(p.name, MEM, _XMM);
+			put(p.name, MEM, _YMM);
+			put(p.name, MEM, _ZMM);
 		}
 		put("vsqrtpd", XMM_KZ, M_1to2);
 		put("vsqrtpd", YMM_KZ, M_1to4);
@@ -2705,13 +2705,18 @@
 	{
 		const struct Tbl {
 			const char *name;
+			uint64_t mem;
 		} tbl[] = {
-			{ "vsqrtsd" },
-			{ "vsqrtss" },
+			{ "vsqrtsd", MEM },
+			{ "vsqrtss", MEM },
+			{ "vunpckhpd", M_1to2 },
+			{ "vunpckhps", M_1to4 },
+			{ "vunpcklpd", M_1to2 },
+			{ "vunpcklps", M_1to4 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
-			put(p.name, XMM_KZ, XMM, XMM|MEM);
+			put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
 		}
 	}
 	void putAVX512()
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 1969722..bb0fce7 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -954,13 +954,13 @@
 void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); }
 void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X, 0x51); }
 void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); }
-void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x15); }
+void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x15); }
 void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); }
-void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0x15); }
+void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x15); }
 void vunpckhps(const Xmm& x, const Operand& op) { vunpckhps(x, x, op); }
-void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x14); }
+void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x14); }
 void vunpcklpd(const Xmm& x, const Operand& op) { vunpcklpd(x, x, op); }
-void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0x14); }
+void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x14); }
 void vunpcklps(const Xmm& x, const Operand& op) { vunpcklps(x, x, op); }
 void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_0F3A | T_66, 0xDF, imm); }
 void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_0F3A | T_66 | T_YMM, 0x09, imm); }