Add AVX-512 (EVEX) encoding for vmovddup and opmask/zeroing test operands
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 24bd3aa..2435396 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1206,7 +1206,7 @@
 			{ 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false },
 			{ 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
 			{ 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false },
-			{ 0x12, "movddup", T_0F | T_F2 | T_YMM, false },
+			{ 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z, false },
 			{ 0x6F, "movdqa", T_0F | T_66 | T_YMM, false },
 			{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false },
 			{ 0x16, "movshdup", T_0F | T_F3 | T_YMM, false },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 2c74346..22575d9 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -93,7 +93,16 @@
 #ifdef XBYAK64
 const uint64 _XMM3 = 1ULL << 50;
 #endif
-const uint64 XMM_SAE = 1ULL << 51; // max value
+const uint64 XMM_SAE = 1ULL << 51;
+#ifdef XBYAK64
+const uint64 XMM_KZ = 1ULL << 52; // xmm register with an opmask, rendered as e.g. xmm5{k5}
+const uint64 YMM_KZ = 1ULL << 53; // ymm register with an opmask and zeroing, rendered as e.g. ymm2{k3}{z}
+const uint64 ZMM_KZ = 1ULL << 54; // zmm register with an opmask, rendered as e.g. zmm7{k1} (max value)
+#else
+const uint64 XMM_KZ = 0; // no bit is assigned without XBYAK64, so OR-ing these into an operand mask adds nothing
+const uint64 YMM_KZ = 0;
+const uint64 ZMM_KZ = 0;
+#endif
 
 const uint64 NOPARA = 1ULL << (bitEnd - 1);
 
@@ -373,6 +382,12 @@
 			return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
 		case ZMM_ER:
 			return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
+		case XMM_KZ:
+			return isXbyak_ ? "xmm5 | k5" : "xmm5{k5}";
+		case YMM_KZ:
+			return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}";
+		case ZMM_KZ:
+			return isXbyak_ ? "zmm7|k1" : "zmm7{k1}";
 #else
 		case XMM_SAE:
 			return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
@@ -2599,6 +2614,18 @@
 		put("vmovq", MEM|REG64|XMM, _XMM3);
 #endif
 	}
+	void put512_X_MX()
+	{
+		// Passes each listed mnemonic to put() at xmm/ymm/zmm width: the first operand is a
+		// plain register or its *_KZ (opmask) variant, the second a same-width register or MEM.
+		const char *mnemonics[] = { "vmovddup" };
+		const size_t count = NUM_OF_ARRAY(mnemonics);
+		for (size_t idx = 0; idx != count; ++idx) {
+			put(mnemonics[idx], XMM|XMM_KZ, XMM|MEM);
+			put(mnemonics[idx], YMM|YMM_KZ, YMM|MEM);
+			put(mnemonics[idx], ZMM|ZMM_KZ, ZMM|MEM);
+		}
+	}
 	void putAVX512()
 	{
 		putOpmask();
@@ -2607,6 +2634,7 @@
 		putBroadcast();
 		putAVX512_M_X();
 		put_vmov();
+		put512_X_MX();
 	}
 #endif
 };
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 5845639..7ffb053 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -980,7 +980,7 @@
 void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x5B); }
 void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x28); }
 void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
-void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F2 | T_YMM, 0x12); }
+void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F2 | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); }
 void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x6F); }
 void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x6F); }
 void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x16); }