add EVEX encoding (ZMM operands) for vmovshdup, vmovsldup
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 6370009..b95931a 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1209,8 +1209,8 @@
 			{ 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z, false },
 			{ 0x6F, "movdqa", T_0F | T_66 | T_YMM, false },
 			{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false },
-			{ 0x16, "movshdup", T_0F | T_F3 | T_YMM, false },
-			{ 0x12, "movsldup", T_0F | T_F3 | T_YMM, false },
+			{ 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false },
+			{ 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false },
 			{ 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
 			{ 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false },
 
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 7babd15..7ecc893 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2628,6 +2628,10 @@
 		put("vmovss", XMM_KZ, _XMM3, _XMM3);
 		put("vmovss", XMM_KZ, MEM);
 		put("vmovss", MEM_K, XMM);
+
+		put("vmovshdup", _ZMM, _ZMM);
+		put("vmovsldup", _ZMM, _ZMM);
+
 		{
 			const char tbl[][16] = {
 				"vmovhpd",
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 51f04cf..a6971c5 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -983,8 +983,8 @@
 void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F2 | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); }
 void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_YMM, 0x6F); }
 void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x6F); }
-void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x16); }
-void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_YMM, 0x12); }
+void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_EW0 | T_YMM | T_EVEX, 0x16); }
+void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_F3 | T_EW0 | T_YMM | T_EVEX, 0x12); }
 void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX, 0x10); }
 void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
 void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F38 | T_66 | T_YMM, 0x1C); }