Add EVEX encoding support for vinsertps, with a test case
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 88fa9f1..fc62eb8 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1044,7 +1044,7 @@
 			{ 0xC2, "cmpss", T_0F | T_F3, true, true },
 			{ 0x5A, "cvtsd2ss", T_0F | T_F2, false, true },
 			{ 0x5A, "cvtss2sd", T_0F | T_F3, false, true },
-			{ 0x21, "insertps", T_0F3A | T_66 | T_W0, true, true },
+			{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true },
 			{ 0x63, "packsswb", T_0F | T_66 | T_YMM, false, true },
 			{ 0x6B, "packssdw", T_0F | T_66 | T_YMM, false, true },
 			{ 0x67, "packuswb", T_0F | T_66 | T_YMM, false, true },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 34a24e1..aeb3c45 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2719,6 +2719,21 @@
 			put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
 		}
 	}
+	void put512_X_X_XM_I() // calls put() once per table entry: mnemonic, two _XMM operands, the entry's operand, then IMM
+	{
+		const struct Tbl {
+			const char *name; // mnemonic string, passed to put() as its first argument
+			uint64_t mem; // operand value passed to put() after the two _XMM arguments
+		} tbl[] = {
+#ifdef XBYAK64
+			{ "vinsertps", _XMM3 }, // only listed when XBYAK64 is defined
+#endif
+		}; // NOTE(review): this initializer is empty when XBYAK64 is undefined -- confirm every target compiler accepts that
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl& p = tbl[i];
+			put(p.name, _XMM, _XMM, p.mem, IMM); // argument order: name, _XMM, _XMM, table operand, IMM
+		}
+	}
 	void putAVX512()
 	{
 		putOpmask();
@@ -2729,6 +2744,7 @@
 		put_vmov();
 		put512_X_MX();
 		put512_X_X_XM();
+		put512_X_X_XM_I();
 	}
 #endif
 };
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index bb0fce7..7d6b7a6 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -775,7 +775,7 @@
 void vcvtsd2ss(const Xmm& x, const Operand& op) { vcvtsd2ss(x, x, op); }
 void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_F3, 0x5A); }
 void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); }
-void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0, 0x21, imm); }
+void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
 void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); }
 void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x63); }
 void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); }