add EVEX encoding support for vpmaddwd, vpmaddubsw
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index b491f03..6d1aba4 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1086,8 +1086,8 @@
 			{ 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true },
 			{ 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true },
 			{ 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true },
-			{ 0xF5, "pmaddwd", T_0F | T_66 | T_YMM, false, true },
-			{ 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM, false, true },
+			{ 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true },
+			{ 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true },
 
 			{ 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM, false, true },
 			{ 0xEE, "pmaxsw", T_0F | T_66 | T_YMM, false, true },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index a776bf1..7ba81ca 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2785,6 +2785,9 @@
 			{ "vpcmpgtw", K2, _ZMM, _ZMM | _MEM },
 			{ "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 },
 			{ "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 },
+
+			{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM },
+			{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index adc2347..265bbb4 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -839,9 +839,9 @@
 void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); }
 void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x07); }
 void vphsubsw(const Xmm& x, const Operand& op) { vphsubsw(x, x, op); }
-void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xF5); }
+void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xF5); }
 void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); }
-void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x04); }
+void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x04); }
 void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); }
 void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x3C); }
 void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); }