add EVEX encodings for vpmaxs{b,w,d,q}; also test vpmaddubsw/vpmaddwd with memory operands
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 217eb00..71f7f7c 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -182,6 +182,7 @@
 		{ 0xDB, "vpandq", T_EVEX | T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
 		{ 0xDF, "vpandnd", T_EVEX | T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
 		{ 0xDF, "vpandnq", T_EVEX | T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
+		{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_EVEX | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl *p = &tbl[i];
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 6d1aba4..47fb669 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1089,9 +1089,9 @@
 			{ 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true },
 			{ 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true },
 
-			{ 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM, false, true },
-			{ 0xEE, "pmaxsw", T_0F | T_66 | T_YMM, false, true },
-			{ 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM, false, true },
+			{ 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true },
+			{ 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true },
+			{ 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
 
 			{ 0xDE, "pmaxub", T_0F | T_66 | T_YMM, false, true },
 			{ 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM, false, true },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 7ba81ca..2eddc80 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2786,8 +2786,12 @@
 			{ "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 },
 			{ "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 },
 
-			{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM },
-			{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM },
+			{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
+			{ "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index c7aed05..4885ed3 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -95,4 +95,5 @@
 void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0xDB); }
 void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_MUST_EVEX | T_B32, 0xDF); }
 void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0xDF); }
+void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0x3D); }
 #endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 265bbb4..b68e08d 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -843,11 +843,11 @@
 void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); }
 void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x04); }
 void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); }
-void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x3C); }
+void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x3C); }
 void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); }
-void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xEE); }
+void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xEE); }
 void vpmaxsw(const Xmm& x, const Operand& op) { vpmaxsw(x, x, op); }
-void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x3D); }
+void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3D); }
 void vpmaxsd(const Xmm& x, const Operand& op) { vpmaxsd(x, x, op); }
 void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xDE); }
 void vpmaxub(const Xmm& x, const Operand& op) { vpmaxub(x, x, op); }