add AVX-512 (EVEX) forms of vpmaxu{b,w,d} and new EVEX-only vpmaxuq, with tests
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 71f7f7c..b2e36fd 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -183,6 +183,7 @@
 		{ 0xDF, "vpandnd", T_EVEX | T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
 		{ 0xDF, "vpandnq", T_EVEX | T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
 		{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_EVEX | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
+		{ 0x3F, "vpmaxuq", T_66 | T_0F38 | T_EVEX | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl *p = &tbl[i];
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 47fb669..1ce82a2 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1093,9 +1093,9 @@
 			{ 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true },
 			{ 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
 
-			{ 0xDE, "pmaxub", T_0F | T_66 | T_YMM, false, true },
-			{ 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM, false, true },
-			{ 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM, false, true },
+			{ 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true },
+			{ 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true },
+			{ 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true },
 
 			{ 0x38, "pminsb", T_0F38 | T_66 | T_YMM, false, true },
 			{ 0xEA, "pminsw", T_0F | T_66 | T_YMM, false, true },
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 2eddc80..6f017fd 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2788,10 +2788,16 @@
 
 			{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
 			{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },
+
 			{ "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
 			{ "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
 			{ "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
 			{ "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
+
+			{ "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
+			{ "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
+			{ "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
@@ -2830,14 +2836,23 @@
 	void putAVX512()
 	{
 		putOpmask();
+		separateFunc();
 		putCombi();
+		separateFunc();
 		putCmpK();
+		separateFunc();
 		putBroadcast();
+		separateFunc();
 		putAVX512_M_X();
+		separateFunc();
 		put_vmov();
+		separateFunc();
 		put512_X_XM();
+		separateFunc();
 		put512_X_X_XM();
+		separateFunc();
 		put512_X3();
+		separateFunc();
 		put512_X3_I();
 	}
 #endif
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 4885ed3..8f0f61e 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -96,4 +96,5 @@
 void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_MUST_EVEX | T_B32, 0xDF); }
 void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0xDF); }
 void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0x3D); }
+void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_EVEX | T_MUST_EVEX | T_B64, 0x3F); }
 #endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b68e08d..5f8f04d 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -849,11 +849,11 @@
 void vpmaxsw(const Xmm& x, const Operand& op) { vpmaxsw(x, x, op); }
 void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3D); }
 void vpmaxsd(const Xmm& x, const Operand& op) { vpmaxsd(x, x, op); }
-void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xDE); }
+void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xDE); }
 void vpmaxub(const Xmm& x, const Operand& op) { vpmaxub(x, x, op); }
-void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x3E); }
+void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x3E); }
 void vpmaxuw(const Xmm& x, const Operand& op) { vpmaxuw(x, x, op); }
-void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x3F); }
+void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3F); }
 void vpmaxud(const Xmm& x, const Operand& op) { vpmaxud(x, x, op); }
 void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x38); }
 void vpminsb(const Xmm& x, const Operand& op) { vpminsb(x, x, op); }