Add 256-bit VEX (ymm) and AVX-512 EVEX (zmm, registers 16-31) encodings for vpclmulqdq
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 08c471c..01139ea 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -60,7 +60,7 @@
 			{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
 			{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
 			{ 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
-			{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0, true, true, 3 },
+			{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 },
 			{ 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
 			{ 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
 
diff --git a/test/misc.cpp b/test/misc.cpp
index bd686fe..5fa5ea7 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -173,4 +173,31 @@
 	CYBOZU_TEST_EQUAL(c.getSize(), n);
 	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 }
+CYBOZU_TEST_AUTO(vpclmulqdq) // pins the exact bytes emitted for vpclmulqdq at 128/256/512-bit widths
+{
+	struct Code : Xbyak::CodeGenerator {
+		Code()
+		{
+			vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3); // low destination register: VEX is expected for xmm/ymm
+			vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
+			vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3); // 512-bit width has no VEX form, so EVEX is expected
+			// destination register 20 is not addressable with VEX, so all three widths below expect EVEX
+			vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
+			vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
+			vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
+		}
+	} c;
+	const uint8_t tbl[] = { // one row per instruction above, in emission order
+		0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03, // xmm2: 3-byte VEX (0xc4), plain disp8 = 0x40
+		0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03, // ymm2: same as above with VEX.L set (0x61 -> 0x65)
+		0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03, // zmm2: EVEX (0x62), compressed disp8 = 64 / 64
+		0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03, // xmm20: EVEX, compressed disp8 = 64 / 16
+		0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03, // ymm20: EVEX, compressed disp8 = 64 / 32
+		0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03, // zmm20: EVEX, compressed disp8 = 64 / 64
+	};
+	const size_t n = sizeof(tbl) / sizeof(tbl[0]); // total expected byte count across all six instructions
+	CYBOZU_TEST_EQUAL(c.getSize(), n);
+	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
 #endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 35f451c..5591bb3 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1100,7 +1100,7 @@
 void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
 void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
 void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
-void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x44, imm); }
+void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); } // NOTE(review): T_YMM | T_EVEX presumably enable the ymm/zmm and EVEX forms; expected bytes are pinned by the vpclmulqdq case in test/misc.cpp
 void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
 void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
 void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }