add vcompressb, vcompressw
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 1c8cf9b..8a11b7d 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -228,6 +228,9 @@
 
 		{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
 		{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
+
+		{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
+		{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl *p = &tbl[i];
diff --git a/test/misc.cpp b/test/misc.cpp
index 5fa5ea7..e7db693 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -199,5 +199,44 @@
 	CYBOZU_TEST_EQUAL(c.getSize(), n);
 	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 }
+CYBOZU_TEST_AUTO(vcompressb_w)
+{
+	struct Code : Xbyak::CodeGenerator {
+		Code()
+		{
+			vcompressb(ptr[rax + 64], xmm1);
+			vcompressb(xmm30 | k5, xmm1);
+			vcompressb(ptr[rax + 64], ymm1);
+			vcompressb(ymm30 | k3 |T_z, ymm1);
+			vcompressb(ptr[rax + 64], zmm1);
+			vcompressb(zmm30 | k2 |T_z, zmm1);
 
+			vcompressw(ptr[rax + 64], xmm1);
+			vcompressw(xmm30 | k5, xmm1);
+			vcompressw(ptr[rax + 64], ymm1);
+			vcompressw(ymm30 | k3 |T_z, ymm1);
+			vcompressw(ptr[rax + 64], zmm1);
+			vcompressw(zmm30 | k2 |T_z, zmm1);
+		}
+	} c;
+	const uint8_t tbl[] = {
+		0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
+		0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
+		0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
+		0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
+		0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
+		0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
+
+
+		0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
+		0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
+		0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
+		0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
+		0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
+		0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
+	};
+	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
+	CYBOZU_TEST_EQUAL(c.getSize(), n);
+	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
 #endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 5591bb3..79aa8a2 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1662,8 +1662,10 @@
 void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
 void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
 void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
 void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
 void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
+void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
 void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
 void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
 void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }