Add gf2p8affineqb and vgf2p8affineqb (opcode 0xCE) with encoding tests
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index eaeba06..5f11ea6 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -203,6 +203,7 @@
 			{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
 			{ 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
 			{ 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
+			{ 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
diff --git a/test/misc.cpp b/test/misc.cpp
index 97264f2..0e811f3 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -574,6 +574,7 @@
 	struct Code : Xbyak::CodeGenerator {
 		Code()
 		{
+			// gf2p8affineinvqb / vgf2p8affineinvqb (opcode 0xCF)
 			gf2p8affineinvqb(xmm1, xmm2, 3);
 			gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
 
@@ -593,6 +594,26 @@
 			vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
 			vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
 			vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
+			// gf2p8affineqb / vgf2p8affineqb (opcode 0xCE)
+			gf2p8affineqb(xmm1, xmm2, 3);
+			gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
+
+			vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
+			vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
+			vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
+			vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
+
+			vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
+			vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
+			vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
+
+			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
+			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
+			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
+
+			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
+			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
+			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
 		}
 	} c;
 	const uint8_t tbl[] = {
@@ -611,6 +632,22 @@
 		0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
 		0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
 		0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
+
+		0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
+		0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
+		0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
+		0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
+		0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
+		0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
+		0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
+		0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
+		0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
+		0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
+		0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
+		0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
+		0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
+		0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
+		0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
 	};
 	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
 	CYBOZU_TEST_EQUAL(c.getSize(), n);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 531ec1e..9c50aba 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -295,6 +295,7 @@
 void fyl2x() { db(0xD9); db(0xF1); }
 void fyl2xp1() { db(0xD9); db(0xF9); }
 void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
 void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
 void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
 void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
@@ -1004,6 +1005,7 @@
 void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
 void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
 void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
+void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
 void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }
 void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); }
 void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); }