unify T_66, T_F3, T_F2 flags into a 2-bit pp field (bits 5-6)
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index 4462499..083f646 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -12,9 +12,10 @@
 		//
 		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
 		T_DUP = 1 << 4, // N = (8, 32, 64)
-		T_66 = 1 << 5,
-		T_F3 = 1 << 6,
-		T_F2 = 1 << 7,
+		T_66 = 1 << 5, // pp = 1
+		T_F3 = 1 << 6, // pp = 2
+		T_F2 = T_66 | T_F3, // pp = 3
+		// 1 << 7, not used
 		T_0F = 1 << 8,
 		T_0F38 = 1 << 9,
 		T_0F3A = 1 << 10,
@@ -44,6 +45,9 @@
 		T_MAP6 = T_FP16 | T_0F38,
 		T_XXX
 	};
+	// Extract the 2-bit pp code held in bits 5-6 of type: 0 = none, 1 = T_66, 2 = T_F3, 3 = T_F2
+	inline uint32_t getPP(int type) { return (type >> 5) & 3; }
+
 
 const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
 
@@ -66,17 +70,14 @@
 		if (!str.empty()) str += " | ";
 		str += "T_DUP";
 	}
-	if (type & T_66) {
-		if (!str.empty()) str += " | ";
-		str += "T_66";
-	}
-	if (type & T_F3) {
-		if (!str.empty()) str += " | ";
-		str += "T_F3";
-	}
 	if (type & T_F2) {
 		if (!str.empty()) str += " | ";
-		str += "T_F2";
+		switch (type & T_F2) {
+		case T_66: str += "T_66"; break;
+		case T_F3: str += "T_F3"; break;
+		case T_F2: str += "T_F2"; break;
+		default: break;
+		}
 	}
 	if (type & T_0F) {
 		if (!str.empty()) str += " | ";
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 19022e6..2ad8c59 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1303,7 +1303,8 @@
 			if (p->mode & 1) {
 				const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
 				const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
-				const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE";
+				const char *prefTbl[] = { "NONE", "0x66", "0xF3", "0xF2" }; // indexed by getPP(): 0 = none, 1 = T_66, 2 = T_F3, 3 = T_F2
+				const char *pref = prefTbl[getPP(p->type)];
 				const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
 				printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
 			}
@@ -1354,11 +1355,12 @@
 			{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
 			{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
 		};
+		const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 };
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
 			std::string type = type2String(p->type);
 			if (p->mode & 1) {
-				uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0;
+				uint8_t pref = ppTbl[getPP(p->type)];
 				printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
 			}
 			if (p->mode & 2) {
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 0598ef6..42ce5ce 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1652,9 +1652,10 @@
 		//
 		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
 		T_DUP = 1 << 4, // N = (8, 32, 64)
-		T_66 = 1 << 5,
-		T_F3 = 1 << 6,
-		T_F2 = 1 << 7,
+		T_66 = 1 << 5, // pp = 1
+		T_F3 = 1 << 6, // pp = 2
+		T_F2 = T_66 | T_F3, // pp = 3
+		// 1 << 7, not used
 		T_0F = 1 << 8,
 		T_0F38 = 1 << 9,
 		T_0F3A = 1 << 10,
@@ -1684,6 +1685,8 @@
 		T_MAP6 = T_FP16 | T_0F38,
 		T_XXX
 	};
+	// pp code taken from bits 5-6 of type: 0 = none, 1 = T_66, 2 = T_F3, 3 = T_F2 (= T_66 | T_F3)
+	uint32_t getPP(int type) const { return (type & T_F2) >> 5; }
 	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
 	{
 		int w = (type & T_W1) ? 1 : 0;
@@ -1692,7 +1695,7 @@
 		bool b = base.isExtIdx();
 		int idx = v ? v->getIdx() : 0;
 		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
-		uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
+		uint32_t pp = getPP(type);
 		uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
 		if (!b && !x && !w && (type & T_0F)) {
 			db(0xC5); db((r ? 0 : 0x80) | vvvv);
@@ -1725,8 +1728,7 @@
 		int w = (type & T_EW1) ? 1 : 0;
 		uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
 		if (type & T_FP16) mmm |= 4;
-		uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
-
+		uint32_t pp = getPP(type);
 		int idx = v ? v->getIdx() : 0;
 		uint32_t vvvv = ~idx;