fix Reg::changeBit
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 29b95c1..069ca54 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -705,8 +705,8 @@
 		}
 	}
 
-	puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
-	puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
+	puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
+	puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
 	puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
 	puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
 
diff --git a/readme.md b/readme.md
index 480c0c1..421674d 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-# Xbyak 5.77 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+# Xbyak 5.78 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
 
 ## Abstract
 
@@ -392,6 +392,7 @@
 http://opensource.org/licenses/BSD-3-Clause
 
 ## History
+* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
 * 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
 * 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
 * 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
diff --git a/readme.txt b/readme.txt
index b5c02fc..20c77e9 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.77

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.78

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -373,6 +373,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

+2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage)

 2019/03/06 ver 5.77 LLCキャッシュを共有数CPU数の修整(by densamoilov)

 2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)

 2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元

diff --git a/test/Makefile b/test/Makefile
index 3180f18..37a678c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,4 +1,4 @@
-TARGET = make_nm normalize_prefix jmp address bad_address misc
+TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32
 XBYAK_INC=../xbyak/xbyak.h
 BIT=32
 ifeq ($(shell uname -m),x86_64)
@@ -32,6 +32,10 @@
 	$(CXX) $(CFLAGS) bad_address.cpp -o $@
 misc: misc.cpp ../xbyak/xbyak.h
 	$(CXX) $(CFLAGS) misc.cpp -o $@
+cvt_test: cvt_test.cpp ../xbyak/xbyak.h
+	$(CXX) $(CFLAGS) $< -o $@
+cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
+	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
 
 test: normalize_prefix jmp bad_address $(TARGET)
 	$(MAKE) -C ../gen
@@ -42,6 +46,8 @@
 	./jmp
 	./bad_address
 	./misc
+	./cvt_test
+	./cvt_test32
 ifeq ($(BIT),64)
 	./test_address.sh 64
 	./test_nm.sh 64
diff --git a/test/cvt_test.cpp b/test/cvt_test.cpp
index cce7613..ba1917e 100644
--- a/test/cvt_test.cpp
+++ b/test/cvt_test.cpp
@@ -1,4 +1,7 @@
+#define XBYAK_NO_OP_NAMES
 #include <xbyak/xbyak.h>
+#include <cybozu/inttype.hpp>
+#include <cybozu/test.hpp>
 
 using namespace Xbyak;
 using namespace Xbyak::util;
@@ -44,61 +47,30 @@
 };
 #endif
 
-int errNum = 0;
-int testNum = 0;
-
-template<class T>
-void verify(const T& x, const T& y)
-{
-	if (x != y) {
-		printf("ERR %s %s\n", x.toString(), y.toString());
-		errNum++;
-	}
-	testNum++;
-}
-
-#define verifyExp(state) \
-{ \
-	bool isOK = false; \
-	try { \
-		state; \
-	} catch (const Xbyak::Error& e) { \
-		if ((int)e == ERR_CANT_CONVERT) { \
-			isOK = true; \
-		} \
-	} \
-	if (!isOK) { \
-		printf("ERR " #state "\n"); \
-		errNum++; \
-	} \
-	testNum++; \
-}
-
-int main()
-	try
+CYBOZU_TEST_AUTO(cvt)
 {
 	for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
 		if (tbl[i].reg8) {
-			verify(tbl[i].reg8->cvt8(), *tbl[i].reg8);
-			verify(tbl[i].reg8->cvt16(), tbl[i].reg16);
-			verify(tbl[i].reg8->cvt32(), tbl[i].reg32);
-			verify(tbl[i].reg16.cvt8(), *tbl[i].reg8);
-			verify(tbl[i].reg32.cvt8(), *tbl[i].reg8);
+			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
+			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
+			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
+			CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
+			CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
 		}
-		verify(tbl[i].reg16.cvt16(), tbl[i].reg16);
-		verify(tbl[i].reg16.cvt32(), tbl[i].reg32);
-		verify(tbl[i].reg32.cvt16(), tbl[i].reg16);
-		verify(tbl[i].reg32.cvt32(), tbl[i].reg32);
+		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
+		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
+		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
+		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
 #ifdef XBYAK64
 		if (tbl[i].reg8) {
-			verify(tbl[i].reg64.cvt8(), *tbl[i].reg8);
-			verify(tbl[i].reg8->cvt64(), tbl[i].reg64);
+			CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
+			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt64() == tbl[i].reg64);
 		}
-		verify(tbl[i].reg64.cvt16(), tbl[i].reg16);
-		verify(tbl[i].reg64.cvt32(), tbl[i].reg32);
-		verify(tbl[i].reg64.cvt64(), tbl[i].reg64);
-		verify(tbl[i].reg16.cvt64(), tbl[i].reg64);
-		verify(tbl[i].reg32.cvt64(), tbl[i].reg64);
+		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
+		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
+		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
+		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
+		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
 #endif
 	}
 	{
@@ -106,7 +78,7 @@
 			ah, bh, ch, dh
 		};
 		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
-			verifyExp(errTbl[i].cvt16());
+			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt16(), std::exception);
 		}
 	}
 #ifdef XBYAK32
@@ -115,11 +87,65 @@
 			si, di, bp, sp
 		};
 		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
-			verifyExp(errTbl[i].cvt8());
+			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt8(), std::exception);
 		}
 	}
 #endif
-	printf("test=%d(err=%d)\n", testNum, errNum);
-} catch (std::exception& e) {
-	printf("ERR %s\n", e.what());
+}
+
+CYBOZU_TEST_AUTO(changeBit) // checks Reg::changeBit() between every pair of widths listed for one register index
+{
+	using namespace Xbyak::util;
+#ifdef XBYAK64
+	const size_t N = 7; // number of widths per row, see bitTbl
+	const Reg* tbl[][N] = { // one row per register index; column k holds the register whose width is bitTbl[k]
+		{ &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
+		{ &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
+		{ &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
+		{ &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
+		{ &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
+		{ &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
+		{ &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
+		{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
+		{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
+		{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
+		{ 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 }, // 0 = no register of that width; converting to it must throw
+		{ 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
+	};
+	const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
+#else
+	const size_t N = 6; // no 64-bit column in this configuration
+	const Reg* tbl[][N] = { // same layout as the 64-bit table
+		{ &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
+		{ &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
+		{ &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
+		{ &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
+		{ 0, &sp, &esp, &xmm4, &ymm4, &zmm4 }, // 0 = no 8-bit register listed for this index
+		{ 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
+		{ 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
+		{ 0, &di, &edi,  &xmm7, &ymm7, &zmm7 },
+	};
+	const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
+#endif
+
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { // each register index
+		for (size_t j = 0; j < N; j++) { // source width
+			const Reg *r1 = tbl[i][j];
+			if (r1 == 0) continue; // no source register of this width
+			for (size_t k = 0; k < N; k++) { // target width
+				if (tbl[i][k]) {
+					CYBOZU_TEST_ASSERT(*tbl[i][k] == r1->changeBit(bitTbl[k])); // must yield exactly the register in column k
+// printf("%s->changeBit(%d)=%s %s\n", r1->toString(), bitTbl[k], r1->changeBit(bitTbl[k]).toString(), tbl[i][k]->toString());
+				} else {
+					CYBOZU_TEST_EXCEPTION(r1->changeBit(bitTbl[k]), std::exception); // no target register exists: must throw
+				}
+			}
+		}
+	}
+#ifdef XBYAK64
+	const Reg8 *special8bitTbl[] = { &ah, &bh, &ch, &dh }; // these are expected to refuse conversion to 16 bits
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(special8bitTbl); i++) {
+		CYBOZU_TEST_EXCEPTION(special8bitTbl[i]->changeBit(16), std::exception);
+	}
+#endif
+}
diff --git a/test/misc.cpp b/test/misc.cpp
index 701111c..3967fef 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <string>
+#define XBYAK_NO_OP_NAMES
 #include <xbyak/xbyak.h>
 #include <cybozu/inttype.hpp>
 #include <cybozu/test.hpp>
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index c77b9b1..f768927 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -113,7 +113,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5770 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5780 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -433,7 +433,8 @@
 		kind_ = kind;
 		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
 	}
-	void setBit(int bit) { bit_ = bit; }
+	// set the operand size to bit; no-op if it already has that size, throws Error(ERR_CANT_CONVERT) when it cannot be changed (e.g. MMX/FPU/BNDREG)
+	void setBit(int bit);
 	void setOpmaskIdx(int idx, bool ignore_idx0 = false)
 	{
 		if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
@@ -516,6 +517,48 @@
 	const Reg& getReg() const;
 };
 
+inline void Operand::setBit(int bit) // resize this operand to `bit` bits; throws Error(ERR_CANT_CONVERT) if that is not possible
+{
+	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR;
+	if (isBit(bit)) return; // already the requested size: nothing to do
+	if (is(MEM | OPMASK)) { // size-only change, kind and index are kept; OPMASK is needed because vfpclasspd/ps call k.changeBit(op.getBit()) on an Opmask
+		bit_ = bit;
+		return;
+	}
+	if (is(REG | XMM | YMM | ZMM)) { // register conversion: the kind follows the requested size
+		int idx = getIdx();
+		// err if converting ah, bh, ch, dh
+		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
+		Kind kind = REG; // 8/16/32/64 stay REG; 128/256/512 select XMM/YMM/ZMM below
+		switch (bit) {
+		case 8:
+			if (idx >= 16) goto ERR; // e.g. xmm16..xmm31 have no general-purpose counterpart
+#ifdef XBYAK32
+			if (idx >= 4) goto ERR; // sp/bp/si/di have no 8-bit form here
+#else
+			if (4 <= idx && idx < 8) idx |= EXT8BIT; // distinguish spl/bpl/sil/dil from ah, bh, ch, dh
+#endif
+			break;
+		case 16:
+		case 32:
+		case 64:
+			if (idx >= 16) goto ERR; // same index limit as the 8-bit case
+			break;
+		case 128: kind = XMM; break;
+		case 256: kind = YMM; break;
+		case 512: kind = ZMM; break;
+		}
+		idx_ = idx;
+		kind_ = kind;
+		bit_ = bit;
+		mask_ = 0; // mask_ and rounding_ are reset on the converted register
+		rounding_ = 0;
+		return;
+	}
+ERR: // every rejected conversion ends up here
+	throw Error(ERR_CANT_CONVERT);
+}
+
 class Label;
 
 struct Reg8;
@@ -528,7 +571,8 @@
 public:
 	Reg() { }
 	Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
-	Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
+	// return a copy converted to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM via setBit(bit); *this is not modified, setBit's Error(ERR_CANT_CONVERT) propagates
+	Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
 	uint8 getRexW() const { return isREG(64) ? 8 : 0; }
 	uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
 	uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
@@ -652,34 +696,23 @@
 
 inline Reg8 Reg::cvt8() const
 {
-	const int idx = getIdx();
-	if (isBit(8)) return Reg8(idx, isExt8bit());
-#ifdef XBYAK32
-	if (idx >= 4) throw Error(ERR_CANT_CONVERT);
-#endif
-	return Reg8(idx, 4 <= idx && idx < 8);
+	Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
 }
 
 inline Reg16 Reg::cvt16() const
 {
-	const int idx = getIdx();
-	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
-	return Reg16(idx);
+	return Reg16(changeBit(16).getIdx());
 }
 
 inline Reg32 Reg::cvt32() const
 {
-	const int idx = getIdx();
-	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
-	return Reg32(idx);
+	return Reg32(changeBit(32).getIdx());
 }
 
 #ifdef XBYAK64
 inline Reg64 Reg::cvt64() const
 {
-	const int idx = getIdx();
-	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
-	return Reg64(idx);
+	return Reg64(changeBit(64).getIdx());
 }
 #endif
 
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index f925d64..bb40f8c 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.77"; }
+const char *getVersionString() const { return "5.78"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -1725,8 +1725,8 @@
 void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
 void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
 void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
-void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
+void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
+void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
 void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
 void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
 void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }