Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5a710cf..9660119 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.5)
 
-project(xbyak LANGUAGES CXX VERSION 7.04)
+project(xbyak LANGUAGES CXX VERSION 7.05)
 
 file(GLOB headers xbyak/*.h)
 
diff --git a/doc/changelog.md b/doc/changelog.md
index 401bfb8..027757b 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
 # History
 
+* 2024/Jan/03 ver 7.05 support RAO-INT for APX
 * 2023/Dec/28 ver 7.04 rex2 supports two-byte opecode
 * 2023/Dec/26 ver 7.03 set the default value of dfv to 0
 * 2023/Dec/20 ver 7.02 SHA* support APX
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index a174e6a..3959cc5 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -630,7 +630,7 @@
 			printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
 			printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg);
 			printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
-			printf("void set%s(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | %d)) return; opRext(op, 8, 0, T_0F, 0x90 | %d); }%s\n", p->name, p->ext, p->ext, msg);
+			printf("void set%s(const Operand& op) { opSetCC(op, %d); }%s\n", p->name, p->ext, msg);
 
 			// ccmpscc
 			// true if SCC = 0b1010, false if SCC = 0b1011 (see APX Architecture Specification p.266)
@@ -860,14 +860,13 @@
 			const char *prefix;
 		} tbl[] = {
 			{ "aadd", "" },
-			{ "aand", " | T_66" },
-			{ "aor", " | T_F2" },
-			{ "axor", " | T_F3" },
+			{ "aand", "|T_66" },
+			{ "aor", "|T_F2" },
+			{ "axor", "|T_F3" },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
-			printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name);
-			printf("opMR(addr, reg, T_0F38%s, 0x0FC); }\n", p->prefix);
+			printf("void %s(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38%s, 0x0FC, T_APX%s); }\n", p->name, p->prefix, p->prefix);
 		}
 	}
 
@@ -1149,10 +1148,10 @@
 
 		puts("void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
 		puts("void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
-		puts("void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }");
-		puts("void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }");
-		puts("void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(),  addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }");
-		puts("void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(),  addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }");
+		puts("void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }");
+		puts("void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }");
+		puts("void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }");
+		puts("void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }");
 		puts("void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }");
 
 		puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }");
diff --git a/meson.build b/meson.build
index b184932..dbe4e8a 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
 project(
 	'xbyak',
 	'cpp',
-	version: '7.04',
+	version: '7.05',
 	license: 'BSD-3-Clause',
 	default_options: 'b_ndebug=if-release'
 )
diff --git a/readme.md b/readme.md
index b7b6fc8..7e5e9f2 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-# Xbyak 7.04 [![Badge Build]][Build Status]
+# Xbyak 7.05 [![Badge Build]][Build Status]
 
 *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
 
@@ -33,6 +33,7 @@
 
 ### News
 
+- support RAO-INT for APX
 - support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
 - support APX except for a few instructions
 - add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
diff --git a/readme.txt b/readme.txt
index 507cce4..fb0f374 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.04

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.05

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -404,6 +404,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

+2024/01/03 ver 7.05 APX対応RAO-INT

 2023/12/28 ver 7.04 2バイトオペコードのrex2対応

 2023/12/26 ver 7.03 dfvのデフォルト値を0に設定

 2023/12/20 ver 7.02 SHA*のAPX対応

diff --git a/test/apx.cpp b/test/apx.cpp
index 70b7d02..45fe5ee 100644
--- a/test/apx.cpp
+++ b/test/apx.cpp
@@ -1936,3 +1936,29 @@
 	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 }
 
+CYBOZU_TEST_AUTO(rao_int)
+{
+	struct Code : Xbyak::CodeGenerator {
+		Code()
+		{
+			aadd(ptr [r16+r31*1], r17d);
+			aadd(ptr [r16+r31*1], r17);
+			aand(ptr [r16+r31*1], r17d);
+			aand(ptr [r16+r31*1], r17);
+			aor(ptr [r16+r31*1], r17d);
+			aor(ptr [r16+r31*1], r17);
+			axor(ptr [r16+r31*1], r17d);
+			axor(ptr [r16+r31*1], r17);
+		}
+	} c;
+	const uint8_t tbl[] = {
+		0x62, 0xac, 0x78, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf8, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac,
+		0x79, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf9, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7b, 0x08,
+		0xfc, 0x0c, 0x38, 0x62, 0xac, 0xfb, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7a, 0x08, 0xfc, 0x0c,
+		0x38, 0x62, 0xac, 0xfa, 0x08, 0xfc, 0x0c, 0x38,
+	};
+	const size_t n = sizeof(tbl);
+	CYBOZU_TEST_EQUAL(c.getSize(), n);
+	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 990728d..0e96ff5 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x7040 /* 0xABCD = A.BC(.D) */
+	VERSION = 0x7050 /* 0xABCD = A.BC(.D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -727,6 +727,7 @@
 	bool operator==(const Operand& rhs) const;
 	bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
 	const Address& getAddress() const;
+	Address getAddress(int immSize) const;
 	const Reg& getReg() const;
 };
 
@@ -1298,15 +1299,15 @@
 		M_ripAddr
 	};
 	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
-		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast), optimize_(true)
+		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true)
 	{
 		e_.verify();
 	}
 #ifdef XBYAK64
 	explicit XBYAK_CONSTEXPR Address(size_t disp)
-		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false), optimize_(true) { }
+		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), immSize(0), disp8N(0), permitVsib(false), broadcast_(false), optimize_(true) { }
 	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
-		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast), optimize_(true) { }
+		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) { }
 #endif
 	RegExp getRegExp() const
 	{
@@ -1323,7 +1324,7 @@
 	const Label* getLabel() const { return label_; }
 	bool operator==(const Address& rhs) const
 	{
-		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
+		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && immSize == rhs.immSize && disp8N == rhs.disp8N && permitVsib == rhs.permitVsib && broadcast_ == rhs.broadcast_ && optimize_ == rhs.optimize_;
 	}
 	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
 	bool isVsib() const { return e_.isVsib(); }
@@ -1331,6 +1332,11 @@
 	RegExp e_;
 	const Label* label_;
 	Mode mode_;
+public:
+	int immSize; // the size of immediate value of mnemonics (0, 1, 2, 4)
+	int disp8N; // 0(normal), 1(force disp32), {2, 4, 8}(compressed displacement)
+	bool permitVsib;
+private:
 	bool broadcast_;
 	bool optimize_;
 };
@@ -1340,6 +1346,12 @@
 	assert(isMEM());
 	return static_cast<const Address&>(*this);
 }
+inline Address Operand::getAddress(int immSize) const
+{
+	Address addr = getAddress();
+	addr.immSize = immSize;
+	return addr;
+}
 
 inline bool Operand::operator==(const Operand& rhs) const
 {
@@ -2044,12 +2056,14 @@
 		writeCode(type, reg1, code, rex2);
 		setModRM(3, reg1.getIdx(), reg2.getIdx());
 	}
-	void opMR(const Address& addr, const Reg& r, uint64_t type, int code, int immSize = 0)
+	void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE)
 	{
+		if (code2 == NONE) code2 = code;
+		if (type2 && opROO(Reg(), addr, r, type2, code2)) return;
 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
 		bool rex2 = rex(addr, r, type);
 		writeCode(type, r, code, rex2);
-		opAddr(addr, r.getIdx(), immSize);
+		opAddr(addr, r.getIdx());
 	}
 	void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code)
 	{
@@ -2130,21 +2144,20 @@
 	}
 	// reg is reg field of ModRM
 	// immSize is the size for immediate value
-	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
-	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
+	void opAddr(const Address &addr, int reg)
 	{
-		if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
+		if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		if (addr.getMode() == Address::M_ModRM) {
-			setSIB(addr.getRegExp(), reg, disp8N);
+			setSIB(addr.getRegExp(), reg, addr.disp8N);
 		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
 			setModRM(0, reg, 5);
 			if (addr.getLabel()) { // [rip + Label]
-				putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
+				putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize);
 			} else {
 				size_t disp = addr.getDisp();
 				if (addr.getMode() == Address::M_ripAddr) {
 					if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
-					disp -= (size_t)getCurr() + 4 + immSize;
+					disp -= (size_t)getCurr() + 4 + addr.immSize;
 				}
 				dd(inner::VerifyInInt32(disp));
 			}
@@ -2201,11 +2214,12 @@
 		if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false)
 		if (p2->isMEM()) {
 			const Reg& r = *static_cast<const Reg*>(p1);
-			const Address& addr = p2->getAddress();
+			Address addr = p2->getAddress();
 			const RegExp e = addr.getRegExp();
 			evexLeg(r, e.getBase(), e.getIndex(), d, type, sc);
 			writeCode(type, d, code);
-			opAddr(addr, r.getIdx(), immSize);
+			addr.immSize = immSize;
+			opAddr(addr, r.getIdx());
 		} else {
 			evexLeg(static_cast<const Reg&>(op2), static_cast<const Reg&>(op1), Reg(), d, type, sc);
 			writeCode(type, d, code);
@@ -2220,13 +2234,18 @@
 		const Reg r(ext, Operand::REG, opBit);
 		if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
 		if (op.isMEM()) {
-			opMR(op.getAddress(), r, type, code, immSize);
+			opMR(op.getAddress(immSize), r, type, code);
 		} else if (op.isREG(bit)) {
 			opRR(r, op.getReg().changeBit(opBit), type, code);
 		} else {
 			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
+	void opSetCC(const Operand& op, int ext)
+	{
+		if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | ext)) return;
+		opRext(op, 8, 0, T_0F, 0x90 | ext);
+	}
 	void opShift(const Operand& op, int imm, int ext, const Reg *d = 0)
 	{
 		if (d == 0) verifyMemHasSize(op);
@@ -2246,7 +2265,7 @@
 	void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0)
 	{
 		if (op.isMEM()) {
-			opMR(op.getAddress(), r, type, code, immSize);
+			opMR(op.getAddress(immSize), r, type, code);
 		} else if (condR) {
 			opRR(r, op.getReg(), type, code);
 		} else {
@@ -2431,7 +2450,7 @@
 	void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE)
 	{
 		if (op2.isMEM()) {
-			const Address& addr = op2.getAddress();
+			Address addr = op2.getAddress();
 			const RegExp& regExp = addr.getRegExp();
 			const Reg& base = regExp.getBase();
 			const Reg& index = regExp.getIndex();
@@ -2450,7 +2469,10 @@
 			} else {
 				vex(r, base, p1, type, code, index.isExtIdx());
 			}
-			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
+			if (type & T_VSIB) addr.permitVsib = true;
+			if (disp8N) addr.disp8N = disp8N;
+			if (imm8 != NONE) addr.immSize = 1;
+			opAddr(addr, r.getIdx());
 		} else {
 			const Reg& base = op2.getReg();
 			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
@@ -2945,7 +2967,7 @@
 				if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 				immSize = 4;
 			}
-			opMR(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0, 0xC6, immSize);
+			opMR(op.getAddress(immSize), Reg(0, Operand::REG, op.getBit()), 0, 0xC6);
 			db(static_cast<uint32_t>(imm), immSize);
 		} else {
 			XBYAK_THROW(ERR_BAD_COMBINATION)
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 7771417..ac2a38f 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,6 +1,6 @@
-const char *getVersionString() const { return "7.04"; }
-void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
-void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
+const char *getVersionString() const { return "7.05"; }
+void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
+void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
 void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x10); }
 void adc(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 2); }
@@ -34,8 +34,8 @@
 void andnps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x55, isXMM_XMMorMEM); }
 void andpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x54, isXMM_XMMorMEM); }
 void andps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x54, isXMM_XMMorMEM); }
-void aor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_F2, 0x0FC); }
-void axor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_F3, 0x0FC); }
+void aor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_F2, 0x0FC, T_APX|T_F2); }
+void axor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_F3, 0x0FC, T_APX|T_F3); }
 void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf7); }
 void blendpd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0D, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
 void blendps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0C, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
@@ -684,15 +684,15 @@
 void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); }
 void movaps(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x29); }
 void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); }
-void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }
-void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }
+void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }
+void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }
 void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }
 void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }
 void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
 void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
 void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12, isXMM_XMMorMEM, NONE); }
-void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(),  addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }
-void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(),  addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }
+void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }
+void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }
 void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }
 void movdqa(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x7F); }
 void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); }
@@ -956,36 +956,36 @@
 void scasd() { db(0xAF); }
 void scasw() { db(0x66); db(0xAF); }
 void serialize() { db(0x0F); db(0x01); db(0xE8); }
-void seta(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524
-void setae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
-void setb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
-void setbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524
-void setc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
-void sete(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
-void setg(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524
-void setge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, T_0F, 0x90 | 13); }//-V524
-void setl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524
-void setle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524
-void setna(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524
-void setnae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
-void setnb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
-void setnbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524
-void setnc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
-void setne(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524
-void setng(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524
-void setnge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524
-void setnl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, T_0F, 0x90 | 13); }//-V524
-void setnle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524
-void setno(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 1)) return; opRext(op, 8, 0, T_0F, 0x90 | 1); }//-V524
-void setnp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524
-void setns(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 9)) return; opRext(op, 8, 0, T_0F, 0x90 | 9); }//-V524
-void setnz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524
-void seto(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 0)) return; opRext(op, 8, 0, T_0F, 0x90 | 0); }//-V524
-void setp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524
-void setpe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524
-void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524
-void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524
-void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
+void seta(const Operand& op) { opSetCC(op, 7); }//-V524
+void setae(const Operand& op) { opSetCC(op, 3); }//-V524
+void setb(const Operand& op) { opSetCC(op, 2); }//-V524
+void setbe(const Operand& op) { opSetCC(op, 6); }//-V524
+void setc(const Operand& op) { opSetCC(op, 2); }//-V524
+void sete(const Operand& op) { opSetCC(op, 4); }//-V524
+void setg(const Operand& op) { opSetCC(op, 15); }//-V524
+void setge(const Operand& op) { opSetCC(op, 13); }//-V524
+void setl(const Operand& op) { opSetCC(op, 12); }//-V524
+void setle(const Operand& op) { opSetCC(op, 14); }//-V524
+void setna(const Operand& op) { opSetCC(op, 6); }//-V524
+void setnae(const Operand& op) { opSetCC(op, 2); }//-V524
+void setnb(const Operand& op) { opSetCC(op, 3); }//-V524
+void setnbe(const Operand& op) { opSetCC(op, 7); }//-V524
+void setnc(const Operand& op) { opSetCC(op, 3); }//-V524
+void setne(const Operand& op) { opSetCC(op, 5); }//-V524
+void setng(const Operand& op) { opSetCC(op, 14); }//-V524
+void setnge(const Operand& op) { opSetCC(op, 12); }//-V524
+void setnl(const Operand& op) { opSetCC(op, 13); }//-V524
+void setnle(const Operand& op) { opSetCC(op, 15); }//-V524
+void setno(const Operand& op) { opSetCC(op, 1); }//-V524
+void setnp(const Operand& op) { opSetCC(op, 11); }//-V524
+void setns(const Operand& op) { opSetCC(op, 9); }//-V524
+void setnz(const Operand& op) { opSetCC(op, 5); }//-V524
+void seto(const Operand& op) { opSetCC(op, 0); }//-V524
+void setp(const Operand& op) { opSetCC(op, 10); }//-V524
+void setpe(const Operand& op) { opSetCC(op, 10); }//-V524
+void setpo(const Operand& op) { opSetCC(op, 11); }//-V524
+void sets(const Operand& op) { opSetCC(op, 8); }//-V524
+void setz(const Operand& op) { opSetCC(op, 4); }//-V524
 void sfence() { db(0x0F); db(0xAE); db(0xF8); }
 void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); }
 void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }