Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d91b0e..03f79a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 2.6...3.0.2)
 
-project(xbyak LANGUAGES CXX VERSION 6.66)
+project(xbyak LANGUAGES CXX VERSION 6.67)
 
 file(GLOB headers xbyak/*.h)
 
diff --git a/doc/changelog.md b/doc/changelog.md
index 5e6fa9a..907d003 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
 # History
 
+* 2022/Nov/30 ver 6.67 support CMPccXADD
 * 2022/Nov/25 ver 6.66 support RAO-INT
 * 2022/Nov/22 ver 6.65 consider x32
 * 2022/Nov/04 ver 6.64 some vmov* support addressing with mask
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 7a5c575..feb7e36 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1883,6 +1883,34 @@
 
 	puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
 	puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
+	// CMPccXADD: print one cmp<cc>xadd(addr, r1, r2) definition per entry of tbl
+	{
+		const struct Tbl {
+			const char *name; // text substituted for %s between "cmp" and "xadd" in the printed name
+			uint8_t code; // value printed as the 0x%02X argument of the generated opGpr call
+		} tbl[] = {
+			{ "be", 0xE6 },
+			{ "b", 0xE2 },
+			{ "le", 0xEE },
+			{ "l", 0xEC },
+			{ "nbe", 0xE7 },
+			{ "nb", 0xE3 },
+			{ "nle", 0xEF },
+			{ "nl", 0xED },
+			{ "no", 0xE1 },
+			{ "np", 0xEB },
+			{ "ns", 0xE9 },
+			{ "nz", 0xE5 },
+			{ "o", 0xE0 },
+			{ "p", 0xEA },
+			{ "s", 0xE8 },
+			{ "z", 0xE4 },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl *p = &tbl[i];
+			printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0x%02X, false); }\n", p->name, p->code);
+		}
+	}
 }
 
 void putAMX_TILE()
diff --git a/meson.build b/meson.build
index f296532..b712c71 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
 project(
 	'xbyak',
 	'cpp',
-	version: '6.66',
+	version: '6.67',
 	license: 'BSD-3-Clause',
 	default_options: 'b_ndebug=if-release'
 )
diff --git a/readme.md b/readme.md
index 0fa16fa..072bca2 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-# Xbyak 6.66 [![Badge Build]][Build Status]
+# Xbyak 6.67 [![Badge Build]][Build Status]
 
 *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
 
diff --git a/readme.txt b/readme.txt
index ed1f2dd..6223653 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.66

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.67

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -402,6 +402,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

+2022/11/30 ver 6.67 CMPccXADDサポート

 2022/11/25 ver 6.66 RAO-INTサポート

 2022/11/22 ver 6.65 x32動作確認

 2022/11/04 ver 6.64 vmov*命令をmaskつきアドレッシング対応修正

diff --git a/sample/Makefile b/sample/Makefile
index 7e8ab7b..4c57767 100644
--- a/sample/Makefile
+++ b/sample/Makefile
@@ -1,7 +1,7 @@
 XBYAK_INC=../xbyak/xbyak.h
 CXX?=g++
 
-#BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $CXX -x c++ -c - 2>/dev/null && echo 1)
+#BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $(CXX) -x c++ -c - 2>/dev/null && echo 1)
 # I don't know why the above code causes an error on GitHub action.
 BOOST_EXIST?=0
 UNAME_M=$(shell uname -m)
diff --git a/sample/test_util.cpp b/sample/test_util.cpp
index ef6e3fa..b87b803 100644
--- a/sample/test_util.cpp
+++ b/sample/test_util.cpp
@@ -94,6 +94,7 @@
 		{ Cpu::tAVX_NE_CONVERT, "avx_ne_convert" },
 		{ Cpu::tAVX_IFMA, "avx_ifma" },
 		{ Cpu::tRAO_INT, "rao-int" },
+		{ Cpu::tCMPCCXADD, "cmpccxadd" },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
diff --git a/test/misc.cpp b/test/misc.cpp
index 7653673..f0e577f 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -2056,3 +2056,87 @@
 	CYBOZU_TEST_EQUAL(c.getSize(), n);
 	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 }
+
+#ifdef XBYAK64
+CYBOZU_TEST_AUTO(CMPccXADD)
+{
+	struct Code : Xbyak::CodeGenerator {
+		Code()
+		{
+			// 32bit reg: each cmp<cc>xadd variant once, with ecx, edx and the same address operand
+			cmpbexadd(ptr[rax+r10*4], ecx, edx);
+			cmpbxadd(ptr[rax+r10*4], ecx, edx);
+			cmplexadd(ptr[rax+r10*4], ecx, edx);
+			cmplxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnbexadd(ptr[rax+r10*4], ecx, edx);
+			cmpnbxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnlexadd(ptr[rax+r10*4], ecx, edx);
+			cmpnlxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnoxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnpxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnsxadd(ptr[rax+r10*4], ecx, edx);
+			cmpnzxadd(ptr[rax+r10*4], ecx, edx);
+			cmpoxadd(ptr[rax+r10*4], ecx, edx);
+			cmppxadd(ptr[rax+r10*4], ecx, edx);
+			cmpsxadd(ptr[rax+r10*4], ecx, edx);
+			cmpzxadd(ptr[rax+r10*4], ecx, edx);
+			// 64bit reg: the same sequence of variants, with rcx, rdx and the same address operand
+			cmpbexadd(ptr[rax+r10*4], rcx, rdx);
+			cmpbxadd(ptr[rax+r10*4], rcx, rdx);
+			cmplexadd(ptr[rax+r10*4], rcx, rdx);
+			cmplxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpoxadd(ptr[rax+r10*4], rcx, rdx);
+			cmppxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpsxadd(ptr[rax+r10*4], rcx, rdx);
+			cmpzxadd(ptr[rax+r10*4], rcx, rdx);
+		}
+	} c;
+	const uint8_t tbl[] = { // expected bytes: one 6-byte row per call above, in the same order
+		// 32bit reg: third byte of each row is 0x69
+		0xc4, 0xa2, 0x69, 0xe6, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe2, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xee, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xec, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe7, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe3, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xef, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xed, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe1, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xeb, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe9, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe5, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe0, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xea, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe8, 0x0c, 0x90,
+		0xc4, 0xa2, 0x69, 0xe4, 0x0c, 0x90,
+		// 64bit reg: third byte is 0xe9; every other byte equals the matching 32bit row
+		0xc4, 0xa2, 0xe9, 0xe6, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe2, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xee, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xec, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe7, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe3, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xef, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xed, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe1, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xeb, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe9, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe5, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe0, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xea, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe8, 0x0c, 0x90,
+		0xc4, 0xa2, 0xe9, 0xe4, 0x0c, 0x90,
+	};
+	const size_t n = sizeof(tbl) / sizeof(tbl[0]); // element count of tbl (elements are uint8_t)
+	CYBOZU_TEST_EQUAL(c.getSize(), n);
+	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+#endif // XBYAK64
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 86372a0..e080708 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x6660 /* 0xABCD = A.BC(.D) */
+	VERSION = 0x6670 /* 0xABCD = A.BC(.D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 13e52e1..cc657fd 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "6.66"; }
+const char *getVersionString() const { return "6.67"; }
 void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
 void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
 void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
@@ -1662,6 +1662,22 @@
 void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
 void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
 void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
+void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE6, false); }
+void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE2, false); }
+void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEE, false); }
+void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEC, false); }
+void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE7, false); }
+void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE3, false); }
+void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEF, false); }
+void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xED, false); }
+void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE1, false); }
+void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEB, false); }
+void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE9, false); }
+void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE5, false); }
+void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE0, false); }
+void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEA, false); }
+void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE8, false); }
+void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE4, false); }
 void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
 void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
 void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index e1e4476..ebf15e6 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -415,6 +415,7 @@
 	XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
 	XBYAK_DEFINE_TYPE(71, tAVX_IFMA);
 	XBYAK_DEFINE_TYPE(72, tRAO_INT);
+	XBYAK_DEFINE_TYPE(73, tCMPCCXADD);
 
 #undef XBYAK_SPLIT_ID
 #undef XBYAK_DEFINE_TYPE
@@ -559,6 +560,7 @@
 				if (type_ & tAVX512F) {
 					if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
 				}
+				if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
 				if (EAX & (1U << 21)) type_ |= tAMX_FP16;
 				if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
 				if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;