Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3f67b4..64d3204 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 2.6...3.0.2)
 
-project(xbyak LANGUAGES CXX VERSION 6.60.1)
+project(xbyak LANGUAGES CXX VERSION 6.60.2)
 
 file(GLOB headers xbyak/*.h)
 
diff --git a/doc/changelog.md b/doc/changelog.md
index f4456f9..b52fe26 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
 # History
 
+* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
 * 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
 * 2022/Jun/06 ver 6.60 change the version format to avoid it going backward
 * 2022/Jun/01 ver 6.06 refactor Cpu::Type class and improve MmapAllocator when XBYAK_USE_MEMFD is defined.
diff --git a/meson.build b/meson.build
index 7572e8d..5685857 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
 project(
 	'xbyak',
 	'cpp',
-	version: '6.60.1',
+	version: '6.60.2',
 	license: 'BSD-3-Clause',
 	default_options: 'b_ndebug=if-release'
 )
diff --git a/readme.md b/readme.md
index 91e18bd..986bda1 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-# Xbyak 6.60.1 [![Badge Build]][Build Status]
+# Xbyak 6.60.2 [![Badge Build]][Build Status]
 
 *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
 
diff --git a/readme.txt b/readme.txt
index 6381ef1..442a10f 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.60.1

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.60.2

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -400,6 +400,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

+2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正

 2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応

 2022/06/06 ver 6.60 バージョンのつけ方を数値が戻らないように変更

 2022/06/01 ver 6.06 Cpu::TypeクラスのリファクタリングとXBYAK_USE_MEMFDが定義されたときのMmapAllocatorの改善

diff --git a/sample/cpuid/adl.txt b/sample/cpuid/adl.txt
new file mode 100644
index 0000000..a5b2c4b
--- /dev/null
+++ b/sample/cpuid/adl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx_vnni waitpkg clflushopt cldemote movdiri movdir64b
diff --git a/sample/cpuid/bdw.txt b/sample/cpuid/bdw.txt
new file mode 100644
index 0000000..42b55c9
--- /dev/null
+++ b/sample/cpuid/bdw.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe
diff --git a/sample/cpuid/clx.txt b/sample/cpuid/clx.txt
new file mode 100644
index 0000000..979ffc9
--- /dev/null
+++ b/sample/cpuid/clx.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt
diff --git a/sample/cpuid/cnl.txt b/sample/cpuid/cnl.txt
new file mode 100644
index 0000000..40cf483
--- /dev/null
+++ b/sample/cpuid/cnl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi clflushopt
diff --git a/sample/cpuid/cpuid.sh b/sample/cpuid/cpuid.sh
new file mode 100755
index 0000000..c312282
--- /dev/null
+++ b/sample/cpuid/cpuid.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Regression check: run test_util64 -cpuid under each CPU model via sde and diff the output against <cpu>.txt.
+make -C ../ test_util64 || exit 1  # a failed build would compare a stale or missing binary
+# Each name is passed to sde as -<name> and is also the basename of its reference file.
+cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
+for cpu in "${cpus[@]}" ; do
+  echo "$cpu"
+  # Compare through process substitution so no tmp.txt scratch file is left in the source tree.
+  diff <(~/bin/sde "-$cpu" -- ../test_util64 -cpuid) "$cpu.txt"
+done
+
diff --git a/sample/cpuid/cpx.txt b/sample/cpuid/cpx.txt
new file mode 100644
index 0000000..367210a
--- /dev/null
+++ b/sample/cpuid/cpx.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt
diff --git a/sample/cpuid/glm.txt b/sample/cpuid/glm.txt
new file mode 100644
index 0000000..4c7733e
--- /dev/null
+++ b/sample/cpuid/glm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
diff --git a/sample/cpuid/glp.txt b/sample/cpuid/glp.txt
new file mode 100644
index 0000000..4c7733e
--- /dev/null
+++ b/sample/cpuid/glp.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
diff --git a/sample/cpuid/hsw.txt b/sample/cpuid/hsw.txt
new file mode 100644
index 0000000..9652a96
--- /dev/null
+++ b/sample/cpuid/hsw.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand f16c movbe
diff --git a/sample/cpuid/icl.txt b/sample/cpuid/icl.txt
new file mode 100644
index 0000000..eaa9029
--- /dev/null
+++ b/sample/cpuid/icl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
diff --git a/sample/cpuid/icx.txt b/sample/cpuid/icx.txt
new file mode 100644
index 0000000..eaa9029
--- /dev/null
+++ b/sample/cpuid/icx.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
diff --git a/sample/cpuid/ivb.txt b/sample/cpuid/ivb.txt
new file mode 100644
index 0000000..a501281
--- /dev/null
+++ b/sample/cpuid/ivb.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx enh_rep rdrand f16c
diff --git a/sample/cpuid/knl.txt b/sample/cpuid/knl.txt
new file mode 100644
index 0000000..8c8d8f5
--- /dev/null
+++ b/sample/cpuid/knl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd
diff --git a/sample/cpuid/knm.txt b/sample/cpuid/knm.txt
new file mode 100644
index 0000000..f787fb0
--- /dev/null
+++ b/sample/cpuid/knm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd avx512_4vnniw avx512_4fmaps avx512_vpopcntdq
diff --git a/sample/cpuid/mrm.txt b/sample/cpuid/mrm.txt
new file mode 100644
index 0000000..4d69ce3
--- /dev/null
+++ b/sample/cpuid/mrm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3
diff --git a/sample/cpuid/nhm.txt b/sample/cpuid/nhm.txt
new file mode 100644
index 0000000..63dc555
--- /dev/null
+++ b/sample/cpuid/nhm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt rdtscp
diff --git a/sample/cpuid/p4p.txt b/sample/cpuid/p4p.txt
new file mode 100644
index 0000000..8e6e5cb
--- /dev/null
+++ b/sample/cpuid/p4p.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3
diff --git a/sample/cpuid/pnr.txt b/sample/cpuid/pnr.txt
new file mode 100644
index 0000000..59de9b5
--- /dev/null
+++ b/sample/cpuid/pnr.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41
diff --git a/sample/cpuid/skl.txt b/sample/cpuid/skl.txt
new file mode 100644
index 0000000..2d80258
--- /dev/null
+++ b/sample/cpuid/skl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe clflushopt
diff --git a/sample/cpuid/skx.txt b/sample/cpuid/skx.txt
new file mode 100644
index 0000000..c9e17dd
--- /dev/null
+++ b/sample/cpuid/skx.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt
diff --git a/sample/cpuid/slm.txt b/sample/cpuid/slm.txt
new file mode 100644
index 0000000..1a79965
--- /dev/null
+++ b/sample/cpuid/slm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp pclmulqdq prefetchw enh_rep rdrand movbe
diff --git a/sample/cpuid/slt.txt b/sample/cpuid/slt.txt
new file mode 100644
index 0000000..3a854c9
--- /dev/null
+++ b/sample/cpuid/slt.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 movbe
diff --git a/sample/cpuid/snb.txt b/sample/cpuid/snb.txt
new file mode 100644
index 0000000..ebc8e91
--- /dev/null
+++ b/sample/cpuid/snb.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx
diff --git a/sample/cpuid/spr.txt b/sample/cpuid/spr.txt
new file mode 100644
index 0000000..acf875c
--- /dev/null
+++ b/sample/cpuid/spr.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
diff --git a/sample/cpuid/tgl.txt b/sample/cpuid/tgl.txt
new file mode 100644
index 0000000..8ac740a
--- /dev/null
+++ b/sample/cpuid/tgl.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt movdiri movdir64b
diff --git a/sample/cpuid/tmp.txt b/sample/cpuid/tmp.txt
new file mode 100644
index 0000000..acf875c
--- /dev/null
+++ b/sample/cpuid/tmp.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
diff --git a/sample/cpuid/tnt.txt b/sample/cpuid/tnt.txt
new file mode 100644
index 0000000..4c7733e
--- /dev/null
+++ b/sample/cpuid/tnt.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
diff --git a/sample/cpuid/update-txt.sh b/sample/cpuid/update-txt.sh
new file mode 100755
index 0000000..fcdca42
--- /dev/null
+++ b/sample/cpuid/update-txt.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Regenerate the per-CPU reference files <cpu>.txt from test_util64 -cpuid run under sde.
+make -C ../ test_util64 || exit 1  # never overwrite the references using a stale or missing binary
+# Each name is passed to sde as -<name> and is also the basename of the file it produces.
+cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
+for cpu in "${cpus[@]}" ; do
+  echo "$cpu"
+  ~/bin/sde "-$cpu" -- ../test_util64 -cpuid > "$cpu.txt"
+done
+
diff --git a/sample/cpuid/wsm.txt b/sample/cpuid/wsm.txt
new file mode 100644
index 0000000..6a5e33e
--- /dev/null
+++ b/sample/cpuid/wsm.txt
@@ -0,0 +1,2 @@
+vendor intel
+ mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp pclmulqdq
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 8fa37c0..fd89a46 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -144,7 +144,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x6601 /* 0xABCD = A.BC(.D) */
+	VERSION = 0x6602 /* 0xABCD = A.BC(.D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 8fa6f62..4e19bd3 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "6.60.1"; }
+const char *getVersionString() const { return "6.60.2"; }
 void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 4b94d37..db8ac00 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -506,9 +506,6 @@
 						if (EBX & (1U << 31)) type_ |= tAVX512VL;
 						if (ECX & (1U << 1)) type_ |= tAVX512_VBMI;
 						if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2;
-						if (ECX & (1U << 8)) type_ |= tGFNI;
-						if (ECX & (1U << 9)) type_ |= tVAES;
-						if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
 						if (ECX & (1U << 11)) type_ |= tAVX512_VNNI;
 						if (ECX & (1U << 12)) type_ |= tAVX512_BITALG;
 						if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
@@ -537,6 +534,9 @@
 			if (EBX & (1U << 29)) type_ |= tSHA;
 			if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
 			if (ECX & (1U << 5)) type_ |= tWAITPKG;
+			if (ECX & (1U << 8)) type_ |= tGFNI;
+			if (ECX & (1U << 9)) type_ |= tVAES;
+			if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
 			if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
 			if (ECX & (1U << 27)) type_ |= tMOVDIRI;
 			if (ECX & (1U << 28)) type_ |= tMOVDIR64B;