Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt index b8d0616..e2a9710 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.10) -project(xbyak LANGUAGES CXX VERSION 7.37.3) +project(xbyak LANGUAGES CXX VERSION 7.37.4) file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md index 3606734..11ec5ce 100644 --- a/doc/changelog.md +++ b/doc/changelog.md
@@ -1,5 +1,6 @@ # History +* 2026/Jun/19 ver 7.37.4 strict check of TMUL information * 2026/May/23 ver 7.37.3 fix meson.build to skip pkgconfig/cmake generation when used as a subproject * 2026/May/20 ver 7.37.2 fix APX encoding for tpause/umonitor/umwait * 2026/May/14 ver 7.37.1 fix false positive in memory operand size check
diff --git a/meson.build b/meson.build index a34ad6e..7191a06 100644 --- a/meson.build +++ b/meson.build
@@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '7.37.3', + version: '7.37.4', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' )
diff --git a/readme.md b/readme.md index a58630d..bb57223 100644 --- a/readme.md +++ b/readme.md
@@ -1,5 +1,5 @@ -# Xbyak 7.37.3 [![Badge Build]][Build Status] +# Xbyak 7.37.4 [![Badge Build]][Build Status] *A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
diff --git a/readme.txt b/readme.txt index 98eaf46..a255c45 100644 --- a/readme.txt +++ b/readme.txt
@@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.37.3 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.37.4 ----------------------------------------------------------------------------- ◎概要 @@ -404,6 +404,7 @@ ----------------------------------------------------------------------------- ◎履歴 +2026/06/19 ver 7.37.4 TMUL information cpuidの厳密チェック 2026/05/23 ver 7.37.3 mesond.buildがサブプロジェクトとして使用される場合pkgconfig/cmakeの生成をスキップするように修正 2026/05/20 ver 7.37.2 tpause/umonitor/umwaitのAPX encoding対応修正 2026/05/14 ver 7.37.1 正しいメモリオペランドサイズのチェックエラーを修正
diff --git a/sample/cpuid/cpuid.sh b/sample/cpuid/cpuid.sh index 6eb6cbc..8d644d8 100755 --- a/sample/cpuid/cpuid.sh +++ b/sample/cpuid/cpuid.sh
@@ -1,5 +1,7 @@ #!/bin/bash + "${SDE:=sde}" + UPDATE=0 if [ $# -eq 1 ]; then UPDATE=1 @@ -16,9 +18,9 @@ for cpu in ${cpus[@]} ; do echo $cpu if [ $UPDATE == 1 ]; then - ~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt + ${SDE} -$cpu -- ../test_util64 -cpuid > $cpu.txt else - ~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt + ${SDE} -$cpu -- ../test_util64 -cpuid > tmp.txt diff $cpu.txt tmp.txt fi done
diff --git a/sample/cpuid/cpx.txt b/sample/cpuid/cpx.txt index 707159d..c315b4f 100644 --- a/sample/cpuid/cpx.txt +++ b/sample/cpuid/cpx.txt
@@ -1,2 +1,2 @@ vendor intel - mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt clwb amx_fp8 amx_transpose amx_tf32 amx_avx512 + mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt clwb
diff --git a/sample/cpuid/dmr.txt b/sample/cpuid/dmr.txt index 2afa498..ed124cc 100644 --- a/sample/cpuid/dmr.txt +++ b/sample/cpuid/dmr.txt
@@ -1,2 +1,2 @@ vendor intel - mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd prefetchiti sha512 sm3 sm4 avx_vnni_int16 apx_f avx10 amx_fp8 amx_transpose amx_tf32 amx_avx512 amx_movrs movrs + mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd prefetchiti sha512 sm3 sm4 avx_vnni_int16 apx_f avx10 amx_fp8 amx_tf32 amx_avx512 amx_movrs movrs amx_complex
diff --git a/sample/cpuid/gnr.txt b/sample/cpuid/gnr.txt index 3dcda77..8fa0776 100644 --- a/sample/cpuid/gnr.txt +++ b/sample/cpuid/gnr.txt
@@ -1,2 +1,2 @@ vendor intel - mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 prefetchiti avx10 + mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 prefetchiti avx10 amx_complex
diff --git a/sample/cpuid/nvl.txt b/sample/cpuid/nvl.txt new file mode 100644 index 0000000..62a767e --- /dev/null +++ b/sample/cpuid/nvl.txt
@@ -0,0 +1,2 @@ +vendor intel + mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx_vnni avx512_fp16 waitpkg clflushopt clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 apx_f avx10 aeskle wide_kl keylocker keylocker_wide movrs hybrid
diff --git a/sample/test_util.cpp b/sample/test_util.cpp index bfa6870..33cc595 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp
@@ -108,7 +108,7 @@ { Cpu::tKEYLOCKER_WIDE, "keylocker_wide" }, { Cpu::tTSXLDTRK, "tsxldtrk" }, { Cpu::tAMX_FP8, "amx_fp8" }, - { Cpu::tAMX_TRANSPOSE, "amx_transpose" }, +// { Cpu::tAMX_TRANSPOSE, "amx_transpose" }, { Cpu::tAMX_TF32, "amx_tf32" }, { Cpu::tAMX_AVX512, "amx_avx512" }, { Cpu::tAMX_MOVRS, "amx_movrs" },
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 9ebec62..caff1af 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h
@@ -177,7 +177,7 @@ enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x7373 /* 0xABCD = A.BC(.D) */ + VERSION = 0x7374 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index bdcecbc..7f679a3 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@ -const char *getVersionString() const { return "7.37.3"; } +const char *getVersionString() const { return "7.37.4"; } void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index a75d3f9..191ab4f 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h
@@ -582,7 +582,7 @@ XBYAK_DEFINE_TYPE(88, tSSE4a); XBYAK_DEFINE_TYPE(89, tCLWB); XBYAK_DEFINE_TYPE(90, tTSXLDTRK); - XBYAK_DEFINE_TYPE(91, tAMX_TRANSPOSE); +// XBYAK_DEFINE_TYPE(91, tAMX_TRANSPOSE); XBYAK_DEFINE_TYPE(92, tAMX_TF32); XBYAK_DEFINE_TYPE(93, tAMX_AVX512); XBYAK_DEFINE_TYPE(94, tAMX_MOVRS); @@ -748,13 +748,18 @@ if (edx & (1U << 14)) type_ |= tPREFETCHITI; if (edx & (1U << 19)) type_ |= tAVX10; if (edx & (1U << 21)) type_ |= tAPX_F; - - getCpuidEx(0x1e, 1, data); - if (eax & (1U << 4)) type_ |= tAMX_FP8; - if (eax & (1U << 5)) type_ |= tAMX_TRANSPOSE; - if (eax & (1U << 6)) type_ |= tAMX_TF32; - if (eax & (1U << 7)) type_ |= tAMX_AVX512; - if (eax & (1U << 8)) type_ |= tAMX_MOVRS; + } + if (maxNum >= 0x1e) { + getCpuidEx(0x1e, 0, data); + if (eax /* maxNumSubLeaves */ >= 1) { // 0 on SPR/EMR + getCpuidEx(0x1e, 1, data); + // eax bits 0-3 (AMX-INT8/BF16/COMPLEX/FP16) mirror the leaf 7 bits, so use leaf 7 + if (eax & (1U << 4)) type_ |= tAMX_FP8; +// if (eax & (1U << 5)) type_ |= tAMX_TRANSPOSE; // removed at 319433-059 + if (eax & (1U << 6)) type_ |= tAMX_TF32; + if (eax & (1U << 7)) type_ |= tAMX_AVX512; + if (eax & (1U << 8)) type_ |= tAMX_MOVRS; + } } } if (maxNum >= 0x19) {