gen_amx.cpp is merged into gen_avx512.cpp
diff --git a/gen/Makefile b/gen/Makefile
index f9068e4..53d1a94 100644
--- a/gen/Makefile
+++ b/gen/Makefile
@@ -1,5 +1,5 @@
TARGET=../xbyak/xbyak_mnemonic.h
-BIN=sortline gen_code gen_avx512 gen_amx
+BIN=sortline gen_code gen_avx512
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
all: $(TARGET)
sortline: sortline.cpp
@@ -8,8 +8,6 @@
$(CXX) $(CFLAGS) $< -o $@
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
-gen_amx: gen_amx.cpp ../xbyak/xbyak.h avx_type.hpp
- $(CXX) $(CFLAGS) $< -o $@
$(TARGET): $(BIN)
./gen_code | ./sortline > $@
@@ -21,7 +19,6 @@
./gen_avx512 | ./sortline >> $@
echo "#ifdef XBYAK64" >> $@
./gen_avx512 64 | ./sortline >> $@
- ./gen_amx | ./sortline >> $@
echo "#endif" >> $@
echo "#endif" >> $@
diff --git a/gen/gen_amx.cpp b/gen/gen_amx.cpp
deleted file mode 100644
index 7145e83..0000000
--- a/gen/gen_amx.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#define XBYAK_DONT_READ_LIST
-#include <stdio.h>
-#include <string.h>
-#include "../xbyak/xbyak.h"
-
-using namespace Xbyak;
-#ifdef _MSC_VER
- #pragma warning(disable : 4996) // scanf
- #define snprintf _snprintf_s
-#endif
-
-#include "avx_type.hpp"
-
-void putAMX_TILE()
-{
- puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }");
- puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }");
- puts("void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
- puts("void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
- puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
- puts("void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tmm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
- puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }");
-}
-void putAMX_INT8()
-{
- puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }");
-}
-void putAMX_BF16()
-{
- puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }");
-}
-
-int main()
-{
- putAMX_TILE();
- putAMX_INT8();
- putAMX_BF16();
-}
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index a4f67d5..4fb1cce 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -730,12 +730,40 @@
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
}
+void putAMX_TILE()
+{
+ puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }");
+ puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }");
+ puts("void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+ puts("void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+ puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
+ puts("void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tmm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+ puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }");
+}
+void putAMX_INT8()
+{
+ puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+ puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+ puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+ puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }");
+}
+void putAMX_BF16()
+{
+ puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }");
+}
+
+
int main(int argc, char *[])
{
bool only64bit = argc == 2;
putOpmask(only64bit);
putBroadcast(only64bit);
- if (only64bit) return 0;
+ if (only64bit) {
+ putAMX_TILE();
+ putAMX_INT8();
+ putAMX_BF16();
+ return 0;
+ }
putVcmp();
putX_XM();
putM_X();
diff --git a/gen/update.bat b/gen/update.bat
index f821d85..161ed87 100644
--- a/gen/update.bat
+++ b/gen/update.bat
@@ -13,7 +13,5 @@
gen_avx512 | %SORT% >> %TARGET%
echo #ifdef XBYAK64>> %TARGET%
gen_avx512 64 | %SORT% >> %TARGET%
-cl gen_amx.cpp %OPT%
-gen_amx | %SORT% >> %TARGET%
echo #endif>> %TARGET%
echo #endif>> %TARGET%
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 8dd733a..b2beaac 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2033,18 +2033,18 @@
#ifdef XBYAK64
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
-void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }
void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }
-void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }
+void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }
void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }
void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tmm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }
void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }
+void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
#endif
#endif