add generation of Intel(R) AMX ISA mnemonics
diff --git a/gen/Makefile b/gen/Makefile
index 53d1a94..f9068e4 100644
--- a/gen/Makefile
+++ b/gen/Makefile
@@ -1,5 +1,5 @@
 TARGET=../xbyak/xbyak_mnemonic.h
-BIN=sortline gen_code gen_avx512
+BIN=sortline gen_code gen_avx512 gen_amx
 CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
 all: $(TARGET)
 sortline: sortline.cpp
@@ -8,6 +8,8 @@
 	$(CXX) $(CFLAGS) $< -o $@
 gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
 	$(CXX) $(CFLAGS) $< -o $@
+gen_amx: gen_amx.cpp ../xbyak/xbyak.h avx_type.hpp # generator whose sorted output is appended to $(TARGET) inside the XBYAK64 section
+	$(CXX) $(CFLAGS) $< -o $@
 
 $(TARGET): $(BIN)
 	./gen_code | ./sortline > $@
@@ -19,6 +21,7 @@
 	./gen_avx512 | ./sortline >> $@
 	echo "#ifdef XBYAK64" >> $@
 	./gen_avx512 64 | ./sortline >> $@
+	./gen_amx | ./sortline >> $@
 	echo "#endif" >> $@
 	echo "#endif" >> $@
 
diff --git a/gen/gen_amx.cpp b/gen/gen_amx.cpp
new file mode 100644
index 0000000..ed31463
--- /dev/null
+++ b/gen/gen_amx.cpp
@@ -0,0 +1,41 @@
+#define XBYAK_DONT_READ_LIST
+#include <stdio.h>
+#include <string.h>
+#include "../xbyak/xbyak.h"
+
+using namespace Xbyak;
+#ifdef _MSC_VER
+	#pragma warning(disable : 4996) // scanf
+	#define snprintf _snprintf_s
+#endif
+
+#include "avx_type.hpp"
+
+void putAMX_TILE() // Print the AMX tile config/load/store/release/zero mnemonic definitions to stdout, one per line.
+{
+    puts("void ldtilecfg(const Address& addr) { opAMX(tm0, tm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }");
+    puts("void sttilecfg(const Address& addr) { opAMX(tm0, tm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }");
+    puts("void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+    puts("void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+    puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); // emitted as five fixed bytes rather than through opAMX
+    puts("void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+    puts("void tilezero(const Tmm& tm) { opAMX(tm, tm0, tm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }"); // parameter is named tm (not Tmm) so it does not hide the Tmm type
+}
+void putAMX_INT8() // Print the tdpbssd/tdpbsud/tdpbusd/tdpbuud mnemonic definitions to stdout, one per line.
+{ // The four wrappers share opcode 0x5e and differ only in the prefix flag; each forwards its operands to opAMX as (x1, x3, x2).
+    puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+    puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+    puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
+    puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2,        T_0F38 | T_W0 | T_TMM, 0x5e); }"); // no prefix flag; the padding inside the literal is part of the emitted text
+}
+void putAMX_BF16() // Print the tdpbf16ps mnemonic definition to stdout.
+{
+    puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }"); // the generated wrapper forwards its operands to opAMX as (x1, x3, x2)
+}
+
+int main() // Emit every AMX mnemonic group on stdout, in a fixed order.
+{
+    putAMX_TILE(); // tile configuration, load/store, release and zero
+    putAMX_INT8(); // tdpbssd / tdpbsud / tdpbusd / tdpbuud
+    putAMX_BF16(); // tdpbf16ps
+}
diff --git a/gen/update.bat b/gen/update.bat
index 161ed87..f821d85 100644
--- a/gen/update.bat
+++ b/gen/update.bat
@@ -13,5 +13,7 @@
 gen_avx512 | %SORT% >> %TARGET%
 echo #ifdef XBYAK64>> %TARGET%
 gen_avx512 64 | %SORT% >> %TARGET%
+cl gen_amx.cpp %OPT%
+gen_amx | %SORT% >> %TARGET%
 echo #endif>> %TARGET%
 echo #endif>> %TARGET%