Add AMX tile instructions: ttcmmimfp16ps, ttcmmrlfp16ps, ttdpbf16ps, ttdpfp16ps, ttmmultf32ps, ttransposed
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 3e93c3e..dfa9bb1 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -2051,6 +2051,7 @@
 	puts("void tilezero(const Tmm& t) { opVex(t, &tmm0, tmm0, T_F2|T_0F38|T_W0, 0x49); }");
 
 	puts("void tconjtfp16(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_66|T_0F38|T_W0, 0x6B); }");
+	puts("void ttransposed(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_F3|T_0F38|T_W0, 0x5F); }");
 }
 
 void putAMX_TM()
@@ -2101,7 +2102,11 @@
 		{ "tcmmimfp16ps", T_66 | T_0F38 | T_W0, 0x6C },
 		{ "tcmmrlfp16ps", T_0F38 | T_W0, 0x6C },
 		{ "tconjtcmmimfp16ps", T_0F38 | T_W0, 0x6B },
-
+		{ "ttcmmimfp16ps", T_F2 | T_0F38 | T_W0, 0x6B },
+		{ "ttcmmrlfp16ps", T_F3 | T_0F38 | T_W0, 0x6B },
+		{ "ttdpbf16ps", T_F3 | T_0F38 | T_W0, 0x6C },
+		{ "ttdpfp16ps", T_F2 | T_0F38 | T_W0, 0x6C },
+		{ "ttmmultf32ps", T_0F38 | T_W0, 0x48 },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl& t = tbl[i];
diff --git a/test/avx10/amx.txt b/test/avx10/amx.txt
index 9f73573..bc50a7f 100644
--- a/test/avx10/amx.txt
+++ b/test/avx10/amx.txt
@@ -77,3 +77,14 @@
 
 tilemovrow(zmm1, tmm2, r30d);
 tilemovrow(zmm29, tmm2, 0x12);
+
+ttcmmimfp16ps(tmm1, tmm2, tmm3);
+ttcmmrlfp16ps(tmm1, tmm2, tmm3);
+
+ttdpbf16ps(tmm1, tmm2, tmm3);
+ttdpfp16ps(tmm1, tmm2, tmm3);
+
+ttmmultf32ps(tmm1, tmm2, tmm3);
+
+ttransposed(tmm1, tmm2);
+
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b4a4b92..56f3c11 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1937,6 +1937,11 @@
 void tcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66|T_0F38|T_W0, 0x6C); }
 void tcmmrlfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x6C); }
 void tconjtcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x6B); }
+void ttcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x6B); }
+void ttcmmrlfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x6B); }
+void ttdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x6C); }
+void ttdpfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x6C); }
+void ttmmultf32ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x48); }
 void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }
 void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }
 void tileloaddrs(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4A); }
@@ -1955,6 +1960,7 @@
 void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
 void tilezero(const Tmm& t) { opVex(t, &tmm0, tmm0, T_F2|T_0F38|T_W0, 0x49); }
 void tconjtfp16(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_66|T_0F38|T_W0, 0x6B); }
+void ttransposed(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_F3|T_0F38|T_W0, 0x5F); }
 #else
 void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }