refactoring tile and add tests
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 73c4650..6a56614 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -2043,10 +2043,11 @@
void putAMX_TILE()
{
- puts("void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }");
- puts("void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }");
+ puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, addr, T_0F38|T_W0, 0x49); }");
+ puts("void sttilecfg(const Address& addr) { opAMX(tmm0, addr, T_66|T_0F38|T_W0, 0x49); }");
+ puts("void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, addr, T_F3|T_0F38|T_W0, 0x4B); }");
+
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
- puts("void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }");
puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
}
@@ -2060,7 +2061,8 @@
{ "tileloadd", T_F2 | T_0F38 | T_W0, 0x4B },
{ "tileloaddt1", T_66 | T_0F38 | T_W0, 0x4B },
{ "tileloaddrs", T_F2 | T_0F38 | T_W0, 0x4A },
- { "tileloaddrst1", T_66 | T_0F38 | T_W0, 0x4A }
+ { "tileloaddrst1", T_66 | T_0F38 | T_W0, 0x4A },
+// { "t2rpntlvwz0", T_0F38 | T_W0, 0x6E },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& t = tbl[i];
diff --git a/test/avx10/amx.txt b/test/avx10/amx.txt
index 74cf06d..46dea5f 100644
--- a/test/avx10/amx.txt
+++ b/test/avx10/amx.txt
@@ -1,9 +1,14 @@
ldtilecfg(ptr[rax + rcx * 4 + 64]);
+ldtilecfg(ptr [r30+r29*4+0x12]);
sttilecfg(ptr[rsp + rax * 8 + 128]);
+sttilecfg(ptr [r30+r29*4+0x12]);
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
+tileloadd(tmm2, ptr [r30+r29*4+0x12]);
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
+tileloaddt1(tmm7, ptr [r30+r29*4+0x12]);
tilerelease();
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
+tilestored(ptr [r30+r29*4+0x12], tmm1);
tilezero(tmm7);
tdpbssd(tmm1, tmm2, tmm3);
tdpbsud(tmm2, tmm3, tmm4);
@@ -14,8 +19,11 @@
tileloadd(tmm1, ptr[r8+r8]);
tileloadd(tmm1, ptr[rax+rcx*4]);
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
+tileloadd(tmm1, ptr[r30+r29*1+0x80]);
tileloaddrs(tmm3, ptr[rdi + rdx * 2 + 8]);
+tileloaddrs(tmm7, ptr[r31 + rdx * 2 + 8]);
tileloaddrst1(tmm4, ptr[r8 + r9 + 32]);
+tileloaddrst1(tmm4, ptr[r25 + r9 + 32]);
tdpbf8ps(tmm1, tmm2, tmm3);
tdpbhf8ps(tmm1, tmm2, tmm3);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 38f2468..1317544 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1923,10 +1923,10 @@
void rdgsbase(const Reg32e& r) { opRR(ecx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); }
void wrfsbase(const Reg32e& r) { opRR(edx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); }
void wrgsbase(const Reg32e& r) { opRR(ebx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); }
-void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
-void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
+void ldtilecfg(const Address& addr) { opAMX(tmm0, addr, T_0F38|T_W0, 0x49); }
+void sttilecfg(const Address& addr) { opAMX(tmm0, addr, T_66|T_0F38|T_W0, 0x49); }
+void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, addr, T_F3|T_0F38|T_W0, 0x4B); }
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
-void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }
void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x5E); }
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x5E); }