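Remove T_TMM flag: opAMX now takes (Tmm, Tmm, Operand) and checks isTMM() directly; tileloadd/tileloaddt1/tilestored take Address instead of Operand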
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 4fb1cce..b7966bc 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -732,27 +732,26 @@
void putAMX_TILE()
{
- puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }");
- puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }");
- puts("void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
- puts("void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
+ puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); }");
+ puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }");
+ puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); }");
+ puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); }");
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
- puts("void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tmm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }");
- puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }");
+ puts("void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }");
+ puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
}
void putAMX_INT8()
{
- puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }");
- puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }");
+ puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }");
+ puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
+ puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
+ puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); }");
}
void putAMX_BF16()
{
- puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }");
+ puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }");
}
-
int main(int argc, char *[])
{
bool only64bit = argc == 2;
@@ -762,8 +761,8 @@
putAMX_TILE();
putAMX_INT8();
putAMX_BF16();
- return 0;
}
+ if (only64bit) return 0;
putVcmp();
putX_XM();
putM_X();
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index d57a996..6acd31f 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1595,7 +1595,6 @@
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_MEM_EVEX = 1 << 30, // use evex if mem
- T_TMM = 1 << 31,
T_XXX
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@@ -2263,18 +2262,10 @@
}
throw Error(ERR_BAD_COMBINATION);
}
- void opAMX(const Tmm& t1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
+ void opAMX(const Tmm& t1, const Tmm& t2, const Operand& op, int type, int code0, int imm8 = NONE)
{
- const Reg *t2 = static_cast<const Reg*>(&op1);
- const Operand *op = &op2;
- if (op2.isNone()) { // <i>(t1, op1) -> <i>(t1, t1, op1)
- t2 = &t1;
- op = &op1;
- }
- // <i>(t1, t2, op)
- if (!((type & T_TMM) && (t1.isTMM() && t2->isTMM()))) throw Error(ERR_BAD_COMBINATION);
-
- opVex(t1, t2, *op, type, code0, imm8);
+ if (!t1.isTMM() || !t2.isTMM()) throw Error(ERR_BAD_COMBINATION);
+ opVex(t1, &t2, op, type, code0, imm8);
}
public:
unsigned int getVersion() const { return VERSION; }
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b2beaac..17f0909 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2033,18 +2033,18 @@
#ifdef XBYAK64
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
-void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0 | T_TMM, 0x49); }
-void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0 | T_TMM, 0x49); }
-void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5c); }
-void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0 | T_TMM, 0x5e); }
-void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0 | T_TMM, 0x5e); }
-void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0 | T_TMM, 0x5e); }
-void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0 | T_TMM, 0x5e); }
-void tileloadd(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_F2 | T_0F38 | T_W0 | T_TMM, 0x4b); }
-void tileloaddt1(const Tmm& tm, const Operand& op) { opAMX(tm, tmm0, op, T_66 | T_0F38 | T_W0 | T_TMM, 0x4b); }
+void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); }
+void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
+void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
+void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
+void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
+void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
+void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); }
+void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
+void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); }
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
-void tilestored(const Operand& op, const Tmm& tm) { opAMX(tm, tmm0, op, T_F3 | T_0F38 | T_W0 | T_TMM, 0x4b); }
-void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0 | T_TMM, 0x49); }
+void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
+void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
#endif
#endif