Add tcvtrowd2ps (Reg32 and imm8 row-index forms) with generator table entries and AMX test cases
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 741473c..a63ca36 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -1130,6 +1130,8 @@
uint8_t code;
bool imm;
} tbl[] = {
+ { "tcvtrowd2ps", T_F3|T_0F38|T_MUST_EVEX|T_W0, 0x4A, false },
+ { "tcvtrowd2ps", T_F3|T_0F3A|T_MUST_EVEX|T_W0, 0x07, true },
{ "tcvtrowps2bf16h", T_F2|T_0F38|T_MUST_EVEX|T_W0, 0x6D, false },
{ "tcvtrowps2bf16h", T_F2|T_0F3A|T_MUST_EVEX|T_W0, 0x07, true },
{ "tcvtrowps2bf16l", T_F3|T_0F38|T_MUST_EVEX|T_W0, 0x6D, false },
diff --git a/test/dataset/amx.txt b/test/dataset/amx.txt
index 2946756..bfe57fc 100644
--- a/test/dataset/amx.txt
+++ b/test/dataset/amx.txt
@@ -90,3 +90,5 @@
ttransposed(tmm1, tmm2);
+tcvtrowd2ps(zmm20, tmm1, r30d);
+tcvtrowd2ps(zmm20, tmm1, 0x12);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index c677997..7f4ebfd 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2685,6 +2685,8 @@
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); }
#ifdef XBYAK64
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
+void tcvtrowd2ps(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x4A); }
+void tcvtrowd2ps(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F3|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); }
void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F2|T_0F38|T_W0|T_MUST_EVEX, 0x6D); }
void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); }
void tcvtrowps2bf16l(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x6D); }