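support sae: encode the T_rn_sae/T_rd_sae/T_ru_sae/T_rz_sae rounding modifiers for ZMM operands and test them with vaddpd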
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 0c0a9b3..71c257a 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2401,19 +2401,21 @@
put("kmovq", REG64, K);
#endif
}
- void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false)
+ void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false, int sae = 0) // prints one vaddpd test line; sae indexes the saeTbl* arrays below (0 = no modifier, 1..4 = rn/rd/ru/rz) and is not range-checked
{
std::string modifier;
char pk[16] = "";
const char *pz = "";
+ const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" }; // suffix appended after r3 in the Xbyak call
+ const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" }; // suffix appended after r3 in the NASM line; same index order as saeTblXbyak
if (isXbyak_) {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
if (z) pz = "|T_z";
- printf("vaddpd(%s%s%s, %s, %s); dump();\n", r1, pk, pz, r2, r3);
+ printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); // Xbyak form: modifier is OR-ed onto the last operand
} else {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
if (z) pz = "{z}";
- printf("vaddpd %s%s%s, %s, %s\n", r1, pk, pz, r2, r3);
+ printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); // NASM form: modifier follows as an extra comma-separated token
}
}
void putCombi()
@@ -2445,19 +2447,24 @@
const size_t N = NUM_OF_ARRAY(zTbl);
for (size_t i = 0; i < N; i++) {
for (size_t j = 0; j < N; j++) {
+ separateFunc();
for (size_t k = 0; k < N; k++) {
#ifdef XBYAK64
for (int kIdx = 0; kIdx < 8; kIdx++) {
for (int z = 0; z < 2; z++) {
put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
- put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1);
+ for (int sae = 0; sae < 5; sae++) {
+ put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae);
+ }
}
}
#else
put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
- put_vaddpd(zTbl[i], zTbl[j], zTbl[k]);
+ for (int sae = 0; sae < 5; sae++) { // 0 = no modifier, 1..4 = the four rounding modifiers handled by put_vaddpd
+ put_vaddpd(zTbl[i], zTbl[j], zTbl[k], 0, false, sae); // kIdx and z are passed explicitly; with only four arguments sae would bind to kIdx and emit opmask suffixes instead
+ }
#endif
}
}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 967488c..f83ab4f 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -516,11 +516,23 @@
int rounding;
};
-static const EvexModifierRounding T_sae(1);
-static const EvexModifierRounding T_rn_sae(2);
-static const EvexModifierRounding T_rd_sae(3);
-static const EvexModifierRounding T_ru_sae(4);
-static const EvexModifierRounding T_rz_sae(5);
+namespace inner { // holds the integer codes wrapped by the T_*_sae constants defined after this namespace
+
+enum SAEtype { // code handed to EvexModifierRounding for each modifier
+ T_SAE, // implicitly 0 (was 1 before this change) -- NOTE(review): the encoder hunk in this patch skips rounding when getRounding() is 0, so T_sae looks indistinguishable from "no modifier"; confirm
+ T_RN_SAE = 1, // wrapped by T_rn_sae
+ T_RD_SAE = 2, // wrapped by T_rd_sae
+ T_RU_SAE = 3, // wrapped by T_ru_sae
+ T_RZ_SAE = 4, // wrapped by T_rz_sae
+};
+
+} // inner
+
+static const EvexModifierRounding T_sae(inner::T_SAE); // rounding value 0, see note on T_SAE
+static const EvexModifierRounding T_rn_sae(inner::T_RN_SAE); // rounding value 1 (was 2)
+static const EvexModifierRounding T_rd_sae(inner::T_RD_SAE); // rounding value 2 (was 3)
+static const EvexModifierRounding T_ru_sae(inner::T_RU_SAE); // rounding value 3 (was 4)
+static const EvexModifierRounding T_rz_sae(inner::T_RZ_SAE); // rounding value 4 (was 5)
static const struct EvexModifierZero{} T_z; // {z}
struct Xmm : public Mmx {
@@ -1361,8 +1373,17 @@
bool X = x ? false : !base.isExtIdx2();
bool B = !base.isExtIdx();
bool Rp = !reg.isExtIdx2();
- int LL = reg.isZMM() ? 2 : reg.isYMM() ? 1 : 0;
bool b = false;
+ int LL = 2; // value kept for a ZMM reg when no rounding is requested
+ if (reg.isZMM()) {
+ int rounding = base.getRounding(); // 0 is treated as "no rounding" by the check below
+ if (rounding) {
+ LL = rounding - 1; // maps rounding 1..4 onto LL 0..3
+ b = true; // NOTE(review): presumably the EVEX.b bit, which makes the LL field act as rounding control -- confirm against the Intel SDM
+ }
+ } else {
+ LL = reg.isYMM() ? 1 : 0; // NOTE(review): getRounding() is not consulted on this path, so a rounding modifier on a non-ZMM reg appears to be ignored silently; confirm
+ }
bool Vp = !(v ? v->isExtIdx2() : 0);
bool z = reg.hasZero();
int aaa = reg.getOpmaskIdx();