test all patterns of {k2}{z}
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 999af36..bc661cb 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2,6 +2,7 @@
#include "xbyak/xbyak.h"
#include <stdlib.h>
#include <string.h>
+#include "cybozu/inttype.hpp"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
@@ -2388,6 +2389,21 @@
put("kmovq", REG64, K);
#endif
}
+	// Print one vaddpd test line for (r1, r2, r3); a non-zero kIdx tags r1 with opmask k<kIdx>, and z tags r1 with the zeroing modifier.
+	void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false)
+	{
+		char pk[16] = ""; // opmask suffix appended to r1; stays empty when kIdx == 0
+		const char *pz = ""; // zeroing suffix appended to r1; stays empty when z is false
+		if (isXbyak_) { // Xbyak call form: modifiers are joined to r1 with '|', followed by dump()
+			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
+			if (z) pz = "|T_z";
+			printf("vaddpd(%s%s%s, %s, %s); dump();\n", r1, pk, pz, r2, r3);
+		} else { // plain instruction-text form: modifiers are written as {kN} and {z} after r1
+			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
+			if (z) pz = "{z}";
+			printf("vaddpd %s%s%s, %s, %s\n", r1, pk, pz, r2, r3);
+		}
+	}
void putCombi()
{
const char *xTbl[] = {
@@ -2418,15 +2434,19 @@
for (size_t i = 0; i < N; i++) {
for (size_t j = 0; j < N; j++) {
for (size_t k = 0; k < N; k++) {
- if (isXbyak_) {
- printf("vaddpd(%s, %s, %s); dump();\n", xTbl[i], xTbl[j], xTbl[k]);
- printf("vaddpd(%s, %s, %s); dump();\n", yTbl[i], yTbl[j], yTbl[k]);
- printf("vaddpd(%s, %s, %s); dump();\n", zTbl[i], zTbl[j], zTbl[k]);
- } else {
- printf("vaddpd %s, %s, %s\n", xTbl[i], xTbl[j], xTbl[k]);
- printf("vaddpd %s, %s, %s\n", yTbl[i], yTbl[j], yTbl[k]);
- printf("vaddpd %s, %s, %s\n", zTbl[i], zTbl[j], zTbl[k]);
+#ifdef XBYAK64
+ for (int kIdx = 0; kIdx < 8; kIdx++) {
+ for (int z = 0; z < 2; z++) {
+ put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
+ put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
+ put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1);
+ }
}
+#else
+ put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
+ put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
+ put_vaddpd(zTbl[i], zTbl[j], zTbl[k]);
+#endif
}
}
}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index de17066..b996d49 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -383,7 +383,7 @@
bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
bool isExtIdx() const { return (getIdx() & 8) != 0; }
bool isExtIdx2() const { return (getIdx() & 16) != 0; }
- bool hasEvex() const { return isZMM() || (is(XMM | YMM) && isExtIdx2()); }
+ bool hasEvex() const { return isZMM() || isExtIdx2() || hasZero() || getOpmaskIdx() || getRounding(); } // true for ZMM, index bit 16 set, zero flag set, non-zero opmask index, or non-zero getRounding()
bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); }
bool hasZero() const { return zero_; }
int getOpmaskIdx() const { return mask_; }
@@ -1016,7 +1016,7 @@
int getId() const { return id; }
// backward compatibility
- static std::string toStr(int num)
+ static inline std::string toStr(int num)
{
char buf[16];
#ifdef _MSC_VER
@@ -1693,8 +1693,8 @@
int LL = x1.isZMM() ? 2 : x1.isYMM() ? 1 : 0;
bool b = false;
bool Vp = !x2.isExtIdx2();
- bool z = x1.isZMM() && x1.hasZero() ? true : false;
- int aaa = x1.isZMM() ? x1.getOpmaskIdx() : 0;
+ bool z = x1.hasZero();
+ int aaa = x1.getOpmaskIdx();
evex(R, X, B, Rp, mm, w == 1, vvvv, pp, z, LL, b, Vp, aaa);
db(code);
setModRM(3, x1.getIdx(), x3.getIdx());