Test all {k1}-{k7} and {z} modifier combinations of vaddpd; encode opmask/zeroing on non-ZMM registers too
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 999af36..bc661cb 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2,6 +2,7 @@
 #include "xbyak/xbyak.h"
 #include <stdlib.h>
 #include <string.h>
+#include "cybozu/inttype.hpp"
 #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
 
 using namespace Xbyak;
@@ -2388,6 +2389,21 @@
 		put("kmovq", REG64, K);
 #endif
 	}
+	void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false)
+	{
+		// Print one vaddpd line for r1, r2, r3; kIdx == 0 leaves the opmask suffix out, z appends the T_z / {z} suffix to r1.
+		char pk[16] = "";
+		const char *pz = "";
+		if (isXbyak_) { // Xbyak call form: vaddpd(r1|kN|T_z, r2, r3); dump();
+			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
+			if (z) pz = "|T_z";
+			printf("vaddpd(%s%s%s, %s, %s); dump();\n", r1, pk, pz, r2, r3);
+		} else { // plain text form: vaddpd r1{kN}{z}, r2, r3
+			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
+			if (z) pz = "{z}";
+			printf("vaddpd %s%s%s, %s, %s\n", r1, pk, pz, r2, r3);
+		}
+	}
 	void putCombi()
 	{
 		const char *xTbl[] = {
@@ -2418,15 +2434,19 @@
 		for (size_t i = 0; i < N; i++) {
 			for (size_t j = 0; j < N; j++) {
 				for (size_t k = 0; k < N; k++) {
-					if (isXbyak_) {
-						printf("vaddpd(%s, %s, %s); dump();\n", xTbl[i], xTbl[j], xTbl[k]);
-						printf("vaddpd(%s, %s, %s); dump();\n", yTbl[i], yTbl[j], yTbl[k]);
-						printf("vaddpd(%s, %s, %s); dump();\n", zTbl[i], zTbl[j], zTbl[k]);
-					} else {
-						printf("vaddpd %s, %s, %s\n", xTbl[i], xTbl[j], xTbl[k]);
-						printf("vaddpd %s, %s, %s\n", yTbl[i], yTbl[j], yTbl[k]);
-						printf("vaddpd %s, %s, %s\n", zTbl[i], zTbl[j], zTbl[k]);
+#ifdef XBYAK64
+					for (int kIdx = 0; kIdx < 8; kIdx++) {
+						for (int z = 0; z < 2; z++) {
+							put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
+							put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
+							put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1);
+						}
 					}
+#else
+					put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
+					put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
+					put_vaddpd(zTbl[i], zTbl[j], zTbl[k]);
+#endif
 				}
 			}
 		}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index de17066..b996d49 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -383,7 +383,7 @@
 	bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
 	bool isExtIdx() const { return (getIdx() & 8) != 0; }
 	bool isExtIdx2() const { return (getIdx() & 16) != 0; }
-	bool hasEvex() const { return isZMM() || (is(XMM | YMM) && isExtIdx2()); }
+	bool hasEvex() const { return isZMM() || isExtIdx2() || hasZero() || getOpmaskIdx() || getRounding(); } // true for ZMM, an index with bit 16 set, a zero flag, a non-zero opmask index, or a non-zero rounding value
 	bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); }
 	bool hasZero() const { return zero_; }
 	int getOpmaskIdx() const { return mask_; }
@@ -1016,7 +1016,7 @@
 	int getId() const { return id; }
 
 	// backward compatibility
-	static std::string toStr(int num)
+	static inline std::string toStr(int num)
 	{
 		char buf[16];
 #ifdef _MSC_VER
@@ -1693,8 +1693,8 @@
 		int LL = x1.isZMM() ? 2 : x1.isYMM() ? 1 : 0;
 		bool b = false;
 		bool Vp = !x2.isExtIdx2();
-		bool z = x1.isZMM() && x1.hasZero() ? true : false;
-		int aaa = x1.isZMM() ? x1.getOpmaskIdx() : 0;
+		bool z = x1.hasZero();
+		int aaa = x1.getOpmaskIdx();
 		evex(R, X, B, Rp, mm, w == 1, vvvv, pp, z, LL, b, Vp, aaa);
 		db(code);
 		setModRM(3, x1.getIdx(), x3.getIdx());