vaes* supports AVX-512
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 413e632..08c471c 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1258,10 +1258,10 @@
 			{ 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 },
 			{ 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 },
 
-			{ 0xDC, "aesenc", T_0F38 | T_66 | T_W0, 3 },
-			{ 0xDD, "aesenclast", T_0F38 | T_66 | T_W0, 3 },
-			{ 0xDE, "aesdec", T_0F38 | T_66 | T_W0, 3 },
-			{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_W0, 3 },
+			{ 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
+			{ 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
+			{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
+			{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
diff --git a/test/cybozu/test.hpp b/test/cybozu/test.hpp
index d192b1c..fa735d2 100644
--- a/test/cybozu/test.hpp
+++ b/test/cybozu/test.hpp
@@ -86,13 +86,15 @@
 		}
 		fflush(stdout);
 		if (msg.empty()) {
+			int err = ngCount_ + exceptionCount_;
+			int total = okCount_ + err;
 			std::cout << "ctest:name=" << getBaseName(*argv)
 					  << ", module=" << list_.size()
-					  << ", total=" << (okCount_ + ngCount_ + exceptionCount_)
+					  << ", total=" << total
 					  << ", ok=" << okCount_
 					  << ", ng=" << ngCount_
 					  << ", exception=" << exceptionCount_ << std::endl;
-			return 0;
+			return err > 0 ? 1 : 0;
 		} else {
 			std::cout << msg << std::endl;
 			return 1;
@@ -128,6 +130,15 @@
 	return lhs == rhs;
 }
 
+// avoid warning of comparison of integers of different signs
+inline bool isEqual(size_t lhs, int rhs) // size_t vs int overload: compares after an explicit conversion of rhs to size_t
+{
+	return lhs == size_t(rhs); // a negative rhs wraps around, the same value the implicit conversion would give
+}
+inline bool isEqual(int lhs, size_t rhs) // int vs size_t overload: compares after an explicit conversion of lhs to size_t
+{
+	return size_t(lhs) == rhs; // a negative lhs wraps around, the same value the implicit conversion would give
+}
 inline bool isEqual(const char *lhs, const char *rhs)
 {
 	return strcmp(lhs, rhs) == 0;
@@ -188,9 +199,9 @@
 	@param y [in]
 */
 #define CYBOZU_TEST_EQUAL(x, y) { \
-	bool eq = cybozu::test::isEqual(x, y); \
-	cybozu::test::test(eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
-	if (!eq) { \
+	bool _cybozu_eq = cybozu::test::isEqual(x, y); \
+	cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
+	if (!_cybozu_eq) { \
 		std::cout << "ctest:  lhs=" << (x) << std::endl; \
 		std::cout << "ctest:  rhs=" << (y) << std::endl; \
 	} \
@@ -201,22 +212,39 @@
 	@param y [in]
 */
 #define CYBOZU_TEST_NEAR(x, y, eps) { \
-	bool isNear = fabs((x) - (y)) < eps; \
-	cybozu::test::test(isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
-	if (!isNear) { \
+	bool _cybozu_isNear = fabs((x) - (y)) < (eps); /* eps is parenthesized so an expression argument stays a single operand */ \
+	cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); /* report the outcome via cybozu::test::test */ \
+	if (!_cybozu_isNear) { /* on failure also print both operands */ \
 		std::cout << "ctest:  lhs=" << (x) << std::endl; \
 		std::cout << "ctest:  rhs=" << (y) << std::endl; \
 	} \
 }
 
 #define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
-	bool eq = x == y; \
-	cybozu::test::test(eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
-	if (!eq) { \
+	bool _cybozu_eq = (x) == (y); /* arguments are parenthesized so expression arguments are compared as whole operands */ \
+	cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); /* report the outcome via cybozu::test::test */ \
+	if (!_cybozu_eq) { /* on failure also print both pointer values */ \
 		std::cout << "ctest:  lhs=" << static_cast<const void*>(x) << std::endl; \
 		std::cout << "ctest:  rhs=" << static_cast<const void*>(y) << std::endl; \
 	} \
 }
+/**
+	alert if x[i] != y[i] for some i in [0, n); every element is tested and reported separately
+	@param x [in] first sequence, indexed with operator[] (re-evaluated for each element)
+	@param y [in] second sequence, indexed with operator[] (re-evaluated for each element)
+	@param n [in] number of elements to compare (evaluated once and converted to size_t)
+*/
+#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
+	for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
+		bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
+		cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
+		if (!_cybozu_eq) { \
+			std::cout << "ctest:  i=" << _cybozu_test_i << std::endl; \
+			std::cout << "ctest:  lhs=" << (x)[_cybozu_test_i] << std::endl; \
+			std::cout << "ctest:  rhs=" << (y)[_cybozu_test_i] << std::endl; \
+		} \
+	} \
+}
 
 /**
 	always alert
@@ -229,25 +257,25 @@
 */
 #define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
 { \
-	int ret = 0; \
-	std::string errMsg; \
+	int _cybozu_ret = 0; \
+	std::string _cybozu_errMsg; \
 	try { \
 		statement; \
-		ret = 1; \
-	} catch (const Exception& e) { \
-		errMsg = e.what(); \
-		if (errMsg.find(msg) == std::string::npos) { \
-			ret = 2; \
+		_cybozu_ret = 1; \
+	} catch (const Exception& _cybozu_e) { \
+		_cybozu_errMsg = _cybozu_e.what(); \
+		if (_cybozu_errMsg.find(msg) == std::string::npos) { \
+			_cybozu_ret = 2; \
 		} \
 	} catch (...) { \
-		ret = 3; \
+		_cybozu_ret = 3; \
 	} \
-	if (ret) { \
+	if (_cybozu_ret) { \
 		cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
-		if (ret == 1) { \
+		if (_cybozu_ret == 1) { \
 			std::cout << "ctest:  no exception" << std::endl; \
-		} else if (ret == 2) { \
-			std::cout << "ctest:  bad exception msg:" << errMsg << std::endl; \
+		} else if (_cybozu_ret == 2) { \
+			std::cout << "ctest:  bad exception msg:" << _cybozu_errMsg << std::endl; \
 		} else { \
 			std::cout << "ctest:  unexpected exception" << std::endl; \
 		} \
@@ -258,17 +286,17 @@
 
 #define CYBOZU_TEST_EXCEPTION(statement, Exception) \
 { \
-	int ret = 0; \
+	int _cybozu_ret = 0; \
 	try { \
 		statement; \
-		ret = 1; \
+		_cybozu_ret = 1; \
 	} catch (const Exception&) { \
 	} catch (...) { \
-		ret = 2; \
+		_cybozu_ret = 2; \
 	} \
-	if (ret) { \
+	if (_cybozu_ret) { \
 		cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
-		if (ret == 1) { \
+		if (_cybozu_ret == 1) { \
 			std::cout << "ctest:  no exception" << std::endl; \
 		} else { \
 			std::cout << "ctest:  unexpected exception" << std::endl; \
diff --git a/test/misc.cpp b/test/misc.cpp
index 56534f0..bd686fe 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -130,4 +130,47 @@
 	CYBOZU_TEST_EQUAL(c.getSize(), n);
 	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 }
+CYBOZU_TEST_AUTO(vaes) // checks the bytes generated for vaesdec/vaesdeclast/vaesenc/vaesenclast with xmm, ymm and zmm operands
+{
+	struct Code : Xbyak::CodeGenerator {
+		Code()
+		{
+			vaesdec(xmm20, xmm30, ptr [rcx + 64]);
+			vaesdec(ymm1, ymm2, ptr [rcx + 64]);
+			vaesdec(zmm1, zmm2, ptr [rcx + 64]);
+
+			vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
+			vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
+			vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
+
+			vaesenc(xmm20, xmm30, ptr [rcx + 64]);
+			vaesenc(ymm1, ymm2, ptr [rcx + 64]);
+			vaesenc(zmm1, zmm2, ptr [rcx + 64]);
+
+			vaesenclast(xmm20, xmm30, ptr [rax + 64]);
+			vaesenclast(ymm20, ymm30, ptr [rax + 64]);
+			vaesenclast(zmm20, zmm30, ptr [rax + 64]);
+		}
+	} c;
+	const uint8_t tbl[] = { // expected bytes, one row per instruction, in the order emitted above
+		0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04, // vaesdec xmm20, xmm30, [rcx+64] (EVEX)
+		0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40, // vaesdec ymm1, ymm2, [rcx+64] (VEX)
+		0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01, // vaesdec zmm1, zmm2, [rcx+64] (EVEX)
+
+		0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04, // vaesdeclast xmm20, xmm30, [rax+64] (EVEX)
+		0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02, // vaesdeclast ymm20, ymm30, [rax+64] (EVEX)
+		0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01, // vaesdeclast zmm20, zmm30, [rax+64] (EVEX)
+
+		0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04, // vaesenc xmm20, xmm30, [rcx+64] (EVEX)
+		0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40, // vaesenc ymm1, ymm2, [rcx+64] (VEX)
+		0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01, // vaesenc zmm1, zmm2, [rcx+64] (EVEX)
+
+		0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04, // vaesenclast xmm20, xmm30, [rax+64] (EVEX)
+		0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02, // vaesenclast ymm20, ymm30, [rax+64] (EVEX)
+		0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01, // vaesenclast zmm20, zmm30, [rax+64] (EVEX)
+	};
+	const size_t n = sizeof(tbl) / sizeof(tbl[0]); // number of expected bytes
+	CYBOZU_TEST_EQUAL(c.getSize(), n); // generated code must have exactly the expected length
+	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); // and must match the table byte for byte
+}
 #endif
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index ea306d2..35f451c 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -759,10 +759,10 @@
 void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58); }
 void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
 void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
-void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDE); }
-void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDF); }
-void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDC); }
-void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDD); }
+void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
+void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDF); }
+void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); }
+void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); }
 void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB); }
 void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); }
 void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }