Support disp8*N (compressed displacement) for vmov* instructions
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 9dd56bb..e28c818 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1204,15 +1204,15 @@
 			{ 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false },
 			{ 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM, false },
 			{ 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false },
-			{ 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
-			{ 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false },
+			{ 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false },
+			{ 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false },
 			{ 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z, false },
-			{ 0x6F, "movdqa", T_0F | T_66 | T_YMM, false },
-			{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false },
+			{ 0x6F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL, false },
+			{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL, false },
 			{ 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false },
 			{ 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false },
-			{ 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false },
-			{ 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false },
+			{ 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false },
+			{ 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false },
 
 			{ 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false },
 			{ 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false },
@@ -1261,12 +1261,12 @@
 			const char *name;
 			int type;
 		} tbl[] = {
-			{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1},
-			{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0},
-			{ 0x7F, "movdqa", T_0F | T_66 | T_YMM},
-			{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM},
-			{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1},
-			{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0},
+			{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL },
+			{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL },
+			{ 0x7F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL },
+			{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL },
+			{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL },
+			{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
diff --git a/test/make_512.cpp b/test/make_512.cpp
index 68e6b18..5b0a4e6 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -761,8 +761,8 @@
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const char *name = tbl[i];
-			put(name, MEM, ZMM);
-			put(name, ZMM, MEM);
+			put(name, MEM, _XMM3|ZMM);
+			put(name, _XMM3|ZMM, MEM);
 		}
 	}
 	void put_vmov()
@@ -1429,8 +1429,8 @@
 		separateFunc();
 #endif
 		putAVX512_M_X();
-#if 0
 		separateFunc();
+#if 0
 		put_vmov();
 		separateFunc();
 		put512_X_XM();
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 30672fe..e92c45a 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -978,15 +978,15 @@
 void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x5B); }
 void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x5B); }
 void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x5B); }
-void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
-void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
+void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); }
+void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); }
 void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); }
-void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); }
-void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); }
+void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); }
+void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); }
 void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); }
 void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); }
-void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
-void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
+void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); }
+void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); }
 void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
 void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); }
 void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); }
@@ -1013,12 +1013,12 @@
 void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); }
 void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x2E); }
 void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_EVEX | T_SAE_X | T_N4, 0x2E); }
-void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x29); }
-void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); }
-void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); }
-void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); }
-void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x11); }
-void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); }
+void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); }
+void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); }
+void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); }
+void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); }
+void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); }
+void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); }
 void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
 void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
 void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }