Fix push(qword [mem]) and bump version to 5.65
diff --git a/readme.md b/readme.md
index 4c92835..04bbc94 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-Xbyak 5.631 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+Xbyak 5.65 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
 =============
 
 Abstract
@@ -333,7 +333,8 @@
 
 History
 -------------
-* 2018/Mar/06 ver 5.631 fix zero division in Cpu() on some cpu
+* 2018/Jun/26 ver 5.65 fix push(qword [mem])
+* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
 * 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
 * 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
 * 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
diff --git a/readme.txt b/readme.txt
index 2418e30..b1c15a2 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.631

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.65

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -343,7 +343,8 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

-2018/03/06 ver 5.631 Cpu()の中でzero divisionが出ることがあるのを修正

+2018/06/26 ver 5.65 fix push(qword [mem])

+2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正

 2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)

 2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso

 2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)

diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index dbdba42..cd9db1b 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -1054,15 +1054,19 @@
 			push word 2
 			reduce 2-byte stack, so I can't support it
 		*/
-		const char *p = "push";
-		put(p, REG16);
-		put(p, IMM8); // IMM16 decrease -2 from esp
-		put(p, MEM16);
 
+		put("push", IMM8|IMM32);
+		if (isXbyak_) {
+			puts("push(word, 1000);dump();");
+		} else {
+			puts("push word 1000");
+		}
+
+		put("push", REG16|MEM16);
 		put("pop", REG16|MEM16);
 #ifdef XBYAK64
-		put("push", REG64);
-		put("pop", REG64);
+		put("push", REG64|IMM32|MEM64);
+		put("pop", REG64|MEM64);
 #else
 		put("push", REG32|IMM32|MEM32);
 		put("pop", REG32|MEM32);
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 6c83b26..d703522 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -105,7 +105,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5631 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5650 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -1812,15 +1812,20 @@
 	}
 	void opPushPop(const Operand& op, int code, int ext, int alt)
 	{
-		if (op.isREG()) {
-			if (op.isBit(16)) db(0x66);
-			if (op.getReg().getIdx() >= 8) db(0x41);
-			db(alt | (op.getIdx() & 7));
-		} else if (op.isMEM()) {
-			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
-		} else {
-			throw Error(ERR_BAD_COMBINATION);
+		int bit = op.getBit();
+		if (bit == 16 || bit == BIT) {
+			if (bit == 16) db(0x66);
+			if (op.isREG()) {
+				if (op.getReg().getIdx() >= 8) db(0x41);
+				db(alt | (op.getIdx() & 7));
+				return;
+			}
+			if (op.isMEM()) {
+				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
+				return;
+			}
 		}
+		throw Error(ERR_BAD_COMBINATION);
 	}
 	void verifyMemHasSize(const Operand& op) const
 	{
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index f4f77df..fea242a 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.631"; }
+const char *getVersionString() const { return "5.65"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index c4434dd..eb27cc9 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -128,8 +128,9 @@
 			if (cacheType == NO_CACHE) break;
 			if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
 				unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
-				if (n_cores != 0) // true only if leaf 0xB is supported and valid
+				if (n_cores != 0) { // true only if leaf 0xB is supported and valid
 					nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
+				}
 				assert(nb_logical_cores != 0);
 				data_cache_size[data_cache_levels] =
 					(extractBit(data[1], 22, 31) + 1)