Reformat setCacheHierarchy() and add getters for data_cache_size and the related cache members
diff --git a/readme.md b/readme.md
index d3f0b8f..9f57151 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-Xbyak 5.61 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+Xbyak 5.62 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
 =============
 
 Abstract
@@ -333,6 +333,7 @@
 
 History
 -------------
+* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
 * 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
 * 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
 * 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
diff --git a/readme.txt b/readme.txt
index c68bf4e..99b5f33 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.610

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.62

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -343,6 +343,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

+2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso

 2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)

 2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加

 2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)

diff --git a/sample/test_util.cpp b/sample/test_util.cpp
index bb515db..9b19935 100644
--- a/sample/test_util.cpp
+++ b/sample/test_util.cpp
@@ -104,6 +104,9 @@
 		Core i7-3930K        6           2D
 	*/
 	cpu.putFamily();
+	for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
+		printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
+	}
 }
 
 int main()
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 2af06fc..96809d8 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -105,7 +105,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5610 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5620 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 29233c3..4661908 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.61"; }
+const char *getVersionString() const { return "5.62"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index d0f5c29..36fe3c0 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -84,52 +84,54 @@
 			displayModel = model;
 		}
 	}
-	unsigned int value_from_bits(unsigned int val, unsigned int base, unsigned int end)
+	unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end) // returns bits [base, end] of val (both ends inclusive), right-aligned
 	{
-		unsigned int shift = sizeof(val) * 8 - end - 1;
-		return (val << shift) >> (shift + base);
+		return (val >> base) & ((2u << (end - base)) - 1); // mask is end-base+1 bits wide; 2u<< stays well-defined even for a full 32-bit field
 	}
 	void setCacheHierarchy()
 	{
-		unsigned int cache_type = 42;
+		if ((type_ & tINTEL) == 0) return;
+		const unsigned int NO_CACHE = 0;
+		const unsigned int DATA_CACHE = 1;
+//		const unsigned int INSTRUCTION_CACHE = 2;
+		const unsigned int UNIFIED_CACHE = 3;
 		unsigned int smt_width = 0;
-		unsigned int n_cores;
+		unsigned int n_cores = 0;
 		unsigned int data[4];
 
-		if ((type_ & tINTEL) == 0) {
-			fprintf(stderr, "ERR cache hierarchy querying is not supported\n");
-			throw Error(ERR_INTERNAL);
-		}
-
-		// if leaf 11 exists, we use it to get the number of smt cores and cores on socket
-		// If x2APIC is supported, these are the only correct numbers.
+		/*
+			if leaf 11 exists, we use it to get the number of smt cores and cores on socket
+			If x2APIC is supported, these are the only correct numbers.
+		*/
 		getCpuidEx(0x0, 0, data);
-		if(data[0] >= 11){
+		if (data[0] >= 11) {
 			getCpuidEx(0xB, 0, data); // CPUID for SMT Level
-			smt_width = (data[1] & 0x7FFF);
+			smt_width = data[1] & 0x7FFF;
 			getCpuidEx(0xB, 1, data); // CPUID for CORE Level
-			n_cores = (data[1] & 0x7FFF);
+			n_cores = data[1] & 0x7FFF;
 		}
 
-		/* Assumptions:
-		 * - the first level of data cache is not shared (which is the
-		 *   case for every existing architecture) and use this to
-		 *   determine the SMT width for arch not supporting leaf 11
-		 * - when leaf 4 reports a number of core less than n_cores
-		 *   on socket reported by leaf 11, then it is a correct number
-		 *   of cores not an upperbound */
-		for (int i = 0; ((cache_type != NO_CACHE) && (data_cache_levels < max_number_cache_levels)); i++) {
+		/*
+			Assumptions:
+			- The first level of data cache is not shared between cores
+			  (true for every existing architecture); its sharing count is
+			  used as the SMT width on CPUs that do not support leaf 11.
+			- When leaf 4 reports fewer cores than the per-socket n_cores
+			  reported by leaf 11, the leaf 4 value is the exact number of
+			  cores, not an upper bound.
+		*/
+		for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
 			getCpuidEx(0x4, i, data);
-			cache_type = value_from_bits(data[0], 0, 4);
-			if ((cache_type == DATA_CACHE) || (cache_type == UNIFIED_CACHE)) {
-				int nb_logical_cores = (std::min)(value_from_bits(data[0], 14, 25) + 1,
-								n_cores);
+			unsigned int cacheType = extractBit(data[0], 0, 4);
+			if (cacheType == NO_CACHE) break;
+			if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
+				unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores != 0 ? n_cores : ~0u); // n_cores == 0 means leaf 0xB gave no core count: do not clamp to 0, keep the leaf 4 value
 				data_cache_size[data_cache_levels] =
-					(value_from_bits(data[1], 22, 31) + 1)
-					* (value_from_bits(data[1], 12, 21) + 1)
-					* (value_from_bits(data[1], 0, 11) + 1)
+					(extractBit(data[1], 22, 31) + 1)
+					* (extractBit(data[1], 12, 21) + 1)
+					* (extractBit(data[1], 0, 11) + 1)
 					* (data[2] + 1);
-				if ((cache_type == DATA_CACHE) && (smt_width == 0)) smt_width = nb_logical_cores;
+				if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
 				assert(smt_width != 0);
 				cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
 				data_cache_levels++;
@@ -146,11 +148,24 @@
 	int displayFamily; // family + extFamily
 	int displayModel; // model + extModel
 
-	static const unsigned int max_number_cache_levels = 10;
-	unsigned int data_cache_size[max_number_cache_levels];
-	unsigned int cores_sharing_data_cache[max_number_cache_levels];
+	// TODO: consider making these members private; the getters below already expose them read-only
+	static const unsigned int maxNumberCacheLevels = 10;
+	unsigned int data_cache_size[maxNumberCacheLevels];
+	unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
 	unsigned int data_cache_levels;
 
+	unsigned int getDataCacheLevels() const { return data_cache_levels; } // count of cache levels recorded by setCacheHierarchy()
+	unsigned int getCoresSharingDataCache(unsigned int i) const // i: 0-based index, must be < getDataCacheLevels()
+	{
+		if (i < data_cache_levels) return cores_sharing_data_cache[i];
+		throw Error(ERR_BAD_PARAMETER); // index refers to a level that was never recorded
+	}
+	unsigned int getDataCacheSize(unsigned int i) const // i: 0-based index, must be < getDataCacheLevels()
+	{
+		if (i < data_cache_levels) return data_cache_size[i];
+		throw Error(ERR_BAD_PARAMETER); // index refers to a level that was never recorded
+	}
+
 	/*
 		data[] = { eax, ebx, ecx, edx }
 	*/
@@ -183,10 +198,6 @@
 #endif
 	}
 	typedef uint64 Type;
-	static const Type NO_CACHE = 0;
-	static const Type DATA_CACHE = 1;
-	static const Type INSTRUCTION_CACHE = 2;
-	static const Type UNIFIED_CACHE = 3;
 
 	static const Type NONE = 0;
 	static const Type tMMX = 1 << 0;
@@ -346,8 +357,7 @@
 			if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
 		}
 		setFamily();
-		if ((type_ & tINTEL) == tINTEL)
-			setCacheHierarchy();
+		setCacheHierarchy();
 	}
 	void putFamily() const
 	{