Merge branch 'feature/upgrade-to-vs2017/jrmwng' of https://github.com/jrmwng/xbyak into jrmwng-feature/upgrade-to-vs2017/jrmwng
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index dae0c5e..b414fd9 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -231,6 +231,18 @@
 	}
 }
 
+void putMemOp(const char *name, uint8 prefix, uint8 ext, uint8 code1, int code2, int bit = 32) // prints to stdout a one-line method: void name(const Address& addr) { [db(prefix);] opModM(...); }
+{
+	printf("void %s(const Address& addr) { ", name);
+	if (prefix) printf("db(0x%02X); ", prefix); // a prefix of 0 means no prefix byte: the db(...) statement is omitted from the generated text
+	printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); // bit picks the RegNN type name (Reg32 by default, Reg64 when 64 is passed) and ext is its constructor argument; a code2 of NONE is printed as 0x100, as seen in the generated fbld line
+}
+
+void putLoadSeg(const char *name, uint8 code1, int code2 = NONE) // prints to stdout a one-line method: void name(const Reg& reg, const Address& addr) that forwards to opLoadSeg
+{
+	printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); // both opcode arguments are always printed; a code2 left at NONE shows up as 0x100 in the generated lds/les lines
+}
+
 void put()
 {
 	const int NO = CodeGenerator::NONE;
@@ -667,6 +679,10 @@
 			{ "stosw", 0x66, 0xAB },
 			{ "stosd", 0xAB },
 			{ "rep", 0xF3 },
+			{ "repe", 0xF3 },
+			{ "repz", 0xF3 },
+			{ "repne", 0xF2 },
+			{ "repnz", 0xF2 },
 
 			{ "lahf", 0x9F },
 			{ "lock", 0xF0 },
@@ -710,7 +726,8 @@
 			{ "fabs", 0xD9, 0xE1 },
 			{ "faddp", 0xDE, 0xC1 },
 			{ "fchs", 0xD9, 0xE0 },
-
+			{ "fclex", 0x9B, 0xDB, 0xE2 },
+			{ "fnclex", 0xDB, 0xE2 },
 			{ "fcom", 0xD8, 0xD1 },
 			{ "fcomp", 0xD8, 0xD9 },
 			{ "fcompp", 0xDE, 0xD9 },
@@ -754,6 +771,9 @@
 		putGeneric(tbl, NUM_OF_ARRAY(tbl));
 		puts("void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }");
 		puts("void int_(uint8 x) { db(0xCD); db(x); }");
+		putLoadSeg("lss", 0x0F, 0xB2);
+		putLoadSeg("lfs", 0x0F, 0xB4);
+		putLoadSeg("lgs", 0x0F, 0xB5);
 	}
 	{
 		const struct Tbl {
@@ -944,17 +964,27 @@
 			{ 0x0F, 0xAE, 3, "stmxcsr", 0 },
 			{ 0x0F, 0xAE, 7, "clflush", 0 },
 			{ 0x0F, 0xAE, 7, "clflushopt", 0x66 },
+			{ 0xDF, NONE, 4, "fbld", 0 },
+			{ 0xDF, NONE, 6, "fbstp", 0 },
 			{ 0xD9, NONE, 5, "fldcw", 0 },
 			{ 0xD9, NONE, 4, "fldenv", 0 },
+			{ 0xDD, NONE, 4, "frstor", 0 },
+			{ 0xDD, NONE, 6, "fsave", 0x9B },
+			{ 0xDD, NONE, 6, "fnsave", 0 },
 			{ 0xD9, NONE, 7, "fstcw", 0x9B },
 			{ 0xD9, NONE, 7, "fnstcw", 0 },
+			{ 0xD9, NONE, 6, "fstenv", 0x9B },
+			{ 0xD9, NONE, 6, "fnstenv", 0 },
+			{ 0xDD, NONE, 7, "fstsw", 0x9B },
+			{ 0xDD, NONE, 7, "fnstsw", 0 },
+			{ 0x0F, 0xAE, 1, "fxrstor", 0 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
-			printf("void %s(const Address& addr) { ", p->name);
-			if (p->prefix) printf("db(0x%02X); ", p->prefix);
-			printf("opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->ext, p->code1, p->code2);
+			putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2);
 		}
+		puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }");
+		puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }");
 	}
 	{
 		const struct Tbl {
@@ -1717,6 +1747,8 @@
 		{ "popa", 0x61 },
 	};
 	putGeneric(tbl, NUM_OF_ARRAY(tbl));
+	putLoadSeg("lds", 0xC5, NONE);
+	putLoadSeg("les", 0xC4, NONE);
 }
 
 void put64()
@@ -1739,7 +1771,8 @@
 	};
 	putGeneric(tbl, NUM_OF_ARRAY(tbl));
 
-	puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }");
+	putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64);
+	putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64);
 	puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
 	puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
 	puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }");
diff --git a/readme.md b/readme.md
index d48ba25..f3c3781 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
 
-# Xbyak 5.802 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+# Xbyak 5.81 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
 
 ## Abstract
 
@@ -392,6 +392,7 @@
 http://opensource.org/licenses/BSD-3-Clause
 
 ## History
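+* 2019/Sep/14 ver 5.81 support some generic mnemonics (fbld, fbstp, fclex, fnclex, frstor, fsave, fnsave, fstenv, fnstenv, fstsw, fnstsw, fxrstor, fxrstor64, lds, les, lss, lfs, lgs, repe, repz, repne, repnz)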
 * 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
 * 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
 * 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
diff --git a/readme.txt b/readme.txt
index 1fc67ff..54caee5 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.802

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.81

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -373,6 +373,7 @@
 -----------------------------------------------------------------------------

 ◎履歴

 

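+2019/09/14 ver 5.81 いくつかの一般命令をサポート (fbld, fbstp, fclex, fnclex, frstor, fsave, fnsave, fstenv, fnstenv, fstsw, fnstsw, fxrstor, fxrstor64, lds, les, lss, lfs, lgs, repe, repz, repne, repnz)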

 2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov)

 2019/05/27 support vp2intersectd, vp2intersectq (not tested)

 2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps

diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 70897fc..69b6bab 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -557,6 +557,8 @@
 			"fabs",
 			"faddp",
 			"fchs",
+			"fclex",
+			"fnclex",
 			"fcom",
 			"fcomp",
 			"fcompp",
@@ -600,16 +602,35 @@
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			put(tbl[i]);
 		}
+		{
+			const char memTbl[][16] = {
+				"clflush",
+				"clflushopt",
+				"fbld",
+				"fbstp",
+				"fldcw",
+				"fldenv",
+				"frstor",
+				"fsave",
+				"fnsave",
+				"fstcw",
+				"fnstcw",
+				"fstenv",
+				"fnstenv",
+				"fstsw",
+				"fnstsw",
+				"fxrstor",
+			};
+			for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
+				put(memTbl[i], MEM);
+			}
+			put("fstsw", AX);
+			put("fnstsw", AX);
+		}
 
 		put("bswap", REG32e);
 		put("lea", REG32e|REG16, MEM);
-		put("clflush", MEM);
-		put("clflushopt", MEM);
 		put("enter", IMM, IMM);
-		put("fldcw", MEM);
-		put("fldenv", MEM);
-		put("fstcw", MEM);
-		put("fnstcw", MEM);
 		put(isXbyak_ ? "int_" : "int", IMM8);
 		put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8);
 		puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx");
@@ -1152,6 +1173,30 @@
 			put("mov", REG64, tbl[i].a, tbl[i].b);
 		}
 	}
+	void putLoadSeg() const // emits test cases for the lds/les/lss/lfs/lgs mnemonics through put(name, first operand, second operand)
+	{
+		const struct Tbl {
+			const char *name;
+			bool support64Bit; // true: a REG64 case is also emitted when XBYAK64 is defined
+		} tbl[] = {
+#ifdef XBYAK32
+			{ "lds", false }, // lds and les are listed only when XBYAK32 is defined
+			{ "les", false },
+#endif
+			{ "lss", true },
+			{ "lfs", true },
+			{ "lgs", true },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl *p = &tbl[i];
+			put(p->name, REG16|REG32, MEM); // every entry: REG16|REG32 as first operand, MEM as second
+#ifdef XBYAK64
+			if (p->support64Bit) { // compiled only under XBYAK64
+				put(p->name, REG64, MEM);
+			}
+#endif
+		}
+	}
 	// only nasm
 	void putMovImm64() const
 	{
@@ -1207,6 +1252,7 @@
 		put("cmpxchg8b", MEM);
 #ifdef XBYAK64
 		put("cmpxchg16b", MEM);
+		put("fxrstor64", MEM);
 #endif
 		{
 			const char tbl[][8] = {
@@ -2454,6 +2500,7 @@
 		putPushPop();
 		putTest();
 		separateFunc();
+		putLoadSeg();
 		putEtc();
 		putShift();
 		putShxd();
diff --git a/test/test_nm.bat b/test/test_nm.bat
index 0d63b65..60644a9 100644
--- a/test/test_nm.bat
+++ b/test/test_nm.bat
@@ -27,7 +27,7 @@
 echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
 cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
 make_nm > a.asm
-rm a.lst
+rm -rf a.lst
 echo %EXE% -f %OPT3% -l a.lst a.asm
 %EXE% -f %OPT3% -l a.lst a.asm
 rem connect "?????-" and "??"
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index c8c0507..fa5b405 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -113,7 +113,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5802 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5810 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -186,8 +186,8 @@
 	ERR_INVALID_ZERO,
 	ERR_INVALID_RIP_IN_AUTO_GROW,
 	ERR_INVALID_MIB_ADDRESS,
-	ERR_INTERNAL,
-	ERR_X2APIC_IS_NOT_SUPPORTED
+	ERR_X2APIC_IS_NOT_SUPPORTED,
+	ERR_INTERNAL, // last err
 };
 
 class Error : public std::exception {
@@ -1714,6 +1714,14 @@
 		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
 		opAddr(addr, reg.getIdx(), immSize);
 	}
+	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) // shared encoder called by the generated lds/les/lss/lfs/lgs methods: opcode byte(s), then the address operand
+	{
+		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); // addresses reporting a 64-bit displacement are rejected
+		if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); // 8-bit registers are rejected; other sizes are not checked here
+		rex(addr, reg); // NOTE(review): presumably emits any needed prefix bytes before the opcode - rex() is defined elsewhere, confirm
+		db(code0); if (code1 != NONE) db(code1); // code1 is optional: NONE means only the single byte code0 is written
+		opAddr(addr, reg.getIdx()); // the register index is passed along with the address; encoding details live in opAddr (not visible here)
+	}
 	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
 	{
 		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b2fb1d8..88b190b 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.802"; }
+const char *getVersionString() const { return "5.81"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -181,7 +181,10 @@
 void faddp() { db(0xDE); db(0xC1); }
 void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
 void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
+void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
+void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
 void fchs() { db(0xD9); db(0xE0); }
+void fclex() { db(0x9B); db(0xDB); db(0xE2); }
 void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
 void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
 void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
@@ -255,14 +258,21 @@
 void fmulp() { db(0xDE); db(0xC9); }
 void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
 void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
+void fnclex() { db(0xDB); db(0xE2); }
 void fninit() { db(0xDB); db(0xE3); }
 void fnop() { db(0xD9); db(0xD0); }
+void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
 void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
+void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
 void fpatan() { db(0xD9); db(0xF3); }
 void fprem() { db(0xD9); db(0xF8); }
 void fprem1() { db(0xD9); db(0xF5); }
 void fptan() { db(0xD9); db(0xF2); }
 void frndint() { db(0xD9); db(0xFC); }
+void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
+void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
 void fscale() { db(0xD9); db(0xFD); }
 void fsin() { db(0xD9); db(0xFE); }
 void fsincos() { db(0xD9); db(0xFB); }
@@ -270,8 +280,11 @@
 void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
 void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
 void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
 void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
 void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
+void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
 void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
 void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
 void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
@@ -298,6 +311,7 @@
 void fxam() { db(0xD9); db(0xE5); }
 void fxch() { db(0xD9); db(0xC9); }
 void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
+void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
 void fxtract() { db(0xD9); db(0xF4); }
 void fyl2x() { db(0xD9); db(0xF1); }
 void fyl2xp1() { db(0xD9); db(0xF9); }
@@ -442,6 +456,8 @@
 void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
 void leave() { db(0xC9); }
 void lfence() { db(0x0F); db(0xAE); db(0xE8); }
+void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
+void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
 void lock() { db(0xF0); }
 void lodsb() { db(0xAC); }
 void lodsd() { db(0xAD); }
@@ -455,6 +471,7 @@
 void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
 void loopne(const char *label) { loopne(std::string(label)); }
 void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
+void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
 void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
 void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66);  opModR(reg1, reg2, 0x0F, 0xF7); }
 void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
@@ -692,6 +709,10 @@
 void rdtsc() { db(0x0F); db(0x31); }
 void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
 void rep() { db(0xF3); }
+void repe() { db(0xF3); }
+void repne() { db(0xF2); }
+void repnz() { db(0xF2); }
+void repz() { db(0xF3); }
 void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
 void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
 void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
@@ -1591,6 +1612,7 @@
 void syscall() { db(0x0F); db(0x05); }
 void sysret() { db(0x0F); db(0x07); }
 void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
+void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
 void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
 void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
 void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
@@ -1620,6 +1642,8 @@
 void pushad() { db(0x60); }
 void pushfd() { db(0x9C); }
 void popa() { db(0x61); }
+void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
+void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
 #endif
 #ifndef XBYAK_NO_OP_NAMES
 void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }