Merge branch 'feature/upgrade-to-vs2017/jrmwng' of https://github.com/jrmwng/xbyak into jrmwng-feature/upgrade-to-vs2017/jrmwng
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index dae0c5e..b414fd9 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -231,6 +231,18 @@
}
}
+void putMemOp(const char *name, uint8 prefix, uint8 ext, uint8 code1, int code2, int bit = 32) // print to stdout the source of a generated `void name(const Address& addr)` mnemonic
+{
+ printf("void %s(const Address& addr) { ", name); // signature and opening brace of the generated function
+ if (prefix) printf("db(0x%02X); ", prefix); // prefix == 0 means "no prefix byte": no db() call is generated
+ printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); // `bit` selects the RegNN class name (default Reg32), `ext` is its constructor argument; code2 is printed as-is
+}
+
+void putLoadSeg(const char *name, uint8 code1, int code2 = NONE) // print to stdout the source of a generated `void name(const Reg& reg, const Address& addr)` mnemonic
+{
+ printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); // the generated body only forwards to opLoadSeg; code2 is always printed, so NONE appears numerically (0x100 in the generated lds/les)
+}
+
void put()
{
const int NO = CodeGenerator::NONE;
@@ -667,6 +679,10 @@
{ "stosw", 0x66, 0xAB },
{ "stosd", 0xAB },
{ "rep", 0xF3 },
+ { "repe", 0xF3 },
+ { "repz", 0xF3 },
+ { "repne", 0xF2 },
+ { "repnz", 0xF2 },
{ "lahf", 0x9F },
{ "lock", 0xF0 },
@@ -710,7 +726,8 @@
{ "fabs", 0xD9, 0xE1 },
{ "faddp", 0xDE, 0xC1 },
{ "fchs", 0xD9, 0xE0 },
-
+ { "fclex", 0x9B, 0xDB, 0xE2 },
+ { "fnclex", 0xDB, 0xE2 },
{ "fcom", 0xD8, 0xD1 },
{ "fcomp", 0xD8, 0xD9 },
{ "fcompp", 0xDE, 0xD9 },
@@ -754,6 +771,9 @@
putGeneric(tbl, NUM_OF_ARRAY(tbl));
puts("void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }");
puts("void int_(uint8 x) { db(0xCD); db(x); }");
+ putLoadSeg("lss", 0x0F, 0xB2);
+ putLoadSeg("lfs", 0x0F, 0xB4);
+ putLoadSeg("lgs", 0x0F, 0xB5);
}
{
const struct Tbl {
@@ -944,17 +964,27 @@
{ 0x0F, 0xAE, 3, "stmxcsr", 0 },
{ 0x0F, 0xAE, 7, "clflush", 0 },
{ 0x0F, 0xAE, 7, "clflushopt", 0x66 },
+ { 0xDF, NONE, 4, "fbld", 0 },
+ { 0xDF, NONE, 6, "fbstp", 0 },
{ 0xD9, NONE, 5, "fldcw", 0 },
{ 0xD9, NONE, 4, "fldenv", 0 },
+ { 0xDD, NONE, 4, "frstor", 0 },
+ { 0xDD, NONE, 6, "fsave", 0x9B },
+ { 0xDD, NONE, 6, "fnsave", 0 },
{ 0xD9, NONE, 7, "fstcw", 0x9B },
{ 0xD9, NONE, 7, "fnstcw", 0 },
+ { 0xD9, NONE, 6, "fstenv", 0x9B },
+ { 0xD9, NONE, 6, "fnstenv", 0 },
+ { 0xDD, NONE, 7, "fstsw", 0x9B },
+ { 0xDD, NONE, 7, "fnstsw", 0 },
+ { 0x0F, 0xAE, 1, "fxrstor", 0 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Address& addr) { ", p->name);
- if (p->prefix) printf("db(0x%02X); ", p->prefix);
- printf("opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->ext, p->code1, p->code2);
+ putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2);
}
+ puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }");
+ puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }");
}
{
const struct Tbl {
@@ -1717,6 +1747,8 @@
{ "popa", 0x61 },
};
putGeneric(tbl, NUM_OF_ARRAY(tbl));
+ putLoadSeg("lds", 0xC5, NONE);
+ putLoadSeg("les", 0xC4, NONE);
}
void put64()
@@ -1739,7 +1771,8 @@
};
putGeneric(tbl, NUM_OF_ARRAY(tbl));
- puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }");
+ putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64);
+ putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64);
puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }");
diff --git a/readme.md b/readme.md
index d48ba25..f3c3781 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 5.802 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+# Xbyak 5.81 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
@@ -392,6 +392,7 @@
http://opensource.org/licenses/BSD-3-Clause
## History
+* 2019/Sep/14 ver 5.81 support some generic mnemonics.
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
diff --git a/readme.txt b/readme.txt
index 1fc67ff..54caee5 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.802
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.81
-----------------------------------------------------------------------------
◎概要
@@ -373,6 +373,7 @@
-----------------------------------------------------------------------------
◎履歴
+2019/09/14 ver 5.81 いくつかの一般命令をサポート
2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov)
2019/05/27 support vp2intersectd, vp2intersectq (not tested)
2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 70897fc..69b6bab 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -557,6 +557,8 @@
"fabs",
"faddp",
"fchs",
+ "fclex",
+ "fnclex",
"fcom",
"fcomp",
"fcompp",
@@ -600,16 +602,35 @@
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
put(tbl[i]);
}
+ {
+ const char memTbl[][16] = {
+ "clflush",
+ "clflushopt",
+ "fbld",
+ "fbstp",
+ "fldcw",
+ "fldenv",
+ "frstor",
+ "fsave",
+ "fnsave",
+ "fstcw",
+ "fnstcw",
+ "fstenv",
+ "fnstenv",
+ "fstsw",
+ "fnstsw",
+ "fxrstor",
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
+ put(memTbl[i], MEM);
+ }
+ put("fstsw", AX);
+ put("fnstsw", AX);
+ }
put("bswap", REG32e);
put("lea", REG32e|REG16, MEM);
- put("clflush", MEM);
- put("clflushopt", MEM);
put("enter", IMM, IMM);
- put("fldcw", MEM);
- put("fldenv", MEM);
- put("fstcw", MEM);
- put("fnstcw", MEM);
put(isXbyak_ ? "int_" : "int", IMM8);
put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8);
puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx");
@@ -1152,6 +1173,30 @@
put("mov", REG64, tbl[i].a, tbl[i].b);
}
}
+ void putLoadSeg() const // emit test cases for the lds/les/lss/lfs/lgs mnemonics via put()
+ {
+ const struct Tbl {
+ const char *name; // mnemonic passed to put()
+ bool support64Bit; // true: also emit the REG64 variant when XBYAK64 is defined
+ } tbl[] = {
+#ifdef XBYAK32
+ { "lds", false }, // lds/les are listed only when XBYAK32 is defined
+ { "les", false },
+#endif
+ { "lss", true },
+ { "lfs", true },
+ { "lgs", true },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ put(p->name, REG16|REG32, MEM); // 16/32-bit register destination with a memory source; put() is defined outside this hunk
+#ifdef XBYAK64
+ if (p->support64Bit) {
+ put(p->name, REG64, MEM); // additional 64-bit register variant, compiled in only for XBYAK64 builds
+ }
+#endif
+ }
+ }
// only nasm
void putMovImm64() const
{
@@ -1207,6 +1252,7 @@
put("cmpxchg8b", MEM);
#ifdef XBYAK64
put("cmpxchg16b", MEM);
+ put("fxrstor64", MEM);
#endif
{
const char tbl[][8] = {
@@ -2454,6 +2500,7 @@
putPushPop();
putTest();
separateFunc();
+ putLoadSeg();
putEtc();
putShift();
putShxd();
diff --git a/test/test_nm.bat b/test/test_nm.bat
index 0d63b65..60644a9 100644
--- a/test/test_nm.bat
+++ b/test/test_nm.bat
@@ -27,7 +27,7 @@
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
make_nm > a.asm
-rm a.lst
+rm -rf a.lst
echo %EXE% -f %OPT3% -l a.lst a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index c8c0507..fa5b405 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -113,7 +113,7 @@
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x5802 /* 0xABCD = A.BC(D) */
+ VERSION = 0x5810 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -186,8 +186,8 @@
ERR_INVALID_ZERO,
ERR_INVALID_RIP_IN_AUTO_GROW,
ERR_INVALID_MIB_ADDRESS,
- ERR_INTERNAL,
- ERR_X2APIC_IS_NOT_SUPPORTED
+ ERR_X2APIC_IS_NOT_SUPPORTED,
+ ERR_INTERNAL, // last err
};
class Error : public std::exception {
@@ -1714,6 +1714,14 @@
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
opAddr(addr, reg.getIdx(), immSize);
}
+ void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) // shared emitter behind the generated lds/les/lss/lfs/lgs mnemonics
+ {
+ if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); // reject addresses flagged as having a 64-bit displacement
+ if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); // 8-bit registers are rejected; other sizes pass through unchecked here
+ rex(addr, reg); // NOTE(review): presumably emits any REX/operand-size prefix needed - rex() is defined outside this hunk, confirm
+ db(code0); if (code1 != NONE) db(code1); // first opcode byte always; second only when the caller supplied one
+ opAddr(addr, reg.getIdx()); // NOTE(review): presumably encodes the address with the register index in the reg field - opAddr() is outside this hunk, confirm
+ }
void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
{
if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b2fb1d8..88b190b 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.802"; }
+const char *getVersionString() const { return "5.81"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -181,7 +181,10 @@
void faddp() { db(0xDE); db(0xC1); }
void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
+void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
+void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
void fchs() { db(0xD9); db(0xE0); }
+void fclex() { db(0x9B); db(0xDB); db(0xE2); }
void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
@@ -255,14 +258,21 @@
void fmulp() { db(0xDE); db(0xC9); }
void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
+void fnclex() { db(0xDB); db(0xE2); }
void fninit() { db(0xDB); db(0xE3); }
void fnop() { db(0xD9); db(0xD0); }
+void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
+void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
void fpatan() { db(0xD9); db(0xF3); }
void fprem() { db(0xD9); db(0xF8); }
void fprem1() { db(0xD9); db(0xF5); }
void fptan() { db(0xD9); db(0xF2); }
void frndint() { db(0xD9); db(0xFC); }
+void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
+void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
void fscale() { db(0xD9); db(0xFD); }
void fsin() { db(0xD9); db(0xFE); }
void fsincos() { db(0xD9); db(0xFB); }
@@ -270,8 +280,11 @@
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
+void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
@@ -298,6 +311,7 @@
void fxam() { db(0xD9); db(0xE5); }
void fxch() { db(0xD9); db(0xC9); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
+void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
void fxtract() { db(0xD9); db(0xF4); }
void fyl2x() { db(0xD9); db(0xF1); }
void fyl2xp1() { db(0xD9); db(0xF9); }
@@ -442,6 +456,8 @@
void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
void leave() { db(0xC9); }
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
+void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
+void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
void lock() { db(0xF0); }
void lodsb() { db(0xAC); }
void lodsd() { db(0xAD); }
@@ -455,6 +471,7 @@
void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
void loopne(const char *label) { loopne(std::string(label)); }
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
+void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
@@ -692,6 +709,10 @@
void rdtsc() { db(0x0F); db(0x31); }
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
void rep() { db(0xF3); }
+void repe() { db(0xF3); }
+void repne() { db(0xF2); }
+void repnz() { db(0xF2); }
+void repz() { db(0xF3); }
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
@@ -1591,6 +1612,7 @@
void syscall() { db(0x0F); db(0x05); }
void sysret() { db(0x0F); db(0x07); }
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
+void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
@@ -1620,6 +1642,8 @@
void pushad() { db(0x60); }
void pushfd() { db(0x9C); }
void popa() { db(0x61); }
+void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
+void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
#endif
#ifndef XBYAK_NO_OP_NAMES
void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }