| commit | f72646a7c817885d7fd29abe6f1f5b50dd621ef6 | [log] [tgz] | 
|---|---|---|
| author | MITSUNARI Shigeo <herumi@nifty.com> | Thu Jan 17 13:49:23 2019 +0900 | 
| committer | MITSUNARI Shigeo <herumi@nifty.com> | Thu Jan 17 13:49:23 2019 +0900 | 
| tree | 96d7bf332dea2565439a32eab1e5d5fb8d7802a6 | |
| parent | 4612528faace6fe58ad8de71729cd37eb4d162b5 [diff] | 
update version
This is a header file that enables dynamic assembly of x86 (IA32) and x64 (AMD64, x86-64) mnemonics.
Note: Xbyak uses and(), or(), xor(), not() functions, so -fno-operator-names option is necessary for gcc/clang.
Or define XBYAK_NO_OP_NAMES before including xbyak.h and use and_(), or_(), xor_(), not_() instead of them.
and_(), or_(), xor_(), not_() are always available.
XBYAK_NO_OP_NAMES will be defined in a future version.
Supported compilers: almost all C++03 or later compilers for x86/x64, such as Visual Studio, g++, clang++, the Intel C++ compiler, and g++ on mingw/cygwin.
The following files are necessary. Please add the path to your compile directory.
Linux:
make install
These files are copied into /usr/local/include/xbyak.
Inherit from the Xbyak::CodeGenerator class and emit the code in a method of the derived class (the constructor in the example below).
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
struct Code : Xbyak::CodeGenerator {
    Code(int x)
    {
        mov(eax, x);
        ret();
    }
};
Make an instance of the class and get the function pointer by calling getCode() and call it.
Code c(5);
int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5
Similar to MASM/NASM syntax with parentheses.
NASM              Xbyak
mov eax, ebx  --> mov(eax, ebx);
inc ecx       --> inc(ecx);
ret           --> ret();
Use qword, dword, word and byte if it is necessary to specify the size of memory, otherwise use ptr.
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
                            [rip + 32bit disp] ; x64 only
NASM                   Xbyak
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
mov al, [ebx+ecx]  --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax]    --> inc(qword [rax]);
Note: qword, dword, etc. are member variables, so do not use dword as a type name for unsigned int.
mov eax, [fs:eax] --> putSeg(fs);
                      mov(eax, ptr [eax]);
mov ax, cs        --> mov(ax, cs);
Note: Segment class is not derived from Operand.
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
Note: If XBYAK_ENABLE_OMITTED_OPERAND is defined, you can use the two-operand version for backward compatibility, but newer versions will not support it.
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
vaddpd zmm2, zmm5, zmm30                --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax]              --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax]              --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2             --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2          --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
                                            vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5     --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256]            --> vaddpd(xmm1, xmm2, ptr [rax+256]);
vaddpd xmm1, xmm2, [rax+256]{1to2}      --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4}      --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8}      --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4                  --> vmovsd(ptr [rax] | k1, xmm4);
vcvtpd2dq xmm16, oword [eax+33]         --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
                                            vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2}         --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33]          --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4}         --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
vfpclassps k5{k3}, zword [rax+64], 5    --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5    --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5    --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
k1, ..., k7 are opmask registers.
Use | T_z, | T_sae, | T_rn_sae, | T_rd_sae, | T_ru_sae, | T_rz_sae instead of ,{z}, ,{sae}, ,{rn-sae}, ,{rd-sae}, ,{ru-sae}, ,{rz-sae} respectively.
k4 | k3 is different from k3 | k4.
Use ptr_b for broadcast {1toX}. X is automatically determined.
Specify xword/yword/zword(_b) for m128/m256/m512 if necessary.
Two kinds of Label are supported (string literal and Label class).
L("L1");
  jmp("L1");
  jmp("L2");
  ...
  a few mnemonics (8-bit displacement jmp)
  ...
L("L2");
  jmp("L3", T_NEAR);
  ...
  a lot of mnemonics (32-bit displacement jmp)
  ...
L("L3");
Call hasUndefinedLabel() to verify your code has no undefined label.
A label can be used as the immediate value of mov, e.g. mov(eax, "L2").
@@, @f, @b are supported like MASM.
L("@@"); // <A>
  jmp("@b"); // jmp to <A>
  jmp("@f"); // jmp to <B>
L("@@"); // <B>
  jmp("@b"); // jmp to <B>
  mov(eax, "@b");
  jmp(eax); // jmp to <B>
Label symbols beginning with a period between inLocalLabel() and outLocalLabel() are treated as a local label. inLocalLabel() and outLocalLabel() can be nested.
void func1()
{
    inLocalLabel();
  L(".lp"); // <A> ; local label
    ...
    jmp(".lp"); // jmp to <A>
  L("aaa"); // global label <C>
    outLocalLabel();
    inLocalLabel();
  L(".lp"); // <B> ; local label
    func1();
    jmp(".lp"); // jmp to <B>
    inLocalLabel();
    jmp("aaa"); // jmp to <C>
}
L() and jxx() support Label class.
Xbyak::Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
Use putL for jmp table
    Label labelTbl, L0, L1, L2;
    mov(rax, labelTbl);
    // rdx is an index of jump table
    jmp(ptr [rax + rdx * sizeof(void*)]);
L(labelTbl);
    putL(L0);
    putL(L1);
    putL(L2);
L(L0);
    ....
L(L1);
    ....
assignL(dstLabel, srcLabel) binds dstLabel with srcLabel.
Label label2;
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
...
jmp(label2); // label2 is not determined here
...
assignL(label2, label1); // label2 <- label1
The jmp in the above code jumps to label1 assigned by assignL.
Note:
srcLabel must be used in L().
dstLabel must not be used in L().
Label::getAddress() returns the address specified by the label instance and 0 if not specified.
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
L(label);
assert(label.getAddress() == getCurr());
Label label;
mov(eax, ptr [rip + label]); // eax = 4
...
L(label);
dd(4);
int x;
...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
The default max code size is 4096 bytes. Specify the size in constructor of CodeGenerator() if necessary.
class Quantize : public Xbyak::CodeGenerator {
public:
  Quantize()
    : CodeGenerator(8192)
  {
  }
  ...
};
You can generate jit code on prepared memory.
Call setProtectModeRE yourself to change the memory mode when using prepared memory.
uint8_t alignas(4096) buf[8192]; // C++11 or later
struct Code : Xbyak::CodeGenerator {
    Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
    {
        mov(rax, 123);
        ret();
    }
};
int main()
{
    Code c;
    c.setProtectModeRE(); // set memory to Read/Exec
    printf("%d\n", c.getCode<int(*)()>()());
}
Note: See sample/test0.cpp.
The memory region for jit is automatically extended if necessary when AutoGrow is specified in a constructor of CodeGenerator.
Call ready() or readyRE() before calling getCode() to fix jump address.
struct Code : Xbyak::CodeGenerator {
  Code()
    : Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
  {
     ...
  }
};
Code c;
// generate code for jit
c.ready(); // mode = Read/Write/Exec
Note:
Do not use the address returned by getCurr() before calling ready() because it may be an invalid address.
Xbyak sets Read/Write/Exec mode on memory to run jit code. If you want to use Read/Exec mode for security, then specify DontSetProtectRWE for CodeGenerator and call setProtectModeRE() after generating jit code.
struct Code : Xbyak::CodeGenerator {
    Code()
        : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
    {
        mov(eax, 123);
        ret();
    }
};
Code c;
c.setProtectModeRE();
...
Call readyRE() instead of ready() when using AutoGrow mode. See protect-re.cpp.
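XBYAK_NO_OP_NAMES: define it to avoid needing the -fno-operator-names option.
XBYAK_ENABLE_OMITTED_OPERAND: enables omitted-operand forms such as vaddps(xmm2, xmm3); (deprecated in the future)
License: modified new BSD License http://opensource.org/licenses/BSD-3-Clause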
MITSUNARI Shigeo(herumi@nifty.com)