Xbyak 5.84 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++


This is a header-only library that lets you dynamically assemble x86 (IA32) and x64 (AMD64, x86-64) mnemonics.


  • header file only
  • Intel/MASM like syntax
  • fully support AVX-512

Note: The default setting has changed: XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined. Use `and_()`, `or_()`, ... instead of `and()`, `or()`. If you want to use `and()`, `or()`, ... then specify the `-DXBYAK_USE_OP_NAMES -fno-operator-names` option to gcc/clang.

Supported OS

  • Windows XP, Vista, Windows 7, Windows 10 (32bit, 64bit)
  • Linux(32bit, 64bit)
  • Intel macOS

Supported Compilers

Almost all C++03 or later compilers for x86/x64, such as Visual Studio, g++, clang++, Intel C++ compiler, and g++ on mingw/cygwin.


The following files are necessary. Please add the directory containing them to your compiler's include path.

  • xbyak.h
  • xbyak_mnemonic.h
  • xbyak_util.h


make install

These files are copied into /usr/local/include/xbyak.

How to use it

Inherit from the Xbyak::CodeGenerator class and write the code to be generated in a method of the derived class.

#include <xbyak/xbyak.h>

struct Code : Xbyak::CodeGenerator {
    Code(int x)
        mov(eax, x);

Make an instance of the class and get the function pointer by calling getCode() and call it.

Code c(5);
int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5


Similar to MASM/NASM syntax with parentheses.

NASM              Xbyak
mov eax, ebx  --> mov(eax, ebx);
inc ecx       --> inc(ecx);
ret           --> ret();


Use qword, dword, word and byte if it is necessary to specify the size of memory, otherwise use ptr.

(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
                            [rip + 32bit disp] ; x64 only

NASM                   Xbyak
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
mov al, [ebx+ecx]  --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax]    --> inc(qword [rax]);

Note: qword, ... are member variables, so don't use dword as an unsigned int type.

How to use Selector (Segment Register)

mov eax, [fs:eax] --> putSeg(fs);
                      mov(eax, ptr [eax]);
mov ax, cs        --> mov(ax, cs);

Note: Segment class is not derived from Operand.


vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);

Note: If XBYAK_ENABLE_OMITTED_OPERAND is defined, then you can use two operand version for backward compatibility. But the newer version will not support it.

vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3


vaddpd zmm2, zmm5, zmm30                --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax]              --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax]              --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2             --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2          --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
                                            vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5     --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);

vaddpd xmm1, xmm2, [rax+256]            --> vaddpd(xmm1, xmm2, ptr [rax+256]);
vaddpd xmm1, xmm2, [rax+256]{1to2}      --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4}      --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8}      --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4                  --> vmovsd(ptr [rax] | k1, xmm4);

vcvtpd2dq xmm16, oword [eax+33]         --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
                                            vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2}         --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33]          --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4}         --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast

vfpclassps k5{k3}, zword [rax+64], 5    --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5    --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5    --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit


  • k1, ..., k7 are opmask registers.
  • use | T_z, | T_sae, | T_rn_sae, | T_rd_sae, | T_ru_sae, | T_rz_sae instead of ,{z}, ,{sae}, ,{rn-sae}, ,{rd-sae}, ,{ru-sae}, ,{rz-sae} respectively.
  • k4 | k3 is different from k3 | k4.
  • use ptr_b for broadcast {1toX}. X is automatically determined.
  • specify xword/yword/zword(_b) for m128/m256/m512 if necessary.


Two kinds of Label are supported. (String literal and Label class).

String literal


  a few mnemonics (8-bit displacement jmp)

  jmp("L3", T_NEAR);
  a lot of mnemonics (32-bit displacement jmp)
  • Call hasUndefinedLabel() to verify your code has no undefined label.
  • You can use a label as the immediate value of mov, e.g. mov(eax, "L2").

Support @@, @f, @b like MASM

L("@@"); // <A>
  jmp("@b"); // jmp to <A>
  jmp("@f"); // jmp to <B>
L("@@"); // <B>
  jmp("@b"); // jmp to <B>
  mov(eax, "@b");
  jmp(eax); // jmp to <B>

Local label

Label symbols beginning with a period between inLocalLabel() and outLocalLabel() are treated as a local label. inLocalLabel() and outLocalLabel() can be nested.

void func1()
  L(".lp"); // <A> ; local label
    jmp(".lp"); // jmp to <A>
  L("aaa"); // global label <C>

  L(".lp"); // <B> ; local label
    jmp(".lp"); // jmp to <B>
    jmp("aaa"); // jmp to <C>

Label class

L() and jxx() support Label class.

  Xbyak::Label label1, label2;

Use putL for jmp table

    Label labelTbl, L0, L1, L2;
    mov(rax, labelTbl);
    // rdx is an index of jump table
    jmp(ptr [rax + rdx * sizeof(void*)]);

assignL(dstLabel, srcLabel) binds dstLabel with srcLabel.

  Label label2;
  Label label1 = L(); // make label1 ; same to Label label1; L(label1);
  jmp(label2); // label2 is not determined here
  assignL(label2, label1); // label2 <- label1

The jmp in the above code jumps to label1 assigned by assignL.


  • srcLabel must be used in L().
  • dstLabel must not be used in L().

Label::getAddress() returns the address specified by the label instance, or 0 if the address has not been specified yet.

// not AutoGrow mode
Label  label;
assert(label.getAddress() == 0);
assert(label.getAddress() == getCurr());

RIP-relative addressing

Label label;
mov(eax, ptr [rip + label]); // eax = 4

int x;
  mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB

Code size

The default max code size is 4096 bytes. Specify the size in constructor of CodeGenerator() if necessary.

class Quantize : public Xbyak::CodeGenerator {
    : CodeGenerator(8192)

User allocated memory

You can generate JIT code in memory that you have prepared yourself.

Call setProtectModeRE yourself to change the memory protection mode when using the prepared memory.

uint8_t alignas(4096) buf[8192]; // C++11 or later

struct Code : Xbyak::CodeGenerator {
    Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
        mov(rax, 123);

int main()
    Code c;
    c.setProtectModeRE(); // set memory to Read/Exec
    printf("%d\n", c.getCode<int(*)()>()());

Note: See sample/test0.cpp.


The memory region for jit is automatically extended if necessary when AutoGrow is specified in a constructor of CodeGenerator.

Call ready() or readyRE() before calling getCode() to fix jump address.

struct Code : Xbyak::CodeGenerator {
    : Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
Code c;
// generate code for jit
c.ready(); // mode = Read/Write/Exec


  • Don't use the address returned by getCurr() before calling ready() because it may be an invalid address.

Read/Exec mode

Xbyak sets the memory to Read/Write/Exec mode in order to run JIT code. If you want to use Read/Exec mode for security, then specify DontSetProtectRWE for CodeGenerator and call setProtectModeRE() after generating the JIT code.

struct Code : Xbyak::CodeGenerator {
        : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
        mov(eax, 123);

Code c;

Call readyRE() instead of ready() when using AutoGrow mode. See protect-re.cpp.


  • XBYAK32 is defined on 32bit.
  • XBYAK64 is defined on 64bit.
  • XBYAK64_WIN is defined on 64bit Windows(VC)
  • XBYAK64_GCC is defined on 64bit gcc, cygwin
  • define XBYAK_USE_OP_NAMES on gcc with -fno-operator-names if you want to use and(), ....
  • define XBYAK_ENABLE_OMITTED_OPERAND if you use an omitted destination such as vaddps(xmm2, xmm3); (this will be deprecated in the future)
  • define XBYAK_UNDEF_JNL if Bessel function jnl is defined as macro


  • test0.cpp ; tiny sample (x86, x64)
  • quantize.cpp ; JIT optimized quantization by fast division (x86 only)
  • calc.cpp ; assemble and evaluate a given polynomial (x86, x64)
  • bf.cpp ; JIT brainfuck (x86, x64)


modified new BSD License


