Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0ffc06..8978c3b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 2.6...3.0.2)
 
-project(xbyak LANGUAGES CXX VERSION 6.00)
+project(xbyak LANGUAGES CXX VERSION 6.01)
 
 file(GLOB headers xbyak/*.h)
 
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 2ad8c59..bb83983 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -665,6 +665,7 @@
 			{ "cmpsd", 0xA7 },
 			{ "endbr32", 0xF3, 0x0F, 0x1E, 0xFB },
 			{ "endbr64", 0xF3, 0x0F, 0x1E, 0xFA },
+			{ "hlt", 0xF4 },
 			{ "int3", 0xCC },
 			{ "scasb", 0xAE },
 			{ "scasw", 0x66, 0xAF },
@@ -1044,6 +1045,7 @@
 		puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
 		puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
 		puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
+		puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
 
 		puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
 		puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
diff --git a/meson.build b/meson.build
index c699787..a1e69a5 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
 project(
 	'xbyak',
 	'cpp',
-	version: '6.00',
+	version: '6.01',
 	license: 'BSD-3-Clause',
 	default_options: 'b_ndebug=if-release'
 )
diff --git a/readme.md b/readme.md
index 6c9dbc4..4c78f7b 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,6 @@
 [![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml)
 
-# Xbyak 6.00 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+# Xbyak 6.01 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
 
 ## Abstract
 
@@ -19,6 +19,7 @@
 If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
 
 ### News
+- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, and `retf()` for far absolute indirect jump, call, and far return.
 - vnni instructions such as vpdpbusd supports vex encoding.
 - (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
 - (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
@@ -330,6 +331,24 @@
   mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
 ```
 
+## Far jump
+
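+Use `word|dword|qword` instead of `ptr` to specify the operand size, i.e. the offset width of the `m16:16`/`m16:32`/`m16:64` far pointer; `ptr` with `T_FAR` is ambiguous and throws an exception.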
+
+### 32 bit mode
+```
+jmp(word[eax], T_FAR);  // jmp m16:16(FF /5)
+jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
+```
+
+### 64 bit mode
+```
+jmp(word[rax], T_FAR);  // jmp m16:16(FF /5)
+jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
+jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
+```
+The same applies to `call`.
+
 ## Code size
 The default max code size is 4096 bytes.
 Specify the size in constructor of `CodeGenerator()` if necessary.
diff --git a/readme.txt b/readme.txt
index e526d21..460da05 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
 

-    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.00

+    C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.01

 

 -----------------------------------------------------------------------------

 ◎概要

@@ -277,6 +277,24 @@
 assert(label.getAddress(), getCurr());

 ```

 

+4. farジャンプ

+

+`jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()`をサポートします。

+サイズを明示するために`ptr`の代わりに`word|dword|qword`を利用してください。

+

+32bit

+```

+jmp(word[eax], T_FAR);  // jmp m16:16(FF /5)

+jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)

+```

+

+64bit

+```

+jmp(word[rax], T_FAR);  // jmp m16:16(FF /5)

+jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)

+jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)

+```

+

 ・Xbyak::CodeGenerator()コンストラクタインタフェース

 

 @param maxSize [in] コード生成最大サイズ(デフォルト4096byte)

diff --git a/test/jmp.cpp b/test/jmp.cpp
index e9192b2..67882c7 100644
--- a/test/jmp.cpp
+++ b/test/jmp.cpp
@@ -1383,3 +1383,18 @@
 		}
 	}
 }
+
+CYBOZU_TEST_AUTO(ambiguousFarJmp) // far jmp/call through `ptr[...]` (no explicit operand size) must be rejected
+{
+	struct Code : Xbyak::CodeGenerator {
+#ifdef XBYAK32
+		void genJmp() { jmp(ptr[eax], T_FAR); }
+		void genCall() { call(ptr[eax], T_FAR); }
+#else // non-XBYAK32 build: same checks using rax as the base register
+		void genJmp() { jmp(ptr[rax], T_FAR); }
+		void genCall() { call(ptr[rax], T_FAR); }
+#endif
+	} code;
+	CYBOZU_TEST_EXCEPTION(code.genJmp(), std::exception); // passes only if generating the instruction throws
+	CYBOZU_TEST_EXCEPTION(code.genCall(), std::exception); // same expectation for call
+}
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 6fd875a..8af0670 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -512,6 +512,7 @@
 			"cmpsb",
 			"cmpsw",
 			"cmpsd",
+			"hlt",
 			"int3",
 			"leave",
 			"lodsb",
@@ -700,6 +701,24 @@
 #endif
 #endif
 	}
+	void putFarJmp() const // emits far indirect jmp/call test cases, one per explicitly sized memory operand
+	{
+#ifdef XBYAK64
+		put("jmp", "word[rax],T_FAR", "far word [rax]"); // NOTE(review): arguments look like (mnemonic, xbyak-side operands, assembler-side operands) — confirm against put()
+		put("jmp", "dword[rax],T_FAR", "far dword [rax]");
+		put("jmp", "qword[rax],T_FAR", "far qword [rax]"); // qword form is listed only in the XBYAK64 branch
+
+		put("call", "word[rax],T_FAR", "far word [rax]"); // same three sizes for call
+		put("call", "dword[rax],T_FAR", "far dword [rax]");
+		put("call", "qword[rax],T_FAR", "far qword [rax]");
+#else
+		put("jmp", "dword[eax],T_FAR", "far dword [eax]"); // non-XBYAK64 build: only word and dword are exercised
+		put("jmp", "word[eax],T_FAR", "far word [eax]");
+
+		put("call", "dword[eax],T_FAR", "far dword [eax]");
+		put("call", "word[eax],T_FAR", "far word [eax]");
+#endif
+	}
 	void putMMX1() const
 	{
 		// emms etc
@@ -1257,6 +1276,9 @@
 			const char *p = "ret";
 			put(p);
 			put(p, IMM);
+			p = "retf";
+			put(p);
+			put(p, IMM);
 			p = "mov";
 			put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
 			put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
@@ -2529,6 +2551,7 @@
 #else // USE_AVX
 
 		putJmp();
+		putFarJmp();
 
 #ifdef USE_YASM
 
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 41b5cc0..637ea12 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -142,7 +142,7 @@
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x6000 /* 0xABCD = A.BC(D) */
+	VERSION = 0x6010 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -1574,6 +1574,7 @@
 	enum LabelType {
 		T_SHORT,
 		T_NEAR,
+		T_FAR, // far jump
 		T_AUTO // T_SHORT if possible
 	};
 private:
@@ -1887,6 +1888,7 @@
 	template<class T>
 	void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
 	{
+		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
 		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
 		size_t offset = 0;
 		if (labelMgr_.getOffset(&offset, label)) { /* label exists */
@@ -1907,6 +1909,7 @@
 	}
 	void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
 	{
+		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
 		if (isAutoGrow()) {
 			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
 			if (size_ + 16 >= maxSize_) growMemory();
@@ -1919,6 +1922,16 @@
 		}
 
 	}
+	void opJmpOp(const Operand& op, LabelType type, int ext) // shared encoder for jmp/call taking an Operand; ext is the near-form /digit of opcode 0xFF (jmp passes 4, call passes 2)
+	{
+		const int bit = 16|i32e; // operand-size mask used by both the near and the far form
+		if (type == T_FAR) {
+			if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED) // far form requires a memory operand accepted by isMEM(bit); unsized `ptr[...]` is expected to throw (see test ambiguousFarJmp)
+			opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false); // far form uses the next /digit (jmp: /4 -> /5); NOTE(review): trailing `false` presumably allows REX.W for qword — confirm against opR_ModM
+		} else {
+			opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true); // every non-T_FAR type: near indirect form with the unmodified /digit
+		}
+	}
 	// reg is reg field of ModRM
 	// immSize is the size for immediate value
 	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
@@ -2474,13 +2487,13 @@
 
 	// set default type of `jmp` of undefined label to T_NEAR
 	void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
-	void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
+	void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); } // pass T_FAR for a far indirect jump; op must then be an explicitly sized memory operand (e.g. word[...]/dword[...]), not ptr[...]
 	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
 	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
 	void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
 	void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
 
-	void call(const Operand& op) { opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); }
+	void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); } // pass T_FAR for a far indirect call; op must then be an explicitly sized memory operand (e.g. word[...]/dword[...]), not ptr[...]
 	// call(string label), not const std::string&
 	void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
 	void call(const char *label) { call(std::string(label)); }
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 72bcb22..09f2dcf 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "6.00"; }
+const char *getVersionString() const { return "6.01"; }
 void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -323,6 +323,7 @@
 void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
 void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
+void hlt() { db(0xF4); }
 void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
 void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
 void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
@@ -719,6 +720,7 @@
 void repnz() { db(0xF2); }
 void repz() { db(0xF3); }
 void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
+void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }
 void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
 void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
 void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }