Merge branch 'dev'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cbb81f1..e61de1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
-project(xbyak LANGUAGES CXX VERSION 7.01)
+project(xbyak LANGUAGES CXX VERSION 7.02)
file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md
index 59932c3..aec29a4 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
# History
+* 2023/Dec/20 ver 7.02 add APX support for SHA* instructions
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX
* 2023/Dec/01 ver 7.00 support APX
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 892fdbc..d1a0bd7 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1402,14 +1402,6 @@
{ 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
{ 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
-
- { 0xCC, "sha1rnds4", T_0F3A, true, 1 },
- { 0xC8, "sha1nexte", T_0F38, false, 1 },
- { 0xC9, "sha1msg1", T_0F38, false, 1 },
- { 0xCA, "sha1msg2", T_0F38, false, 1 },
- { 0xCB, "sha256rnds2", T_0F38, false, 1 },
- { 0xCC, "sha256msg1", T_0F38, false, 1 },
- { 0xCD, "sha256msg2", T_0F38, false, 1 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@@ -1425,6 +1417,26 @@
}
}
}
+ // sha: print the SHA-1 / SHA-256 mnemonic wrappers as C++ source text on stdout.
+ // Each generated function forwards to opSSE_APX with two (type, opcode) pairs:
+ // (T_0F38, code) first and (T_MUST_EVEX, code2) second.
+ {
+ const struct Tbl {
+ uint8_t code; // opcode printed next to T_0F38 (4th argument of the generated opSSE_APX call)
+ uint8_t code2; // opcode printed next to T_MUST_EVEX (6th argument of the generated opSSE_APX call)
+ const char *name; // mnemonic, used verbatim as the generated function name
+ } tbl[] = {
+ { 0xC8, 0xD8, "sha1nexte" },
+ { 0xC9, 0xD9, "sha1msg1" },
+ { 0xCA, 0xDA, "sha1msg2" },
+ { 0xCB, 0xDB, "sha256rnds2" },
+ { 0xCC, 0xDC, "sha256msg1" },
+ { 0xCD, 0xDD, "sha256msg2" },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ // %02X renders each opcode as two upper-case hex digits, e.g. 0xC8 / 0xD8
+ printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
+ }
+ // sha1rnds4 is emitted separately: it takes an extra imm argument and uses T_0F3A instead of T_0F38
+ puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
+ }
// (m, x), (m, y)
{
const struct Tbl {
@@ -2036,9 +2048,9 @@
std::string s1 = type2String(p->type1);
std::string s2 = type2String(p->type2);
if (p->idx == 8) {
- printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
+ printf("void %s(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, s1.c_str(), p->code, s2.c_str(), p->code);
} else {
- printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
+ printf("void %s(const Address& addr) { opSSE_APX(xmm%d, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), p->code, s2.c_str(), p->code);
}
}
}
diff --git a/meson.build b/meson.build
index a9f354e..edc97cd 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
- version: '7.01',
+ version: '7.02',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
diff --git a/readme.md b/readme.md
index 14ab86c..0de9024 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 7.01 [![Badge Build]][Build Status]
+# Xbyak 7.02 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
diff --git a/readme.txt b/readme.txt
index 7d82356..08e9deb 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.02
-----------------------------------------------------------------------------
◎概要
@@ -404,6 +404,9 @@
-----------------------------------------------------------------------------
◎履歴
+2023/12/20 ver 7.02 SHA*のAPX対応
+2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 APX10/APX判定対応
+2023/12/01 ver 7.00 APX対応
2023/08/07 ver 6.73 sha512/sm3/sm4/avx-vnni-int16追加
2023/08/02 ver 6.72 xabort, xbegin, xend追加
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
diff --git a/test/apx.cpp b/test/apx.cpp
index b2675b8..207389c 100644
--- a/test/apx.cpp
+++ b/test/apx.cpp
@@ -1870,3 +1870,31 @@
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+// Byte-exact encoding test for the SHA mnemonics with an address built from r30/r29.
+// Every expected sequence starts with 0x62 and carries the T_MUST_EVEX opcode
+// (0xD8..0xDD, 0xD4) that the mnemonic passes to opSSE_APX, not the 0xC8..0xCD one.
+CYBOZU_TEST_AUTO(sha)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ // same operands for every instruction, so only the opcode byte (and the imm) differ below
+ sha1msg1(xmm15, ptr [r30+r29*8+0x12]);
+ sha1msg2(xmm15, ptr [r30+r29*8+0x12]);
+ sha1nexte(xmm15, ptr [r30+r29*8+0x12]);
+ sha256msg1(xmm15, ptr [r30+r29*8+0x12]);
+ sha256msg2(xmm15, ptr [r30+r29*8+0x12]);
+ sha256rnds2(xmm15, ptr [r30+r29*8+0x12]);
+ sha1rnds4(xmm15, ptr [r30+r29*8+0x12], 0x23);
+ }
+ } c;
+ // expected machine code, one row per instruction in the order emitted above
+ const uint8_t tbl[] = {
+ 0x62, 0x1c, 0x78, 0x08, 0xd9, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xda, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xd8, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xdc, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xdd, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xdb, 0x7c, 0xee, 0x12,
+ 0x62, 0x1c, 0x78, 0x08, 0xd4, 0x7c, 0xee, 0x12, 0x23, // sha1rnds4: trailing byte is the immediate 0x23
+ };
+ const size_t n = sizeof(tbl);
+ // compare both the total length and every generated byte
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 8c633ca..c99e809 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x7010 /* 0xABCD = A.BC(.D) */
+ VERSION = 0x7020 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -2738,15 +2738,6 @@
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
}
- void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code)
- {
- if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX)
- if (addr.hasRex2()) {
- opROO(Reg(), addr, *x, type2, code);
- return;
- }
- opRO(*x, addr, type1, code);
- }
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
{
if (r1.getIdx() < 8 && r2.getIdx() < 8) {
@@ -2755,6 +2746,14 @@
}
opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2);
}
+ /*
+ 	Emit an (xmm, xmm/mem) instruction, choosing between two encodings.
+ 	If x has index <= 15, op.hasRex2() is true and opROO() returns true,
+ 	the instruction is emitted through opROO() with (type2, code2);
+ 	otherwise it is emitted through opSSE() with (type1, code1).
+ 	imm : optional immediate; NONE (the default) means no immediate is written
+ */
+ void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE)
+ {
+ // the checks short-circuit, so opROO() is only attempted when the first two conditions hold
+ // NOTE(review): the last opROO() argument (1 or 0) presumably announces the size of a trailing immediate -- confirm against opROO
+ if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) {
+ // opROO() does not receive the immediate value itself; append it here
+ if (imm != NONE) db(imm);
+ return;
+ }
+ // fallback path: opSSE() receives imm directly (possibly NONE)
+ opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
+ }
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@@ -3139,6 +3138,10 @@
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
+ /*
+ 	Calls opROO() directly with T_MUST_EVEX and opcode 0xD9, the same (type, opcode)
+ 	pair that sha1msg1 passes to opSSE_APX as its second encoding.
+ 	Unlike opSSE_APX there is no hasRex2()/register-index check and no opSSE() fallback,
+ 	and the value returned by opROO() is ignored.
+ 	NOTE(review): the name looks like an experiment or typo next to sha1msg1/sha1msg2 -- confirm it is intended API
+ */
+ void sha1msg12(const Xmm& x, const Operand& op)
+ {
+ opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
+ }
/*
use single byte nop if useMultiByteNop = false
*/
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 196f5d4..d863d46 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "7.01"; }
+const char *getVersionString() const { return "7.02"; }
// aadd: forwards the address operand and the register to opMR with T_0F38 and opcode 0xFC
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
// aand: forwards the address operand and the register to opMR with T_0F38 | T_66 and opcode 0xFC
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@@ -988,13 +988,13 @@
void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524
void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
-void sha1msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC9, isXMM_XMMorMEM, NONE); }
-void sha1msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCA, isXMM_XMMorMEM, NONE); }
-void sha1nexte(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC8, isXMM_XMMorMEM, NONE); }
-void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_0F3A, 0xCC, isXMM_XMMorMEM, imm); }
-void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); }
-void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); }
-void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); }
+// SHA-1 / SHA-256 mnemonics. Each one forwards to opSSE_APX with two encodings:
+// (T_0F38 or T_0F3A, 0xC8..0xCD) and (T_MUST_EVEX, 0xD8..0xDD; 0xD4 for sha1rnds4).
+// Only sha1rnds4 takes an immediate, which is passed through as the last argument.
+void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); }
+void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }
+void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); }
+void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }
+void sha256msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCC, T_MUST_EVEX, 0xDC); }
+void sha256msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCD, T_MUST_EVEX, 0xDD); }
+void sha256rnds2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCB, T_MUST_EVEX, 0xDB); }
void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); }
void shl(const Operand& op, int imm) { opShift(op, imm, 12); }
void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); }
@@ -1926,14 +1926,14 @@
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
-void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
-void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
-void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
-void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
-void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); }
-void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); }
-void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
-void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
+// AES *KL mnemonics. Each one forwards to opSSE_APX with (T_F3|T_0F38, code) and
+// (T_F3|T_MUST_EVEX, code); both encodings use the same opcode byte.
+// The four *wide* forms take only an address, all use opcode 0xD8, and differ in the
+// fixed register they pass: xmm0 (encwide128), xmm1 (decwide128), xmm2 (encwide256), xmm3 (decwide256).
+void aesdec128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDD, T_F3|T_MUST_EVEX, 0xDD); }
+void aesdec256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDF, T_F3|T_MUST_EVEX, 0xDF); }
+void aesdecwide128kl(const Address& addr) { opSSE_APX(xmm1, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
+void aesdecwide256kl(const Address& addr) { opSSE_APX(xmm3, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
+void aesenc128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDC, T_F3|T_MUST_EVEX, 0xDC); }
+void aesenc256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDE, T_F3|T_MUST_EVEX, 0xDE); }
+void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
+void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }