Add base.arm_crc32_u32 type
Benchmark throughput before and after this change, measured on a
Raspberry Pi 4 (32-bit armv7l) with -march=native and -mfpu=neon
("native" means "armv8-a+crc+simd"):
wuffs_crc32_ieee_10k/clang9 825MB/s ± 0% 6802MB/s ± 0% +724.00% (p=0.008 n=5+5)
wuffs_crc32_ieee_100k/clang9 837MB/s ± 0% 6199MB/s ± 0% +640.25% (p=0.008 n=5+5)
wuffs_crc32_ieee_10k/gcc8 960MB/s ± 0% 4495MB/s ± 0% +368.35% (p=0.016 n=4+5)
wuffs_crc32_ieee_100k/gcc8 961MB/s ± 0% 4447MB/s ± 0% +363.01% (p=0.016 n=5+4)
mimic_crc32_ieee_10k 638MB/s ± 0% 631MB/s ± 0% -1.17% (p=0.008 n=5+5)
mimic_crc32_ieee_100k 639MB/s ± 0% 639MB/s ± 0% +0.02% (p=0.008 n=5+5)
wuffs_gzip_decode_10k/clang9 83.0MB/s ± 0% 90.7MB/s ± 0% +9.26% (p=0.008 n=5+5)
wuffs_gzip_decode_100k/clang9 100MB/s ± 0% 112MB/s ± 0% +11.91% (p=0.008 n=5+5)
wuffs_gzip_decode_10k/gcc8 86.1MB/s ± 0% 91.9MB/s ± 0% +6.72% (p=0.008 n=5+5)
wuffs_gzip_decode_100k/gcc8 102MB/s ± 0% 112MB/s ± 0% +9.34% (p=0.008 n=5+5)
mimic_gzip_decode_10k 98.8MB/s ± 0% 98.2MB/s ± 0% -0.60% (p=0.008 n=5+5)
mimic_gzip_decode_100k 128MB/s ± 0% 128MB/s ± 0% -0.08% (p=0.008 n=5+5)
wuffs_png_decode_image_19k_8bpp/clang9 67.6MB/s ± 0% 70.8MB/s ± 0% +4.72% (p=0.008 n=5+5)
wuffs_png_decode_image_40k_24bpp/clang9 92.8MB/s ± 0% 98.8MB/s ± 0% +6.49% (p=0.008 n=5+5)
wuffs_png_decode_image_77k_8bpp/clang9 255MB/s ± 0% 269MB/s ± 0% +5.44% (p=0.008 n=5+5)
wuffs_png_decode_image_552k_32bpp_ignore_checksum/clang9 123MB/s ± 0% 123MB/s ± 0% +0.13% (p=0.008 n=5+5)
wuffs_png_decode_image_552k_32bpp_verify_checksum/clang9 110MB/s ± 0% 112MB/s ± 0% +1.46% (p=0.008 n=5+5)
wuffs_png_decode_image_4002k_24bpp/clang9 88.4MB/s ± 0% 92.8MB/s ± 0% +4.90% (p=0.008 n=5+5)
wuffs_png_decode_image_19k_8bpp/gcc8 67.2MB/s ± 0% 70.8MB/s ± 0% +5.37% (p=0.008 n=5+5)
wuffs_png_decode_image_40k_24bpp/gcc8 94.0MB/s ± 0% 98.2MB/s ± 0% +4.45% (p=0.008 n=5+5)
wuffs_png_decode_image_77k_8bpp/gcc8 236MB/s ± 0% 248MB/s ± 0% +4.84% (p=0.008 n=5+5)
wuffs_png_decode_image_552k_32bpp_ignore_checksum/gcc8 114MB/s ± 0% 115MB/s ± 0% +1.11% (p=0.008 n=5+5)
wuffs_png_decode_image_552k_32bpp_verify_checksum/gcc8 102MB/s ± 0% 104MB/s ± 0% +2.17% (p=0.008 n=5+5)
wuffs_png_decode_image_4002k_24bpp/gcc8 88.8MB/s ± 0% 94.3MB/s ± 0% +6.21% (p=0.008 n=5+5)
mimic_png_decode_image_19k_8bpp 43.7MB/s ± 1% 43.4MB/s ± 1% ~ (p=0.095 n=5+5)
mimic_png_decode_image_40k_24bpp 54.6MB/s ± 0% 53.9MB/s ± 0% -1.25% (p=0.008 n=5+5)
mimic_png_decode_image_77k_8bpp 125MB/s ± 0% 124MB/s ± 0% -0.73% (p=0.008 n=5+5)
mimic_png_decode_image_552k_32bpp_ignore_checksum skipped
mimic_png_decode_image_552k_32bpp_verify_checksum 102MB/s ± 0% 101MB/s ± 0% -0.95% (p=0.008 n=5+5)
mimic_png_decode_image_4002k_24bpp 81.3MB/s ± 0% 81.0MB/s ± 0% -0.46% (p=0.016 n=4+5)
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 6b602db..45dbefe 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -95,6 +95,8 @@
if qid[1].IsNumType() {
return g.writeBuiltinNumType(b, recv, method.Ident(), n.Args(), depth)
+ } else if qid[1].IsBuiltInCPUArch() {
+ return g.writeBuiltinCPUArch(b, recv, method.Ident(), n.Args(), sideEffectsOnly, depth)
} else {
switch qid[1] {
case t.IDIOReader:
@@ -144,8 +146,6 @@
b.writes("&empty_io_buffer")
return nil
}
- case t.IDX86M128I:
- return g.writeBuiltinCPUArch(b, recv, method.Ident(), n.Args(), sideEffectsOnly, depth)
}
}
return errNoSuchBuiltin
@@ -524,8 +524,11 @@
}
const create = "create"
- methodStr := method.Str(g.tm)
- if strings.HasPrefix(methodStr, create) {
+ if methodStr := method.Str(g.tm); methodStr == "value" {
+ return g.writeExpr(b, recv, false, depth)
+ } else if methodStr == create {
+ return g.writeExpr(b, args[0].AsArg().Value(), false, depth)
+ } else if strings.HasPrefix(methodStr, create) {
b.printf("%s(", methodStr[len(create):])
for i, o := range args {
if i > 0 {
@@ -552,6 +555,10 @@
b.writes(after)
}
} else {
+ armCRC32U32 := recv.MType().Eq(typeExprARMCRC32U32)
+ if armCRC32U32 {
+ b.writeb('_')
+ }
b.printf("%s(", methodStr)
if err := g.writeExpr(b, recv, false, depth); err != nil {
return err
@@ -560,7 +567,9 @@
b.writes(", ")
after := ""
v := o.AsArg().Value()
- if !v.MType().IsCPUArchType() {
+ if armCRC32U32 {
+ // No-op.
+ } else if !v.MType().IsCPUArchType() {
b.writes("(int32_t)(")
after = ")"
}
diff --git a/internal/cgen/cgen.go b/internal/cgen/cgen.go
index ee3ee7a..492c517 100644
--- a/internal/cgen/cgen.go
+++ b/internal/cgen/cgen.go
@@ -43,6 +43,7 @@
maxInt64 = big.NewInt((1 << 63) - 1)
+ typeExprARMCRC32U32 = a.NewTypeExpr(0, t.IDBase, t.IDARMCRC32U32, nil, nil, nil)
typeExprPixelSwizzler = a.NewTypeExpr(0, t.IDBase, t.IDPixelSwizzler, nil, nil, nil)
typeExprUtility = a.NewTypeExpr(0, t.IDBase, t.IDUtility, nil, nil, nil)
)
diff --git a/internal/cgen/expr.go b/internal/cgen/expr.go
index 3747fa4..c280ced 100644
--- a/internal/cgen/expr.go
+++ b/internal/cgen/expr.go
@@ -577,7 +577,8 @@
t.IDTokenReader: "wuffs_base__token_buffer*",
t.IDTokenWriter: "wuffs_base__token_buffer*",
- t.IDX86M128I: "__m128i",
+ t.IDARMCRC32U32: "uint32_t",
+ t.IDX86M128I: "__m128i",
}
const noSuchCOperator = " no_such_C_operator "
diff --git a/internal/cgen/var.go b/internal/cgen/var.go
index f59e651..95ddfcf 100644
--- a/internal/cgen/var.go
+++ b/internal/cgen/var.go
@@ -351,6 +351,8 @@
b.writes(" = wuffs_base__make_status(NULL);\n")
} else if typ.IsIOType() {
b.printf(" = &%s%s;\n", uPrefix, name)
+ } else if typ.Eq(typeExprARMCRC32U32) {
+ b.writes(" = 0;\n")
} else {
b.writes(" = {0};\n")
}
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index 688201d..25d7cf7 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -245,6 +245,7 @@
// ----
+ "arm_crc32_u32",
"x86_m128i",
}
@@ -531,6 +532,16 @@
"pixel_swizzler.swizzle_interleaved_transparent_black!(" +
"dst: slice u8, dst_palette: slice u8, num_pixels: u64) u64",
+ // ---- arm_crc32_u32
+
+ "arm_crc32_u32.create(a: u32) arm_crc32_u32",
+ "arm_crc32_u32.value() u32",
+
+ "arm_crc32_u32._crc32b(b: u8) arm_crc32_u32",
+ "arm_crc32_u32._crc32h(b: u16) arm_crc32_u32",
+ "arm_crc32_u32._crc32w(b: u32) arm_crc32_u32",
+ "arm_crc32_u32._crc32d(b: u64) arm_crc32_u32",
+
// ---- x86_m128i
"x86_m128i.load_u32!(a: u32)",
diff --git a/lang/check/resolve.go b/lang/check/resolve.go
index c1e2c31..611321b 100644
--- a/lang/check/resolve.go
+++ b/lang/check/resolve.go
@@ -75,7 +75,8 @@
typeExprDecodeFrameOptions = a.NewTypeExpr(0, t.IDBase, t.IDDecodeFrameOptions, nil, nil, nil)
- typeExprX86M128I = a.NewTypeExpr(0, t.IDBase, t.IDX86M128I, nil, nil, nil)
+ typeExprARMCRC32U32 = a.NewTypeExpr(0, t.IDBase, t.IDARMCRC32U32, nil, nil, nil)
+ typeExprX86M128I = a.NewTypeExpr(0, t.IDBase, t.IDX86M128I, nil, nil, nil)
typeExprSliceU8 = a.NewTypeExpr(t.IDSlice, 0, 0, nil, nil, typeExprU8)
typeExprTableU8 = a.NewTypeExpr(t.IDTable, 0, 0, nil, nil, typeExprU8)
@@ -125,7 +126,8 @@
t.IDDecodeFrameOptions: typeExprDecodeFrameOptions,
- t.IDX86M128I: typeExprX86M128I,
+ t.IDARMCRC32U32: typeExprARMCRC32U32,
+ t.IDX86M128I: typeExprX86M128I,
}
func (c *Checker) parseBuiltInFuncs(m map[t.QQID]*a.Func, ss []string) error {
diff --git a/lang/check/type.go b/lang/check/type.go
index 90a2875..f444972 100644
--- a/lang/check/type.go
+++ b/lang/check/type.go
@@ -25,7 +25,9 @@
type cpuArchBits uint32
const (
- cpuArchBitsX86SSE42 = cpuArchBits(0x00000001)
+ cpuArchBitsARMCRC32 = cpuArchBits(0x00000001)
+ cpuArchBitsARMNeon = cpuArchBits(0x00000002)
+ cpuArchBitsX86SSE42 = cpuArchBits(0x00000004)
)
func calcCPUArchBits(n *a.Func) (ret cpuArchBits) {
@@ -35,6 +37,10 @@
continue
}
switch o.Condition().RHS().AsExpr().Ident() {
+ case t.IDARMCRC32:
+ ret |= cpuArchBitsARMCRC32
+ case t.IDARMNeon:
+ ret |= cpuArchBitsARMNeon
case t.IDX86SSE42:
ret |= cpuArchBitsX86SSE42
}
@@ -46,6 +52,8 @@
if qid := typ.Innermost().QID(); qid[0] == t.IDBase {
need := cpuArchBits(0)
switch qid[1] {
+ case t.IDARMCRC32U32:
+ need = cpuArchBitsARMCRC32
case t.IDX86M128I:
need = cpuArchBitsX86SSE42
}
diff --git a/lang/token/list.go b/lang/token/list.go
index 6231aba..22664d0 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -668,6 +668,8 @@
IDARMCRC32 = ID(0x300)
IDARMNeon = ID(0x301)
+ IDARMCRC32U32 = ID(0x308)
+
IDX86SSE42 = ID(0x320)
IDX86AVX2 = ID(0x321)
@@ -1087,6 +1089,8 @@
IDARMCRC32: "arm_crc32",
IDARMNeon: "arm_neon",
+ IDARMCRC32U32: "arm_crc32_u32",
+
IDX86SSE42: "x86_sse42",
IDX86AVX2: "x86_avx2",
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index e403bc3..09a2379 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -21945,87 +21945,58 @@
v_s = (4294967295 ^ self->private_impl.f_state);
while ((((uint64_t)(a_x.len)) > 0) && ((15 & ((uint32_t)(0xFFF & (uintptr_t)(a_x.ptr)))) != 0)) {
- v_s = (WUFFS_CRC32__IEEE_TABLE[0][(((uint8_t)((v_s & 255))) ^ a_x.ptr[0])] ^ (v_s >> 8));
+ v_s = __crc32b(v_s, a_x.ptr[0]);
a_x = wuffs_base__slice_u8__subslice_i(a_x, 1);
}
{
wuffs_base__slice_u8 i_slice_p = a_x;
v_p.ptr = i_slice_p.ptr;
- v_p.len = 16;
- uint8_t* i_end0_p = v_p.ptr + (((i_slice_p.len - (size_t)(v_p.ptr - i_slice_p.ptr)) / 32) * 32);
+ v_p.len = 8;
+ uint8_t* i_end0_p = v_p.ptr + (((i_slice_p.len - (size_t)(v_p.ptr - i_slice_p.ptr)) / 128) * 128);
while (v_p.ptr < i_end0_p) {
- v_s ^= ((((uint32_t)(v_p.ptr[0])) << 0) |
- (((uint32_t)(v_p.ptr[1])) << 8) |
- (((uint32_t)(v_p.ptr[2])) << 16) |
- (((uint32_t)(v_p.ptr[3])) << 24));
- v_s = (WUFFS_CRC32__IEEE_TABLE[0][v_p.ptr[15]] ^
- WUFFS_CRC32__IEEE_TABLE[1][v_p.ptr[14]] ^
- WUFFS_CRC32__IEEE_TABLE[2][v_p.ptr[13]] ^
- WUFFS_CRC32__IEEE_TABLE[3][v_p.ptr[12]] ^
- WUFFS_CRC32__IEEE_TABLE[4][v_p.ptr[11]] ^
- WUFFS_CRC32__IEEE_TABLE[5][v_p.ptr[10]] ^
- WUFFS_CRC32__IEEE_TABLE[6][v_p.ptr[9]] ^
- WUFFS_CRC32__IEEE_TABLE[7][v_p.ptr[8]] ^
- WUFFS_CRC32__IEEE_TABLE[8][v_p.ptr[7]] ^
- WUFFS_CRC32__IEEE_TABLE[9][v_p.ptr[6]] ^
- WUFFS_CRC32__IEEE_TABLE[10][v_p.ptr[5]] ^
- WUFFS_CRC32__IEEE_TABLE[11][v_p.ptr[4]] ^
- WUFFS_CRC32__IEEE_TABLE[12][(255 & (v_s >> 24))] ^
- WUFFS_CRC32__IEEE_TABLE[13][(255 & (v_s >> 16))] ^
- WUFFS_CRC32__IEEE_TABLE[14][(255 & (v_s >> 8))] ^
- WUFFS_CRC32__IEEE_TABLE[15][(255 & (v_s >> 0))]);
- v_p.ptr += 16;
- v_s ^= ((((uint32_t)(v_p.ptr[0])) << 0) |
- (((uint32_t)(v_p.ptr[1])) << 8) |
- (((uint32_t)(v_p.ptr[2])) << 16) |
- (((uint32_t)(v_p.ptr[3])) << 24));
- v_s = (WUFFS_CRC32__IEEE_TABLE[0][v_p.ptr[15]] ^
- WUFFS_CRC32__IEEE_TABLE[1][v_p.ptr[14]] ^
- WUFFS_CRC32__IEEE_TABLE[2][v_p.ptr[13]] ^
- WUFFS_CRC32__IEEE_TABLE[3][v_p.ptr[12]] ^
- WUFFS_CRC32__IEEE_TABLE[4][v_p.ptr[11]] ^
- WUFFS_CRC32__IEEE_TABLE[5][v_p.ptr[10]] ^
- WUFFS_CRC32__IEEE_TABLE[6][v_p.ptr[9]] ^
- WUFFS_CRC32__IEEE_TABLE[7][v_p.ptr[8]] ^
- WUFFS_CRC32__IEEE_TABLE[8][v_p.ptr[7]] ^
- WUFFS_CRC32__IEEE_TABLE[9][v_p.ptr[6]] ^
- WUFFS_CRC32__IEEE_TABLE[10][v_p.ptr[5]] ^
- WUFFS_CRC32__IEEE_TABLE[11][v_p.ptr[4]] ^
- WUFFS_CRC32__IEEE_TABLE[12][(255 & (v_s >> 24))] ^
- WUFFS_CRC32__IEEE_TABLE[13][(255 & (v_s >> 16))] ^
- WUFFS_CRC32__IEEE_TABLE[14][(255 & (v_s >> 8))] ^
- WUFFS_CRC32__IEEE_TABLE[15][(255 & (v_s >> 0))]);
- v_p.ptr += 16;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
}
- v_p.len = 16;
- uint8_t* i_end1_p = v_p.ptr + (((i_slice_p.len - (size_t)(v_p.ptr - i_slice_p.ptr)) / 16) * 16);
+ v_p.len = 8;
+ uint8_t* i_end1_p = v_p.ptr + (((i_slice_p.len - (size_t)(v_p.ptr - i_slice_p.ptr)) / 8) * 8);
while (v_p.ptr < i_end1_p) {
- v_s ^= ((((uint32_t)(v_p.ptr[0])) << 0) |
- (((uint32_t)(v_p.ptr[1])) << 8) |
- (((uint32_t)(v_p.ptr[2])) << 16) |
- (((uint32_t)(v_p.ptr[3])) << 24));
- v_s = (WUFFS_CRC32__IEEE_TABLE[0][v_p.ptr[15]] ^
- WUFFS_CRC32__IEEE_TABLE[1][v_p.ptr[14]] ^
- WUFFS_CRC32__IEEE_TABLE[2][v_p.ptr[13]] ^
- WUFFS_CRC32__IEEE_TABLE[3][v_p.ptr[12]] ^
- WUFFS_CRC32__IEEE_TABLE[4][v_p.ptr[11]] ^
- WUFFS_CRC32__IEEE_TABLE[5][v_p.ptr[10]] ^
- WUFFS_CRC32__IEEE_TABLE[6][v_p.ptr[9]] ^
- WUFFS_CRC32__IEEE_TABLE[7][v_p.ptr[8]] ^
- WUFFS_CRC32__IEEE_TABLE[8][v_p.ptr[7]] ^
- WUFFS_CRC32__IEEE_TABLE[9][v_p.ptr[6]] ^
- WUFFS_CRC32__IEEE_TABLE[10][v_p.ptr[5]] ^
- WUFFS_CRC32__IEEE_TABLE[11][v_p.ptr[4]] ^
- WUFFS_CRC32__IEEE_TABLE[12][(255 & (v_s >> 24))] ^
- WUFFS_CRC32__IEEE_TABLE[13][(255 & (v_s >> 16))] ^
- WUFFS_CRC32__IEEE_TABLE[14][(255 & (v_s >> 8))] ^
- WUFFS_CRC32__IEEE_TABLE[15][(255 & (v_s >> 0))]);
- v_p.ptr += 16;
+ v_s = __crc32d(v_s, wuffs_base__peek_u64le__no_bounds_check(v_p.ptr));
+ v_p.ptr += 8;
}
v_p.len = 1;
uint8_t* i_end2_p = i_slice_p.ptr + i_slice_p.len;
while (v_p.ptr < i_end2_p) {
- v_s = (WUFFS_CRC32__IEEE_TABLE[0][(((uint8_t)((v_s & 255))) ^ v_p.ptr[0])] ^ (v_s >> 8));
+ v_s = __crc32b(v_s, v_p.ptr[0]);
v_p.ptr += 1;
}
v_p.len = 0;
diff --git a/std/crc32/common_crc32.wuffs b/std/crc32/common_crc32.wuffs
index 02d6293..ee3470e 100644
--- a/std/crc32/common_crc32.wuffs
+++ b/std/crc32/common_crc32.wuffs
@@ -72,45 +72,24 @@
pri func ieee_hasher.up_arm_crc32!(x: slice base.u8),
choose cpu_arch >= arm_crc32,
{
- var s : base.u32
+ var s : base.arm_crc32_u32
var p : slice base.u8
- s = 0xFFFF_FFFF ^ this.state
+ s = s.create(a: 0xFFFF_FFFF ^ this.state)
// Align to a 16-byte boundary.
while (args.x.length() > 0) and ((15 & args.x.uintptr_low_12_bits()) <> 0) {
- s = IEEE_TABLE[0][((s & 0xFF) as base.u8) ^ args.x[0]] ^ (s >> 8)
+ s = s._crc32b(b: args.x[0])
args.x = args.x[1 ..]
} endwhile
- // See "Multi-Byte Lookup Tables" in std/crc32/README.md for more detail on
- // the slicing-by-M algorithm. We use an M of 16.
- iterate (p = args.x)(length: 16, advance: 16, unroll: 2) {
- s ^= ((p[0x00] as base.u32) << 0) |
- ((p[0x01] as base.u32) << 8) |
- ((p[0x02] as base.u32) << 16) |
- ((p[0x03] as base.u32) << 24)
- s = IEEE_TABLE[0x00][p[0x0F]] ^
- IEEE_TABLE[0x01][p[0x0E]] ^
- IEEE_TABLE[0x02][p[0x0D]] ^
- IEEE_TABLE[0x03][p[0x0C]] ^
- IEEE_TABLE[0x04][p[0x0B]] ^
- IEEE_TABLE[0x05][p[0x0A]] ^
- IEEE_TABLE[0x06][p[0x09]] ^
- IEEE_TABLE[0x07][p[0x08]] ^
- IEEE_TABLE[0x08][p[0x07]] ^
- IEEE_TABLE[0x09][p[0x06]] ^
- IEEE_TABLE[0x0A][p[0x05]] ^
- IEEE_TABLE[0x0B][p[0x04]] ^
- IEEE_TABLE[0x0C][0xFF & (s >> 24)] ^
- IEEE_TABLE[0x0D][0xFF & (s >> 16)] ^
- IEEE_TABLE[0x0E][0xFF & (s >> 8)] ^
- IEEE_TABLE[0x0F][0xFF & (s >> 0)]
+ iterate (p = args.x)(length: 8, advance: 8, unroll: 16) {
+ s = s._crc32d(b: p.peek_u64le())
} else (length: 1, advance: 1, unroll: 1) {
- s = IEEE_TABLE[0][((s & 0xFF) as base.u8) ^ p[0]] ^ (s >> 8)
+ s = s._crc32b(b: p[0])
}
- this.state = 0xFFFF_FFFF ^ s
+ this.state = 0xFFFF_FFFF ^ s.value()
}
// The table below was created by script/print-crc32-magic-numbers.go.