Tweak builtin.SliceU8Funcs peek method names
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index d31af4c..9965cff 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -593,7 +593,11 @@
}
if (t.IDPeekU8 <= method) && (method <= t.IDPeekU64LE) {
- b.printf("wuffs_base__%s__no_bounds_check(", method.Str(g.tm))
+ s := method.Str(g.tm)
+ if i := strings.Index(s, "_as_"); i >= 0 {
+ s = s[:i]
+ }
+ b.printf("wuffs_base__%s__no_bounds_check(", s)
if err := g.writeExpr(b, recv, depth); err != nil {
return err
}
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index e117e68..b7d713a 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -612,16 +612,16 @@
"GENERIC T1.peek_u8() u8",
"GENERIC T1.peek_u16be() u16",
"GENERIC T1.peek_u16le() u16",
- "GENERIC T1.peek_u24be() u32",
- "GENERIC T1.peek_u24le() u32",
+ "GENERIC T1.peek_u24be_as_u32() u32",
+ "GENERIC T1.peek_u24le_as_u32() u32",
"GENERIC T1.peek_u32be() u32",
"GENERIC T1.peek_u32le() u32",
- "GENERIC T1.peek_u40be() u64",
- "GENERIC T1.peek_u40le() u64",
- "GENERIC T1.peek_u48be() u64",
- "GENERIC T1.peek_u48le() u64",
- "GENERIC T1.peek_u56be() u64",
- "GENERIC T1.peek_u56le() u64",
+ "GENERIC T1.peek_u40be_as_u64() u64",
+ "GENERIC T1.peek_u40le_as_u64() u64",
+ "GENERIC T1.peek_u48be_as_u64() u64",
+ "GENERIC T1.peek_u48le_as_u64() u64",
+ "GENERIC T1.peek_u56be_as_u64() u64",
+ "GENERIC T1.peek_u56le_as_u64() u64",
"GENERIC T1.peek_u64be() u64",
"GENERIC T1.peek_u64le() u64",
diff --git a/std/png/decode_filter_sse128.wuffs b/std/png/decode_filter_sse128.wuffs
index 746afcd..7858541 100644
--- a/std/png/decode_filter_sse128.wuffs
+++ b/std/png/decode_filter_sse128.wuffs
@@ -16,6 +16,28 @@
// Filter 1: Sub.
+// This (filter = 1, distance = 3) implementation doesn't actually benchmark
+// any faster than the non-SIMD one, so it is left commented out.
+//
+// pri func decoder.filter_1_distance_3_sse128!(curr: slice base.u8),
+// choose cpu_arch >= sse128,
+// {
+// var c : slice base.u8
+// var x128 : base.sse128_i
+// var a128 : base.sse128_i
+//
+// iterate (c = args.curr)(length: 4, advance: 3, unroll: 1) {
+// x128.load_u32!(a: c.peek_u32le())
+// x128 = x128._mm_add_epi8!(b: a128)
+// a128 = x128
+// c.poke_u24le!(a: x128.truncate_u32())
+// } else (length: 3, advance: 3, unroll: 1) {
+// x128.load_u32!(a: c.peek_u24le_as_u32())
+// x128 = x128._mm_add_epi8!(b: a128)
+// c.poke_u24le!(a: x128.truncate_u32())
+// }
+// }
+
pri func decoder.filter_1_distance_4_sse128!(curr: slice base.u8),
choose cpu_arch >= sse128,
{