Add slice base.u8 peek/poke methods
name old speed new speed delta
wuffs_png_decode_filter_1_sub/clang9 4.59GB/s ± 0% 4.59GB/s ± 0% ~ (p=0.151 n=5+5)
wuffs_png_decode_filter_1_sub/gcc10 1.85GB/s ± 0% 4.11GB/s ± 0% +122.24% (p=0.016 n=5+4)
diff --git a/doc/changelog.md b/doc/changelog.md
index 4ffedf1..3138787 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -19,6 +19,7 @@
- Added `example/json-to-cbor`.
- Added `example/jsonfindptrs`.
- Added `example/jsonptr`.
+- Added `slice base.u8 peek/poke` methods.
- Added `std/bmp`.
- Added `std/cbor`.
- Added `std/gif.config_decoder`.
diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h
index cff5fcf..a36cf70 100644
--- a/internal/cgen/base/fundamental-public.h
+++ b/internal/cgen/base/fundamental-public.h
@@ -718,8 +718,14 @@
static inline void //
wuffs_base__poke_u16le__no_bounds_check(uint8_t* p, uint16_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 2);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
+#endif
}
static inline void //
@@ -746,10 +752,16 @@
static inline void //
wuffs_base__poke_u32le__no_bounds_check(uint8_t* p, uint32_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 4);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
p[2] = (uint8_t)(x >> 16);
p[3] = (uint8_t)(x >> 24);
+#endif
}
static inline void //
@@ -826,6 +838,11 @@
static inline void //
wuffs_base__poke_u64le__no_bounds_check(uint8_t* p, uint64_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 8);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
p[2] = (uint8_t)(x >> 16);
@@ -834,6 +851,7 @@
p[5] = (uint8_t)(x >> 40);
p[6] = (uint8_t)(x >> 48);
p[7] = (uint8_t)(x >> 56);
+#endif
}
// --------
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index 48ebc82..d31af4c 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -591,6 +591,29 @@
b.writes(", ")
return g.writeArgs(b, args, depth)
}
+
+ if (t.IDPeekU8 <= method) && (method <= t.IDPeekU64LE) {
+ b.printf("wuffs_base__%s__no_bounds_check(", method.Str(g.tm))
+ if err := g.writeExpr(b, recv, depth); err != nil {
+ return err
+ }
+ b.writes(".ptr)")
+ return nil
+ }
+
+ if (t.IDPokeU8 <= method) && (method <= t.IDPokeU64LE) {
+ b.printf("(wuffs_base__%s__no_bounds_check(", method.Str(g.tm))
+ if err := g.writeExpr(b, recv, depth); err != nil {
+ return err
+ }
+ b.writes(".ptr, ")
+ if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
+ return err
+ }
+ b.writes("), wuffs_base__make_empty_struct())")
+ return nil
+ }
+
return errNoSuchBuiltin
}
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index ce5aa5f..8358c8f 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -96,10 +96,11 @@
"1]) << 32) |\n ((uint64_t)(p[2]) << 24) | ((uint64_t)(p[3]) << 16) |\n ((uint64_t)(p[4]) << 8) | ((uint64_t)(p[5]) << 0);\n}\n\nstatic inline uint64_t //\nwuffs_base__peek_u48le__no_bounds_check(const uint8_t* p) {\n return ((uint64_t)(p[0]) << 0) | ((uint64_t)(p[1]) << 8) |\n ((uint64_t)(p[2]) << 16) | ((uint64_t)(p[3]) << 24) |\n ((uint64_t)(p[4]) << 32) | ((uint64_t)(p[5]) << 40);\n}\n\nstatic inline uint64_t //\nwuffs_base__peek_u56be__no_bounds_check(const uint8_t* p) {\n return ((uint64_t)(p[0]) << 48) | ((uint64_t)(p[1]) << 40) |\n ((uint64_t)(p[2]) << 32) | ((uint64_t)(p[3]) << 24) |\n ((uint64_t)(p[4]) << 16) | ((uint64_t)(p[5]) << 8) |\n ((uint64_t)(p[6]) << 0);\n}\n\nstatic inline uint64_t //\nwuffs_base__peek_u56le__no_bounds_check(const uint8_t* p) {\n return ((uint64_t)(p[0]) << 0) | ((uint64_t)(p[1]) << 8) |\n ((uint64_t)(p[2]) << 16) | ((uint64_t)(p[3]) << 24) |\n ((uint64_t)(p[4]) << 32) | ((uint64_t)(p[5]) << 40) |\n ((uint64_t)(p[" +
"6]) << 48);\n}\n\nstatic inline uint64_t //\nwuffs_base__peek_u64be__no_bounds_check(const uint8_t* p) {\n return ((uint64_t)(p[0]) << 56) | ((uint64_t)(p[1]) << 48) |\n ((uint64_t)(p[2]) << 40) | ((uint64_t)(p[3]) << 32) |\n ((uint64_t)(p[4]) << 24) | ((uint64_t)(p[5]) << 16) |\n ((uint64_t)(p[6]) << 8) | ((uint64_t)(p[7]) << 0);\n}\n\nstatic inline uint64_t //\nwuffs_base__peek_u64le__no_bounds_check(const uint8_t* p) {\n return ((uint64_t)(p[0]) << 0) | ((uint64_t)(p[1]) << 8) |\n ((uint64_t)(p[2]) << 16) | ((uint64_t)(p[3]) << 24) |\n ((uint64_t)(p[4]) << 32) | ((uint64_t)(p[5]) << 40) |\n ((uint64_t)(p[6]) << 48) | ((uint64_t)(p[7]) << 56);\n}\n\n" +
"" +
- "// --------\n\n#define wuffs_base__poke_u8be__no_bounds_check \\\n wuffs_base__poke_u8__no_bounds_check\n#define wuffs_base__poke_u8le__no_bounds_check \\\n wuffs_base__poke_u8__no_bounds_check\n\nstatic inline void //\nwuffs_base__poke_u8__no_bounds_check(uint8_t* p, uint8_t x) {\n p[0] = x;\n}\n\nstatic inline void //\nwuffs_base__poke_u16be__no_bounds_check(uint8_t* p, uint16_t x) {\n p[0] = (uint8_t)(x >> 8);\n p[1] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u16le__no_bounds_check(uint8_t* p, uint16_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n}\n\nstatic inline void //\nwuffs_base__poke_u24be__no_bounds_check(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 16);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u24le__no_bounds_check(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n}\n\nstatic inline void //\nwuffs_base__poke_u32be__no_bounds_check(uint8_t* p, uint32_t " +
- "x) {\n p[0] = (uint8_t)(x >> 24);\n p[1] = (uint8_t)(x >> 16);\n p[2] = (uint8_t)(x >> 8);\n p[3] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u32le__no_bounds_check(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n}\n\nstatic inline void //\nwuffs_base__poke_u40be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 32);\n p[1] = (uint8_t)(x >> 24);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 8);\n p[4] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u40le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n}\n\nstatic inline void //\nwuffs_base__poke_u48be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 40);\n p[1] = (uint8_t)(x >> 32);\n p[2] = (uint8_t)(x >> 24);\n p[3] = (uint8_t)(x >> 16);\n p[4] = (uint8_t)(x " +
- ">> 8);\n p[5] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u48le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uint8_t)(x >> 40);\n}\n\nstatic inline void //\nwuffs_base__poke_u56be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 48);\n p[1] = (uint8_t)(x >> 40);\n p[2] = (uint8_t)(x >> 32);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 16);\n p[5] = (uint8_t)(x >> 8);\n p[6] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u56le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uint8_t)(x >> 40);\n p[6] = (uint8_t)(x >> 48);\n}\n\nstatic inline void //\nwuffs_base__poke_u64be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 56);\n p[1] = (uint8_t)" +
- "(x >> 48);\n p[2] = (uint8_t)(x >> 40);\n p[3] = (uint8_t)(x >> 32);\n p[4] = (uint8_t)(x >> 24);\n p[5] = (uint8_t)(x >> 16);\n p[6] = (uint8_t)(x >> 8);\n p[7] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u64le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uint8_t)(x >> 40);\n p[6] = (uint8_t)(x >> 48);\n p[7] = (uint8_t)(x >> 56);\n}\n\n" +
+ "// --------\n\n#define wuffs_base__poke_u8be__no_bounds_check \\\n wuffs_base__poke_u8__no_bounds_check\n#define wuffs_base__poke_u8le__no_bounds_check \\\n wuffs_base__poke_u8__no_bounds_check\n\nstatic inline void //\nwuffs_base__poke_u8__no_bounds_check(uint8_t* p, uint8_t x) {\n p[0] = x;\n}\n\nstatic inline void //\nwuffs_base__poke_u16be__no_bounds_check(uint8_t* p, uint16_t x) {\n p[0] = (uint8_t)(x >> 8);\n p[1] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u16le__no_bounds_check(uint8_t* p, uint16_t x) {\n#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)\n // This seems to perform better on gcc 10 (but not clang 9). Clang also\n // defines \"__GNUC__\".\n memcpy(p, &x, 2);\n#else\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n#endif\n}\n\nstatic inline void //\nwuffs_base__poke_u24be__no_bounds_check(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 16);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u24le__no_bounds_chec" +
+ "k(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n}\n\nstatic inline void //\nwuffs_base__poke_u32be__no_bounds_check(uint8_t* p, uint32_t x) {\n p[0] = (uint8_t)(x >> 24);\n p[1] = (uint8_t)(x >> 16);\n p[2] = (uint8_t)(x >> 8);\n p[3] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u32le__no_bounds_check(uint8_t* p, uint32_t x) {\n#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)\n // This seems to perform better on gcc 10 (but not clang 9). Clang also\n // defines \"__GNUC__\".\n memcpy(p, &x, 4);\n#else\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n#endif\n}\n\nstatic inline void //\nwuffs_base__poke_u40be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 32);\n p[1] = (uint8_t)(x >> 24);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 8);\n p[4] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u40le__no_bounds_chec" +
+ "k(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n}\n\nstatic inline void //\nwuffs_base__poke_u48be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 40);\n p[1] = (uint8_t)(x >> 32);\n p[2] = (uint8_t)(x >> 24);\n p[3] = (uint8_t)(x >> 16);\n p[4] = (uint8_t)(x >> 8);\n p[5] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u48le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uint8_t)(x >> 40);\n}\n\nstatic inline void //\nwuffs_base__poke_u56be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 48);\n p[1] = (uint8_t)(x >> 40);\n p[2] = (uint8_t)(x >> 32);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 16);\n p[5] = (uint8_t)(x >> 8);\n p[6] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_" +
+ "base__poke_u56le__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uint8_t)(x >> 40);\n p[6] = (uint8_t)(x >> 48);\n}\n\nstatic inline void //\nwuffs_base__poke_u64be__no_bounds_check(uint8_t* p, uint64_t x) {\n p[0] = (uint8_t)(x >> 56);\n p[1] = (uint8_t)(x >> 48);\n p[2] = (uint8_t)(x >> 40);\n p[3] = (uint8_t)(x >> 32);\n p[4] = (uint8_t)(x >> 24);\n p[5] = (uint8_t)(x >> 16);\n p[6] = (uint8_t)(x >> 8);\n p[7] = (uint8_t)(x >> 0);\n}\n\nstatic inline void //\nwuffs_base__poke_u64le__no_bounds_check(uint8_t* p, uint64_t x) {\n#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)\n // This seems to perform better on gcc 10 (but not clang 9). Clang also\n // defines \"__GNUC__\".\n memcpy(p, &x, 8);\n#else\n p[0] = (uint8_t)(x >> 0);\n p[1] = (uint8_t)(x >> 8);\n p[2] = (uint8_t)(x >> 16);\n p[3] = (uint8_t)(x >> 24);\n p[4] = (uint8_t)(x >> 32);\n p[5] = (uin" +
+ "t8_t)(x >> 40);\n p[6] = (uint8_t)(x >> 48);\n p[7] = (uint8_t)(x >> 56);\n#endif\n}\n\n" +
"" +
"// --------\n\n// Load and Store functions are deprecated. Use Peek and Poke instead.\n\n#define wuffs_base__load_u8__no_bounds_check \\\n wuffs_base__peek_u8__no_bounds_check\n#define wuffs_base__load_u16be__no_bounds_check \\\n wuffs_base__peek_u16be__no_bounds_check\n#define wuffs_base__load_u16le__no_bounds_check \\\n wuffs_base__peek_u16le__no_bounds_check\n#define wuffs_base__load_u24be__no_bounds_check \\\n wuffs_base__peek_u24be__no_bounds_check\n#define wuffs_base__load_u24le__no_bounds_check \\\n wuffs_base__peek_u24le__no_bounds_check\n#define wuffs_base__load_u32be__no_bounds_check \\\n wuffs_base__peek_u32be__no_bounds_check\n#define wuffs_base__load_u32le__no_bounds_check \\\n wuffs_base__peek_u32le__no_bounds_check\n#define wuffs_base__load_u40be__no_bounds_check \\\n wuffs_base__peek_u40be__no_bounds_check\n#define wuffs_base__load_u40le__no_bounds_check \\\n wuffs_base__peek_u40le__no_bounds_check\n#define wuffs_base__load_u48be__no_bounds_check \\\n wuffs_base__peek_u48be__no_bounds_check\n#define wuffs_base__load_" +
"u48le__no_bounds_check \\\n wuffs_base__peek_u48le__no_bounds_check\n#define wuffs_base__load_u56be__no_bounds_check \\\n wuffs_base__peek_u56be__no_bounds_check\n#define wuffs_base__load_u56le__no_bounds_check \\\n wuffs_base__peek_u56le__no_bounds_check\n#define wuffs_base__load_u64be__no_bounds_check \\\n wuffs_base__peek_u64be__no_bounds_check\n#define wuffs_base__load_u64le__no_bounds_check \\\n wuffs_base__peek_u64le__no_bounds_check\n\n#define wuffs_base__store_u8__no_bounds_check \\\n wuffs_base__poke_u8__no_bounds_check\n#define wuffs_base__store_u16be__no_bounds_check \\\n wuffs_base__poke_u16be__no_bounds_check\n#define wuffs_base__store_u16le__no_bounds_check \\\n wuffs_base__poke_u16le__no_bounds_check\n#define wuffs_base__store_u24be__no_bounds_check \\\n wuffs_base__poke_u24be__no_bounds_check\n#define wuffs_base__store_u24le__no_bounds_check \\\n wuffs_base__poke_u24le__no_bounds_check\n#define wuffs_base__store_u32be__no_bounds_check \\\n wuffs_base__poke_u32be__no_bounds_check\n#define wuffs_base__store_u32le__no_" +
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index cb2f730..19357c0 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -608,6 +608,40 @@
"GENERIC T1.suffix(up_to: u64) T1",
}
+var SliceU8Funcs = []string{
+ "GENERIC T1.peek_u8() u8",
+ "GENERIC T1.peek_u16be() u16",
+ "GENERIC T1.peek_u16le() u16",
+ "GENERIC T1.peek_u24be() u32",
+ "GENERIC T1.peek_u24le() u32",
+ "GENERIC T1.peek_u32be() u32",
+ "GENERIC T1.peek_u32le() u32",
+ "GENERIC T1.peek_u40be() u64",
+ "GENERIC T1.peek_u40le() u64",
+ "GENERIC T1.peek_u48be() u64",
+ "GENERIC T1.peek_u48le() u64",
+ "GENERIC T1.peek_u56be() u64",
+ "GENERIC T1.peek_u56le() u64",
+ "GENERIC T1.peek_u64be() u64",
+ "GENERIC T1.peek_u64le() u64",
+
+ "GENERIC T1.poke_u8!(a: u8)",
+ "GENERIC T1.poke_u16be!(a: u16)",
+ "GENERIC T1.poke_u16le!(a: u16)",
+ "GENERIC T1.poke_u24be!(a: u32)",
+ "GENERIC T1.poke_u24le!(a: u32)",
+ "GENERIC T1.poke_u32be!(a: u32)",
+ "GENERIC T1.poke_u32le!(a: u32)",
+ "GENERIC T1.poke_u40be!(a: u64)",
+ "GENERIC T1.poke_u40le!(a: u64)",
+ "GENERIC T1.poke_u48be!(a: u64)",
+ "GENERIC T1.poke_u48le!(a: u64)",
+ "GENERIC T1.poke_u56be!(a: u64)",
+ "GENERIC T1.poke_u56le!(a: u64)",
+ "GENERIC T1.poke_u64be!(a: u64)",
+ "GENERIC T1.poke_u64le!(a: u64)",
+}
+
var TableFuncs = []string{
"GENERIC T2.height() u64",
"GENERIC T2.stride() u64",
diff --git a/lang/check/bounds.go b/lang/check/bounds.go
index d9a27bc..ffcd7a8 100644
--- a/lang/check/bounds.go
+++ b/lang/check/bounds.go
@@ -1051,6 +1051,8 @@
recv := lhs.LHS().AsExpr()
method := lhs.Ident()
+ advance, advanceExpr, update := (*big.Int)(nil), (*a.Expr)(nil), false
+
if recvTyp := recv.MType(); recvTyp == nil {
return bounds{}, errNotASpecialCase
@@ -1094,8 +1096,6 @@
}
} else if recvTyp.IsIOTokenType() {
- advance, advanceExpr, update := (*big.Int)(nil), (*a.Expr)(nil), false
-
if method == t.IDUndoByte {
if err := q.canUndoByte(recv); err != nil {
return bounds{}, err
@@ -1146,23 +1146,31 @@
}
}
- if (advance != nil) || (advanceExpr != nil) {
- if ok, err := q.optimizeIOMethodAdvance(recv, advance, advanceExpr, update); err != nil {
- return bounds{}, err
- } else if !ok {
- adv := ""
- if advance != nil {
- adv = advance.String()
- } else {
- adv = advanceExpr.Str(q.tm)
- }
- return bounds{}, fmt.Errorf("check: could not prove %s pre-condition: %s.length() >= %s",
- method.Str(q.tm), recv.Str(q.tm), adv)
+ } else if recvTyp.Eq(typeExprSliceU8) {
+ if method >= t.IDPeekU8 {
+ if m := method - t.IDPeekU8; m < t.ID(len(ioMethodAdvances)) {
+ au := ioMethodAdvances[m]
+ advance, update = au.advance, au.update
}
- // TODO: drop other recv-related facts?
}
}
+ if (advance != nil) || (advanceExpr != nil) {
+ if ok, err := q.optimizeIOMethodAdvance(recv, advance, advanceExpr, update); err != nil {
+ return bounds{}, err
+ } else if !ok {
+ adv := ""
+ if advance != nil {
+ adv = advance.String()
+ } else {
+ adv = advanceExpr.Str(q.tm)
+ }
+ return bounds{}, fmt.Errorf("check: could not prove %s pre-condition: %s.length() >= %s",
+ method.Str(q.tm), recv.Str(q.tm), adv)
+ }
+ // TODO: drop other recv-related facts?
+ }
+
return bounds{}, errNotASpecialCase
}
@@ -1319,6 +1327,22 @@
t.IDPeekU64BE - t.IDPeekU8: {eight, false},
t.IDPeekU64LE - t.IDPeekU8: {eight, false},
+ t.IDPokeU8 - t.IDPeekU8: {one, false},
+ t.IDPokeU16BE - t.IDPeekU8: {two, false},
+ t.IDPokeU16LE - t.IDPeekU8: {two, false},
+ t.IDPokeU24BE - t.IDPeekU8: {three, false},
+ t.IDPokeU24LE - t.IDPeekU8: {three, false},
+ t.IDPokeU32BE - t.IDPeekU8: {four, false},
+ t.IDPokeU32LE - t.IDPeekU8: {four, false},
+ t.IDPokeU40BE - t.IDPeekU8: {five, false},
+ t.IDPokeU40LE - t.IDPeekU8: {five, false},
+ t.IDPokeU48BE - t.IDPeekU8: {six, false},
+ t.IDPokeU48LE - t.IDPeekU8: {six, false},
+ t.IDPokeU56BE - t.IDPeekU8: {seven, false},
+ t.IDPokeU56LE - t.IDPeekU8: {seven, false},
+ t.IDPokeU64BE - t.IDPeekU8: {eight, false},
+ t.IDPokeU64LE - t.IDPeekU8: {eight, false},
+
t.IDWriteU8Fast - t.IDPeekU8: {one, true},
t.IDWriteU16BEFast - t.IDPeekU8: {two, true},
t.IDWriteU16LEFast - t.IDPeekU8: {two, true},
diff --git a/lang/check/check.go b/lang/check/check.go
index 610f9d6..84dc12d 100644
--- a/lang/check/check.go
+++ b/lang/check/check.go
@@ -91,8 +91,9 @@
funcs: map[t.QQID]*a.Func{},
localVars: map[t.QQID]typeMap{},
- builtInSliceFuncs: map[t.QQID]*a.Func{},
- builtInTableFuncs: map[t.QQID]*a.Func{},
+ builtInSliceFuncs: map[t.QQID]*a.Func{},
+ builtInSliceU8Funcs: map[t.QQID]*a.Func{},
+ builtInTableFuncs: map[t.QQID]*a.Func{},
builtInInterfaces: map[t.QID][]t.QQID{},
builtInInterfaceFuncs: map[t.QQID]*a.Func{},
@@ -105,6 +106,9 @@
if err := c.parseBuiltInFuncs(c.builtInSliceFuncs, builtin.SliceFuncs); err != nil {
return nil, err
}
+ if err := c.parseBuiltInFuncs(c.builtInSliceU8Funcs, builtin.SliceU8Funcs); err != nil {
+ return nil, err
+ }
if err := c.parseBuiltInFuncs(c.builtInTableFuncs, builtin.TableFuncs); err != nil {
return nil, err
}
@@ -221,8 +225,9 @@
funcs map[t.QQID]*a.Func
localVars map[t.QQID]typeMap
- builtInSliceFuncs map[t.QQID]*a.Func
- builtInTableFuncs map[t.QQID]*a.Func
+ builtInSliceFuncs map[t.QQID]*a.Func
+ builtInSliceU8Funcs map[t.QQID]*a.Func
+ builtInTableFuncs map[t.QQID]*a.Func
builtInInterfaces map[t.QID][]t.QQID
builtInInterfaceFuncs map[t.QQID]*a.Func
diff --git a/lang/check/optimize.go b/lang/check/optimize.go
index c96dba0..4c4202e 100644
--- a/lang/check/optimize.go
+++ b/lang/check/optimize.go
@@ -58,7 +58,9 @@
// receiver.length(), even if they aren't an exact match.
op := x.Operator()
- if op != t.IDXBinaryGreaterEq && op != t.IDXBinaryGreaterThan {
+ if (op != t.IDXBinaryGreaterEq) &&
+ (op != t.IDXBinaryGreaterThan) &&
+ (op != t.IDXBinaryEqEq) {
return x, nil
}
diff --git a/lang/check/resolve.go b/lang/check/resolve.go
index de615b2..c9cd8f7 100644
--- a/lang/check/resolve.go
+++ b/lang/check/resolve.go
@@ -154,6 +154,11 @@
if f := c.builtInSliceFuncs[qqid]; f != nil {
return f, nil
}
+ if lTyp.Eq(typeExprSliceU8) {
+ if f := c.builtInSliceU8Funcs[qqid]; f != nil {
+ return f, nil
+ }
+ }
} else if lTyp.IsTableType() {
qqid[0] = t.IDBase
diff --git a/lang/check/type.go b/lang/check/type.go
index caedc7b..cde48c3 100644
--- a/lang/check/type.go
+++ b/lang/check/type.go
@@ -665,20 +665,21 @@
if lTyp.IsSliceType() {
qqid[0] = t.IDBase
qqid[1] = t.IDDagger1
- if q.c.builtInSliceFuncs[qqid] == nil {
- return fmt.Errorf("check: no slice method %q", n.Ident().Str(q.tm))
+ if (q.c.builtInSliceFuncs[qqid] != nil) ||
+ ((q.c.builtInSliceU8Funcs[qqid] != nil) && lTyp.Eq(typeExprSliceU8)) {
+ n.SetMType(a.NewTypeExpr(t.IDFunc, 0, n.Ident(), lTyp.AsNode(), nil, nil))
+ return nil
}
- n.SetMType(a.NewTypeExpr(t.IDFunc, 0, n.Ident(), lTyp.AsNode(), nil, nil))
- return nil
+ return fmt.Errorf("check: no slice method %q", n.Ident().Str(q.tm))
} else if lTyp.IsTableType() {
qqid[0] = t.IDBase
qqid[1] = t.IDDagger2
- if q.c.builtInTableFuncs[qqid] == nil {
- return fmt.Errorf("check: no table method %q", n.Ident().Str(q.tm))
+ if q.c.builtInTableFuncs[qqid] != nil {
+ n.SetMType(a.NewTypeExpr(t.IDFunc, 0, n.Ident(), lTyp.AsNode(), nil, nil))
+ return nil
}
- n.SetMType(a.NewTypeExpr(t.IDFunc, 0, n.Ident(), lTyp.AsNode(), nil, nil))
- return nil
+ return fmt.Errorf("check: no table method %q", n.Ident().Str(q.tm))
} else if lTyp.Decorator() != 0 {
return fmt.Errorf("check: invalid type %q for dot-expression LHS %q", lTyp.Str(q.tm), lhs.Str(q.tm))
diff --git a/lang/token/list.go b/lang/token/list.go
index c216ebf..24ad4a9 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -582,6 +582,24 @@
// --------
+ IDPokeU8 = ID(0x1D1)
+ IDPokeU16BE = ID(0x1D2)
+ IDPokeU16LE = ID(0x1D3)
+ IDPokeU24BE = ID(0x1D4)
+ IDPokeU24LE = ID(0x1D5)
+ IDPokeU32BE = ID(0x1D6)
+ IDPokeU32LE = ID(0x1D7)
+ IDPokeU40BE = ID(0x1D8)
+ IDPokeU40LE = ID(0x1D9)
+ IDPokeU48BE = ID(0x1DA)
+ IDPokeU48LE = ID(0x1DB)
+ IDPokeU56BE = ID(0x1DC)
+ IDPokeU56LE = ID(0x1DD)
+ IDPokeU64BE = ID(0x1DE)
+ IDPokeU64LE = ID(0x1DF)
+
+ // --------
+
IDWriteU8Fast = ID(0x1E1)
IDWriteU16BEFast = ID(0x1E2)
IDWriteU16LEFast = ID(0x1E3)
@@ -969,6 +987,24 @@
// --------
+ IDPokeU8: "poke_u8",
+ IDPokeU16BE: "poke_u16be",
+ IDPokeU16LE: "poke_u16le",
+ IDPokeU24BE: "poke_u24be",
+ IDPokeU24LE: "poke_u24le",
+ IDPokeU32BE: "poke_u32be",
+ IDPokeU32LE: "poke_u32le",
+ IDPokeU40BE: "poke_u40be",
+ IDPokeU40LE: "poke_u40le",
+ IDPokeU48BE: "poke_u48be",
+ IDPokeU48LE: "poke_u48le",
+ IDPokeU56BE: "poke_u56be",
+ IDPokeU56LE: "poke_u56le",
+ IDPokeU64BE: "poke_u64be",
+ IDPokeU64LE: "poke_u64le",
+
+ // --------
+
IDWriteU8Fast: "write_u8_fast",
IDWriteU16BEFast: "write_u16be_fast",
IDWriteU16LEFast: "write_u16le_fast",
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index dc02664..3de64cf 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -792,8 +792,14 @@
static inline void //
wuffs_base__poke_u16le__no_bounds_check(uint8_t* p, uint16_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 2);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
+#endif
}
static inline void //
@@ -820,10 +826,16 @@
static inline void //
wuffs_base__poke_u32le__no_bounds_check(uint8_t* p, uint32_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 4);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
p[2] = (uint8_t)(x >> 16);
p[3] = (uint8_t)(x >> 24);
+#endif
}
static inline void //
@@ -900,6 +912,11 @@
static inline void //
wuffs_base__poke_u64le__no_bounds_check(uint8_t* p, uint64_t x) {
+#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
+ // This seems to perform better on gcc 10 (but not clang 9). Clang also
+ // defines "__GNUC__".
+ memcpy(p, &x, 8);
+#else
p[0] = (uint8_t)(x >> 0);
p[1] = (uint8_t)(x >> 8);
p[2] = (uint8_t)(x >> 16);
@@ -908,6 +925,7 @@
p[5] = (uint8_t)(x >> 40);
p[6] = (uint8_t)(x >> 48);
p[7] = (uint8_t)(x >> 56);
+#endif
}
// --------
@@ -31045,18 +31063,12 @@
v_c.len = 4;
uint8_t* i_end0_c = i_slice_c.ptr + ((i_slice_c.len / 4) * 4);
while (v_c.ptr < i_end0_c) {
- v_x32 = ((((uint32_t)(v_c.ptr[0])) << 0) |
- (((uint32_t)(v_c.ptr[1])) << 8) |
- (((uint32_t)(v_c.ptr[2])) << 16) |
- (((uint32_t)(v_c.ptr[3])) << 24));
+ v_x32 = wuffs_base__peek_u32le__no_bounds_check(v_c.ptr);
(v_x128 = _mm_cvtsi32_si128((int)(v_x32)), wuffs_base__make_empty_struct());
v_x128 = _mm_add_epi8(v_x128, v_a128);
v_a128 = v_x128;
v_x32 = ((uint32_t)(_mm_cvtsi128_si32(v_x128)));
- v_c.ptr[0] = ((uint8_t)((255 & (v_x32 >> 0))));
- v_c.ptr[1] = ((uint8_t)((255 & (v_x32 >> 8))));
- v_c.ptr[2] = ((uint8_t)((255 & (v_x32 >> 16))));
- v_c.ptr[3] = ((uint8_t)((255 & (v_x32 >> 24))));
+ (wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, v_x32), wuffs_base__make_empty_struct());
v_c.ptr += 4;
}
}
diff --git a/std/png/decode_filter_sse128.wuffs b/std/png/decode_filter_sse128.wuffs
index 1eac907..0338f1c 100644
--- a/std/png/decode_filter_sse128.wuffs
+++ b/std/png/decode_filter_sse128.wuffs
@@ -25,19 +25,13 @@
var a128 : base.sse128_i
iterate (c = args.curr)(length: 4, advance: 4, unroll: 1) {
- x32 = ((c[0] as base.u32) << 0x00) |
- ((c[1] as base.u32) << 0x08) |
- ((c[2] as base.u32) << 0x10) |
- ((c[3] as base.u32) << 0x18)
+ x32 = c.peek_u32le()
x128.load_u32!(a: x32)
x128 = x128._mm_add_epi8!(b: a128)
a128 = x128
x32 = x128.truncate_u32()
- c[0] = (0xFF & (x32 >> 0x00)) as base.u8
- c[1] = (0xFF & (x32 >> 0x08)) as base.u8
- c[2] = (0xFF & (x32 >> 0x10)) as base.u8
- c[3] = (0xFF & (x32 >> 0x18)) as base.u8
+ c.poke_u32le!(a: x32)
}
}