Add io_reader.limited_copy_u32_to_slice
diff --git a/internal/cgen/base/io-private.h b/internal/cgen/base/io-private.h
index 4bd89f2..02ed2c7 100644
--- a/internal/cgen/base/io-private.h
+++ b/internal/cgen/base/io-private.h
@@ -34,6 +34,26 @@
// --------
+static inline uint32_t // Copies up to length reader bytes into dst; returns the count.
+wuffs_base__io_reader__limited_copy_u32_to_slice(const uint8_t** ptr_iop_r,  // in/out: read cursor, advanced by the bytes copied
+ const uint8_t* io2_r,  // end of the readable bytes; assumes io2_r >= *ptr_iop_r
+ uint32_t length,  // caller's cap on the number of bytes to copy
+ wuffs_base__slice_u8 dst) {  // destination; at most dst.len bytes are written
+ const uint8_t* iop_r = *ptr_iop_r;
+ size_t n = dst.len;  // n ends up as min(dst.len, length, io2_r - iop_r).
+ if (n > length) {
+ n = length;
+ }
+ if (n > ((size_t)(io2_r - iop_r))) {  // Cannot copy more than is left to read.
+ n = (size_t)(io2_r - iop_r);
+ }
+ if (n > 0) {  // When n is zero, skip the copy and leave the cursor alone.
+ memmove(dst.ptr, iop_r, n);
+ *ptr_iop_r += n;  // Consume the copied bytes from the reader.
+ }
+ return (uint32_t)(n);  // n <= length here, so the cast cannot truncate.
+}
+
// wuffs_base__io_reader__match7 returns whether the io_reader's upcoming bytes
// start with the given prefix (up to 7 bytes long). It is peek-like, not
// read-like, in that there are no side-effects.
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index f028724..989706c 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -152,6 +152,11 @@
b.printf("(%s%s > %s%s)", iopPrefix, name, io1Prefix, name)
return nil
+ case t.IDLimitedCopyU32ToSlice:
+ b.printf("wuffs_base__io_reader__limited_copy_u32_to_slice(&%s%s, %s%s,",
+ iopPrefix, name, io2Prefix, name)
+ return g.writeArgs(b, args, depth)
+
case t.IDCountSince:
b.printf("wuffs_base__io__count_since(")
if err := g.writeExpr(b, args[0].AsArg().Value(), depth); err != nil {
diff --git a/internal/cgen/data.go b/internal/cgen/data.go
index 4b871f2..4e6f069 100644
--- a/internal/cgen/data.go
+++ b/internal/cgen/data.go
@@ -327,9 +327,10 @@
const baseIOPrivateH = "" +
"// ---------------- I/O\n\nstatic inline uint64_t //\nwuffs_base__io__count_since(uint64_t mark, uint64_t index) {\n if (index >= mark) {\n return index - mark;\n }\n return 0;\n}\n\nstatic inline wuffs_base__slice_u8 //\nwuffs_base__io__since(uint64_t mark, uint64_t index, uint8_t* ptr) {\n if (index >= mark) {\n return wuffs_base__make_slice_u8(ptr + mark, index - mark);\n }\n return wuffs_base__make_slice_u8(NULL, 0);\n}\n\n" +
"" +
- "// --------\n\n// wuffs_base__io_reader__match7 returns whether the io_reader's upcoming bytes\n// start with the given prefix (up to 7 bytes long). It is peek-like, not\n// read-like, in that there are no side-effects.\n//\n// The low 3 bits of a hold the prefix length, n.\n//\n// The high 56 bits of a hold the prefix itself, in little-endian order. The\n// first prefix byte is in bits 8..=15, the second prefix byte is in bits\n// 16..=23, etc. The high (8 * (7 - n)) bits are ignored.\n//\n// There are three possible return values:\n// - 0 means success.\n// - 1 means inconclusive, equivalent to \"$short read\".\n// - 2 means failure.\nstatic inline uint32_t //\nwuffs_base__io_reader__match7(const uint8_t* iop_r,\n const uint8_t* io2_r,\n wuffs_base__io_buffer* r,\n uint64_t a) {\n uint32_t n = a & 7;\n a >>= 8;\n if ((io2_r - iop_r) >= 8) {\n uint64_t x = wuffs_base__load_u64le__no_bounds_check(iop_r);\n uint32_t shift = 8 * (8 - n);\n " +
- " return ((a << shift) == (x << shift)) ? 0 : 2;\n }\n for (; n > 0; n--) {\n if (iop_r >= io2_r) {\n return (r && r->meta.closed) ? 2 : 1;\n } else if (*iop_r != ((uint8_t)(a))) {\n return 2;\n }\n iop_r++;\n a >>= 8;\n }\n return 0;\n}\n\nstatic inline wuffs_base__io_buffer* //\nwuffs_base__io_reader__set(wuffs_base__io_buffer* b,\n const uint8_t** ptr_iop_r,\n const uint8_t** ptr_io0_r,\n const uint8_t** ptr_io1_r,\n const uint8_t** ptr_io2_r,\n wuffs_base__slice_u8 data) {\n b->data = data;\n b->meta.wi = data.len;\n b->meta.ri = 0;\n b->meta.pos = 0;\n b->meta.closed = false;\n\n *ptr_iop_r = data.ptr;\n *ptr_io0_r = data.ptr;\n *ptr_io1_r = data.ptr;\n *ptr_io2_r = data.ptr + data.len;\n\n return b;\n}\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wcast-qual\"\n// TODO: can we avoid the const_cast (by deleting this function)? This might\n// involve conver" +
- "ting the call sites to take an io_reader instead of a slice u8\n// (the result of io_reader.take).\nstatic inline wuffs_base__slice_u8 //\nwuffs_base__io_reader__take(const uint8_t** ptr_iop_r,\n const uint8_t* io2_r,\n uint64_t n) {\n if (n <= ((size_t)(io2_r - *ptr_iop_r))) {\n const uint8_t* p = *ptr_iop_r;\n *ptr_iop_r += n;\n // The arg is what C calls C++'s \"const_cast<uint8_t*>(p)\".\n return wuffs_base__make_slice_u8((uint8_t*)(p), n);\n }\n return wuffs_base__make_slice_u8(NULL, 0);\n}\n#pragma GCC diagnostic pop\n\n" +
+ "// --------\n\nstatic inline uint32_t //\nwuffs_base__io_reader__limited_copy_u32_to_slice(const uint8_t** ptr_iop_r,\n const uint8_t* io2_r,\n uint32_t length,\n wuffs_base__slice_u8 dst) {\n const uint8_t* iop_r = *ptr_iop_r;\n size_t n = dst.len;\n if (n > length) {\n n = length;\n }\n if (n > ((size_t)(io2_r - iop_r))) {\n n = (size_t)(io2_r - iop_r);\n }\n if (n > 0) {\n memmove(dst.ptr, iop_r, n);\n *ptr_iop_r += n;\n }\n return (uint32_t)(n);\n}\n\n// wuffs_base__io_reader__match7 returns whether the io_reader's upcoming bytes\n// start with the given prefix (up to 7 bytes long). It is peek-like, not\n// read-like, in that there are no side-effects.\n//\n// The low 3 bits of a hold the prefix length, n.\n//\n// The high 56 bits of a hold the prefix itself, in little-endian order. The\n// first prefix byte is in bits 8..=15, the second prefix byte is in bits\n// 16..=23" +
+ ", etc. The high (8 * (7 - n)) bits are ignored.\n//\n// There are three possible return values:\n// - 0 means success.\n// - 1 means inconclusive, equivalent to \"$short read\".\n// - 2 means failure.\nstatic inline uint32_t //\nwuffs_base__io_reader__match7(const uint8_t* iop_r,\n const uint8_t* io2_r,\n wuffs_base__io_buffer* r,\n uint64_t a) {\n uint32_t n = a & 7;\n a >>= 8;\n if ((io2_r - iop_r) >= 8) {\n uint64_t x = wuffs_base__load_u64le__no_bounds_check(iop_r);\n uint32_t shift = 8 * (8 - n);\n return ((a << shift) == (x << shift)) ? 0 : 2;\n }\n for (; n > 0; n--) {\n if (iop_r >= io2_r) {\n return (r && r->meta.closed) ? 2 : 1;\n } else if (*iop_r != ((uint8_t)(a))) {\n return 2;\n }\n iop_r++;\n a >>= 8;\n }\n return 0;\n}\n\nstatic inline wuffs_base__io_buffer* //\nwuffs_base__io_reader__set(wuffs_base__io_buffer* b,\n const uint8_t** ptr_iop_r,\n c" +
+ "onst uint8_t** ptr_io0_r,\n const uint8_t** ptr_io1_r,\n const uint8_t** ptr_io2_r,\n wuffs_base__slice_u8 data) {\n b->data = data;\n b->meta.wi = data.len;\n b->meta.ri = 0;\n b->meta.pos = 0;\n b->meta.closed = false;\n\n *ptr_iop_r = data.ptr;\n *ptr_io0_r = data.ptr;\n *ptr_io1_r = data.ptr;\n *ptr_io2_r = data.ptr + data.len;\n\n return b;\n}\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wcast-qual\"\n// TODO: can we avoid the const_cast (by deleting this function)? This might\n// involve converting the call sites to take an io_reader instead of a slice u8\n// (the result of io_reader.take).\nstatic inline wuffs_base__slice_u8 //\nwuffs_base__io_reader__take(const uint8_t** ptr_iop_r,\n const uint8_t* io2_r,\n uint64_t n) {\n if (n <= ((size_t)(io2_r - *ptr_iop_r))) {\n const uint8_t* p = *ptr_iop_r;\n *ptr_iop_r += n;\n // The arg is what C calls C++'s \"const_cast" +
+ "<uint8_t*>(p)\".\n return wuffs_base__make_slice_u8((uint8_t*)(p), n);\n }\n return wuffs_base__make_slice_u8(NULL, 0);\n}\n#pragma GCC diagnostic pop\n\n" +
"" +
"// --------\n\nstatic inline uint64_t //\nwuffs_base__io_writer__copy_from_slice(uint8_t** ptr_iop_w,\n uint8_t* io2_w,\n wuffs_base__slice_u8 src) {\n uint8_t* iop_w = *ptr_iop_w;\n size_t n = src.len;\n if (n > ((size_t)(io2_w - iop_w))) {\n n = (size_t)(io2_w - iop_w);\n }\n if (n > 0) {\n memmove(iop_w, src.ptr, n);\n *ptr_iop_w += n;\n }\n return (uint64_t)(n);\n}\n\nstatic inline uint32_t //\nwuffs_base__io_writer__limited_copy_u32_from_history(uint8_t** ptr_iop_w,\n uint8_t* io1_w,\n uint8_t* io2_w,\n uint32_t length,\n uint32_t distance) {\n if (!distance) {\n return 0;\n }\n uint8_t* p = *ptr_iop_w;\n if ((size_t)(p - io1_w) < (size_t)(distance)) {\n return 0;\n }\n uint8_t* q = p - distance;\n size_t n = (size_t)(io2_w - " +
"p);\n if ((size_t)(length) > n) {\n length = (uint32_t)(n);\n } else {\n n = (size_t)(length);\n }\n // TODO: unrolling by 3 seems best for the std/deflate benchmarks, but that\n // is mostly because 3 is the minimum length for the deflate format. This\n // function implementation shouldn't overfit to that one format. Perhaps the\n // limited_copy_u32_from_history Wuffs method should also take an unroll hint\n // argument, and the cgen can look if that argument is the constant\n // expression '3'.\n //\n // See also wuffs_base__io_writer__limited_copy_u32_from_history_fast below.\n //\n // Alternatively or additionally, have a sloppy_limited_copy_u32_from_history\n // method that copies 8 bytes at a time, which can more than length bytes?\n for (; n >= 3; n -= 3) {\n *p++ = *q++;\n *p++ = *q++;\n *p++ = *q++;\n }\n for (; n; n--) {\n *p++ = *q++;\n }\n *ptr_iop_w = p;\n return length;\n}\n\n// wuffs_base__io_writer__limited_copy_u32_from_history_fast is like the\n// wuffs_base__io_writer__limited_copy" +
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index efce8f3..5f7c32e 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -275,6 +275,8 @@
"io_reader.since(mark: u64) slice u8",
"io_reader.take!(n: u64) slice u8",
+ "io_reader.limited_copy_u32_to_slice!(up_to: u32, s: slice u8) u32",
+
"io_reader.skip?(n: u64)",
"io_reader.skip_u32?(n: u32)",
diff --git a/lang/token/list.go b/lang/token/list.go
index 5343025..4e5ea4f 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -522,6 +522,7 @@
IDLimitedCopyU32FromHistoryFast = ID(0x172)
IDLimitedCopyU32FromReader = ID(0x173)
IDLimitedCopyU32FromSlice = ID(0x174)
+ IDLimitedCopyU32ToSlice = ID(0x175)
// -------- 0x180 block.
@@ -883,6 +884,7 @@
IDLimitedCopyU32FromHistoryFast: "limited_copy_u32_from_history_fast",
IDLimitedCopyU32FromReader: "limited_copy_u32_from_reader",
IDLimitedCopyU32FromSlice: "limited_copy_u32_from_slice",
+ IDLimitedCopyU32ToSlice: "limited_copy_u32_to_slice",
// -------- 0x180 block.
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 4d68520..9415def 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -7641,6 +7641,26 @@
// --------
+static inline uint32_t // Copies up to length reader bytes into dst; returns the count.
+wuffs_base__io_reader__limited_copy_u32_to_slice(const uint8_t** ptr_iop_r,  // in/out: read cursor, advanced by the bytes copied
+ const uint8_t* io2_r,  // end of the readable bytes; assumes io2_r >= *ptr_iop_r
+ uint32_t length,  // caller's cap on the number of bytes to copy
+ wuffs_base__slice_u8 dst) {  // destination; at most dst.len bytes are written
+ const uint8_t* iop_r = *ptr_iop_r;
+ size_t n = dst.len;  // n ends up as min(dst.len, length, io2_r - iop_r).
+ if (n > length) {
+ n = length;
+ }
+ if (n > ((size_t)(io2_r - iop_r))) {  // Cannot copy more than is left to read.
+ n = (size_t)(io2_r - iop_r);
+ }
+ if (n > 0) {  // When n is zero, skip the copy and leave the cursor alone.
+ memmove(dst.ptr, iop_r, n);
+ *ptr_iop_r += n;  // Consume the copied bytes from the reader.
+ }
+ return (uint32_t)(n);  // n <= length here, so the cast cannot truncate.
+}
+
// wuffs_base__io_reader__match7 returns whether the io_reader's upcoming bytes
// start with the given prefix (up to 7 bytes long). It is peek-like, not
// read-like, in that there are no side-effects.
@@ -21277,8 +21297,8 @@
uint64_t v_block_size = 0;
bool v_need_block_size = false;
+ uint32_t v_n_copied = 0;
uint64_t v_n_compressed = 0;
- wuffs_base__slice_u8 v_compressed = {0};
wuffs_base__io_buffer u_r = wuffs_base__empty_io_buffer();
wuffs_base__io_buffer* v_r = &u_r;
const uint8_t* iop_v_r WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
@@ -21345,17 +21365,16 @@
if (v_n_compressed <= 0) {
goto label__0__break;
}
- v_compressed =
- wuffs_base__io_reader__take(&iop_a_src, io2_a_src, v_n_compressed);
- wuffs_base__slice_u8__copy_from_slice(
+ v_n_copied = wuffs_base__io_reader__limited_copy_u32_to_slice(
+ &iop_a_src, io2_a_src, ((uint32_t)((v_n_compressed & 4294967295))),
wuffs_base__slice_u8__subslice_i(
wuffs_base__make_slice_u8(self->private_data.f_compressed,
4096),
- self->private_impl.f_compressed_wi),
- v_compressed);
+ self->private_impl.f_compressed_wi));
wuffs_base__u64__sat_add_indirect(&self->private_impl.f_compressed_wi,
- v_n_compressed);
- wuffs_base__u64__sat_sub_indirect(&v_block_size, v_n_compressed);
+ ((uint64_t)(v_n_copied)));
+ wuffs_base__u64__sat_sub_indirect(&v_block_size,
+ ((uint64_t)(v_n_copied)));
if (v_block_size > 0) {
goto label__0__break;
}
diff --git a/std/gif/decode_gif.wuffs b/std/gif/decode_gif.wuffs
index 4b049cc..5bc27a0 100644
--- a/std/gif/decode_gif.wuffs
+++ b/std/gif/decode_gif.wuffs
@@ -924,8 +924,8 @@
pri func decoder.decode_id_part2?(dst: ptr base.pixel_buffer, src: base.io_reader, workbuf: slice base.u8) {
var block_size : base.u64[..= 255]
var need_block_size : base.bool
+ var n_copied : base.u32
var n_compressed : base.u64
- var compressed : slice base.u8
var r : base.io_reader
var mark : base.u64
var lzw_status : base.status
@@ -954,10 +954,11 @@
if n_compressed <= 0 {
break
}
- compressed = args.src.take!(n: n_compressed)
- this.compressed[this.compressed_wi ..].copy_from_slice!(s: compressed)
- this.compressed_wi ~sat+= n_compressed
- block_size ~sat-= n_compressed
+ n_copied = args.src.limited_copy_u32_to_slice!(
+ up_to: (n_compressed & 0xFFFF_FFFF) as base.u32,
+ s: this.compressed[this.compressed_wi ..])
+ this.compressed_wi ~sat+= n_copied as base.u64
+ block_size ~sat-= n_copied as base.u64
if block_size > 0 {
break
}