Have std/png verify PLTE chunk checksums
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3b70d58..05ad44f 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -31632,7 +31632,7 @@
}
self->private_impl.f_chunk_type = t_5;
}
- if ( ! self->private_impl.f_ignore_checksum) {
+ if ( ! self->private_impl.f_ignore_checksum && ((self->private_impl.f_chunk_type == 1413563465) || (self->private_impl.f_chunk_type == 1163152464))) {
wuffs_base__ignore_status(wuffs_crc32__ieee_hasher__initialize(&self->private_data.f_crc32, sizeof (wuffs_crc32__ieee_hasher), WUFFS_VERSION, 0));
self->private_impl.f_chunk_type_array[0] = ((uint8_t)(((self->private_impl.f_chunk_type >> 0) & 255)));
self->private_impl.f_chunk_type_array[1] = ((uint8_t)(((self->private_impl.f_chunk_type >> 8) & 255)));
@@ -31655,7 +31655,7 @@
iop_a_src = a_src->data.ptr + a_src->meta.ri;
}
}
- if ( ! self->private_impl.f_ignore_checksum) {
+ if ( ! self->private_impl.f_ignore_checksum && (self->private_impl.f_chunk_type == 1163152464)) {
v_checksum_have = wuffs_crc32__ieee_hasher__update_u32(&self->private_data.f_crc32, wuffs_base__io__since(v_mark, ((uint64_t)(iop_a_src - io0_a_src)), io0_a_src));
}
if (wuffs_base__status__is_ok(&v_status)) {
@@ -31694,7 +31694,9 @@
}
v_checksum_want = t_7;
}
- if ( ! self->private_impl.f_ignore_checksum && (v_checksum_have != v_checksum_want)) {
+ if ( ! self->private_impl.f_ignore_checksum && (self->private_impl.f_chunk_type == 1163152464) && (v_checksum_have != v_checksum_want)) {
+ status = wuffs_base__make_status(wuffs_png__error__bad_checksum);
+ goto exit;
}
}
label__1__break:;
diff --git a/script/strip-png-ancillary-chunks.go b/script/strip-png-ancillary-chunks.go
new file mode 100644
index 0000000..2ddb53e
--- /dev/null
+++ b/script/strip-png-ancillary-chunks.go
@@ -0,0 +1,226 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build ignore
+
+package main
+
+// strip-png-ancillary-chunks.go copies PNG data from stdin to stdout, removing
+// any ancillary chunks.
+//
+// Specification-compliant PNG decoders are required to honor critical chunks
+// but may ignore ancillary (non-critical) chunks. Stripping out ancillary
+// chunks before decoding should mean that different PNG decoders will agree on
+// the decoded output regardless of which ancillary chunk types they choose to
+// honor. Specifically, some PNG decoders may implement color and gamma
+// correction but not all do.
+//
+// This program will strip out all ancillary chunks, but it should be
+// straightforward to copy-paste-and-modify it to strip out only certain chunk
+// types (e.g. only "tRNS" transparency chunks).
+//
+// --------
+//
+// A PNG file consists of an 8-byte magic identifier and then a series of
+// chunks. Each chunk is:
+//
+// - a 4-byte uint32 payload length N.
+// - a 4-byte chunk type (e.g. "gAMA" for gamma correction metadata).
+// - an N-byte payload.
+// - a 4-byte CRC-32 checksum of the previous (N + 4) bytes, including the
+// chunk type but excluding the payload length.
+//
+// Chunk types consist of 4 ASCII letters. The upper-case / lower-case bit of
+// the first letter denote critical or ancillary chunks: "IDAT" and "PLTE" are
+// critical, "gAMA" and "tEXt" are ancillary. See
+// https://www.w3.org/TR/2003/REC-PNG-20031110/#5Chunk-naming-conventions
+//
+// --------
+//
+// Usage:
+// go run strip-png-ancillary-chunks.go < in.png > out.png
+//
+// Adding a -random=123 flag should not change the output:
+// go run strip-png-ancillary-chunks.go -random=123 < in.png > out.png
+
+import (
+ "encoding/binary"
+ "flag"
+ "io"
+ "math/rand"
+ "os"
+ "os/signal"
+ "syscall"
+)
+
+var (
+ randomFlag = flag.Int64("random", 0, "If non-zero, the seed for generating smaller reads")
+)
+
+// chunkTypeAncillaryBit is whether the first byte of a big-endian uint32 chunk
+// type (the first of four ASCII letters) is lower-case.
+const chunkTypeAncillaryBit = 0x20000000
+
+// RandomSizedReader wraps another io.Reader such that the Reader.Read []byte
+// argument has randomized lengths, up to a maximum of 9 bytes.
+//
+// This is slower but helps manually test that PNGAncillaryChunkStripper
+// produces identical output regardless of whether Read is called many times
+// (with small buffers) or few times (with large buffers). Specifically, this
+// exercises having Read boundaries straddle chunk boundaries.
+type RandomSizedReader struct {
+ // Reader is the wrapped io.Reader.
+ Reader io.Reader
+
+ // Rand is the random number generator.
+ Rand *rand.Rand
+}
+
+// Read implements io.Reader.
+func (r *RandomSizedReader) Read(p []byte) (int, error) {
+ if n := r.Rand.Intn(10); len(p) > n {
+ p = p[:n]
+ }
+ return r.Reader.Read(p)
+}
+
+// PNGAncillaryChunkStripper wraps another io.Reader to strip ancillary chunks,
+// if the data is in the PNG file format. If the data isn't PNG, it is passed
+// through unmodified.
+type PNGAncillaryChunkStripper struct {
+ // Reader is the wrapped io.Reader.
+ Reader io.Reader
+
+ // stickyErr is the first error returned from the wrapped io.Reader.
+ stickyErr error
+
+ // buffer[rIndex:wIndex] holds data read from the wrapped io.Reader that
+ // wasn't passed through yet.
+ buffer [8]byte
+ rIndex int
+ wIndex int
+
+ // pending and discard is the number of remaining bytes for (and whether to
+ // discard or pass through) the current chunk-in-progress.
+ pending int64
+ discard bool
+
+ // notPNG is set true if the data stream doesn't start with the 8-byte PNG
+ // magic identifier. If true, the wrapped io.Reader's data (including the
+ // first up-to-8 bytes) is passed through without modification.
+ notPNG bool
+
+ // seenMagic is whether we've seen the 8-byte PNG magic identifier.
+ seenMagic bool
+}
+
+// Read implements io.Reader.
+func (r *PNGAncillaryChunkStripper) Read(p []byte) (int, error) {
+ for {
+ // If the wrapped io.Reader returned a non-nil error, drain r.buffer
+ // (what data we have) and return that error (if fully drained).
+ if r.stickyErr != nil {
+ n := copy(p, r.buffer[r.rIndex:r.wIndex])
+ r.rIndex += n
+ if r.rIndex < r.wIndex {
+ return n, nil
+ }
+ return n, r.stickyErr
+ }
+
+ // Handle trivial requests, including draining our buffer.
+ if len(p) == 0 {
+ return 0, nil
+ } else if r.rIndex < r.wIndex {
+ n := copy(p, r.buffer[r.rIndex:r.wIndex])
+ r.rIndex += n
+ return n, nil
+ }
+
+ // From here onwards, our buffer is drained: r.rIndex == r.wIndex.
+
+ // Handle non-PNG input.
+ if r.notPNG {
+ return r.Reader.Read(p)
+ }
+
+ // Continue processing any PNG chunk that's in progress, whether
+ // discarding it or passing it through.
+ for r.pending > 0 {
+ if int64(len(p)) > r.pending {
+ p = p[:r.pending]
+ }
+ n, err := r.Reader.Read(p)
+ r.pending -= int64(n)
+ r.stickyErr = err
+ if r.discard {
+ continue
+ }
+ return n, err
+ }
+
+ // We're either expecting the 8-byte PNG magic identifier or the 4-byte
+ // PNG chunk length + 4-byte PNG chunk type. Either way, read 8 bytes.
+ r.rIndex = 0
+ r.wIndex, r.stickyErr = io.ReadFull(r.Reader, r.buffer[:8])
+ if r.stickyErr != nil {
+ // Undo io.ReadFull converting io.EOF to io.ErrUnexpectedEOF.
+ if r.stickyErr == io.ErrUnexpectedEOF {
+ r.stickyErr = io.EOF
+ }
+ continue
+ }
+
+ // Process those 8 bytes, either:
+ // - a PNG chunk (if we've already seen the PNG magic identifier),
+ // - the PNG magic identifier itself (if the input is a PNG) or
+ // - something else (if it's not a PNG).
+ if r.seenMagic {
+ // The number of pending bytes is equal to (N + 4) because of the 4
+ // byte trailer, a checksum.
+ r.pending = int64(binary.BigEndian.Uint32(r.buffer[:4])) + 4
+ chunkType := binary.BigEndian.Uint32(r.buffer[4:])
+ r.discard = (chunkType & chunkTypeAncillaryBit) != 0
+ if r.discard {
+ r.rIndex = r.wIndex
+ }
+ } else if string(r.buffer[:8]) == "\x89PNG\x0D\x0A\x1A\x0A" {
+ r.seenMagic = true
+ } else {
+ r.notPNG = true
+ }
+ }
+}
+
+func main() {
+ // Piping to /usr/bin/head can cause SIGPIPE (in the upstream process)
+ // since head can exit early, leaving this process with pending output. The
+ // command line experience can be cleaner if this program ignores SIGPIPE.
+ signal.Ignore(syscall.SIGPIPE)
+
+ flag.Parse()
+
+ r := io.Reader(&PNGAncillaryChunkStripper{
+ Reader: os.Stdin,
+ })
+
+ if *randomFlag != 0 {
+ r = &RandomSizedReader{
+ Reader: r,
+ Rand: rand.New(rand.NewSource(*randomFlag)),
+ }
+ }
+
+ io.Copy(os.Stdout, r)
+}
diff --git a/std/png/decode_png.wuffs b/std/png/decode_png.wuffs
index 2d9cb49..3d1c488 100644
--- a/std/png/decode_png.wuffs
+++ b/std/png/decode_png.wuffs
@@ -142,10 +142,22 @@
}
// Read up until an IDAT chunk.
+ //
+ // By default, libpng "warns and discards" when seeing ancillary chunk
+ // checksum failures (as opposed to critical chunk checksum failures) but
+ // it still continues to decode the image. Wuffs' decoder is similar,
+ // simply always ignoring ancillary chunks' CRC-32 checksums.
+ //
+ // https://github.com/glennrp/libpng/blob/dbe3e0c43e549a1602286144d94b0666549b18e6/png.h#L1436
+ //
+ // We've already seen the IHDR chunk. We're not expecting an IEND chunk. An
+ // IDAT chunk breaks the loop. The only other possible critical chunk is a
+ // PLTE chunk. We verify PLTE checksums here but ignore other checksums.
while true {
this.chunk_length = args.src.read_u32be_as_u64?()
this.chunk_type = args.src.read_u32le?()
- if not this.ignore_checksum {
+ if (not this.ignore_checksum) and
+ ((this.chunk_type == 'IDAT'le) or (this.chunk_type == 'PLTE'le)) {
this.crc32.reset!()
this.chunk_type_array[0] = ((this.chunk_type >> 0) & 0xFF) as base.u8
this.chunk_type_array[1] = ((this.chunk_type >> 8) & 0xFF) as base.u8
@@ -161,7 +173,7 @@
while true {
mark = args.src.mark()
status =? this.decode_other_chunk?(src: args.src)
- if not this.ignore_checksum {
+ if (not this.ignore_checksum) and (this.chunk_type == 'PLTE'le) {
checksum_have = this.crc32.update_u32!(x: args.src.since(mark: mark))
}
if status.is_ok() {
@@ -170,9 +182,9 @@
yield? status
} endwhile
checksum_want = args.src.read_u32be?()
- if (not this.ignore_checksum) and (checksum_have <> checksum_want) {
- // TODO: uncomment and fix test_mimic_png_decode_19k_8bpp.
- // return "#bad checksum"
+ if (not this.ignore_checksum) and (this.chunk_type == 'PLTE'le) and
+ (checksum_have <> checksum_want) {
+ return "#bad checksum"
}
} endwhile
diff --git a/test/c/std/png.c b/test/c/std/png.c
index 54bdbd8..863a84b 100644
--- a/test/c/std/png.c
+++ b/test/c/std/png.c
@@ -89,7 +89,7 @@
}
const char* //
-do_test_xxxxx_png_decode_bad_crc32_checksum(
+do_test_xxxxx_png_decode_bad_crc32_checksum_critical(
const char* (*decode_func)(uint64_t* n_bytes_out,
wuffs_base__io_buffer* dst,
uint32_t wuffs_initialize_flags,
@@ -98,13 +98,14 @@
const char* test_cases[] = {
// Change a byte in the IHDR CRC-32 checksum.
"@001F=8A=00;test/data/hippopotamus.regular.png",
+ // Change a byte in a PLTE CRC-32 checksum.
+ "@0372=52=00;test/data/bricks-dither.png",
// Change a byte in a final IDAT Adler-32 checksum.
"@084E=26=00;test/data/hippopotamus.regular.png",
// Change a byte in a final IDAT CRC-32 checksum.
"@084F=F4=00;test/data/hippopotamus.regular.png",
// Change a byte in a non-final IDAT CRC-32 checksum.
"@2029=B7=00;test/data/bricks-color.png",
- // TODO: Change a byte in an ancillary chunk's CRC-32 checksum.
};
int tc;
@@ -181,9 +182,10 @@
}
const char* //
-test_wuffs_png_decode_bad_crc32_checksum() {
+test_wuffs_png_decode_bad_crc32_checksum_critical() {
CHECK_FOCUS(__func__);
- return do_test_xxxxx_png_decode_bad_crc32_checksum(&wuffs_png_decode);
+ return do_test_xxxxx_png_decode_bad_crc32_checksum_critical(
+ &wuffs_png_decode);
}
const char* //
@@ -495,11 +497,7 @@
const char* //
test_mimic_png_decode_19k_8bpp() {
CHECK_FOCUS(__func__);
- // libpng automatically applies the "gAMA" chunk (with no matching "sRGB"
- // chunk) but Wuffs does not. To make the comparison more like-for-like,
- // especially in emitting identical BGRA pixels, patch the source file by
- // replacing the "gAMA" with the nonsense "hAMA". ASCII 'g' is 0x67.
- return do_test_mimic_png_decode("@25=67=68;test/data/bricks-gray.png");
+ return do_test_mimic_png_decode("test/data/bricks-gray.no-ancillary.png");
}
const char* //
@@ -527,9 +525,26 @@
}
const char* //
-test_mimic_png_decode_bad_crc32_checksum() {
+test_mimic_png_decode_bad_crc32_checksum_ancillary() {
CHECK_FOCUS(__func__);
- return do_test_xxxxx_png_decode_bad_crc32_checksum(&mimic_png_decode);
+ // libpng automatically applies the "gAMA" chunk (with no matching "sRGB"
+ // chunk) but Wuffs does not. To make the comparison more like-for-like,
+ // especially in emitting identical BGRA pixels, patch the source file by
+ // replacing the "gAMA" with the nonsense "hAMA". ASCII 'g' is 0x67.
+ //
+ // This makes the "hAMA" CRC-32 checksum no longer verify, since the checksum
+ // input includes the chunk type. By default, libpng "warns and discards"
+ // when seeing ancillary chunk checksum failures (as opposed to critical
+ // chunk checksum failures) but it still continues to decode the image.
+ // Wuffs' decoder likewise ignores the bad ancillary chunk checksum.
+ return do_test_mimic_png_decode("@25=67=68;test/data/bricks-gray.png");
+}
+
+const char* //
+test_mimic_png_decode_bad_crc32_checksum_critical() {
+ CHECK_FOCUS(__func__);
+ return do_test_xxxxx_png_decode_bad_crc32_checksum_critical(
+ &mimic_png_decode);
}
#endif // WUFFS_MIMIC
@@ -546,7 +561,7 @@
return do_bench_image_decode(
&wuffs_png_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED,
wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__Y),
- "@25=67=68;test/data/bricks-gray.png", 0, SIZE_MAX, 50);
+ "test/data/bricks-gray.no-ancillary.png", 0, SIZE_MAX, 50);
}
const char* //
@@ -716,7 +731,7 @@
return do_bench_image_decode(
&mimic_png_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED,
wuffs_base__make_pixel_format(WUFFS_BASE__PIXEL_FORMAT__Y),
- "@25=67=68;test/data/bricks-gray.png", 0, SIZE_MAX, 50);
+ "test/data/bricks-gray.no-ancillary.png", 0, SIZE_MAX, 50);
}
const char* //
@@ -761,7 +776,7 @@
proc g_tests[] = {
- test_wuffs_png_decode_bad_crc32_checksum,
+ test_wuffs_png_decode_bad_crc32_checksum_critical,
test_wuffs_png_decode_filters_golden,
test_wuffs_png_decode_filters_round_trip,
test_wuffs_png_decode_frame_config,
@@ -774,7 +789,8 @@
test_mimic_png_decode_77k_8bpp,
test_mimic_png_decode_552k_32bpp,
test_mimic_png_decode_4002k_24bpp,
- test_mimic_png_decode_bad_crc32_checksum,
+ test_mimic_png_decode_bad_crc32_checksum_ancillary,
+ test_mimic_png_decode_bad_crc32_checksum_critical,
#endif // WUFFS_MIMIC
diff --git a/test/data/bricks-gray.no-ancillary.png b/test/data/bricks-gray.no-ancillary.png
new file mode 100644
index 0000000..59de34b
--- /dev/null
+++ b/test/data/bricks-gray.no-ancillary.png
Binary files differ