Convert std/png text from Latin-1 to UTF-8

Fixes #55
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c index 145d8cd..5a649fc 100644 --- a/release/c/wuffs-unsupported-snapshot.c +++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9165,6 +9165,7 @@ extern const char wuffs_png__error__bad_chunk[]; extern const char wuffs_png__error__bad_filter[]; extern const char wuffs_png__error__bad_header[]; +extern const char wuffs_png__error__bad_text_chunk_not_latin_1[]; extern const char wuffs_png__error__missing_palette[]; extern const char wuffs_png__error__unsupported_png_compression_method[]; extern const char wuffs_png__error__unsupported_png_file[]; @@ -9381,6 +9382,9 @@ uint64_t f_metadata_x; uint64_t f_metadata_y; uint64_t f_metadata_z; + uint32_t f_ztxt_ri; + uint32_t f_ztxt_wi; + uint64_t f_ztxt_hist_pos; wuffs_base__pixel_swizzler f_swizzler; wuffs_base__empty_struct (*choosy_filter_1)( @@ -9470,7 +9474,7 @@ uint64_t scratch; } s_decode_pass[1]; struct { - uint8_t v_c; + wuffs_base__status v_zlib_status; uint64_t scratch; } s_tell_me_more[1]; } private_data; @@ -35327,9 +35331,11 @@ const char wuffs_png__error__bad_chunk[] = "#png: bad chunk"; const char wuffs_png__error__bad_filter[] = "#png: bad filter"; const char wuffs_png__error__bad_header[] = "#png: bad header"; +const char wuffs_png__error__bad_text_chunk_not_latin_1[] = "#png: bad text chunk (not Latin-1)"; const char wuffs_png__error__missing_palette[] = "#png: missing palette"; const char wuffs_png__error__unsupported_png_compression_method[] = "#png: unsupported PNG compression method"; const char wuffs_png__error__unsupported_png_file[] = "#png: unsupported PNG file"; +const char wuffs_png__error__internal_error_inconsistent_i_o[] = "#png: internal error: inconsistent I/O"; const char wuffs_png__error__internal_error_inconsistent_chunk_type[] = "#png: internal error: inconsistent chunk type"; const char wuffs_png__error__internal_error_inconsistent_frame_bounds[] = "#png: internal error: inconsistent frame bounds"; const char wuffs_png__error__internal_error_inconsistent_workbuf_length[] = "#png: internal error: inconsistent workbuf length"; @@ -35375,6 +35381,42 @@ 1, 0, 3, 1, 2, 0, 4, 0, }; +static const uint16_t 
+WUFFS_PNG__LATIN_1[256] WUFFS_BASE__POTENTIALLY_UNUSED = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 41410, 41666, 41922, 42178, 42434, 42690, 42946, + 43202, 43458, 43714, 43970, 44226, 44482, 44738, 44994, + 45250, 45506, 45762, 46018, 46274, 46530, 46786, 47042, + 47298, 47554, 47810, 48066, 48322, 48578, 48834, 49090, + 32963, 33219, 33475, 33731, 33987, 34243, 34499, 34755, + 35011, 35267, 35523, 35779, 36035, 36291, 36547, 36803, + 37059, 37315, 37571, 37827, 38083, 38339, 38595, 38851, + 39107, 39363, 39619, 39875, 40131, 40387, 40643, 40899, + 41155, 41411, 41667, 41923, 42179, 42435, 42691, 42947, + 43203, 43459, 43715, 43971, 44227, 44483, 44739, 44995, + 45251, 45507, 45763, 46019, 46275, 46531, 46787, 47043, + 47299, 47555, 47811, 48067, 48323, 48579, 48835, 49091, +}; + // ---------------- Private Initializer Prototypes // ---------------- Private Function Prototypes @@ -40455,6 +40497,15 @@ wuffs_base__status status = wuffs_base__make_status(NULL); uint8_t v_c = 0; + uint16_t v_c2 = 0; + wuffs_base__io_buffer u_w = wuffs_base__empty_io_buffer(); + wuffs_base__io_buffer* v_w = &u_w; + uint8_t* iop_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL; + uint8_t* io0_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL; + uint8_t* io1_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL; + uint8_t* io2_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL; + uint64_t v_num_written = 0; + uint64_t 
v_w_mark = 0; uint64_t v_r_mark = 0; wuffs_base__status v_zlib_status = wuffs_base__make_status(NULL); @@ -40484,7 +40535,7 @@ uint32_t coro_susp_point = self->private_impl.p_tell_me_more[0]; if (coro_susp_point) { - v_c = self->private_data.s_tell_me_more[0].v_c; + v_zlib_status = self->private_data.s_tell_me_more[0].v_zlib_status; } switch (coro_susp_point) { WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0; @@ -40506,6 +40557,7 @@ } } self->private_impl.f_zlib_is_dirty = true; + self->private_impl.f_ztxt_hist_pos = 0; } label__loop__continue:; while (true) { @@ -40614,34 +40666,86 @@ status = v_zlib_status; WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2); } else if (self->private_impl.f_chunk_type == 1951945850) { - { - const uint8_t *o_2_io2_a_src = io2_a_src; - wuffs_base__io_reader__limit(&io2_a_src, iop_a_src, - ((uint64_t)(self->private_impl.f_chunk_length))); - if (a_src) { - a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr)); - } - v_r_mark = ((uint64_t)(iop_a_src - io0_a_src)); + if (self->private_impl.f_ztxt_ri == self->private_impl.f_ztxt_wi) { { - if (a_dst) { - a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr)); + wuffs_base__io_buffer* o_2_v_w = v_w; + uint8_t *o_2_iop_v_w = iop_v_w; + uint8_t *o_2_io0_v_w = io0_v_w; + uint8_t *o_2_io1_v_w = io1_v_w; + uint8_t *o_2_io2_v_w = io2_v_w; + v_w = wuffs_base__io_writer__set( + &u_w, + &iop_v_w, + &io0_v_w, + &io1_v_w, + &io2_v_w, + wuffs_base__make_slice_u8(self->private_data.f_dst_palette, 1024), + self->private_impl.f_ztxt_hist_pos); + { + const uint8_t *o_3_io2_a_src = io2_a_src; + wuffs_base__io_reader__limit(&io2_a_src, iop_a_src, + ((uint64_t)(self->private_impl.f_chunk_length))); + if (a_src) { + a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr)); + } + v_w_mark = ((uint64_t)(iop_v_w - io0_v_w)); + v_r_mark = ((uint64_t)(iop_a_src - io0_a_src)); + { + u_w.meta.wi = ((size_t)(iop_v_w - u_w.data.ptr)); + if (a_src) { + a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr)); + } + 
wuffs_base__status t_2 = wuffs_zlib__decoder__transform_io(&self->private_data.f_zlib, v_w, a_src, wuffs_base__utility__empty_slice_u8()); + v_zlib_status = t_2; + iop_v_w = u_w.data.ptr + u_w.meta.wi; + if (a_src) { + iop_a_src = a_src->data.ptr + a_src->meta.ri; + } + } + wuffs_base__u32__sat_sub_indirect(&self->private_impl.f_chunk_length, ((uint32_t)((wuffs_base__io__count_since(v_r_mark, ((uint64_t)(iop_a_src - io0_a_src))) & 4294967295)))); + v_num_written = wuffs_base__io__count_since(v_w_mark, ((uint64_t)(iop_v_w - io0_v_w))); + io2_a_src = o_3_io2_a_src; + if (a_src) { + a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr)); + } } - if (a_src) { - a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr)); - } - wuffs_base__status t_2 = wuffs_zlib__decoder__transform_io(&self->private_data.f_zlib, a_dst, a_src, wuffs_base__utility__empty_slice_u8()); - v_zlib_status = t_2; - if (a_dst) { - iop_a_dst = a_dst->data.ptr + a_dst->meta.wi; - } - if (a_src) { - iop_a_src = a_src->data.ptr + a_src->meta.ri; - } + v_w = o_2_v_w; + iop_v_w = o_2_iop_v_w; + io0_v_w = o_2_io0_v_w; + io1_v_w = o_2_io1_v_w; + io2_v_w = o_2_io2_v_w; } - wuffs_base__u32__sat_sub_indirect(&self->private_impl.f_chunk_length, ((uint32_t)((wuffs_base__io__count_since(v_r_mark, ((uint64_t)(iop_a_src - io0_a_src))) & 4294967295)))); - io2_a_src = o_2_io2_a_src; - if (a_src) { - a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr)); + if (v_num_written > 1024) { + status = wuffs_base__make_status(wuffs_png__error__internal_error_inconsistent_i_o); + goto exit; + } + self->private_impl.f_ztxt_ri = 0; + self->private_impl.f_ztxt_wi = ((uint32_t)(v_num_written)); + wuffs_base__u64__sat_add_indirect(&self->private_impl.f_ztxt_hist_pos, v_num_written); + } + while (self->private_impl.f_ztxt_ri < self->private_impl.f_ztxt_wi) { + v_c2 = WUFFS_PNG__LATIN_1[self->private_data.f_dst_palette[self->private_impl.f_ztxt_ri]]; + if (v_c2 == 0) { + status = 
wuffs_base__make_status(wuffs_png__error__bad_text_chunk_not_latin_1); + goto exit; + } else if (v_c2 <= 127) { + if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) { + status = wuffs_base__make_status(wuffs_base__suspension__short_write); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3); + v_c2 = 0; + goto label__loop__continue; + } + self->private_impl.f_ztxt_ri += 1; + (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, ((uint8_t)(v_c2))), iop_a_dst += 1); + } else { + if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 1) { + status = wuffs_base__make_status(wuffs_base__suspension__short_write); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4); + v_c2 = 0; + goto label__loop__continue; + } + self->private_impl.f_ztxt_ri += 1; + (wuffs_base__poke_u16le__no_bounds_check(iop_a_dst, v_c2), iop_a_dst += 2); } } if (wuffs_base__status__is_ok(&v_zlib_status)) { @@ -40656,13 +40760,32 @@ goto exit; } goto ok; + } else if (v_zlib_status.repr != wuffs_base__suspension__short_write) { + status = v_zlib_status; + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5); } - status = v_zlib_status; - WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3); } else { status = wuffs_base__make_status(wuffs_png__error__internal_error_inconsistent_chunk_type); goto exit; } + } else if ((self->private_impl.f_chunk_type == 1951945833) && (self->private_impl.f_metadata_fourcc == 1263947862)) { + while (true) { + if (self->private_impl.f_chunk_length <= 0) { + goto label__loop__break; + } else if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) { + status = wuffs_base__make_status(wuffs_base__suspension__short_read); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6); + goto label__loop__continue; + } else if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) { + status = wuffs_base__make_status(wuffs_base__suspension__short_write); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(7); + goto label__loop__continue; + } + self->private_impl.f_chunk_length -= 1; + v_c = 
wuffs_base__peek_u8be__no_bounds_check(iop_a_src); + iop_a_src += 1; + (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_c), iop_a_dst += 1); + } } else { while (true) { if (self->private_impl.f_chunk_length <= 0) { @@ -40673,7 +40796,7 @@ goto label__loop__break; } else if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) { status = wuffs_base__make_status(wuffs_base__suspension__short_read); - WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(8); goto label__loop__continue; } v_c = wuffs_base__peek_u8be__no_bounds_check(iop_a_src); @@ -40682,14 +40805,31 @@ iop_a_src += 1; goto label__loop__break; } - if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) { - status = wuffs_base__make_status(wuffs_base__suspension__short_write); - WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5); - goto label__loop__continue; + v_c2 = WUFFS_PNG__LATIN_1[v_c]; + if (v_c2 == 0) { + status = wuffs_base__make_status(wuffs_png__error__bad_text_chunk_not_latin_1); + goto exit; + } else if (v_c2 <= 127) { + if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) { + status = wuffs_base__make_status(wuffs_base__suspension__short_write); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(9); + v_c2 = 0; + goto label__loop__continue; + } + self->private_impl.f_chunk_length -= 1; + iop_a_src += 1; + (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, ((uint8_t)(v_c2))), iop_a_dst += 1); + } else { + if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 1) { + status = wuffs_base__make_status(wuffs_base__suspension__short_write); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(10); + v_c2 = 0; + goto label__loop__continue; + } + self->private_impl.f_chunk_length -= 1; + iop_a_src += 1; + (wuffs_base__poke_u16le__no_bounds_check(iop_a_dst, v_c2), iop_a_dst += 2); } - self->private_impl.f_chunk_length -= 1; - iop_a_src += 1; - (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_c), iop_a_dst += 1); } } } @@ -40703,7 +40843,7 @@ } 
self->private_impl.f_chunk_length -= 2; { - WUFFS_BASE__COROUTINE_SUSPENSION_POINT(6); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT(11); if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) { status = wuffs_base__make_status(wuffs_base__suspension__short_read); goto suspend; @@ -40720,7 +40860,7 @@ goto exit; } { - WUFFS_BASE__COROUTINE_SUSPENSION_POINT(7); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT(12); if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) { status = wuffs_base__make_status(wuffs_base__suspension__short_read); goto suspend; @@ -40742,7 +40882,7 @@ } self->private_impl.f_chunk_length -= 1; { - WUFFS_BASE__COROUTINE_SUSPENSION_POINT(8); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT(13); if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) { status = wuffs_base__make_status(wuffs_base__suspension__short_read); goto suspend; @@ -40763,7 +40903,7 @@ } self->private_impl.f_chunk_length -= 1; { - WUFFS_BASE__COROUTINE_SUSPENSION_POINT(9); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT(14); if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) { status = wuffs_base__make_status(wuffs_base__suspension__short_read); goto suspend; @@ -40783,7 +40923,7 @@ goto exit; } self->private_data.s_tell_me_more[0].scratch = 4; - WUFFS_BASE__COROUTINE_SUSPENSION_POINT(10); + WUFFS_BASE__COROUTINE_SUSPENSION_POINT(15); if (self->private_data.s_tell_me_more[0].scratch > ((uint64_t)(io2_a_src - iop_a_src))) { self->private_data.s_tell_me_more[0].scratch -= ((uint64_t)(io2_a_src - iop_a_src)); iop_a_src = io2_a_src; @@ -40810,7 +40950,7 @@ suspend: self->private_impl.p_tell_me_more[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0; self->private_impl.active_coroutine = wuffs_base__status__is_suspension(&status) ? 4 : 0; - self->private_data.s_tell_me_more[0].v_c = v_c; + self->private_data.s_tell_me_more[0].v_zlib_status = v_zlib_status; goto exit; exit:
diff --git a/script/print-png-latin-1.go b/script/print-png-latin-1.go new file mode 100644 index 0000000..34fc86f --- /dev/null +++ b/script/print-png-latin-1.go
@@ -0,0 +1,72 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build ignore
+// +build ignore
+
+package main
+
+// print-png-latin-1.go prints the std/png Latin-1 to UTF-8 conversion table:
+// one entry per byte value, 0x00 through 0xFF, eight entries per line.
+//
+// Usage: go run print-png-latin-1.go
+
+import (
+	"fmt"
+	"os"
+)
+
+// entriesPerLine is the number of table entries printed on each output line.
+const entriesPerLine = 8
+
+// main runs main1. If main1 reports an error, main writes the error message
+// to stderr and exits with status 1.
+func main() {
+	err := main1()
+	if err == nil {
+		return
+	}
+	os.Stderr.WriteString(err.Error() + "\n")
+	os.Exit(1)
+}
+
+// main1 prints the table to stdout. Byte values outside the printable ranges
+// print as 0x0000, values below 128 print as themselves, and the remaining
+// values print as their two UTF-8 bytes packed into a little-endian uint16.
+// It always returns nil.
+func main1() error {
+	// The PNG spec (https://www.w3.org/TR/PNG/) says "*printable* [emphasis
+	// added] Latin-1 characters and spaces (only character codes 32-126 and
+	// 161-255 decimal are allowed)".
+	//
+	// See also https://www.w3.org/TR/2003/REC-PNG-20031110/iso_8859-1.txt
+	for code := rune(0); code <= 0xFF; code++ {
+		switch {
+		case (code < 32) || ((code > 126) && (code < 161)):
+			fmt.Printf(" 0x0000,")
+		case code < 128:
+			fmt.Printf(" 0x%04X,", code)
+		default:
+			// Swap the two UTF-8 bytes so that the printed hex digits read
+			// as a little-endian uint16.
+			encoded := string(code)
+			fmt.Printf(" 0x%02X%02X,", encoded[1], encoded[0])
+		}
+
+		if code%entriesPerLine == entriesPerLine-1 {
+			fmt.Println()
+		}
+	}
+	return nil
+}
diff --git a/std/png/common_consts.wuffs b/std/png/common_consts.wuffs index d85a6d8..2c4e419 100644 --- a/std/png/common_consts.wuffs +++ b/std/png/common_consts.wuffs
@@ -17,10 +17,12 @@ pub status "#bad chunk" pub status "#bad filter" pub status "#bad header" +pub status "#bad text chunk (not Latin-1)" pub status "#missing palette" pub status "#unsupported PNG compression method" pub status "#unsupported PNG file" +pri status "#internal error: inconsistent I/O" pri status "#internal error: inconsistent chunk type" pri status "#internal error: inconsistent frame bounds" pri status "#internal error: inconsistent workbuf length" @@ -101,3 +103,40 @@ 4, // color_type == 6: RGBA 0, ] + +// LATIN_1 converts from printable Latin-1 (ISO/IEC 8859-1) to UTF-8 (encoded +// as little-endian u16). It was created by script/print-png-latin-1.go. +pri const LATIN_1 : array[256] base.u16 = [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xA1C2, 0xA2C2, 0xA3C2, 0xA4C2, 0xA5C2, 0xA6C2, 0xA7C2, + 0xA8C2, 0xA9C2, 0xAAC2, 0xABC2, 0xACC2, 0xADC2, 0xAEC2, 0xAFC2, + 0xB0C2, 0xB1C2, 0xB2C2, 0xB3C2, 0xB4C2, 0xB5C2, 0xB6C2, 0xB7C2, + 0xB8C2, 0xB9C2, 0xBAC2, 0xBBC2, 0xBCC2, 0xBDC2, 0xBEC2, 0xBFC2, + 0x80C3, 0x81C3, 0x82C3, 0x83C3, 0x84C3, 0x85C3, 0x86C3, 0x87C3, + 0x88C3, 0x89C3, 0x8AC3, 0x8BC3, 0x8CC3, 0x8DC3, 0x8EC3, 0x8FC3, + 0x90C3, 0x91C3, 0x92C3, 0x93C3, 0x94C3, 0x95C3, 0x96C3, 0x97C3, + 0x98C3, 0x99C3, 0x9AC3, 0x9BC3, 0x9CC3, 0x9DC3, 0x9EC3, 0x9FC3, + 0xA0C3, 0xA1C3, 0xA2C3, 0xA3C3, 0xA4C3, 0xA5C3, 0xA6C3, 0xA7C3, + 0xA8C3, 0xA9C3, 0xAAC3, 0xABC3, 0xACC3, 0xADC3, 0xAEC3, 0xAFC3, + 0xB0C3, 0xB1C3, 0xB2C3, 0xB3C3, 0xB4C3, 0xB5C3, 0xB6C3, 0xB7C3, + 0xB8C3, 0xB9C3, 0xBAC3, 0xBBC3, 0xBCC3, 0xBDC3, 0xBEC3, 0xBFC3, +]
diff --git a/std/png/decode_png.wuffs b/std/png/decode_png.wuffs index f3a37dd..54e17e7 100644 --- a/std/png/decode_png.wuffs +++ b/std/png/decode_png.wuffs
@@ -166,12 +166,25 @@ metadata_y : base.u64, metadata_z : base.u64, + // ztxt_ri and ztxt_wi are read and write indexes into the dst_palette + // buffer, re-purposed as a zlib uncompression buffer for zTXt chunks. The + // upper bound, 1024, is the same as the dst_palette length. + ztxt_ri : base.u32[..= 1024], + ztxt_wi : base.u32[..= 1024], + // ztxt_hist_pos is the history position: how many uncompressed bytes have + // been generated. + ztxt_hist_pos : base.u64, + swizzler : base.pixel_swizzler, util : base.utility, )( crc32 : crc32.ieee_hasher, zlib : zlib.decoder, + // dst_palette and src_palette are used by the swizzler, during + // decode_frame. src_palette is initialized by processing the PLTE chunk. + // dst_palette is also re-purposed as a zlib uncompression buffer for zTXt + // chunks, during decode_image_config. dst_palette : array[4 * 256] base.u8, src_palette : array[4 * 256] base.u8, ) @@ -1324,6 +1337,10 @@ pub func decoder.tell_me_more?(dst: base.io_writer, minfo: nptr base.more_information, src: base.io_reader) { var c : base.u8 + var c2 : base.u16 + var w : base.io_writer + var num_written : base.u64 + var w_mark : base.u64 var r_mark : base.u64 var zlib_status : base.status @@ -1342,6 +1359,7 @@ } } this.zlib_is_dirty = true + this.ztxt_hist_pos = 0 } while.loop true { @@ -1395,28 +1413,87 @@ yield? zlib_status } else if this.chunk_type == 'zTXt'le { - // TODO: Convert uncompressed data from Latin-1 to UTF-8. - io_limit (io: args.src, limit: this.chunk_length as base.u64) { - r_mark = args.src.mark() - zlib_status =? this.zlib.transform_io?( - dst: args.dst, src: args.src, workbuf: this.util.empty_slice_u8()) - this.chunk_length ~sat-= - (args.src.count_since(mark: r_mark) & 0xFFFF_FFFF) as base.u32 + // Fill this.dst_palette, zlib-uncompressing producing Latin-1. 
+ if this.ztxt_ri == this.ztxt_wi { + io_bind (io: w, data: this.dst_palette[..], history_position: this.ztxt_hist_pos) { + io_limit (io: args.src, limit: this.chunk_length as base.u64) { + w_mark = w.mark() + r_mark = args.src.mark() + zlib_status =? this.zlib.transform_io?( + dst: w, src: args.src, workbuf: this.util.empty_slice_u8()) + this.chunk_length ~sat-= + (args.src.count_since(mark: r_mark) & 0xFFFF_FFFF) as base.u32 + num_written = w.count_since(mark: w_mark) + } + } + if num_written > 1024 { + return "#internal error: inconsistent I/O" + } + this.ztxt_ri = 0 + this.ztxt_wi = num_written as base.u32 + this.ztxt_hist_pos ~sat+= num_written } + // Drain this.dst_palette, converting from Latin-1 to UTF-8. + while this.ztxt_ri < this.ztxt_wi { + assert this.ztxt_ri < 1024 via "a < b: a < c; c <= b"(c: this.ztxt_wi) + c2 = LATIN_1[this.dst_palette[this.ztxt_ri]] + if c2 == 0 { + return "#bad text chunk (not Latin-1)" + } else if c2 <= 0x7F { + if args.dst.length() <= 0 { + yield? base."$short write" + c2 = 0 + continue.loop + } + this.ztxt_ri += 1 + args.dst.write_u8_fast!(a: c2 as base.u8) + } else { + if args.dst.length() <= 1 { + yield? base."$short write" + c2 = 0 + continue.loop + } + this.ztxt_ri += 1 + args.dst.write_u16le_fast!(a: c2) + } + } endwhile + if zlib_status.is_ok() { this.metadata_is_zlib_compressed = false break.loop } else if not zlib_status.is_suspension() { return zlib_status + } else if zlib_status <> base."$short write" { + yield? zlib_status } - yield? zlib_status } else { return "#internal error: inconsistent chunk type" } + } else if (this.chunk_type == 'iTXt'le) and (this.metadata_fourcc == 'KVPV'be) { + // iTXt value is UTF-8. + // + // TODO: verify data is UTF-8. + while true { + if this.chunk_length <= 0 { + break.loop + } else if args.src.length() <= 0 { + yield? base."$short read" + continue.loop + } else if args.dst.length() <= 0 { + yield? 
base."$short write" + continue.loop + } + this.chunk_length -= 1 + c = args.src.peek_u8() + args.src.skip_u32_fast!(actual: 1, worst_case: 1) + args.dst.write_u8_fast!(a: c) + } endwhile + } else { + // Other uncompressed keys and values are Latin-1. while true { if this.chunk_length <= 0 { // Keys are NUL-terminated but values are not. @@ -1434,14 +1511,28 @@ args.src.skip_u32_fast!(actual: 1, worst_case: 1) break.loop } - // TODO: Convert from Latin-1 to UTF-8. - if args.dst.length() <= 0 { - yield? base."$short write" - continue.loop + c2 = LATIN_1[c] + if c2 == 0 { + return "#bad text chunk (not Latin-1)" + } else if c2 <= 0x7F { + if args.dst.length() <= 0 { + yield? base."$short write" + c2 = 0 + continue.loop + } + this.chunk_length -= 1 + args.src.skip_u32_fast!(actual: 1, worst_case: 1) + args.dst.write_u8_fast!(a: c2 as base.u8) + } else { + if args.dst.length() <= 1 { + yield? base."$short write" + c2 = 0 + continue.loop + } + this.chunk_length -= 1 + args.src.skip_u32_fast!(actual: 1, worst_case: 1) + args.dst.write_u16le_fast!(a: c2) } - this.chunk_length -= 1 - args.src.skip_u32_fast!(actual: 1, worst_case: 1) - args.dst.write_u8_fast!(a: c) } endwhile } } endwhile.loop
diff --git a/test/c/std/png.c b/test/c/std/png.c index c291fee..102e3eb 100644 --- a/test/c/std/png.c +++ b/test/c/std/png.c
@@ -735,17 +735,16 @@ CHECK_STRING(read_file(&src, "test/data/artificial-png/key-value-pairs.png")); const char* wants[] = { - // TODO: the callee should produce UTF-8, not Latin-1. - "Key", // - "English", // - "Cl\xE9", // - "Fran\xE7\x61is", // - "zl\xEF\x62K", // - "zl\xEF\x62V", // - "U-Key", // - "U-значение", // - "Z-K\xEBy", // - "Z-значение", // + "Key", // + "English", // + "Clé", // + "Français", // + "zlïbK", // + "zlïbV", // + "U-Key", // + "U-значение", // + "Z-Këy", // + "Z-значение", // }; wuffs_png__decoder dec;