Convert std/png text from Latin-1 to UTF-8
Fixes #55
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 145d8cd..5a649fc 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9165,6 +9165,7 @@
extern const char wuffs_png__error__bad_chunk[];
extern const char wuffs_png__error__bad_filter[];
extern const char wuffs_png__error__bad_header[];
+extern const char wuffs_png__error__bad_text_chunk_not_latin_1[];
extern const char wuffs_png__error__missing_palette[];
extern const char wuffs_png__error__unsupported_png_compression_method[];
extern const char wuffs_png__error__unsupported_png_file[];
@@ -9381,6 +9382,9 @@
uint64_t f_metadata_x;
uint64_t f_metadata_y;
uint64_t f_metadata_z;
+ uint32_t f_ztxt_ri;
+ uint32_t f_ztxt_wi;
+ uint64_t f_ztxt_hist_pos;
wuffs_base__pixel_swizzler f_swizzler;
wuffs_base__empty_struct (*choosy_filter_1)(
@@ -9470,7 +9474,7 @@
uint64_t scratch;
} s_decode_pass[1];
struct {
- uint8_t v_c;
+ wuffs_base__status v_zlib_status;
uint64_t scratch;
} s_tell_me_more[1];
} private_data;
@@ -35327,9 +35331,11 @@
const char wuffs_png__error__bad_chunk[] = "#png: bad chunk";
const char wuffs_png__error__bad_filter[] = "#png: bad filter";
const char wuffs_png__error__bad_header[] = "#png: bad header";
+const char wuffs_png__error__bad_text_chunk_not_latin_1[] = "#png: bad text chunk (not Latin-1)";
const char wuffs_png__error__missing_palette[] = "#png: missing palette";
const char wuffs_png__error__unsupported_png_compression_method[] = "#png: unsupported PNG compression method";
const char wuffs_png__error__unsupported_png_file[] = "#png: unsupported PNG file";
+const char wuffs_png__error__internal_error_inconsistent_i_o[] = "#png: internal error: inconsistent I/O";
const char wuffs_png__error__internal_error_inconsistent_chunk_type[] = "#png: internal error: inconsistent chunk type";
const char wuffs_png__error__internal_error_inconsistent_frame_bounds[] = "#png: internal error: inconsistent frame bounds";
const char wuffs_png__error__internal_error_inconsistent_workbuf_length[] = "#png: internal error: inconsistent workbuf length";
@@ -35375,6 +35381,42 @@
1, 0, 3, 1, 2, 0, 4, 0,
};
+static const uint16_t
+WUFFS_PNG__LATIN_1[256] WUFFS_BASE__POTENTIALLY_UNUSED = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 41410, 41666, 41922, 42178, 42434, 42690, 42946,
+ 43202, 43458, 43714, 43970, 44226, 44482, 44738, 44994,
+ 45250, 45506, 45762, 46018, 46274, 46530, 46786, 47042,
+ 47298, 47554, 47810, 48066, 48322, 48578, 48834, 49090,
+ 32963, 33219, 33475, 33731, 33987, 34243, 34499, 34755,
+ 35011, 35267, 35523, 35779, 36035, 36291, 36547, 36803,
+ 37059, 37315, 37571, 37827, 38083, 38339, 38595, 38851,
+ 39107, 39363, 39619, 39875, 40131, 40387, 40643, 40899,
+ 41155, 41411, 41667, 41923, 42179, 42435, 42691, 42947,
+ 43203, 43459, 43715, 43971, 44227, 44483, 44739, 44995,
+ 45251, 45507, 45763, 46019, 46275, 46531, 46787, 47043,
+ 47299, 47555, 47811, 48067, 48323, 48579, 48835, 49091,
+};
+
// ---------------- Private Initializer Prototypes
// ---------------- Private Function Prototypes
@@ -40455,6 +40497,15 @@
wuffs_base__status status = wuffs_base__make_status(NULL);
uint8_t v_c = 0;
+ uint16_t v_c2 = 0;
+ wuffs_base__io_buffer u_w = wuffs_base__empty_io_buffer();
+ wuffs_base__io_buffer* v_w = &u_w;
+ uint8_t* iop_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io0_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io1_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io2_v_w WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint64_t v_num_written = 0;
+ uint64_t v_w_mark = 0;
uint64_t v_r_mark = 0;
wuffs_base__status v_zlib_status = wuffs_base__make_status(NULL);
@@ -40484,7 +40535,7 @@
uint32_t coro_susp_point = self->private_impl.p_tell_me_more[0];
if (coro_susp_point) {
- v_c = self->private_data.s_tell_me_more[0].v_c;
+ v_zlib_status = self->private_data.s_tell_me_more[0].v_zlib_status;
}
switch (coro_susp_point) {
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
@@ -40506,6 +40557,7 @@
}
}
self->private_impl.f_zlib_is_dirty = true;
+ self->private_impl.f_ztxt_hist_pos = 0;
}
label__loop__continue:;
while (true) {
@@ -40614,34 +40666,86 @@
status = v_zlib_status;
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
} else if (self->private_impl.f_chunk_type == 1951945850) {
- {
- const uint8_t *o_2_io2_a_src = io2_a_src;
- wuffs_base__io_reader__limit(&io2_a_src, iop_a_src,
- ((uint64_t)(self->private_impl.f_chunk_length)));
- if (a_src) {
- a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr));
- }
- v_r_mark = ((uint64_t)(iop_a_src - io0_a_src));
+ if (self->private_impl.f_ztxt_ri == self->private_impl.f_ztxt_wi) {
{
- if (a_dst) {
- a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ wuffs_base__io_buffer* o_2_v_w = v_w;
+ uint8_t *o_2_iop_v_w = iop_v_w;
+ uint8_t *o_2_io0_v_w = io0_v_w;
+ uint8_t *o_2_io1_v_w = io1_v_w;
+ uint8_t *o_2_io2_v_w = io2_v_w;
+ v_w = wuffs_base__io_writer__set(
+ &u_w,
+ &iop_v_w,
+ &io0_v_w,
+ &io1_v_w,
+ &io2_v_w,
+ wuffs_base__make_slice_u8(self->private_data.f_dst_palette, 1024),
+ self->private_impl.f_ztxt_hist_pos);
+ {
+ const uint8_t *o_3_io2_a_src = io2_a_src;
+ wuffs_base__io_reader__limit(&io2_a_src, iop_a_src,
+ ((uint64_t)(self->private_impl.f_chunk_length)));
+ if (a_src) {
+ a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr));
+ }
+ v_w_mark = ((uint64_t)(iop_v_w - io0_v_w));
+ v_r_mark = ((uint64_t)(iop_a_src - io0_a_src));
+ {
+ u_w.meta.wi = ((size_t)(iop_v_w - u_w.data.ptr));
+ if (a_src) {
+ a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+ }
+ wuffs_base__status t_2 = wuffs_zlib__decoder__transform_io(&self->private_data.f_zlib, v_w, a_src, wuffs_base__utility__empty_slice_u8());
+ v_zlib_status = t_2;
+ iop_v_w = u_w.data.ptr + u_w.meta.wi;
+ if (a_src) {
+ iop_a_src = a_src->data.ptr + a_src->meta.ri;
+ }
+ }
+ wuffs_base__u32__sat_sub_indirect(&self->private_impl.f_chunk_length, ((uint32_t)((wuffs_base__io__count_since(v_r_mark, ((uint64_t)(iop_a_src - io0_a_src))) & 4294967295))));
+ v_num_written = wuffs_base__io__count_since(v_w_mark, ((uint64_t)(iop_v_w - io0_v_w)));
+ io2_a_src = o_3_io2_a_src;
+ if (a_src) {
+ a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr));
+ }
}
- if (a_src) {
- a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
- }
- wuffs_base__status t_2 = wuffs_zlib__decoder__transform_io(&self->private_data.f_zlib, a_dst, a_src, wuffs_base__utility__empty_slice_u8());
- v_zlib_status = t_2;
- if (a_dst) {
- iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
- }
- if (a_src) {
- iop_a_src = a_src->data.ptr + a_src->meta.ri;
- }
+ v_w = o_2_v_w;
+ iop_v_w = o_2_iop_v_w;
+ io0_v_w = o_2_io0_v_w;
+ io1_v_w = o_2_io1_v_w;
+ io2_v_w = o_2_io2_v_w;
}
- wuffs_base__u32__sat_sub_indirect(&self->private_impl.f_chunk_length, ((uint32_t)((wuffs_base__io__count_since(v_r_mark, ((uint64_t)(iop_a_src - io0_a_src))) & 4294967295))));
- io2_a_src = o_2_io2_a_src;
- if (a_src) {
- a_src->meta.wi = ((size_t)(io2_a_src - a_src->data.ptr));
+ if (v_num_written > 1024) {
+ status = wuffs_base__make_status(wuffs_png__error__internal_error_inconsistent_i_o);
+ goto exit;
+ }
+ self->private_impl.f_ztxt_ri = 0;
+ self->private_impl.f_ztxt_wi = ((uint32_t)(v_num_written));
+ wuffs_base__u64__sat_add_indirect(&self->private_impl.f_ztxt_hist_pos, v_num_written);
+ }
+ while (self->private_impl.f_ztxt_ri < self->private_impl.f_ztxt_wi) {
+ v_c2 = WUFFS_PNG__LATIN_1[self->private_data.f_dst_palette[self->private_impl.f_ztxt_ri]];
+ if (v_c2 == 0) {
+ status = wuffs_base__make_status(wuffs_png__error__bad_text_chunk_not_latin_1);
+ goto exit;
+ } else if (v_c2 <= 127) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
+ v_c2 = 0;
+ goto label__loop__continue;
+ }
+ self->private_impl.f_ztxt_ri += 1;
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, ((uint8_t)(v_c2))), iop_a_dst += 1);
+ } else {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 1) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
+ v_c2 = 0;
+ goto label__loop__continue;
+ }
+ self->private_impl.f_ztxt_ri += 1;
+ (wuffs_base__poke_u16le__no_bounds_check(iop_a_dst, v_c2), iop_a_dst += 2);
}
}
if (wuffs_base__status__is_ok(&v_zlib_status)) {
@@ -40656,13 +40760,32 @@
goto exit;
}
goto ok;
+ } else if (v_zlib_status.repr != wuffs_base__suspension__short_write) {
+ status = v_zlib_status;
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
}
- status = v_zlib_status;
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
} else {
status = wuffs_base__make_status(wuffs_png__error__internal_error_inconsistent_chunk_type);
goto exit;
}
+ } else if ((self->private_impl.f_chunk_type == 1951945833) && (self->private_impl.f_metadata_fourcc == 1263947862)) {
+ while (true) {
+ if (self->private_impl.f_chunk_length <= 0) {
+ goto label__loop__break;
+ } else if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
+ goto label__loop__continue;
+ } else if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(7);
+ goto label__loop__continue;
+ }
+ self->private_impl.f_chunk_length -= 1;
+ v_c = wuffs_base__peek_u8be__no_bounds_check(iop_a_src);
+ iop_a_src += 1;
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_c), iop_a_dst += 1);
+ }
} else {
while (true) {
if (self->private_impl.f_chunk_length <= 0) {
@@ -40673,7 +40796,7 @@
goto label__loop__break;
} else if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(8);
goto label__loop__continue;
}
v_c = wuffs_base__peek_u8be__no_bounds_check(iop_a_src);
@@ -40682,14 +40805,31 @@
iop_a_src += 1;
goto label__loop__break;
}
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- status = wuffs_base__make_status(wuffs_base__suspension__short_write);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
- goto label__loop__continue;
+ v_c2 = WUFFS_PNG__LATIN_1[v_c];
+ if (v_c2 == 0) {
+ status = wuffs_base__make_status(wuffs_png__error__bad_text_chunk_not_latin_1);
+ goto exit;
+ } else if (v_c2 <= 127) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(9);
+ v_c2 = 0;
+ goto label__loop__continue;
+ }
+ self->private_impl.f_chunk_length -= 1;
+ iop_a_src += 1;
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, ((uint8_t)(v_c2))), iop_a_dst += 1);
+ } else {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 1) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(10);
+ v_c2 = 0;
+ goto label__loop__continue;
+ }
+ self->private_impl.f_chunk_length -= 1;
+ iop_a_src += 1;
+ (wuffs_base__poke_u16le__no_bounds_check(iop_a_dst, v_c2), iop_a_dst += 2);
}
- self->private_impl.f_chunk_length -= 1;
- iop_a_src += 1;
- (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_c), iop_a_dst += 1);
}
}
}
@@ -40703,7 +40843,7 @@
}
self->private_impl.f_chunk_length -= 2;
{
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(6);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(11);
if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
goto suspend;
@@ -40720,7 +40860,7 @@
goto exit;
}
{
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(7);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(12);
if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
goto suspend;
@@ -40742,7 +40882,7 @@
}
self->private_impl.f_chunk_length -= 1;
{
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(8);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(13);
if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
goto suspend;
@@ -40763,7 +40903,7 @@
}
self->private_impl.f_chunk_length -= 1;
{
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(9);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(14);
if (WUFFS_BASE__UNLIKELY(iop_a_src == io2_a_src)) {
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
goto suspend;
@@ -40783,7 +40923,7 @@
goto exit;
}
self->private_data.s_tell_me_more[0].scratch = 4;
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(10);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(15);
if (self->private_data.s_tell_me_more[0].scratch > ((uint64_t)(io2_a_src - iop_a_src))) {
self->private_data.s_tell_me_more[0].scratch -= ((uint64_t)(io2_a_src - iop_a_src));
iop_a_src = io2_a_src;
@@ -40810,7 +40950,7 @@
suspend:
self->private_impl.p_tell_me_more[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
self->private_impl.active_coroutine = wuffs_base__status__is_suspension(&status) ? 4 : 0;
- self->private_data.s_tell_me_more[0].v_c = v_c;
+ self->private_data.s_tell_me_more[0].v_zlib_status = v_zlib_status;
goto exit;
exit:
diff --git a/script/print-png-latin-1.go b/script/print-png-latin-1.go
new file mode 100644
index 0000000..34fc86f
--- /dev/null
+++ b/script/print-png-latin-1.go
@@ -0,0 +1,57 @@
+// Copyright 2021 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build ignore
+// +build ignore
+
+package main
+
+// print-png-latin-1.go prints the UTF-8 encoding of the std/png Latin-1 table.
+//
+// Usage: go run print-png-latin-1.go
+
+import (
+ "fmt"
+ "os"
+)
+
+func main() { // Entry point: run main1 and turn any error into a failing exit.
+ if err := main1(); err != nil {
+ os.Stderr.WriteString(err.Error() + "\n") // Report the error text on stderr.
+ os.Exit(1) // Non-zero exit status signals failure to the caller.
+ }
+}
+
+func main1() error {
+ // Print one table entry per Latin-1 code, 0 through 255. The PNG spec
+ // (https://www.w3.org/TR/PNG/) says "*printable* [emphasis added] Latin-1
+ // characters and spaces (only character codes 32-126 and 161-255 decimal
+ // are allowed)".
+ // See also https://www.w3.org/TR/2003/REC-PNG-20031110/iso_8859-1.txt
+ for r := rune(0); r <= 0xFF; r++ {
+ if (r < 32) || ((126 < r) && (r < 161)) {
+ fmt.Printf(" 0x0000,") // Code not allowed by the spec: emit a zero entry.
+ } else if r < 128 {
+ fmt.Printf(" 0x%04X,", r) // ASCII range: the entry is the code itself.
+ } else {
+ s := fmt.Sprintf("%c", r) // s is r formatted as a character (two UTF-8 bytes here).
+ fmt.Printf(" 0x%02X%02X,", s[1], s[0]) // UTF-8 as little-endian uint16: s[0] is the low byte.
+ }
+
+ if r%8 == 7 { // Break the output line after every eighth entry.
+ fmt.Println()
+ }
+ }
+ return nil
+}
diff --git a/std/png/common_consts.wuffs b/std/png/common_consts.wuffs
index d85a6d8..2c4e419 100644
--- a/std/png/common_consts.wuffs
+++ b/std/png/common_consts.wuffs
@@ -17,10 +17,12 @@
pub status "#bad chunk"
pub status "#bad filter"
pub status "#bad header"
+pub status "#bad text chunk (not Latin-1)"
pub status "#missing palette"
pub status "#unsupported PNG compression method"
pub status "#unsupported PNG file"
+pri status "#internal error: inconsistent I/O"
pri status "#internal error: inconsistent chunk type"
pri status "#internal error: inconsistent frame bounds"
pri status "#internal error: inconsistent workbuf length"
@@ -101,3 +103,40 @@
4, // color_type == 6: RGBA
0,
]
+
+// LATIN_1 maps printable Latin-1 (ISO/IEC 8859-1) to UTF-8 (as little-endian
+// u16); 0 means not printable. It was created by script/print-png-latin-1.go.
+pri const LATIN_1 : array[256] base.u16 = [
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0xA1C2, 0xA2C2, 0xA3C2, 0xA4C2, 0xA5C2, 0xA6C2, 0xA7C2,
+ 0xA8C2, 0xA9C2, 0xAAC2, 0xABC2, 0xACC2, 0xADC2, 0xAEC2, 0xAFC2,
+ 0xB0C2, 0xB1C2, 0xB2C2, 0xB3C2, 0xB4C2, 0xB5C2, 0xB6C2, 0xB7C2,
+ 0xB8C2, 0xB9C2, 0xBAC2, 0xBBC2, 0xBCC2, 0xBDC2, 0xBEC2, 0xBFC2,
+ 0x80C3, 0x81C3, 0x82C3, 0x83C3, 0x84C3, 0x85C3, 0x86C3, 0x87C3,
+ 0x88C3, 0x89C3, 0x8AC3, 0x8BC3, 0x8CC3, 0x8DC3, 0x8EC3, 0x8FC3,
+ 0x90C3, 0x91C3, 0x92C3, 0x93C3, 0x94C3, 0x95C3, 0x96C3, 0x97C3,
+ 0x98C3, 0x99C3, 0x9AC3, 0x9BC3, 0x9CC3, 0x9DC3, 0x9EC3, 0x9FC3,
+ 0xA0C3, 0xA1C3, 0xA2C3, 0xA3C3, 0xA4C3, 0xA5C3, 0xA6C3, 0xA7C3,
+ 0xA8C3, 0xA9C3, 0xAAC3, 0xABC3, 0xACC3, 0xADC3, 0xAEC3, 0xAFC3,
+ 0xB0C3, 0xB1C3, 0xB2C3, 0xB3C3, 0xB4C3, 0xB5C3, 0xB6C3, 0xB7C3,
+ 0xB8C3, 0xB9C3, 0xBAC3, 0xBBC3, 0xBCC3, 0xBDC3, 0xBEC3, 0xBFC3,
+]
diff --git a/std/png/decode_png.wuffs b/std/png/decode_png.wuffs
index f3a37dd..54e17e7 100644
--- a/std/png/decode_png.wuffs
+++ b/std/png/decode_png.wuffs
@@ -166,12 +166,25 @@
metadata_y : base.u64,
metadata_z : base.u64,
+ // ztxt_ri and ztxt_wi are read and write indexes into the dst_palette
+ // buffer, re-purposed as a zlib uncompression buffer for zTXt chunks. The
+ // upper bound, 1024, is the same as the dst_palette length.
+ ztxt_ri : base.u32[..= 1024],
+ ztxt_wi : base.u32[..= 1024],
+ // ztxt_hist_pos is the history position: how many uncompressed bytes have
+ // been generated.
+ ztxt_hist_pos : base.u64,
+
swizzler : base.pixel_swizzler,
util : base.utility,
)(
crc32 : crc32.ieee_hasher,
zlib : zlib.decoder,
+ // dst_palette and src_palette are used by the swizzler, during
+ // decode_frame. src_palette is initialized by processing the PLTE chunk.
+ // dst_palette is also re-purposed as a zlib uncompression buffer for zTXt
+ // chunks, during decode_image_config.
dst_palette : array[4 * 256] base.u8,
src_palette : array[4 * 256] base.u8,
)
@@ -1324,6 +1337,10 @@
pub func decoder.tell_me_more?(dst: base.io_writer, minfo: nptr base.more_information, src: base.io_reader) {
var c : base.u8
+ var c2 : base.u16
+ var w : base.io_writer
+ var num_written : base.u64
+ var w_mark : base.u64
var r_mark : base.u64
var zlib_status : base.status
@@ -1342,6 +1359,7 @@
}
}
this.zlib_is_dirty = true
+ this.ztxt_hist_pos = 0
}
while.loop true {
@@ -1395,28 +1413,87 @@
yield? zlib_status
} else if this.chunk_type == 'zTXt'le {
- // TODO: Convert uncompressed data from Latin-1 to UTF-8.
- io_limit (io: args.src, limit: this.chunk_length as base.u64) {
- r_mark = args.src.mark()
- zlib_status =? this.zlib.transform_io?(
- dst: args.dst, src: args.src, workbuf: this.util.empty_slice_u8())
- this.chunk_length ~sat-=
- (args.src.count_since(mark: r_mark) & 0xFFFF_FFFF) as base.u32
+ // Fill this.dst_palette with zlib-uncompressed (nominally Latin-1) bytes.
+ if this.ztxt_ri == this.ztxt_wi {
+ io_bind (io: w, data: this.dst_palette[..], history_position: this.ztxt_hist_pos) {
+ io_limit (io: args.src, limit: this.chunk_length as base.u64) {
+ w_mark = w.mark()
+ r_mark = args.src.mark()
+ zlib_status =? this.zlib.transform_io?(
+ dst: w, src: args.src, workbuf: this.util.empty_slice_u8())
+ this.chunk_length ~sat-=
+ (args.src.count_since(mark: r_mark) & 0xFFFF_FFFF) as base.u32
+ num_written = w.count_since(mark: w_mark)
+ }
+ }
+ if num_written > 1024 {
+ return "#internal error: inconsistent I/O"
+ }
+ this.ztxt_ri = 0
+ this.ztxt_wi = num_written as base.u32
+ this.ztxt_hist_pos ~sat+= num_written
}
+ // Drain this.dst_palette, converting from Latin-1 to UTF-8.
+ while this.ztxt_ri < this.ztxt_wi {
+ assert this.ztxt_ri < 1024 via "a < b: a < c; c <= b"(c: this.ztxt_wi)
+ c2 = LATIN_1[this.dst_palette[this.ztxt_ri]]
+ if c2 == 0 {
+ return "#bad text chunk (not Latin-1)"
+ } else if c2 <= 0x7F {
+ if args.dst.length() <= 0 {
+ yield? base."$short write"
+ c2 = 0
+ continue.loop
+ }
+ this.ztxt_ri += 1
+ args.dst.write_u8_fast!(a: c2 as base.u8)
+ } else {
+ if args.dst.length() <= 1 {
+ yield? base."$short write"
+ c2 = 0
+ continue.loop
+ }
+ this.ztxt_ri += 1
+ args.dst.write_u16le_fast!(a: c2)
+ }
+ } endwhile
+
if zlib_status.is_ok() {
this.metadata_is_zlib_compressed = false
break.loop
} else if not zlib_status.is_suspension() {
return zlib_status
+ } else if zlib_status <> base."$short write" {
+ yield? zlib_status
}
- yield? zlib_status
} else {
return "#internal error: inconsistent chunk type"
}
+ } else if (this.chunk_type == 'iTXt'le) and (this.metadata_fourcc == 'KVPV'be) {
+ // iTXt value is UTF-8.
+ //
+ // TODO: verify data is UTF-8.
+ while true {
+ if this.chunk_length <= 0 {
+ break.loop
+ } else if args.src.length() <= 0 {
+ yield? base."$short read"
+ continue.loop
+ } else if args.dst.length() <= 0 {
+ yield? base."$short write"
+ continue.loop
+ }
+ this.chunk_length -= 1
+ c = args.src.peek_u8()
+ args.src.skip_u32_fast!(actual: 1, worst_case: 1)
+ args.dst.write_u8_fast!(a: c)
+ } endwhile
+
} else {
+ // Other uncompressed keys and values are Latin-1.
while true {
if this.chunk_length <= 0 {
// Keys are NUL-terminated but values are not.
@@ -1434,14 +1511,28 @@
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
break.loop
}
- // TODO: Convert from Latin-1 to UTF-8.
- if args.dst.length() <= 0 {
- yield? base."$short write"
- continue.loop
+ c2 = LATIN_1[c]
+ if c2 == 0 {
+ return "#bad text chunk (not Latin-1)"
+ } else if c2 <= 0x7F {
+ if args.dst.length() <= 0 {
+ yield? base."$short write"
+ c2 = 0
+ continue.loop
+ }
+ this.chunk_length -= 1
+ args.src.skip_u32_fast!(actual: 1, worst_case: 1)
+ args.dst.write_u8_fast!(a: c2 as base.u8)
+ } else {
+ if args.dst.length() <= 1 {
+ yield? base."$short write"
+ c2 = 0
+ continue.loop
+ }
+ this.chunk_length -= 1
+ args.src.skip_u32_fast!(actual: 1, worst_case: 1)
+ args.dst.write_u16le_fast!(a: c2)
}
- this.chunk_length -= 1
- args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- args.dst.write_u8_fast!(a: c)
} endwhile
}
} endwhile.loop
diff --git a/test/c/std/png.c b/test/c/std/png.c
index c291fee..102e3eb 100644
--- a/test/c/std/png.c
+++ b/test/c/std/png.c
@@ -735,17 +735,16 @@
CHECK_STRING(read_file(&src, "test/data/artificial-png/key-value-pairs.png"));
const char* wants[] = {
- // TODO: the callee should produce UTF-8, not Latin-1.
- "Key", //
- "English", //
- "Cl\xE9", //
- "Fran\xE7\x61is", //
- "zl\xEF\x62K", //
- "zl\xEF\x62V", //
- "U-Key", //
- "U-значение", //
- "Z-K\xEBy", //
- "Z-значение", //
+ "Key", //
+ "English", //
+ "Clé", //
+ "Français", //
+ "zlïbK", //
+ "zlïbV", //
+ "U-Key", //
+ "U-значение", //
+ "Z-Këy", //
+ "Z-значение", //
};
wuffs_png__decoder dec;