Add std/json quirk_allow_comment_etc
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 9aadd1a..fe922b0 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6022,6 +6022,7 @@
uint32_t p_decode_tokens[1];
uint32_t p_decode_leading[1];
+ uint32_t p_decode_comment[1];
uint32_t p_decode_trailing_new_line[1];
} private_impl;
@@ -6033,6 +6034,9 @@
uint32_t v_expect;
uint32_t v_expect_after_value;
} s_decode_tokens[1];
+ struct {
+ uint32_t v_link_prev;
+ } s_decode_comment[1];
} private_data;
#ifdef __cplusplus
@@ -20367,6 +20371,11 @@
wuffs_base__io_buffer* a_src);
static wuffs_base__status //
+wuffs_json__decoder__decode_comment(wuffs_json__decoder* self,
+ wuffs_base__token_buffer* a_dst,
+ wuffs_base__io_buffer* a_src);
+
+static wuffs_base__status //
wuffs_json__decoder__decode_trailing_new_line(wuffs_json__decoder* self,
wuffs_base__token_buffer* a_dst,
wuffs_base__io_buffer* a_src);
@@ -21549,6 +21558,28 @@
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(20);
goto label__outer__continue;
}
+ } else if (v_class == 12) {
+ if (self->private_impl.f_quirk_enabled_allow_comment_block ||
+ self->private_impl.f_quirk_enabled_allow_comment_line) {
+ if (a_dst) {
+ a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ }
+ if (a_src) {
+ a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+ }
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(21);
+ status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
+ if (a_dst) {
+ iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
+ }
+ if (a_src) {
+ iop_a_src = a_src->data.ptr + a_src->meta.ri;
+ }
+ if (status.repr) {
+ goto suspend;
+ }
+ goto label__outer__continue;
+ }
}
status = wuffs_base__make_status(wuffs_json__error__bad_input);
goto exit;
@@ -21567,7 +21598,7 @@
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(21);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(22);
status =
wuffs_json__decoder__decode_trailing_new_line(self, a_dst, a_src);
if (a_dst) {
@@ -21913,6 +21944,209 @@
return status;
}
+// -------- func json.decoder.decode_comment
+
+// wuffs_json__decoder__decode_comment consumes one comment from a_src and
+// writes the tokens that span it to a_dst. Two forms are recognized, each
+// gated by its own quirk flag: a block comment opened by the u16le value
+// 10799 (0x2A2F, the bytes "/*") and closed by 12074 (0x2F2A, "*/"), and a
+// line comment opened by 12079 (0x2F2F, "//") and closed by byte 10 ('\n').
+//
+// It is written as a resumable coroutine: the resume point is kept in
+// self->private_impl.p_decode_comment[0] and v_link_prev is kept in
+// self->private_data.s_decode_comment[0] across suspensions.
+//
+// Returns an ok (NULL repr) status once the closing delimiter has been
+// consumed, a short_write or short_read suspension when a_dst has no room or
+// a_src has too few bytes, or wuffs_json__error__bad_input when the input
+// does not start with an enabled comment opener or a_src is closed before the
+// comment ends.
+static wuffs_base__status //
+wuffs_json__decoder__decode_comment(wuffs_json__decoder* self,
+ wuffs_base__token_buffer* a_dst,
+ wuffs_base__io_buffer* a_src) {
+ wuffs_base__status status = wuffs_base__make_status(NULL);
+
+ uint8_t v_c8 = 0;
+ uint16_t v_c16 = 0;
+ uint32_t v_link_prev = 0;
+ uint32_t v_length = 0;
+
+ // Token write cursor (iop_a_dst) and its limit (io2_a_dst). A closed a_dst
+ // gets limit == cursor, i.e. zero tokens of room.
+ wuffs_base__token* iop_a_dst = NULL;
+ wuffs_base__token* io0_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ wuffs_base__token* io1_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ wuffs_base__token* io2_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ if (a_dst) {
+ io0_a_dst = a_dst->data.ptr;
+ io1_a_dst = io0_a_dst + a_dst->meta.wi;
+ iop_a_dst = io1_a_dst;
+ io2_a_dst = io0_a_dst + a_dst->data.len;
+ if (a_dst->meta.closed) {
+ io2_a_dst = iop_a_dst;
+ }
+ }
+ // Byte read cursor (iop_a_src) and its limit (io2_a_src, the write index).
+ uint8_t* iop_a_src = NULL;
+ uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io1_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io2_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ if (a_src) {
+ io0_a_src = a_src->data.ptr;
+ io1_a_src = io0_a_src + a_src->meta.ri;
+ iop_a_src = io1_a_src;
+ io2_a_src = io0_a_src + a_src->meta.wi;
+ }
+
+ // A non-zero saved suspension point means this call resumes an earlier one,
+ // so the one local that must survive suspension is restored first.
+ uint32_t coro_susp_point = self->private_impl.p_decode_comment[0];
+ if (coro_susp_point) {
+ v_link_prev = self->private_data.s_decode_comment[0].v_link_prev;
+ }
+ switch (coro_susp_point) {
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
+
+ // Loop until there is room for at least one token and at least two source
+ // bytes (enough to hold a comment opener) are available.
+ label__0__continue:;
+ while ((((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) ||
+ (((uint64_t)(io2_a_src - iop_a_src)) <= 1)) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(1);
+ goto label__0__continue;
+ }
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
+ }
+ status = wuffs_base__make_status(wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
+ }
+ v_c16 = wuffs_base__load_u16le__no_bounds_check(iop_a_src);
+ // 10799 is 0x2A2F: the bytes "/*" loaded little-endian.
+ if ((v_c16 == 10799) &&
+ self->private_impl.f_quirk_enabled_allow_comment_block) {
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ // The two opener bytes count towards the first token's length.
+ v_length = 2;
+ label__comment_block__continue:;
+ while (true) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
+ v_length = 0;
+ goto label__comment_block__continue;
+ }
+ while (true) {
+ // Two bytes are needed to test for the "*/" closer. With fewer, flush
+ // what has been consumed so far as a non-final token: link bit 1 is
+ // set on every token except the last, and v_link_prev (bit 2) is set
+ // on every token except the first.
+ if (((uint64_t)(io2_a_src - iop_a_src)) <= 1) {
+ if (v_length > 0) {
+ // value_minor 2 is used for every block-comment token here;
+ // presumably WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK — TODO
+ // confirm against the token definitions.
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(2)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)((1 | v_link_prev)))
+ << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_link_prev = 2;
+ }
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
+ }
+ status =
+ wuffs_base__make_status(wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
+ v_length = 0;
+ goto label__comment_block__continue;
+ }
+ v_c16 = wuffs_base__load_u16le__no_bounds_check(iop_a_src);
+ // 12074 is 0x2F2A: the bytes "*/" loaded little-endian. The final
+ // token includes both closer bytes and does not set link bit 1.
+ if (v_c16 == 12074) {
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(2)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(v_link_prev)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_length + 2)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ status = wuffs_base__make_status(NULL);
+ goto ok;
+ }
+ // Not the closer: consume a single byte so that a closer straddling
+ // this position is still seen on the next iteration.
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ // 65533 is 0xFFFD. At that point the pending bytes are flushed as a
+ // non-final token (of length v_length + 1) and counting restarts.
+ if (v_length >= 65533) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(2)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)((1 | v_link_prev)))
+ << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_length + 1)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_length = 0;
+ v_link_prev = 2;
+ goto label__comment_block__continue;
+ }
+ v_length += 1;
+ }
+ }
+ // 12079 is 0x2F2F: the bytes "//" loaded little-endian.
+ } else if ((v_c16 == 12079) &&
+ self->private_impl.f_quirk_enabled_allow_comment_line) {
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ // The two opener bytes count towards the first token's length.
+ v_length = 2;
+ label__comment_line__continue:;
+ while (true) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status = wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
+ v_length = 0;
+ goto label__comment_line__continue;
+ }
+ while (true) {
+ // Only one byte is needed to test for the '\n' terminator. When none
+ // is available, flush the bytes consumed so far as a non-final token.
+ // A closed source at this point is an error: the comment never saw
+ // its terminating '\n'.
+ if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
+ if (v_length > 0) {
+ // value_minor 1 is used for every line-comment token here;
+ // presumably WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE — TODO
+ // confirm against the token definitions.
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(1)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)((1 | v_link_prev)))
+ << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_link_prev = 2;
+ }
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
+ }
+ status =
+ wuffs_base__make_status(wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
+ v_length = 0;
+ goto label__comment_line__continue;
+ }
+ v_c8 = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
+ // 10 is '\n'. The final token includes the terminator byte and does
+ // not set link bit 1.
+ if (v_c8 == 10) {
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(1)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(v_link_prev)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_length + 1)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ status = wuffs_base__make_status(NULL);
+ goto ok;
+ }
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ // 65533 is 0xFFFD. At that point the pending bytes are flushed as a
+ // non-final token (of length v_length + 1) and counting restarts.
+ if (v_length >= 65533) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(1)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)((1 | v_link_prev)))
+ << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_length + 1)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_length = 0;
+ v_link_prev = 2;
+ goto label__comment_line__continue;
+ }
+ v_length += 1;
+ }
+ }
+ }
+ // Reached only when the first two bytes are not an opener whose quirk is
+ // enabled; both comment loops leave via "goto ok" or "goto exit".
+ status = wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
+ // Not reachable: it directly follows the unconditional goto above.
+ goto ok;
+ ok:
+ self->private_impl.p_decode_comment[0] = 0;
+ goto exit;
+ }
+
+ goto suspend;
+suspend:
+ // The resume point is kept only for suspension statuses; any other status
+ // resets it to 0. v_link_prev is saved for the next call.
+ self->private_impl.p_decode_comment[0] =
+ wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
+ self->private_data.s_decode_comment[0].v_link_prev = v_link_prev;
+
+ goto exit;
+exit:
+ // Publish the advanced cursors back into the caller's buffers.
+ if (a_dst) {
+ a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ }
+ if (a_src) {
+ a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+ }
+
+ return status;
+}
+
// -------- func json.decoder.decode_trailing_new_line
static wuffs_base__status //
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index e297898..176fd09 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -1070,6 +1070,12 @@
yield? base."$short read"
continue.outer
}
+
+ } else if class == 0x0C { // 0x0C is CLASS_COMMENT.
+ if this.quirk_enabled_allow_comment_block or this.quirk_enabled_allow_comment_line {
+ this.decode_comment?(dst: args.dst, src: args.src)
+ continue.outer
+ }
}
return "#bad input"
@@ -1310,6 +1316,145 @@
}
}
+// decode_comment consumes one comment from args.src and writes the tokens that
+// span it to args.dst. A block comment ("/*" through "*/") is accepted only
+// when quirk_enabled_allow_comment_block is set, and a line comment ("//"
+// through '\n') only when quirk_enabled_allow_comment_line is set. Anything
+// else, or a source that is closed before the comment's end, is "#bad input".
+//
+// A comment may be split over several tokens (when the source runs short or
+// the running length reaches 0xFFFD). Link bit 0x1 is set on every token
+// except the last, and link_prev (bit 0x2) on every token except the first.
+pri func decoder.decode_comment?(dst: base.token_writer, src: base.io_reader) {
+ var c8 : base.u8
+ var c16 : base.u16
+ var link_prev : base.u32[..= 0x2]
+ var length : base.u32[..= 0xFFFD]
+
+ // Wait for room for at least one token and for at least two source bytes
+ // (enough to hold a comment opener).
+ while (args.dst.available() <= 0) or (args.src.available() <= 1),
+ post args.dst.available() > 0,
+ post args.src.available() > 1,
+ {
+ if args.dst.available() <= 0 {
+ yield? base."$short write"
+ continue
+ }
+ if args.src.is_closed() {
+ return "#bad input"
+ }
+ yield? base."$short read"
+ }
+ c16 = args.src.peek_u16le()
+
+ // 0x2A2F is "/*" little-endian.
+ if (c16 == 0x2A2F) and this.quirk_enabled_allow_comment_block {
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ // The two opener bytes count towards the first token's length.
+ length = 2
+
+ while.comment_block true {
+ if args.dst.available() <= 0 {
+ yield? base."$short write"
+ length = 0
+ continue.comment_block
+ }
+
+ while true,
+ pre args.dst.available() > 0,
+ {
+ // Two bytes are needed to test for "*/". With fewer, flush what has
+ // been consumed so far as a non-final token before yielding.
+ if args.src.available() <= 1 {
+ if length > 0 {
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x2,
+ link: 0x1 | link_prev,
+ length: length)
+ link_prev = 0x2
+ }
+ if args.src.is_closed() {
+ return "#bad input"
+ }
+ yield? base."$short read"
+ length = 0
+ continue.comment_block
+ }
+
+ c16 = args.src.peek_u16le()
+ if c16 == 0x2F2A { // 0x2F2A is "*/" little-endian.
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ // Final token: includes both closer bytes and omits link bit 0x1.
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x2,
+ link: link_prev,
+ length: length + 2)
+ return ok
+ }
+
+ // Not the closer: advance by one byte only, so that a closer starting
+ // at the next byte is still seen.
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ // length may not exceed 0xFFFD, so flush a non-final token (covering
+ // length + 1 bytes) and restart the count.
+ if length >= 0xFFFD {
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x2,
+ link: 0x1 | link_prev,
+ length: length + 1)
+ length = 0
+ link_prev = 0x2
+ continue.comment_block
+ }
+ length += 1
+ }
+ } endwhile.comment_block
+
+ // 0x2F2F is "//" little-endian.
+ } else if (c16 == 0x2F2F) and this.quirk_enabled_allow_comment_line {
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ // The two opener bytes count towards the first token's length.
+ length = 2
+
+ while.comment_line true {
+ if args.dst.available() <= 0 {
+ yield? base."$short write"
+ length = 0
+ continue.comment_line
+ }
+
+ while true,
+ pre args.dst.available() > 0,
+ {
+ // Only one byte is needed to test for '\n'. With none, flush what has
+ // been consumed so far as a non-final token before yielding. A closed
+ // source here means the terminating '\n' never arrived.
+ if args.src.available() <= 0 {
+ if length > 0 {
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x1,
+ link: 0x1 | link_prev,
+ length: length)
+ link_prev = 0x2
+ }
+ if args.src.is_closed() {
+ return "#bad input"
+ }
+ yield? base."$short read"
+ length = 0
+ continue.comment_line
+ }
+
+ c8 = args.src.peek_u8()
+ if c8 == 0x0A { // 0x0A is '\n'.
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ // Final token: includes the '\n' and omits link bit 0x1.
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x1,
+ link: link_prev,
+ length: length + 1)
+ return ok
+ }
+
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ // length may not exceed 0xFFFD, so flush a non-final token (covering
+ // length + 1 bytes) and restart the count.
+ if length >= 0xFFFD {
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x1,
+ link: 0x1 | link_prev,
+ length: length + 1)
+ length = 0
+ link_prev = 0x2
+ continue.comment_line
+ }
+ length += 1
+ }
+ } endwhile.comment_line
+ }
+
+ // Neither opener matched with its quirk enabled.
+ return "#bad input"
+}
+
pri func decoder.decode_trailing_new_line?(dst: base.token_writer, src: base.io_reader) {
var c : base.u8
var whitespace_length : base.u32[..= 0xFFFE]
diff --git a/std/json/decode_quirks.wuffs b/std/json/decode_quirks.wuffs
index 4daf310..c2bda1a 100644
--- a/std/json/decode_quirks.wuffs
+++ b/std/json/decode_quirks.wuffs
@@ -68,18 +68,23 @@
// anywhere whitespace would be, although see the quirk_allow_trailing_new_line
// comment for additional interaction when combining multiple quirks.
//
-// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK tokens.
+// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK tokens. The token
+// chain's source bytes include the starting "/*" and the ending "*/".
pub const quirk_allow_comment_block base.u32 = 0x4909_9400 | 0x08
// When this quirk is enabled, "// C/C++ style line comments\n" are accepted
// anywhere whitespace would be, although see the quirk_allow_trailing_new_line
// comment for additional interaction when combining multiple quirks.
//
-// A line comment may omit the trailing '\n' if there is no input afterwards
-// (i.e. the line comment ends with the end-of-file), but note the
-// quirk_allow_trailing_new_line interaction already mentioned.
+// A line comment may not omit the ending "\n", even if there is no input
+// afterwards (i.e. the prospective line comment ends with the end-of-file).
//
-// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE tokens.
+// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE tokens. The token
+// chain's source bytes include the starting "//" and the ending "\n".
+//
+// Even if the line comments are on consecutive lines, each line comment is a
+// separate token chain. There may be whitespace tokens between one line
+// comment's ending "\n" and the next one's starting "//".
pub const quirk_allow_comment_line base.u32 = 0x4909_9400 | 0x09
// When this quirk is enabled, there may be a comma after the final array
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 2d9cb1e..e345344 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -1330,6 +1330,78 @@
}
+// test_wuffs_json_decode_quirk_allow_comment_etc decodes each test input four
+// times, once per combination of the block-comment and line-comment quirks,
+// and checks both the returned status and that the lengths of the emitted
+// tokens add up to the number of source bytes consumed.
+//
+// Returns NULL on success, or a failure message (via RETURN_FAIL or
+// CHECK_STATUS) on the first mismatch.
const char* //
+test_wuffs_json_decode_quirk_allow_comment_etc() {
+  CHECK_FOCUS(__func__);
+
+  struct {
+    // want has 4 bytes, one for each possible q:
+    //  - q&1 sets WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK.
+    //  - q&2 sets WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE.
+    // An 'X', '+' or '-' means that decoding should succeed (and consume the
+    // entire input), succeed (without consuming the entire input) or fail.
+    const char* want;
+    const char* str;
+  } test_cases[] = {
+      {.want = "-X-X", .str = "[ /*com*/ 0]"},
+      {.want = "--XX", .str = "//l\n //m\n0"},
+      {.want = "---X", .str = "[ 0, /*com*/ 1 //l\n\n]"},
+      {.want = "----", .str = "/*/0"},   // Not a valid slash-star comment.
+      {.want = "----", .str = "[4/5]"},  // Lone slash.
+  };
+
+  int tc;
+  for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
+    int q;
+    for (q = 0; q < 4; q++) {
+      // A fresh decoder per (tc, q) pair, so quirk settings never leak from
+      // one combination into the next.
+      wuffs_json__decoder dec;
+      CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
+                                     &dec, sizeof dec, WUFFS_VERSION,
+                                     WUFFS_INITIALIZE__DEFAULT_OPTIONS));
+      wuffs_json__decoder__set_quirk_enabled(
+          &dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, q & 1);
+      wuffs_json__decoder__set_quirk_enabled(
+          &dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, q & 2);
+
+      // The source buffer is created closed (the trailing "true"), so the
+      // whole input is presented in a single decode_tokens call.
+      wuffs_base__token_buffer tok =
+          wuffs_base__make_token_buffer_writer(global_have_token_slice);
+      wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader(
+          wuffs_base__make_slice_u8((void*)(test_cases[tc].str),
+                                    strlen(test_cases[tc].str)),
+          true);
+      const char* have =
+          wuffs_json__decoder__decode_tokens(&dec, &tok, &src).repr;
+      const char* want =
+          (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input;
+      if (have != want) {
+        // Either pointer may be NULL here (NULL means "ok"), and passing NULL
+        // for a "%s" conversion is undefined behavior, so substitute a
+        // printable placeholder.
+        RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc,
+                    q, have ? have : "(null)", want ? want : "(null)");
+      }
+
+      // Whether decoding succeeded or failed, the emitted tokens must account
+      // for exactly the bytes that the decoder consumed.
+      size_t total_length = 0;
+      while (tok.meta.ri < tok.meta.wi) {
+        total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
+      }
+      if (total_length != src.meta.ri) {
+        RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q,
+                    total_length, src.meta.ri);
+      }
+      if (test_cases[tc].want[q] == 'X') {
+        if (total_length != src.data.len) {
+          RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q,
+                      total_length, src.data.len);
+        }
+      } else if (test_cases[tc].want[q] == '+') {
+        if (total_length >= src.data.len) {
+          RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q,
+                      total_length, src.data.len);
+        }
+      }
+    }
+  }
+  return NULL;
+}
+
+const char* //
test_wuffs_json_decode_quirk_allow_leading_etc() {
CHECK_FOCUS(__func__);
@@ -1973,6 +2045,7 @@
test_wuffs_json_decode_prior_valid_utf_8,
test_wuffs_json_decode_quirk_allow_backslash_etc,
test_wuffs_json_decode_quirk_allow_backslash_x,
+ test_wuffs_json_decode_quirk_allow_comment_etc,
test_wuffs_json_decode_quirk_allow_leading_etc,
test_wuffs_json_decode_quirk_allow_trailing_etc,
test_wuffs_json_decode_quirk_replace_invalid_utf_8,