Tweak JSON trailing comment/new-line quirks
diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index 0ee259b..bd0c172 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc
@@ -470,6 +470,7 @@
in_dict_after_brace,
in_dict_after_key,
in_dict_after_value,
+ end_of_data,
} g_ctx;
bool //
@@ -934,7 +935,6 @@
if (g_flags.input_allow_comments) {
g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, true);
g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, true);
- g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT, true);
}
if (g_flags.input_allow_extra_comma) {
g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA, true);
@@ -943,11 +943,11 @@
g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS, true);
}
- // Consume an optional whitespace trailer. This isn't part of the JSON spec,
- // but it works better with line oriented Unix tools (such as "echo 123 |
- // jsonptr" where it's "echo", not "echo -n") or hand-edited JSON files which
- // can accidentally contain trailing whitespace.
- g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, true);
+ // Consume any optional trailing whitespace and comments. This isn't part of
+ // the JSON spec, but it works better with line oriented Unix tools (such as
+ // "echo 123 | jsonptr" where it's "echo", not "echo -n") or hand-edited JSON
+ // files which can accidentally contain trailing whitespace.
+ g_dec.set_quirk_enabled(WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, true);
return nullptr;
}
@@ -1301,7 +1301,8 @@
if (g_ctx == context::in_dict_after_key) {
TRY(write_dst(":", 1));
} else if ((g_ctx != context::in_list_after_bracket) &&
- (g_ctx != context::in_dict_after_brace)) {
+ (g_ctx != context::in_dict_after_brace) &&
+ (g_ctx != context::end_of_data)) {
TRY(write_dst(",", 1));
}
if (!g_flags.compact_output) {
@@ -1328,14 +1329,29 @@
start_of_token_chain = !t.continued();
if (z == nullptr) {
continue;
- } else if (z == g_eod) {
- goto end_of_data;
+ } else if (z != g_eod) {
+ return z;
+ } else if (g_flags.query_c_string && *g_flags.query_c_string) {
+ // With a non-empty g_query, don't try to consume trailing filler or
+ // confirm that we've processed all the tokens.
+ return nullptr;
}
- return z;
+ g_ctx = context::end_of_data;
}
if (status.repr == nullptr) {
- return "main: internal error: unexpected end of token stream";
+ if (g_ctx != context::end_of_data) {
+ return "main: internal error: unexpected end of token stream";
+ }
+ // Check that we've exhausted the input.
+ if ((g_src.meta.ri == g_src.meta.wi) && !g_src.meta.closed) {
+ TRY(read_src());
+ }
+ if ((g_src.meta.ri < g_src.meta.wi) || !g_src.meta.closed) {
+ return "main: valid JSON followed by further (unexpected) data";
+ }
+ // All done.
+ return nullptr;
} else if (status.repr == wuffs_base__suspension__short_read) {
if (g_cursor_index != g_src.meta.ri) {
return "main: internal error: inconsistent g_src indexes";
@@ -1348,34 +1364,6 @@
return status.message();
}
}
-end_of_data:
-
- // With a non-empty g_query, don't try to consume trailing whitespace or
- // confirm that we've processed all the tokens.
- if (g_flags.query_c_string && *g_flags.query_c_string) {
- return nullptr;
- }
-
- // Check that we've exhausted the input.
- if ((g_src.meta.ri == g_src.meta.wi) && !g_src.meta.closed) {
- TRY(read_src());
- }
- if ((g_src.meta.ri < g_src.meta.wi) || !g_src.meta.closed) {
- return "main: valid JSON followed by further (unexpected) data";
- }
-
- // Check that we've used all of the decoded tokens, other than trailing
- // filler tokens. For example, "true\n" is valid JSON (and fully consumed
- // with WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE enabled) with a trailing
- // filler token for the "\n".
- for (; g_tok.meta.ri < g_tok.meta.wi; g_tok.meta.ri++) {
- if (g_tok.data.ptr[g_tok.meta.ri].value_base_category() !=
- WUFFS_BASE__TOKEN__VBC__FILLER) {
- return "main: internal error: decoded OK but unprocessed tokens remain";
- }
- }
-
- return nullptr;
}
int //
@@ -1441,7 +1429,7 @@
const char* z = main1(argc, argv);
if (g_wrote_to_dst) {
- const char* z1 = write_dst("\n", 1);
+ const char* z1 = g_is_after_comment ? nullptr : write_dst("\n", 1);
const char* z2 = flush_dst();
z = z ? z : (z1 ? z1 : z2);
}
diff --git a/fuzz/c/std/json_fuzzer.c b/fuzz/c/std/json_fuzzer.c
index 0a83aa5..2e1fa2c 100644
--- a/fuzz/c/std/json_fuzzer.c
+++ b/fuzz/c/std/json_fuzzer.c
@@ -240,8 +240,7 @@
WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
- WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT,
- WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+ WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
0,
};
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3f4ad79..855abda 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -7661,8 +7661,12 @@
#define WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK 1225364496
+#define WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER 1225364497
+
#define WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT 1225364497
+#define WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF 1225364498
+
#define WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE 1225364498
#define WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_R_TILDE_N 1225364499
@@ -7760,15 +7764,15 @@
bool f_quirks[21];
bool f_allow_leading_ars;
bool f_allow_leading_ubom;
- bool f_allow_trailing_comment;
bool f_end_of_data;
+ uint8_t f_trailer_stop;
uint8_t f_comment_type;
uint32_t p_decode_tokens[1];
uint32_t p_decode_leading[1];
uint32_t p_decode_comment[1];
uint32_t p_decode_inf_nan[1];
- uint32_t p_decode_trailing_new_line[1];
+ uint32_t p_decode_trailer[1];
} private_impl;
struct {
@@ -25737,7 +25741,7 @@
wuffs_base__io_buffer* a_src);
static wuffs_base__status
-wuffs_json__decoder__decode_trailing_new_line(
+wuffs_json__decoder__decode_trailer(
wuffs_json__decoder* self,
wuffs_base__token_buffer* a_dst,
wuffs_base__io_buffer* a_src);
@@ -25960,6 +25964,12 @@
status = wuffs_base__make_status(wuffs_base__note__end_of_data);
goto ok;
}
+ if (self->private_impl.f_quirks[18]) {
+ if (self->private_impl.f_quirks[11] || self->private_impl.f_quirks[12] || self->private_impl.f_quirks[17]) {
+ status = wuffs_base__make_status(wuffs_json__error__bad_quirk_combination);
+ goto exit;
+ }
+ }
if (self->private_impl.f_quirks[15] || self->private_impl.f_quirks[16]) {
if (a_dst) {
a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
@@ -26832,7 +26842,9 @@
if (status.repr) {
goto suspend;
}
- goto label__outer__continue;
+ if (self->private_impl.f_comment_type > 0) {
+ goto label__outer__continue;
+ }
}
}
status = wuffs_base__make_status(wuffs_json__error__bad_input);
@@ -26845,7 +26857,7 @@
v_expect = v_expect_after_value;
}
label__outer__break:;
- if (self->private_impl.f_quirks[18]) {
+ if (self->private_impl.f_quirks[17] || self->private_impl.f_quirks[18]) {
if (a_dst) {
a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
}
@@ -26853,7 +26865,7 @@
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
WUFFS_BASE__COROUTINE_SUSPENSION_POINT(24);
- status = wuffs_json__decoder__decode_trailing_new_line(self, a_dst, a_src);
+ status = wuffs_json__decoder__decode_trailer(self, a_dst, a_src);
if (a_dst) {
iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
}
@@ -27240,8 +27252,8 @@
goto label__0__continue;
}
if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(wuffs_json__error__bad_input);
- goto exit;
+ status = wuffs_base__make_status(NULL);
+ goto ok;
}
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
@@ -27348,8 +27360,6 @@
}
}
}
- status = wuffs_base__make_status(wuffs_json__error__bad_input);
- goto exit;
goto ok;
ok:
@@ -27535,10 +27545,10 @@
return status;
}
-// -------- func json.decoder.decode_trailing_new_line
+// -------- func json.decoder.decode_trailer
static wuffs_base__status
-wuffs_json__decoder__decode_trailing_new_line(
+wuffs_json__decoder__decode_trailer(
wuffs_json__decoder* self,
wuffs_base__token_buffer* a_dst,
wuffs_base__io_buffer* a_src) {
@@ -27571,11 +27581,15 @@
io2_a_src = io0_a_src + a_src->meta.wi;
}
- uint32_t coro_susp_point = self->private_impl.p_decode_trailing_new_line[0];
+ uint32_t coro_susp_point = self->private_impl.p_decode_trailer[0];
switch (coro_susp_point) {
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
- self->private_impl.f_allow_trailing_comment = self->private_impl.f_quirks[17];
+ if (self->private_impl.f_quirks[18]) {
+ self->private_impl.f_trailer_stop = 10;
+ } else {
+ self->private_impl.f_trailer_stop = 0;
+ }
label__outer__continue:;
while (true) {
if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
@@ -27608,44 +27622,44 @@
(((uint64_t)(v_whitespace_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
v_whitespace_length = 0;
}
- if (self->private_impl.f_allow_trailing_comment) {
- if (a_dst) {
- a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
- }
- if (a_src) {
- a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
- }
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
- status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
- if (a_dst) {
- iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
- }
- if (a_src) {
- iop_a_src = a_src->data.ptr + a_src->meta.ri;
- }
- if (status.repr) {
- goto suspend;
- }
- v_c = 0;
- v_whitespace_length = 0;
- if (self->private_impl.f_comment_type == 1) {
- self->private_impl.f_allow_trailing_comment = false;
- goto label__outer__continue;
- } else if (self->private_impl.f_comment_type == 2) {
- goto label__outer__break;
- }
+ if (self->private_impl.f_trailer_stop > 0) {
+ status = wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
}
- status = wuffs_base__make_status(wuffs_json__error__bad_input);
- goto exit;
+ if (a_dst) {
+ a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ }
+ if (a_src) {
+ a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
+ }
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
+ status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
+ if (a_dst) {
+ iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
+ }
+ if (a_src) {
+ iop_a_src = a_src->data.ptr + a_src->meta.ri;
+ }
+ if (status.repr) {
+ goto suspend;
+ }
+ v_c = 0;
+ v_whitespace_length = 0;
+ if (self->private_impl.f_comment_type > 0) {
+ goto label__outer__continue;
+ }
+ status = wuffs_base__make_status(NULL);
+ goto ok;
}
(iop_a_src += 1, wuffs_base__make_empty_struct());
- if ((v_whitespace_length >= 65534) || (v_c == 10)) {
+ if ((v_whitespace_length >= 65534) || (v_c == self->private_impl.f_trailer_stop)) {
*iop_a_dst++ = wuffs_base__make_token(
(((uint64_t)(0)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)((v_whitespace_length + 1))) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
v_whitespace_length = 0;
- if (v_c == 10) {
- goto label__outer__break;
+ if (v_c == self->private_impl.f_trailer_stop) {
+ status = wuffs_base__make_status(NULL);
+ goto ok;
}
goto label__outer__continue;
}
@@ -27656,13 +27670,13 @@
goto ok;
ok:
- self->private_impl.p_decode_trailing_new_line[0] = 0;
+ self->private_impl.p_decode_trailer[0] = 0;
goto exit;
}
goto suspend;
suspend:
- self->private_impl.p_decode_trailing_new_line[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
+ self->private_impl.p_decode_trailer[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
goto exit;
exit:
diff --git a/script/print-json-token-debug-format.c b/script/print-json-token-debug-format.c
index 29342c5..b92a3a9 100644
--- a/script/print-json-token-debug-format.c
+++ b/script/print-json-token-debug-format.c
@@ -298,7 +298,7 @@
WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
- WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+ WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
0,
};
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index df48da1..bd83ae8 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -15,14 +15,15 @@
pub struct decoder? implements base.token_decoder(
quirks : array[QUIRKS_COUNT] base.bool,
- allow_leading_ars : base.bool,
- allow_leading_ubom : base.bool,
- allow_trailing_comment : base.bool,
+ allow_leading_ars : base.bool,
+ allow_leading_ubom : base.bool,
end_of_data : base.bool,
+ trailer_stop : base.u8,
+
// comment_type is set as a side-effect of decode_comment?.
- // - 0 means failure.
+ // - 0 means no comment.
// - 1 means a block comment.
// - 2 means a line comment.
comment_type : base.u8,
@@ -123,6 +124,14 @@
return base."@end of data"
}
+ if this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+ if this.quirks[QUIRK_ALLOW_COMMENT_BLOCK - QUIRKS_BASE] or
+ this.quirks[QUIRK_ALLOW_COMMENT_LINE - QUIRKS_BASE] or
+ this.quirks[QUIRK_ALLOW_TRAILING_FILLER - QUIRKS_BASE] {
+ return "#bad quirk combination"
+ }
+ }
+
if this.quirks[QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR - QUIRKS_BASE] or
this.quirks[QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK - QUIRKS_BASE] {
this.decode_leading?(dst: args.dst, src: args.src)
@@ -1176,7 +1185,9 @@
if this.quirks[QUIRK_ALLOW_COMMENT_BLOCK - QUIRKS_BASE] or
this.quirks[QUIRK_ALLOW_COMMENT_LINE - QUIRKS_BASE] {
this.decode_comment?(dst: args.dst, src: args.src)
- continue.outer
+ if this.comment_type > 0 {
+ continue.outer
+ }
}
}
@@ -1191,8 +1202,9 @@
expect = expect_after_value
} endwhile.outer
- if this.quirks[QUIRK_ALLOW_TRAILING_NEW_LINE - QUIRKS_BASE] {
- this.decode_trailing_new_line?(dst: args.dst, src: args.src)
+ if this.quirks[QUIRK_ALLOW_TRAILING_FILLER - QUIRKS_BASE] or
+ this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+ this.decode_trailer?(dst: args.dst, src: args.src)
}
this.end_of_data = true
@@ -1438,7 +1450,7 @@
continue
}
if args.src.is_closed() {
- return "#bad input"
+ return ok
}
yield? base."$short read"
} endwhile
@@ -1562,8 +1574,6 @@
} endwhile
} endwhile.comment_line
}
-
- return "#bad input"
}
pri func decoder.decode_inf_nan?(dst: base.token_writer, src: base.io_reader) {
@@ -1677,11 +1687,15 @@
} endwhile
}
-pri func decoder.decode_trailing_new_line?(dst: base.token_writer, src: base.io_reader) {
+pri func decoder.decode_trailer?(dst: base.token_writer, src: base.io_reader) {
var c : base.u8
var whitespace_length : base.u32[..= 0xFFFE]
- this.allow_trailing_comment = this.quirks[QUIRK_ALLOW_TRAILING_COMMENT - QUIRKS_BASE]
+ if this.quirks[QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF - QUIRKS_BASE] {
+ this.trailer_stop = '\n'
+ } else {
+ this.trailer_stop = 0
+ }
while.outer true {
if args.dst.length() <= 0 {
@@ -1714,27 +1728,25 @@
value_major: 0, value_minor: 0, continued: 0, length: whitespace_length)
whitespace_length = 0
}
- if this.allow_trailing_comment {
- this.decode_comment?(dst: args.dst, src: args.src)
- c = 0
- whitespace_length = 0
- if this.comment_type == 1 { // Block comment.
- this.allow_trailing_comment = false
- continue.outer
- } else if this.comment_type == 2 { // Line comment.
- break.outer
- }
+ if this.trailer_stop > 0 {
+ return "#bad input"
}
- return "#bad input"
+ this.decode_comment?(dst: args.dst, src: args.src)
+ c = 0
+ whitespace_length = 0
+ if this.comment_type > 0 {
+ continue.outer
+ }
+ return ok
}
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- if (whitespace_length >= 0xFFFE) or (c == '\n') {
+ if (whitespace_length >= 0xFFFE) or (c == this.trailer_stop) {
args.dst.write_simple_token_fast!(
value_major: 0, value_minor: 0, continued: 0, length: whitespace_length + 1)
whitespace_length = 0
- if c == '\n' {
- break.outer
+ if c == this.trailer_stop {
+ return ok
}
continue.outer
}
diff --git a/std/json/decode_quirks.wuffs b/std/json/decode_quirks.wuffs
index 5f31b5b..9970c35 100644
--- a/std/json/decode_quirks.wuffs
+++ b/std/json/decode_quirks.wuffs
@@ -97,16 +97,17 @@
pub const QUIRK_ALLOW_BACKSLASH_ZERO : base.u32 = 0x4909_9400 | 0x0A
// When this quirk is enabled, "/* C/C++ style block comments */" are accepted
-// anywhere whitespace would be, although see the QUIRK_ALLOW_TRAILING_COMMENT
-// comment for additional interaction when combining multiple quirks.
+// anywhere whitespace would be. See also QUIRK_ALLOW_TRAILING_FILLER.
//
// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK tokens. The token
// chain's source bytes includes the starting "/*" and the ending "*/".
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
pub const QUIRK_ALLOW_COMMENT_BLOCK : base.u32 = 0x4909_9400 | 0x0B
// When this quirk is enabled, "// C/C++ style line comments\n" are accepted
-// anywhere whitespace would be, although see the QUIRK_ALLOW_TRAILING_COMMENT
-// comment for additional interaction when combining multiple quirks.
+// anywhere whitespace would be. See also QUIRK_ALLOW_TRAILING_FILLER.
//
// A line comment may not omit the ending "\n", even if there is no input
// afterwards (i.e. the prospective line comment ends with the end-of-file).
@@ -117,6 +118,9 @@
// Even if the line comments are on consecutive lines, each line comment is a
// separate token chain. There may be whitespace tokens between one line
// comment's ending "\n" and the next one's starting "//".
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
pub const QUIRK_ALLOW_COMMENT_LINE : base.u32 = 0x4909_9400 | 0x0C
// When this quirk is enabled, there may be a comma after the final array
@@ -140,8 +144,9 @@
// When combined with QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK, either mark
// may come first in the byte stream.
//
-// When combined with QUIRK_ALLOW_TRAILING_NEW_LINE, this format is also known
-// as RFC 7464, Json Text Sequences and MIME type "application/json-seq".
+// When combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, this format is
+// also known as RFC 7464, JSON Text Sequences and MIME type
+// "application/json-seq".
pub const QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR : base.u32 = 0x4909_9400 | 0x0F
// When this quirk is enabled, the input byte stream may optionally start with
@@ -152,40 +157,43 @@
// may come first in the byte stream.
pub const QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK : base.u32 = 0x4909_9400 | 0x10
-// When this quirk is enabled and both:
-// - QUIRK_ALLOW_TRAILING_NEW_LINE is enabled,
-// - at least one of QUIRK_ALLOW_COMMENT_ETC is enabled,
-// the trailing whitespace may optionally contain a single comment. As per
-// QUIRK_ALLOW_TRAILING_NEW_LINE, processing will still stop at the first
-// trailing '\n' (outside of a block comment), even if more comments followed.
+// When this quirk is enabled, following a successful decoding of a top-level
+// JSON value, any trailing whitespace (ASCII characters 0x09, 0x0A, 0x0D or
+// 0x20) and/or comments (if QUIRK_ALLOW_COMMENT_ETC is enabled) are also
+// consumed (and WUFFS_BASE__TOKEN__VBC__FILLER tokens emitted) up to but
+// excluding the end-of-file or the next non-filler byte.
//
-// For a trailing block comment, new lines within the comment are not counted
-// and after the comment concludes, the decoder will continue consuming
-// whitespace up to and including the next '\n' (or end-of-file).
+// Trailing non-filler is not an error. Decoding simply stops before it.
//
-// For a trailing line comment, the decoder stops immediately after the
-// comment. If not stopped by end-of-file, this stops after the '\n' that
-// concludes (and is part of) the comment. One implication is that if multiple
-// line comments trail a JSON value, only the first one will be processed.
+// To avoid ambiguity, this quirk cannot be combined with
+// QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF. Unlike that quirk, enabling this
+// quirk will consume multiple trailing '\n' bytes.
+pub const QUIRK_ALLOW_TRAILING_FILLER : base.u32 = 0x4909_9400 | 0x11
+
+// Deprecated: equivalent to QUIRK_ALLOW_TRAILING_FILLER.
pub const QUIRK_ALLOW_TRAILING_COMMENT : base.u32 = 0x4909_9400 | 0x11
// When this quirk is enabled, following a successful decoding of a top-level
// JSON value, any trailing whitespace (ASCII characters 0x09, 0x0A, 0x0D or
// 0x20) is also consumed (and WUFFS_BASE__TOKEN__VBC__FILLER tokens emitted)
-// up to the end-of-file or up to and including a single new line (ASCII 0x0A,
-// also known as '\n'), whichever comes first. This trailing whitespace is not
-// mandatory, but it is consumed if present.
+// up to but excluding the end-of-file or up to and including a single new line
+// (ASCII 0x0A, also known as '\n'), whichever comes first. A trailing '\n' is
+// not mandatory (if at end-of-file), but it is consumed if present and will
+// stop decoding.
//
-// When enabled, trailing non-whitespace (before a '\n') is an error (unless it
-// is a comment and QUIRK_ALLOW_TRAILING_COMMENT is enabled; see above). For
-// example, with "007" input, decoding with this quirk disabled (the default
-// case) will consume just 1 byte ("0") and leave the rest ("07") unread
-// without error, as "0" is a perfectly valid JSON value (but "00" is not).
-// Decoding "007" (or "007\n") with this quirk enabled will return an error.
+// Trailing non-whitespace after a trailing '\n' is ignored.
//
-// When enabled, the decoder will not consume more than one trailing '\n'
-// (outside of a block comment), nor will it consume any other whitespace
-// immediately after a trailing '\n'.
+// Trailing non-whitespace, before EOF or '\n', is an error. For example, with
+// "007" input, decoding with this quirk disabled (the default case) will
+// consume just 1 byte ("0") and leave the rest ("07") unread without error, as
+// "0" is a perfectly valid JSON value (but "00" is not). Decoding "007" (or
+// "007\n") with this quirk enabled will return an error.
+//
+// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
+// combined with any of:
+// - QUIRK_ALLOW_COMMENT_BLOCK
+// - QUIRK_ALLOW_COMMENT_LINE
+// - QUIRK_ALLOW_TRAILING_FILLER
//
// If a JSON encoder avoids emitting (optional) '\n' bytes, other than a single
// '\n' after each top-level value in a multi-JSON-value stream, this format is
@@ -199,13 +207,9 @@
// When combined with QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR, this format
// is also known as RFC 7464, Json Text Sequences and MIME type
// "application/json-seq".
-//
-// When combined with QUIRK_ALLOW_COMMENT_BLOCK or QUIRK_ALLOW_COMMENT_LINE, it
-// is an error for a comment to occur in this trailing whitespace, unless
-// QUIRK_ALLOW_TRAILING_COMMENT is also enabled. Be aware that block comments
-// can contain multiple new lines, so combining such quirks can break the
-// "exactly one JSON value per line" assumption for newline-delimited but
-// otherwise compact (minified) JSON.
+pub const QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF : base.u32 = 0x4909_9400 | 0x12
+
+// Deprecated: equivalent to QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
pub const QUIRK_ALLOW_TRAILING_NEW_LINE : base.u32 = 0x4909_9400 | 0x12
// When this quirk is enabled, JSON Pointer strings containing "~r" or "~n",
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 413e586..878ca55 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -2537,7 +2537,7 @@
WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS,
WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR,
WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK,
- WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE,
+ WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER,
WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE,
0,
};
@@ -3386,24 +3386,51 @@
}
const char* //
-test_wuffs_json_decode_quirk_allow_trailing_comment() {
+test_wuffs_json_decode_quirk_allow_trailing_comments() {
CHECK_FOCUS(__func__);
- // These test cases all end with two '\n' bytes. If the first byte is '8'
- // then decoding should succeed, and stop between those two '\n' bytes.
- // Otherwise, decoding should fail.
+ // The first byte is a code.
+ // - '1' means that there is at most one '\n' byte
+ // - '2' means that there are two '\n' bytes but no comments
+ // - '3' means that there are comments
+ // - '4' means that there is non-filler after eof-or-'\n'
+ // - '5' means that there is non-filler before eof-or-'\n'
+ //
+ // WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER (together with
+ // WUFFS_JSON__QUIRK_ALLOW_COMMENT_ETC) should decode the '1's, '2's and '3's
+ // completely and the '4's and '5's up to but excluding the non-filler.
+ //
+ // WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF: the '1's decode fully,
+ // the '2's and '4's stop just after the first '\n', the '3's and '5's fail.
const char* test_cases[] = {
- "80\n\n", //
- "81 \n\n", //
- "82 /*foo*/ \n\n", //
- "83/*bar\nbaz*/\n\n", //
- "84 // qux\n\n", //
- "95 /*c0*/ /*c1*/\n\n", //
- "96 /*c0*/ // c2 \n\n", //
+ "100", //
+ "101 \n", //
+ "202\n\n", //
+ "203 \n\n", //
+ "304 /*foo*/", //
+ "305 /*foo*/ ", //
+ "306 /*foo*/ \n", //
+ "307 /*foo*/ \n\n", //
+ "308/*bar\nbaz*/\n\n", //
+ "309 // qux\n\n", //
+ "310 /*c0*/ /*c1*/\n\n", //
+ "311 /*c0*/ \n\n // c2 \n\n", //
+ "412 \n9", //
+ "513 9", //
};
int tc;
+
+ // Test ALLOW_ETC.
for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
+ void* tc_ptr = (void*)(test_cases[tc]);
+ size_t tc_len = strlen(test_cases[tc]);
+ char code = test_cases[tc][0];
+
+ wuffs_base__token_buffer tok =
+ wuffs_base__slice_token__writer(g_have_slice_token);
+ wuffs_base__io_buffer src =
+ wuffs_base__ptr_u8__reader(tc_ptr, tc_len, true);
wuffs_json__decoder dec;
CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
&dec, sizeof dec, WUFFS_VERSION,
@@ -3413,81 +3440,119 @@
wuffs_json__decoder__set_quirk_enabled(
&dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, true);
wuffs_json__decoder__set_quirk_enabled(
- &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_COMMENT, true);
- wuffs_json__decoder__set_quirk_enabled(
- &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, true);
+ &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, true);
+ const char* have_repr =
+ wuffs_json__decoder__decode_tokens(&dec, &tok, &src, g_work_slice_u8)
+ .repr;
+ if (have_repr != NULL) {
+ RETURN_FAIL("tc=%d, ALLOW_ETC: decode_tokens: have \"%s\", want NULL", tc,
+ have_repr);
+ }
+
+ size_t have_total_length = 0;
+ while (tok.meta.ri < tok.meta.wi) {
+ have_total_length +=
+ wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
+ }
+ size_t want_total_length = tc_len - ((code >= '4') ? 1 : 0);
+ if (have_total_length != src.meta.ri) {
+ RETURN_FAIL("tc=%d, ALLOW_ETC: total_length: have %zu, want %zu", tc,
+ have_total_length, src.meta.ri);
+ } else if (have_total_length != want_total_length) {
+ RETURN_FAIL("tc=%d, ALLOW_ETC: total_length: have %zu, want %zu", tc,
+ have_total_length, want_total_length);
+ }
+ }
+
+ // Test EXPECT_ETC.
+ for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
void* tc_ptr = (void*)(test_cases[tc]);
size_t tc_len = strlen(test_cases[tc]);
+ char code = test_cases[tc][0];
+
wuffs_base__token_buffer tok =
wuffs_base__slice_token__writer(g_have_slice_token);
wuffs_base__io_buffer src =
wuffs_base__ptr_u8__reader(tc_ptr, tc_len, true);
- const char* have =
+ wuffs_json__decoder dec;
+ CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
+ &dec, sizeof dec, WUFFS_VERSION,
+ WUFFS_INITIALIZE__DEFAULT_OPTIONS));
+ wuffs_json__decoder__set_quirk_enabled(
+ &dec, WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, true);
+
+ const char* have_repr =
wuffs_json__decoder__decode_tokens(&dec, &tok, &src, g_work_slice_u8)
.repr;
- const char* want =
- (test_cases[tc][0] == '8') ? NULL : wuffs_json__error__bad_input;
- if (have != want) {
- RETURN_FAIL("tc=%d: decode_tokens: have \"%s\", want \"%s\"", tc, have,
- want);
- } else if (have != NULL) {
+ const char* want_repr =
+ ((code == '3') || (code == '5')) ? wuffs_json__error__bad_input : NULL;
+ if (have_repr != want_repr) {
+ RETURN_FAIL("tc=%d, EXPECT_ETC: decode_tokens: have \"%s\", want \"%s\"",
+ tc, have_repr, want_repr);
+ } else if (have_repr != NULL) {
continue;
}
- size_t total_length = 0;
+ size_t have_total_length = 0;
while (tok.meta.ri < tok.meta.wi) {
- total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
+ have_total_length +=
+ wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]);
}
- if (total_length != src.meta.ri) {
- RETURN_FAIL("tc=%d: total_length: have %zu, want %zu", tc, total_length,
- src.meta.ri);
- } else if ((total_length + 1) != tc_len) {
- RETURN_FAIL("tc=%d: total_length+1: have %zu, want %zu", tc,
- total_length + 1, tc_len);
+ size_t want_total_length = tc_len - ((code == '1') ? 0 : 1);
+ if (have_total_length != src.meta.ri) {
+ RETURN_FAIL("tc=%d, EXPECT_ETC: total_length: have %zu, want %zu", tc,
+ have_total_length, src.meta.ri);
+ } else if (have_total_length != want_total_length) {
+ RETURN_FAIL("tc=%d, EXPECT_ETC: total_length: have %zu, want %zu", tc,
+ have_total_length, want_total_length);
}
}
+
return NULL;
}
const char* //
-test_wuffs_json_decode_quirk_allow_trailing_new_line() {
+test_wuffs_json_decode_quirk_allow_trailing_filler() {
CHECK_FOCUS(__func__);
struct {
- // want has 2 bytes, one for each possible q:
- // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE.
+ // want has 3 bytes, one for each possible q:
+ // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER.
+ // - q&2 sets WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
// An 'X', '+' or '-' means that decoding should succeed (and consume the
// entire input), succeed (without consuming the entire input) or fail.
const char* want;
const char* str;
} test_cases[] = {
- {.want = "++", .str = "0 \n "}, //
- {.want = "++", .str = "0 \n\n"}, //
- {.want = "++", .str = "0\n\n"}, //
- {.want = "+-", .str = "0 true \n"}, //
- {.want = "+-", .str = "007"}, //
- {.want = "+-", .str = "007\n"}, //
- {.want = "+-", .str = "0true "}, //
- {.want = "+-", .str = "0true"}, //
- {.want = "+X", .str = "0 "}, //
- {.want = "+X", .str = "0 \n"}, //
- {.want = "+X", .str = "0\n"}, //
- {.want = "+X", .str = "0\t\r\n"}, //
- {.want = "--", .str = "\n"}, //
- {.want = "XX", .str = "0"}, //
+ {.want = "+X+", .str = "0 \n "}, //
+ {.want = "+X+", .str = "0 \n\n"}, //
+ {.want = "+X+", .str = "0\n\n"}, //
+ {.want = "++-", .str = "0 true \n"}, //
+ {.want = "++-", .str = "007"}, //
+ {.want = "++-", .str = "007\n"}, //
+ {.want = "++-", .str = "0true "}, //
+ {.want = "++-", .str = "0true"}, //
+ {.want = "+XX", .str = "0 "}, //
+ {.want = "+XX", .str = "0 \n"}, //
+ {.want = "+XX", .str = "0\n"}, //
+ {.want = "+XX", .str = "0\t\r\n"}, //
+ {.want = "---", .str = "\n"}, //
+ {.want = "XXX", .str = "0"}, //
};
int tc;
for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
int q;
- for (q = 0; q < 2; q++) {
+ for (q = 0; q < 3; q++) {
wuffs_json__decoder dec;
CHECK_STATUS("initialize", wuffs_json__decoder__initialize(
&dec, sizeof dec, WUFFS_VERSION,
WUFFS_INITIALIZE__DEFAULT_OPTIONS));
wuffs_json__decoder__set_quirk_enabled(
- &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, q & 1);
+ &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_FILLER, q & 1);
+ wuffs_json__decoder__set_quirk_enabled(
+ &dec, WUFFS_JSON__QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF, q & 2);
wuffs_base__token_buffer tok =
wuffs_base__slice_token__writer(g_have_slice_token);
@@ -4134,8 +4199,8 @@
test_wuffs_json_decode_quirk_allow_extra_comma,
test_wuffs_json_decode_quirk_allow_inf_nan_numbers,
test_wuffs_json_decode_quirk_allow_leading_etc,
- test_wuffs_json_decode_quirk_allow_trailing_comment,
- test_wuffs_json_decode_quirk_allow_trailing_new_line,
+ test_wuffs_json_decode_quirk_allow_trailing_comments,
+ test_wuffs_json_decode_quirk_allow_trailing_filler,
test_wuffs_json_decode_quirk_replace_invalid_unicode,
test_wuffs_json_decode_src_io_buffer_length,
test_wuffs_json_decode_string,