Lower json.DECODER_NUMBER_LENGTH_MAX_INCL to 99
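Commit b9f69264a09f6a1c610f35639eab024a0cbb3df9 "Raise
json.DECODER_NUMBER_LENGTH_MAX_INCL to 2047" might have regressed the
std/json benchmarks. This commit lowers that limit to 99 and,
correspondingly, lowers json.DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL from 2048
to 100. The floating-point flag and status bits that decode_number packs
alongside the number length move down to match (0x800 becomes 0x80 and the
0x1000, 0x2000 and 0x3000 status values become 0x100, 0x200 and 0x300).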
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index d4afcc9..7c75d22 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6995,7 +6995,7 @@
#define WUFFS_JSON__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL 1
-#define WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL 2048
+#define WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL 100
#define WUFFS_JSON__QUIRK_ALLOW_ASCII_CONTROL_CODES 1225364480
@@ -23250,7 +23250,7 @@
// ---------------- Private Consts
-#define WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL 2047
+#define WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL 99
static const uint8_t
WUFFS_JSON__LUT_BACKSLASHES[256]WUFFS_BASE__POTENTIALLY_UNUSED = {
@@ -24375,12 +24375,12 @@
if (a_src) {
iop_a_src = a_src->data.ptr + a_src->meta.ri;
}
- v_number_status = (v_number_length >> 12);
+ v_number_status = (v_number_length >> 8);
v_vminor = 10486787;
- if ((v_number_length & 2048) != 0) {
+ if ((v_number_length & 128) != 0) {
v_vminor = 10486785;
}
- v_number_length = (v_number_length & 2047);
+ v_number_length = (v_number_length & 127);
if (v_number_status == 0) {
*iop_a_dst++ = wuffs_base__make_token(
(((uint64_t)(v_vminor)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
@@ -24707,7 +24707,7 @@
v_n = 0;
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
goto label__goto_done__break;
}
@@ -24718,9 +24718,9 @@
(iop_a_src += 1, wuffs_base__make_empty_struct());
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
- v_n |= 4096;
+ v_n |= 256;
goto label__goto_done__break;
}
v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
@@ -24736,26 +24736,26 @@
if (a_src) {
iop_a_src = a_src->data.ptr + a_src->meta.ri;
}
- if (v_n > 2047) {
+ if (v_n > 99) {
goto label__goto_done__break;
}
}
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
goto label__goto_done__break;
}
v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
if (v_c != 46) {
} else {
- if (v_n >= 2047) {
- v_n |= 8192;
+ if (v_n >= 99) {
+ v_n |= 512;
goto label__goto_done__break;
}
v_n += 1;
(iop_a_src += 1, wuffs_base__make_empty_struct());
- v_floating_point = 2048;
+ v_floating_point = 128;
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
@@ -24763,12 +24763,12 @@
if (a_src) {
iop_a_src = a_src->data.ptr + a_src->meta.ri;
}
- if (v_n > 2047) {
+ if (v_n > 99) {
goto label__goto_done__break;
}
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
goto label__goto_done__break;
}
@@ -24777,25 +24777,25 @@
if ((v_c != 69) && (v_c != 101)) {
goto label__goto_done__break;
}
- if (v_n >= 2047) {
- v_n |= 8192;
+ if (v_n >= 99) {
+ v_n |= 512;
goto label__goto_done__break;
}
v_n += 1;
(iop_a_src += 1, wuffs_base__make_empty_struct());
- v_floating_point = 2048;
+ v_floating_point = 128;
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
- v_n |= 4096;
+ v_n |= 256;
goto label__goto_done__break;
}
v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
if ((v_c != 43) && (v_c != 45)) {
} else {
- if (v_n >= 2047) {
- v_n |= 8192;
+ if (v_n >= 99) {
+ v_n |= 512;
goto label__goto_done__break;
}
v_n += 1;
@@ -24842,7 +24842,7 @@
while (true) {
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if ( ! (a_src && a_src->meta.closed)) {
- v_n |= 12288;
+ v_n |= 768;
}
goto label__0__break;
}
@@ -24850,8 +24850,8 @@
if (0 == WUFFS_JSON__LUT_DECIMAL_DIGITS[v_c]) {
goto label__0__break;
}
- if (v_n >= 2047) {
- v_n |= 8192;
+ if (v_n >= 99) {
+ v_n |= 512;
goto label__0__break;
}
v_n += 1;
@@ -24859,7 +24859,7 @@
}
label__0__break:;
if (v_n == a_n) {
- v_n |= 4096;
+ v_n |= 256;
}
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
diff --git a/std/json/common_consts.wuffs b/std/json/common_consts.wuffs
index 31787fc..7c5b445 100644
--- a/std/json/common_consts.wuffs
+++ b/std/json/common_consts.wuffs
@@ -41,8 +41,8 @@
// maximum possible src.meta.ri and src.meta.wi values). It is a property of
// the backing array's length, not the length of the JSON-formatted input per
// se. It is perfectly valid to decode "[1,2]" (of length 5) as JSON, as long
-// as that content is placed in an io_buffer whose data.len is at least 2048.
-pub const DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL : base.u64 = 2048
+// as that content is placed in an io_buffer whose data.len is at least 100.
+pub const DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL : base.u64 = 100
// DECODER_NUMBER_LENGTH_MAX_INCL is the longest supported byte length for a
// JSON number. Unlike JSON strings, this package's tokenizer never splits a
@@ -52,7 +52,7 @@
//
// The JSON spec itself does not define a limit, but allows implementations to
// set their own limits.
-pri const DECODER_NUMBER_LENGTH_MAX_INCL : base.u64 = 2047
+pri const DECODER_NUMBER_LENGTH_MAX_INCL : base.u64 = 99
// --------
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index 0ac652c..7407994 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -75,7 +75,7 @@
// https://github.com/google/wuffs/commit/86d3b89f9a6578d964a4b6d71e21dfc9bb702b44
var vminor : base.u32[..= 0xFF_FFFF]
- var number_length : base.u32[..= 0x3FFF]
+ var number_length : base.u32[..= 0x3FF]
var number_status : base.u32[..= 0x3]
var string_length : base.u32[..= 0xFFFB]
var whitespace_length : base.u32[..= 0xFFFE]
@@ -905,17 +905,17 @@
pre args.dst.available() > 0,
{
number_length = this.decode_number!(src: args.src)
- number_status = number_length >> 12
+ number_status = number_length >> 8
vminor = (base.TOKEN__VBC__NUMBER << 21) |
base.TOKEN__VBD__NUMBER__CONTENT_FLOATING_POINT |
base.TOKEN__VBD__NUMBER__CONTENT_INTEGER_SIGNED |
base.TOKEN__VBD__NUMBER__FORMAT_TEXT
- if (number_length & 0x800) <> 0 {
+ if (number_length & 0x80) <> 0 {
vminor = (base.TOKEN__VBC__NUMBER << 21) |
base.TOKEN__VBD__NUMBER__CONTENT_FLOATING_POINT |
base.TOKEN__VBD__NUMBER__FORMAT_TEXT
}
- number_length = number_length & 0x7FF
+ number_length = number_length & 0x7F
if number_status == 0 {
args.dst.write_simple_token_fast!(
value_major: 0,
@@ -1196,10 +1196,10 @@
this.end_of_data = true
}
-pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FFF] {
+pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FF] {
var c : base.u8
- var n : base.u32[..= 0x3FFF]
- var floating_point : base.u32[..= 0x800]
+ var n : base.u32[..= 0x3FF]
+ var floating_point : base.u32[..= 0x80]
while.goto_done true {{
n = 0
@@ -1207,7 +1207,7 @@
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
break.goto_done
}
@@ -1224,9 +1224,9 @@
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
- n |= 0x1000 // A '-' without digits is invalid.
+ n |= 0x100 // A '-' without digits is invalid.
break.goto_done
}
c = args.src.peek_u8()
@@ -1239,19 +1239,19 @@
if c == '0' {
n += 1
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- assert n <= 2047
+ assert n <= 99
} else {
n = this.decode_digits!(src: args.src, n: n)
- if n > 2047 {
+ if n > 99 {
break.goto_done
}
- assert n <= 2047
+ assert n <= 99
}
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
break.goto_done
}
@@ -1260,68 +1260,68 @@
// Scan the optional fraction.
if c <> '.' {
assert args.src.available() > 0
- assert n <= 2047
+ assert n <= 99
} else {
- if n >= 2047 {
- n |= 0x2000
+ if n >= 99 {
+ n |= 0x200
break.goto_done
}
n += 1
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- floating_point = 0x800
+ floating_point = 0x80
n = this.decode_digits!(src: args.src, n: n)
- if n > 2047 {
+ if n > 99 {
break.goto_done
}
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
break.goto_done
}
c = args.src.peek_u8()
assert args.src.available() > 0
- assert n <= 2047
+ assert n <= 99
}
// Scan the optional 'E' or 'e'.
if (c <> 'E') and (c <> 'e') {
break.goto_done
}
- if n >= 2047 {
- n |= 0x2000
+ if n >= 99 {
+ n |= 0x200
break.goto_done
}
n += 1
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- floating_point = 0x800
- assert n <= 2047
+ floating_point = 0x80
+ assert n <= 99
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
- n |= 0x1000 // An 'E' or 'e' without digits is invalid.
+ n |= 0x100 // An 'E' or 'e' without digits is invalid.
break.goto_done
}
c = args.src.peek_u8()
// Scan the optional '+' or '-'.
if (c <> '+') and (c <> '-') {
- assert n <= 2047
+ assert n <= 99
} else {
- if n >= 2047 {
- n |= 0x2000
+ if n >= 99 {
+ n |= 0x200
break.goto_done
}
n += 1
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
- assert n <= 2047
+ assert n <= 99
}
// Scan the exponent digits.
@@ -1333,15 +1333,15 @@
return n | floating_point
}
-pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 2047]) base.u32[..= 0x3FFF] {
+pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 99]) base.u32[..= 0x3FF] {
var c : base.u8
- var n : base.u32[..= 0x3FFF]
+ var n : base.u32[..= 0x3FF]
n = args.n
while true {
if args.src.available() <= 0 {
if not args.src.is_closed() {
- n |= 0x3000
+ n |= 0x300
}
break
}
@@ -1349,27 +1349,24 @@
if 0x00 == LUT_DECIMAL_DIGITS[c] {
break
}
- // Cap DECODER_NUMBER_LENGTH_MAX_INCL at an arbitrary value, 2047. The
- // caller's src.data.len should therefore be at least 2048, also known
+ // Cap DECODER_NUMBER_LENGTH_MAX_INCL at an arbitrary value, 99. The
+ // caller's src.data.len should therefore be at least 100, also known
// as DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL.
//
// An example of a JSON number that is 81 bytes long is:
// https://github.com/nst/JSONTestSuite/blob/master/test_parsing/y_number_double_close_to_zero.json
//
- // A exact rendering (in printf's "%f" format) of 5e-324 can run to
- // almost 1100 bytes.
- //
- // Note that 2047 (in hex, 0x7FF) is less than 0x800, so we can use
- // 0x800 as a flag bit in func decoder.decode_number.
- if n >= 2047 {
- n |= 0x2000
+ // Note that 99 (in hex, 0x63) is less than 0x80, so we can use 0x80 as
+ // a flag bit in func decoder.decode_number.
+ if n >= 99 {
+ n |= 0x200
break
}
n += 1
args.src.skip_u32_fast!(actual: 1, worst_case: 1)
} endwhile
if n == args.n {
- n |= 0x1000
+ n |= 0x100
}
return n
}
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 9371eaf..5884997 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -1982,14 +1982,14 @@
{.valid = true, .suffix = ".2e-5678 "},
};
- // src_array holds the overall test string. 2090 is arbitrary but long
- // enough. See the "if (suffix_length > etc)" check below. 2060 is also
- // arbitrary but larger than WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL.
+ // src_array holds the overall test string. 119 is arbitrary but long enough.
+ // See the "if (suffix_length > etc)" check below. 102 is also arbitrary but
+ // larger than WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL.
//
// See also test_wuffs_json_decode_src_io_buffer_length.
- uint8_t src_array[2090];
- memset(&src_array[0], '9', 2060);
- if (2060 <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) {
+ uint8_t src_array[119];
+ memset(&src_array[0], '9', 102);
+ if (102 <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) {
RETURN_FAIL("insufficient number_length test case coverage");
}
@@ -1998,7 +1998,7 @@
int tc;
for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
size_t suffix_length = strlen(test_cases[tc].suffix);
- if ((suffix_length + 1) > (2090 - 2060)) { // +1 for the terminal NUL.
+ if ((suffix_length + 1) > (119 - 102)) { // +1 for the terminal NUL.
RETURN_FAIL("tc=%d: src_array is too short", tc);
}
bool ends_with_space = (suffix_length > 0) &&
@@ -2007,12 +2007,12 @@
// Copying the terminal NUL isn't necessary for Wuffs' slices (which are a
// pointer-length pair), but this backstop can help debugging with printf
// where "%s" takes a C string (a bare pointer).
- memcpy(&src_array[2060], test_cases[tc].suffix, suffix_length + 1);
+ memcpy(&src_array[102], test_cases[tc].suffix, suffix_length + 1);
size_t nines_length;
- for (nines_length = 2037; nines_length < 2050; nines_length++) {
+ for (nines_length = 90; nines_length < 102; nines_length++) {
wuffs_base__slice_u8 src_data = ((wuffs_base__slice_u8){
- .ptr = &src_array[2060 - nines_length],
+ .ptr = &src_array[102 - nines_length],
.len = nines_length + suffix_length,
});
size_t number_length = src_data.len - (ends_with_space ? 1 : 0);
@@ -3019,10 +3019,10 @@
"WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL");
}
- // src_array holds the test string of repeated '7's. 2050 is arbitrary but
+ // src_array holds the test string of repeated '7's. 107 is arbitrary but
// long enough for the loop below.
- uint8_t src_array[2050];
- memset(&src_array[0], '7', 2050);
+ uint8_t src_array[107];
+ memset(&src_array[0], '7', 107);
wuffs_json__decoder dec;
@@ -3031,7 +3031,7 @@
i <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL + 2; i++) {
if (i < 0) {
RETURN_FAIL("invalid test case: i=%d", i);
- } else if (i > 2050) {
+ } else if (i > 107) {
RETURN_FAIL("invalid test case: i=%d", i);
}