Lower json.DECODER_NUMBER_LENGTH_MAX_INCL to 99

Commit b9f69264a09f6a1c610f35639eab024a0cbb3df9 "Raise json.DECODER_NUMBER_LENGTH_MAX_INCL to 2047" might have regressed the std/json benchmarks.

commit: 84bb3afd8742f9e3b3787dd3965d97f3a0a159c4 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Tue Jul 07 23:29:30 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Tue Jul 07 23:29:30 2020 +1000
tree: f0c181c7f1139b0b6ad0ea06d2d095a8d3285b33
parent: 12a8c7598b11ed6b10c9bb42d3323d71097aea3e [diff]
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index d4afcc9..7c75d22 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c

@@ -6995,7 +6995,7 @@
 
 #define WUFFS_JSON__DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL 1
 
-#define WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL 2048
+#define WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL 100
 
 #define WUFFS_JSON__QUIRK_ALLOW_ASCII_CONTROL_CODES 1225364480
 
@@ -23250,7 +23250,7 @@
 
 // ---------------- Private Consts
 
-#define WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL 2047
+#define WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL 99
 
 static const uint8_t
 WUFFS_JSON__LUT_BACKSLASHES[256]WUFFS_BASE__POTENTIALLY_UNUSED = {
@@ -24375,12 +24375,12 @@
             if (a_src) {
               iop_a_src = a_src->data.ptr + a_src->meta.ri;
             }
-            v_number_status = (v_number_length >> 12);
+            v_number_status = (v_number_length >> 8);
             v_vminor = 10486787;
-            if ((v_number_length & 2048) != 0) {
+            if ((v_number_length & 128) != 0) {
               v_vminor = 10486785;
             }
-            v_number_length = (v_number_length & 2047);
+            v_number_length = (v_number_length & 127);
             if (v_number_status == 0) {
               *iop_a_dst++ = wuffs_base__make_token(
                   (((uint64_t)(v_vminor)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
@@ -24707,7 +24707,7 @@
     v_n = 0;
     if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
       if ( ! (a_src && a_src->meta.closed)) {
-        v_n |= 12288;
+        v_n |= 768;
       }
       goto label__goto_done__break;
     }
@@ -24718,9 +24718,9 @@
       (iop_a_src += 1, wuffs_base__make_empty_struct());
       if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
         if ( ! (a_src && a_src->meta.closed)) {
-          v_n |= 12288;
+          v_n |= 768;
         }
-        v_n |= 4096;
+        v_n |= 256;
         goto label__goto_done__break;
       }
       v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
@@ -24736,26 +24736,26 @@
       if (a_src) {
         iop_a_src = a_src->data.ptr + a_src->meta.ri;
       }
-      if (v_n > 2047) {
+      if (v_n > 99) {
         goto label__goto_done__break;
       }
     }
     if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
       if ( ! (a_src && a_src->meta.closed)) {
-        v_n |= 12288;
+        v_n |= 768;
       }
       goto label__goto_done__break;
     }
     v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
     if (v_c != 46) {
     } else {
-      if (v_n >= 2047) {
-        v_n |= 8192;
+      if (v_n >= 99) {
+        v_n |= 512;
         goto label__goto_done__break;
       }
       v_n += 1;
       (iop_a_src += 1, wuffs_base__make_empty_struct());
-      v_floating_point = 2048;
+      v_floating_point = 128;
       if (a_src) {
         a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
       }
@@ -24763,12 +24763,12 @@
       if (a_src) {
         iop_a_src = a_src->data.ptr + a_src->meta.ri;
       }
-      if (v_n > 2047) {
+      if (v_n > 99) {
         goto label__goto_done__break;
       }
       if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
         if ( ! (a_src && a_src->meta.closed)) {
-          v_n |= 12288;
+          v_n |= 768;
         }
         goto label__goto_done__break;
       }
@@ -24777,25 +24777,25 @@
     if ((v_c != 69) && (v_c != 101)) {
       goto label__goto_done__break;
     }
-    if (v_n >= 2047) {
-      v_n |= 8192;
+    if (v_n >= 99) {
+      v_n |= 512;
       goto label__goto_done__break;
     }
     v_n += 1;
     (iop_a_src += 1, wuffs_base__make_empty_struct());
-    v_floating_point = 2048;
+    v_floating_point = 128;
     if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
       if ( ! (a_src && a_src->meta.closed)) {
-        v_n |= 12288;
+        v_n |= 768;
       }
-      v_n |= 4096;
+      v_n |= 256;
       goto label__goto_done__break;
     }
     v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
     if ((v_c != 43) && (v_c != 45)) {
     } else {
-      if (v_n >= 2047) {
-        v_n |= 8192;
+      if (v_n >= 99) {
+        v_n |= 512;
         goto label__goto_done__break;
       }
       v_n += 1;
@@ -24842,7 +24842,7 @@
   while (true) {
     if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
       if ( ! (a_src && a_src->meta.closed)) {
-        v_n |= 12288;
+        v_n |= 768;
       }
       goto label__0__break;
     }
@@ -24850,8 +24850,8 @@
     if (0 == WUFFS_JSON__LUT_DECIMAL_DIGITS[v_c]) {
       goto label__0__break;
     }
-    if (v_n >= 2047) {
-      v_n |= 8192;
+    if (v_n >= 99) {
+      v_n |= 512;
       goto label__0__break;
     }
     v_n += 1;
@@ -24859,7 +24859,7 @@
   }
   label__0__break:;
   if (v_n == a_n) {
-    v_n |= 4096;
+    v_n |= 256;
   }
   if (a_src) {
     a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));

diff --git a/std/json/common_consts.wuffs b/std/json/common_consts.wuffs
index 31787fc..7c5b445 100644
--- a/std/json/common_consts.wuffs
+++ b/std/json/common_consts.wuffs

@@ -41,8 +41,8 @@
 // maximum possible src.meta.ri and src.meta.wi values). It is a property of
 // the backing array's length, not the length of the JSON-formatted input per
 // se. It is perfectly valid to decode "[1,2]" (of length 5) as JSON, as long
-// as that content is placed in an io_buffer whose data.len is at least 2048.
-pub const DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL : base.u64 = 2048
+// as that content is placed in an io_buffer whose data.len is at least 100.
+pub const DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL : base.u64 = 100
 
 // DECODER_NUMBER_LENGTH_MAX_INCL is the longest supported byte length for a
 // JSON number. Unlike JSON strings, this package's tokenizer never splits a
@@ -52,7 +52,7 @@
 //
 // The JSON spec itself does not define a limit, but allows implementations to
 // set their own limits.
-pri const DECODER_NUMBER_LENGTH_MAX_INCL : base.u64 = 2047
+pri const DECODER_NUMBER_LENGTH_MAX_INCL : base.u64 = 99
 
 // --------
 

diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index 0ac652c..7407994 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs

@@ -75,7 +75,7 @@
 	// https://github.com/google/wuffs/commit/86d3b89f9a6578d964a4b6d71e21dfc9bb702b44
 
 	var vminor            : base.u32[..= 0xFF_FFFF]
-	var number_length     : base.u32[..= 0x3FFF]
+	var number_length     : base.u32[..= 0x3FF]
 	var number_status     : base.u32[..= 0x3]
 	var string_length     : base.u32[..= 0xFFFB]
 	var whitespace_length : base.u32[..= 0xFFFE]
@@ -905,17 +905,17 @@
 				pre args.dst.available() > 0,
 			{
 				number_length = this.decode_number!(src: args.src)
-				number_status = number_length >> 12
+				number_status = number_length >> 8
 				vminor = (base.TOKEN__VBC__NUMBER << 21) |
 					base.TOKEN__VBD__NUMBER__CONTENT_FLOATING_POINT |
 					base.TOKEN__VBD__NUMBER__CONTENT_INTEGER_SIGNED |
 					base.TOKEN__VBD__NUMBER__FORMAT_TEXT
-				if (number_length & 0x800) <> 0 {
+				if (number_length & 0x80) <> 0 {
 					vminor = (base.TOKEN__VBC__NUMBER << 21) |
 						base.TOKEN__VBD__NUMBER__CONTENT_FLOATING_POINT |
 						base.TOKEN__VBD__NUMBER__FORMAT_TEXT
 				}
-				number_length = number_length & 0x7FF
+				number_length = number_length & 0x7F
 				if number_status == 0 {
 					args.dst.write_simple_token_fast!(
 						value_major: 0,
@@ -1196,10 +1196,10 @@
 	this.end_of_data = true
 }
 
-pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FFF] {
+pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FF] {
 	var c              : base.u8
-	var n              : base.u32[..= 0x3FFF]
-	var floating_point : base.u32[..= 0x800]
+	var n              : base.u32[..= 0x3FF]
+	var floating_point : base.u32[..= 0x80]
 
 	while.goto_done true {{
 	n = 0
@@ -1207,7 +1207,7 @@
 	// Peek.
 	if args.src.available() <= 0 {
 		if not args.src.is_closed() {
-			n |= 0x3000
+			n |= 0x300
 		}
 		break.goto_done
 	}
@@ -1224,9 +1224,9 @@
 		// Peek.
 		if args.src.available() <= 0 {
 			if not args.src.is_closed() {
-				n |= 0x3000
+				n |= 0x300
 			}
-			n |= 0x1000  // A '-' without digits is invalid.
+			n |= 0x100  // A '-' without digits is invalid.
 			break.goto_done
 		}
 		c = args.src.peek_u8()
@@ -1239,19 +1239,19 @@
 	if c == '0' {
 		n += 1
 		args.src.skip_u32_fast!(actual: 1, worst_case: 1)
-		assert n <= 2047
+		assert n <= 99
 	} else {
 		n = this.decode_digits!(src: args.src, n: n)
-		if n > 2047 {
+		if n > 99 {
 			break.goto_done
 		}
-		assert n <= 2047
+		assert n <= 99
 	}
 
 	// Peek.
 	if args.src.available() <= 0 {
 		if not args.src.is_closed() {
-			n |= 0x3000
+			n |= 0x300
 		}
 		break.goto_done
 	}
@@ -1260,68 +1260,68 @@
 	// Scan the optional fraction.
 	if c <> '.' {
 		assert args.src.available() > 0
-		assert n <= 2047
+		assert n <= 99
 	} else {
-		if n >= 2047 {
-			n |= 0x2000
+		if n >= 99 {
+			n |= 0x200
 			break.goto_done
 		}
 		n += 1
 		args.src.skip_u32_fast!(actual: 1, worst_case: 1)
-		floating_point = 0x800
+		floating_point = 0x80
 
 		n = this.decode_digits!(src: args.src, n: n)
-		if n > 2047 {
+		if n > 99 {
 			break.goto_done
 		}
 
 		// Peek.
 		if args.src.available() <= 0 {
 			if not args.src.is_closed() {
-				n |= 0x3000
+				n |= 0x300
 			}
 			break.goto_done
 		}
 		c = args.src.peek_u8()
 
 		assert args.src.available() > 0
-		assert n <= 2047
+		assert n <= 99
 	}
 
 	// Scan the optional 'E' or 'e'.
 	if (c <> 'E') and (c <> 'e') {
 		break.goto_done
 	}
-	if n >= 2047 {
-		n |= 0x2000
+	if n >= 99 {
+		n |= 0x200
 		break.goto_done
 	}
 	n += 1
 	args.src.skip_u32_fast!(actual: 1, worst_case: 1)
-	floating_point = 0x800
-	assert n <= 2047
+	floating_point = 0x80
+	assert n <= 99
 
 	// Peek.
 	if args.src.available() <= 0 {
 		if not args.src.is_closed() {
-			n |= 0x3000
+			n |= 0x300
 		}
-		n |= 0x1000  // An 'E' or 'e' without digits is invalid.
+		n |= 0x100  // An 'E' or 'e' without digits is invalid.
 		break.goto_done
 	}
 	c = args.src.peek_u8()
 
 	// Scan the optional '+' or '-'.
 	if (c <> '+') and (c <> '-') {
-		assert n <= 2047
+		assert n <= 99
 	} else {
-		if n >= 2047 {
-			n |= 0x2000
+		if n >= 99 {
+			n |= 0x200
 			break.goto_done
 		}
 		n += 1
 		args.src.skip_u32_fast!(actual: 1, worst_case: 1)
-		assert n <= 2047
+		assert n <= 99
 	}
 
 	// Scan the exponent digits.
@@ -1333,15 +1333,15 @@
 	return n | floating_point
 }
 
-pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 2047]) base.u32[..= 0x3FFF] {
+pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 99]) base.u32[..= 0x3FF] {
 	var c : base.u8
-	var n : base.u32[..= 0x3FFF]
+	var n : base.u32[..= 0x3FF]
 
 	n = args.n
 	while true {
 		if args.src.available() <= 0 {
 			if not args.src.is_closed() {
-				n |= 0x3000
+				n |= 0x300
 			}
 			break
 		}
@@ -1349,27 +1349,24 @@
 		if 0x00 == LUT_DECIMAL_DIGITS[c] {
 			break
 		}
-		// Cap DECODER_NUMBER_LENGTH_MAX_INCL at an arbitrary value, 2047. The
-		// caller's src.data.len should therefore be at least 2048, also known
+		// Cap DECODER_NUMBER_LENGTH_MAX_INCL at an arbitrary value, 99. The
+		// caller's src.data.len should therefore be at least 100, also known
 		// as DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL.
 		//
 		// An example of a JSON number that is 81 bytes long is:
 		// https://github.com/nst/JSONTestSuite/blob/master/test_parsing/y_number_double_close_to_zero.json
 		//
-		// A exact rendering (in printf's "%f" format) of 5e-324 can run to
-		// almost 1100 bytes.
-		//
-		// Note that 2047 (in hex, 0x7FF) is less than 0x800, so we can use
-		// 0x800 as a flag bit in func decoder.decode_number.
-		if n >= 2047 {
-			n |= 0x2000
+		// Note that 99 (in hex, 0x63) is less than 0x80, so we can use 0x80 as
+		// a flag bit in func decoder.decode_number.
+		if n >= 99 {
+			n |= 0x200
 			break
 		}
 		n += 1
 		args.src.skip_u32_fast!(actual: 1, worst_case: 1)
 	} endwhile
 	if n == args.n {
-		n |= 0x1000
+		n |= 0x100
 	}
 	return n
 }

diff --git a/test/c/std/json.c b/test/c/std/json.c
index 9371eaf..5884997 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c

@@ -1982,14 +1982,14 @@
       {.valid = true, .suffix = ".2e-5678 "},
   };
 
-  // src_array holds the overall test string. 2090 is arbitrary but long
-  // enough. See the "if (suffix_length > etc)" check below. 2060 is also
-  // arbitrary but larger than WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL.
+  // src_array holds the overall test string. 119 is arbitrary but long enough.
+  // See the "if (suffix_length > etc)" check below. 102 is also arbitrary but
+  // larger than WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL.
   //
   // See also test_wuffs_json_decode_src_io_buffer_length.
-  uint8_t src_array[2090];
-  memset(&src_array[0], '9', 2060);
-  if (2060 <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) {
+  uint8_t src_array[119];
+  memset(&src_array[0], '9', 102);
+  if (102 <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) {
     RETURN_FAIL("insufficient number_length test case coverage");
   }
 
@@ -1998,7 +1998,7 @@
   int tc;
   for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) {
     size_t suffix_length = strlen(test_cases[tc].suffix);
-    if ((suffix_length + 1) > (2090 - 2060)) {  // +1 for the terminal NUL.
+    if ((suffix_length + 1) > (119 - 102)) {  // +1 for the terminal NUL.
       RETURN_FAIL("tc=%d: src_array is too short", tc);
     }
     bool ends_with_space = (suffix_length > 0) &&
@@ -2007,12 +2007,12 @@
     // Copying the terminal NUL isn't necessary for Wuffs' slices (which are a
     // pointer-length pair), but this backstop can help debugging with printf
     // where "%s" takes a C string (a bare pointer).
-    memcpy(&src_array[2060], test_cases[tc].suffix, suffix_length + 1);
+    memcpy(&src_array[102], test_cases[tc].suffix, suffix_length + 1);
 
     size_t nines_length;
-    for (nines_length = 2037; nines_length < 2050; nines_length++) {
+    for (nines_length = 90; nines_length < 102; nines_length++) {
       wuffs_base__slice_u8 src_data = ((wuffs_base__slice_u8){
-          .ptr = &src_array[2060 - nines_length],
+          .ptr = &src_array[102 - nines_length],
           .len = nines_length + suffix_length,
       });
       size_t number_length = src_data.len - (ends_with_space ? 1 : 0);
@@ -3019,10 +3019,10 @@
         "WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL");
   }
 
-  // src_array holds the test string of repeated '7's. 2050 is arbitrary but
+  // src_array holds the test string of repeated '7's. 107 is arbitrary but
   // long enough for the loop below.
-  uint8_t src_array[2050];
-  memset(&src_array[0], '7', 2050);
+  uint8_t src_array[107];
+  memset(&src_array[0], '7', 107);
 
   wuffs_json__decoder dec;
 
@@ -3031,7 +3031,7 @@
        i <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL + 2; i++) {
     if (i < 0) {
       RETURN_FAIL("invalid test case: i=%d", i);
-    } else if (i > 2050) {
+    } else if (i > 107) {
       RETURN_FAIL("invalid test case: i=%d", i);
     }
commit	84bb3afd8742f9e3b3787dd3965d97f3a0a159c4	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Tue Jul 07 23:29:30 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Tue Jul 07 23:29:30 2020 +1000
tree	f0c181c7f1139b0b6ad0ea06d2d095a8d3285b33
parent	12a8c7598b11ed6b10c9bb42d3323d71097aea3e [diff]