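Optimize std/json string decoding

While decoding a string, consume non-special ASCII 4 bytes at a time
(one peek_u32le plus four lut_chars lookups) before falling back to
the existing byte-at-a-time path.
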
name old speed new speed delta
wuffs_json_decode_1k/clang5 729MB/s ± 1% 1130MB/s ± 0% +54.99% (p=0.000 n=10+10)
wuffs_json_decode_21k_formatted/clang5 541MB/s ± 1% 576MB/s ± 1% +6.44% (p=0.000 n=10+10)
wuffs_json_decode_26k_compact/clang5 603MB/s ± 0% 873MB/s ± 1% +44.72% (p=0.000 n=9+10)
wuffs_json_decode_217k_stringy/clang5 607MB/s ± 0% 727MB/s ± 0% +19.78% (p=0.000 n=10+10)
wuffs_json_decode_1k/gcc9 871MB/s ± 0% 1104MB/s ± 1% +26.74% (p=0.000 n=10+10)
wuffs_json_decode_21k_formatted/gcc9 641MB/s ± 1% 628MB/s ± 0% -2.03% (p=0.000 n=10+10)
wuffs_json_decode_26k_compact/gcc9 746MB/s ± 1% 892MB/s ± 0% +19.54% (p=0.000 n=10+10)
wuffs_json_decode_217k_stringy/gcc9 736MB/s ± 1% 724MB/s ± 1% -1.68% (p=0.000 n=9+10)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 149522b..cb70b3b 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -19349,6 +19349,7 @@
uint32_t v_stack_byte = 0;
uint32_t v_stack_bit = 0;
uint32_t v_match = 0;
+ uint32_t v_c_by_4 = 0;
uint8_t v_c = 0;
uint8_t v_backslash = 0;
uint8_t v_char = 0;
@@ -19495,6 +19496,28 @@
v_string_length = 0;
goto label__string_loop_outer__continue;
}
+ while (((uint64_t)(io2_a_src - iop_a_src)) > 4) {
+ v_c_by_4 = wuffs_base__load_u32le__no_bounds_check(iop_a_src);
+ if (0 != (wuffs_json__lut_chars[(255 & (v_c_by_4 >> 0))] |
+ wuffs_json__lut_chars[(255 & (v_c_by_4 >> 8))] |
+ wuffs_json__lut_chars[(255 & (v_c_by_4 >> 16))] |
+ wuffs_json__lut_chars[(255 & (v_c_by_4 >> 24))])) {
+ goto label__0__break;
+ }
+ (iop_a_src += 4, wuffs_base__make_empty_struct());
+ if (v_string_length > 65527) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_string_length + 4)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 4;
+ }
+ label__0__break:;
v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
v_char = wuffs_json__lut_chars[v_c];
if (v_char == 0) {
@@ -19839,7 +19862,7 @@
}
}
label__string_loop_outer__break:;
- label__0__continue:;
+ label__1__continue:;
while (true) {
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if (a_src && a_src->meta.closed) {
@@ -19849,13 +19872,13 @@
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(11);
- goto label__0__continue;
+ goto label__1__continue;
}
if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
status =
wuffs_base__make_status(wuffs_base__suspension__short_write);
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(12);
- goto label__0__continue;
+ goto label__1__continue;
}
(iop_a_src += 1, wuffs_base__make_empty_struct());
*iop_a_dst++ = wuffs_base__make_token(
@@ -19863,9 +19886,9 @@
<< WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)(2)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
(((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__0__break;
+ goto label__1__break;
}
- label__0__break:;
+ label__1__break:;
if (0 == (v_expect & 16)) {
v_expect = 8;
goto label__outer__continue;
@@ -19910,7 +19933,7 @@
<< WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)(v_number_length))
<< WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__1__break;
+ goto label__2__break;
}
while (v_number_length > 0) {
v_number_length -= 1;
@@ -19940,7 +19963,7 @@
}
}
}
- label__1__break:;
+ label__2__break:;
goto label__goto_parsed_a_leaf_value__break;
} else if (v_class == 5) {
v_vminor = 2113553;
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index c2f1efb..7f3f483 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -55,6 +55,7 @@
var stack_byte : base.u32[..= (1024 / 32) - 1]
var stack_bit : base.u32[..= 31]
var match : base.u32[..= 2]
+ var c_by_4 : base.u32
var c : base.u8
var backslash : base.u8
var char : base.u8
@@ -190,6 +191,32 @@
string_length = 0
continue.string_loop_outer
}
+
+ // As an optimization, consume non-special ASCII 4 bytes at a time.
+ while args.src.available() > 4,
+ inv args.dst.available() > 0,
+ inv args.src.available() > 0,
+ {
+ c_by_4 = args.src.peek_u32le()
+ if 0x00 <> (lut_chars[0xFF & (c_by_4 >> 0)] |
+ lut_chars[0xFF & (c_by_4 >> 8)] |
+ lut_chars[0xFF & (c_by_4 >> 16)] |
+ lut_chars[0xFF & (c_by_4 >> 24)]) {
+ break
+ }
+ args.src.skip32_fast!(actual: 4, worst_case: 4)
+ if string_length > (0xFFFB - 4) {
+ args.dst.write_fast_token!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length + 4)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 4
+ }
+
c = args.src.peek_u8()
char = lut_chars[c]