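Roll back 'factor out json.decoder.decode_string'
This inlines the string-decoding loop back at the call site and removes
the separate wuffs_json__decoder__decode_string function, its prototype
and the p_decode_string suspension-point field.
See the previous commit for the rationale.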
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 3d34d19..bd4ff6b 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6164,7 +6164,6 @@
bool f_end_of_data;
uint32_t p_decode_tokens[1];
- uint32_t p_decode_string[1];
uint32_t p_decode_leading[1];
uint32_t p_decode_comment[1];
uint32_t p_decode_inf_nan[1];
@@ -20928,11 +20927,6 @@
// ---------------- Private Function Prototypes
-static wuffs_base__status //
-wuffs_json__decoder__decode_string(wuffs_json__decoder* self,
- wuffs_base__token_buffer* a_dst,
- wuffs_base__io_buffer* a_src);
-
static uint32_t //
wuffs_json__decoder__decode_number(wuffs_json__decoder* self,
wuffs_base__io_buffer* a_src);
@@ -21130,13 +21124,28 @@
uint32_t v_vminor = 0;
uint32_t v_number_length = 0;
uint32_t v_number_status = 0;
+ uint32_t v_string_length = 0;
uint32_t v_whitespace_length = 0;
uint32_t v_depth = 0;
uint32_t v_stack_byte = 0;
uint32_t v_stack_bit = 0;
uint32_t v_match = 0;
+ uint32_t v_c4 = 0;
uint8_t v_c = 0;
+ uint8_t v_backslash = 0;
+ uint8_t v_char = 0;
uint8_t v_class = 0;
+ uint32_t v_multi_byte_utf8 = 0;
+ uint32_t v_backslash_x_length = 0;
+ uint8_t v_backslash_x_ok = 0;
+ uint32_t v_backslash_x_string = 0;
+ uint8_t v_uni4_ok = 0;
+ uint64_t v_uni4_string = 0;
+ uint32_t v_uni4_value = 0;
+ uint32_t v_uni4_high_surrogate = 0;
+ uint8_t v_uni8_ok = 0;
+ uint64_t v_uni8_string = 0;
+ uint32_t v_uni8_value = 0;
uint32_t v_expect = 0;
uint32_t v_expect_after_value = 0;
@@ -21267,24 +21276,608 @@
(((uint64_t)(1)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
(((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
(iop_a_src += 1, wuffs_base__make_empty_struct());
- if (a_dst) {
- a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ label__string_loop_outer__continue:;
+ while (true) {
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ status =
+ wuffs_base__make_status(wuffs_base__suspension__short_write);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length = 0;
+ label__string_loop_inner__continue:;
+ while (true) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ }
+ if (a_src && a_src->meta.closed) {
+ status =
+ wuffs_base__make_status(wuffs_json__error__bad_input);
+ goto exit;
+ }
+ status =
+ wuffs_base__make_status(wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ while (((uint64_t)(io2_a_src - iop_a_src)) > 4) {
+ v_c4 = wuffs_base__load_u32le__no_bounds_check(iop_a_src);
+ if (0 != (wuffs_json__lut_chars[(255 & (v_c4 >> 0))] |
+ wuffs_json__lut_chars[(255 & (v_c4 >> 8))] |
+ wuffs_json__lut_chars[(255 & (v_c4 >> 16))] |
+ wuffs_json__lut_chars[(255 & (v_c4 >> 24))])) {
+ goto label__0__break;
+ }
+ (iop_a_src += 4, wuffs_base__make_empty_struct());
+ if (v_string_length > 65527) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_string_length + 4)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 4;
+ }
+ label__0__break:;
+ v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
+ v_char = wuffs_json__lut_chars[v_c];
+ if (v_char == 0) {
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ if (v_string_length >= 65531) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(65532))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 1;
+ goto label__string_loop_inner__continue;
+ } else if (v_char == 1) {
+ if (v_string_length != 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ }
+ goto label__string_loop_outer__break;
+ } else if (v_char == 2) {
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 2) {
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_c = ((uint8_t)(
+ (wuffs_base__load_u16le__no_bounds_check(iop_a_src) >> 8)));
+ v_backslash = wuffs_json__lut_backslashes[v_c];
+ if ((v_backslash & 128) != 0) {
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(
+ (6291456 | ((uint32_t)((v_backslash & 127))))))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(2)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ } else if (v_backslash != 0) {
+ if (self->private_impl.f_quirk_enabled_allow_backslash_etc[(
+ v_backslash & 7)]) {
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(
+ (6291456 |
+ ((uint32_t)(wuffs_json__lut_quirky_backslashes[(
+ v_backslash & 7)])))))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(2)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ } else if (v_c == 117) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 6) {
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(7);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_uni4_string =
+ (((uint64_t)(wuffs_base__load_u48le__no_bounds_check(
+ iop_a_src))) >>
+ 16);
+ v_uni4_value = 0;
+ v_uni4_ok = 128;
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 0))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 12);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 8))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 8);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 16))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 4);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 24))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 0);
+ if (v_uni4_ok == 0) {
+ } else if ((v_uni4_value < 55296) || (57343 < v_uni4_value)) {
+ (iop_a_src += 6, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)((6291456 | v_uni4_value)))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(6)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ } else if (v_uni4_value >= 56320) {
+ } else {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 12) {
+ if (a_src && a_src->meta.closed) {
+ if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ (iop_a_src += 6, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3))
+ << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(6))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(8);
+ v_string_length = 0;
+ v_uni4_value = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_uni4_string = (wuffs_base__load_u64le__no_bounds_check(
+ iop_a_src + 4) >>
+ 16);
+ if (((255 & (v_uni4_string >> 0)) != 92) ||
+ ((255 & (v_uni4_string >> 8)) != 117)) {
+ v_uni4_high_surrogate = 0;
+ v_uni4_value = 0;
+ v_uni4_ok = 0;
+ } else {
+ v_uni4_high_surrogate =
+ (65536 + ((v_uni4_value - 55296) << 10));
+ v_uni4_value = 0;
+ v_uni4_ok = 128;
+ v_uni4_string >>= 16;
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 0))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 12);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 8))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 8);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 16))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 4);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni4_string >> 24))];
+ v_uni4_ok &= v_c;
+ v_uni4_value |= (((uint32_t)((v_c & 15))) << 0);
+ }
+ if ((v_uni4_ok != 0) && (56320 <= v_uni4_value) &&
+ (v_uni4_value <= 57343)) {
+ v_uni4_value -= 56320;
+ (iop_a_src += 12, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)((6291456 | v_uni4_high_surrogate |
+ v_uni4_value)))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(12))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 6) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__internal_error_inconsistent_i_o);
+ goto exit;
+ }
+ (iop_a_src += 6, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(6)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ } else if ((v_c == 85) &&
+ self->private_impl
+ .f_quirk_enabled_allow_backslash_capital_u) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 10) {
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(9);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_uni8_string =
+ wuffs_base__load_u64le__no_bounds_check(iop_a_src + 2);
+ v_uni8_value = 0;
+ v_uni8_ok = 128;
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 0))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 28);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 8))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 24);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 16))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 20);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 24))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 16);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 32))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 12);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 40))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 8);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 48))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 4);
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_uni8_string >> 56))];
+ v_uni8_ok &= v_c;
+ v_uni8_value |= (((uint32_t)((v_c & 15))) << 0);
+ if (v_uni8_ok == 0) {
+ } else if ((v_uni8_value < 55296) ||
+ ((57343 < v_uni8_value) &&
+ (v_uni8_value <= 1114111))) {
+ (iop_a_src += 10, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)((6291456 | (v_uni8_value & 2097151))))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(10)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ } else if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ (iop_a_src += 10, wuffs_base__make_empty_struct());
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(10)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ } else if ((v_c == 120) &&
+ self->private_impl
+ .f_quirk_enabled_allow_backslash_x) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 4) {
+ if (a_src && a_src->meta.closed) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(10);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_backslash_x_length = 0;
+ while ((v_backslash_x_length <= 65531) &&
+ (((uint64_t)(io2_a_src - iop_a_src)) >= 4)) {
+ v_backslash_x_string =
+ wuffs_base__load_u32le__no_bounds_check(iop_a_src);
+ v_backslash_x_ok = 128;
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_backslash_x_string >> 16))];
+ v_backslash_x_ok &= v_c;
+ v_c = wuffs_json__lut_hexadecimal_digits[(
+ 255 & (v_backslash_x_string >> 24))];
+ v_backslash_x_ok &= v_c;
+ if ((v_backslash_x_ok == 0) ||
+ ((v_backslash_x_string & 65535) != 30812)) {
+ goto label__1__break;
+ }
+ (iop_a_src += 4, wuffs_base__make_empty_struct());
+ v_backslash_x_length += 4;
+ }
+ label__1__break:;
+ if (v_backslash_x_length == 0) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ }
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194432))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_backslash_x_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ goto label__string_loop_outer__continue;
+ }
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_backslash_escape);
+ goto exit;
+ } else if (v_char == 3) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 2) {
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (a_src && a_src->meta.closed) {
+ if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(1))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ goto label__string_loop_outer__continue;
+ }
+ status =
+ wuffs_base__make_status(wuffs_json__error__bad_utf_8);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(11);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_multi_byte_utf8 = ((uint32_t)(
+ wuffs_base__load_u16le__no_bounds_check(iop_a_src)));
+ if ((v_multi_byte_utf8 & 49152) == 32768) {
+ v_multi_byte_utf8 = ((1984 & (v_multi_byte_utf8 << 6)) |
+ (63 & (v_multi_byte_utf8 >> 8)));
+ (iop_a_src += 2, wuffs_base__make_empty_struct());
+ if (v_string_length >= 65528) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_string_length + 2)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 2;
+ goto label__string_loop_inner__continue;
+ }
+ } else if (v_char == 4) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 3) {
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (a_src && a_src->meta.closed) {
+ if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(1))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ goto label__string_loop_outer__continue;
+ }
+ status =
+ wuffs_base__make_status(wuffs_json__error__bad_utf_8);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(12);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_multi_byte_utf8 = ((uint32_t)(
+ wuffs_base__load_u24le__no_bounds_check(iop_a_src)));
+ if ((v_multi_byte_utf8 & 12632064) == 8421376) {
+ v_multi_byte_utf8 = ((61440 & (v_multi_byte_utf8 << 12)) |
+ (4032 & (v_multi_byte_utf8 >> 2)) |
+ (63 & (v_multi_byte_utf8 >> 16)));
+ if ((2047 < v_multi_byte_utf8) &&
+ ((v_multi_byte_utf8 < 55296) ||
+ (57343 < v_multi_byte_utf8))) {
+ (iop_a_src += 3, wuffs_base__make_empty_struct());
+ if (v_string_length >= 65528) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_string_length + 3)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 3;
+ goto label__string_loop_inner__continue;
+ }
+ }
+ } else if (v_char == 5) {
+ if (((uint64_t)(io2_a_src - iop_a_src)) < 4) {
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (a_src && a_src->meta.closed) {
+ if (self->private_impl
+ .f_quirk_enabled_replace_invalid_unicode) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(1))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ goto label__string_loop_outer__continue;
+ }
+ status =
+ wuffs_base__make_status(wuffs_json__error__bad_utf_8);
+ goto exit;
+ }
+ status = wuffs_base__make_status(
+ wuffs_base__suspension__short_read);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(13);
+ v_string_length = 0;
+ v_char = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_multi_byte_utf8 =
+ wuffs_base__load_u32le__no_bounds_check(iop_a_src);
+ if ((v_multi_byte_utf8 & 3233857536) == 2155905024) {
+ v_multi_byte_utf8 = ((1835008 & (v_multi_byte_utf8 << 18)) |
+ (258048 & (v_multi_byte_utf8 << 4)) |
+ (4032 & (v_multi_byte_utf8 >> 10)) |
+ (63 & (v_multi_byte_utf8 >> 24)));
+ if ((65535 < v_multi_byte_utf8) &&
+ (v_multi_byte_utf8 <= 1114111)) {
+ (iop_a_src += 4, wuffs_base__make_empty_struct());
+ if (v_string_length >= 65528) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)((v_string_length + 4)))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ goto label__string_loop_outer__continue;
+ }
+ v_string_length += 4;
+ goto label__string_loop_inner__continue;
+ }
+ }
+ }
+ if (v_string_length > 0) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4194337))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(v_string_length))
+ << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ v_string_length = 0;
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
+ goto label__string_loop_outer__continue;
+ }
+ }
+ if (v_char == 128) {
+ status = wuffs_base__make_status(
+ wuffs_json__error__bad_c0_control_code);
+ goto exit;
+ }
+ if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(6356989))
+ << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
+ (((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ (iop_a_src += 1, wuffs_base__make_empty_struct());
+ goto label__string_loop_outer__continue;
+ }
+ status = wuffs_base__make_status(wuffs_json__error__bad_utf_8);
+ goto exit;
+ }
}
- if (a_src) {
- a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
- }
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(4);
- status = wuffs_json__decoder__decode_string(self, a_dst, a_src);
- if (a_dst) {
- iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
- }
- if (a_src) {
- iop_a_src = a_src->data.ptr + a_src->meta.ri;
- }
- if (status.repr) {
- goto suspend;
- }
- label__0__continue:;
+ label__string_loop_outer__break:;
+ label__2__continue:;
while (true) {
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
if (a_src && a_src->meta.closed) {
@@ -21293,14 +21886,14 @@
}
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
- goto label__0__continue;
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(14);
+ goto label__2__continue;
}
if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
status =
wuffs_base__make_status(wuffs_base__suspension__short_write);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
- goto label__0__continue;
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(15);
+ goto label__2__continue;
}
(iop_a_src += 1, wuffs_base__make_empty_struct());
*iop_a_dst++ = wuffs_base__make_token(
@@ -21308,9 +21901,9 @@
<< WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)(2)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
(((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__0__break;
+ goto label__2__break;
}
- label__0__break:;
+ label__2__break:;
if (0 == (v_expect & 16)) {
v_expect = 4104;
goto label__outer__continue;
@@ -21363,7 +21956,7 @@
<< WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)(v_number_length))
<< WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__1__break;
+ goto label__3__break;
}
while (v_number_length > 0) {
v_number_length -= 1;
@@ -21383,7 +21976,7 @@
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(7);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(16);
status =
wuffs_json__decoder__decode_inf_nan(self, a_dst, a_src);
if (a_dst) {
@@ -21395,7 +21988,7 @@
if (status.repr) {
goto suspend;
}
- goto label__1__break;
+ goto label__3__break;
}
status = wuffs_base__make_status(wuffs_json__error__bad_input);
goto exit;
@@ -21406,15 +21999,15 @@
} else {
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(8);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(17);
while (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
status = wuffs_base__make_status(
wuffs_base__suspension__short_write);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(9);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(18);
}
}
}
- label__1__break:;
+ label__3__break:;
goto label__goto_parsed_a_leaf_value__break;
} else if (v_class == 5) {
v_vminor = 2113553;
@@ -21544,7 +22137,7 @@
} else if (v_match == 1) {
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(10);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(19);
goto label__outer__continue;
}
} else if (v_class == 10) {
@@ -21565,7 +22158,7 @@
} else if (v_match == 1) {
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(11);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(20);
goto label__outer__continue;
}
} else if (v_class == 11) {
@@ -21586,7 +22179,7 @@
} else if (v_match == 1) {
status =
wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(12);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(21);
goto label__outer__continue;
}
if (self->private_impl.f_quirk_enabled_allow_inf_nan_numbers) {
@@ -21596,7 +22189,7 @@
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(13);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(22);
status = wuffs_json__decoder__decode_inf_nan(self, a_dst, a_src);
if (a_dst) {
iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
@@ -21618,7 +22211,7 @@
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(14);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(23);
status = wuffs_json__decoder__decode_comment(self, a_dst, a_src);
if (a_dst) {
iop_a_dst = a_dst->data.ptr + a_dst->meta.wi;
@@ -21649,7 +22242,7 @@
if (a_src) {
a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
}
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(15);
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(24);
status =
wuffs_json__decoder__decode_trailing_new_line(self, a_dst, a_src);
if (a_dst) {
@@ -21696,659 +22289,6 @@
return status;
}
-// -------- func json.decoder.decode_string
-
-static wuffs_base__status //
-wuffs_json__decoder__decode_string(wuffs_json__decoder* self,
- wuffs_base__token_buffer* a_dst,
- wuffs_base__io_buffer* a_src) {
- wuffs_base__status status = wuffs_base__make_status(NULL);
-
- uint32_t v_string_length = 0;
- uint32_t v_c4 = 0;
- uint8_t v_c = 0;
- uint8_t v_backslash = 0;
- uint8_t v_char = 0;
- uint32_t v_multi_byte_utf8 = 0;
- uint32_t v_backslash_x_length = 0;
- uint8_t v_backslash_x_ok = 0;
- uint32_t v_backslash_x_string = 0;
- uint8_t v_uni4_ok = 0;
- uint64_t v_uni4_string = 0;
- uint32_t v_uni4_value = 0;
- uint32_t v_uni4_high_surrogate = 0;
- uint8_t v_uni8_ok = 0;
- uint64_t v_uni8_string = 0;
- uint32_t v_uni8_value = 0;
-
- wuffs_base__token* iop_a_dst = NULL;
- wuffs_base__token* io0_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- wuffs_base__token* io1_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- wuffs_base__token* io2_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- if (a_dst) {
- io0_a_dst = a_dst->data.ptr;
- io1_a_dst = io0_a_dst + a_dst->meta.wi;
- iop_a_dst = io1_a_dst;
- io2_a_dst = io0_a_dst + a_dst->data.len;
- if (a_dst->meta.closed) {
- io2_a_dst = iop_a_dst;
- }
- }
- uint8_t* iop_a_src = NULL;
- uint8_t* io0_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- uint8_t* io1_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- uint8_t* io2_a_src WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
- if (a_src) {
- io0_a_src = a_src->data.ptr;
- io1_a_src = io0_a_src + a_src->meta.ri;
- iop_a_src = io1_a_src;
- io2_a_src = io0_a_src + a_src->meta.wi;
- }
-
- uint32_t coro_susp_point = self->private_impl.p_decode_string[0];
- switch (coro_susp_point) {
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
-
- label__string_loop_outer__continue:;
- while (true) {
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- status = wuffs_base__make_status(wuffs_base__suspension__short_write);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(1);
- goto label__string_loop_outer__continue;
- }
- v_string_length = 0;
- label__string_loop_inner__continue:;
- while (true) {
- if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- }
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(wuffs_json__error__bad_input);
- goto exit;
- }
- status = wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(2);
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- while (((uint64_t)(io2_a_src - iop_a_src)) > 4) {
- v_c4 = wuffs_base__load_u32le__no_bounds_check(iop_a_src);
- if (0 != (wuffs_json__lut_chars[(255 & (v_c4 >> 0))] |
- wuffs_json__lut_chars[(255 & (v_c4 >> 8))] |
- wuffs_json__lut_chars[(255 & (v_c4 >> 16))] |
- wuffs_json__lut_chars[(255 & (v_c4 >> 24))])) {
- goto label__0__break;
- }
- (iop_a_src += 4, wuffs_base__make_empty_struct());
- if (v_string_length > 65527) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)((v_string_length + 4)))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- v_string_length += 4;
- }
- label__0__break:;
- v_c = wuffs_base__load_u8be__no_bounds_check(iop_a_src);
- v_char = wuffs_json__lut_chars[v_c];
- if (v_char == 0) {
- (iop_a_src += 1, wuffs_base__make_empty_struct());
- if (v_string_length >= 65531) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(65532)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- v_string_length += 1;
- goto label__string_loop_inner__continue;
- } else if (v_char == 1) {
- if (v_string_length != 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- }
- goto label__string_loop_outer__break;
- } else if (v_char == 2) {
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- goto label__string_loop_outer__continue;
- }
- }
- if (((uint64_t)(io2_a_src - iop_a_src)) < 2) {
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(3);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_c = ((uint8_t)(
- (wuffs_base__load_u16le__no_bounds_check(iop_a_src) >> 8)));
- v_backslash = wuffs_json__lut_backslashes[v_c];
- if ((v_backslash & 128) != 0) {
- (iop_a_src += 2, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)((6291456 | ((uint32_t)((v_backslash & 127))))))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(2)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- } else if (v_backslash != 0) {
- if (self->private_impl
- .f_quirk_enabled_allow_backslash_etc[(v_backslash & 7)]) {
- (iop_a_src += 2, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)((6291456 |
- ((uint32_t)(wuffs_json__lut_quirky_backslashes[(
- v_backslash & 7)])))))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(2)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- } else if (v_c == 117) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 6) {
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(4);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_uni4_string =
- (((uint64_t)(
- wuffs_base__load_u48le__no_bounds_check(iop_a_src))) >>
- 16);
- v_uni4_value = 0;
- v_uni4_ok = 128;
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni4_string >> 0))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 12);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni4_string >> 8))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 8);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni4_string >> 16))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 4);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni4_string >> 24))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 0);
- if (v_uni4_ok == 0) {
- } else if ((v_uni4_value < 55296) || (57343 < v_uni4_value)) {
- (iop_a_src += 6, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)((6291456 | v_uni4_value)))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(6)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- } else if (v_uni4_value >= 56320) {
- } else {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 12) {
- if (a_src && a_src->meta.closed) {
- if (self->private_impl
- .f_quirk_enabled_replace_invalid_unicode) {
- (iop_a_src += 6, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(6)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(5);
- v_string_length = 0;
- v_uni4_value = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_uni4_string =
- (wuffs_base__load_u64le__no_bounds_check(iop_a_src + 4) >>
- 16);
- if (((255 & (v_uni4_string >> 0)) != 92) ||
- ((255 & (v_uni4_string >> 8)) != 117)) {
- v_uni4_high_surrogate = 0;
- v_uni4_value = 0;
- v_uni4_ok = 0;
- } else {
- v_uni4_high_surrogate =
- (65536 + ((v_uni4_value - 55296) << 10));
- v_uni4_value = 0;
- v_uni4_ok = 128;
- v_uni4_string >>= 16;
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_uni4_string >> 0))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 12);
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_uni4_string >> 8))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 8);
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_uni4_string >> 16))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 4);
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_uni4_string >> 24))];
- v_uni4_ok &= v_c;
- v_uni4_value |= (((uint32_t)((v_c & 15))) << 0);
- }
- if ((v_uni4_ok != 0) && (56320 <= v_uni4_value) &&
- (v_uni4_value <= 57343)) {
- v_uni4_value -= 56320;
- (iop_a_src += 12, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(
- (6291456 | v_uni4_high_surrogate | v_uni4_value)))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(12)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- }
- if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 6) {
- status = wuffs_base__make_status(
- wuffs_json__error__internal_error_inconsistent_i_o);
- goto exit;
- }
- (iop_a_src += 6, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(6)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- } else if ((v_c == 85) &&
- self->private_impl
- .f_quirk_enabled_allow_backslash_capital_u) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 10) {
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_uni8_string =
- wuffs_base__load_u64le__no_bounds_check(iop_a_src + 2);
- v_uni8_value = 0;
- v_uni8_ok = 128;
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 0))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 28);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 8))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 24);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 16))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 20);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 24))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 16);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 32))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 12);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 40))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 8);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 48))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 4);
- v_c = wuffs_json__lut_hexadecimal_digits[(255 &
- (v_uni8_string >> 56))];
- v_uni8_ok &= v_c;
- v_uni8_value |= (((uint32_t)((v_c & 15))) << 0);
- if (v_uni8_ok == 0) {
- } else if ((v_uni8_value < 55296) ||
- ((57343 < v_uni8_value) && (v_uni8_value <= 1114111))) {
- (iop_a_src += 10, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)((6291456 | (v_uni8_value & 2097151))))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(10)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- } else if (self->private_impl
- .f_quirk_enabled_replace_invalid_unicode) {
- (iop_a_src += 10, wuffs_base__make_empty_struct());
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(10)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- } else if ((v_c == 120) &&
- self->private_impl.f_quirk_enabled_allow_backslash_x) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 4) {
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(7);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_backslash_x_length = 0;
- while ((v_backslash_x_length <= 65531) &&
- (((uint64_t)(io2_a_src - iop_a_src)) >= 4)) {
- v_backslash_x_string =
- wuffs_base__load_u32le__no_bounds_check(iop_a_src);
- v_backslash_x_ok = 128;
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_backslash_x_string >> 16))];
- v_backslash_x_ok &= v_c;
- v_c = wuffs_json__lut_hexadecimal_digits[(
- 255 & (v_backslash_x_string >> 24))];
- v_backslash_x_ok &= v_c;
- if ((v_backslash_x_ok == 0) ||
- ((v_backslash_x_string & 65535) != 30812)) {
- goto label__1__break;
- }
- (iop_a_src += 4, wuffs_base__make_empty_struct());
- v_backslash_x_length += 4;
- }
- label__1__break:;
- if (v_backslash_x_length == 0) {
- status = wuffs_base__make_status(
- wuffs_json__error__bad_backslash_escape);
- goto exit;
- }
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194432))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_backslash_x_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- goto label__string_loop_outer__continue;
- }
- status =
- wuffs_base__make_status(wuffs_json__error__bad_backslash_escape);
- goto exit;
- } else if (v_char == 3) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 2) {
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- goto label__string_loop_outer__continue;
- }
- }
- if (a_src && a_src->meta.closed) {
- if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- (iop_a_src += 1, wuffs_base__make_empty_struct());
- goto label__string_loop_outer__continue;
- }
- status = wuffs_base__make_status(wuffs_json__error__bad_utf_8);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(8);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_multi_byte_utf8 =
- ((uint32_t)(wuffs_base__load_u16le__no_bounds_check(iop_a_src)));
- if ((v_multi_byte_utf8 & 49152) == 32768) {
- v_multi_byte_utf8 = ((1984 & (v_multi_byte_utf8 << 6)) |
- (63 & (v_multi_byte_utf8 >> 8)));
- (iop_a_src += 2, wuffs_base__make_empty_struct());
- if (v_string_length >= 65528) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)((v_string_length + 2)))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- v_string_length += 2;
- goto label__string_loop_inner__continue;
- }
- } else if (v_char == 4) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 3) {
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- goto label__string_loop_outer__continue;
- }
- }
- if (a_src && a_src->meta.closed) {
- if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- (iop_a_src += 1, wuffs_base__make_empty_struct());
- goto label__string_loop_outer__continue;
- }
- status = wuffs_base__make_status(wuffs_json__error__bad_utf_8);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(9);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_multi_byte_utf8 =
- ((uint32_t)(wuffs_base__load_u24le__no_bounds_check(iop_a_src)));
- if ((v_multi_byte_utf8 & 12632064) == 8421376) {
- v_multi_byte_utf8 = ((61440 & (v_multi_byte_utf8 << 12)) |
- (4032 & (v_multi_byte_utf8 >> 2)) |
- (63 & (v_multi_byte_utf8 >> 16)));
- if ((2047 < v_multi_byte_utf8) &&
- ((v_multi_byte_utf8 < 55296) || (57343 < v_multi_byte_utf8))) {
- (iop_a_src += 3, wuffs_base__make_empty_struct());
- if (v_string_length >= 65528) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)((v_string_length + 3)))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- v_string_length += 3;
- goto label__string_loop_inner__continue;
- }
- }
- } else if (v_char == 5) {
- if (((uint64_t)(io2_a_src - iop_a_src)) < 4) {
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- goto label__string_loop_outer__continue;
- }
- }
- if (a_src && a_src->meta.closed) {
- if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- (iop_a_src += 1, wuffs_base__make_empty_struct());
- goto label__string_loop_outer__continue;
- }
- status = wuffs_base__make_status(wuffs_json__error__bad_utf_8);
- goto exit;
- }
- status =
- wuffs_base__make_status(wuffs_base__suspension__short_read);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(10);
- v_string_length = 0;
- v_char = 0;
- goto label__string_loop_outer__continue;
- }
- v_multi_byte_utf8 =
- wuffs_base__load_u32le__no_bounds_check(iop_a_src);
- if ((v_multi_byte_utf8 & 3233857536) == 2155905024) {
- v_multi_byte_utf8 = ((1835008 & (v_multi_byte_utf8 << 18)) |
- (258048 & (v_multi_byte_utf8 << 4)) |
- (4032 & (v_multi_byte_utf8 >> 10)) |
- (63 & (v_multi_byte_utf8 >> 24)));
- if ((65535 < v_multi_byte_utf8) && (v_multi_byte_utf8 <= 1114111)) {
- (iop_a_src += 4, wuffs_base__make_empty_struct());
- if (v_string_length >= 65528) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337))
- << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)((v_string_length + 4)))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- goto label__string_loop_outer__continue;
- }
- v_string_length += 4;
- goto label__string_loop_inner__continue;
- }
- }
- }
- if (v_string_length > 0) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(4194337)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(v_string_length))
- << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- v_string_length = 0;
- if (((uint64_t)(io2_a_dst - iop_a_dst)) <= 0) {
- goto label__string_loop_outer__continue;
- }
- }
- if (v_char == 128) {
- status =
- wuffs_base__make_status(wuffs_json__error__bad_c0_control_code);
- goto exit;
- }
- if (self->private_impl.f_quirk_enabled_replace_invalid_unicode) {
- *iop_a_dst++ = wuffs_base__make_token(
- (((uint64_t)(6356989)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)(3)) << WUFFS_BASE__TOKEN__LINK__SHIFT) |
- (((uint64_t)(1)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
- (iop_a_src += 1, wuffs_base__make_empty_struct());
- goto label__string_loop_outer__continue;
- }
- status = wuffs_base__make_status(wuffs_json__error__bad_utf_8);
- goto exit;
- }
- }
- label__string_loop_outer__break:;
-
- goto ok;
- ok:
- self->private_impl.p_decode_string[0] = 0;
- goto exit;
- }
-
- goto suspend;
-suspend:
- self->private_impl.p_decode_string[0] =
- wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
-
- goto exit;
-exit:
- if (a_dst) {
- a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
- }
- if (a_src) {
- a_src->meta.ri = ((size_t)(iop_a_src - a_src->data.ptr));
- }
-
- return status;
-}
-
// -------- func json.decoder.decode_number
static uint32_t //
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index 05e6f4e..7ad5da4 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -110,13 +110,31 @@
var vminor : base.u32[..= 0xFF_FFFF]
var number_length : base.u32[..= 0x3FF]
var number_status : base.u32[..= 0x3]
+ var string_length : base.u32[..= 0xFFFB]
var whitespace_length : base.u32[..= 0xFFFE]
var depth : base.u32[..= 1024]
var stack_byte : base.u32[..= (1024 / 32) - 1]
var stack_bit : base.u32[..= 31]
var match : base.u32[..= 2]
+ var c4 : base.u32
var c : base.u8
+ var backslash : base.u8
+ var char : base.u8
var class : base.u8[..= 0x0F]
+ var multi_byte_utf8 : base.u32
+
+ var backslash_x_length : base.u32[..= 0xFFFF]
+ var backslash_x_ok : base.u8
+ var backslash_x_string : base.u32
+
+ var uni4_ok : base.u8
+ var uni4_string : base.u64
+ var uni4_value : base.u32[..= 0xFFFF]
+ var uni4_high_surrogate : base.u32[..= 0x10_FC00]
+
+ var uni8_ok : base.u8
+ var uni8_string : base.u64
+ var uni8_value : base.u32[..= 0xFFFF_FFFF]
// expect is a bitmask of what the next character class can be.
//
@@ -224,7 +242,558 @@
length: 1)
args.src.skip32_fast!(actual: 1, worst_case: 1)
- this.decode_string?(dst: args.dst, src: args.src)
+ while.string_loop_outer true {
+ if args.dst.available() <= 0 {
+ yield? base."$short write"
+ continue.string_loop_outer
+ }
+
+ string_length = 0
+ while.string_loop_inner true,
+ pre args.dst.available() > 0,
+ {
+ if args.src.available() <= 0 {
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ }
+ if args.src.is_closed() {
+ return "#bad input"
+ }
+ yield? base."$short read"
+ string_length = 0
+ continue.string_loop_outer
+ }
+
+ // As an optimization, consume non-special ASCII 4 bytes at
+ // a time.
+ while args.src.available() > 4,
+ inv args.dst.available() > 0,
+ inv args.src.available() > 0,
+ {
+ c4 = args.src.peek_u32le()
+ if 0x00 <> (lut_chars[0xFF & (c4 >> 0)] |
+ lut_chars[0xFF & (c4 >> 8)] |
+ lut_chars[0xFF & (c4 >> 16)] |
+ lut_chars[0xFF & (c4 >> 24)]) {
+ break
+ }
+ args.src.skip32_fast!(actual: 4, worst_case: 4)
+ if string_length > (0xFFFB - 4) {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length + 4)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 4
+ } endwhile
+
+ c = args.src.peek_u8()
+ char = lut_chars[c]
+
+ if char == 0x00 { // Non-special ASCII.
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ if string_length >= 0xFFFB {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: 0xFFFC)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 1
+ continue.string_loop_inner
+
+ } else if char == 0x01 { // '"'
+ if string_length <> 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ }
+ break.string_loop_outer
+
+ } else if char == 0x02 { // '\\'.
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ if args.dst.available() <= 0 {
+ continue.string_loop_outer
+ }
+ }
+ assert args.dst.available() > 0
+
+ if args.src.available() < 2 {
+ if args.src.is_closed() {
+ return "#bad backslash-escape"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ c = (args.src.peek_u16le() >> 8) as base.u8
+ backslash = lut_backslashes[c]
+ if (backslash & 0x80) <> 0 {
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_0000 | ((backslash & 0x7F) as base.u32),
+ link: 0x3,
+ length: 2)
+ continue.string_loop_outer
+
+ } else if backslash <> 0 {
+ if this.quirk_enabled_allow_backslash_etc[backslash & 7] {
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_0000 | (lut_quirky_backslashes[backslash & 7] as base.u32),
+ link: 0x3,
+ length: 2)
+ continue.string_loop_outer
+ }
+
+ } else if c == 'u' {
+ // -------- BEGIN backslash-u.
+ if args.src.available() < 6 {
+ if args.src.is_closed() {
+ return "#bad backslash-escape"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+
+ uni4_string = args.src.peek_u48le_as_u64() >> 16
+ uni4_value = 0
+ uni4_ok = 0x80
+
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 12
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 8
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 4
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 0
+
+ if uni4_ok == 0 {
+ // It wasn't 4 hexadecimal digits. No-op (and
+ // fall through to "#bad backslash-escape").
+
+ } else if (uni4_value < 0xD800) or (0xDFFF < uni4_value) {
+ // Not a Unicode surrogate. We're good.
+ args.src.skip32_fast!(actual: 6, worst_case: 6)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_0000 | uni4_value,
+ link: 0x3,
+ length: 6)
+ continue.string_loop_outer
+
+ } else if uni4_value >= 0xDC00 {
+ // Low surrogate. No-op (and fall through to
+ // "#bad backslash-escape").
+
+ } else {
+ // High surrogate, which needs to be followed
+ // by a "\\u1234" low surrogate. We've already
+ // peeked 6 bytes for the high surrogate. We
+ // need 12 in total: another 8 bytes at an
+ // offset of 4.
+ if args.src.available() < 12 {
+ if args.src.is_closed() {
+ if this.quirk_enabled_replace_invalid_unicode {
+ args.src.skip32_fast!(actual: 6, worst_case: 6)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 6)
+ continue.string_loop_outer
+ }
+ return "#bad backslash-escape"
+ }
+ yield? base."$short read"
+ string_length = 0
+ uni4_value = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ uni4_string = args.src.peek_u64le_at(offset: 4) >> 16
+
+ // Look for the low surrogate's "\\u".
+ if ((0xFF & (uni4_string >> 0)) <> '\\') or
+ ((0xFF & (uni4_string >> 8)) <> 'u') {
+ uni4_high_surrogate = 0
+ uni4_value = 0
+ uni4_ok = 0
+ } else {
+ uni4_high_surrogate =
+ 0x1_0000 + ((uni4_value - 0xD800) << 10)
+ uni4_value = 0
+ uni4_ok = 0x80
+ uni4_string >>= 16
+
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 12
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 8
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 4
+ c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
+ uni4_ok &= c
+ uni4_value |= ((c & 0x0F) as base.u32) << 0
+ }
+
+ if (uni4_ok <> 0) and
+ (0xDC00 <= uni4_value) and (uni4_value <= 0xDFFF) {
+
+ // Emit a single token for the surrogate
+ // pair.
+ uni4_value -= 0xDC00
+ args.src.skip32_fast!(actual: 12, worst_case: 12)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_0000 | uni4_high_surrogate | uni4_value,
+ link: 0x3,
+ length: 12)
+ continue.string_loop_outer
+ }
+ }
+
+ if this.quirk_enabled_replace_invalid_unicode {
+ if args.src.available() < 6 {
+ return "#internal error: inconsistent I/O"
+ }
+ args.src.skip32_fast!(actual: 6, worst_case: 6)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 6)
+ continue.string_loop_outer
+ }
+ // -------- END backslash-u.
+
+ } else if (c == 'U') and
+ this.quirk_enabled_allow_backslash_capital_u {
+ // -------- BEGIN backslash-capital-u.
+ if args.src.available() < 10 {
+ if args.src.is_closed() {
+ return "#bad backslash-escape"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ uni8_string = args.src.peek_u64le_at(offset: 2)
+ uni8_value = 0
+ uni8_ok = 0x80
+
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 0)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 28
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 8)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 24
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 16)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 20
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 24)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 16
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 32)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 12
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 40)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 8
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 48)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 4
+ c = lut_hexadecimal_digits[0xFF & (uni8_string >> 56)]
+ uni8_ok &= c
+ uni8_value |= ((c & 0x0F) as base.u32) << 0
+
+ if uni8_ok == 0 {
+ // It wasn't 8 hexadecimal digits. No-op (and
+ // fall through to "#bad backslash-escape").
+
+ } else if (uni8_value < 0xD800) or (
+ (0xDFFF < uni8_value) and (uni8_value <= 0x10_FFFF)) {
+ // Not a Unicode surrogate. We're good.
+ args.src.skip32_fast!(actual: 10, worst_case: 10)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_0000 | (uni8_value & 0x1F_FFFF),
+ link: 0x3,
+ length: 10)
+ continue.string_loop_outer
+ } else if this.quirk_enabled_replace_invalid_unicode {
+ args.src.skip32_fast!(actual: 10, worst_case: 10)
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 10)
+ continue.string_loop_outer
+ }
+ // -------- END backslash-capital-u.
+
+ } else if (c == 'x') and
+ this.quirk_enabled_allow_backslash_x {
+ // -------- BEGIN backslash-x
+ if args.src.available() < 4 {
+ if args.src.is_closed() {
+ return "#bad backslash-escape"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+
+ backslash_x_length = 0
+ while (backslash_x_length <= 0xFFFB) and (args.src.available() >= 4),
+ inv args.dst.available() > 0,
+ {
+ backslash_x_string = args.src.peek_u32le()
+ backslash_x_ok = 0x80
+
+ c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 16)]
+ backslash_x_ok &= c
+ c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 24)]
+ backslash_x_ok &= c
+
+ if (backslash_x_ok == 0) or
+ ((backslash_x_string & 0xFFFF) <> 0x785C) {
+ // It wasn't "\\x34", for some hexadecimal
+ // digits "34".
+ break
+ }
+ args.src.skip32_fast!(actual: 4, worst_case: 4)
+ backslash_x_length += 4
+ } endwhile
+
+ if backslash_x_length == 0 {
+ return "#bad backslash-escape"
+ }
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0080,
+ link: 0x3,
+ length: backslash_x_length)
+ continue.string_loop_outer
+ // -------- END backslash-x
+ }
+
+ return "#bad backslash-escape"
+
+ } else if char == 0x03 { // 2-byte UTF-8.
+ if args.src.available() < 2 {
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ if args.dst.available() <= 0 {
+ continue.string_loop_outer
+ }
+ }
+ if args.src.is_closed() {
+ if this.quirk_enabled_replace_invalid_unicode {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 1)
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ continue.string_loop_outer
+ }
+ return "#bad UTF-8"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ multi_byte_utf8 = args.src.peek_u16le_as_u32()
+ if (multi_byte_utf8 & 0xC000) == 0x8000 {
+ multi_byte_utf8 = (0x00_07C0 & (multi_byte_utf8 ~mod<< 6)) |
+ (0x00_003F & (multi_byte_utf8 >> 8))
+ args.src.skip32_fast!(actual: 2, worst_case: 2)
+ if string_length >= 0xFFF8 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length + 2)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 2
+ continue.string_loop_inner
+ }
+
+ } else if char == 0x04 { // 3-byte UTF-8.
+ if args.src.available() < 3 {
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ if args.dst.available() <= 0 {
+ continue.string_loop_outer
+ }
+ }
+ if args.src.is_closed() {
+ if this.quirk_enabled_replace_invalid_unicode {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 1)
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ continue.string_loop_outer
+ }
+ return "#bad UTF-8"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ multi_byte_utf8 = args.src.peek_u24le_as_u32()
+ if (multi_byte_utf8 & 0xC0_C000) == 0x80_8000 {
+ multi_byte_utf8 = (0x00_F000 & (multi_byte_utf8 ~mod<< 12)) |
+ (0x00_0FC0 & (multi_byte_utf8 >> 2)) |
+ (0x00_003F & (multi_byte_utf8 >> 16))
+ if (0x07FF < multi_byte_utf8) and
+ ((multi_byte_utf8 < 0xD800) or (0xDFFF < multi_byte_utf8)) {
+
+ args.src.skip32_fast!(actual: 3, worst_case: 3)
+ if string_length >= 0xFFF8 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length + 3)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 3
+ continue.string_loop_inner
+ }
+ }
+
+ } else if char == 0x05 { // 4-byte UTF-8.
+ if args.src.available() < 4 {
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ if args.dst.available() <= 0 {
+ continue.string_loop_outer
+ }
+ }
+ if args.src.is_closed() {
+ if this.quirk_enabled_replace_invalid_unicode {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 1)
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ continue.string_loop_outer
+ }
+ return "#bad UTF-8"
+ }
+ yield? base."$short read"
+ string_length = 0
+ char = 0
+ continue.string_loop_outer
+ }
+ multi_byte_utf8 = args.src.peek_u32le()
+ if (multi_byte_utf8 & 0xC0C0_C000) == 0x8080_8000 {
+ multi_byte_utf8 = (0x1C_0000 & (multi_byte_utf8 ~mod<< 18)) |
+ (0x03_F000 & (multi_byte_utf8 ~mod<< 4)) |
+ (0x00_0FC0 & (multi_byte_utf8 >> 10)) |
+ (0x00_003F & (multi_byte_utf8 >> 24))
+ if (0xFFFF < multi_byte_utf8) and (multi_byte_utf8 <= 0x10_FFFF) {
+ args.src.skip32_fast!(actual: 4, worst_case: 4)
+ if string_length >= 0xFFF8 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length + 4)
+ string_length = 0
+ continue.string_loop_outer
+ }
+ string_length += 4
+ continue.string_loop_inner
+ }
+ }
+ }
+
+ if string_length > 0 {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x40_0021,
+ link: 0x3,
+ length: string_length)
+ string_length = 0
+ if args.dst.available() <= 0 {
+ continue.string_loop_outer
+ }
+ }
+ if char == 0x80 {
+ return "#bad C0 control code"
+ }
+ if this.quirk_enabled_replace_invalid_unicode {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: 0x60_FFFD,
+ link: 0x3,
+ length: 1)
+ args.src.skip32_fast!(actual: 1, worst_case: 1)
+ continue.string_loop_outer
+ }
+ return "#bad UTF-8"
+ } endwhile.string_loop_inner
+ } endwhile.string_loop_outer
// Emit the trailing '"'.
while true {
@@ -550,581 +1119,6 @@
this.end_of_data = true
}
-pri func decoder.decode_string?(dst: base.token_writer, src: base.io_reader) {
- var string_length : base.u32[..= 0xFFFB]
- var c4 : base.u32
- var c : base.u8
- var backslash : base.u8
- var char : base.u8
- var multi_byte_utf8 : base.u32
-
- var backslash_x_length : base.u32[..= 0xFFFF]
- var backslash_x_ok : base.u8
- var backslash_x_string : base.u32
-
- var uni4_ok : base.u8
- var uni4_string : base.u64
- var uni4_value : base.u32[..= 0xFFFF]
- var uni4_high_surrogate : base.u32[..= 0x10_FC00]
-
- var uni8_ok : base.u8
- var uni8_string : base.u64
- var uni8_value : base.u32[..= 0xFFFF_FFFF]
-
- while.string_loop_outer true {
- if args.dst.available() <= 0 {
- yield? base."$short write"
- continue.string_loop_outer
- }
-
- string_length = 0
- while.string_loop_inner true,
- pre args.dst.available() > 0,
- {
- if args.src.available() <= 0 {
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- }
- if args.src.is_closed() {
- return "#bad input"
- }
- yield? base."$short read"
- string_length = 0
- continue.string_loop_outer
- }
-
- // As an optimization, consume non-special ASCII 4 bytes at
- // a time.
- while args.src.available() > 4,
- inv args.dst.available() > 0,
- inv args.src.available() > 0,
- {
- c4 = args.src.peek_u32le()
- if 0x00 <> (lut_chars[0xFF & (c4 >> 0)] |
- lut_chars[0xFF & (c4 >> 8)] |
- lut_chars[0xFF & (c4 >> 16)] |
- lut_chars[0xFF & (c4 >> 24)]) {
- break
- }
- args.src.skip32_fast!(actual: 4, worst_case: 4)
- if string_length > (0xFFFB - 4) {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length + 4)
- string_length = 0
- continue.string_loop_outer
- }
- string_length += 4
- } endwhile
-
- c = args.src.peek_u8()
- char = lut_chars[c]
-
- if char == 0x00 { // Non-special ASCII.
- args.src.skip32_fast!(actual: 1, worst_case: 1)
- if string_length >= 0xFFFB {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: 0xFFFC)
- string_length = 0
- continue.string_loop_outer
- }
- string_length += 1
- continue.string_loop_inner
-
- } else if char == 0x01 { // '"'
- if string_length <> 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- }
- break.string_loop_outer
-
- } else if char == 0x02 { // '\\'.
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- if args.dst.available() <= 0 {
- continue.string_loop_outer
- }
- }
- assert args.dst.available() > 0
-
- if args.src.available() < 2 {
- if args.src.is_closed() {
- return "#bad backslash-escape"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
- c = (args.src.peek_u16le() >> 8) as base.u8
- backslash = lut_backslashes[c]
- if (backslash & 0x80) <> 0 {
- args.src.skip32_fast!(actual: 2, worst_case: 2)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_0000 | ((backslash & 0x7F) as base.u32),
- link: 0x3,
- length: 2)
- continue.string_loop_outer
-
- } else if backslash <> 0 {
- if this.quirk_enabled_allow_backslash_etc[backslash & 7] {
- args.src.skip32_fast!(actual: 2, worst_case: 2)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_0000 | (lut_quirky_backslashes[backslash & 7] as base.u32),
- link: 0x3,
- length: 2)
- continue.string_loop_outer
- }
-
- } else if c == 'u' {
- // -------- BEGIN backslash-u.
- if args.src.available() < 6 {
- if args.src.is_closed() {
- return "#bad backslash-escape"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
-
- uni4_string = args.src.peek_u48le_as_u64() >> 16
- uni4_value = 0
- uni4_ok = 0x80
-
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 12
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 8
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 4
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 0
-
- if uni4_ok == 0 {
- // It wasn't 4 hexadecimal digits. No-op (and
- // fall through to "#bad backslash-escape").
-
- } else if (uni4_value < 0xD800) or (0xDFFF < uni4_value) {
- // Not a Unicode surrogate. We're good.
- args.src.skip32_fast!(actual: 6, worst_case: 6)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_0000 | uni4_value,
- link: 0x3,
- length: 6)
- continue.string_loop_outer
-
- } else if uni4_value >= 0xDC00 {
- // Low surrogate. No-op (and fall through to
- // "#bad backslash-escape").
-
- } else {
- // High surrogate, which needs to be followed
- // by a "\\u1234" low surrogate. We've already
- // peeked 6 bytes for the high surrogate. We
- // need 12 in total: another 8 bytes at an
- // offset of 4.
- if args.src.available() < 12 {
- if args.src.is_closed() {
- if this.quirk_enabled_replace_invalid_unicode {
- args.src.skip32_fast!(actual: 6, worst_case: 6)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 6)
- continue.string_loop_outer
- }
- return "#bad backslash-escape"
- }
- yield? base."$short read"
- string_length = 0
- uni4_value = 0
- char = 0
- continue.string_loop_outer
- }
- uni4_string = args.src.peek_u64le_at(offset: 4) >> 16
-
- // Look for the low surrogate's "\\u".
- if ((0xFF & (uni4_string >> 0)) <> '\\') or
- ((0xFF & (uni4_string >> 8)) <> 'u') {
- uni4_high_surrogate = 0
- uni4_value = 0
- uni4_ok = 0
- } else {
- uni4_high_surrogate =
- 0x1_0000 + ((uni4_value - 0xD800) << 10)
- uni4_value = 0
- uni4_ok = 0x80
- uni4_string >>= 16
-
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 12
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 8
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 4
- c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
- uni4_ok &= c
- uni4_value |= ((c & 0x0F) as base.u32) << 0
- }
-
- if (uni4_ok <> 0) and
- (0xDC00 <= uni4_value) and (uni4_value <= 0xDFFF) {
-
- // Emit a single token for the surrogate
- // pair.
- uni4_value -= 0xDC00
- args.src.skip32_fast!(actual: 12, worst_case: 12)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_0000 | uni4_high_surrogate | uni4_value,
- link: 0x3,
- length: 12)
- continue.string_loop_outer
- }
- }
-
- if this.quirk_enabled_replace_invalid_unicode {
- if args.src.available() < 6 {
- return "#internal error: inconsistent I/O"
- }
- args.src.skip32_fast!(actual: 6, worst_case: 6)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 6)
- continue.string_loop_outer
- }
- // -------- END backslash-u.
-
- } else if (c == 'U') and
- this.quirk_enabled_allow_backslash_capital_u {
- // -------- BEGIN backslash-capital-u.
- if args.src.available() < 10 {
- if args.src.is_closed() {
- return "#bad backslash-escape"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
- uni8_string = args.src.peek_u64le_at(offset: 2)
- uni8_value = 0
- uni8_ok = 0x80
-
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 0)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 28
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 8)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 24
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 16)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 20
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 24)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 16
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 32)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 12
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 40)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 8
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 48)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 4
- c = lut_hexadecimal_digits[0xFF & (uni8_string >> 56)]
- uni8_ok &= c
- uni8_value |= ((c & 0x0F) as base.u32) << 0
-
- if uni8_ok == 0 {
- // It wasn't 8 hexadecimal digits. No-op (and
- // fall through to "#bad backslash-escape").
-
- } else if (uni8_value < 0xD800) or (
- (0xDFFF < uni8_value) and (uni8_value <= 0x10_FFFF)) {
- // Not a Unicode surrogate. We're good.
- args.src.skip32_fast!(actual: 10, worst_case: 10)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_0000 | (uni8_value & 0x1F_FFFF),
- link: 0x3,
- length: 10)
- continue.string_loop_outer
- } else if this.quirk_enabled_replace_invalid_unicode {
- args.src.skip32_fast!(actual: 10, worst_case: 10)
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 10)
- continue.string_loop_outer
- }
- // -------- END backslash-capital-u.
-
- } else if (c == 'x') and
- this.quirk_enabled_allow_backslash_x {
- // -------- BEGIN backslash-x
- if args.src.available() < 4 {
- if args.src.is_closed() {
- return "#bad backslash-escape"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
-
- backslash_x_length = 0
- while (backslash_x_length <= 0xFFFB) and (args.src.available() >= 4),
- inv args.dst.available() > 0,
- {
- backslash_x_string = args.src.peek_u32le()
- backslash_x_ok = 0x80
-
- c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 16)]
- backslash_x_ok &= c
- c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 24)]
- backslash_x_ok &= c
-
- if (backslash_x_ok == 0) or
- ((backslash_x_string & 0xFFFF) <> 0x785C) {
- // It wasn't "\\x34", for some hexadecimal
- // digits "34".
- break
- }
- args.src.skip32_fast!(actual: 4, worst_case: 4)
- backslash_x_length += 4
- } endwhile
-
- if backslash_x_length == 0 {
- return "#bad backslash-escape"
- }
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0080,
- link: 0x3,
- length: backslash_x_length)
- continue.string_loop_outer
- // -------- END backslash-x
- }
-
- return "#bad backslash-escape"
-
- } else if char == 0x03 { // 2-byte UTF-8.
- if args.src.available() < 2 {
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- if args.dst.available() <= 0 {
- continue.string_loop_outer
- }
- }
- if args.src.is_closed() {
- if this.quirk_enabled_replace_invalid_unicode {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 1)
- args.src.skip32_fast!(actual: 1, worst_case: 1)
- continue.string_loop_outer
- }
- return "#bad UTF-8"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
- multi_byte_utf8 = args.src.peek_u16le_as_u32()
- if (multi_byte_utf8 & 0xC000) == 0x8000 {
- multi_byte_utf8 = (0x00_07C0 & (multi_byte_utf8 ~mod<< 6)) |
- (0x00_003F & (multi_byte_utf8 >> 8))
- args.src.skip32_fast!(actual: 2, worst_case: 2)
- if string_length >= 0xFFF8 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length + 2)
- string_length = 0
- continue.string_loop_outer
- }
- string_length += 2
- continue.string_loop_inner
- }
-
- } else if char == 0x04 { // 3-byte UTF-8.
- if args.src.available() < 3 {
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- if args.dst.available() <= 0 {
- continue.string_loop_outer
- }
- }
- if args.src.is_closed() {
- if this.quirk_enabled_replace_invalid_unicode {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 1)
- args.src.skip32_fast!(actual: 1, worst_case: 1)
- continue.string_loop_outer
- }
- return "#bad UTF-8"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
- multi_byte_utf8 = args.src.peek_u24le_as_u32()
- if (multi_byte_utf8 & 0xC0_C000) == 0x80_8000 {
- multi_byte_utf8 = (0x00_F000 & (multi_byte_utf8 ~mod<< 12)) |
- (0x00_0FC0 & (multi_byte_utf8 >> 2)) |
- (0x00_003F & (multi_byte_utf8 >> 16))
- if (0x07FF < multi_byte_utf8) and
- ((multi_byte_utf8 < 0xD800) or (0xDFFF < multi_byte_utf8)) {
-
- args.src.skip32_fast!(actual: 3, worst_case: 3)
- if string_length >= 0xFFF8 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length + 3)
- string_length = 0
- continue.string_loop_outer
- }
- string_length += 3
- continue.string_loop_inner
- }
- }
-
- } else if char == 0x05 { // 4-byte UTF-8.
- if args.src.available() < 4 {
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- if args.dst.available() <= 0 {
- continue.string_loop_outer
- }
- }
- if args.src.is_closed() {
- if this.quirk_enabled_replace_invalid_unicode {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 1)
- args.src.skip32_fast!(actual: 1, worst_case: 1)
- continue.string_loop_outer
- }
- return "#bad UTF-8"
- }
- yield? base."$short read"
- string_length = 0
- char = 0
- continue.string_loop_outer
- }
- multi_byte_utf8 = args.src.peek_u32le()
- if (multi_byte_utf8 & 0xC0C0_C000) == 0x8080_8000 {
- multi_byte_utf8 = (0x1C_0000 & (multi_byte_utf8 ~mod<< 18)) |
- (0x03_F000 & (multi_byte_utf8 ~mod<< 4)) |
- (0x00_0FC0 & (multi_byte_utf8 >> 10)) |
- (0x00_003F & (multi_byte_utf8 >> 24))
- if (0xFFFF < multi_byte_utf8) and (multi_byte_utf8 <= 0x10_FFFF) {
- args.src.skip32_fast!(actual: 4, worst_case: 4)
- if string_length >= 0xFFF8 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length + 4)
- string_length = 0
- continue.string_loop_outer
- }
- string_length += 4
- continue.string_loop_inner
- }
- }
- }
-
- if string_length > 0 {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x40_0021,
- link: 0x3,
- length: string_length)
- string_length = 0
- if args.dst.available() <= 0 {
- continue.string_loop_outer
- }
- }
- if char == 0x80 {
- return "#bad C0 control code"
- }
- if this.quirk_enabled_replace_invalid_unicode {
- args.dst.write_simple_token_fast!(
- value_major: 0,
- value_minor: 0x60_FFFD,
- link: 0x3,
- length: 1)
- args.src.skip32_fast!(actual: 1, worst_case: 1)
- continue.string_loop_outer
- }
- return "#bad UTF-8"
- } endwhile.string_loop_inner
- } endwhile.string_loop_outer
-}
-
pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FF] {
var c : base.u8
var n : base.u32[..= 0x3FF]