blob: 1bd524a44be6864604099e86f2aced5966db58e5 [file] [log] [blame]
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub struct decoder? implements base.token_decoder(
end_of_data : base.bool,
)(
// stack is conceptually an array of bits, implemented as an array of u32.
// The N'th bit being 0 or 1 means that we're in an array or object, where
// N is the recursion depth.
//
// Parsing JSON involves recursion: containers (arrays and objects) can
// hold other containers. As child elements are completed, the parser needs
// to remember 1 bit of state per recursion depth: whether the parent
// container was an array or an object. When continuing to parse the
// parent's elements, `, "key": value` is only valid for objects.
//
// Note that we explicitly track our own stack and depth. We do not use the
// call stack to hold this state and the decoder.decode_tokens function is
// not recursive per se.
//
// Wuffs code does not have the capability to dynamically allocate memory,
// so the maximum depth is hard-coded at compile time. In this case, the
// maximum is 1024 (stack is 1024 bits or 128 bytes), also known as
// decoder_depth_max_incl.
//
// The [JSON spec](https://www.ietf.org/rfc/rfc8259.txt) clearly states,
// "an implementation may set limits on the maximum depth of nesting".
//
// In comparison, as of February 2020, the Chromium web browser's JSON
// parser's maximum recursion depth is 200:
// https://source.chromium.org/chromium/chromium/src/+/3dece34cde622faa0daac07156c25d92c9897d1e:base/json/json_common.h;l=18
//
// Other languages and libraries' maximum depths (determined empirically)
// are listed at https://github.com/lovasoa/bad_json_parsers#results
stack : array[1024 / 32] base.u32,
)
pub func decoder.decode_tokens?(dst: base.token_writer, src: base.io_reader) {
var vminor : base.u32[..= 0xFF_FFFF]
var number_length : base.u32[..= 0x3FF]
var number_status : base.u32[..= 0x3]
var string_length : base.u32[..= 0xFFFB]
var whitespace_length : base.u32[..= 0xFFFE]
var depth : base.u32[..= 1024]
var stack_byte : base.u32[..= (1024 / 32) - 1]
var stack_bit : base.u32[..= 31]
var match : base.u32[..= 2]
var c_by_4 : base.u32
var c : base.u8
var backslash : base.u8
var char : base.u8
var class : base.u8[..= 0x0F]
var multi_byte_utf8 : base.u32
var uni4_ok : base.u8
var uni4_string : base.u64
var uni4_value : base.u32[..= 0xFFFF]
var uni4_high_surrogate : base.u32[..= 0x10_FC00]
var uni4_rollback : base.u32
// expect is a bitmask of what the next character class can be.
//
// expect_after_value is what to expect after seeing a value (a literal,
// number, string, array or object). For depth 0, this is ignored.
// Otherwise, it should be (EXPECT_CLOSE_FOO | EXPECT_COMMA), for some
// value of FOO.
var expect : base.u32
var expect_after_value : base.u32
while this.end_of_data {
return base."@end of data"
}
expect = 0x0EB2 // EXPECT_VALUE
while.outer true {
while.goto_parsed_a_leaf_value true {
if args.dst.available() <= 0 {
yield? base."$short write"
continue.outer
}
// Consume whitespace.
whitespace_length = 0
c = 0
class = 0
while.ws true,
inv args.dst.available() > 0,
post args.src.available() > 0,
{
if args.src.available() <= 0 {
if whitespace_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0,
link: 0x0,
length: whitespace_length)
whitespace_length = 0
}
if args.src.is_closed() {
return "#bad input"
}
yield? base."$short read"
whitespace_length = 0
continue.outer
}
c = args.src.peek_u8()
class = lut_classes[c]
if class <> 0x00 { // 0x00 is CLASS_WHITESPACE.
break.ws
}
args.src.skip32_fast!(actual: 1, worst_case: 1)
if whitespace_length >= 0xFFFE {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0,
link: 0x0,
length: 0xFFFF)
whitespace_length = 0
continue.outer
}
whitespace_length += 1
} endwhile.ws
// Emit whitespace.
if whitespace_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0,
link: 0x0,
length: whitespace_length)
whitespace_length = 0
if args.dst.available() <= 0 {
continue.outer
}
}
// Check expected character classes.
if 0 == (expect & ((1 as base.u32) << class)) {
return "#bad input"
}
// These assertions are redundant (the Wuffs compiler should
// already know these facts; deleting these assertions should still
// compile) but are listed explicitly to guard against future edits
// to the code above inadvertently invalidating these assertions.
assert args.dst.available() > 0
assert args.src.available() > 0
if class == 0x01 { // 0x01 is CLASS_STRING.
// -------- BEGIN parse strings.
// Emit the leading '"'.
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0013,
link: 0x1,
length: 1)
args.src.skip32_fast!(actual: 1, worst_case: 1)
while.string_loop_outer true {
if args.dst.available() <= 0 {
yield? base."$short write"
continue.string_loop_outer
}
string_length = 0
while.string_loop_inner true,
pre args.dst.available() > 0,
{
if args.src.available() <= 0 {
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
if args.src.is_closed() {
return "#bad input"
}
yield? base."$short read"
string_length = 0
continue.string_loop_outer
}
// As an optimization, consume non-special ASCII 4 bytes at a time.
while args.src.available() > 4,
inv args.dst.available() > 0,
inv args.src.available() > 0,
{
c_by_4 = args.src.peek_u32le()
if 0x00 <> (lut_chars[0xFF & (c_by_4 >> 0)] |
lut_chars[0xFF & (c_by_4 >> 8)] |
lut_chars[0xFF & (c_by_4 >> 16)] |
lut_chars[0xFF & (c_by_4 >> 24)]) {
break
}
args.src.skip32_fast!(actual: 4, worst_case: 4)
if string_length > (0xFFFB - 4) {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length + 4)
string_length = 0
continue.string_loop_outer
}
string_length += 4
}
c = args.src.peek_u8()
char = lut_chars[c]
if char == 0x00 { // Non-special ASCII.
args.src.skip32_fast!(actual: 1, worst_case: 1)
if string_length >= 0xFFFB {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: 0xFFFC)
string_length = 0
continue.string_loop_outer
}
string_length += 1
continue.string_loop_inner
} else if char == 0x01 { // '"'
if string_length <> 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
break.string_loop_outer
} else if char == 0x02 { // '\\'.
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
if args.dst.available() <= 0 {
continue.string_loop_outer
}
}
assert args.dst.available() > 0
if args.src.available() < 2 {
if args.src.is_closed() {
return "#bad backslash-escape"
}
yield? base."$short read"
string_length = 0
char = 0
continue.string_loop_outer
}
c = (args.src.peek_u16le() >> 8) as base.u8
backslash = lut_backslashes[c]
if backslash > 0 {
args.src.skip32_fast!(actual: 2, worst_case: 2)
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x60_0000 | ((backslash & 0x7F) as base.u32),
link: 0x3,
length: 2)
continue.string_loop_outer
} else if c == 0x75 { // 0x75 is 'u'.
// -------- BEGIN backslash-u.
if args.src.available() < 6 {
if args.src.is_closed() {
return "#bad backslash-escape"
}
yield? base."$short read"
string_length = 0
char = 0
continue.string_loop_outer
}
uni4_string = args.src.peek_u48le_as_u64() >> 16
uni4_value = 0
uni4_ok = 0x80
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 12
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 8
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 4
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 0
if uni4_ok == 0 {
// It wasn't 4 hexadecimal digits. No-op
// (and fall through to "#bad
// backslash-escape").
} else if (uni4_value < 0xD800) or (0xDFFF < uni4_value) {
// Not a Unicode surrogate. We're good.
args.src.skip32_fast!(actual: 6, worst_case: 6)
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x60_0000 | uni4_value,
link: 0x3,
length: 6)
continue.string_loop_outer
} else if uni4_value >= 0xDC00 {
// Low surrogate. No-op (and fall through
// to "#bad backslash-escape").
} else {
// High surrogate, which needs to be
// followed by a "\\u1234" low surrogate.
// We've already peeked 6 bytes for the
// high surrogate. We need another 6.
if args.src.available() < 12 {
if args.src.is_closed() {
return "#bad backslash-escape"
}
yield? base."$short read"
string_length = 0
uni4_value = 0
char = 0
continue.string_loop_outer
}
// Roll forward 4 bytes, so that calling
// peek_u64le will pick up the last 8.
args.src.skip32_fast!(actual: 4, worst_case: 4)
uni4_string = args.src.peek_u64le() >> 16
// Look for the low surrogate's "\\u".
if ((0xFF & (uni4_string >> 0)) <> 0x5C) or
((0xFF & (uni4_string >> 8)) <> 0x75) {
uni4_high_surrogate = 0
uni4_value = 0
uni4_ok = 0
} else {
uni4_high_surrogate =
0x1_0000 + ((uni4_value - 0xD800) << 10)
uni4_value = 0
uni4_ok = 0x80
uni4_string >>= 16
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 12
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 8
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 4
c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
uni4_ok &= c
uni4_value |= ((c & 0x0F) as base.u32) << 0
}
if (uni4_ok <> 0) and
(0xDC00 <= uni4_value) and (uni4_value <= 0xDFFF) {
// Emit a single token for the surrogate pair.
uni4_value -= 0xDC00
args.src.skip32_fast!(actual: 8, worst_case: 8)
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x60_0000 | uni4_high_surrogate | uni4_value,
link: 0x3,
length: 12)
continue.string_loop_outer
} else {
// Roll back the 4 bytes, then fall
// through to "#invalid
// backslash-escape".
uni4_rollback = 4
while uni4_rollback > 0 {
uni4_rollback -= 1
if args.src.can_undo_byte() {
args.src.undo_byte!()
} else {
return "#internal error: inconsistent I/O"
}
}
}
}
// -------- END backslash-u.
}
return "#bad backslash-escape"
} else if char == 0x03 { // 2-byte UTF-8.
if args.src.available() < 2 {
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
if args.src.is_closed() {
return "#bad UTF-8"
}
yield? base."$short read"
string_length = 0
char = 0
continue.string_loop_outer
}
multi_byte_utf8 = args.src.peek_u16le_as_u32()
if (multi_byte_utf8 & 0xC000) == 0x8000 {
multi_byte_utf8 = (0x00_07C0 & (multi_byte_utf8 ~mod<< 6)) |
(0x00_003F & (multi_byte_utf8 >> 8))
args.src.skip32_fast!(actual: 2, worst_case: 2)
if string_length >= 0xFFF8 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length + 2)
string_length = 0
continue.string_loop_outer
}
string_length += 2
continue.string_loop_inner
}
} else if char == 0x04 { // 3-byte UTF-8.
if args.src.available() < 3 {
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
if args.src.is_closed() {
return "#bad UTF-8"
}
yield? base."$short read"
string_length = 0
char = 0
continue.string_loop_outer
}
multi_byte_utf8 = args.src.peek_u24le_as_u32()
if (multi_byte_utf8 & 0xC0_C000) == 0x80_8000 {
multi_byte_utf8 = (0x00_F000 & (multi_byte_utf8 ~mod<< 12)) |
(0x00_0FC0 & (multi_byte_utf8 >> 2)) |
(0x00_003F & (multi_byte_utf8 >> 16))
if (0x07FF < multi_byte_utf8) and
((multi_byte_utf8 < 0xD800) or (0xDFFF < multi_byte_utf8)) {
args.src.skip32_fast!(actual: 3, worst_case: 3)
if string_length >= 0xFFF8 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length + 3)
string_length = 0
continue.string_loop_outer
}
string_length += 3
continue.string_loop_inner
}
}
} else if char == 0x05 { // 4-byte UTF-8.
if args.src.available() < 4 {
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
if args.src.is_closed() {
return "#bad UTF-8"
}
yield? base."$short read"
string_length = 0
char = 0
continue.string_loop_outer
}
multi_byte_utf8 = args.src.peek_u32le()
if (multi_byte_utf8 & 0xC0C0_C000) == 0x8080_8000 {
multi_byte_utf8 = (0x1C_0000 & (multi_byte_utf8 ~mod<< 18)) |
(0x03_F000 & (multi_byte_utf8 ~mod<< 4)) |
(0x00_0FC0 & (multi_byte_utf8 >> 10)) |
(0x00_003F & (multi_byte_utf8 >> 24))
if (0xFFFF < multi_byte_utf8) and (multi_byte_utf8 <= 0x10_FFFF) {
args.src.skip32_fast!(actual: 4, worst_case: 4)
if string_length >= 0xFFF8 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length + 4)
string_length = 0
continue.string_loop_outer
}
string_length += 4
continue.string_loop_inner
}
}
}
if string_length > 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0021,
link: 0x3,
length: string_length)
string_length = 0
}
if char == 0x80 {
return "#bad C0 control code"
}
return "#bad UTF-8"
} endwhile.string_loop_inner
} endwhile.string_loop_outer
// Emit the trailing '"'.
while true {
if args.src.available() <= 0 {
if args.src.is_closed() {
return "#bad input"
}
yield? base."$short read"
continue
}
if args.dst.available() <= 0 {
yield? base."$short write"
continue
}
args.src.skip32_fast!(actual: 1, worst_case: 1)
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x40_0013,
link: 0x2,
length: 1)
break
}
// As above, expect must have contained EXPECT_STRING. If it
// didn't also contain EXPECT_NUMBER then we were parsing an
// object key and the next token should be ':'.
if 0 == (expect & 0x0010) { // 0x0010 is EXPECT_NUMBER.
expect = 0x0008 // 0x0008 is EXPECT_COLON.
continue.outer
}
break.goto_parsed_a_leaf_value
// -------- END parse strings.
} else if class == 0x02 { // 0x02 is CLASS_COMMA.
args.src.skip32_fast!(actual: 1, worst_case: 1)
// The ',' is filler.
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0,
link: 0x0,
length: 1)
// What's valid after a comma depends on whether or not we're
// in an array or an object.
if 0 == (expect & 0x0100) { // 0x0100 is EXPECT_CLOSE_SQUARE_BRACKET
expect = 0x0002 // 0x0002 is EXPECT_STRING.
} else {
expect = 0x0EB2 // 0x0EB2 is EXPECT_VALUE.
}
continue.outer
} else if class == 0x03 { // 0x03 is CLASS_COLON.
args.src.skip32_fast!(actual: 1, worst_case: 1)
// The ':' is filler.
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0,
link: 0x0,
length: 1)
expect = 0x0EB2 // 0x0EB2 is EXPECT_VALUE.
continue.outer
} else if class == 0x04 { // 0x04 is CLASS_NUMBER.
// -------- BEGIN parse numbers.
while true,
pre args.dst.available() > 0,
{
number_length = this.decode_number!(src: args.src)
number_status = number_length >> 8
vminor = 0xA0_0403
if (number_length & 0x80) <> 0 {
vminor = 0xA0_0401
}
number_length = number_length & 0x7F
if number_status == 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: vminor,
link: 0x0,
length: number_length)
break
}
while number_length > 0 {
number_length -= 1
if args.src.can_undo_byte() {
args.src.undo_byte!()
} else {
return "#internal error: inconsistent I/O"
}
}
if number_status == 1 {
return "#bad input"
} else if number_status == 2 {
return "#unsupported number length"
} else {
yield? base."$short read"
while args.dst.available() <= 0,
post args.dst.available() > 0,
{
yield? base."$short write"
}
}
}
break.goto_parsed_a_leaf_value
// -------- END parse numbers.
} else if class == 0x05 { // 0x05 is CLASS_OPEN_CURLY_BRACE.
vminor = 0x20_4011
if depth == 0 {
// No-op.
} else if 0 <> (expect_after_value & 0x0040) { // 0x0040 is EXPECT_CLOSE_CURLY_BRACE.
vminor = 0x20_4041
} else {
vminor = 0x20_4021
}
if depth >= 1024 {
return "#unsupported recursion depth"
}
stack_byte = depth / 32
stack_bit = depth & 31
this.stack[stack_byte] |= (1 as base.u32) << stack_bit
depth += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
args.dst.write_fast_token!(
value_major: 0,
value_minor: vminor,
link: 0x0,
length: 1)
expect = 0x0042 // 0x0042 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_STRING).
expect_after_value = 0x0044 // 0x0044 is (EXPECT_CURLY_CLOSE_BRACE | EXPECT_COMMA).
continue.outer
} else if class == 0x06 { // 0x06 is CLASS_CLOSE_CURLY_BRACE.
args.src.skip32_fast!(actual: 1, worst_case: 1)
if depth <= 1 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_1042,
link: 0x0,
length: 1)
break.outer
}
depth -= 1
stack_byte = (depth - 1) / 32
stack_bit = (depth - 1) & 31
if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_2042,
link: 0x0,
length: 1)
// 0x0104 is (EXPECT_SQUARE_CLOSE_BRACKET | EXPECT_COMMA).
expect = 0x0104
expect_after_value = 0x0104
} else {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_4042,
link: 0x0,
length: 1)
// 0x0044 is (EXPECT_CURLY_CLOSE_BRACE | EXPECT_COMMA).
expect = 0x0044
expect_after_value = 0x0044
}
continue.outer
} else if class == 0x07 { // 0x07 is CLASS_OPEN_SQUARE_BRACKET.
vminor = 0x20_2011
if depth == 0 {
// No-op.
} else if 0 <> (expect_after_value & 0x0040) { // 0x0040 is EXPECT_CLOSE_CURLY_BRACE.
vminor = 0x20_2041
} else {
vminor = 0x20_2021
}
if depth >= 1024 {
return "#unsupported recursion depth"
}
stack_byte = depth / 32
stack_bit = depth & 31
this.stack[stack_byte] &= 0xFFFF_FFFF ^ ((1 as base.u32) << stack_bit)
depth += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
args.dst.write_fast_token!(
value_major: 0,
value_minor: vminor,
link: 0x0,
length: 1)
expect = 0x0FB2 // 0x0FB2 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_VALUE).
expect_after_value = 0x0104 // 0x0104 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_COMMA).
continue.outer
} else if class == 0x08 { // 0x08 is CLASS_CLOSE_SQUARE_BRACKET.
args.src.skip32_fast!(actual: 1, worst_case: 1)
if depth <= 1 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_1022,
link: 0x0,
length: 1)
break.outer
}
depth -= 1
stack_byte = (depth - 1) / 32
stack_bit = (depth - 1) & 31
if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_2022,
link: 0x0,
length: 1)
// 0x0104 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_COMMA).
expect = 0x0104
expect_after_value = 0x0104
} else {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x20_4022,
link: 0x0,
length: 1)
// 0x0044 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_COMMA).
expect = 0x0044
expect_after_value = 0x0044
}
continue.outer
} else if class == 0x09 { // 0x09 is CLASS_FALSE.
match = args.src.match7(a: 0x6573_6C61_6605) // 5 bytes "false".
if match == 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x80_0004,
link: 0x0,
length: 5)
if args.src.available() < 5 {
return "#internal error: inconsistent I/O"
}
args.src.skip32_fast!(actual: 5, worst_case: 5)
break.goto_parsed_a_leaf_value
} else if match == 1 {
yield? base."$short read"
continue.outer
}
} else if class == 0x0A { // 0x0A is CLASS_TRUE.
match = args.src.match7(a: 0x65_7572_7404) // 4 bytes "true".
if match == 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x80_0008,
link: 0x0,
length: 4)
if args.src.available() < 4 {
return "#internal error: inconsistent I/O"
}
args.src.skip32_fast!(actual: 4, worst_case: 4)
break.goto_parsed_a_leaf_value
} else if match == 1 {
yield? base."$short read"
continue.outer
}
} else if class == 0x0B { // 0x0B is CLASS_NULL.
match = args.src.match7(a: 0x6C_6C75_6E04) // 4 bytes "null".
if match == 0 {
args.dst.write_fast_token!(
value_major: 0,
value_minor: 0x80_0002,
link: 0x0,
length: 4)
if args.src.available() < 4 {
return "#internal error: inconsistent I/O"
}
args.src.skip32_fast!(actual: 4, worst_case: 4)
break.goto_parsed_a_leaf_value
} else if match == 1 {
yield? base."$short read"
continue.outer
}
}
return "#bad input"
} endwhile.goto_parsed_a_leaf_value
// We've just parsed a leaf (non-container) value: literal (null,
// false, true), number or string.
if depth == 0 {
break.outer
}
expect = expect_after_value
} endwhile.outer
this.end_of_data = true
}
pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FF] {
var c : base.u8
var n : base.u32[..= 0x3FF]
var floating_point : base.u32[..= 0x80]
while.goto_done true {
n = 0
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
break.goto_done
}
c = args.src.peek_u8()
// Scan the optional minus sign.
if c <> 0x2D { // 0x2D is '-'.
assert args.src.available() > 0
assert n <= 1
} else {
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
n |= 0x100 // A '-' without digits is invalid.
break.goto_done
}
c = args.src.peek_u8()
assert args.src.available() > 0
assert n <= 1
}
// Scan the opening digits.
if c == 0x30 { // 0x30 is '0'.
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
assert n <= 99
} else {
n = this.decode_digits!(src: args.src, n: n)
if n > 99 {
break.goto_done
}
assert n <= 99
}
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
break.goto_done
}
c = args.src.peek_u8()
// Scan the optional fraction.
if c <> 0x2E { // 0x2E is '.'.
assert args.src.available() > 0
assert n <= 99
} else {
if n >= 99 {
n |= 0x200
break.goto_done
}
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
floating_point = 0x80
n = this.decode_digits!(src: args.src, n: n)
if n > 99 {
break.goto_done
}
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
break.goto_done
}
c = args.src.peek_u8()
assert args.src.available() > 0
assert n <= 99
}
// Scan the optional 'E' or 'e'.
if (c <> 0x45) and (c <> 0x65) { // 0x45 and 0x65 are 'E' and 'e'.
break.goto_done
}
if n >= 99 {
n |= 0x200
break.goto_done
}
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
floating_point = 0x80
assert n <= 99
// Peek.
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
n |= 0x100 // An 'E' or 'e' without digits is invalid.
break.goto_done
}
c = args.src.peek_u8()
// Scan the optional '+' or '-'.
if (c <> 0x2B) and (c <> 0x2D) { // 0x2B and 0x2D are '+' and '-'.
assert n <= 99
} else {
if n >= 99 {
n |= 0x200
break.goto_done
}
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
assert n <= 99
}
// Scan the exponent digits.
n = this.decode_digits!(src: args.src, n: n)
break.goto_done
} endwhile.goto_done
return n | floating_point
}
pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 99]) base.u32[..= 0x3FF] {
var c : base.u8
var n : base.u32[..= 0x3FF]
n = args.n
while true {
if args.src.available() <= 0 {
if not args.src.is_closed() {
n |= 0x300
}
break
}
c = args.src.peek_u8()
if 0x00 == lut_decimal_digits[c] {
break
}
// Cap decoder_number_length_max_incl at an arbitrary value, 99. The
// caller's src.data.len should therefore be at least 100, also known
// as decoder_src_io_buffer_length_min_incl.
//
// An example of a JSON number that is 81 bytes long is:
// https://github.com/nst/JSONTestSuite/blob/master/test_parsing/y_number_double_close_to_zero.json
//
// Note that 99 (in hex, 0x63) is less than 0x80, so we can use 0x80 as
// a flag bit in func decoder.decode_number.
if n >= 99 {
n |= 0x200
break
}
n += 1
args.src.skip32_fast!(actual: 1, worst_case: 1)
}
if n == args.n {
n |= 0x100
}
return n
}