// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// decoder holds the state of the JSON token decoder. It implements
// base.token_decoder.
//
// The first field list holds the per-quirk enabled flags (written by
// decoder.set_quirk_enabled) plus some bookkeeping booleans. The second field
// list holds the container stack used by decoder.decode_tokens.
pub struct decoder? implements base.token_decoder(
	// quirk_enabled_allow_backslash_etc, an 8-element array, is indexed by the
	// same enum as lut_quirky_backslashes.
	//
	// decoder.set_quirk_enabled writes elements 1 through 6 (for the
	// backslash_a, backslash_e, backslash_question_mark,
	// backslash_single_quote, backslash_v and backslash_zero quirks
	// respectively). decoder.decode_tokens reads the element at index
	// (backslash & 7), where backslash is a lut_backslashes entry.
	quirk_enabled_allow_backslash_etc : array[8] base.bool,

	// Each of these flags is set by decoder.set_quirk_enabled when it is
	// passed the quirk_* constant with the matching name.
	quirk_enabled_allow_backslash_capital_u             : base.bool,
	quirk_enabled_allow_backslash_x                     : base.bool,
	quirk_enabled_allow_comment_block                   : base.bool,
	quirk_enabled_allow_comment_line                    : base.bool,
	quirk_enabled_allow_final_comma                     : base.bool,
	quirk_enabled_allow_inf_nan_numbers                 : base.bool,
	quirk_enabled_allow_leading_ascii_record_separator  : base.bool,
	quirk_enabled_allow_leading_unicode_byte_order_mark : base.bool,
	quirk_enabled_allow_trailing_new_line               : base.bool,
	quirk_enabled_replace_invalid_utf_8                 : base.bool,

	// NOTE(review): presumably these track whether a leading ASCII Record
	// Separator / Unicode Byte Order Mark may still be consumed (e.g. by
	// decoder.decode_leading) - confirm against the code that uses them.
	allow_leading_ars  : base.bool,
	allow_leading_ubom : base.bool,

	// end_of_data, when true, makes decoder.decode_tokens return
	// base."@end of data" straight away, before touching dst or src.
	end_of_data : base.bool,
)(
	// stack is conceptually an array of bits, implemented as an array of u32.
	// The N'th bit being 0 or 1 means that we're in an array or object, where
	// N is the recursion depth. Specifically, decoder.decode_tokens sets the
	// bit (1) when opening an object ('{') and clears it (0) when opening an
	// array ('[').
	//
	// Parsing JSON involves recursion: containers (arrays and objects) can
	// hold other containers. As child elements are completed, the parser needs
	// to remember 1 bit of state per recursion depth: whether the parent
	// container was an array or an object. When continuing to parse the
	// parent's elements, `, "key": value` is only valid for objects.
	//
	// Note that we explicitly track our own stack and depth. We do not use the
	// call stack to hold this state and the decoder.decode_tokens function is
	// not recursive per se.
	//
	// Wuffs code does not have the capability to dynamically allocate memory,
	// so the maximum depth is hard-coded at compile time. In this case, the
	// maximum is 1024 (stack is 1024 bits or 128 bytes), also known as
	// decoder_depth_max_incl.
	//
	// The [JSON spec](https://www.ietf.org/rfc/rfc8259.txt) clearly states,
	// "an implementation may set limits on the maximum depth of nesting".
	//
	// In comparison, as of February 2020, the Chromium web browser's JSON
	// parser's maximum recursion depth is 200:
	// https://source.chromium.org/chromium/chromium/src/+/3dece34cde622faa0daac07156c25d92c9897d1e:base/json/json_common.h;l=18
	//
	// Other languages and libraries' maximum depths (determined empirically)
	// are listed at https://github.com/lovasoa/bad_json_parsers#results
	stack : array[1024 / 32] base.u32,
)

// set_quirk_enabled! switches one decoder quirk on or off.
//
// args.quirk is tested against every quirk_* constant this decoder
// recognizes. On a match, args.enabled is stored in the corresponding
// quirk_enabled_* field. A value that matches none of them is silently
// ignored and the decoder is left unchanged.
//
// The branches below are grouped by where the flag is stored rather than
// alphabetically. The tests are mutually exclusive (the quirk_* constants are
// taken to be pairwise distinct), so the grouping does not affect the result.
pub func decoder.set_quirk_enabled!(quirk: base.u32, enabled: base.bool) {
	// Group 1: simple one-character backslash escapes. These share the
	// quirk_enabled_allow_backslash_etc array; the slot numbers (1 through 6)
	// are the same indexes used with lut_quirky_backslashes.
	if args.quirk == quirk_allow_backslash_a {
		this.quirk_enabled_allow_backslash_etc[1] = args.enabled
	} else if args.quirk == quirk_allow_backslash_e {
		this.quirk_enabled_allow_backslash_etc[2] = args.enabled
	} else if args.quirk == quirk_allow_backslash_question_mark {
		this.quirk_enabled_allow_backslash_etc[3] = args.enabled
	} else if args.quirk == quirk_allow_backslash_single_quote {
		this.quirk_enabled_allow_backslash_etc[4] = args.enabled
	} else if args.quirk == quirk_allow_backslash_v {
		this.quirk_enabled_allow_backslash_etc[5] = args.enabled
	} else if args.quirk == quirk_allow_backslash_zero {
		this.quirk_enabled_allow_backslash_etc[6] = args.enabled

		// Group 2: backslash escapes that take hexadecimal digits. Each has
		// its own dedicated flag.
	} else if args.quirk == quirk_allow_backslash_capital_u {
		this.quirk_enabled_allow_backslash_capital_u = args.enabled
	} else if args.quirk == quirk_allow_backslash_x {
		this.quirk_enabled_allow_backslash_x = args.enabled

		// Group 3: every remaining quirk, one dedicated flag apiece.
	} else if args.quirk == quirk_allow_comment_block {
		this.quirk_enabled_allow_comment_block = args.enabled
	} else if args.quirk == quirk_allow_comment_line {
		this.quirk_enabled_allow_comment_line = args.enabled
	} else if args.quirk == quirk_allow_final_comma {
		this.quirk_enabled_allow_final_comma = args.enabled
	} else if args.quirk == quirk_allow_inf_nan_numbers {
		this.quirk_enabled_allow_inf_nan_numbers = args.enabled
	} else if args.quirk == quirk_allow_leading_ascii_record_separator {
		this.quirk_enabled_allow_leading_ascii_record_separator = args.enabled
	} else if args.quirk == quirk_allow_leading_unicode_byte_order_mark {
		this.quirk_enabled_allow_leading_unicode_byte_order_mark = args.enabled
	} else if args.quirk == quirk_allow_trailing_new_line {
		this.quirk_enabled_allow_trailing_new_line = args.enabled
	} else if args.quirk == quirk_replace_invalid_utf_8 {
		this.quirk_enabled_replace_invalid_utf_8 = args.enabled
	}
}

pub func decoder.decode_tokens?(dst: base.token_writer, src: base.io_reader) {
	var vminor            : base.u32[..= 0xFF_FFFF]
	var number_length     : base.u32[..= 0x3FF]
	var number_status     : base.u32[..= 0x3]
	var string_length     : base.u32[..= 0xFFFB]
	var whitespace_length : base.u32[..= 0xFFFE]
	var depth             : base.u32[..= 1024]
	var stack_byte        : base.u32[..= (1024 / 32) - 1]
	var stack_bit         : base.u32[..= 31]
	var match             : base.u32[..= 2]
	var c_by_4            : base.u32
	var c                 : base.u8
	var backslash         : base.u8
	var char              : base.u8
	var class             : base.u8[..= 0x0F]
	var multi_byte_utf8   : base.u32

	var backslash_x_length : base.u32[..= 0xFFFF]
	var backslash_x_ok     : base.u8
	var backslash_x_string : base.u32

	var uni4_ok             : base.u8
	var uni4_string         : base.u64
	var uni4_value          : base.u32[..= 0xFFFF]
	var uni4_high_surrogate : base.u32[..= 0x10_FC00]

	var uni8_ok     : base.u8
	var uni8_string : base.u64
	var uni8_value  : base.u32[..= 0xFFFF_FFFF]

	// expect is a bitmask of what the next character class can be.
	//
	// expect_after_value is what to expect after seeing a value (a literal,
	// number, string, array or object). For depth 0, this is ignored.
	// Otherwise, it should be (EXPECT_CLOSE_FOO | EXPECT_COMMA), for some
	// value of FOO.
	var expect             : base.u32
	var expect_after_value : base.u32

	while this.end_of_data {
		return base."@end of data"
	}

	if this.quirk_enabled_allow_leading_ascii_record_separator or
		this.quirk_enabled_allow_leading_unicode_byte_order_mark {
		this.decode_leading?(dst: args.dst, src: args.src)
	}

	expect = 0x1EB2  // EXPECT_VALUE

	while.outer true {
		while.goto_parsed_a_leaf_value true {
			if args.dst.available() <= 0 {
				yield? base."$short write"
				continue.outer
			}

			// Consume whitespace.
			whitespace_length = 0
			c = 0
			class = 0
			while.ws true,
				inv args.dst.available() > 0,
				post args.src.available() > 0,
			{
				if args.src.available() <= 0 {
					if whitespace_length > 0 {
						args.dst.write_fast_token!(
							value_major: 0,
							value_minor: 0,
							link: 0x0,
							length: whitespace_length)
						whitespace_length = 0
					}
					if args.src.is_closed() {
						return "#bad input"
					}
					yield? base."$short read"
					whitespace_length = 0
					continue.outer
				}

				c = args.src.peek_u8()
				class = lut_classes[c]
				if class <> 0x00 {  // 0x00 is CLASS_WHITESPACE.
					break.ws
				}
				args.src.skip32_fast!(actual: 1, worst_case: 1)

				if whitespace_length >= 0xFFFE {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0,
						link: 0x0,
						length: 0xFFFF)
					whitespace_length = 0
					continue.outer
				}
				whitespace_length += 1
			} endwhile.ws

			// Emit whitespace.
			if whitespace_length > 0 {
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: 0,
					link: 0x0,
					length: whitespace_length)
				whitespace_length = 0
				if args.dst.available() <= 0 {
					continue.outer
				}
			}

			// Check expected character classes.
			if 0 == (expect & ((1 as base.u32) << class)) {
				return "#bad input"
			}

			// These assertions are redundant (the Wuffs compiler should
			// already know these facts; deleting these assertions should still
			// compile) but are listed explicitly to guard against future edits
			// to the code above inadvertently invalidating these assertions.
			assert args.dst.available() > 0
			assert args.src.available() > 0

			if class == 0x01 {  // 0x01 is CLASS_STRING.
				// -------- BEGIN parse strings.
				// Emit the leading '"'.
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: 0x40_0013,
					link: 0x1,
					length: 1)
				args.src.skip32_fast!(actual: 1, worst_case: 1)

				while.string_loop_outer true {
					if args.dst.available() <= 0 {
						yield? base."$short write"
						continue.string_loop_outer
					}

					string_length = 0
					while.string_loop_inner true,
						pre args.dst.available() > 0,
					{
						if args.src.available() <= 0 {
							if string_length > 0 {
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0021,
									link: 0x3,
									length: string_length)
								string_length = 0
							}
							if args.src.is_closed() {
								return "#bad input"
							}
							yield? base."$short read"
							string_length = 0
							continue.string_loop_outer
						}

						// As an optimization, consume non-special ASCII 4 bytes at a time.
						while args.src.available() > 4,
							inv args.dst.available() > 0,
							inv args.src.available() > 0,
						{
							c_by_4 = args.src.peek_u32le()
							if 0x00 <> (lut_chars[0xFF & (c_by_4 >> 0)] |
								lut_chars[0xFF & (c_by_4 >> 8)] |
								lut_chars[0xFF & (c_by_4 >> 16)] |
								lut_chars[0xFF & (c_by_4 >> 24)]) {
								break
							}
							args.src.skip32_fast!(actual: 4, worst_case: 4)
							if string_length > (0xFFFB - 4) {
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0021,
									link: 0x3,
									length: string_length + 4)
								string_length = 0
								continue.string_loop_outer
							}
							string_length += 4
						}

						c = args.src.peek_u8()
						char = lut_chars[c]

						if char == 0x00 {  // Non-special ASCII.
							args.src.skip32_fast!(actual: 1, worst_case: 1)
							if string_length >= 0xFFFB {
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0021,
									link: 0x3,
									length: 0xFFFC)
								string_length = 0
								continue.string_loop_outer
							}
							string_length += 1
							continue.string_loop_inner

						} else if char == 0x01 {  // '"'
							if string_length <> 0 {
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0021,
									link: 0x3,
									length: string_length)
								string_length = 0
							}
							break.string_loop_outer

						} else if char == 0x02 {  // '\\'.
							if string_length > 0 {
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0021,
									link: 0x3,
									length: string_length)
								string_length = 0
								if args.dst.available() <= 0 {
									continue.string_loop_outer
								}
							}
							assert args.dst.available() > 0

							if args.src.available() < 2 {
								if args.src.is_closed() {
									return "#bad backslash-escape"
								}
								yield? base."$short read"
								string_length = 0
								char = 0
								continue.string_loop_outer
							}
							c = (args.src.peek_u16le() >> 8) as base.u8
							backslash = lut_backslashes[c]
							if (backslash & 0x80) <> 0 {
								args.src.skip32_fast!(actual: 2, worst_case: 2)
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x60_0000 | ((backslash & 0x7F) as base.u32),
									link: 0x3,
									length: 2)
								continue.string_loop_outer

							} else if backslash <> 0 {
								if this.quirk_enabled_allow_backslash_etc[backslash & 7] {
									args.src.skip32_fast!(actual: 2, worst_case: 2)
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x60_0000 | (lut_quirky_backslashes[backslash & 7] as base.u32),
										link: 0x3,
										length: 2)
									continue.string_loop_outer
								}

							} else if c == 0x75 {  // 0x75 is 'u'.
								// -------- BEGIN backslash-u.
								if args.src.available() < 6 {
									if args.src.is_closed() {
										return "#bad backslash-escape"
									}
									yield? base."$short read"
									string_length = 0
									char = 0
									continue.string_loop_outer
								}

								uni4_string = args.src.peek_u48le_as_u64() >> 16
								uni4_value = 0
								uni4_ok = 0x80

								c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
								uni4_ok &= c
								uni4_value |= ((c & 0x0F) as base.u32) << 12
								c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
								uni4_ok &= c
								uni4_value |= ((c & 0x0F) as base.u32) << 8
								c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
								uni4_ok &= c
								uni4_value |= ((c & 0x0F) as base.u32) << 4
								c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
								uni4_ok &= c
								uni4_value |= ((c & 0x0F) as base.u32) << 0

								if uni4_ok == 0 {
									// It wasn't 4 hexadecimal digits. No-op
									// (and fall through to "#bad
									// backslash-escape").

								} else if (uni4_value < 0xD800) or (0xDFFF < uni4_value) {
									// Not a Unicode surrogate. We're good.
									args.src.skip32_fast!(actual: 6, worst_case: 6)
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x60_0000 | uni4_value,
										link: 0x3,
										length: 6)
									continue.string_loop_outer

								} else if uni4_value >= 0xDC00 {
									// Low surrogate. No-op (and fall through
									// to "#bad backslash-escape").

								} else {
									// High surrogate, which needs to be
									// followed by a "\\u1234" low surrogate.
									// We've already peeked 6 bytes for the
									// high surrogate. We need 12 in total:
									// another 8 bytes at an offset of 4.
									if args.src.available() < 12 {
										if args.src.is_closed() {
											if this.quirk_enabled_replace_invalid_utf_8 {
												args.src.skip32_fast!(actual: 6, worst_case: 6)
												args.dst.write_fast_token!(
													value_major: 0,
													value_minor: 0x60_FFFD,
													link: 0x3,
													length: 6)
												continue.string_loop_outer
											}
											return "#bad backslash-escape"
										}
										yield? base."$short read"
										string_length = 0
										uni4_value = 0
										char = 0
										continue.string_loop_outer
									}
									uni4_string = args.src.peek_u64le_at(offset: 4) >> 16

									// Look for the low surrogate's "\\u".
									if ((0xFF & (uni4_string >> 0)) <> 0x5C) or
										((0xFF & (uni4_string >> 8)) <> 0x75) {
										uni4_high_surrogate = 0
										uni4_value = 0
										uni4_ok = 0
									} else {
										uni4_high_surrogate =
											0x1_0000 + ((uni4_value - 0xD800) << 10)
										uni4_value = 0
										uni4_ok = 0x80
										uni4_string >>= 16

										c = lut_hexadecimal_digits[0xFF & (uni4_string >> 0)]
										uni4_ok &= c
										uni4_value |= ((c & 0x0F) as base.u32) << 12
										c = lut_hexadecimal_digits[0xFF & (uni4_string >> 8)]
										uni4_ok &= c
										uni4_value |= ((c & 0x0F) as base.u32) << 8
										c = lut_hexadecimal_digits[0xFF & (uni4_string >> 16)]
										uni4_ok &= c
										uni4_value |= ((c & 0x0F) as base.u32) << 4
										c = lut_hexadecimal_digits[0xFF & (uni4_string >> 24)]
										uni4_ok &= c
										uni4_value |= ((c & 0x0F) as base.u32) << 0
									}

									if (uni4_ok <> 0) and
										(0xDC00 <= uni4_value) and (uni4_value <= 0xDFFF) {

										// Emit a single token for the surrogate pair.
										uni4_value -= 0xDC00
										args.src.skip32_fast!(actual: 12, worst_case: 12)
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x60_0000 | uni4_high_surrogate | uni4_value,
											link: 0x3,
											length: 12)
										continue.string_loop_outer
									}
								}

								if this.quirk_enabled_replace_invalid_utf_8 {
									if args.src.available() < 6 {
										return "#internal error: inconsistent I/O"
									}
									args.src.skip32_fast!(actual: 6, worst_case: 6)
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x60_FFFD,
										link: 0x3,
										length: 6)
									continue.string_loop_outer
								}
								// -------- END   backslash-u.

							} else if (c == 0x55) and
								this.quirk_enabled_allow_backslash_capital_u {  // 0x55 is 'U'.
								// -------- BEGIN backslash-capital-u.
								if args.src.available() < 10 {
									if args.src.is_closed() {
										return "#bad backslash-escape"
									}
									yield? base."$short read"
									string_length = 0
									char = 0
									continue.string_loop_outer
								}
								uni8_string = args.src.peek_u64le_at(offset: 2)
								uni8_value = 0
								uni8_ok = 0x80

								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 0)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 28
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 8)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 24
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 16)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 20
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 24)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 16
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 32)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 12
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 40)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 8
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 48)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 4
								c = lut_hexadecimal_digits[0xFF & (uni8_string >> 56)]
								uni8_ok &= c
								uni8_value |= ((c & 0x0F) as base.u32) << 0

								if uni8_ok == 0 {
									// It wasn't 8 hexadecimal digits. No-op
									// (and fall through to "#bad
									// backslash-escape").

								} else if (uni8_value < 0xD800) or (
									(0xDFFF < uni8_value) and (uni8_value <= 0x10_FFFF)) {
									// Not a Unicode surrogate. We're good.
									args.src.skip32_fast!(actual: 10, worst_case: 10)
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x60_0000 | (uni8_value & 0x1F_FFFF),
										link: 0x3,
										length: 10)
									continue.string_loop_outer
								} else if this.quirk_enabled_replace_invalid_utf_8 {
									args.src.skip32_fast!(actual: 10, worst_case: 10)
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x60_FFFD,
										link: 0x3,
										length: 6)
									continue.string_loop_outer
								}
								// -------- END   backslash-capital-u.

							} else if (c == 0x78) and
								this.quirk_enabled_allow_backslash_x {  // 0x78 is 'x'.
								// -------- BEGIN backslash-x
								if args.src.available() < 4 {
									if args.src.is_closed() {
										return "#bad backslash-escape"
									}
									yield? base."$short read"
									string_length = 0
									char = 0
									continue.string_loop_outer
								}

								backslash_x_length = 0
								while (backslash_x_length <= 0xFFFB) and (args.src.available() >= 4),
									inv args.dst.available() > 0,
								{
									backslash_x_string = args.src.peek_u32le()
									backslash_x_ok = 0x80

									c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 16)]
									backslash_x_ok &= c
									c = lut_hexadecimal_digits[0xFF & (backslash_x_string >> 24)]
									backslash_x_ok &= c

									if (backslash_x_ok == 0) or
										((backslash_x_string & 0xFFFF) <> 0x785C) {
										// It wasn't "\\x34", for some
										// hexadecimal digits "34".
										break
									}
									args.src.skip32_fast!(actual: 4, worst_case: 4)
									backslash_x_length += 4
								}

								if backslash_x_length == 0 {
									return "#bad backslash-escape"
								}
								args.dst.write_fast_token!(
									value_major: 0,
									value_minor: 0x40_0080,
									link: 0x3,
									length: backslash_x_length)
								continue.string_loop_outer
								// -------- END   backslash-x
							}

							return "#bad backslash-escape"

						} else if char == 0x03 {  // 2-byte UTF-8.
							if args.src.available() < 2 {
								if string_length > 0 {
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x40_0021,
										link: 0x3,
										length: string_length)
									string_length = 0
									if args.dst.available() <= 0 {
										continue.string_loop_outer
									}
								}
								if args.src.is_closed() {
									if this.quirk_enabled_replace_invalid_utf_8 {
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x60_FFFD,
											link: 0x3,
											length: 1)
										args.src.skip32_fast!(actual: 1, worst_case: 1)
										continue.string_loop_inner
									}
									return "#bad UTF-8"
								}
								yield? base."$short read"
								string_length = 0
								char = 0
								continue.string_loop_outer
							}
							multi_byte_utf8 = args.src.peek_u16le_as_u32()
							if (multi_byte_utf8 & 0xC000) == 0x8000 {
								multi_byte_utf8 = (0x00_07C0 & (multi_byte_utf8 ~mod<< 6)) |
									(0x00_003F & (multi_byte_utf8 >> 8))
								args.src.skip32_fast!(actual: 2, worst_case: 2)
								if string_length >= 0xFFF8 {
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x40_0021,
										link: 0x3,
										length: string_length + 2)
									string_length = 0
									continue.string_loop_outer
								}
								string_length += 2
								continue.string_loop_inner
							}

						} else if char == 0x04 {  // 3-byte UTF-8.
							if args.src.available() < 3 {
								if string_length > 0 {
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x40_0021,
										link: 0x3,
										length: string_length)
									string_length = 0
									if args.dst.available() <= 0 {
										continue.string_loop_outer
									}
								}
								if args.src.is_closed() {
									if this.quirk_enabled_replace_invalid_utf_8 {
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x60_FFFD,
											link: 0x3,
											length: 1)
										args.src.skip32_fast!(actual: 1, worst_case: 1)
										continue.string_loop_inner
									}
									return "#bad UTF-8"
								}
								yield? base."$short read"
								string_length = 0
								char = 0
								continue.string_loop_outer
							}
							multi_byte_utf8 = args.src.peek_u24le_as_u32()
							if (multi_byte_utf8 & 0xC0_C000) == 0x80_8000 {
								multi_byte_utf8 = (0x00_F000 & (multi_byte_utf8 ~mod<< 12)) |
									(0x00_0FC0 & (multi_byte_utf8 >> 2)) |
									(0x00_003F & (multi_byte_utf8 >> 16))
								if (0x07FF < multi_byte_utf8) and
									((multi_byte_utf8 < 0xD800) or (0xDFFF < multi_byte_utf8)) {

									args.src.skip32_fast!(actual: 3, worst_case: 3)
									if string_length >= 0xFFF8 {
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x40_0021,
											link: 0x3,
											length: string_length + 3)
										string_length = 0
										continue.string_loop_outer
									}
									string_length += 3
									continue.string_loop_inner
								}
							}

						} else if char == 0x05 {  // 4-byte UTF-8.
							if args.src.available() < 4 {
								if string_length > 0 {
									args.dst.write_fast_token!(
										value_major: 0,
										value_minor: 0x40_0021,
										link: 0x3,
										length: string_length)
									string_length = 0
									if args.dst.available() <= 0 {
										continue.string_loop_outer
									}
								}
								if args.src.is_closed() {
									if this.quirk_enabled_replace_invalid_utf_8 {
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x60_FFFD,
											link: 0x3,
											length: 1)
										args.src.skip32_fast!(actual: 1, worst_case: 1)
										continue.string_loop_inner
									}
									return "#bad UTF-8"
								}
								yield? base."$short read"
								string_length = 0
								char = 0
								continue.string_loop_outer
							}
							multi_byte_utf8 = args.src.peek_u32le()
							if (multi_byte_utf8 & 0xC0C0_C000) == 0x8080_8000 {
								multi_byte_utf8 = (0x1C_0000 & (multi_byte_utf8 ~mod<< 18)) |
									(0x03_F000 & (multi_byte_utf8 ~mod<< 4)) |
									(0x00_0FC0 & (multi_byte_utf8 >> 10)) |
									(0x00_003F & (multi_byte_utf8 >> 24))
								if (0xFFFF < multi_byte_utf8) and (multi_byte_utf8 <= 0x10_FFFF) {
									args.src.skip32_fast!(actual: 4, worst_case: 4)
									if string_length >= 0xFFF8 {
										args.dst.write_fast_token!(
											value_major: 0,
											value_minor: 0x40_0021,
											link: 0x3,
											length: string_length + 4)
										string_length = 0
										continue.string_loop_outer
									}
									string_length += 4
									continue.string_loop_inner
								}
							}
						}

						if string_length > 0 {
							args.dst.write_fast_token!(
								value_major: 0,
								value_minor: 0x40_0021,
								link: 0x3,
								length: string_length)
							string_length = 0
							if args.dst.available() <= 0 {
								continue.string_loop_outer
							}
						}
						if char == 0x80 {
							return "#bad C0 control code"
						}
						if this.quirk_enabled_replace_invalid_utf_8 {
							args.dst.write_fast_token!(
								value_major: 0,
								value_minor: 0x60_FFFD,
								link: 0x3,
								length: 1)
							args.src.skip32_fast!(actual: 1, worst_case: 1)
							continue.string_loop_inner
						}
						return "#bad UTF-8"
					} endwhile.string_loop_inner
				} endwhile.string_loop_outer

				// Emit the trailing '"'.
				while true {
					if args.src.available() <= 0 {
						if args.src.is_closed() {
							return "#bad input"
						}
						yield? base."$short read"
						continue
					}
					if args.dst.available() <= 0 {
						yield? base."$short write"
						continue
					}
					args.src.skip32_fast!(actual: 1, worst_case: 1)
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x40_0013,
						link: 0x2,
						length: 1)
					break
				}

				// As above, expect must have contained EXPECT_STRING. If it
				// didn't also contain EXPECT_NUMBER then we were parsing an
				// object key and the next token should be ':'.
				if 0 == (expect & 0x0010) {  // 0x0010 is (1 << CLASS_NUMBER).
					expect = 0x1008  // 0x1008 is EXPECT_COLON.
					continue.outer
				}
				break.goto_parsed_a_leaf_value
				// -------- END   parse strings.

			} else if class == 0x02 {  // 0x02 is CLASS_COMMA.
				args.src.skip32_fast!(actual: 1, worst_case: 1)
				// The ',' is filler.
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: 0,
					link: 0x0,
					length: 1)
				// What's valid after a comma depends on whether or not we're
				// in an array or an object.
				if 0 == (expect & 0x0100) {  // 0x0100 is (1 << CLASS_CLOSE_SQUARE_BRACKET).
					expect = 0x1002  // 0x1002 is EXPECT_STRING.
				} else {
					expect = 0x1EB2  // 0x1EB2 is EXPECT_VALUE.
				}
				continue.outer

			} else if class == 0x03 {  // 0x03 is CLASS_COLON.
				args.src.skip32_fast!(actual: 1, worst_case: 1)
				// The ':' is filler.
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: 0,
					link: 0x0,
					length: 1)
				expect = 0x1EB2  // 0x1EB2 is EXPECT_VALUE.
				continue.outer

			} else if class == 0x04 {  // 0x04 is CLASS_NUMBER.
				// -------- BEGIN parse numbers.
				while true,
					pre args.dst.available() > 0,
				{
					number_length = this.decode_number!(src: args.src)
					number_status = number_length >> 8
					vminor = 0xA0_0403
					if (number_length & 0x80) <> 0 {
						vminor = 0xA0_0401
					}
					number_length = number_length & 0x7F
					if number_status == 0 {
						args.dst.write_fast_token!(
							value_major: 0,
							value_minor: vminor,
							link: 0x0,
							length: number_length)
						break
					}

					while number_length > 0 {
						number_length -= 1
						if args.src.can_undo_byte() {
							args.src.undo_byte!()
						} else {
							return "#internal error: inconsistent I/O"
						}
					}

					if number_status == 1 {
						return "#bad input"
					} else if number_status == 2 {
						return "#unsupported number length"
					} else {
						yield? base."$short read"
						while args.dst.available() <= 0,
							post args.dst.available() > 0,
						{
							yield? base."$short write"
						}
					}
				}
				break.goto_parsed_a_leaf_value
				// -------- END   parse numbers.

			} else if class == 0x05 {  // 0x05 is CLASS_OPEN_CURLY_BRACE.
				vminor = 0x20_4011
				if depth == 0 {
					// No-op.
				} else if 0 <> (expect_after_value & 0x0040) {  // 0x0040 is (1 << CLASS_CLOSE_CURLY_BRACE).
					vminor = 0x20_4041
				} else {
					vminor = 0x20_4021
				}
				if depth >= 1024 {
					return "#unsupported recursion depth"
				}
				stack_byte = depth / 32
				stack_bit = depth & 31
				this.stack[stack_byte] |= (1 as base.u32) << stack_bit
				depth += 1

				args.src.skip32_fast!(actual: 1, worst_case: 1)
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: vminor,
					link: 0x0,
					length: 1)
				expect = 0x1042  // 0x1042 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_STRING).
				expect_after_value = 0x1044  // 0x1044 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_COMMA).
				continue.outer

			} else if class == 0x06 {  // 0x06 is CLASS_CLOSE_CURLY_BRACE.
				args.src.skip32_fast!(actual: 1, worst_case: 1)
				if depth <= 1 {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_1042,
						link: 0x0,
						length: 1)
					break.outer
				}
				depth -= 1
				stack_byte = (depth - 1) / 32
				stack_bit = (depth - 1) & 31
				if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_2042,
						link: 0x0,
						length: 1)
					// 0x1104 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_COMMA).
					expect = 0x1104
					expect_after_value = 0x1104
				} else {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_4042,
						link: 0x0,
						length: 1)
					// 0x1044 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_COMMA).
					expect = 0x1044
					expect_after_value = 0x1044
				}
				continue.outer

			} else if class == 0x07 {  // 0x07 is CLASS_OPEN_SQUARE_BRACKET.
				vminor = 0x20_2011
				if depth == 0 {
					// No-op.
				} else if 0 <> (expect_after_value & 0x0040) {  // 0x0040 is (1 << CLASS_CLOSE_CURLY_BRACE).
					vminor = 0x20_2041
				} else {
					vminor = 0x20_2021
				}
				if depth >= 1024 {
					return "#unsupported recursion depth"
				}
				stack_byte = depth / 32
				stack_bit = depth & 31
				this.stack[stack_byte] &= 0xFFFF_FFFF ^ ((1 as base.u32) << stack_bit)
				depth += 1

				args.src.skip32_fast!(actual: 1, worst_case: 1)
				args.dst.write_fast_token!(
					value_major: 0,
					value_minor: vminor,
					link: 0x0,
					length: 1)
				expect = 0x1FB2  // 0x1FB2 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_VALUE).
				expect_after_value = 0x1104  // 0x1104 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_COMMA).
				continue.outer

			} else if class == 0x08 {  // 0x08 is CLASS_CLOSE_SQUARE_BRACKET.
				args.src.skip32_fast!(actual: 1, worst_case: 1)
				if depth <= 1 {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_1022,
						link: 0x0,
						length: 1)
					break.outer
				}
				depth -= 1
				stack_byte = (depth - 1) / 32
				stack_bit = (depth - 1) & 31
				if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_2022,
						link: 0x0,
						length: 1)
					// 0x1104 is (EXPECT_CLOSE_SQUARE_BRACKET | EXPECT_COMMA).
					expect = 0x1104
					expect_after_value = 0x1104
				} else {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x20_4022,
						link: 0x0,
						length: 1)
					// 0x1044 is (EXPECT_CLOSE_CURLY_BRACE | EXPECT_COMMA).
					expect = 0x1044
					expect_after_value = 0x1044
				}
				continue.outer

			} else if class == 0x09 {  // 0x09 is CLASS_FALSE.
				match = args.src.match7(a: 0x6573_6C61_6605)  // 5 bytes "false".
				if match == 0 {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x80_0004,
						link: 0x0,
						length: 5)
					if args.src.available() < 5 {
						return "#internal error: inconsistent I/O"
					}
					args.src.skip32_fast!(actual: 5, worst_case: 5)
					break.goto_parsed_a_leaf_value
				} else if match == 1 {
					yield? base."$short read"
					continue.outer
				}

			} else if class == 0x0A {  // 0x0A is CLASS_TRUE.
				match = args.src.match7(a: 0x65_7572_7404)  // 4 bytes "true".
				if match == 0 {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x80_0008,
						link: 0x0,
						length: 4)
					if args.src.available() < 4 {
						return "#internal error: inconsistent I/O"
					}
					args.src.skip32_fast!(actual: 4, worst_case: 4)
					break.goto_parsed_a_leaf_value
				} else if match == 1 {
					yield? base."$short read"
					continue.outer
				}

			} else if class == 0x0B {  // 0x0B is CLASS_NULL.
				match = args.src.match7(a: 0x6C_6C75_6E04)  // 4 bytes "null".
				if match == 0 {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x80_0002,
						link: 0x0,
						length: 4)
					if args.src.available() < 4 {
						return "#internal error: inconsistent I/O"
					}
					args.src.skip32_fast!(actual: 4, worst_case: 4)
					break.goto_parsed_a_leaf_value
				} else if match == 1 {
					yield? base."$short read"
					continue.outer
				}

			} else if class == 0x0C {  // 0x0C is CLASS_COMMENT.
				if this.quirk_enabled_allow_comment_block or this.quirk_enabled_allow_comment_line {
					this.decode_comment?(dst: args.dst, src: args.src)
					continue.outer
				}
			}

			return "#bad input"
		} endwhile.goto_parsed_a_leaf_value

		// We've just parsed a leaf (non-container) value: literal (null,
		// false, true), number or string.
		if depth == 0 {
			break.outer
		}
		expect = expect_after_value
	} endwhile.outer

	if this.quirk_enabled_allow_trailing_new_line {
		this.decode_trailing_new_line?(dst: args.dst, src: args.src)
	}

	this.end_of_data = true
}

// decode_number scans a JSON number at the front of args.src, consuming the
// bytes it accepts: an optional '-', then either a single '0' or a run of
// digits, then an optional '.' followed by digits, then an optional 'E' or 'e'
// with an optional sign and digits.
//
// The return value packs several things into one u32:
//  - The low bits hold n, the number of bytes consumed so far. Every
//    single-byte skip below is paired with an n += 1, and n is capped at 99.
//  - 0x80 is set once a '.' or an 'E' / 'e' has been consumed.
//  - 0x100 is set when required digits are missing (after a '-', after an
//    exponent marker, or when decode_digits consumes no digits).
//  - 0x200 is set when accepting another byte would push n past the cap of 99.
//  - 0x300 (both of those bits) is set when args.src has no more bytes but is
//    not closed.
//
// If args.src is empty and closed on entry, the result is 0 with no flag bits.
//
// NOTE(review): how each flag is turned into a status (e.g. 0x300 presumably
// meaning "more input needed") is decided by the caller - confirm there.
pri func decoder.decode_number!(src: base.io_reader) base.u32[..= 0x3FF] {
	var c              : base.u8
	var n              : base.u32[..= 0x3FF]
	var floating_point : base.u32[..= 0x80]

	// Every path through this loop body ends in break.goto_done, so the body
	// runs at most once. The loop is only here to give those breaks a target.
	while.goto_done true {
		n = 0

		// Peek.
		if args.src.available() <= 0 {
			if not args.src.is_closed() {
				n |= 0x300
			}
			break.goto_done
		}
		c = args.src.peek_u8()

		// Scan the optional minus sign.
		if c <> 0x2D {  // 0x2D is '-'.
			// These asserts restate what already holds on this branch,
			// presumably so that both branches of the if end with the same
			// facts.
			assert args.src.available() > 0
			assert n <= 1
		} else {
			n += 1
			args.src.skip32_fast!(actual: 1, worst_case: 1)

			// Peek.
			if args.src.available() <= 0 {
				if not args.src.is_closed() {
					n |= 0x300
				}
				n |= 0x100  // A '-' without digits is invalid.
				break.goto_done
			}
			c = args.src.peek_u8()

			assert args.src.available() > 0
			assert n <= 1
		}

		// Scan the opening digits.
		if c == 0x30 {  // 0x30 is '0'.
			// A leading '0' is consumed on its own. Any digits that follow it
			// are not consumed here.
			n += 1
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			assert n <= 99
		} else {
			n = this.decode_digits!(src: args.src, n: n)
			// A plain byte count never exceeds 99, so n > 99 means that
			// decode_digits set at least one of the flag bits.
			if n > 99 {
				break.goto_done
			}
			assert n <= 99
		}

		// Peek.
		if args.src.available() <= 0 {
			if not args.src.is_closed() {
				n |= 0x300
			}
			break.goto_done
		}
		c = args.src.peek_u8()

		// Scan the optional fraction.
		if c <> 0x2E {  // 0x2E is '.'.
			assert args.src.available() > 0
			assert n <= 99
		} else {
			// There is no room under the cap for the '.' itself.
			if n >= 99 {
				n |= 0x200
				break.goto_done
			}
			n += 1
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			floating_point = 0x80

			n = this.decode_digits!(src: args.src, n: n)
			if n > 99 {
				break.goto_done
			}

			// Peek.
			if args.src.available() <= 0 {
				if not args.src.is_closed() {
					n |= 0x300
				}
				break.goto_done
			}
			c = args.src.peek_u8()

			assert args.src.available() > 0
			assert n <= 99
		}

		// Scan the optional 'E' or 'e'.
		if (c <> 0x45) and (c <> 0x65) {  // 0x45 and 0x65 are 'E' and 'e'.
			break.goto_done
		}
		if n >= 99 {
			n |= 0x200
			break.goto_done
		}
		n += 1
		args.src.skip32_fast!(actual: 1, worst_case: 1)
		floating_point = 0x80
		assert n <= 99

		// Peek.
		if args.src.available() <= 0 {
			if not args.src.is_closed() {
				n |= 0x300
			}
			n |= 0x100  // An 'E' or 'e' without digits is invalid.
			break.goto_done
		}
		c = args.src.peek_u8()

		// Scan the optional '+' or '-'.
		if (c <> 0x2B) and (c <> 0x2D) {  // 0x2B and 0x2D are '+' and '-'.
			assert n <= 99
		} else {
			if n >= 99 {
				n |= 0x200
				break.goto_done
			}
			n += 1
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			assert n <= 99
		}

		// Scan the exponent digits. Any flag bits that decode_digits sets are
		// passed through unchanged in the return value.
		n = this.decode_digits!(src: args.src, n: n)

		break.goto_done
	} endwhile.goto_done
	// Fold the floating point marker (0x80 or 0) into the count and flags.
	return n | floating_point
}

// decode_digits consumes consecutive ASCII decimal digits from args.src,
// carrying on from the byte count that the caller passes in as args.n.
//
// The low bits of the return value are the updated byte count, which never
// exceeds 99. Flag bits are OR'ed into it as follows:
//  - 0x100 if this call consumed no digits and set no other flag.
//  - 0x200 if another digit was available but the count was already 99.
//  - 0x300 if args.src ran out of bytes and is not closed.
pri func decoder.decode_digits!(src: base.io_reader, n: base.u32[..= 99]) base.u32[..= 0x3FF] {
	var ch        : base.u8
	var num_bytes : base.u32[..= 0x3FF]

	num_bytes = args.n
	while.digits true {
		// Out of input: flag it unless the source is definitely finished.
		if args.src.available() <= 0 {
			if not args.src.is_closed() {
				num_bytes |= 0x300
			}
			break.digits
		}

		// Stop at the first byte that the digit table maps to zero.
		ch = args.src.peek_u8()
		if lut_decimal_digits[ch] == 0x00 {
			break.digits
		}

		// decoder_number_length_max_incl is capped at 99, an arbitrary value.
		// It follows that the caller's src.data.len should be at least 100,
		// which is decoder_src_io_buffer_length_min_incl.
		//
		// For scale, here is a JSON number that is 81 bytes long:
		// https://github.com/nst/JSONTestSuite/blob/master/test_parsing/y_number_double_close_to_zero.json
		//
		// Because 99 is 0x63, which is below 0x80, the 0x80 bit is free for
		// func decoder.decode_number to use as a flag.
		if num_bytes >= 99 {
			num_bytes |= 0x200
			break.digits
		}

		num_bytes += 1
		args.src.skip32_fast!(actual: 1, worst_case: 1)
	} endwhile.digits

	// Still equal to the starting count: no digit was consumed and no flag
	// was set on the way out of the loop.
	if num_bytes == args.n {
		num_bytes |= 0x100
	}
	return num_bytes
}

// decode_leading consumes the optional bytes that the "allow leading ..."
// quirks permit ahead of the JSON value: a single 0x1E byte (for the ASCII
// record separator quirk) and the three bytes 0xEF 0xBB 0xBF (for the Unicode
// byte order mark quirk).
//
// Each kind is accepted at most once, in either order. Every accepted item is
// emitted as one token whose value_major and value_minor are both 0 and whose
// length is 1 or 3. The function returns as soon as neither kind is still
// allowed, the next bytes match neither, or args.src is closed before enough
// bytes have arrived.
pri func decoder.decode_leading?(dst: base.token_writer, src: base.io_reader) {
	var lead   : base.u8
	var prefix : base.u32

	this.allow_leading_ars = this.quirk_enabled_allow_leading_ascii_record_separator
	this.allow_leading_ubom = this.quirk_enabled_allow_leading_unicode_byte_order_mark
	while.leading this.allow_leading_ars or this.allow_leading_ubom {
		// Make sure there is room for one token before consuming anything.
		if args.dst.available() <= 0 {
			yield? base."$short write"
			continue.leading
		}
		if args.src.available() <= 0 {
			if args.src.is_closed() {
				break.leading
			}
			yield? base."$short read"
			continue.leading
		}
		lead = args.src.peek_u8()

		// A one-byte 0x1E, if that is still allowed.
		if (lead == 0x1E) and this.allow_leading_ars {
			this.allow_leading_ars = false
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			args.dst.write_fast_token!(
				value_major: 0, value_minor: 0, link: 0x0, length: 1)
			continue.leading
		}

		// Anything other than a still-allowed 0xEF ends the leading section.
		if (lead <> 0xEF) or (not this.allow_leading_ubom) {
			break.leading
		}

		// The byte order mark is three bytes long: wait for all of them.
		if args.src.available() < 3 {
			if args.src.is_closed() {
				break.leading
			}
			yield? base."$short read"
			continue.leading
		}
		prefix = args.src.peek_u24le_as_u32()
		if prefix <> 0xBF_BBEF {  // 0xBF_BBEF is 0xEF 0xBB 0xBF, little-endian.
			break.leading
		}
		this.allow_leading_ubom = false
		args.src.skip32_fast!(actual: 3, worst_case: 3)
		args.dst.write_fast_token!(
			value_major: 0, value_minor: 0, link: 0x0, length: 3)
	} endwhile.leading
}

// decode_comment consumes one comment starting at the front of args.src and
// emits it to args.dst as one or more tokens.
//
// Two forms are recognized, each only when its quirk is enabled:
//  - A block comment, from "/*" up to and including the next "*/". Its tokens
//    have value_minor 0x2.
//  - A line comment, from "//" up to and including the next '\n'. Its tokens
//    have value_minor 0x1.
//
// It returns "#bad input" if the first two bytes start neither form (or the
// matching quirk is off), if args.src is closed before two bytes are
// available, or if args.src is closed before the comment's terminator is
// seen.
//
// A comment is split across several tokens when args.src runs dry part-way
// through or when the pending length reaches the per-token cap. Every token
// other than the last has 0x1 set in its link field, and every token after
// the first has 0x2 set (via link_prev).
//
// NOTE(review): link bit 0x1 looks like "continues in the next token" and 0x2
// like "continues from the previous token" - confirm against the token
// format's definition.
pri func decoder.decode_comment?(dst: base.token_writer, src: base.io_reader) {
	var c8        : base.u8
	var c16       : base.u16
	var link_prev : base.u32[..= 0x2]
	var length    : base.u32[..= 0xFFFD]

	// Wait until there is room for a token and at least two source bytes, as
	// the opening delimiter is read as a 16-bit value.
	while (args.dst.available() <= 0) or (args.src.available() <= 1),
		post args.dst.available() > 0,
		post args.src.available() > 1,
	{
		if args.dst.available() <= 0 {
			yield? base."$short write"
			continue
		}
		if args.src.is_closed() {
			return "#bad input"
		}
		yield? base."$short read"
	}
	c16 = args.src.peek_u16le()

	// 0x2A2F is "/*" little-endian.
	if (c16 == 0x2A2F) and this.quirk_enabled_allow_comment_block {
		args.src.skip32_fast!(actual: 2, worst_case: 2)
		// length counts the comment bytes consumed since the last emitted
		// token. It starts at 2 for the opening delimiter.
		length = 2

		// The outer loop re-checks for token space. The inner loop runs with
		// that space guaranteed (its pre condition) and goes back to the outer
		// loop after every yield or non-final token write.
		while.comment_block true {
			if args.dst.available() <= 0 {
				yield? base."$short write"
				length = 0
				continue.comment_block
			}

			while true,
				pre args.dst.available() > 0,
			{
				// Two bytes are needed to test for the closing delimiter. With
				// fewer than that, flush what has been consumed so far and
				// wait for more input.
				if args.src.available() <= 1 {
					if length > 0 {
						args.dst.write_fast_token!(
							value_major: 0,
							value_minor: 0x2,
							link: 0x1 | link_prev,
							length: length)
						link_prev = 0x2
					}
					if args.src.is_closed() {
						// The block comment was never terminated.
						return "#bad input"
					}
					yield? base."$short read"
					length = 0
					continue.comment_block
				}

				c16 = args.src.peek_u16le()
				if c16 == 0x2F2A {  // 0x2F2A is "*/" little-endian.
					// Final token: link bit 0x1 is not set here.
					args.src.skip32_fast!(actual: 2, worst_case: 2)
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x2,
						link: link_prev,
						length: length + 2)
					return ok
				}

				args.src.skip32_fast!(actual: 1, worst_case: 1)
				// Flush once the pending length reaches its maximum, so that
				// length itself never exceeds 0xFFFD.
				if length >= 0xFFFD {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x2,
						link: 0x1 | link_prev,
						length: length + 1)
					length = 0
					link_prev = 0x2
					continue.comment_block
				}
				length += 1
			}
		} endwhile.comment_block

	// 0x2F2F is "//" (the same in either byte order).
	} else if (c16 == 0x2F2F) and this.quirk_enabled_allow_comment_line {
		args.src.skip32_fast!(actual: 2, worst_case: 2)
		length = 2

		// Same two-level loop shape as the block comment case above, except
		// that the terminator is the single byte '\n', so one available source
		// byte is enough to make progress.
		while.comment_line true {
			if args.dst.available() <= 0 {
				yield? base."$short write"
				length = 0
				continue.comment_line
			}

			while true,
				pre args.dst.available() > 0,
			{
				if args.src.available() <= 0 {
					if length > 0 {
						args.dst.write_fast_token!(
							value_major: 0,
							value_minor: 0x1,
							link: 0x1 | link_prev,
							length: length)
						link_prev = 0x2
					}
					if args.src.is_closed() {
						// A line comment that reaches the end of a closed
						// source without a '\n' is rejected.
						return "#bad input"
					}
					yield? base."$short read"
					length = 0
					continue.comment_line
				}

				c8 = args.src.peek_u8()
				if c8 == 0x0A {  // 0x0A is '\n'.
					// Final token: it includes the '\n' itself.
					args.src.skip32_fast!(actual: 1, worst_case: 1)
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x1,
						link: link_prev,
						length: length + 1)
					return ok
				}

				args.src.skip32_fast!(actual: 1, worst_case: 1)
				if length >= 0xFFFD {
					args.dst.write_fast_token!(
						value_major: 0,
						value_minor: 0x1,
						link: 0x1 | link_prev,
						length: length + 1)
					length = 0
					link_prev = 0x2
					continue.comment_line
				}
				length += 1
			}
		} endwhile.comment_line
	}

	// Neither comment form matched, or the matching form's quirk is disabled.
	return "#bad input"
}

// decode_trailing_new_line consumes the whitespace that follows the top-level
// value, stopping after the first '\n' or when args.src is closed and fully
// consumed.
//
// Consumed whitespace is emitted as tokens whose value_major and value_minor
// are both 0. A pending run is flushed when it would exceed 0xFFFF bytes, when
// args.src runs dry, and when the function finishes. Any byte that lut_classes
// does not classify as whitespace produces "#bad input", after the pending
// whitespace has been flushed.
pri func decoder.decode_trailing_new_line?(dst: base.token_writer, src: base.io_reader) {
	var ch     : base.u8
	var ws_len : base.u32[..= 0xFFFE]

	while.refill true {
		// The scanning loop below needs room for at least one token.
		if args.dst.available() <= 0 {
			yield? base."$short write"
			ws_len = 0
			continue.refill
		}

		while.scan true,
			pre args.dst.available() > 0,
		{
			// No input left: flush the pending run, then either finish (the
			// source is closed) or wait for more bytes.
			if args.src.available() <= 0 {
				if ws_len > 0 {
					args.dst.write_fast_token!(
						value_major: 0, value_minor: 0, link: 0x0, length: ws_len)
					ws_len = 0
				}
				if args.src.is_closed() {
					break.refill
				}
				yield? base."$short read"
				ws_len = 0
				continue.refill
			}

			ch = args.src.peek_u8()
			if lut_classes[ch] <> 0x00 {  // 0x00 is CLASS_WHITESPACE.
				if ws_len > 0 {
					args.dst.write_fast_token!(
						value_major: 0, value_minor: 0, link: 0x0, length: ws_len)
					ws_len = 0
				}
				return "#bad input"
			}

			args.src.skip32_fast!(actual: 1, worst_case: 1)

			// The new line ends the trailing whitespace: emit the run,
			// including the '\n' itself, and finish.
			if ch == 0x0A {  // 0x0A is '\n'.
				args.dst.write_fast_token!(
					value_major: 0, value_minor: 0, link: 0x0, length: ws_len + 1)
				ws_len = 0
				break.refill
			}

			// The run has reached its maximum pending length: emit it and go
			// back to re-check for token space.
			if ws_len >= 0xFFFE {
				args.dst.write_fast_token!(
					value_major: 0, value_minor: 0, link: 0x0, length: ws_len + 1)
				ws_len = 0
				continue.refill
			}
			ws_len += 1
		} endwhile.scan
	} endwhile.refill
}
