// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO: "decoder implements base.token_decoder".

pub status "#bad input"
pub status "#unsupported recursion depth"

pub struct decoder?(
)(
	// stack is conceptually an array of bits, implemented as an array of u32.
	// The N'th bit being 0 or 1 means that we're in an array or object, where
	// N is the recursion depth.
	//
	// Parsing JSON involves recursion: containers (arrays and objects) can
	// hold other containers. As child elements are completed, the parser needs
	// to remember 1 bit of state per recursion depth: whether the parent
	// container was an array or an object. When continuing to parse the
	// parent's elements, `, "key": value` is only valid for objects.
	//
	// Note that we explicitly track our own stack and depth. We do not use the
	// call stack to hold this state and the decoder.decode_tokens function is
	// not recursive per se.
	//
	// Wuffs code does not have the capability to dynamically allocate memory,
	// so the maximum depth is hard-coded at compile time. In this case, the
	// maximum is 1024 (stack is 1024 bits or 128 bytes).
	//
	// The [JSON spec](https://tools.ietf.org/html/rfc7159) clearly states, "an
	// implementation may set limits on the maximum depth of nesting".
	//
	// In comparison, as of February 2020, the Chromium web browser's JSON
	// parser's maximum recursion depth is 200:
	// https://source.chromium.org/chromium/chromium/src/+/3dece34cde622faa0daac07156c25d92c9897d1e:base/json/json_common.h;l=18
	//
	// Other languages and libraries' maximum depths (determined empirically)
	// are listed at https://github.com/lovasoa/bad_json_parsers#results
	stack : array[1024 / 32] base.u32,
)

pub func decoder.decode_tokens?(dst: base.token_writer, src: base.io_reader) {
	var string_length     : base.u32[..= 0xFFFE]
	var whitespace_length : base.u32[..= 0xFFFE]
	var depth             : base.u32[..= 1024]
	var stack_byte        : base.u32[..= (1024 / 32) - 1]
	var stack_bit         : base.u32[..= 31]
	var c                 : base.u8

	// expect is a bitmask of what the next token can be:
	//  - 0x01 EXPECT_NON_STRING_VALUE.
	//  - 0x02 EXPECT_STRING_VALUE.
	//  - 0x04 EXPECT_COMMA ','.
	//  - 0x08 EXPECT_COLON ':'.
	//  - 0x10 EXPECT_CLOSE_BRACKET ']'.
	//  - 0x20 EXPECT_CLOSE_BRACE '}'.
	//
	// EXPECT_VALUE is also defined to be 0x03, equivalent to
	// (EXPECT_NON_STRING_VALUE | EXPECT_STRING_VALUE).
	//
	// "Non-string value" includes numbers, literals (false, true, null),
	// arrays ("[etc]") and objects ("{etc}").
	//
	// "String value" includes "this" and "th\u0061t".
	var expect : base.u8

	// expect_after_value is what to expect after seeing a value. For depth 0,
	// this is ignored. Otherwise, it should be (EXPECT_CLOSE_FOO |
	// EXPECT_COMMA), for some value of FOO.
	var expect_after_value : base.u8

	expect = 0x03  // EXPECT_VALUE

	while.outer true {
		if args.dst.available() <= 0 {
			yield? base."$short write"
			continue.outer
		}

		// Consume whitespace.
		whitespace_length = 0
		c = 0
		while.ws true,
			inv args.dst.available() > 0,
			post args.src.available() > 0,
		{
			if args.src.available() <= 0 {
				if whitespace_length > 0 {
					args.dst.write_fast_token!(value: 0, length: whitespace_length as base.u64)
					whitespace_length = 0
				}
				yield? base."$short read"
				whitespace_length = 0
				continue.outer
			}

			c = args.src.peek_u8()
			if c > 0x20 {  // 0x20 is ' '.
				break.ws
			} else if (c <> 0x20) and (c <> 0x0A) and (c <> 0x09) and (c <> 0x0D) {
				if whitespace_length > 0 {
					args.dst.write_fast_token!(value: 0, length: whitespace_length as base.u64)
				}
				return "#bad input"
			}
			args.src.skip32_fast!(actual: 1, worst_case: 1)

			if whitespace_length >= 0xFFFE {
				args.dst.write_fast_token!(value: 0, length: 0xFFFF)
				whitespace_length = 0
				continue.outer
			}
			whitespace_length += 1
		}

		// Emit whitespace.
		if whitespace_length > 0 {
			args.dst.write_fast_token!(value: 0, length: whitespace_length as base.u64)
			whitespace_length = 0
			if args.dst.available() <= 0 {
				continue.outer
			}
		}

		// These assertions are redundant (the Wuffs compiler should already
		// know these facts; deleting these assertions should still compile)
		// but are listed explicitly to help guard against future edits to the
		// code above inadvertently invalidating these assertions.
		assert args.dst.available() > 0
		assert args.src.available() > 0

		if c <= 0x39 {  // 0x39 is '9'.
			if c == 0x22 {  // 0x22 is '"'.
				// -------- BEGIN parse strings.
				if 0 == (expect & 0x02) {  // 0x02 is EXPECT_STRING_VALUE.
					return "#bad input"
				}

				// The leading '"' is filler.
				args.dst.write_fast_token!(value: 0, length: 1)
				args.src.skip32_fast!(actual: 1, worst_case: 1)

				while.string_loop true {
					if args.dst.available() <= 0 {
						yield? base."$short write"
						continue.string_loop
					}

					string_length = 0
					while true,
						pre args.dst.available() > 0,
					{
						if args.src.available() <= 0 {
							if string_length > 0 {
								args.dst.write_fast_token!(value: 0x60_0001, length: string_length as base.u64)
								string_length = 0
							}
							continue.string_loop
						}
						c = args.src.peek_u8()
						args.src.skip32_fast!(actual: 1, worst_case: 1)

						if c == 0x22 {  // 0x22 is '"'.
							args.dst.write_fast_token!(value: 0x60_0000, length: string_length as base.u64)
							string_length = 0
							break.string_loop

						} else if c == 0x5C {  // 0x5C is '\\'.
							// TODO: escapes.
							return "#bad input"

						} else if string_length >= 0xFFFE {
							args.dst.write_fast_token!(value: 0x60_0001, length: 0xFFFF)
							string_length = 0
							continue.string_loop
						}

						string_length += 1
					}
				}

				// The trailing '"' is filler.
				while true {
					if args.dst.available() > 0 {
						args.dst.write_fast_token!(value: 0, length: 1)
						break
					}
					yield? base."$short write"
				}

				// As above, expect must have contained EXPECT_STRING_VALUE. If
				// it didn't also contain EXPECT_NON_STRING_VALUE then we were
				// parsing an object key and the next token should be ':'.
				// Otherwise, fall below to "We've just parsed a value".
				if 0 == (expect & 0x01) {  // 0x01 is EXPECT_NON_STRING_VALUE.
					expect = 0x08  // 0x08 is EXPECT_COLON.
					continue.outer
				}
				// -------- END   parse strings.

			} else if c == 0x2C {  // 0x2C is ','.
				if 0 == (expect & 0x04) {  // 0x01 is EXPECT_COMMA.
					return "#bad input"
				}
				args.src.skip32_fast!(actual: 1, worst_case: 1)
				args.dst.write_fast_token!(value: 0, length: 1)  // The ',' is filler.
				// What's valid after a comma depends on whether or not we're
				// in an array or an object.
				if 0 == (expect & 0x10) {  // 0x10 is EXPECT_CLOSE_BRACKET
					expect = 0x02  // 0x02 is EXPECT_STRING_VALUE.
				} else {
					expect = 0x03  // 0x03 is EXPECT_VALUE.
				}
				continue.outer

			} else {
				// TODO: numbers.
				return "#bad input"
			}

		} else if c == 0x3A {  // 0x3A is ':'.
			if 0 == (expect & 0x08) {  // 0x08 is EXPECT_COLON.
				return "#bad input"
			}
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			args.dst.write_fast_token!(value: 0, length: 1)  // The ':' is filler.
			expect = 0x03  // 0x03 is EXPECT_VALUE.
			continue.outer

		} else if c == 0x5B {  // 0x5B is '['.
			if 0 == (expect & 0x01) {  // 0x01 is EXPECT_NON_STRING_VALUE.
				return "#bad input"
			}
			if depth >= 1024 {
				return "#unsupported recursion depth"
			}
			stack_byte = depth / 32
			stack_bit = depth & 31
			this.stack[stack_byte] &= 0 ^ ((1 as base.u32) << stack_bit)
			depth += 1

			args.src.skip32_fast!(actual: 1, worst_case: 1)
			args.dst.write_fast_token!(value: 0x20_0011, length: 1)
			expect = 0x13  // 0x13 is (EXPECT_CLOSE_BRACKET | EXPECT_VALUE).
			expect_after_value = 0x14  // 0x14 is (EXPECT_CLOSE_BRACKET | EXPECT_COMMA).
			continue.outer

		} else if c == 0x5D {  // 0x5D is ']'.
			if 0 == (expect & 0x10) {  // 0x10 is EXPECT_CLOSE_BRACKET
				return "#bad input"
			}
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			if depth <= 1 {
				args.dst.write_fast_token!(value: 0x20_0012, length: 1)
				break.outer
			}
			depth -= 1
			stack_byte = (depth - 1) / 32
			stack_bit = (depth - 1) & 31
			if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
				args.dst.write_fast_token!(value: 0x20_1012, length: 1)
				// 0x14 is (EXPECT_CLOSE_BRACKET | EXPECT_COMMA).
				expect = 0x14
				expect_after_value = 0x14
			} else {
				args.dst.write_fast_token!(value: 0x20_2012, length: 1)
				// 0x24 is (EXPECT_CLOSE_BRACE | EXPECT_COMMA).
				expect = 0x24
				expect_after_value = 0x24
			}
			continue.outer

		} else if c == 0x7B {  // 0x7B is '{'.
			if 0 == (expect & 0x01) {  // 0x01 is EXPECT_NON_STRING_VALUE.
				return "#bad input"
			}
			if depth >= 1024 {
				return "#unsupported recursion depth"
			}
			stack_byte = depth / 32
			stack_bit = depth & 31
			this.stack[stack_byte] |= (1 as base.u32) << stack_bit
			depth += 1

			args.src.skip32_fast!(actual: 1, worst_case: 1)
			args.dst.write_fast_token!(value: 0x20_0021, length: 1)
			expect = 0x22  // 0x22 is (EXPECT_CLOSE_BRACE | EXPECT_STRING_VALUE).
			expect_after_value = 0x24  // 0x24 is (EXPECT_CLOSE_BRACE | EXPECT_COMMA).
			continue.outer

		} else if c == 0x7D {  // 0x7D is '}'.
			if 0 == (expect & 0x20) {  // 0x20 is EXPECT_CLOSE_BRACE.
				return "#bad input"
			}
			args.src.skip32_fast!(actual: 1, worst_case: 1)
			if depth <= 1 {
				args.dst.write_fast_token!(value: 0x20_0022, length: 1)
				break.outer
			}
			depth -= 1
			stack_byte = (depth - 1) / 32
			stack_bit = (depth - 1) & 31
			if 0 == (this.stack[stack_byte] & ((1 as base.u32) << stack_bit)) {
				args.dst.write_fast_token!(value: 0x20_1022, length: 1)
				// 0x14 is (EXPECT_CLOSE_BRACKET | EXPECT_COMMA).
				expect = 0x14
				expect_after_value = 0x14
			} else {
				args.dst.write_fast_token!(value: 0x20_2022, length: 1)
				// 0x24 is (EXPECT_CLOSE_BRACE | EXPECT_COMMA).
				expect = 0x24
				expect_after_value = 0x24
			}
			continue.outer

		} else {
			// TODO: literals (false, true, null).
			return "#bad input"
		}

		// We've just parsed a value.
		if depth == 0 {
			break.outer
		}
		expect = expect_after_value
	}
}
