| // Copyright 2020 The Wuffs Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // "#bad input" is the public error status that decode_tokens returns when the |
| // source bytes are not CBOR that this decoder accepts, or when the source is |
| // closed part-way through a value. |
| pub status "#bad input" |
| |
| // Private error statuses returned by decode_tokens' self-consistency checks: |
| // - "inconsistent I/O" when a byte that was just consumed cannot be undone. |
| // - "inconsistent token length" when a computed token length exceeds the |
| // number of source bytes available. |
| pri status "#internal error: inconsistent I/O" |
| pri status "#internal error: inconsistent token length" |
| |
| // TOKEN_VALUE_MAJOR is the base-38 encoding of "cbor". decode_tokens uses it |
| // as the value_major of CBOR-specific tokens (as opposed to value_major 0). |
| pub const TOKEN_VALUE_MAJOR : base.u32 = 0x0C_061D |
| |
| // TOKEN_VALUE_MINOR__DETAIL_MASK is a mask for the low 18 bits of a token's |
| // value_minor. The 18 comes from (64 - base.TOKEN__VALUE_EXTENSION__NUM_BITS). |
| pub const TOKEN_VALUE_MINOR__DETAIL_MASK : base.u64 = 0x003_FFFF |
| |
| // TOKEN_VALUE_MINOR__TAG means that the low 18 bits of the token's value_minor |
| // are a CBOR tag. That token may be continued, in which case the following |
| // token is an extended token whose value_extension holds a further |
| // base.TOKEN__VALUE_EXTENSION__NUM_BITS bits. The 64-bit CBOR tag is either v |
| // or ((v << base.TOKEN__VALUE_EXTENSION__NUM_BITS) | value_extension_1) where |
| // v is (value_minor_0 & TOKEN_VALUE_MINOR__DETAIL_MASK). The _0 and _1 |
| // suffixes refer to the first and second token in the chain. |
| // |
| // When a token chain contains extended tokens like this, all tokens except |
| // the last one have zero length. |
| pub const TOKEN_VALUE_MINOR__TAG : base.u32 = 0x100_0000 |
| |
| // TOKEN_VALUE_MINOR__MINUS_1_MINUS_X means that the 9-byte length token holds |
| // the negative integer (-1 - x), where x is the big-endian unsigned integer in |
| // the token's final 8 bytes. The most significant bit of x is guaranteed to be |
| // set, so that (-1 - x) will always underflow an int64_t and its absolute |
| // value (+1 + x) might also overflow a uint64_t. |
| // |
| // decode_tokens emits this (with value_major TOKEN_VALUE_MAJOR) only when x is |
| // at least 0x8000_0000_0000_0000; smaller magnitudes use inline-integer tokens. |
| pub const TOKEN_VALUE_MINOR__MINUS_1_MINUS_X : base.u32 = 0x080_0000 |
| |
| // TOKEN_LENGTHS is indexed by c_minor, the low 5 bits of a CBOR item's first |
| // byte. decode_tokens uses the entry as the source-byte length of the token |
| // covering an integer's head: 1 for indexes 0x00 ..= 0x17, then 2, 3, 5 and 9 |
| // for indexes 0x18, 0x19, 0x1A and 0x1B (the first byte plus a 1, 2, 4 or 8 |
| // byte argument). The remaining indexes (0x1C ..= 0x1F) hold 1. |
| pri const TOKEN_LENGTHS : array[32] base.u8[..= 9] = [ |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 2, 3, 5, 9, 1, 1, 1, 1, |
| ] |
| |
| // decoder is a CBOR decoder that implements base.token_decoder. |
| pub struct decoder? implements base.token_decoder( |
| // end_of_data is set by decode_tokens once it has finished decoding. Any |
| // later decode_tokens call then returns base."@end of data" immediately. |
| end_of_data : base.bool, |
| |
| // util supplies helper methods, such as the empty_range_ii_u64 call made |
| // by workbuf_len. |
| util : base.utility, |
| )( |
| ) |
| |
| // set_quirk_enabled is a no-op: the body is empty, so args.quirk and |
| // args.enabled are both ignored and no decoder state changes. |
| pub func decoder.set_quirk_enabled!(quirk: base.u32, enabled: base.bool) { |
| } |
| |
| // workbuf_len returns the empty range produced by |
| // this.util.empty_range_ii_u64(). decode_tokens never reads its args.workbuf |
| // argument, so presumably this signals that no work buffer is required. |
| pub func decoder.workbuf_len() base.range_ii_u64 { |
| return this.util.empty_range_ii_u64() |
| } |
| |
| // decode_tokens reads CBOR bytes from args.src and writes tokens to args.dst. |
| // |
| // It is a coroutine. It yields base."$short write" when args.dst lacks room |
| // for the tokens it is about to write and base."$short read" when args.src |
| // has too few bytes but is not closed. If args.src is closed part-way through |
| // a value, it returns "#bad input" instead. |
| // |
| // Only CBOR major types 0 (unsigned integer), 1 (negative integer), 2 (byte |
| // string) and 3 (text string) are handled here. Any other first byte, and any |
| // reserved c_minor value, falls through to the goto_fail path, which un-reads |
| // that byte and returns "#bad input". |
| // |
| // Once a value has been decoded (with depth at zero), this.end_of_data is set |
| // and every later call returns base."@end of data". args.workbuf is unused. |
| pub func decoder.decode_tokens?(dst: base.token_writer, src: base.io_reader, workbuf: slice base.u8) { |
| // string_length is the argument decoded from the item's head: the integer |
| // value for major types 0 and 1, or the number of payload bytes that are |
| // still to be consumed for major types 2 and 3. |
| var string_length : base.u64 |
| |
| var n64 : base.u64 |
| // depth is never modified in this function, so it is always zero here. |
| var depth : base.u32[..= 1024] |
| var token_length : base.u32[..= 0xFFFF] |
| var value_minor : base.u32[..= 0x1FF_FFFF] |
| var continued : base.u32[..= 1] |
| // c is the item's first byte; c_major is its high 3 bits (the CBOR major |
| // type) and c_minor is its low 5 bits. |
| var c : base.u8 |
| var c_major : base.u8[..= 0x07] |
| var c_minor : base.u8[..= 0x1F] |
| |
| // indefinite_string_major_type is 2 or 3 when we are in an |
| // indefinite-length byte string or text string. It is 0 otherwise. |
| var indefinite_string_major_type : base.u8[..= 3] |
| |
| if this.end_of_data { |
| return base."@end of data" |
| } |
| |
| while.outer true { |
| while.goto_parsed_a_leaf_value true {{ |
| while.goto_fail true {{ |
| // Require room for two tokens: the 64-bit integer cases below write |
| // a simple token followed by an extended token. |
| if args.dst.available() <= 1 { |
| yield? base."$short write" |
| continue.outer |
| } |
| if args.src.available() <= 0 { |
| if args.src.is_closed() { |
| return "#bad input" |
| } |
| yield? base."$short read" |
| continue.outer |
| } |
| c = args.src.peek_u8() |
| |
| // Inside an indefinite-length string, the only byte allowed other |
| // than a chunk of the same major type is the 0xFF break byte, which |
| // closes the token chain with a non-continued 1-byte token. |
| if (indefinite_string_major_type <> 0) and (indefinite_string_major_type <> (c >> 5)) { |
| if c <> 0xFF { |
| return "#bad input" |
| } |
| value_minor = (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP |
| if indefinite_string_major_type == 3 { |
| value_minor |= base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
| } |
| indefinite_string_major_type = 0 |
| args.src.skip_u32_fast!(actual: 1, worst_case: 1) |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: value_minor, |
| continued: 0, |
| length: 1) |
| break.goto_parsed_a_leaf_value |
| } |
| |
| // Consume the first byte, then decode the head's argument into |
| // string_length. For c_minor below 0x18 the argument is c_minor |
| // itself; for 0x18 ..= 0x1B it is the following 1, 2, 4 or 8 |
| // big-endian bytes; for 0x1C ..= 0x1F it is set to zero. |
| args.src.skip_u32_fast!(actual: 1, worst_case: 1) |
| c_major = c >> 5 |
| c_minor = c & 0x1F |
| if c_minor < 0x18 { |
| string_length = c_minor as base.u64 |
| } else { |
| while.goto_have_string_length true, |
| inv args.dst.available() > 1, |
| {{ |
| if c_minor == 0x18 { |
| if args.src.available() >= 1 { |
| string_length = args.src.peek_u8_as_u64() |
| args.src.skip_u32_fast!(actual: 1, worst_case: 1) |
| break.goto_have_string_length |
| } |
| } else if c_minor == 0x19 { |
| if args.src.available() >= 2 { |
| string_length = args.src.peek_u16be_as_u64() |
| args.src.skip_u32_fast!(actual: 2, worst_case: 2) |
| break.goto_have_string_length |
| } |
| } else if c_minor == 0x1A { |
| if args.src.available() >= 4 { |
| string_length = args.src.peek_u32be_as_u64() |
| args.src.skip_u32_fast!(actual: 4, worst_case: 4) |
| break.goto_have_string_length |
| } |
| } else if c_minor == 0x1B { |
| if args.src.available() >= 8 { |
| string_length = args.src.peek_u64be() |
| args.src.skip_u32_fast!(actual: 8, worst_case: 8) |
| break.goto_have_string_length |
| } |
| } else { |
| string_length = 0 |
| break.goto_have_string_length |
| } |
| |
| // Not enough argument bytes yet. Un-read the first byte so that |
| // the whole head is re-parsed from the top of the outer loop |
| // after the caller supplies more input. |
| if args.src.can_undo_byte() { |
| args.src.undo_byte!() |
| if args.src.is_closed() { |
| return "#bad input" |
| } |
| yield? base."$short read" |
| c_major = 0 |
| c_minor = 0 |
| continue.outer |
| } |
| return "#internal error: inconsistent I/O" |
| }} endwhile.goto_have_string_length |
| } |
| |
| if c_major == 0 { |
| // -------- BEGIN Major type 0: an unsigned integer. |
| if c_minor < 0x1A { |
| // The value fits in 16 bits: a single inline-integer token. |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__INLINE_INTEGER_UNSIGNED << 21) | |
| ((string_length & 0xFFFF) as base.u32), |
| continued: 0, |
| length: TOKEN_LENGTHS[c_minor] as base.u32) |
| break.goto_parsed_a_leaf_value |
| } else if c_minor < 0x1C { |
| // A 32 or 64 bit value: the high bits go in a zero-length |
| // continued token and the low bits in an extended token that |
| // carries the source length. |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__INLINE_INTEGER_UNSIGNED << 21) | |
| ((string_length >> base.TOKEN__VALUE_EXTENSION__NUM_BITS) as base.u32), |
| continued: 1, |
| length: 0) |
| args.dst.write_extended_token_fast!( |
| value_extension: string_length & 0x3FFF_FFFF_FFFF, |
| continued: 0, |
| length: TOKEN_LENGTHS[c_minor] as base.u32) |
| break.goto_parsed_a_leaf_value |
| } |
| // -------- END Major type 0: an unsigned integer. |
| |
| } else if c_major == 1 { |
| // -------- BEGIN Major type 1: a negative integer. |
| // The encoded value is (-1 - string_length). The token payloads |
| // below hold the bitwise complement of string_length, truncated to |
| // the payload width. |
| if c_minor < 0x1A { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__INLINE_INTEGER_SIGNED << 21) | |
| (0x1F_FFFF - ((string_length & 0xFFFF) as base.u32)), |
| continued: 0, |
| length: TOKEN_LENGTHS[c_minor] as base.u32) |
| break.goto_parsed_a_leaf_value |
| } else if c_minor < 0x1C { |
| if string_length < 0x8000_0000_0000_0000 { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__INLINE_INTEGER_SIGNED << 21) | |
| (0x1F_FFFF - ((string_length >> base.TOKEN__VALUE_EXTENSION__NUM_BITS) as base.u32)), |
| continued: 1, |
| length: 0) |
| args.dst.write_extended_token_fast!( |
| value_extension: (0xFFFF_FFFF_FFFF_FFFF - string_length) & 0x3FFF_FFFF_FFFF, |
| continued: 0, |
| length: TOKEN_LENGTHS[c_minor] as base.u32) |
| } else { |
| // The top bit of the argument is set, which is only |
| // possible for the 8-byte form. Emit the CBOR-specific |
| // 9-byte token; the consumer reads x from the source bytes. |
| args.dst.write_simple_token_fast!( |
| value_major: TOKEN_VALUE_MAJOR, |
| value_minor: TOKEN_VALUE_MINOR__MINUS_1_MINUS_X, |
| continued: 0, |
| length: 9) |
| } |
| break.goto_parsed_a_leaf_value |
| } |
| // -------- END Major type 1: a negative integer. |
| |
| } else if c_major == 2 { |
| // -------- BEGIN Major type 2: a byte string. |
| // First emit a token covering the head bytes, then loop over the |
| // payload. |
| // |
| // The single-token shortcut for an empty string applies only |
| // outside an indefinite-length string. Inside one, an empty chunk |
| // must not end the token chain, so it takes the general path. |
| if (c_minor == 0x00) and (indefinite_string_major_type == 0) { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 0, |
| length: 1) |
| break.goto_parsed_a_leaf_value |
| } else if c_minor < 0x18 { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1) |
| } else if c_minor < 0x1C { |
| // Head length is 1 + (1, 2, 4 or 8) argument bytes. |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1 + ((1 as base.u32) << (c_minor & 3))) |
| } else if (c_minor == 0x1F) and (indefinite_string_major_type == 0) { |
| // Start of an indefinite-length string. A nested indefinite |
| // head (0x5F while already inside one) is not a valid chunk, so |
| // it falls to the failure branch below. |
| indefinite_string_major_type = 2 |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1) |
| continue.outer |
| } else { |
| break.goto_fail |
| } |
| |
| // Emit the payload as one or more tokens of at most 0xFFFF bytes. |
| while true { |
| if args.dst.available() <= 0 { |
| yield? base."$short write" |
| continue |
| } |
| n64 = string_length.min(a: args.src.available()) |
| token_length = (n64 & 0xFFFF) as base.u32 |
| if n64 > 0xFFFF { |
| token_length = 0xFFFF |
| } else if (token_length <= 0) and (string_length > 0) { |
| // Payload remains but no source bytes are available. When |
| // string_length is already zero (for example the head |
| // 0x58 0x00, or an empty chunk), skip this wait and fall |
| // through to emit a zero-length token instead. |
| if args.src.is_closed() { |
| return "#bad input" |
| } |
| yield? base."$short read" |
| continue |
| } |
| if args.src.available() < (token_length as base.u64) { |
| return "#internal error: inconsistent token length" |
| } |
| string_length ~mod-= token_length as base.u64 |
| // The chain continues while payload remains or while an |
| // indefinite-length string is still waiting for its 0xFF. |
| continued = 0 |
| if (string_length > 0) or (indefinite_string_major_type > 0) { |
| continued = 1 |
| } |
| args.src.skip_u32_fast!(actual: token_length, worst_case: token_length) |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY, |
| continued: continued, |
| length: token_length) |
| if string_length > 0 { |
| continue |
| } else if indefinite_string_major_type > 0 { |
| continue.outer |
| } |
| break.goto_parsed_a_leaf_value |
| } endwhile |
| // -------- END Major type 2: a byte string. |
| |
| } else if c_major == 3 { |
| // -------- BEGIN Major type 3: a text string. |
| // Same structure as the byte string case, with UTF-8 related flags |
| // added to every token. |
| // NOTE(review): DEFINITELY_UTF_8 and DEFINITELY_ASCII are set |
| // without inspecting the payload bytes -- confirm this is intended. |
| // |
| // The single-token shortcut for an empty string applies only |
| // outside an indefinite-length string. Inside one, an empty chunk |
| // must not end the token chain, so it takes the general path. |
| if (c_minor == 0x00) and (indefinite_string_major_type == 0) { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 0, |
| length: 1) |
| break.goto_parsed_a_leaf_value |
| } else if c_minor < 0x18 { |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1) |
| } else if c_minor < 0x1C { |
| // Head length is 1 + (1, 2, 4 or 8) argument bytes. |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1 + ((1 as base.u32) << (c_minor & 3))) |
| } else if (c_minor == 0x1F) and (indefinite_string_major_type == 0) { |
| // Start of an indefinite-length string. A nested indefinite |
| // head (0x7F while already inside one) is not a valid chunk, so |
| // it falls to the failure branch below. |
| indefinite_string_major_type = 3 |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII | |
| base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP, |
| continued: 1, |
| length: 1) |
| continue.outer |
| } else { |
| break.goto_fail |
| } |
| |
| // Emit the payload as one or more tokens of at most 0xFFFF bytes. |
| while true { |
| if args.dst.available() <= 0 { |
| yield? base."$short write" |
| continue |
| } |
| n64 = string_length.min(a: args.src.available()) |
| token_length = (n64 & 0xFFFF) as base.u32 |
| if n64 > 0xFFFF { |
| token_length = 0xFFFF |
| } |
| // TODO: walk token_length back to a UTF-8 boundary. |
| if (token_length <= 0) and (string_length > 0) { |
| // Payload remains but no source bytes are available. When |
| // string_length is already zero (for example the head |
| // 0x78 0x00, or an empty chunk), skip this wait and fall |
| // through to emit a zero-length token instead. |
| if args.src.is_closed() { |
| return "#bad input" |
| } |
| yield? base."$short read" |
| continue |
| } |
| if args.src.available() < (token_length as base.u64) { |
| return "#internal error: inconsistent token length" |
| } |
| string_length ~mod-= token_length as base.u64 |
| // The chain continues while payload remains or while an |
| // indefinite-length string is still waiting for its 0xFF. |
| continued = 0 |
| if (string_length > 0) or (indefinite_string_major_type > 0) { |
| continued = 1 |
| } |
| args.src.skip_u32_fast!(actual: token_length, worst_case: token_length) |
| args.dst.write_simple_token_fast!( |
| value_major: 0, |
| value_minor: (base.TOKEN__VBC__STRING << 21) | |
| base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 | |
| base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 | |
| base.TOKEN__VBD__STRING__DEFINITELY_ASCII | |
| base.TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY, |
| continued: continued, |
| length: token_length) |
| if string_length > 0 { |
| continue |
| } else if indefinite_string_major_type > 0 { |
| continue.outer |
| } |
| break.goto_parsed_a_leaf_value |
| } endwhile |
| // -------- END Major type 3: a text string. |
| } |
| break.goto_fail |
| }} endwhile.goto_fail |
| |
| // Failure path: un-read the offending first byte so that args.src is |
| // left positioned at it, then report the error. |
| if args.src.can_undo_byte() { |
| args.src.undo_byte!() |
| return "#bad input" |
| } |
| return "#internal error: inconsistent I/O" |
| }} endwhile.goto_parsed_a_leaf_value |
| |
| // We've just parsed a leaf (non-container) value: literal (null, |
| // false, true), number or string. |
| if depth == 0 { |
| break.outer |
| } |
| } endwhile.outer |
| |
| this.end_of_data = true |
| } |