blob: bc76df39d6ca317aba45dec9c654b59458bdc19a [file] [log] [blame]
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// "#bad input" is the public error status that decoder.decode_tokens returns
// when the source bytes cannot be decoded, e.g. when the source is closed
// before a value is complete or when the initial byte is not one that
// decode_tokens handles.
pub status "#bad input"
// "#internal error: inconsistent I/O" is returned by decoder.decode_tokens
// when it needs to un-read the byte it has just consumed but
// args.src.can_undo_byte() reports that it cannot.
pri status "#internal error: inconsistent I/O"
// "#internal error: inconsistent token length" is returned by
// decoder.decode_tokens when a token length that was derived from
// args.src.available() turns out to exceed args.src.available().
pri status "#internal error: inconsistent token length"
// TOKEN_VALUE_MAJOR is the base-38 encoding of "cbor".
//
// decoder.decode_tokens uses it as the value_major of its CBOR-specific
// token (see TOKEN_VALUE_MINOR__MINUS_1_MINUS_X). Its other tokens have a
// value_major of 0.
pub const TOKEN_VALUE_MAJOR : base.u32 = 0x0C_061D
// TOKEN_VALUE_MINOR__DETAIL_MASK is a mask for the low 18 bits of a token's
// value_minor. 18 is 64 - base.TOKEN__VALUE_EXTENSION__NUM_BITS.
//
// Equivalently, 0x003_FFFF is ((1 << 18) - 1).
pub const TOKEN_VALUE_MINOR__DETAIL_MASK : base.u64 = 0x003_FFFF
// TOKEN_VALUE_MINOR__TAG (bit 24 of value_minor) means that the low 18 bits of
// the token's value_minor hold a CBOR tag. That token may be continued, in
// which case the following token is an extended token whose value_extension
// holds a further base.TOKEN__VALUE_EXTENSION__NUM_BITS bits. The 64-bit CBOR
// tag is either v or
// ((v << base.TOKEN__VALUE_EXTENSION__NUM_BITS) | value_extension_1), where v
// is (value_minor_0 & TOKEN_VALUE_MINOR__DETAIL_MASK).
//
// When a token chain contains extended tokens like this, all but the last
// token have zero length.
pub const TOKEN_VALUE_MINOR__TAG : base.u32 = 0x100_0000
// TOKEN_VALUE_MINOR__MINUS_1_MINUS_X (bit 23 of value_minor) means that the
// 9-byte-long token holds the negative integer (-1 - x), where x is the
// big-endian unsigned integer in the token's final 8 bytes. The most
// significant bit of x is guaranteed to be set, so that (-1 - x) will always
// underflow an int64_t and its absolute value (+1 + x) might also overflow a
// uint64_t.
//
// decoder.decode_tokens emits this token with a value_major of
// TOKEN_VALUE_MAJOR.
pub const TOKEN_VALUE_MINOR__MINUS_1_MINUS_X : base.u32 = 0x080_0000
// TOKEN_LENGTHS is indexed by the low 5 bits of a CBOR item's initial byte
// (what decoder.decode_tokens calls c_minor). Each element is the number of
// source bytes that decode_tokens consumes for that item's head: the initial
// byte itself plus any following big-endian argument bytes.
//
//  - 0x00 ..= 0x17: 1 (the argument is held in the initial byte).
//  - 0x18: 2 (a 1-byte argument follows).
//  - 0x19: 3 (a 2-byte argument follows).
//  - 0x1A: 5 (a 4-byte argument follows).
//  - 0x1B: 9 (an 8-byte argument follows).
//  - 0x1C ..= 0x1F: 1 (no argument bytes are consumed).
pri const TOKEN_LENGTHS : array[32] base.u8[..= 9] = [
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 5, 9, 1, 1, 1, 1,
]
// decoder is this package's CBOR decoder. It implements base.token_decoder.
//
// Fields:
//  - end_of_data is set by decode_tokens once it has finished decoding a
//    top-level value. Later decode_tokens calls then return
//    base."@end of data" straight away.
//  - util provides helper methods, such as the empty_range_ii_u64 that
//    workbuf_len calls.
pub struct decoder? implements base.token_decoder(
end_of_data : base.bool,
util : base.utility,
)(
)
// set_quirk_enabled is a no-op. Its body is empty, so args.quirk and
// args.enabled are both ignored and no decoder state changes.
pub func decoder.set_quirk_enabled!(quirk: base.u32, enabled: base.bool) {
}
// workbuf_len returns the result of this.util.empty_range_ii_u64(), which is
// presumably the empty range (to be confirmed against base.utility).
pub func decoder.workbuf_len() base.range_ii_u64 {
return this.util.empty_range_ii_u64()
}
// decode_tokens reads CBOR bytes from args.src and writes the corresponding
// tokens to args.dst. args.workbuf is not used.
//
// It is a coroutine. It suspends with base."$short write" when args.dst does
// not have room for the tokens it is about to write, and with
// base."$short read" when args.src does not yet hold enough bytes and is not
// closed.
//
// Only CBOR major types 0 (unsigned integer), 1 (negative integer), 2 (byte
// string) and 3 (text string) are handled. Any other initial byte is un-read
// and reported as "#bad input", as is a source that is closed part way through
// a value. depth is never modified in this function, so decoding stops after
// the first top-level value: this.end_of_data is set and every later call
// returns base."@end of data".
//
// The tokens emitted are:
//  - Integers whose argument fits in 16 bits: one token with the value held
//    inline in value_minor.
//  - Integers with a 4-byte or 8-byte argument: a zero-length token that holds
//    the high bits, continued by an extended token that holds the low
//    base.TOKEN__VALUE_EXTENSION__NUM_BITS bits and covers the source bytes.
//  - Negative integers (-1 - x) with x >= 0x8000_0000_0000_0000: one 9-byte
//    TOKEN_VALUE_MAJOR token with TOKEN_VALUE_MINOR__MINUS_1_MINUS_X.
//  - Strings: a chain made of a "drop" token for each head (and for the 0xFF
//    byte that ends an indefinite-length string) and "copy" tokens, each at
//    most 0xFFFF bytes long, for the payload.
pub func decoder.decode_tokens?(dst: base.token_writer, src: base.io_reader, workbuf: slice base.u8) {
    // string_length is the argument of the current item's head. For strings,
    // it is then counted down to the number of payload bytes not yet emitted.
    var string_length : base.u64
    var n64           : base.u64
    var depth         : base.u32[..= 1024]
    var token_length  : base.u32[..= 0xFFFF]
    var value_minor   : base.u32[..= 0x1FF_FFFF]
    var continued     : base.u32[..= 1]
    var c             : base.u8
    // c_major and c_minor are the high 3 and low 5 bits of the initial byte c.
    var c_major : base.u8[..= 0x07]
    var c_minor : base.u8[..= 0x1F]

    // indefinite_string_major_type is 2 or 3 when we are in an
    // indefinite-length byte string or text string. It is 0 otherwise.
    var indefinite_string_major_type : base.u8[..= 3]

    if this.end_of_data {
        return base."@end of data"
    }

    while.outer true {
        while.goto_parsed_a_leaf_value true {{
            while.goto_fail true {{
                // Every path below writes at most two tokens before it either
                // jumps or re-checks args.dst.available().
                if args.dst.available() <= 1 {
                    yield? base."$short write"
                    continue.outer
                }

                if args.src.available() <= 0 {
                    if args.src.is_closed() {
                        return "#bad input"
                    }
                    yield? base."$short read"
                    continue.outer
                }
                c = args.src.peek_u8()

                // Inside an indefinite-length string, a byte of a different
                // major type is valid only if it is the 0xFF byte that ends
                // the string.
                if (indefinite_string_major_type <> 0) and (indefinite_string_major_type <> (c >> 5)) {
                    if c <> 0xFF {
                        return "#bad input"
                    }
                    value_minor = (base.TOKEN__VBC__STRING << 21) |
                            base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP
                    if indefinite_string_major_type == 3 {
                        value_minor |= base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                base.TOKEN__VBD__STRING__DEFINITELY_ASCII
                    }
                    indefinite_string_major_type = 0
                    args.src.skip_u32_fast!(actual: 1, worst_case: 1)
                    args.dst.write_simple_token_fast!(
                            value_major: 0,
                            value_minor: value_minor,
                            continued: 0,
                            length: 1)
                    break.goto_parsed_a_leaf_value
                }

                // Consume the initial byte. It is un-read (by undo_byte) if
                // the argument bytes are not all available yet or if the item
                // turns out to be unsupported.
                args.src.skip_u32_fast!(actual: 1, worst_case: 1)
                c_major = c >> 5
                c_minor = c & 0x1F
                if c_minor < 0x18 {
                    string_length = c_minor as base.u64
                } else {
                    while.goto_have_string_length true,
                            inv args.dst.available() > 1,
                    {{
                        if c_minor == 0x18 {
                            if args.src.available() >= 1 {
                                string_length = args.src.peek_u8_as_u64()
                                args.src.skip_u32_fast!(actual: 1, worst_case: 1)
                                break.goto_have_string_length
                            }
                        } else if c_minor == 0x19 {
                            if args.src.available() >= 2 {
                                string_length = args.src.peek_u16be_as_u64()
                                args.src.skip_u32_fast!(actual: 2, worst_case: 2)
                                break.goto_have_string_length
                            }
                        } else if c_minor == 0x1A {
                            if args.src.available() >= 4 {
                                string_length = args.src.peek_u32be_as_u64()
                                args.src.skip_u32_fast!(actual: 4, worst_case: 4)
                                break.goto_have_string_length
                            }
                        } else if c_minor == 0x1B {
                            if args.src.available() >= 8 {
                                string_length = args.src.peek_u64be()
                                args.src.skip_u32_fast!(actual: 8, worst_case: 8)
                                break.goto_have_string_length
                            }
                        } else {
                            // 0x1C ..= 0x1F: no argument bytes follow.
                            string_length = 0
                            break.goto_have_string_length
                        }

                        // Not enough argument bytes. Un-read the initial byte
                        // so that the whole head is re-parsed after resuming.
                        if args.src.can_undo_byte() {
                            args.src.undo_byte!()
                            if args.src.is_closed() {
                                return "#bad input"
                            }
                            yield? base."$short read"
                            c_major = 0
                            c_minor = 0
                            continue.outer
                        }
                        return "#internal error: inconsistent I/O"
                    }} endwhile.goto_have_string_length
                }

                if c_major == 0 {
                    // -------- BEGIN Major type 0: an unsigned integer.
                    if c_minor < 0x1A {
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__INLINE_INTEGER_UNSIGNED << 21) |
                                ((string_length & 0xFFFF) as base.u32),
                                continued: 0,
                                length: TOKEN_LENGTHS[c_minor] as base.u32)
                        break.goto_parsed_a_leaf_value
                    } else if c_minor < 0x1C {
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__INLINE_INTEGER_UNSIGNED << 21) |
                                ((string_length >> base.TOKEN__VALUE_EXTENSION__NUM_BITS) as base.u32),
                                continued: 1,
                                length: 0)
                        args.dst.write_extended_token_fast!(
                                value_extension: string_length & 0x3FFF_FFFF_FFFF,
                                continued: 0,
                                length: TOKEN_LENGTHS[c_minor] as base.u32)
                        break.goto_parsed_a_leaf_value
                    }
                    // -------- END Major type 0: an unsigned integer.

                } else if c_major == 1 {
                    // -------- BEGIN Major type 1: a negative integer.
                    //
                    // The value is (-1 - string_length). (0x1F_FFFF - x) is
                    // that value in 21 bits and (0xFFFF_FFFF_FFFF_FFFF - x) is
                    // that value in 64 bits.
                    if c_minor < 0x1A {
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__INLINE_INTEGER_SIGNED << 21) |
                                (0x1F_FFFF - ((string_length & 0xFFFF) as base.u32)),
                                continued: 0,
                                length: TOKEN_LENGTHS[c_minor] as base.u32)
                        break.goto_parsed_a_leaf_value
                    } else if c_minor < 0x1C {
                        if string_length < 0x8000_0000_0000_0000 {
                            args.dst.write_simple_token_fast!(
                                    value_major: 0,
                                    value_minor: (base.TOKEN__VBC__INLINE_INTEGER_SIGNED << 21) |
                                    (0x1F_FFFF - ((string_length >> base.TOKEN__VALUE_EXTENSION__NUM_BITS) as base.u32)),
                                    continued: 1,
                                    length: 0)
                            args.dst.write_extended_token_fast!(
                                    value_extension: (0xFFFF_FFFF_FFFF_FFFF - string_length) & 0x3FFF_FFFF_FFFF,
                                    continued: 0,
                                    length: TOKEN_LENGTHS[c_minor] as base.u32)
                        } else {
                            // Only an 8-byte argument (c_minor == 0x1B) can be
                            // this large, hence the length of 9.
                            args.dst.write_simple_token_fast!(
                                    value_major: TOKEN_VALUE_MAJOR,
                                    value_minor: TOKEN_VALUE_MINOR__MINUS_1_MINUS_X,
                                    continued: 0,
                                    length: 9)
                        }
                        break.goto_parsed_a_leaf_value
                    }
                    // -------- END Major type 1: a negative integer.

                } else if c_major == 2 {
                    // -------- BEGIN Major type 2: a byte string.
                    if c_minor < 0x1C {
                        if string_length == 0 {
                            // A zero-length string or chunk, however its
                            // length was encoded, has no payload. Entering the
                            // payload loop below would wait for (or fail on)
                            // bytes that are not part of this string.
                            if indefinite_string_major_type > 0 {
                                // An empty chunk does not end the enclosing
                                // indefinite-length string.
                                args.dst.write_simple_token_fast!(
                                        value_major: 0,
                                        value_minor: (base.TOKEN__VBC__STRING << 21) |
                                        base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                        continued: 1,
                                        length: TOKEN_LENGTHS[c_minor] as base.u32)
                                continue.outer
                            }
                            args.dst.write_simple_token_fast!(
                                    value_major: 0,
                                    value_minor: (base.TOKEN__VBC__STRING << 21) |
                                    base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                    continued: 0,
                                    length: TOKEN_LENGTHS[c_minor] as base.u32)
                            break.goto_parsed_a_leaf_value
                        }
                        // The head is dropped. The payload follows as one or
                        // more continued tokens.
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                continued: 1,
                                length: TOKEN_LENGTHS[c_minor] as base.u32)
                    } else if c_minor == 0x1F {
                        // The chunks of an indefinite-length string must
                        // themselves be definite-length strings.
                        if indefinite_string_major_type > 0 {
                            break.goto_fail
                        }
                        indefinite_string_major_type = 2
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                continued: 1,
                                length: 1)
                        continue.outer
                    } else {
                        break.goto_fail
                    }

                    // Emit the payload, at most 0xFFFF bytes per token.
                    while true {
                        if args.dst.available() <= 0 {
                            yield? base."$short write"
                            continue
                        }
                        n64 = string_length.min(a: args.src.available())
                        token_length = (n64 & 0xFFFF) as base.u32
                        if n64 > 0xFFFF {
                            token_length = 0xFFFF
                        } else if token_length <= 0 {
                            if args.src.is_closed() {
                                return "#bad input"
                            }
                            yield? base."$short read"
                            continue
                        }
                        if args.src.available() < (token_length as base.u64) {
                            return "#internal error: inconsistent token length"
                        }
                        string_length ~mod-= token_length as base.u64
                        continued = 0
                        if (string_length > 0) or (indefinite_string_major_type > 0) {
                            continued = 1
                        }
                        args.src.skip_u32_fast!(actual: token_length, worst_case: token_length)
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY,
                                continued: continued,
                                length: token_length)
                        if string_length > 0 {
                            continue
                        } else if indefinite_string_major_type > 0 {
                            continue.outer
                        }
                        break.goto_parsed_a_leaf_value
                    } endwhile
                    // -------- END Major type 2: a byte string.

                } else if c_major == 3 {
                    // -------- BEGIN Major type 3: a text string.
                    if c_minor < 0x1C {
                        if string_length == 0 {
                            // A zero-length string or chunk, however its
                            // length was encoded, has no payload. Entering the
                            // payload loop below would wait for (or fail on)
                            // bytes that are not part of this string.
                            if indefinite_string_major_type > 0 {
                                // An empty chunk does not end the enclosing
                                // indefinite-length string.
                                args.dst.write_simple_token_fast!(
                                        value_major: 0,
                                        value_minor: (base.TOKEN__VBC__STRING << 21) |
                                        base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                        base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                        base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
                                        base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                        continued: 1,
                                        length: TOKEN_LENGTHS[c_minor] as base.u32)
                                continue.outer
                            }
                            args.dst.write_simple_token_fast!(
                                    value_major: 0,
                                    value_minor: (base.TOKEN__VBC__STRING << 21) |
                                    base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                    base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                    base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
                                    base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                    continued: 0,
                                    length: TOKEN_LENGTHS[c_minor] as base.u32)
                            break.goto_parsed_a_leaf_value
                        }
                        // The head is dropped. The payload follows as one or
                        // more continued tokens.
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
                                base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                continued: 1,
                                length: TOKEN_LENGTHS[c_minor] as base.u32)
                    } else if c_minor == 0x1F {
                        // The chunks of an indefinite-length string must
                        // themselves be definite-length strings.
                        if indefinite_string_major_type > 0 {
                            break.goto_fail
                        }
                        indefinite_string_major_type = 3
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
                                base.TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP,
                                continued: 1,
                                length: 1)
                        continue.outer
                    } else {
                        break.goto_fail
                    }

                    // Emit the payload, at most 0xFFFF bytes per token.
                    //
                    // NOTE(review): the payload bytes are not inspected here,
                    // yet the tokens below carry the DEFINITELY_UTF_8 and
                    // DEFINITELY_ASCII flags. Confirm that this is intended.
                    while true {
                        if args.dst.available() <= 0 {
                            yield? base."$short write"
                            continue
                        }
                        n64 = string_length.min(a: args.src.available())
                        token_length = (n64 & 0xFFFF) as base.u32
                        if n64 > 0xFFFF {
                            token_length = 0xFFFF
                        }
                        // TODO: walk token_length back to a UTF-8 boundary.
                        if token_length <= 0 {
                            if args.src.is_closed() {
                                return "#bad input"
                            }
                            yield? base."$short read"
                            continue
                        }
                        if args.src.available() < (token_length as base.u64) {
                            return "#internal error: inconsistent token length"
                        }
                        string_length ~mod-= token_length as base.u64
                        continued = 0
                        if (string_length > 0) or (indefinite_string_major_type > 0) {
                            continued = 1
                        }
                        args.src.skip_u32_fast!(actual: token_length, worst_case: token_length)
                        args.dst.write_simple_token_fast!(
                                value_major: 0,
                                value_minor: (base.TOKEN__VBC__STRING << 21) |
                                base.TOKEN__VBD__STRING__DEFINITELY_UTF_8 |
                                base.TOKEN__VBD__STRING__CHAIN_MUST_BE_UTF_8 |
                                base.TOKEN__VBD__STRING__DEFINITELY_ASCII |
                                base.TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY,
                                continued: continued,
                                length: token_length)
                        if string_length > 0 {
                            continue
                        } else if indefinite_string_major_type > 0 {
                            continue.outer
                        }
                        break.goto_parsed_a_leaf_value
                    } endwhile
                    // -------- END Major type 3: a text string.
                }

                break.goto_fail
            }} endwhile.goto_fail

            // Failure: un-read the initial byte, so that args.src is left
            // positioned at the offending item, and report the error.
            if args.src.can_undo_byte() {
                args.src.undo_byte!()
                return "#bad input"
            }
            return "#internal error: inconsistent I/O"
        }} endwhile.goto_parsed_a_leaf_value

        // We've just parsed a leaf (non-container) value: literal (null,
        // false, true), number or string.
        if depth == 0 {
            break.outer
        }
    } endwhile.outer

    this.end_of_data = true
}