blob: 12b7d600d4a9a432ecd902eba24b715a527e38c3 [file] [log] [blame]
// Copyright 2023 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// SPDX-License-Identifier: Apache-2.0 OR MIT
// --------
// See https://tukaani.org/xz/xz-file-format.txt
use "std/crc32"
use "std/crc64"
use "std/lzma"
use "std/sha256"
// Error statuses that this package can return.
//
// "#bad block header": reserved Block Flags bits set, malformed size fields,
// non-zero header padding, or declared sizes that disagree with the data.
pub status "#bad block header"
// "#bad checksum": a Stream Header, Block Header or Block Check mismatch.
pub status "#bad checksum"
// "#bad filter": a filter ID in an invalid position (e.g. LZMA2 as a non-final
// filter, Delta as the final filter) or invalid filter properties.
pub status "#bad filter"
// "#bad header": wrong magic bytes or reserved Stream Flags bits set.
pub status "#bad header"
// "#bad padding": a non-zero byte in the Block Padding.
pub status "#bad padding"
// "#truncated input": the source was closed while more input was needed.
pub status "#truncated input"
// "#unsupported checksum algorithm": a Check type other than None, CRC-32,
// CRC-64 or SHA-256.
pub status "#unsupported checksum algorithm"
// "#unsupported filter": a filter ID other than Delta (0x03) or LZMA2 (0x21).
pub status "#unsupported filter"
// Worst-case bounds for decoder.dst_history_retain_length and
// decoder.workbuf_len. The zero workbuf bound matches workbuf_len, which
// always returns the empty range [0 ..= 0].
// NOTE(review): the 0xFFFF_FFFF history bound presumably mirrors the LZMA
// decoder's worst case - confirm against std/lzma.
pub const DECODER_DST_HISTORY_RETAIN_LENGTH_MAX_INCL_WORST_CASE : base.u64 = 0xFFFF_FFFF
pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0
// decoder decodes the XZ file format. It implements base.io_transformer.
pub struct decoder? implements base.io_transformer(
// placeholder is not referenced by any function visible in this file.
placeholder : base.u32,
// filters holds one packed word per non-final filter:
//  - bits  0 ..=  6: the filter ID (0x03 means Delta).
//  - bits  8 ..= 15: the Delta filter's 1-byte property (distance minus 1).
//  - bits 24 ..= 31: the Delta filter's current position in its 256-byte
//    history (see filter_data), carried over between calls.
filters : array[3] base.u32,
// num_non_final_filters is the low 2 bits of the Block Flags byte: the number
// of filters that precede the final (LZMA2) filter.
num_non_final_filters : base.u32[..= 3],
// checksummer selects the per-Block Check algorithm, taken from the Stream
// Header's Stream Flags.
//
// 0: None.
// 1: CRC-32.
// 2: CRC-64.
// 3: SHA-256.
checksummer : base.u32[..= 3],
// ignore_checksum is set via base.QUIRK_IGNORE_CHECKSUM. When true, checksums
// are neither computed nor compared.
ignore_checksum : base.bool,
// block_has_etc and block_etc_size record the optional Compressed Size and
// Uncompressed Size fields of the current Block Header. The sizes are only
// meaningful when the corresponding block_has_etc flag is true.
block_has_compressed_size : base.bool,
block_has_uncompressed_size : base.bool,
block_compressed_size : base.u64,
block_uncompressed_size : base.u64,
util : base.utility,
) + (
// filter_data holds, per non-final filter, the 256-byte history that the
// Delta filter reads from and writes to.
filter_data : array[3] array[256] base.u8,
// Hashers for the three supported Check algorithms. crc32 is also used for
// the Block Header's own CRC-32.
crc32 : crc32.ieee_hasher,
crc64 : crc64.ecma_hasher,
sha256 : sha256.hasher,
// lzma decodes the final (LZMA2) filter's payload.
lzma : lzma.decoder,
)
// get_quirk reports the current value of a quirk. The only quirk this decoder
// knows about is base.QUIRK_IGNORE_CHECKSUM, for which it returns 1 when
// checksum verification is disabled. Every other case returns 0.
pub func decoder.get_quirk(key: base.u32) base.u64 {
    if args.key <> base.QUIRK_IGNORE_CHECKSUM {
        // Unknown quirks are reported as "off".
        return 0
    } else if not this.ignore_checksum {
        return 0
    }
    return 1
}
// set_quirk configures a quirk. base.QUIRK_IGNORE_CHECKSUM is the only
// supported key: any non-zero value disables checksum verification and zero
// re-enables it. Any other key yields base."#unsupported option".
pub func decoder.set_quirk!(key: base.u32, value: base.u64) base.status {
    if args.key <> base.QUIRK_IGNORE_CHECKSUM {
        return base."#unsupported option"
    }
    this.ignore_checksum = args.value <> 0
    return ok
}
// dst_history_retain_length forwards to the embedded LZMA decoder and returns
// whatever it reports.
pub func decoder.dst_history_retain_length() base.optional_u63 {
return this.lzma.dst_history_retain_length()
}
// workbuf_len returns the empty range [0 ..= 0]: this decoder needs no work
// buffer (do_transform_io passes an empty slice to the LZMA decoder).
pub func decoder.workbuf_len() base.range_ii_u64 {
return this.util.make_range_ii_u64(min_incl: 0, max_incl: 0)
}
// transform_io is the public entry point. It repeatedly calls
// do_transform_io, capturing its status instead of propagating it directly.
// A "$short read" suspension on a closed source is converted into the
// "#truncated input" error; every other status is handed to the caller via
// yield.
pub func decoder.transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
var status : base.status
while true {
status =? this.do_transform_io?(dst: args.dst, src: args.src, workbuf: args.workbuf)
// No more input can arrive once src is closed, so a short read is final.
if (status == base."$short read") and args.src.is_closed() {
return "#truncated input"
}
yield? status
} endwhile
}
// do_transform_io decodes the Stream Header and then each Block in turn,
// writing the decompressed (and un-filtered) bytes to args.dst.
//
// For each Block it:
//  1. decodes the Block Header and checks its CRC-32,
//  2. runs the LZMA decoder over the Compressed Data, applying any non-final
//     filters and feeding the result to the selected checksummer,
//  3. checks the sizes declared in the Block Header,
//  4. consumes the Block Padding (zero bytes up to a multiple of 4),
//  5. consumes the Check field, verifying it unless ignore_checksum is set.
//
// The loop ends when the Index Indicator (a 0x00 byte) is seen where a Block
// Header would start. That byte is consumed and the function returns.
// NOTE(review): nothing here parses the rest of the Index or the Stream
// Footer - confirm whether that is intended for this version.
//
// args.workbuf is unused: the LZMA decoder is given an empty work buffer.
pri func decoder.do_transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
    var magic : base.u64
    var dmark : base.u64
    var smark : base.u64
    var status : base.status
    var checksum32_have : base.u32
    var checksum32_want : base.u32
    var checksum64_have : base.u64
    var checksum64_want : base.u64
    var checksum256_have : base.bitvec256
    var compressed_size : base.u64
    var uncompressed_size : base.u64
    var c8 : base.u8

    // Stream Header: 6 magic bytes.
    magic = args.src.read_u48le_as_u64?()
    if magic <> '\xFD\x37\x7A\x58\x5A\x00'le {
        return "#bad header"
    }

    // Read the 2-byte flags and the 4-byte CRC-32 checksum of those 2 bytes.
    // The four supported flag values are matched together with their
    // precomputed CRC-32, so no hashing is needed on the happy path.
    magic = args.src.read_u48le_as_u64?()
    if magic == '\x00\x00\xFF\x12\xD9\x41'le {
        this.checksummer = 0  // None.
    } else if magic == '\x00\x01\x69\x22\xDE\x36'le {
        this.checksummer = 1  // CRC-32.
    } else if magic == '\x00\x04\xE6\xD6\xB4\x46'le {
        this.checksummer = 2  // CRC-64.
    } else if magic == '\x00\x0A\xE1\xFB\x0C\xA1'le {
        this.checksummer = 3  // SHA-256.
    } else if (magic & 0xF0FF) <> 0 {
        // Section 2.1.1.2. Stream Flags: "If any reserved bit is set, the
        // decoder MUST indicate an error."
        return "#bad header"
    } else {
        // The flags are well-formed. Either the Check type is one we do not
        // implement or the flags' CRC-32 did not match.
        magic = 0xF & (magic >> 8)
        if (magic <> 0x0) and (magic <> 0x1) and (magic <> 0x4) and (magic <> 0xA) {
            return "#unsupported checksum algorithm"
        }
        return "#bad checksum"
    }

    // One iteration per Block.
    while true {
        if args.src.length() <= 0 {
            yield? base."$short read"
            continue
        } else if args.src.peek_u8() == 0x00 {
            // We've hit the Index instead of a Block.
            args.src.skip_u32_fast!(actual: 1, worst_case: 1)
            break
        }

        // Decode the Block Header, hashing every byte it consumes (across
        // suspensions) so that its trailing CRC-32 can be verified.
        if not this.ignore_checksum {
            this.crc32.reset!()
        }
        while true {
            smark = args.src.mark()
            status =? this.decode_block_header_with_padding?(src: args.src)
            if not this.ignore_checksum {
                checksum32_have = this.crc32.update_u32!(x: args.src.since(mark: smark))
            }
            if status.is_ok() {
                break
            }
            yield? status
        } endwhile
        checksum32_want = args.src.read_u32le?()
        if this.ignore_checksum {
            // No-op.
        } else if checksum32_have <> checksum32_want {
            return "#bad checksum"
        } else {
            // Start the per-Block Check from a clean state.
            this.crc32.reset!()
            this.crc64.reset!()
            this.sha256.reset!()
        }

        // Decode the Compressed Data, tracking how many bytes were consumed
        // and produced so that the Block Header's sizes can be checked.
        compressed_size = 0
        uncompressed_size = 0
        while true {
            dmark = args.dst.mark()
            smark = args.src.mark()
            status =? this.lzma.transform_io?(dst: args.dst, src: args.src, workbuf: this.util.empty_slice_u8())
            compressed_size ~mod+= args.src.count_since(mark: smark)
            uncompressed_size ~mod+= args.dst.count_since(mark: dmark)
            // Non-final filters run in place over the freshly written bytes,
            // before those bytes are hashed.
            if this.num_non_final_filters <> 0 {
                this.apply_non_final_filters!(dst_slice: args.dst.since(mark: dmark))
            }
            if this.ignore_checksum {
                // No-op.
            } else if this.checksummer == 1 {
                this.crc32.update!(x: args.dst.since(mark: dmark))
            } else if this.checksummer == 2 {
                this.crc64.update!(x: args.dst.since(mark: dmark))
            } else if this.checksummer == 3 {
                this.sha256.update!(x: args.dst.since(mark: dmark))
            }
            if status.is_ok() {
                break
            }
            yield? status
        } endwhile

        if (this.block_has_compressed_size and (this.block_compressed_size <> compressed_size)) or
                (this.block_has_uncompressed_size and (this.block_uncompressed_size <> uncompressed_size)) {
            return "#bad block header"
        }

        // Block Padding: zero bytes up to the next multiple of 4.
        while (compressed_size & 3) <> 0 {
            c8 = args.src.read_u8?()
            if c8 <> 0x00 {
                return "#bad padding"
            }
            compressed_size ~mod+= 1
        } endwhile

        // Check field: 0, 4, 8 or 32 bytes, depending on the Check type.
        if this.ignore_checksum {
            // The Check bytes are still present in the stream even though we
            // do not verify them. They must be consumed, otherwise the next
            // iteration would misread them as a Block Header or Index.
            if this.checksummer == 1 {
                args.src.skip_u32?(n: 4)
            } else if this.checksummer == 2 {
                args.src.skip_u32?(n: 8)
            } else if this.checksummer == 3 {
                args.src.skip_u32?(n: 32)
            }
        } else if this.checksummer == 1 {
            checksum32_want = args.src.read_u32le?()
            checksum32_have = this.crc32.checksum_u32()
            if checksum32_have <> checksum32_want {
                return "#bad checksum"
            }
        } else if this.checksummer == 2 {
            checksum64_want = args.src.read_u64le?()
            checksum64_have = this.crc64.checksum_u64()
            if checksum64_have <> checksum64_want {
                return "#bad checksum"
            }
        } else if this.checksummer == 3 {
            // Compare the 32-byte digest 8 bytes at a time, reading the
            // stream as big-endian words against elements 3, 2, 1, 0.
            checksum256_have = this.sha256.checksum_bitvec256()
            checksum64_want = args.src.read_u64be?()
            if checksum256_have.get_u64(i: 3) <> checksum64_want {
                return "#bad checksum"
            }
            checksum64_want = args.src.read_u64be?()
            if checksum256_have.get_u64(i: 2) <> checksum64_want {
                return "#bad checksum"
            }
            checksum64_want = args.src.read_u64be?()
            if checksum256_have.get_u64(i: 1) <> checksum64_want {
                return "#bad checksum"
            }
            checksum64_want = args.src.read_u64be?()
            if checksum256_have.get_u64(i: 0) <> checksum64_want {
                return "#bad checksum"
            }
        }
    } endwhile
}
// decode_block_header_with_padding decodes a Block Header, from its leading
// Block Header Size byte up to (but excluding) its trailing CRC-32, which the
// caller reads and verifies.
//
// It returns "#bad block header" if the header's fields overrun the declared
// size or if any Header Padding byte is non-zero.
pri func decoder.decode_block_header_with_padding?(src: base.io_reader) {
var c8 : base.u8
var padded_size_have : base.u64
var padded_size_want : base.u64
var smark : base.u64
var status : base.status
// The size byte encodes a total header size of ((c8 + 1) * 4) bytes. Take
// away the size byte itself and the 4-byte CRC-32 to get the number of bytes
// remaining for the fields and padding: (c8 * 4) - 1.
c8 = args.src.read_u8?()
padded_size_want = ((c8 as base.u64) * 4) ~mod- 1
// Decode the fields, counting the bytes consumed across suspensions.
while true {
smark = args.src.mark()
status =? this.decode_block_header_sans_padding?(src: args.src)
padded_size_have ~sat+= args.src.count_since(mark: smark)
if status.is_ok() {
break
}
yield? status
} endwhile
if padded_size_have > padded_size_want {
return "#bad block header"
}
// Header Padding: the remaining bytes must all be zero.
while padded_size_have < padded_size_want {
// The assertion proves that the increment below cannot overflow.
assert padded_size_have < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: padded_size_want)
c8 = args.src.read_u8?()
if c8 <> 0x00 {
return "#bad block header"
}
padded_size_have += 1
} endwhile
}
// decode_block_header_sans_padding decodes the fields of a Block Header that
// sit between the Block Header Size byte and the Header Padding:
//  - the Block Flags byte,
//  - the optional Compressed Size and Uncompressed Size (uvarints),
//  - the List of Filter Flags.
//
// It records the results in this.num_non_final_filters, this.block_etc,
// this.filters and this.filter_data, and configures this.lzma for the final
// (LZMA2) filter.
//
// It returns:
//  - "#bad block header" for reserved flag bits or a malformed uvarint
//    (too large, or a non-minimal encoding ending in a 0x00 byte),
//  - "#bad filter" for a filter in the wrong position or with bad properties,
//  - "#unsupported filter" for any filter other than Delta or LZMA2.
pri func decoder.decode_block_header_sans_padding?(src: base.io_reader) {
    var c8 : base.u8
    var flags : base.u8
    var filter_id : base.u8
    var status : base.status
    var shift : base.u32
    var f : base.u32
    var k : base.u32

    flags = args.src.read_u8?()
    this.num_non_final_filters = (flags & 0x03) as base.u32
    // Bits 2 ..= 5 of the Block Flags are reserved and must be zero.
    if (flags & 0x3C) <> 0 {
        return "#bad block header"
    }

    this.block_has_compressed_size = (flags & 0x40) <> 0
    if this.block_has_compressed_size {
        // Read a uvarint (Unsigned LEB128) as this.block_compressed_size.
        this.block_compressed_size = 0
        shift = 0
        while true {
            c8 = args.src.read_u8?()
            if shift <= 56 {
                this.block_compressed_size |= ((c8 & 0x7F) as base.u64) << shift
                if c8 < 0x80 {
                    // This is the last byte. A 0x00 last byte (other than as
                    // the only byte) is a non-minimal encoding, which the XZ
                    // specification's decoder rejects.
                    if (c8 == 0x00) and (shift > 0) {
                        return "#bad block header"
                    }
                    break
                }
                shift += 7
                continue
            } else if c8 <> 1 {
                // Only bit 63 remains. Anything above 1 overflows 64 bits
                // and 0 would be a non-minimal encoding.
                return "#bad block header"
            }
            this.block_compressed_size |= (c8 as base.u64) << 63
            break
        } endwhile
    }

    this.block_has_uncompressed_size = (flags & 0x80) <> 0
    if this.block_has_uncompressed_size {
        // Read a uvarint (Unsigned LEB128) as this.block_uncompressed_size.
        this.block_uncompressed_size = 0
        shift = 0
        while true {
            c8 = args.src.read_u8?()
            if shift <= 56 {
                this.block_uncompressed_size |= ((c8 & 0x7F) as base.u64) << shift
                if c8 < 0x80 {
                    // As above, reject a non-minimal encoding.
                    if (c8 == 0x00) and (shift > 0) {
                        return "#bad block header"
                    }
                    break
                }
                shift += 7
                continue
            } else if c8 <> 1 {
                return "#bad block header"
            }
            this.block_uncompressed_size |= (c8 as base.u64) << 63
            break
        } endwhile
    }

    // Configure the non-final filters.
    f = 0
    while f < this.num_non_final_filters {
        assert f < 3 via "a < b: a < c; c <= b"(c: this.num_non_final_filters)
        filter_id = args.src.read_u8?()
        if filter_id == 0x21 {  // LZMA2
            // LZMA2 is only valid as the final filter.
            return "#bad filter"
        } else if filter_id == 0x03 {  // Delta.
            // Delta's "Size of Properties" should be 1.
            c8 = args.src.read_u8?()
            if c8 <> 0x01 {
                return "#bad filter"
            }
            // Stash that 1 Properties byte (and the filter ID) in
            // this.filters[f], for apply_non_final_filters to pick up. The
            // high bits (the Delta position) start at zero.
            c8 = args.src.read_u8?()
            this.filters[f] = ((c8 as base.u32) << 8) | 0x03
            // Clear the Delta filter's 256-byte history.
            k = 0
            while k < 0x100,
                    inv f < 3,
            {
                this.filter_data[f][k] = 0x00
                k += 1
            } endwhile
        } else {
            return "#unsupported filter"
        }
        f += 1
    } endwhile

    // Configure the final filter (which must be LZMA2).
    filter_id = args.src.read_u8?()
    if filter_id == 0x21 {  // LZMA2
        // LZMA2's "Size of Properties" should be 1.
        c8 = args.src.read_u8?()
        if c8 <> 0x01 {
            return "#bad filter"
        }
        // Pass that 1 Properties byte on to the LZMA decoder.
        c8 = args.src.read_u8?()
        status = this.lzma.set_quirk!(
                key: lzma.QUIRK_FORMAT_EXTENSION,
                value: 0x02 | ((c8 as base.u64) << 8))
        if not status.is_ok() {
            return "#bad filter"
        }
    } else if filter_id == 0x03 {  // Delta.
        // Delta is only valid as a non-final filter.
        return "#bad filter"
    } else {
        return "#unsupported filter"
    }
}
// apply_non_final_filters undoes, in place, every non-final filter over
// args.dst_slice. Filters are visited from the last configured one down to
// the first. Delta (ID 0x03) is the only filter acted upon.
//
// Each Delta filter's position in its 256-byte history is kept in the top
// byte of this.filters[f], so that consecutive calls continue where the
// previous one stopped.
pri func decoder.apply_non_final_filters!(dst_slice: slice base.u8) {
    var f : base.u32[..= 2]
    var n : base.u64
    var packed : base.u32
    var id : base.u32[..= 0x7F]
    var dist : base.u32[..= 0x100]
    var pos : base.u32
    var b : base.u8

    if this.num_non_final_filters <= 0 {
        return nothing
    }

    f = this.num_non_final_filters - 1
    while true {
        packed = this.filters[f]
        id = packed & 0x7F
        if id == 0x03 {  // Delta.
            // Unpack the distance (stored as distance minus 1) and the
            // saved history position.
            dist = ((packed >> 8) & 0xFF) + 1
            pos = packed >> 24

            n = 0
            while n < args.dst_slice.length() {
                assert n < 0xFFFF_FFFF_FFFF_FFFF via "a < b: a < c; c <= b"(c: args.dst_slice.length())
                // Add back the byte that was dist positions earlier, then
                // remember the result in the (circular) history.
                b = args.dst_slice[n]
                b ~mod+= this.filter_data[f][(dist ~mod+ pos) & 0xFF]
                this.filter_data[f][pos & 0xFF] = b
                args.dst_slice[n] = b
                pos ~mod-= 1
                n += 1
            } endwhile

            // Save the position back, keeping the ID and property bits.
            this.filters[f] &= 0xFFFF
            this.filters[f] |= (pos ~mod<< 24)
        }

        if f <= 0 {
            break
        }
        f -= 1
    } endwhile
}