std/lzip/decode_lzip.wuffs - external/github.com/google/wuffs - Git at Google

 // Copyright 2024 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
 // SPDX-License-Identifier: Apache-2.0 OR MIT

 // --------

 // The Lzip file format specification is at
 // https://www.nongnu.org/lzip/manual/lzip_manual.html#File-format

 use "std/crc32"
 use "std/lzma"

 // Returned when the checksum stored in the input differs from the CRC-32
 // computed over the decoded bytes (and checksums are not being ignored).
 pub status "#bad checksum"
 // Returned when the uncompressed or compressed size recorded after the LZMA
 // payload is out of range or differs from what was actually decoded.
 pub status "#bad footer"
 // Returned when the 5-byte magic-and-version is not 'LZIP\x01' or when the
 // LZMA decoder rejects the dictionary size byte.
 pub status "#bad header"
 // Returned by decoder.transform_io when more input is needed but the source
 // is already closed.
 pub status "#truncated input"

 // NOTE(review): going by their names, these look like worst-case inclusive
 // upper bounds for what decoder.dst_history_retain_length and
 // decoder.workbuf_len can report. Both of those methods forward to the
 // embedded lzma.decoder, so the values presumably mirror the equivalent
 // std/lzma constants - confirm against that package.
 pub const DECODER_DST_HISTORY_RETAIN_LENGTH_MAX_INCL_WORST_CASE : base.u64 = 0
 pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE               : base.u64 = 0xFFFF_FFFF + 273

 // decoder decodes Lzip data. It implements base.io_transformer, layering
 // header and footer validation on top of an embedded LZMA decoder.
 pub struct decoder? implements base.io_transformer(
         // ignore_checksum is toggled via set_quirk with
         // base.QUIRK_IGNORE_CHECKSUM. When true, the CRC-32 of the decoded
         // bytes is neither computed nor compared with the stored value.
         ignore_checksum : base.bool,

         // dsize_have and ssize_have count the bytes written to dst and read
         // from src by the LZMA decoder for the current LZIP-encoded chunk.
         // They are zeroed at the start of each chunk and later compared with
         // the sizes recorded in that chunk's footer.
         dsize_have : base.u64,
         ssize_have : base.u64,

         util : base.utility,
 ) + (
         // crc32 hashes the decoded bytes for comparison with the footer.
         crc32 : crc32.ieee_hasher,
         // lzma decodes each chunk's LZMA payload.
         lzma  : lzma.decoder,
 )

 // get_quirk reports whether a quirk is enabled. It returns 1 only when
 // args.key is base.QUIRK_IGNORE_CHECKSUM and checksum verification has been
 // switched off via set_quirk. It returns 0 in every other case, including
 // for any other key.
 pub func decoder.get_quirk(key: base.u32) base.u64 {
     if args.key <> base.QUIRK_IGNORE_CHECKSUM {
         return 0
     } else if not this.ignore_checksum {
         return 0
     }
     return 1
 }

 // set_quirk configures the decoder. The only key handled here is
 // base.QUIRK_IGNORE_CHECKSUM: a non-zero args.value turns checksum
 // verification off and a zero args.value turns it back on. Every other key
 // is rejected with base."#unsupported option".
 pub func decoder.set_quirk!(key: base.u32, value: base.u64) base.status {
     if args.key <> base.QUIRK_IGNORE_CHECKSUM {
         return base."#unsupported option"
     }
     this.ignore_checksum = args.value > 0
     return ok
 }

 // dst_history_retain_length forwards to the embedded LZMA decoder and returns
 // its answer unchanged.
 pub func decoder.dst_history_retain_length() base.optional_u63 {
     return this.lzma.dst_history_retain_length()
 }

 // workbuf_len returns the work buffer length range reported by the embedded
 // LZMA decoder. The Lzip layer adds no work buffer requirement of its own.
 pub func decoder.workbuf_len() base.range_ii_u64 {
     return this.lzma.workbuf_len()
 }

 // transform_io is the public decoding entry point. It repeatedly drives
 // do_transform_io and passes on whatever status that produces, with one
 // exception: a "$short read" while args.src is already closed is reported as
 // the "#truncated input" error instead.
 pub func decoder.transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
     var outcome : base.status

     while true {
         outcome =? this.do_transform_io?(dst: args.dst, src: args.src, workbuf: args.workbuf)
         if outcome == base."$short read" {
             // More input was requested, but a closed source has none to give.
             if args.src.is_closed() {
                 return "#truncated input"
             }
         }
         yield? outcome
     }
 }

 // do_transform_io decodes one or more consecutive LZIP-encoded chunks from
 // args.src, writing the decoded bytes to args.dst. For each chunk it:
 //  - checks the 5-byte magic-and-version,
 //  - hands the dictionary size byte to the LZMA decoder,
 //  - runs the LZMA decoder until it reports ok, and
 //  - validates the footer: checksum, uncompressed size and compressed size.
 //
 // It returns "#bad header", "#bad checksum" or "#bad footer" when the
 // corresponding validation fails. Non-ok statuses from the LZMA decoder's
 // transform_io are yielded to the caller.
 pri func decoder.do_transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
     var c8            : base.u8
     var c32           : base.u32
     var c64           : base.u64
     var dmark         : base.u64
     var smark         : base.u64
     var status        : base.status
     var checksum_want : base.u32
     var checksum_have : base.u32
     var size_want     : base.u64

     // Each iteration of the outer loop handles one LZIP-encoded chunk.
     while.outer true {
         // Read the magic number and version number.
         //
         // Some of the test/3pdata/xzsuite/good*v0*.lz files have a 0x00
         // version number (instead of 0x01). We reject those, just like
         // /usr/bin/lzip does and as per the Lzip spec.
         c64 = args.src.read_u40le_as_u64?()
         if c64 <> 'LZIP\x01'le {
             return "#bad header"
         }

         // Read the dictionary size.
         //
         // NOTE(review): the quirk value packs 0x01 in the low byte, presumably
         // selecting the Lzip flavor of lzma.QUIRK_FORMAT_EXTENSION, with the
         // dictionary size byte in the next 8 bits - confirm against std/lzma.
         c8 = args.src.read_u8?()
         status = this.lzma.set_quirk!(
                 key: lzma.QUIRK_FORMAT_EXTENSION,
                 value: 0x01 | ((c8 as base.u64) << 8))
         if not status.is_ok() {
             // A rejected dictionary size byte is a malformed header from the
             // caller's point of view. Other errors are passed on unchanged.
             if status == base."#bad argument" {
                 return "#bad header"
             }
             return status
         }

         // Decode LZMA.
         //
         // Re lzma.QUIRK_ALLOW_NON_ZERO_INITIAL_BYTE, as exercised by
         // test/3pdata/xzsuite/lzip-testsuite/fox6_mark.lz, the LZMA spec says
         // that the decoder must check that the byte is zero, but the Lzip spec
         // says that the byte "must be ignored by the range decoder".
         this.ssize_have = 0
         this.dsize_have = 0
         this.lzma.set_quirk!(key: lzma.QUIRK_ALLOW_NON_ZERO_INITIAL_BYTE, value: 1)
         while true {
             // Mark both streams so that the bytes consumed and produced by
             // this one call can be measured afterwards.
             dmark = args.dst.mark()
             smark = args.src.mark()
             status =? this.lzma.transform_io?(dst: args.dst, src: args.src, workbuf: args.workbuf)
             this.ssize_have ~mod+= args.src.count_since(mark: smark)
             this.dsize_have ~mod+= args.dst.count_since(mark: dmark)

             // Hash the newly written bytes before any yield, so that output
             // from a call that did not finish is still covered.
             if not this.ignore_checksum {
                 this.crc32.update!(x: args.dst.since(mark: dmark))
             }

             if status.is_ok() {
                 break
             }
             yield? status
         }

         // Check the checksum. The stored value is always read, so that the
         // footer is consumed even when verification is disabled.
         checksum_want = args.src.read_u32le?()
         if not this.ignore_checksum {
             checksum_have = this.crc32.checksum_u32()
             if checksum_have <> checksum_want {
                 return "#bad checksum"
             }
         }

         // Check the uncompressed size.
         size_want = args.src.read_u64le?()
         if this.dsize_have <> size_want {
             return "#bad footer"
         }

         // Check the compressed size, which includes 6 bytes of header and 20
         // bytes of footer. Per the spec, it's also capped at 2 PiB.
         //
         // The (size_want < 26) test comes first so that the subtraction in
         // the else-if branch cannot underflow.
         size_want = args.src.read_u64le?()
         if (size_want < 26) or (0x8_0000_0000_0000 < size_want) {
             return "#bad footer"
         } else if this.ssize_have <> (size_want - 26) {
             return "#bad footer"
         }

         // Reset state and continue the outer loop, if not at EOF and it looks
         // like there's another LZIP-encoded chunk.
         //
         // Wait until 4 bytes are available to peek at. If the source closes
         // before then, there is no further chunk and decoding is complete.
         while args.src.length() < 4,
                 post args.src.length() >= 4,
         {
             if args.src.is_closed() {
                 break.outer
             }
             yield? base."$short read"
         }
         // Peek, rather than read, so that the magic bytes are still there for
         // the next iteration's header check. Trailing data that does not
         // start with 'LZIP' is left unconsumed.
         c32 = args.src.peek_u32le()
         if c32 <> 'LZIP'le {
             break.outer
         }
         this.crc32.reset!()
         this.lzma.reset!()
     }.outer
 }
	// Copyright 2024 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.
	//
	// SPDX-License-Identifier: Apache-2.0 OR MIT

	// --------

	// The Lzip file format specification is at
	// https://www.nongnu.org/lzip/manual/lzip_manual.html#File-format

	use "std/crc32"
	use "std/lzma"

	// Returned when the checksum stored in the input differs from the CRC-32
	// computed over the decoded bytes (and checksums are not being ignored).
	pub status "#bad checksum"
	// Returned when the uncompressed or compressed size recorded after the LZMA
	// payload is out of range or differs from what was actually decoded.
	pub status "#bad footer"
	// Returned when the 5-byte magic-and-version is not 'LZIP\x01' or when the
	// LZMA decoder rejects the dictionary size byte.
	pub status "#bad header"
	// Returned by decoder.transform_io when more input is needed but the source
	// is already closed.
	pub status "#truncated input"

	// NOTE(review): going by their names, these look like worst-case inclusive
	// upper bounds for what decoder.dst_history_retain_length and
	// decoder.workbuf_len can report. Both of those methods forward to the
	// embedded lzma.decoder, so the values presumably mirror the equivalent
	// std/lzma constants - confirm against that package.
	pub const DECODER_DST_HISTORY_RETAIN_LENGTH_MAX_INCL_WORST_CASE : base.u64 = 0
	pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0xFFFF_FFFF + 273

	// decoder decodes Lzip data. It implements base.io_transformer, layering
	// header and footer validation on top of an embedded LZMA decoder.
	pub struct decoder? implements base.io_transformer(
	// ignore_checksum is toggled via set_quirk with
	// base.QUIRK_IGNORE_CHECKSUM. When true, the CRC-32 of the decoded bytes
	// is neither computed nor compared with the stored value.
	ignore_checksum : base.bool,

	// dsize_have and ssize_have count the bytes written to dst and read from
	// src by the LZMA decoder for the current LZIP-encoded chunk. They are
	// zeroed at the start of each chunk and later compared with the sizes
	// recorded in that chunk's footer.
	dsize_have : base.u64,
	ssize_have : base.u64,

	util : base.utility,
	) + (
	// crc32 hashes the decoded bytes for comparison with the footer.
	crc32 : crc32.ieee_hasher,
	// lzma decodes each chunk's LZMA payload.
	lzma : lzma.decoder,
	)

	// get_quirk reports whether a quirk is enabled. It returns 1 only when
	// args.key is base.QUIRK_IGNORE_CHECKSUM and checksum verification has been
	// switched off via set_quirk. It returns 0 in every other case, including
	// for any other key.
	pub func decoder.get_quirk(key: base.u32) base.u64 {
	    if args.key <> base.QUIRK_IGNORE_CHECKSUM {
	        return 0
	    } else if not this.ignore_checksum {
	        return 0
	    }
	    return 1
	}

	// set_quirk configures the decoder. The only key handled here is
	// base.QUIRK_IGNORE_CHECKSUM: a non-zero args.value turns checksum
	// verification off and a zero args.value turns it back on. Every other key
	// is rejected with base."#unsupported option".
	pub func decoder.set_quirk!(key: base.u32, value: base.u64) base.status {
	    if args.key <> base.QUIRK_IGNORE_CHECKSUM {
	        return base."#unsupported option"
	    }
	    this.ignore_checksum = args.value > 0
	    return ok
	}

	// dst_history_retain_length forwards to the embedded LZMA decoder and returns
	// its answer unchanged.
	pub func decoder.dst_history_retain_length() base.optional_u63 {
	return this.lzma.dst_history_retain_length()
	}

	// workbuf_len returns the work buffer length range reported by the embedded
	// LZMA decoder. The Lzip layer adds no work buffer requirement of its own.
	pub func decoder.workbuf_len() base.range_ii_u64 {
	return this.lzma.workbuf_len()
	}

	// transform_io is the public decoding entry point. It repeatedly drives
	// do_transform_io and passes on whatever status that produces, with one
	// exception: a "$short read" while args.src is already closed is reported as
	// the "#truncated input" error instead.
	pub func decoder.transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
	    var outcome : base.status

	    while true {
	        outcome =? this.do_transform_io?(dst: args.dst, src: args.src, workbuf: args.workbuf)
	        if outcome == base."$short read" {
	            // More input was requested, but a closed source has none to give.
	            if args.src.is_closed() {
	                return "#truncated input"
	            }
	        }
	        yield? outcome
	    }
	}

	// do_transform_io decodes one or more consecutive LZIP-encoded chunks from
	// args.src, writing the decoded bytes to args.dst. For each chunk it:
	//  - checks the 5-byte magic-and-version,
	//  - hands the dictionary size byte to the LZMA decoder,
	//  - runs the LZMA decoder until it reports ok, and
	//  - validates the footer: checksum, uncompressed size and compressed size.
	//
	// It returns "#bad header", "#bad checksum" or "#bad footer" when the
	// corresponding validation fails. Non-ok statuses from the LZMA decoder's
	// transform_io are yielded to the caller.
	pri func decoder.do_transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
	    var c8            : base.u8
	    var c32           : base.u32
	    var c64           : base.u64
	    var dmark         : base.u64
	    var smark         : base.u64
	    var status        : base.status
	    var checksum_want : base.u32
	    var checksum_have : base.u32
	    var size_want     : base.u64

	    // Each iteration of the outer loop handles one LZIP-encoded chunk.
	    while.outer true {
	        // Read the magic number and version number.
	        //
	        // Some of the test/3pdata/xzsuite/good*v0*.lz files have a 0x00
	        // version number (instead of 0x01). We reject those, just like
	        // /usr/bin/lzip does and as per the Lzip spec.
	        c64 = args.src.read_u40le_as_u64?()
	        if c64 <> 'LZIP\x01'le {
	            return "#bad header"
	        }

	        // Read the dictionary size.
	        //
	        // NOTE(review): the quirk value packs 0x01 in the low byte,
	        // presumably selecting the Lzip flavor of
	        // lzma.QUIRK_FORMAT_EXTENSION, with the dictionary size byte in the
	        // next 8 bits - confirm against std/lzma.
	        c8 = args.src.read_u8?()
	        status = this.lzma.set_quirk!(
	                key: lzma.QUIRK_FORMAT_EXTENSION,
	                value: 0x01 | ((c8 as base.u64) << 8))
	        if not status.is_ok() {
	            // A rejected dictionary size byte is a malformed header from the
	            // caller's point of view. Other errors are passed on unchanged.
	            if status == base."#bad argument" {
	                return "#bad header"
	            }
	            return status
	        }

	        // Decode LZMA.
	        //
	        // Re lzma.QUIRK_ALLOW_NON_ZERO_INITIAL_BYTE, as exercised by
	        // test/3pdata/xzsuite/lzip-testsuite/fox6_mark.lz, the LZMA spec says
	        // that the decoder must check that the byte is zero, but the Lzip spec
	        // says that the byte "must be ignored by the range decoder".
	        this.ssize_have = 0
	        this.dsize_have = 0
	        this.lzma.set_quirk!(key: lzma.QUIRK_ALLOW_NON_ZERO_INITIAL_BYTE, value: 1)
	        while true {
	            // Mark both streams so that the bytes consumed and produced by
	            // this one call can be measured afterwards.
	            dmark = args.dst.mark()
	            smark = args.src.mark()
	            status =? this.lzma.transform_io?(dst: args.dst, src: args.src, workbuf: args.workbuf)
	            this.ssize_have ~mod+= args.src.count_since(mark: smark)
	            this.dsize_have ~mod+= args.dst.count_since(mark: dmark)

	            // Hash the newly written bytes before any yield, so that output
	            // from a call that did not finish is still covered.
	            if not this.ignore_checksum {
	                this.crc32.update!(x: args.dst.since(mark: dmark))
	            }

	            if status.is_ok() {
	                break
	            }
	            yield? status
	        }

	        // Check the checksum. The stored value is always read, so that the
	        // footer is consumed even when verification is disabled.
	        checksum_want = args.src.read_u32le?()
	        if not this.ignore_checksum {
	            checksum_have = this.crc32.checksum_u32()
	            if checksum_have <> checksum_want {
	                return "#bad checksum"
	            }
	        }

	        // Check the uncompressed size.
	        size_want = args.src.read_u64le?()
	        if this.dsize_have <> size_want {
	            return "#bad footer"
	        }

	        // Check the compressed size, which includes 6 bytes of header and 20
	        // bytes of footer. Per the spec, it's also capped at 2 PiB.
	        //
	        // The (size_want < 26) test comes first so that the subtraction in
	        // the else-if branch cannot underflow.
	        size_want = args.src.read_u64le?()
	        if (size_want < 26) or (0x8_0000_0000_0000 < size_want) {
	            return "#bad footer"
	        } else if this.ssize_have <> (size_want - 26) {
	            return "#bad footer"
	        }

	        // Reset state and continue the outer loop, if not at EOF and it looks
	        // like there's another LZIP-encoded chunk.
	        //
	        // Wait until 4 bytes are available to peek at. If the source closes
	        // before then, there is no further chunk and decoding is complete.
	        while args.src.length() < 4,
	                post args.src.length() >= 4,
	        {
	            if args.src.is_closed() {
	                break.outer
	            }
	            yield? base."$short read"
	        }
	        // Peek, rather than read, so that the magic bytes are still there for
	        // the next iteration's header check. Trailing data that does not
	        // start with 'LZIP' is left unconsumed.
	        c32 = args.src.peek_u32le()
	        if c32 <> 'LZIP'le {
	            break.outer
	        }
	        this.crc32.reset!()
	        this.lzma.reset!()
	    }.outer
	}