std/lzw/decode_lzw.wuffs - external/github.com/google/wuffs - Git at Google

 // Copyright 2017 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 pub status "#bad code"

 pri status "#internal error: inconsistent I/O"

 // TODO: move bulk data buffers like decoder.suffixes or decoder.output into
 // the workbuf? The first attempt at this was a performance regression for
 // decoding all but the smallest GIFs. See these git commits for numbers:
 //  - 49627b4 Flatten the lzw.decoder.suffixes array
 //  - f877fb2 Use the workbuf instead of lzw.decoder.suffixes
 //  - 85be5b9 Delete the obsolete lzw.decoder.suffixes array
 // and the roll back has combined numbers:
 //  - 3056a84 Roll back 3 recent lzw.decoder.suffixes commits
 pub const decoder_workbuf_len_max_incl_worst_case base.u64 = 0

 pub struct decoder?(
 	// set_literal_width_arg is 1 plus the saved argument passed to
 	// set_literal_width. This is assigned to the literal_width field at the
 	// start of decode_io_writer. During that method, calling set_literal_width
 	// will change set_literal_width_arg but not literal_width.
 	set_literal_width_arg : base.u32[..= 9],

 	// read_from state that does not change during a decode call.
 	literal_width : base.u32[..= 8],
 	clear_code    : base.u32[..= 256],
 	end_code      : base.u32[..= 257],

 	// read_from state that does change during a decode call.
 	save_code : base.u32[..= 4096],
 	prev_code : base.u32[..= 4095],
 	width     : base.u32[..= 12],
 	bits      : base.u32,
 	n_bits    : base.u32[..= 31],
 	output_ri : base.u32[..= 8191],
 	output_wi : base.u32[..= 8191],

 	// read_from return value. The read_from method effectively returns a
 	// base.u32 to show how decode should continue after calling write_to. That
 	// value needs to be saved across write_to's possible suspension, so we
 	// might as well save it explicitly as a decoder field.
 	read_from_return_value : base.u32,

 	// read_from per-code state.
 	prefixes : array[4096] base.u16[..= 4095],

 	util : base.utility,
 )(
 	// read_from per-code state.
 	suffixes : array[4096] array[8] base.u8,
 	// lm1s is the "length minus 1"s of the values for the implicit key-value
 	// table in this decoder. See std/lzw/README.md for more detail.
 	lm1s : array[4096] base.u16,

 	// output[output_ri:output_wi] is the buffered output, connecting read_from
 	// with write_to and flush.
 	output : array[8192 + 7] base.u8,
 )

 pub func decoder.set_literal_width!(lw: base.u32[..= 8]) {
 	this.set_literal_width_arg = args.lw + 1
 }

 pub func decoder.workbuf_len() base.range_ii_u64 {
 	return this.util.make_range_ii_u64(min_incl: 0, max_incl: 0)
 }

 pub func decoder.decode_io_writer?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
 	var i : base.u32[..= 8191]

 	// Initialize read_from state.
 	this.literal_width = 8
 	if this.set_literal_width_arg > 0 {
 		this.literal_width = this.set_literal_width_arg - 1
 	}
 	this.clear_code = (1 as base.u32) << this.literal_width
 	this.end_code = this.clear_code + 1
 	this.save_code = this.end_code
 	this.prev_code = this.end_code
 	this.width = this.literal_width + 1
 	this.bits = 0
 	this.n_bits = 0
 	this.output_ri = 0
 	this.output_wi = 0
 	i = 0
 	while i < this.clear_code {
 		assert i < 256 via "a < b: a < c; c <= b"(c: this.clear_code)
 		this.lm1s[i] = 0
 		this.suffixes[i][0] = i as base.u8
 		i += 1
 	}

 	while true {
 		this.read_from!(src: args.src)

 		if this.output_wi > 0 {
 			this.write_to?(dst: args.dst)
 		}

 		if this.read_from_return_value == 0 {
 			break
 		} else if this.read_from_return_value == 1 {
 			continue
 		} else if this.read_from_return_value == 2 {
 			yield? base."$short read"
 		} else if this.read_from_return_value == 3 {
 			return "#bad code"
 		} else {
 			return "#internal error: inconsistent I/O"
 		}
 	}
 }

 pri func decoder.read_from!(src: base.io_reader) {
 	var clear_code : base.u32[..= 256]
 	var end_code   : base.u32[..= 257]

 	var save_code : base.u32[..= 4096]
 	var prev_code : base.u32[..= 4095]
 	var width     : base.u32[..= 12]
 	var bits      : base.u32
 	var n_bits    : base.u32[..= 31]
 	var output_wi : base.u32[..= 8191]

 	var code       : base.u32[..= 4095]
 	var c          : base.u32[..= 4095]
 	var o          : base.u32[..= 8191]
 	var steps      : base.u32
 	var first_byte : base.u8
 	var lm1_b      : base.u16[..= 4095]
 	var lm1_a      : base.u16[..= 4095]

 	clear_code = this.clear_code
 	end_code = this.end_code

 	save_code = this.save_code
 	prev_code = this.prev_code
 	width = this.width
 	bits = this.bits
 	n_bits = this.n_bits
 	output_wi = this.output_wi

 	while true {
 		if n_bits < width {
 			assert n_bits < 12 via "a < b: a < c; c <= b"(c: width)
 			if args.src.available() >= 4 {
 				// Read 4 bytes, using the "Variant 4" technique of
 				// https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
 				bits |= args.src.peek_u32le() ~mod<< n_bits
 				args.src.skip_fast!(actual: (31 - n_bits) >> 3, worst_case: 3)
 				n_bits |= 24
 				assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12)
 				assert n_bits >= width via "a >= b: b <= a"()
 			} else if args.src.available() <= 0 {
 				this.read_from_return_value = 2
 				break
 			} else {
 				bits |= args.src.peek_u8_as_u32() << n_bits
 				args.src.skip_fast!(actual: 1, worst_case: 1)
 				n_bits += 8
 				if n_bits >= width {
 					// No-op.
 				} else if args.src.available() <= 0 {
 					this.read_from_return_value = 2
 					break
 				} else {
 					bits |= args.src.peek_u8_as_u32() << n_bits
 					args.src.skip_fast!(actual: 1, worst_case: 1)
 					n_bits += 8
 					assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12)
 					assert n_bits >= width via "a >= b: b <= a"()

 					// This if condition is always false, but for some unknown
 					// reason, removing it worsens the benchmarks slightly.
 					if n_bits < width {
 						this.read_from_return_value = 4
 						break
 					}
 				}
 			}
 		}

 		code = bits.low_bits(n: width)
 		bits >>= width
 		n_bits -= width

 		if code < clear_code {
 			assert code < 256 via "a < b: a < c; c <= b"(c: clear_code)
 			this.output[output_wi] = code as base.u8
 			output_wi = (output_wi + 1) & 8191
 			if save_code <= 4095 {
 				lm1_a = (this.lm1s[prev_code] ~mod+ 1) & 4095
 				this.lm1s[save_code] = lm1_a

 				if (lm1_a % 8) <> 0 {
 					this.prefixes[save_code] = this.prefixes[prev_code]
 					this.suffixes[save_code] = this.suffixes[prev_code]
 					this.suffixes[save_code][lm1_a % 8] = code as base.u8
 				} else {
 					this.prefixes[save_code] = prev_code as base.u16
 					this.suffixes[save_code][0] = code as base.u8
 				}

 				save_code += 1
 				if width < 12 {
 					width += 1 & (save_code >> width)
 				}
 				prev_code = code
 			}

 		} else if code <= end_code {
 			if code == end_code {
 				this.read_from_return_value = 0
 				break
 			}
 			save_code = end_code
 			prev_code = end_code
 			width = this.literal_width + 1

 		} else if code <= save_code {
 			c = code
 			if code == save_code {
 				c = prev_code
 			}

 			// Letting old_wi and new_wi denote the values of output_wi before
 			// and after these two lines of code, the decoded bytes will be
 			// written to output[old_wi:new_wi]. They will be written
 			// back-to-front, 8 bytes at a time, starting by writing
 			// output[o:o + 8], which will contain output[new_wi - 1].
 			//
 			// In the special case that code == save_code, the decoded bytes
 			// contain an extra copy (at the end) of the first byte, and will
 			// be written to output[old_wi:new_wi + 1].
 			o = (output_wi + ((this.lm1s[c] as base.u32) & 0xFFFFFFF8)) & 8191
 			output_wi = (output_wi + 1 + (this.lm1s[c] as base.u32)) & 8191

 			steps = (this.lm1s[c] as base.u32) >> 3
 			while true {
 				assert o <= (o + 8) via "a <= (a + b): 0 <= b"(b: 8)

 				// The final "8" is redundant semantically, but helps the
 				// wuffs-c code generator recognize that both slices have the
 				// same constant length, and hence produce efficient C code.
 				this.output[o .. o + 8].copy_from_slice!(s: this.suffixes[c][.. 8])

 				if steps <= 0 {
 					break
 				}
 				steps -= 1

 				// This line is essentially "o -= 8". The "& 8191" is a no-op
 				// in practice, but is necessary for the overflow checker.
 				o = (o ~mod- 8) & 8191
 				c = this.prefixes[c] as base.u32
 			}
 			first_byte = this.suffixes[c][0]

 			if code == save_code {
 				this.output[output_wi] = first_byte
 				output_wi = (output_wi + 1) & 8191
 			}

 			if save_code <= 4095 {
 				lm1_b = (this.lm1s[prev_code] ~mod+ 1) & 4095
 				this.lm1s[save_code] = lm1_b

 				if (lm1_b % 8) <> 0 {
 					this.prefixes[save_code] = this.prefixes[prev_code]
 					this.suffixes[save_code] = this.suffixes[prev_code]
 					this.suffixes[save_code][lm1_b % 8] = first_byte
 				} else {
 					this.prefixes[save_code] = prev_code as base.u16
 					this.suffixes[save_code][0] = first_byte as base.u8
 				}

 				save_code += 1
 				if width < 12 {
 					width += 1 & (save_code >> width)
 				}
 				prev_code = code
 			}

 		} else {
 			this.read_from_return_value = 3
 			break
 		}

 		// Flush the output if it could be too full to contain the entire
 		// decoding of the next code. The longest possible decoding is slightly
 		// less than 4096 and output's length is 8192, so a conservative
 		// threshold is ensuring that output_wi <= 4095.
 		if output_wi > 4095 {
 			this.read_from_return_value = 1
 			break
 		}
 	}

 	// Rewind args.src, if we're not in "$short read" and we've read too many
 	// bits.
 	if this.read_from_return_value <> 2 {
 		while n_bits >= 8 {
 			n_bits -= 8
 			if args.src.can_undo_byte() {
 				args.src.undo_byte!()
 			} else {
 				this.read_from_return_value = 4
 				break
 			}
 		}
 	}

 	this.save_code = save_code
 	this.prev_code = prev_code
 	this.width = width
 	this.bits = bits
 	this.n_bits = n_bits
 	this.output_wi = output_wi
 }

 pri func decoder.write_to?(dst: base.io_writer) {
 	var s : slice base.u8
 	var n : base.u64

 	while this.output_wi > 0 {
 		if this.output_ri > this.output_wi {
 			return "#internal error: inconsistent I/O"
 		}
 		s = this.output[this.output_ri .. this.output_wi]
 		n = args.dst.copy_from_slice!(s: s)
 		if n == s.length() {
 			this.output_ri = 0
 			this.output_wi = 0
 			return ok
 		}
 		this.output_ri = (this.output_ri ~mod+ ((n & 0xFFFFFFFF) as base.u32)) & 8191
 		yield? base."$short write"
 	}
 }

 pub func decoder.flush!() slice base.u8 {
 	var s : slice base.u8

 	if this.output_ri <= this.output_wi {
 		s = this.output[this.output_ri .. this.output_wi]
 	}
 	this.output_ri = 0
 	this.output_wi = 0
 	return s
 }
	// Copyright 2017 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	pub status "#bad code"

	pri status "#internal error: inconsistent I/O"

	// TODO: move bulk data buffers like decoder.suffixes or decoder.output into
	// the workbuf? The first attempt at this was a performance regression for
	// decoding all but the smallest GIFs. See these git commits for numbers:
	// - 49627b4 Flatten the lzw.decoder.suffixes array
	// - f877fb2 Use the workbuf instead of lzw.decoder.suffixes
	// - 85be5b9 Delete the obsolete lzw.decoder.suffixes array
	// and the roll back has combined numbers:
	// - 3056a84 Roll back 3 recent lzw.decoder.suffixes commits
	pub const decoder_workbuf_len_max_incl_worst_case base.u64 = 0

	pub struct decoder?(
	// set_literal_width_arg is 1 plus the saved argument passed to
	// set_literal_width. This is assigned to the literal_width field at the
	// start of decode_io_writer. During that method, calling set_literal_width
	// will change set_literal_width_arg but not literal_width.
	set_literal_width_arg : base.u32[..= 9],

	// read_from state that does not change during a decode call.
	literal_width : base.u32[..= 8],
	clear_code : base.u32[..= 256],
	end_code : base.u32[..= 257],

	// read_from state that does change during a decode call.
	save_code : base.u32[..= 4096],
	prev_code : base.u32[..= 4095],
	width : base.u32[..= 12],
	bits : base.u32,
	n_bits : base.u32[..= 31],
	output_ri : base.u32[..= 8191],
	output_wi : base.u32[..= 8191],

	// read_from return value. The read_from method effectively returns a
	// base.u32 to show how decode should continue after calling write_to. That
	// value needs to be saved across write_to's possible suspension, so we
	// might as well save it explicitly as a decoder field.
	read_from_return_value : base.u32,

	// read_from per-code state.
	prefixes : array[4096] base.u16[..= 4095],

	util : base.utility,
	)(
	// read_from per-code state.
	suffixes : array[4096] array[8] base.u8,
	// lm1s is the "length minus 1"s of the values for the implicit key-value
	// table in this decoder. See std/lzw/README.md for more detail.
	lm1s : array[4096] base.u16,

	// output[output_ri:output_wi] is the buffered output, connecting read_from
	// with write_to and flush.
	output : array[8192 + 7] base.u8,
	)

	pub func decoder.set_literal_width!(lw: base.u32[..= 8]) {
	this.set_literal_width_arg = args.lw + 1
	}

	pub func decoder.workbuf_len() base.range_ii_u64 {
	return this.util.make_range_ii_u64(min_incl: 0, max_incl: 0)
	}

	pub func decoder.decode_io_writer?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) {
	var i : base.u32[..= 8191]

	// Initialize read_from state.
	this.literal_width = 8
	if this.set_literal_width_arg > 0 {
	this.literal_width = this.set_literal_width_arg - 1
	}
	this.clear_code = (1 as base.u32) << this.literal_width
	this.end_code = this.clear_code + 1
	this.save_code = this.end_code
	this.prev_code = this.end_code
	this.width = this.literal_width + 1
	this.bits = 0
	this.n_bits = 0
	this.output_ri = 0
	this.output_wi = 0
	i = 0
	while i < this.clear_code {
	assert i < 256 via "a < b: a < c; c <= b"(c: this.clear_code)
	this.lm1s[i] = 0
	this.suffixes[i][0] = i as base.u8
	i += 1
	}

	while true {
	this.read_from!(src: args.src)

	if this.output_wi > 0 {
	this.write_to?(dst: args.dst)
	}

	if this.read_from_return_value == 0 {
	break
	} else if this.read_from_return_value == 1 {
	continue
	} else if this.read_from_return_value == 2 {
	yield? base."$short read"
	} else if this.read_from_return_value == 3 {
	return "#bad code"
	} else {
	return "#internal error: inconsistent I/O"
	}
	}
	}

	pri func decoder.read_from!(src: base.io_reader) {
	var clear_code : base.u32[..= 256]
	var end_code : base.u32[..= 257]

	var save_code : base.u32[..= 4096]
	var prev_code : base.u32[..= 4095]
	var width : base.u32[..= 12]
	var bits : base.u32
	var n_bits : base.u32[..= 31]
	var output_wi : base.u32[..= 8191]

	var code : base.u32[..= 4095]
	var c : base.u32[..= 4095]
	var o : base.u32[..= 8191]
	var steps : base.u32
	var first_byte : base.u8
	var lm1_b : base.u16[..= 4095]
	var lm1_a : base.u16[..= 4095]

	clear_code = this.clear_code
	end_code = this.end_code

	save_code = this.save_code
	prev_code = this.prev_code
	width = this.width
	bits = this.bits
	n_bits = this.n_bits
	output_wi = this.output_wi

	while true {
	if n_bits < width {
	assert n_bits < 12 via "a < b: a < c; c <= b"(c: width)
	if args.src.available() >= 4 {
	// Read 4 bytes, using the "Variant 4" technique of
	// https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
	bits \|= args.src.peek_u32le() ~mod<< n_bits
	args.src.skip_fast!(actual: (31 - n_bits) >> 3, worst_case: 3)
	n_bits \|= 24
	assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12)
	assert n_bits >= width via "a >= b: b <= a"()
	} else if args.src.available() <= 0 {
	this.read_from_return_value = 2
	break
	} else {
	bits \|= args.src.peek_u8_as_u32() << n_bits
	args.src.skip_fast!(actual: 1, worst_case: 1)
	n_bits += 8
	if n_bits >= width {
	// No-op.
	} else if args.src.available() <= 0 {
	this.read_from_return_value = 2
	break
	} else {
	bits \|= args.src.peek_u8_as_u32() << n_bits
	args.src.skip_fast!(actual: 1, worst_case: 1)
	n_bits += 8
	assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12)
	assert n_bits >= width via "a >= b: b <= a"()

	// This if condition is always false, but for some unknown
	// reason, removing it worsens the benchmarks slightly.
	if n_bits < width {
	this.read_from_return_value = 4
	break
	}
	}
	}
	}

	code = bits.low_bits(n: width)
	bits >>= width
	n_bits -= width

	if code < clear_code {
	assert code < 256 via "a < b: a < c; c <= b"(c: clear_code)
	this.output[output_wi] = code as base.u8
	output_wi = (output_wi + 1) & 8191
	if save_code <= 4095 {
	lm1_a = (this.lm1s[prev_code] ~mod+ 1) & 4095
	this.lm1s[save_code] = lm1_a

	if (lm1_a % 8) <> 0 {
	this.prefixes[save_code] = this.prefixes[prev_code]
	this.suffixes[save_code] = this.suffixes[prev_code]
	this.suffixes[save_code][lm1_a % 8] = code as base.u8
	} else {
	this.prefixes[save_code] = prev_code as base.u16
	this.suffixes[save_code][0] = code as base.u8
	}

	save_code += 1
	if width < 12 {
	width += 1 & (save_code >> width)
	}
	prev_code = code
	}

	} else if code <= end_code {
	if code == end_code {
	this.read_from_return_value = 0
	break
	}
	save_code = end_code
	prev_code = end_code
	width = this.literal_width + 1

	} else if code <= save_code {
	c = code
	if code == save_code {
	c = prev_code
	}

	// Letting old_wi and new_wi denote the values of output_wi before
	// and after these two lines of code, the decoded bytes will be
	// written to output[old_wi:new_wi]. They will be written
	// back-to-front, 8 bytes at a time, starting by writing
	// output[o:o + 8], which will contain output[new_wi - 1].
	//
	// In the special case that code == save_code, the decoded bytes
	// contain an extra copy (at the end) of the first byte, and will
	// be written to output[old_wi:new_wi + 1].
	o = (output_wi + ((this.lm1s[c] as base.u32) & 0xFFFFFFF8)) & 8191
	output_wi = (output_wi + 1 + (this.lm1s[c] as base.u32)) & 8191

	steps = (this.lm1s[c] as base.u32) >> 3
	while true {
	assert o <= (o + 8) via "a <= (a + b): 0 <= b"(b: 8)

	// The final "8" is redundant semantically, but helps the
	// wuffs-c code generator recognize that both slices have the
	// same constant length, and hence produce efficient C code.
	this.output[o .. o + 8].copy_from_slice!(s: this.suffixes[c][.. 8])

	if steps <= 0 {
	break
	}
	steps -= 1

	// This line is essentially "o -= 8". The "& 8191" is a no-op
	// in practice, but is necessary for the overflow checker.
	o = (o ~mod- 8) & 8191
	c = this.prefixes[c] as base.u32
	}
	first_byte = this.suffixes[c][0]

	if code == save_code {
	this.output[output_wi] = first_byte
	output_wi = (output_wi + 1) & 8191
	}

	if save_code <= 4095 {
	lm1_b = (this.lm1s[prev_code] ~mod+ 1) & 4095
	this.lm1s[save_code] = lm1_b

	if (lm1_b % 8) <> 0 {
	this.prefixes[save_code] = this.prefixes[prev_code]
	this.suffixes[save_code] = this.suffixes[prev_code]
	this.suffixes[save_code][lm1_b % 8] = first_byte
	} else {
	this.prefixes[save_code] = prev_code as base.u16
	this.suffixes[save_code][0] = first_byte as base.u8
	}

	save_code += 1
	if width < 12 {
	width += 1 & (save_code >> width)
	}
	prev_code = code
	}

	} else {
	this.read_from_return_value = 3
	break
	}

	// Flush the output if it could be too full to contain the entire
	// decoding of the next code. The longest possible decoding is slightly
	// less than 4096 and output's length is 8192, so a conservative
	// threshold is ensuring that output_wi <= 4095.
	if output_wi > 4095 {
	this.read_from_return_value = 1
	break
	}
	}

	// Rewind args.src, if we're not in "$short read" and we've read too many
	// bits.
	if this.read_from_return_value <> 2 {
	while n_bits >= 8 {
	n_bits -= 8
	if args.src.can_undo_byte() {
	args.src.undo_byte!()
	} else {
	this.read_from_return_value = 4
	break
	}
	}
	}

	this.save_code = save_code
	this.prev_code = prev_code
	this.width = width
	this.bits = bits
	this.n_bits = n_bits
	this.output_wi = output_wi
	}

	pri func decoder.write_to?(dst: base.io_writer) {
	var s : slice base.u8
	var n : base.u64

	while this.output_wi > 0 {
	if this.output_ri > this.output_wi {
	return "#internal error: inconsistent I/O"
	}
	s = this.output[this.output_ri .. this.output_wi]
	n = args.dst.copy_from_slice!(s: s)
	if n == s.length() {
	this.output_ri = 0
	this.output_wi = 0
	return ok
	}
	this.output_ri = (this.output_ri ~mod+ ((n & 0xFFFFFFFF) as base.u32)) & 8191
	yield? base."$short write"
	}
	}

	pub func decoder.flush!() slice base.u8 {
	var s : slice base.u8

	if this.output_ri <= this.output_wi {
	s = this.output[this.output_ri .. this.output_wi]
	}
	this.output_ri = 0
	this.output_wi = 0
	return s
	}