| // Copyright 2017 The Puffs Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // TODO: describe how the xxx_fast version differs from the xxx_slow one, the |
| // assumptions that xxx_fast makes, and how that makes it fast. |
| pri func flate_decoder.decode_huffman_fast?(dst writer1, src reader1)() { |
| // When editing this function, consider making the equivalent change to the |
| // decode_huffman_slow function. Keep the diff between the two |
| // decode_huffman_*.puffs files as small as possible, while retaining both |
| // correctness and performance. |
| |
| // TODO: make this a func pre-condition. |
| if not in.dst.is_marked() { |
| return error "bad argument" |
| } |
| |
| if (this.n_bits >= 8) or ((this.bits >> this.n_bits) != 0) { |
| return error "internal error: inconsistent n_bits" |
| } |
| |
| var bits u32 = this.bits |
| var n_bits u32 = this.n_bits |
| |
| var table_entry u32 |
| var table_entry_n_bits u32[..15] |
| var lmask u32[..511] = ((1 as u32) << this.n_huffs_bits[0]) - 1 |
| var dmask u32[..511] = ((1 as u32) << this.n_huffs_bits[1]) - 1 |
| |
| // Check up front, on each iteration, that we have enough buffer space to |
| // both read (12 bytes) and write (258 bytes) as much as we need to. Doing |
| // this check once (per iteration), up front, removes the need to check |
| // (and possibly suspend the coroutine) multiple times inside the loop |
| // body, so it's faster overall. |
| // |
| // For writing, a literal code obviously corresponds to writing 1 byte, and |
| // 258 is the maximum length in a length-distance pair, as specified in the |
| // RFC section 3.2.5. Compressed blocks (length and distance codes). |
| // |
| // For reading, strictly speaking, we only need 6 bytes (48 bits) of |
| // available input, because the H-L length/literal code is up to 15 bits |
| // plus up to 5 extra bits, the H-D distance code is up to 15 bits plus up |
| // to 13 extra bits and 15 + 5 + 15 + 13 == 48. However, in.src.available() |
| // assertions are made in terms of bytes, not bits. |
| // |
| // In theory, we could write assertions that mixed bytes and bits, such as: |
| // |
| // assert ((in.src.available() >= 6) and (n_bits >= 5)) or |
| // ((in.src.available() >= 4) and (n_bits >= 21)) |
| // |
| // but that looks clumsy and makes the proofs more complicated. Instead, we |
| // are conservative and work in terms of bytes only. In bytes, each code |
| // requires either one (direct) or two (redirect) lookups, up to 2 bytes |
| // for each. There are also up to 13 extra bits, or 2 bytes. Thus, each |
| // code requires up to 2 + 2 + 2 == 6 bytes, and there are two codes (one |
| // from H-L and one from H-D). Therefore, we check for at least 12 bytes. |
| while:loop(in.dst.available() >= 258) and (in.src.available() >= 12), |
| inv in.dst.is_marked(), |
| { |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } else { |
| assert in.src.available() >= 10 |
| } |
| // These assertions are redundant, but are listed explicitly for |
| // clarity. They must hold regardless of which branch was taken for the |
| // if statement above. |
| assert in.src.available() >= 10 |
| assert n_bits >= 15 |
| |
| // Decode an lcode symbol from H-L. |
| table_entry = this.huffs[0][bits & lmask] |
| table_entry_n_bits = table_entry & 0x0F |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| |
| if (table_entry >> 31) != 0 { |
| // Literal. |
| in.dst.write_u8?(x:(table_entry >> 8) & 0xFF) |
| continue:loop |
| } else if (table_entry >> 30) != 0 { |
| // No-op; code continues past the if-else chain. |
| assert in.src.available() >= 8 |
| } else if (table_entry >> 29) != 0 { |
| // End of block. |
| this.end_of_block = true |
| break:loop |
| } else if (table_entry >> 28) != 0 { |
| // Redirect. |
| |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } else { |
| assert in.src.available() >= 8 |
| } |
| // Once again, redundant but explicit assertions. |
| assert in.src.available() >= 8 |
| assert n_bits >= 15 |
| |
| var redir_top u32[..0xFFFF] = (table_entry >> 8) & 0xFFFF |
| var redir_mask u32[..0x7FFF] = ((1 as u32) << ((table_entry >> 4) & 0x0F)) - 1 |
| if (redir_top + (bits & redir_mask)) >= 1234 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| table_entry = this.huffs[0][redir_top + (bits & redir_mask)] |
| table_entry_n_bits = table_entry & 0x0F |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| |
| if (table_entry >> 31) != 0 { |
| // Literal. |
| in.dst.write_u8?(x:(table_entry >> 8) & 0xFF) |
| continue:loop |
| } else if (table_entry >> 30) != 0 { |
| // No-op; code continues past the if-else chain. |
| } else if (table_entry >> 29) != 0 { |
| // End of block. |
| this.end_of_block = true |
| break:loop |
| } else if (table_entry >> 28) != 0 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } else if (table_entry >> 27) != 0 { |
| return error "bad Huffman code" |
| } else { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| |
| // Once again, redundant but explicit assertions. |
| assert in.src.available() >= 8 |
| |
| } else if (table_entry >> 27) != 0 { |
| return error "bad Huffman code" |
| } else { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| |
| // length = base number + extra bits. |
| var length u32[..0x7FFF] = (table_entry >> 8) & 0x7FFF |
| table_entry_n_bits = (table_entry >> 4) & 0x0F |
| if table_entry_n_bits > 0 { |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } else { |
| assert in.src.available() >= 6 |
| } |
| // Once again, redundant but explicit assertions. |
| assert in.src.available() >= 6 |
| assert n_bits >= 15 |
| |
| length = (length + bits.low_bits(n:table_entry_n_bits)) & 0x7FFF |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| |
| } else { |
| assert in.src.available() >= 6 |
| } |
| if length > 258 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } else { |
| assert in.src.available() >= 4 |
| } |
| // Once again, redundant but explicit assertions. |
| assert in.src.available() >= 4 |
| assert n_bits >= 15 |
| |
| // Decode a dcode symbol from H-D. |
| table_entry = this.huffs[1][bits & dmask] |
| table_entry_n_bits = table_entry & 15 |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| |
| // Check for a redirect. |
| if (table_entry >> 28) == 1 { |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } else { |
| assert in.src.available() >= 2 |
| } |
| // Once again, redundant but explicit assertions. |
| assert in.src.available() >= 2 |
| assert n_bits >= 15 |
| |
| redir_top = (table_entry >> 8) & 0xFFFF |
| redir_mask = ((1 as u32) << ((table_entry >> 4) & 0x0F)) - 1 |
| if (redir_top + (bits & redir_mask)) >= 1234 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| table_entry = this.huffs[1][redir_top + (bits & redir_mask)] |
| table_entry_n_bits = table_entry & 0x0F |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| } else { |
| assert in.src.available() >= 2 |
| } |
| |
| // For H-D, all symbols should be base number + extra bits. |
| if (table_entry >> 24) != 0x40 { |
| if (table_entry >> 24) == 0x08 { |
| return error "bad Huffman code" |
| } |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| |
| // distance = base number + extra bits. |
| var distance u32[..0x7FFF] = (table_entry >> 8) & 0x7FFF |
| table_entry_n_bits = (table_entry >> 4) & 0x0F |
| if table_entry_n_bits > 0 { |
| // Ensure that we have at least 15 bits of input. |
| if n_bits < 15 { |
| assert n_bits >= 0 // TODO: this shouldn't be necessary. |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| bits |= (in.src.read_u8?() as u32) << n_bits |
| n_bits += 8 |
| assert n_bits >= 15 |
| } |
| // Once again, redundant but explicit assertions. |
| assert n_bits >= 15 |
| |
| distance = (distance + bits.low_bits(n:table_entry_n_bits)) & 0x7FFF |
| bits >>= table_entry_n_bits |
| n_bits -= table_entry_n_bits |
| } |
| if distance <= 0 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| |
| var n_copied u32 |
| // The "while true { etc; break }" is a redundant version of "etc", but |
| // its presence minimizes the diff between decode_huffman_fast and |
| // decode_huffman_slow. |
| while true, |
| pre length <= 258, |
| pre distance > 0, |
| pre in.dst.available() >= 258, |
| inv in.dst.is_marked(), |
| { |
| // Copy from this.history. |
| if (distance as u64) > in.dst.since_mark().length() { |
| // Set (hlen, hdist) to be the length-distance pair to copy |
| // from this.history, and (length, distance) to be the |
| // remaining length-distance pair to copy from in.dst. |
| var hlen u32[..0x7FFF] |
| var hdist u32[..0x7FFF] = ((distance as u64) - in.dst.since_mark().length()) as u32 |
| if length > hdist { |
| length -= hdist |
| hlen = hdist |
| // TODO: this if check should be redundant. |
| if length > 258 { |
| return error "internal error: inconsistent Huffman decoder state" |
| } |
| } else { |
| hlen = length |
| length = 0 |
| assert length <= 258 |
| } |
| if this.history_index < hdist { |
| return error "bad distance" |
| } |
| // Re-purpose the hdist variable as the this.history index to |
| // start copying from. |
| hdist = (this.history_index - hdist) & 0x7FFF |
| |
| // Copy from hdist to the end of this.history, and then |
| // possibly wrap around the this.history ringbuffer. |
| // |
| // The "while true { etc; break }" is a redundant version of |
| // "etc", but its presence minimizes the diff between |
| // decode_huffman_fast and decode_huffman_slow. |
| while true, |
| inv length <= 258, |
| inv distance > 0, |
| inv in.dst.available() >= 258, |
| inv in.dst.is_marked(), |
| { |
| // TODO: copy_from_slice32 should probably update the |
| // "in.dst.available() >= 258" fact. It should certainly |
| // invalidate any fact that was "<=" instead of ">=". |
| n_copied = in.dst.copy_from_slice32(s:this.history[hdist:], length:hlen) |
| if hlen <= n_copied { |
| break |
| } |
| hlen -= n_copied |
| in.dst.copy_from_slice32(s:this.history[:], length:hlen) |
| break |
| } |
| |
| if length == 0 { |
| // No need to copy from in.dst. |
| continue:loop |
| } |
| |
| if (distance as u64) > in.dst.since_mark().length() { |
| return error "internal error: inconsistent distance" |
| } |
| } |
| // Once again, redundant but explicit assertions. |
| assert length <= 258 |
| assert distance > 0 |
| assert (distance as u64) <= in.dst.since_mark().length() |
| assert in.dst.available() >= 258 |
| assert in.dst.is_marked() |
| |
| // We can therefore prove: |
| assert (length as u64) <= 258 |
| assert (length as u64) <= in.dst.available() via "a <= b: a <= c; c <= b"(c:258) |
| |
| // Copy from in.dst. |
| in.dst.copy_from_history32(distance:distance, length:length) |
| break |
| } |
| } |
| |
| // Ensure n_bits < 8 by rewindng in.src, if we loaded too many of its bytes |
| // into the bits variable. |
| // |
| // Note that we can unconditionally call unread_u8 (without resulting in an |
| // "invalid I/O operation" error code) only because this whole function can |
| // never suspend, as all of its I/O operations were checked beforehand for |
| // sufficient buffer space. Otherwise, resuming from the suspension could |
| // mean that the (possibly different) in.src is no longer rewindable, even |
| // if conceptually, this function was responsible for reading the bytes we |
| // want to rewind. |
| // |
| // TODO: have the compiler enforce (via some explicit syntax) that this |
| // function is ProvenNotToSuspend? |
| while n_bits >= 8, |
| post n_bits < 8, |
| { |
| n_bits -= 8 |
| in.src.unread_u8?() |
| } |
| |
| this.bits = bits & (((1 as u32) << n_bits) - 1) |
| this.n_bits = n_bits |
| |
| if (this.n_bits >= 8) or ((this.bits >> this.n_bits) != 0) { |
| return error "internal error: inconsistent n_bits" |
| } |
| } |