Calculate deflate Huffman table worst case size
diff --git a/script/make-artificial.go b/script/make-artificial.go
index 0037f6e..7a16abe 100644
--- a/script/make-artificial.go
+++ b/script/make-artificial.go
@@ -244,47 +244,83 @@
stream deflateBitStream
// Dynamic Huffman state.
- numLCodes uint32
- numDCodes uint32
- numCLCodeLengths uint32
- whichHuffman uint32
+ whichHuffman uint32
// 0=Unused, 1=CodeLength, 2=Length/Literal, 3=Distance.
huffmans [4]deflateHuffmanTable
+
+ // DHH (Dynamic Huffman, inside a Huffman table) state.
+ prevLine string
+ etcetera bool
}
func deflateGlobalsClearDynamicHuffmanState() {
- deflateGlobals.numLCodes = 0
- deflateGlobals.numDCodes = 0
- deflateGlobals.numCLCodeLengths = 0
deflateGlobals.whichHuffman = 0
deflateGlobals.huffmans = [4]deflateHuffmanTable{}
+ deflateGlobals.prevLine = ""
+ deflateGlobals.etcetera = false
+}
+
+func deflateGlobalsCountCodes() (numLCodes uint32, numDCodes uint32, numCLCodeLengths uint32, retErr error) {
+ for k := range deflateGlobals.huffmans[2] {
+ if numLCodes < (k + 1) {
+ numLCodes = (k + 1)
+ }
+ }
+
+ for k := range deflateGlobals.huffmans[3] {
+ if numDCodes < (k + 1) {
+ numDCodes = (k + 1)
+ }
+ }
+
+ for k := range deflateGlobals.huffmans[1] {
+ if (k < 0) || (18 < k) {
+ return 0, 0, 0, fmt.Errorf("bad CodeLength: %d", k)
+ }
+ }
+ for i := len(deflateCodeOrder) - 1; i >= 0; i-- {
+ cl := deflateCodeOrder[i]
+ if _, ok := deflateGlobals.huffmans[1][cl]; ok {
+ numCLCodeLengths = uint32(i + 1)
+ break
+ }
+ }
+ if numCLCodeLengths < 4 {
+ numCLCodeLengths = 4
+ }
+
+ return numLCodes, numDCodes, numCLCodeLengths, nil
}
func deflateGlobalsWriteDynamicHuffmanTables() error {
g := &deflateGlobals
- if (g.numLCodes < 257) || (257+31 < g.numLCodes) {
- return fmt.Errorf("bad numLCodes: %d", g.numLCodes)
+ numLCodes, numDCodes, numCLCodeLengths, err := deflateGlobalsCountCodes()
+ if err != nil {
+ return err
}
- g.stream.writeBits(g.numLCodes-257, 5)
- if (g.numDCodes < 1) || (1+31 < g.numDCodes) {
- return fmt.Errorf("bad numDCodes: %d", g.numDCodes)
+ if (numLCodes < 257) || (257+31 < numLCodes) {
+ return fmt.Errorf("bad numLCodes: %d", numLCodes)
}
- g.stream.writeBits(g.numDCodes-1, 5)
- if (g.numCLCodeLengths < 4) || (4+15 < g.numCLCodeLengths) {
- return fmt.Errorf("bad numCLCodeLengths: %d", g.numCLCodeLengths)
+ g.stream.writeBits(numLCodes-257, 5)
+ if (numDCodes < 1) || (1+31 < numDCodes) {
+ return fmt.Errorf("bad numDCodes: %d", numDCodes)
}
- g.stream.writeBits(g.numCLCodeLengths-4, 4)
+ g.stream.writeBits(numDCodes-1, 5)
+ if (numCLCodeLengths < 4) || (4+15 < numCLCodeLengths) {
+ return fmt.Errorf("bad numCLCodeLengths: %d", numCLCodeLengths)
+ }
+ g.stream.writeBits(numCLCodeLengths-4, 4)
// Write the Huffman table for CodeLength.
{
- for i := uint32(0); i < g.numCLCodeLengths; i++ {
+ for i := uint32(0); i < numCLCodeLengths; i++ {
n := len(g.huffmans[1][deflateCodeOrder[i]])
g.stream.writeBits(uint32(n), 3)
}
- for i := g.numCLCodeLengths; i < uint32(len(deflateCodeOrder)); i++ {
+ for i := numCLCodeLengths; i < uint32(len(deflateCodeOrder)); i++ {
n := len(g.huffmans[1][deflateCodeOrder[i]])
if n > 0 {
- return fmt.Errorf("short numCLCodeLengths: %d", g.numCLCodeLengths)
+ return fmt.Errorf("short numCLCodeLengths: %d", numCLCodeLengths)
}
}
}
@@ -292,14 +328,14 @@
// Write the Huffman tables for Length/Literal and Distance.
{
numZeroes := uint32(0)
- for i := uint32(0); i < g.numLCodes+g.numDCodes; i++ {
- s := ""
- if i < g.numLCodes {
- s = g.huffmans[2][i]
+ for i := uint32(0); i < numLCodes+numDCodes; i++ {
+ codeLen := uint32(0)
+ if i < numLCodes {
+ codeLen = uint32(len(g.huffmans[2][i]))
} else {
- s = g.huffmans[3][i-g.numLCodes]
+ codeLen = uint32(len(g.huffmans[3][i-numLCodes]))
}
- if s == "" {
+ if codeLen == 0 {
numZeroes++
continue
}
@@ -309,7 +345,11 @@
}
numZeroes = 0
- deflateGlobalsWriteDynamicHuffmanBits(g.huffmans[1][uint32(len(s))])
+ codeLenCode := g.huffmans[1][codeLen]
+ if codeLenCode == "" {
+ return fmt.Errorf("no code for code-length %d", codeLen)
+ }
+ deflateGlobalsWriteDynamicHuffmanBits(g.huffmans[1][codeLen])
}
if err := deflateGlobalsWriteDynamicHuffmanZeroes(numZeroes); err != nil {
return err
@@ -512,10 +552,7 @@
func stateDeflateDynamicHuffman(line string) (stateFunc, error) {
g := &deflateGlobals
const (
- cmdH = "huffman "
- cmdNCLCL = "numCLCodeLengths "
- cmdNDC = "numDCodes "
- cmdNLC = "numLCodes "
+ cmdH = "huffman "
)
switch {
case line == "}":
@@ -530,33 +567,6 @@
}
g.whichHuffman = n
return stateDeflateDynamicHuffmanHuffman, nil
-
- case strings.HasPrefix(line, cmdNLC):
- s := line[len(cmdNLC):]
- n, s, ok := parseNum(s)
- if !ok {
- break
- }
- g.numLCodes = n
- return stateDeflateDynamicHuffman, nil
-
- case strings.HasPrefix(line, cmdNDC):
- s := line[len(cmdNDC):]
- n, s, ok := parseNum(s)
- if !ok {
- break
- }
- g.numDCodes = n
- return stateDeflateDynamicHuffman, nil
-
- case strings.HasPrefix(line, cmdNCLCL):
- s := line[len(cmdNCLCL):]
- n, s, ok := parseNum(s)
- if !ok {
- break
- }
- g.numCLCodeLengths = n
- return stateDeflateDynamicHuffman, nil
}
if lit, ok := deflateParseLiteral(line); ok {
@@ -568,6 +578,25 @@
deflateGlobalsWriteDynamicHuffmanBits(s)
}
return stateDeflateDynamicHuffman, nil
+
+ } else if l, d, ok := deflateParseLenDist(line); ok {
+ if (l != 3) || (d != 2) {
+ return nil, fmt.Errorf("TODO: support len/dist pairs for dynamic Huffman blocks")
+ }
+ // len 3 is code 257, with no extra bits.
+ if s := g.huffmans[2][257]; s != "" {
+ deflateGlobalsWriteDynamicHuffmanBits(s)
+ } else {
+ return nil, fmt.Errorf("no code for literal/length symbol 257")
+ }
+ // dist 2 is code 1, with no extra bits.
+ if s := g.huffmans[3][1]; s != "" {
+ deflateGlobalsWriteDynamicHuffmanBits(s)
+ } else {
+ return nil, fmt.Errorf("no code for distance symbol 2")
+ }
+ return stateDeflateDynamicHuffman, nil
+
} else if line == "endOfBlock" {
s := g.huffmans[2][256]
if s == "" {
@@ -586,6 +615,8 @@
switch {
case line == "}":
g.whichHuffman = 0
+ g.prevLine = ""
+ g.etcetera = false
// If we have all three Huffman tables, write them.
for i := 1; ; i++ {
@@ -602,6 +633,10 @@
return stateDeflateDynamicHuffman, nil
+ case line == "etcetera":
+ g.etcetera = true
+ return stateDeflateDynamicHuffmanHuffman, nil
+
default:
s := line
n, s, ok := parseNum(s)
@@ -613,16 +648,84 @@
break outer
}
}
+ if (len(s) < 1) || (15 < len(s)) {
+ return nil, fmt.Errorf("%q code length, %d, is out of range", s, len(s))
+ }
+
+ if g.etcetera {
+ g.etcetera = false
+ n0, s0, ok := parseNum(g.prevLine)
+ if !ok {
+ return nil, fmt.Errorf("bad etcetera command")
+ }
+ if err := stateDeflateDHHEtcetera(n0, s0, n, s); err != nil {
+ return nil, err
+ }
+ }
+
if g.huffmans[g.whichHuffman] == nil {
g.huffmans[g.whichHuffman] = deflateHuffmanTable{}
}
g.huffmans[g.whichHuffman][n] = s
+ g.prevLine = line
return stateDeflateDynamicHuffmanHuffman, nil
}
return nil, fmt.Errorf("bad stateDeflateDynamicHuffmanHuffman command: %q", line)
}
+// stateDeflateDHHEtcetera expands the "etcetera" line in:
+//
+// 0 0000
+// 1 0001
+// etcetera
+// 7 0111
+//
+// to produce the implicit lines:
+//
+// 0 0000
+// 1 0001
+// 2 0010
+// 3 0011
+// 4 0100
+// 5 0101
+// 6 0110
+// 7 0111
+func stateDeflateDHHEtcetera(n0 uint32, s0 string, n1 uint32, s1 string) error {
+ b := []byte(s0)
+ if !incrementBitstring(b) {
+ return fmt.Errorf("etcetera: could not increment bitstring")
+ }
+ for n := n0 + 1; n < n1; n++ {
+ line := fmt.Sprintf("%d %s", n, b)
+ if _, err := stateDeflateDynamicHuffmanHuffman(line); err != nil {
+ return err
+ }
+ if !incrementBitstring(b) {
+ return fmt.Errorf("etcetera: could not increment bitstring")
+ }
+ }
+ if string(b) != s1 {
+ return fmt.Errorf("etcetera: final bitstring: got %q, want %q", b, s1)
+ }
+ return nil
+}
+
+func incrementBitstring(b []byte) (ok bool) {
+ for i := len(b) - 1; i >= 0; i-- {
+ switch b[i] {
+ case '0':
+ b[i] = '1'
+ return true
+ case '1':
+ b[i] = '0'
+ default:
+ return false
+ }
+ }
+ return false
+}
+
type deflateBitStream struct {
bits uint32
nBits uint32 // Always within [0, 7].
diff --git a/script/print-deflate-huff-table-size.go b/script/print-deflate-huff-table-size.go
new file mode 100644
index 0000000..7082f5a
--- /dev/null
+++ b/script/print-deflate-huff-table-size.go
@@ -0,0 +1,561 @@
+// Copyright 2019 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build ignore
+
+package main
+
+// print-deflate-huff-table-size.go prints the worst-case number of elements in
+// the std/deflate decoder's huffs tables.
+//
+// Usage: go run print-deflate-huff-table-size.go
+//
+// This program might take tens of seconds to run.
+//
+// It is algorithmically similar to examples/enough.c in the zlib C library.
+// Much of the initial commentary in that file is also relevant for this
+// program. Not all of it applies, though. For example, the Wuffs decoder
+// implementation's primary (root) table length is fixed at compile time. It
+// does not grow that at run-time, even if the shortest Huffman code is longer
+// than that primary length.
+//
+// This program should print:
+//
+// ----------------
+// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+// primLen 3: 4376 entries = 8 prim + 4368 seco 1 0 0; 1 1 1 17 1 1 257 1 1 1 1 2
+// primLen 4: 2338 entries = 16 prim + 2322 seco 0 0 0 0; 3 1 1 177 97 1 1 1 1 1 2
+// primLen 5: 1330 entries = 32 prim + 1298 seco 1 0 0 0 0; 3 1 1 177 97 1 1 1 1 2
+// primLen 6: 852 entries = 64 prim + 788 seco 0 0 0 0 0 0; 1 229 49 1 1 1 1 1 2
+// primLen 7: 660 entries = 128 prim + 532 seco 1 0 0 0 0 0 0; 1 229 49 1 1 1 1 2
+// primLen 8: 660 entries = 256 prim + 404 seco 1 1 0 0 0 0 0 0; 1 229 49 1 1 1 2
+// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
+// primLen 10: 1332 entries = 1024 prim + 308 seco 1 1 1 1 0 0 0 0 0 0; 1 229 49 1 2
+// primLen 11: 2340 entries = 2048 prim + 292 seco 1 1 1 1 1 0 0 0 0 0 0; 1 229 49 2
+// primLen 12: 4380 entries = 4096 prim + 284 seco 1 1 1 1 1 1 0 0 0 0 0 0; 1 229 50
+// primLen 13: 8472 entries = 8192 prim + 280 seco 1 1 1 1 1 1 0 1 0 0 0 0 0;105 174
+// primLen 14: 16658 entries = 16384 prim + 274 seco 1 1 1 1 1 1 0 1 1 1 0 1 1 1;274
+// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+//
+// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+// primLen 3: 4120 entries = 8 prim + 4112 seco 1 0 0; 1 9 9 1 1 1 1 1 1 1 1 2
+// primLen 4: 2080 entries = 16 prim + 2064 seco 1 1 0 0; 1 9 9 1 1 1 1 1 1 1 2
+// primLen 5: 1072 entries = 32 prim + 1040 seco 1 1 1 0 0; 1 9 9 1 1 1 1 1 1 2
+// primLen 6: 592 entries = 64 prim + 528 seco 1 1 1 1 0 0; 1 9 9 1 1 1 1 1 2
+// primLen 7: 400 entries = 128 prim + 272 seco 1 1 1 1 1 0 0; 1 9 9 1 1 1 1 2
+// primLen 8: 400 entries = 256 prim + 144 seco 1 1 1 1 1 1 0 0; 1 9 9 1 1 1 2
+// primLen 9: 592 entries = 512 prim + 80 seco 1 1 1 1 1 1 1 0 0; 1 9 9 1 1 2
+// primLen 10: 1072 entries = 1024 prim + 48 seco 1 1 1 1 1 1 1 1 0 0; 1 9 9 1 2
+// primLen 11: 2080 entries = 2048 prim + 32 seco 1 1 1 1 1 1 1 1 1 0 0; 1 9 9 2
+// primLen 12: 4120 entries = 4096 prim + 24 seco 1 1 1 1 1 1 1 1 1 1 0 0; 1 9 10
+// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
+// primLen 14: 16400 entries = 16384 prim + 16 seco 1 1 1 1 1 1 1 1 1 1 1 0 0 0; 16
+// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+// ----------------
+//
+// Deflate's canonical Huffman codes are up to 15 bits long, but the decoder
+// can use fewer than (1 << 15) entries in its look-up table, by splitting it
+// into one primary table (of an at-compile-time fixed length <= 15) and zero
+// or more secondary tables (of variable lengths, determined at-run-time).
+//
+// Symbols whose Huffman codes' length are less than or equal to the primary
+// table length do not require any secondary tables. Longer codes are grouped
+// by their primLen length prefix. Each group occupies one entry in the primary
+// table, which redirects to a secondary table of size (1 << (N - primLen)),
+// where N is the length of the longest code in that group.
+//
+// This program calculates that, for a primary table length of 9 bits, the
+// worst case Huffman table size is 852 for the Literal/Length table (852 being
+// a 512 entry primary table and the secondary tables totalling 340 further
+// entries) and 592 (512 + 80) for the Distance table.
+//
+// Copy/pasting one row of that output (and the column headers):
+//
+// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
+//
+// The "@1, @2, @3, ..." columns mean that one combination that hits that 852
+// worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0
+// 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a
+// total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols.
+//
+// See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual
+// example of valid Deflate-formatted data that exercises these worst-case code
+// lengths (for a primary table length of 9 bits).
+//
+// Hypothetically, suppose that we used a primLen of 13 for the Distance table.
+// Here is a code length assignment that produces 20 secondary table entries:
+//
+// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
+//
+// In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length
+// codes "f00", "f01", "f02", ..., "f13".
+//
+// Recall that secondary tables' lengths are (1 << (N - primLen)), where N is
+// the longest code for a given primLen length prefix. In this case, (N -
+// primLen) is (15 - 13) if the secondary table contains a "f??" code,
+// otherwise it N is (14 - 13). When the secondary table contains a mixture of
+// lengths, some of the shorter codes' entries are duplicated.
+//
+// Also, for canonical Huffman codes, the codes (bitstrings) for 14-length
+// codes are all assigned (sequentially) before any 15-length codes. There are
+// therefore 6 secondary tables:
+//
+// - length 2: e0, e1.
+// - length 2: e2, e3.
+// - length 4: e4, e4, f00, f01.
+// - length 4: f02, f03, f04, f05.
+// - length 4: f06, f07, f08, f09.
+// - length 4: f10, f11, f12, f13.
+//
+// The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20.
+
+import (
+ "fmt"
+ "os"
+ "sort"
+)
+
+const (
+ maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes.
+ maxCodeLen = 15 // Each code's encoding is at most 15 bits.
+)
+
+func main() {
+ if err := main1(); err != nil {
+ os.Stderr.WriteString(err.Error() + "\n")
+ os.Exit(1)
+ }
+}
+
+func main1() error {
+ do("Lit/Len", 286)
+ do("Distance", 30)
+ return nil
+}
+
+func do(name string, maxNSyms uint32) {
+ ftab := &feasibleTable{}
+ ftab.build(maxNSyms)
+
+ // We can do the per-primLen computations concurrently, speeding up the
+ // overall wall time taken.
+ ch := make(chan string)
+ numPending := 0
+ for primLen := uint32(3); primLen <= maxCodeLen; primLen++ {
+ go doPrimLen(ch, maxNSyms, ftab, primLen)
+ numPending++
+ }
+
+ // Gather, sort and print the results for each primLen.
+ results := make([]string, 0, numPending)
+ for ; numPending > 0; numPending-- {
+ results = append(results, <-ch)
+ }
+ sort.Strings(results)
+ fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+
+ " @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15\n",
+ name, maxNSyms, ftab.count())
+ for _, s := range results {
+ fmt.Println(s)
+ }
+}
+
+type state struct {
+ ftab *feasibleTable
+ primLen uint32
+
+ currCase struct {
+ nAssignedCodes nAssignedCodes
+ }
+
+ worstCase struct {
+ nAssignedCodes nAssignedCodes
+ nSecondary uint32
+ }
+
+ visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector
+}
+
+func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) {
+ z := &state{
+ ftab: ftab,
+ primLen: primLen,
+ }
+
+ if z.primLen > maxCodeLen {
+ panic("unreachable")
+
+ } else if z.primLen == maxCodeLen {
+ // There's only the primary table and no secondary tables. For an
+ // example that fills that primary table, create a trivial encoding of
+ // two symbols: one symbol's code is "0", the other's is "1".
+ z.worstCase.nAssignedCodes[1] = 2
+ for n := uint32(2); n <= maxCodeLen; n++ {
+ z.worstCase.nAssignedCodes[n] = 0
+ }
+
+ } else {
+ // Brute force search the tree of possible code length assignments.
+ //
+ // nCodes is always even: the number of unassigned Huffman codes at
+ // length L is twice the number at length (L-1). At the start, there
+ // are two unassigned codes of length 1: "0" and "1".
+ //
+ // nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there
+ // will be unassigned codes.
+ for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 {
+ for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ {
+ if z.ftab[z.primLen+1][nCodes][nSyms] {
+ z.visit(z.primLen+1, nCodes, nSyms, 0, 0)
+ }
+ }
+ }
+
+ z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen)
+ }
+
+ // Collect the details: the number of codes assigned to each length in the
+ // worst case.
+ details := ""
+ for n := uint32(1); n <= maxCodeLen; n++ {
+ if n == z.primLen+1 {
+ details += ";"
+ } else {
+ details += " "
+ }
+ details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n])
+ }
+
+ nPrimary := uint32(1) << z.primLen
+ nSecondary := z.worstCase.nSecondary
+ nEntries := nPrimary + nSecondary
+
+ ch <- fmt.Sprintf("primLen %2d: %5d entries = %5d prim + %4d seco%s",
+ z.primLen, nEntries, nPrimary, nSecondary, details)
+}
+
+// visit updates z.worstCase, starting with nCodes unassigned Huffman codes of
+// length codeLen for nSyms symbols. nSecondary is the total size (number of
+// entries) so far of all of the secondary tables.
+//
+// nRemain > 0 iff we have an incomplete secondary table, whose size was
+// partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If
+// positive, nRemain is the number of unassigned entries in that incomplete
+// secondary table.
+//
+// visit can recursively call itself with longer codeLen values. As it does so,
+// it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to
+// zero before it returns.
+func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) {
+ if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) {
+ // No-op.
+
+ } else if nCodes > nSyms {
+ // Infeasible: there will be unassigned codes.
+
+ } else if nCodes == nSyms {
+ // Fill in the incomplete secondary table (if present) and any new
+ // secondary tables (if necessary) to assign the nCodes codes to the
+ // nSyms symbols. At the end, we should have no remaining entries.
+ n := nCodes
+ for n > nRemain {
+ n -= nRemain
+ nRemain = 1 << (codeLen - z.primLen)
+ nSecondary += nRemain
+ }
+ if n != nRemain {
+ panic("inconsistent secondary table size")
+ }
+
+ // Update z.worstCase if we have a new record for nSecondary.
+ if z.worstCase.nSecondary < nSecondary {
+ z.worstCase.nSecondary = nSecondary
+ z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes
+ z.worstCase.nAssignedCodes[codeLen] = nCodes
+ }
+
+ } else { // nCodes < nSyms.
+ codeLen1 := codeLen + 1
+ // Brute force assigning n codes of this codeLen. The other (nCodes -
+ // n) codes will have longer codes.
+ for n := uint32(0); n < nCodes; n++ {
+ nCodes1 := (nCodes - n) * 2
+ nSyms1 := nSyms - n
+
+ if nCodes1 > nSyms1 {
+ // Infeasible: there will be unassigned codes.
+
+ } else if z.ftab[codeLen1][nCodes1][nSyms1] {
+ nSecondary1 := nSecondary
+ nRemain1 := nRemain * 2
+
+ if nRemain1 > 0 {
+ // We have an incomplete secondary table, which has been
+ // partially accounted for: nSecondary has previously been
+ // incremented by X, where X equals (1 << (codeLen -
+ // z.primLen)).
+ //
+ // As we recursively call visit, with codeLen increased by
+ // 1, then we need to double the accounted size of that
+ // secondary table to 2*X. Since X out of that 2*X has
+ // already been added, we need to increment nSecondary1 by
+ // just the difference, which is also X.
+ nSecondary1 += 1 << (codeLen - z.primLen)
+ }
+
+ z.currCase.nAssignedCodes[codeLen] = n
+ z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1)
+ z.currCase.nAssignedCodes[codeLen] = 0
+ }
+
+ // Open a new secondary table if necessary: one whose size is (1 <<
+ // (codeLen - z.primLen)).
+ if nRemain == 0 {
+ nRemain = 1 << (codeLen - z.primLen)
+ nSecondary += nRemain
+ }
+
+ // Assign one of the incomplete secondary table's entries.
+ nRemain--
+ }
+ }
+}
+
+// alreadyVisited returns whether z has already visited the (nSecondary,
+// nRemain) pair. As a side effect, it also marks that pair as visited.
+func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool {
+ // Use the zig re-numbering to minimize the memory requirements for the
+ // visited slice. There's an additional 8-fold memory reduction by using 1
+ // bit per element, not 1 bool (1 byte) per element.
+ i := zig(nSecondary/8, nRemain)
+ mask := uint8(1) << (nSecondary & 7)
+
+ visited := z.visited[codeLen][nCodes][nSyms]
+ if uint64(i) < uint64(len(visited)) {
+ x := visited[i]
+ visited[i] = x | mask
+ return x&mask != 0
+ }
+
+ oldDone := visited
+ visited = make(bitVector, i+1)
+ copy(visited, oldDone)
+ visited[i] = mask
+
+ z.visited[codeLen][nCodes][nSyms] = visited
+ return false
+}
+
+type bitVector []uint8
+
+// zig packs a pair of non-negative integers (i, j) into a single unique
+// non-negative integer, in a zig-zagging pattern:
+//
+// i=0 i=1 i=2 i=3 i=4 etc
+// j=0: 0 1 3 6 10 ...
+// j=1: 2 4 7 11 16 ...
+// j=2: 5 8 12 17 23 ...
+// j=3: 9 13 18 24 31 ...
+// j=4: 14 19 25 32 40 ...
+// etc: ... ... ... ... ... ...
+//
+// The closed from expression relies on the fact that the sum (0 + 1 + ... + n)
+// equals ((n * (n + 1)) / 2).
+//
+// This lets us minimize the memory requirements of a triangular array: one for
+// which a[i][j] is only accessed when ((i+j) < someBound).
+func zig(i uint32, j uint32) uint32 {
+ if (i > 0x4000) || (j > 0x4000) {
+ panic("overflow")
+ }
+ n := i + j
+ return ((n * (n + 1)) / 2) + j
+}
+
+// nAssignedCodes[i] holds the number of codes of length i.
+type nAssignedCodes [maxCodeLen + 1]uint32
+
+// fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes
+// that the caller has already filled in a[n] for n > primLen.
+func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) {
+ // Looping backwards from the maxCodeLen, figure out how many unassigned
+ // primLen length codes there were.
+ //
+ // For example, if there were 10 assigned 15 length codes, then there must
+ // have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7
+ // 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned
+ // 13 length codes. Etcetera.
+ nUnassignedPrimLen := uint32(0)
+ for n := uint32(maxCodeLen); n > primLen; n-- {
+ nUnassignedPrimLen += a[n]
+ if nUnassignedPrimLen%2 != 0 {
+ panic("cannot undo Huffman code split")
+ }
+ nUnassignedPrimLen /= 2
+ }
+ if nUnassignedPrimLen < 1 {
+ panic("not enough unassigned primLen length codes")
+ }
+
+ // Pick a combination of assigning shorter length codes (i.e. setting a[n]
+ // values) to reach that nUnassignedPrimLen target. There are many possible
+ // ways to do so, but only one unique way with the additional constraint
+ // that each value is either 0 or 1: unique because every positive number
+ // has a unique binary representation.
+ //
+ // For example, suppose we need to have 12 unassigned primLen length codes.
+ // With that additional constraint, the way to do this is:
+ //
+ // - 12 unassigned code of length primLen-0.
+ // - 6 unassigned code of length primLen-1.
+ // - 3 unassigned code of length primLen-2.
+ // - 2 unassigned code of length primLen-3.
+ // - 1 unassigned code of length primLen-4.
+ // - 1 unassigned code of length primLen-5.
+ // - 1 unassigned code of length primLen-6.
+ // - etc
+ //
+ // Each left hand column value is either ((2*b)-0) or ((2*b)-1), where b is
+ // the the number below. Subtracting 0 or 1 means that we assign 0 or 1
+ // codes of that row's length:
+ //
+ // - 0 assigned code of length primLen-0.
+ // - 0 assigned code of length primLen-1.
+ // - 1 assigned code of length primLen-2.
+ // - 0 assigned code of length primLen-3.
+ // - 1 assigned code of length primLen-4.
+ // - 1 assigned code of length primLen-5.
+ // - 1 assigned code of length primLen-6.
+ // - etc
+ //
+ // Reading upwards, this is the binary string "...1110100", which is the
+ // two's complement representation of the decimal number -12: the negative
+ // of nUnassignedPrimLen.
+ bits := -int32(nUnassignedPrimLen)
+ for n := primLen; n >= 1; n-- {
+ a[n] = uint32(bits & 1)
+ bits >>= 1
+ }
+ if bits != -1 {
+ panic("did not finish with one unassigned zero-length code")
+ }
+
+ // As a sanity check of the "negative two's complement" theory, loop
+ // forwards to calculate the number of unassigned primLen length codes,
+ // which should match nUnassignedPrimLen.
+ nUnassigned := uint32(1)
+ for n := uint32(1); n <= primLen; n++ {
+ nUnassigned *= 2
+ if nUnassigned <= a[n] {
+ panic("not enough unassigned codes")
+ }
+ nUnassigned -= a[n]
+ }
+ if nUnassigned != nUnassignedPrimLen {
+ panic("inconsistent unassigned codes")
+ }
+}
+
+// feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign
+// Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes
+// unassigned codes of length codeLen. Each unassigned code of length L can be
+// split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a
+// maximum code length of maxCodeLen. A feasible assignment ends with zero
+// unassigned codes, no more and no less.
+type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool
+
+func (f *feasibleTable) count() (ret uint64) {
+ for i := range *f {
+ for j := range f[i] {
+ for k := range f[i][j] {
+ if f[i][j][k] {
+ ret++
+ }
+ }
+ }
+ }
+ return ret
+}
+
+func (f *feasibleTable) build(maxNSyms uint32) {
+ for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ {
+ if f.buildMore(1, 2, nSyms) != bmResultOK {
+ panic("infeasible [codeLen][nCodes][nSyms] combination")
+ }
+ }
+}
+
+const (
+ bmResultOK = 0
+ bmResultUseFewerCodes = 1
+ bmResultUseMoreCodes = 2
+ bmResultUnsatisfiable = 3
+)
+
+func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 {
+ if nCodes == nSyms {
+ if nCodes%2 == 1 {
+ panic("odd nCodes declared feasible")
+ }
+ // This is trivial: assign every symbol a code of length codeLen.
+ f[codeLen][nCodes][nSyms] = true
+ return bmResultOK
+ } else if nCodes > nSyms {
+ // Infeasible: there will be unassigned codes.
+ return bmResultUseMoreCodes
+ }
+ // From here onwards, we have (nCodes < nSyms).
+
+ if (nCodes << (maxCodeLen - codeLen)) < nSyms {
+ // Infeasible: there will be unassigned symbols, even if we extend the
+ // codes out to maxCodeLen.
+ return bmResultUseFewerCodes
+ }
+
+ // If we've already visited this [codeLen][nCodes][nSyms] combination,
+ // there's no need to re-do the computation.
+ if f[codeLen][nCodes][nSyms] {
+ return bmResultOK
+ }
+
+ // Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering
+ // that we have checked above that (nCodes < nSyms). The remaining (nCodes
+ // - n) codes are lengthened by 1, doubling the number of them, and try to
+ // assign those longer codes to the remaining (nSyms - n) symbols.
+ ok := false
+ codeLen1 := codeLen + 1
+ for n := uint32(0); n < nCodes; n++ {
+ nCodes1 := (nCodes - n) * 2
+ nSyms1 := nSyms - n
+
+ if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK {
+ ok = true
+ } else if x == bmResultUseFewerCodes {
+ // This value of n didn't succeed, but also any larger n also won't
+ // succeed, so we can break the loop early.
+ break
+ }
+ }
+
+ if !ok {
+ return bmResultUnsatisfiable
+ }
+ if nCodes%2 == 1 {
+ panic("odd nCodes declared feasible")
+ }
+ f[codeLen][nCodes][nSyms] = true
+ return bmResultOK
+}
diff --git a/std/deflate/decode_deflate.wuffs b/std/deflate/decode_deflate.wuffs
index 2b233bb..44a82dc 100644
--- a/std/deflate/decode_deflate.wuffs
+++ b/std/deflate/decode_deflate.wuffs
@@ -57,8 +57,11 @@
0x401000B0, 0x401800B0, 0x402000C0, 0x403000C0, 0x404000D0, 0x406000D0, 0x08000000, 0x08000000,
]
-// TODO: replace the magic "big enough" 1024 with something more principled,
-// perhaps discovered via an exhaustive search.
+// huffs_table_size is the smallest power of 2 that is greater than or equal to
+// the worst-case size of the Huffman tables. See
+// script/print-deflate-huff-table-size.go which calculates that, for a 9-bit
+// primary table, the worst-case size is 852 for the Lit/Len table and 592 for
+// the Distance table.
pri const huffs_table_size base.u32 = 1024
pri const huffs_table_mask base.u32 = 1023
diff --git a/test/c/std/deflate.c b/test/c/std/deflate.c
index 2be529b..9d6217c 100644
--- a/test/c/std/deflate.c
+++ b/test/c/std/deflate.c
@@ -118,6 +118,15 @@
"deflate-distance-code-31.deflate",
};
+golden_test deflate_deflate_huffman_primlen_9_gt = {
+ .want_filename =
+ "test/data/artificial/"
+ "deflate-huffman-primlen-9.deflate.decompressed",
+ .src_filename =
+ "test/data/artificial/"
+ "deflate-huffman-primlen-9.deflate",
+};
+
golden_test deflate_midsummer_gt = {
.want_filename = "test/data/midsummer.txt", //
.src_filename = "test/data/midsummer.txt.gz", //
@@ -217,6 +226,63 @@
return NULL;
}
+const char* test_wuffs_deflate_decode_deflate_huffman_primlen_9() {
+ CHECK_FOCUS(__func__);
+
+ // First, treat this like any other compare-to-golden test.
+ const char* status = do_test_io_buffers(wuffs_deflate_decode,
+ &deflate_deflate_huffman_primlen_9_gt,
+ UINT64_MAX, UINT64_MAX);
+ if (status) {
+ return status;
+ }
+
+ // Second, check that the decoder's huffman table sizes match those predicted
+ // by the script/print-deflate-huff-table-size.go program.
+ wuffs_base__io_buffer src = ((wuffs_base__io_buffer){
+ .data = global_src_slice,
+ });
+ wuffs_base__io_buffer got = ((wuffs_base__io_buffer){
+ .data = global_got_slice,
+ });
+
+ golden_test* gt = &deflate_deflate_huffman_primlen_9_gt;
+ status = read_file(&src, gt->src_filename);
+ if (status) {
+ return status;
+ }
+
+ wuffs_deflate__decoder dec;
+ status = wuffs_deflate__decoder__initialize(
+ &dec, sizeof dec, WUFFS_VERSION, WUFFS_INITIALIZE__DEFAULT_OPTIONS);
+ if (status) {
+ RETURN_FAIL("initialize: \"%s\"", status);
+ }
+ status = wuffs_deflate__decoder__decode_io_writer(&dec, &got, &src,
+ global_work_slice);
+ if (status) {
+ RETURN_FAIL("decode_io_writer: \"%s\"", status);
+ }
+
+ int i;
+ for (i = 0; i < 2; i++) {
+ // Find the first unused (i.e. zero) entry in the i'th huffs table.
+ int got = wuffs_deflate__huffs_table_size;
+ while ((got > 0) && (dec.private_data.f_huffs[i][got - 1] == 0)) {
+ got--;
+ }
+
+ // See script/print-deflate-huff-table-size.go with primLen = 9 for how
+ // these expected values are derived.
+ int want = (i == 0) ? 852 : 592;
+ if (got != want) {
+ RETURN_FAIL("i=%d: got %d, want %d", i, got, want);
+ }
+ }
+
+ return NULL;
+}
+
const char* test_wuffs_deflate_decode_midsummer() {
CHECK_FOCUS(__func__);
return do_test_io_buffers(wuffs_deflate_decode, &deflate_midsummer_gt,
@@ -682,6 +748,13 @@
return NULL;
}
+const char* test_mimic_deflate_decode_deflate_huffman_primlen_9() {
+ CHECK_FOCUS(__func__);
+ return do_test_io_buffers(mimic_deflate_decode,
+ &deflate_deflate_huffman_primlen_9_gt, UINT64_MAX,
+ UINT64_MAX);
+}
+
const char* test_mimic_deflate_decode_midsummer() {
CHECK_FOCUS(__func__);
return do_test_io_buffers(mimic_deflate_decode, &deflate_midsummer_gt,
@@ -803,6 +876,7 @@
test_wuffs_deflate_decode_deflate_degenerate_huffman_unused, //
test_wuffs_deflate_decode_deflate_distance_32768, //
test_wuffs_deflate_decode_deflate_distance_code_31, //
+ test_wuffs_deflate_decode_deflate_huffman_primlen_9, //
test_wuffs_deflate_decode_midsummer, //
test_wuffs_deflate_decode_pi_just_one_read, //
test_wuffs_deflate_decode_pi_many_big_reads, //
@@ -822,6 +896,7 @@
test_mimic_deflate_decode_deflate_degenerate_huffman_unused, //
test_mimic_deflate_decode_deflate_distance_32768, //
test_mimic_deflate_decode_deflate_distance_code_31, //
+ test_mimic_deflate_decode_deflate_huffman_primlen_9, //
test_mimic_deflate_decode_midsummer, //
test_mimic_deflate_decode_pi_just_one_read, //
test_mimic_deflate_decode_pi_many_big_reads, //
diff --git a/test/data/artificial/deflate-degenerate-huffman-unused.deflate.make-artificial.txt b/test/data/artificial/deflate-degenerate-huffman-unused.deflate.make-artificial.txt
index ba90dd8..841a4c8 100644
--- a/test/data/artificial/deflate-degenerate-huffman-unused.deflate.make-artificial.txt
+++ b/test/data/artificial/deflate-degenerate-huffman-unused.deflate.make-artificial.txt
@@ -3,10 +3,6 @@
make deflate
blockDynamicHuffman (final) {
- numLCodes 257
- numDCodes 1
- numCLCodeLengths 18
-
huffman CodeLength {
1 00
2 01
diff --git a/test/data/artificial/deflate-huffman-primlen-9.deflate b/test/data/artificial/deflate-huffman-primlen-9.deflate
new file mode 100644
index 0000000..6e0e3a6
--- /dev/null
+++ b/test/data/artificial/deflate-huffman-primlen-9.deflate
@@ -0,0 +1 @@
+íým$I$I#quwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww÷ÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌì=WwwwwÏÌÌÌÌÞóÜàú·ù³þ
\ No newline at end of file
diff --git a/test/data/artificial/deflate-huffman-primlen-9.deflate.commentary.txt b/test/data/artificial/deflate-huffman-primlen-9.deflate.commentary.txt
new file mode 100644
index 0000000..88db6ae
--- /dev/null
+++ b/test/data/artificial/deflate-huffman-primlen-9.deflate.commentary.txt
@@ -0,0 +1,112 @@
+Running deflate-huffman-primlen-9.deflate through script/print-bits.go and
+adding commentary:
+
+ offset xoffset ASCII hex binary
+ 000000 0x0000 . 0xED 0b_...._.101 Dynamic Huffman block, final
+ 000000 0x0000 . 0xED 0b_1110_1... NumLCodes: 286
+ 000001 0x0001 . 0xFD 0b_...1_1101 NumDCodes: 30
+ 000001 0x0001 . 0xFD 0b_111._.... NumCLCodeLengths: 19
+ 000002 0x0002 m 0x6D 0b_...._...1
+
+Decode the H-CL Huffman table (NumCLCodeLengths = 18). Recall the peculiar
+code_order: 16, 17, 18, 0, 8, ..., 2, 14, 1, 15:
+
+ 000002 0x0002 m 0x6D 0b_0110_110. CLCodeLengths: 19 x 3 bits
+ 000003 0x0003 . 0x93 0b_1001_0011 CLCLs[ 0] is 4
+ 000004 0x0004 $ 0x24 0b_0010_0100 CLCLs[...] is 4
+ 000005 0x0005 I 0x49 0b_0100_1001 CLCLs[ 14] is 4
+ 000006 0x0006 . 0x92 0b_1001_0010 CLCLs[ 15] is 6
+ 000007 0x0007 $ 0x24 0b_0010_0100 CLCLs[ 16] is 6
+ 000008 0x0008 I 0x49 0b_0100_1001 CLCLs[ 17] is 6
+ 000009 0x0009 # 0x23 0b_...._..11 CLCLs[ 18] is 6
+
+The H-CL Huffman table is:
+"0000" -> CodeLength=0
+"0001" -> CodeLength=1
+"0010" -> CodeLength=2
+etc
+"1110" -> CodeLength=14
+"111100" -> CodeLength=15
+"111101" -> CodeLength=16 which means run-length encoding
+"111110" -> CodeLength=17 which means a block of ( 3 + 3_extra_bits) zeroes
+"111111" -> CodeLength=18 which means a block of (11 + 7_extra_bits) zeroes
+
+Decode the H-L Huffman table (NumLCodes = 286):
+
+ 000009 0x0009 # 0x23 0b_..10_00.. "0001" is CL=1
+ 000009 0x0009 # 0x23 0b_00.._.... "0010" is CL=2
+ 000010 0x000A q 0x71 0b_...._..01
+ 000010 0x000A q 0x71 0b_..11_00.. "0011" is CL=3
+ 000010 0x000A q 0x71 0b_01.._....
+ 000011 0x000B u 0x75 0b_...._..01 "1010" is CL=10
+ 000011 0x000B u 0x75 0b_..11_01.. "1011" is CL=11
+ 000011 0x000B u 0x75 0b_01.._.... "1011" is CL=11
+ 000012 0x000C w 0x77 0b_...._..11
+ 000012 0x000C w 0x77 0b_..11_01.. "1011" is CL=11
+ etc
+ 000149 0x0095 . 0xCC 0b_..00_11.. "1100" is CL=12
+ 000149 0x0095 . 0xCC 0b_11.._.... "1100" is CL=12
+ 000150 0x0096 . 0xEC 0b_...._..00
+ 000150 0x0096 . 0xEC 0b_..10_11.. "1101" is CL=13
+ 000150 0x0096 . 0xEC 0b_11.._.... "1110" is CL=14
+ 000151 0x0097 = 0x3D 0b_...._..01
+ 000151 0x0097 = 0x3D 0b_0011_11.. "111100" is CL=15
+ 000152 0x0098 . 0x0F 0b_..00_1111 "111100" is CL=15
+
+The H-L Huffman table is:
+"0" -> '\x00'
+"10" -> '\x01'
+"110" -> '\x02'
+"1110000000" -> '\x03'
+"11100000010" -> '\x04'
+etc
+"11101011111" -> '\x61' aka 'a' aka 97
+"11101100000" -> '\x62' aka 'b' aka 98
+etc
+"111111100101" -> Lit/Len code 256 (EOB)
+"111111100110" -> Lit/Len code 257 (Length = 3)
+etc
+"111111111111111" -> Lit/Len code 285 (Length = 258)
+
+Decode the H-D Huffman table (NumDCodes = 30):
+
+ 000152 0x0098 . 0x0F 0b_00.._.... "0001" is CL=1
+ 000153 0x0099 . 0x12 0b_...._..10
+ 000153 0x0099 . 0x12 0b_..01_00.. "0010" is CL=2
+ 000153 0x0099 . 0x12 0b_00.._....
+ 000154 0x009A . 0x8B 0b_...._..11 "0011" is CL=3
+ 000154 0x009A . 0x8B 0b_..00_10.. "0100" is CL=4
+ etc
+ 000165 0x00A5 . 0xCC 0b_..00_11.. "1100" is CL=12
+ 000165 0x00A5 . 0xCC 0b_11.._.... "1101" is CL=13
+ 000166 0x00A6 . 0xDE 0b_...._..10
+ 000166 0x00A6 . 0xDE 0b_..01_11.. "1110" is CL=14
+ 000166 0x00A6 . 0xDE 0b_11.._.... "111100" is CL=15
+ 000167 0x00A7 . 0xF3 0b_...._0011
+ 000167 0x00A7 . 0xF3 0b_1111_.... "111100" is CL=15
+ 000168 0x00A8 . 0xDC 0b_...._..00
+
+The H-D Huffman table is:
+"0" -> Distance code 0 (Distance = 1)
+"10" -> Distance code 1 (Distance = 2)
+"110" -> Distance code 2 (Distance = 3)
+etc
+"11111110010" -> Distance code 8 (Distance = 17 + 3_extra_bits)
+etc
+"111111111111111" -> Distance code 29 (Distance = 24577 + 13_extra_bits)
+
+Apply H-L and H-D. The length=3, distance=2 back-reference decodes to "ana", so
+the overall decoding is "banana".
+
+ 000168 0x00A8 . 0xDC 0b_1101_11.. lcode: 98 literal 'b'
+ 000169 0x00A9 . 0xE0 0b_...0_0000
+ 000169 0x00A9 . 0xE0 0b_111._.... lcode: 97 literal 'a'
+ 000170 0x00AA . 0xFA 0b_1111_1010
+ 000171 0x00AB . 0xB7 0b_1011_0111 lcode: 110 literal 'n'
+ 000172 0x00AC . 0xF9 0b_...._.001
+ 000172 0x00AC . 0xF9 0b_1111_1... lcode: 256 length=3
+ 000173 0x00AD . 0xB3 0b_.011_0011
+ 000173 0x00AD . 0xB3 0b_1..._.... dcode: 1 distance=2
+ 000174 0x00AE . 0xFE 0b_...._...0
+ 000174 0x00AE . 0xFE 0b_1111_111. lcode: 256 end of block
+ 000175 0x00AF . 0x14 0b_...1_0100
diff --git a/test/data/artificial/deflate-huffman-primlen-9.deflate.decompressed b/test/data/artificial/deflate-huffman-primlen-9.deflate.decompressed
new file mode 100644
index 0000000..570a66f
--- /dev/null
+++ b/test/data/artificial/deflate-huffman-primlen-9.deflate.decompressed
@@ -0,0 +1 @@
+banana
\ No newline at end of file
diff --git a/test/data/artificial/deflate-huffman-primlen-9.deflate.make-artificial.txt b/test/data/artificial/deflate-huffman-primlen-9.deflate.make-artificial.txt
new file mode 100644
index 0000000..e4d4757
--- /dev/null
+++ b/test/data/artificial/deflate-huffman-primlen-9.deflate.make-artificial.txt
@@ -0,0 +1,93 @@
+# Feed this file to script/make-artificial.go
+
+make deflate
+
+blockDynamicHuffman (final) {
+ huffman CodeLength {
+ # 15 codes of length 4.
+ 0 0000
+ 1 0001
+ etcetera
+ 13 1101
+ 14 1110
+
+ # 4 codes of length 6.
+ 15 111100
+ 16 111101
+ 17 111110
+ 18 111111
+ }
+
+ # script/print-deflate-huff-table-size.go prints:
+ # primLen 9: 852 entries = 512 prim + 340 seco
+ # @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+ # 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
+ huffman Length/Literal {
+ 0 0
+ 1 10
+ 2 110
+ 3 1110000000
+
+ # 229 codes of length 11.
+ 4 11100000010
+ 5 11100000011
+ etcetera
+ 231 11111100101
+ 232 11111100110
+
+ # 49 codes of length 12.
+ 233 111111001110
+ 234 111111001111
+ etcetera
+ 256 111111100101
+ 257 111111100110
+ etcetera
+ 280 111111111101
+ 281 111111111110
+
+ # 1+1+2 = 4 codes of length 13+.
+ 282 1111111111110
+ 283 11111111111110
+ 284 111111111111110
+ 285 111111111111111
+ }
+
+ # script/print-deflate-huff-table-size.go prints:
+ # primLen 9: 592 entries = 512 prim + 80 seco
+ # @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
+ # 1 1 1 1 1 1 1 0 0; 1 9 9 1 1 2
+ huffman Distance {
+ 0 0
+ 1 10
+ 2 110
+ 3 1110
+ 4 11110
+ 5 111110
+ 6 1111110
+ 7 1111111000
+
+ # 9 codes of length 11.
+ 8 11111110010
+ 9 11111110011
+ etcetera
+ 15 11111111001
+ 16 11111111010
+
+ # 9 codes of length 12.
+ 17 111111110110
+ 18 111111110111
+ etcetera
+ 24 111111111101
+ 25 111111111110
+
+ # 1+1+2 = 4 codes of length 13+.
+ 26 1111111111110
+ 27 11111111111110
+ 28 111111111111110
+ 29 111111111111111
+ }
+
+ literal "ban"
+ len 3 dist 2
+ endOfBlock
+}