script/print-deflate-huff-table-size.go - external/github.com/google/wuffs - Git at Google

 // Copyright 2019 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // +build ignore

 package main

 // print-deflate-huff-table-size.go prints the worst-case number of elements in
 // the std/deflate decoder's huffs tables.
 //
 // Usage: go run print-deflate-huff-table-size.go
 //
 // This program might take tens of seconds to run.
 //
 // It is algorithmically similar to examples/enough.c in the zlib C library.
 // Much of the initial commentary in that file is also relevant for this
 // program. Not all of it applies, though. For example, the Wuffs decoder
 // implementation's primary (root) table length is fixed at compile time. It
 // does not grow that at run-time, even if the shortest Huffman code is longer
 // than that primary length.
 //
 // This program should print:
 //
 // ----------------
 // --------  Lit/Len (up to 286 symbols, 149142 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15
 // primLen  3:   4376  entries =     8 prim + 4368 seco   1   0   0;  1   1   1  17   1   1 257   1   1   1   1   2
 // primLen  4:   2338  entries =    16 prim + 2322 seco   0   0   0   0;  3   1   1 177  97   1   1   1   1   1   2
 // primLen  5:   1330  entries =    32 prim + 1298 seco   1   0   0   0   0;  3   1   1 177  97   1   1   1   1   2
 // primLen  6:    852  entries =    64 prim +  788 seco   0   0   0   0   0   0;  1 229  49   1   1   1   1   1   2
 // primLen  7:    660  entries =   128 prim +  532 seco   1   0   0   0   0   0   0;  1 229  49   1   1   1   1   2
 // primLen  8:    660  entries =   256 prim +  404 seco   1   1   0   0   0   0   0   0;  1 229  49   1   1   1   2
 // primLen  9:    852  entries =   512 prim +  340 seco   1   1   1   0   0   0   0   0   0;  1 229  49   1   1   2
 // primLen 10:   1332  entries =  1024 prim +  308 seco   1   1   1   1   0   0   0   0   0   0;  1 229  49   1   2
 // primLen 11:   2340  entries =  2048 prim +  292 seco   1   1   1   1   1   0   0   0   0   0   0;  1 229  49   2
 // primLen 12:   4380  entries =  4096 prim +  284 seco   1   1   1   1   1   1   0   0   0   0   0   0;  1 229  50
 // primLen 13:   8472  entries =  8192 prim +  280 seco   1   1   1   1   1   1   0   1   0   0   0   0   0;105 174
 // primLen 14:  16658  entries = 16384 prim +  274 seco   1   1   1   1   1   1   0   1   1   1   0   1   1   1;274
 // primLen 15:  32768  entries = 32768 prim +    0 seco   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0
 //
 // -------- Distance (up to  30 symbols,   1666 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15
 // primLen  3:   4120  entries =     8 prim + 4112 seco   1   0   0;  1   9   9   1   1   1   1   1   1   1   1   2
 // primLen  4:   2080  entries =    16 prim + 2064 seco   1   1   0   0;  1   9   9   1   1   1   1   1   1   1   2
 // primLen  5:   1072  entries =    32 prim + 1040 seco   1   1   1   0   0;  1   9   9   1   1   1   1   1   1   2
 // primLen  6:    592  entries =    64 prim +  528 seco   1   1   1   1   0   0;  1   9   9   1   1   1   1   1   2
 // primLen  7:    400  entries =   128 prim +  272 seco   1   1   1   1   1   0   0;  1   9   9   1   1   1   1   2
 // primLen  8:    400  entries =   256 prim +  144 seco   1   1   1   1   1   1   0   0;  1   9   9   1   1   1   2
 // primLen  9:    592  entries =   512 prim +   80 seco   1   1   1   1   1   1   1   0   0;  1   9   9   1   1   2
 // primLen 10:   1072  entries =  1024 prim +   48 seco   1   1   1   1   1   1   1   1   0   0;  1   9   9   1   2
 // primLen 11:   2080  entries =  2048 prim +   32 seco   1   1   1   1   1   1   1   1   1   0   0;  1   9   9   2
 // primLen 12:   4120  entries =  4096 prim +   24 seco   1   1   1   1   1   1   1   1   1   1   0   0;  1   9  10
 // primLen 13:   8212  entries =  8192 prim +   20 seco   1   1   1   1   1   1   1   1   1   1   0   1   0;  5  14
 // primLen 14:  16400  entries = 16384 prim +   16 seco   1   1   1   1   1   1   1   1   1   1   1   0   0   0; 16
 // primLen 15:  32768  entries = 32768 prim +    0 seco   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0
 // ----------------
 //
 // Deflate's canonical Huffman codes are up to 15 bits long, but the decoder
 // can use fewer than (1 << 15) entries in its look-up table, by splitting it
 // into one primary table (of an at-compile-time fixed length <= 15) and zero
 // or more secondary tables (of variable lengths, determined at-run-time).
 //
 // Symbols whose Huffman codes' length are less than or equal to the primary
 // table length do not require any secondary tables. Longer codes are grouped
 // by their primLen length prefix. Each group occupies one entry in the primary
 // table, which redirects to a secondary table of size (1 << (N - primLen)),
 // where N is the length of the longest code in that group.
 //
 // This program calculates that, for a primary table length of 9 bits, the
 // worst case Huffman table size is 852 for the Literal/Length table (852 being
 // a 512 entry primary table and the secondary tables totalling 340 further
 // entries) and 592 (512 + 80) for the Distance table.
 //
 // Copy/pasting one row of that output (and the column headers):
 //
 // --------  Lit/Len (up to 286 symbols, 149142 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15
 // primLen  9:    852  entries =   512 prim +  340 seco   1   1   1   0   0   0   0   0   0;  1 229  49   1   1   2
 //
 // The "@1, @2, @3, ..." columns mean that one combination that hits that 852
 // worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0
 // 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a
 // total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols.
 //
 // See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual
 // example of valid Deflate-formatted data that exercises these worst-case code
 // lengths (for a primary table length of 9 bits).
 //
 // Hypothetically, suppose that we used a primLen of 13 for the Distance table.
 // Here is a code length assignment that produces 20 secondary table entries:
 //
 // -------- Distance (up to  30 symbols,   1666 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15
 // primLen 13:   8212  entries =  8192 prim +   20 seco   1   1   1   1   1   1   1   1   1   1   0   1   0;  5  14
 //
 // In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length
 // codes "f00", "f01", "f02", ..., "f13".
 //
 // Recall that secondary tables' lengths are (1 << (N - primLen)), where N is
 // the longest code for a given primLen length prefix. In this case, (N -
 // primLen) is (15 - 13) if the secondary table contains a "f??" code,
 // otherwise it N is (14 - 13). When the secondary table contains a mixture of
 // lengths, some of the shorter codes' entries are duplicated.
 //
 // Also, for canonical Huffman codes, the codes (bitstrings) for 14-length
 // codes are all assigned (sequentially) before any 15-length codes. There are
 // therefore 6 secondary tables:
 //
 //  - length 2: e0,  e1.
 //  - length 2: e2,  e3.
 //  - length 4: e4,  e4,  f00, f01.
 //  - length 4: f02, f03, f04, f05.
 //  - length 4: f06, f07, f08, f09.
 //  - length 4: f10, f11, f12, f13.
 //
 // The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20.

 import (
 	"fmt"
 	"os"
 	"sort"
 )

 const (
 	maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes.
 	maxCodeLen  = 15  // Each code's encoding is at most 15 bits.
 )

 func main() {
 	if err := main1(); err != nil {
 		os.Stderr.WriteString(err.Error() + "\n")
 		os.Exit(1)
 	}
 }

 func main1() error {
 	do("Lit/Len", 286)
 	do("Distance", 30)
 	return nil
 }

 func do(name string, maxNSyms uint32) {
 	ftab := &feasibleTable{}
 	ftab.build(maxNSyms)

 	// We can do the per-primLen computations concurrently, speeding up the
 	// overall wall time taken.
 	ch := make(chan string)
 	numPending := 0
 	for primLen := uint32(3); primLen <= maxCodeLen; primLen++ {
 		go doPrimLen(ch, maxNSyms, ftab, primLen)
 		numPending++
 	}

 	// Gather, sort and print the results for each primLen.
 	results := make([]string, 0, numPending)
 	for ; numPending > 0; numPending-- {
 		results = append(results, <-ch)
 	}
 	sort.Strings(results)
 	fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+
 		"  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15\n",
 		name, maxNSyms, ftab.count())
 	for _, s := range results {
 		fmt.Println(s)
 	}
 }

 type state struct {
 	ftab    *feasibleTable
 	primLen uint32

 	currCase struct {
 		nAssignedCodes nAssignedCodes
 	}

 	worstCase struct {
 		nAssignedCodes nAssignedCodes
 		nSecondary     uint32
 	}

 	visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector
 }

 func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) {
 	z := &state{
 		ftab:    ftab,
 		primLen: primLen,
 	}

 	if z.primLen > maxCodeLen {
 		panic("unreachable")

 	} else if z.primLen == maxCodeLen {
 		// There's only the primary table and no secondary tables. For an
 		// example that fills that primary table, create a trivial encoding of
 		// two symbols: one symbol's code is "0", the other's is "1".
 		z.worstCase.nAssignedCodes[1] = 2
 		for n := uint32(2); n <= maxCodeLen; n++ {
 			z.worstCase.nAssignedCodes[n] = 0
 		}

 	} else {
 		// Brute force search the tree of possible code length assignments.
 		//
 		// nCodes is always even: the number of unassigned Huffman codes at
 		// length L is twice the number at length (L-1). At the start, there
 		// are two unassigned codes of length 1: "0" and "1".
 		//
 		// nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there
 		// will be unassigned codes.
 		for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 {
 			for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ {
 				if z.ftab[z.primLen+1][nCodes][nSyms] {
 					z.visit(z.primLen+1, nCodes, nSyms, 0, 0)
 				}
 			}
 		}

 		z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen)
 	}

 	// Collect the details: the number of codes assigned to each length in the
 	// worst case.
 	details := ""
 	for n := uint32(1); n <= maxCodeLen; n++ {
 		if n == z.primLen+1 {
 			details += ";"
 		} else {
 			details += " "
 		}
 		details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n])
 	}

 	nPrimary := uint32(1) << z.primLen
 	nSecondary := z.worstCase.nSecondary
 	nEntries := nPrimary + nSecondary

 	ch <- fmt.Sprintf("primLen %2d:  %5d  entries = %5d prim + %4d seco%s",
 		z.primLen, nEntries, nPrimary, nSecondary, details)
 }

 // visit updates z.worstCase, starting with nCodes unassigned Huffman codes of
 // length codeLen for nSyms symbols. nSecondary is the total size (number of
 // entries) so far of all of the secondary tables.
 //
 // nRemain > 0 iff we have an incomplete secondary table, whose size was
 // partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If
 // positive, nRemain is the number of unassigned entries in that incomplete
 // secondary table.
 //
 // visit can recursively call itself with longer codeLen values. As it does so,
 // it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to
 // zero before it returns.
 func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) {
 	if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) {
 		// No-op.

 	} else if nCodes > nSyms {
 		// Infeasible: there will be unassigned codes.

 	} else if nCodes == nSyms {
 		// Fill in the incomplete secondary table (if present) and any new
 		// secondary tables (if necessary) to assign the nCodes codes to the
 		// nSyms symbols. At the end, we should have no remaining entries.
 		n := nCodes
 		for n > nRemain {
 			n -= nRemain
 			nRemain = 1 << (codeLen - z.primLen)
 			nSecondary += nRemain
 		}
 		if n != nRemain {
 			panic("inconsistent secondary table size")
 		}

 		// Update z.worstCase if we have a new record for nSecondary.
 		if z.worstCase.nSecondary < nSecondary {
 			z.worstCase.nSecondary = nSecondary
 			z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes
 			z.worstCase.nAssignedCodes[codeLen] = nCodes
 		}

 	} else { // nCodes < nSyms.
 		codeLen1 := codeLen + 1
 		// Brute force assigning n codes of this codeLen. The other (nCodes -
 		// n) codes will have longer codes.
 		for n := uint32(0); n < nCodes; n++ {
 			nCodes1 := (nCodes - n) * 2
 			nSyms1 := nSyms - n

 			if nCodes1 > nSyms1 {
 				// Infeasible: there will be unassigned codes.

 			} else if z.ftab[codeLen1][nCodes1][nSyms1] {
 				nSecondary1 := nSecondary
 				nRemain1 := nRemain * 2

 				if nRemain1 > 0 {
 					// We have an incomplete secondary table, which has been
 					// partially accounted for: nSecondary has previously been
 					// incremented by X, where X equals (1 << (codeLen -
 					// z.primLen)).
 					//
 					// As we recursively call visit, with codeLen increased by
 					// 1, then we need to double the accounted size of that
 					// secondary table to 2*X. Since X out of that 2*X has
 					// already been added, we need to increment nSecondary1 by
 					// just the difference, which is also X.
 					nSecondary1 += 1 << (codeLen - z.primLen)
 				}

 				z.currCase.nAssignedCodes[codeLen] = n
 				z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1)
 				z.currCase.nAssignedCodes[codeLen] = 0
 			}

 			// Open a new secondary table if necessary: one whose size is (1 <<
 			// (codeLen - z.primLen)).
 			if nRemain == 0 {
 				nRemain = 1 << (codeLen - z.primLen)
 				nSecondary += nRemain
 			}

 			// Assign one of the incomplete secondary table's entries.
 			nRemain--
 		}
 	}
 }

 // alreadyVisited returns whether z has already visited the (nSecondary,
 // nRemain) pair. As a side effect, it also marks that pair as visited.
 func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool {
 	// Use the zig re-numbering to minimize the memory requirements for the
 	// visited slice. There's an additional 8-fold memory reduction by using 1
 	// bit per element, not 1 bool (1 byte) per element.
 	i := zig(nSecondary/8, nRemain)
 	mask := uint8(1) << (nSecondary & 7)

 	visited := z.visited[codeLen][nCodes][nSyms]
 	if uint64(i) < uint64(len(visited)) {
 		x := visited[i]
 		visited[i] = x | mask
 		return x&mask != 0
 	}

 	oldDone := visited
 	visited = make(bitVector, i+1)
 	copy(visited, oldDone)
 	visited[i] = mask

 	z.visited[codeLen][nCodes][nSyms] = visited
 	return false
 }

 type bitVector []uint8

 // zig packs a pair of non-negative integers (i, j) into a single unique
 // non-negative integer, in a zig-zagging pattern:
 //
 //      i=0 i=1 i=2 i=3 i=4 etc
 // j=0:   0   1   3   6  10 ...
 // j=1:   2   4   7  11  16 ...
 // j=2:   5   8  12  17  23 ...
 // j=3:   9  13  18  24  31 ...
 // j=4:  14  19  25  32  40 ...
 // etc: ... ... ... ... ... ...
 //
 // The closed from expression relies on the fact that the sum (0 + 1 + ... + n)
 // equals ((n * (n + 1)) / 2).
 //
 // This lets us minimize the memory requirements of a triangular array: one for
 // which a[i][j] is only accessed when ((i+j) < someBound).
 func zig(i uint32, j uint32) uint32 {
 	if (i > 0x4000) || (j > 0x4000) {
 		panic("overflow")
 	}
 	n := i + j
 	return ((n * (n + 1)) / 2) + j
 }

 // nAssignedCodes[i] holds the number of codes of length i.
 type nAssignedCodes [maxCodeLen + 1]uint32

 // fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes
 // that the caller has already filled in a[n] for n > primLen.
 func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) {
 	// Looping backwards from the maxCodeLen, figure out how many unassigned
 	// primLen length codes there were.
 	//
 	// For example, if there were 10 assigned 15 length codes, then there must
 	// have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7
 	// 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned
 	// 13 length codes. Etcetera.
 	nUnassignedPrimLen := uint32(0)
 	for n := uint32(maxCodeLen); n > primLen; n-- {
 		nUnassignedPrimLen += a[n]
 		if nUnassignedPrimLen%2 != 0 {
 			panic("cannot undo Huffman code split")
 		}
 		nUnassignedPrimLen /= 2
 	}
 	if nUnassignedPrimLen < 1 {
 		panic("not enough unassigned primLen length codes")
 	}

 	// Pick a combination of assigning shorter length codes (i.e. setting a[n]
 	// values) to reach that nUnassignedPrimLen target. There are many possible
 	// ways to do so, but only one unique way with the additional constraint
 	// that each value is either 0 or 1: unique because every positive number
 	// has a unique binary representation.
 	//
 	// For example, suppose we need to have 12 unassigned primLen length codes.
 	// With that additional constraint, the way to do this is:
 	//
 	//  - 12 unassigned code of length primLen-0.
 	//  -  6 unassigned code of length primLen-1.
 	//  -  3 unassigned code of length primLen-2.
 	//  -  2 unassigned code of length primLen-3.
 	//  -  1 unassigned code of length primLen-4.
 	//  -  1 unassigned code of length primLen-5.
 	//  -  1 unassigned code of length primLen-6.
 	//  -  etc
 	//
 	// Each left hand column value is either ((2*b)-0) or ((2*b)-1), where b is
 	// the the number below. Subtracting 0 or 1 means that we assign 0 or 1
 	// codes of that row's length:
 	//
 	//  - 0 assigned code of length primLen-0.
 	//  - 0 assigned code of length primLen-1.
 	//  - 1 assigned code of length primLen-2.
 	//  - 0 assigned code of length primLen-3.
 	//  - 1 assigned code of length primLen-4.
 	//  - 1 assigned code of length primLen-5.
 	//  - 1 assigned code of length primLen-6.
 	//  - etc
 	//
 	// Reading upwards, this is the binary string "...1110100", which is the
 	// two's complement representation of the decimal number -12: the negative
 	// of nUnassignedPrimLen.
 	bits := -int32(nUnassignedPrimLen)
 	for n := primLen; n >= 1; n-- {
 		a[n] = uint32(bits & 1)
 		bits >>= 1
 	}
 	if bits != -1 {
 		panic("did not finish with one unassigned zero-length code")
 	}

 	// As a sanity check of the "negative two's complement" theory, loop
 	// forwards to calculate the number of unassigned primLen length codes,
 	// which should match nUnassignedPrimLen.
 	nUnassigned := uint32(1)
 	for n := uint32(1); n <= primLen; n++ {
 		nUnassigned *= 2
 		if nUnassigned <= a[n] {
 			panic("not enough unassigned codes")
 		}
 		nUnassigned -= a[n]
 	}
 	if nUnassigned != nUnassignedPrimLen {
 		panic("inconsistent unassigned codes")
 	}
 }

 // feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign
 // Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes
 // unassigned codes of length codeLen. Each unassigned code of length L can be
 // split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a
 // maximum code length of maxCodeLen. A feasible assignment ends with zero
 // unassigned codes, no more and no less.
 type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool

 func (f *feasibleTable) count() (ret uint64) {
 	for i := range *f {
 		for j := range f[i] {
 			for k := range f[i][j] {
 				if f[i][j][k] {
 					ret++
 				}
 			}
 		}
 	}
 	return ret
 }

 func (f *feasibleTable) build(maxNSyms uint32) {
 	for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ {
 		if f.buildMore(1, 2, nSyms) != bmResultOK {
 			panic("infeasible [codeLen][nCodes][nSyms] combination")
 		}
 	}
 }

 const (
 	bmResultOK            = 0
 	bmResultUseFewerCodes = 1
 	bmResultUseMoreCodes  = 2
 	bmResultUnsatisfiable = 3
 )

 func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 {
 	if nCodes == nSyms {
 		if nCodes%2 == 1 {
 			panic("odd nCodes declared feasible")
 		}
 		// This is trivial: assign every symbol a code of length codeLen.
 		f[codeLen][nCodes][nSyms] = true
 		return bmResultOK
 	} else if nCodes > nSyms {
 		// Infeasible: there will be unassigned codes.
 		return bmResultUseMoreCodes
 	}
 	// From here onwards, we have (nCodes < nSyms).

 	if (nCodes << (maxCodeLen - codeLen)) < nSyms {
 		// Infeasible: there will be unassigned symbols, even if we extend the
 		// codes out to maxCodeLen.
 		return bmResultUseFewerCodes
 	}

 	// If we've already visited this [codeLen][nCodes][nSyms] combination,
 	// there's no need to re-do the computation.
 	if f[codeLen][nCodes][nSyms] {
 		return bmResultOK
 	}

 	// Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering
 	// that we have checked above that (nCodes < nSyms). The remaining (nCodes
 	// - n) codes are lengthened by 1, doubling the number of them, and try to
 	// assign those longer codes to the remaining (nSyms - n) symbols.
 	ok := false
 	codeLen1 := codeLen + 1
 	for n := uint32(0); n < nCodes; n++ {
 		nCodes1 := (nCodes - n) * 2
 		nSyms1 := nSyms - n

 		if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK {
 			ok = true
 		} else if x == bmResultUseFewerCodes {
 			// This value of n didn't succeed, but also any larger n also won't
 			// succeed, so we can break the loop early.
 			break
 		}
 	}

 	if !ok {
 		return bmResultUnsatisfiable
 	}
 	if nCodes%2 == 1 {
 		panic("odd nCodes declared feasible")
 	}
 	f[codeLen][nCodes][nSyms] = true
 	return bmResultOK
 }
	// Copyright 2019 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// +build ignore

	package main

	// print-deflate-huff-table-size.go prints the worst-case number of elements in
	// the std/deflate decoder's huffs tables.
	//
	// Usage: go run print-deflate-huff-table-size.go
	//
	// This program might take tens of seconds to run.
	//
	// It is algorithmically similar to examples/enough.c in the zlib C library.
	// Much of the initial commentary in that file is also relevant for this
	// program. Not all of it applies, though. For example, the Wuffs decoder
	// implementation's primary (root) table length is fixed at compile time. It
	// does not grow that at run-time, even if the shortest Huffman code is longer
	// than that primary length.
	//
	// This program should print:
	//
	// ----------------
	// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
	// primLen 3: 4376 entries = 8 prim + 4368 seco 1 0 0; 1 1 1 17 1 1 257 1 1 1 1 2
	// primLen 4: 2338 entries = 16 prim + 2322 seco 0 0 0 0; 3 1 1 177 97 1 1 1 1 1 2
	// primLen 5: 1330 entries = 32 prim + 1298 seco 1 0 0 0 0; 3 1 1 177 97 1 1 1 1 2
	// primLen 6: 852 entries = 64 prim + 788 seco 0 0 0 0 0 0; 1 229 49 1 1 1 1 1 2
	// primLen 7: 660 entries = 128 prim + 532 seco 1 0 0 0 0 0 0; 1 229 49 1 1 1 1 2
	// primLen 8: 660 entries = 256 prim + 404 seco 1 1 0 0 0 0 0 0; 1 229 49 1 1 1 2
	// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
	// primLen 10: 1332 entries = 1024 prim + 308 seco 1 1 1 1 0 0 0 0 0 0; 1 229 49 1 2
	// primLen 11: 2340 entries = 2048 prim + 292 seco 1 1 1 1 1 0 0 0 0 0 0; 1 229 49 2
	// primLen 12: 4380 entries = 4096 prim + 284 seco 1 1 1 1 1 1 0 0 0 0 0 0; 1 229 50
	// primLen 13: 8472 entries = 8192 prim + 280 seco 1 1 1 1 1 1 0 1 0 0 0 0 0;105 174
	// primLen 14: 16658 entries = 16384 prim + 274 seco 1 1 1 1 1 1 0 1 1 1 0 1 1 1;274
	// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	//
	// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
	// primLen 3: 4120 entries = 8 prim + 4112 seco 1 0 0; 1 9 9 1 1 1 1 1 1 1 1 2
	// primLen 4: 2080 entries = 16 prim + 2064 seco 1 1 0 0; 1 9 9 1 1 1 1 1 1 1 2
	// primLen 5: 1072 entries = 32 prim + 1040 seco 1 1 1 0 0; 1 9 9 1 1 1 1 1 1 2
	// primLen 6: 592 entries = 64 prim + 528 seco 1 1 1 1 0 0; 1 9 9 1 1 1 1 1 2
	// primLen 7: 400 entries = 128 prim + 272 seco 1 1 1 1 1 0 0; 1 9 9 1 1 1 1 2
	// primLen 8: 400 entries = 256 prim + 144 seco 1 1 1 1 1 1 0 0; 1 9 9 1 1 1 2
	// primLen 9: 592 entries = 512 prim + 80 seco 1 1 1 1 1 1 1 0 0; 1 9 9 1 1 2
	// primLen 10: 1072 entries = 1024 prim + 48 seco 1 1 1 1 1 1 1 1 0 0; 1 9 9 1 2
	// primLen 11: 2080 entries = 2048 prim + 32 seco 1 1 1 1 1 1 1 1 1 0 0; 1 9 9 2
	// primLen 12: 4120 entries = 4096 prim + 24 seco 1 1 1 1 1 1 1 1 1 1 0 0; 1 9 10
	// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
	// primLen 14: 16400 entries = 16384 prim + 16 seco 1 1 1 1 1 1 1 1 1 1 1 0 0 0; 16
	// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	// ----------------
	//
	// Deflate's canonical Huffman codes are up to 15 bits long, but the decoder
	// can use fewer than (1 << 15) entries in its look-up table, by splitting it
	// into one primary table (of an at-compile-time fixed length <= 15) and zero
	// or more secondary tables (of variable lengths, determined at-run-time).
	//
	// Symbols whose Huffman codes' length are less than or equal to the primary
	// table length do not require any secondary tables. Longer codes are grouped
	// by their primLen length prefix. Each group occupies one entry in the primary
	// table, which redirects to a secondary table of size (1 << (N - primLen)),
	// where N is the length of the longest code in that group.
	//
	// This program calculates that, for a primary table length of 9 bits, the
	// worst case Huffman table size is 852 for the Literal/Length table (852 being
	// a 512 entry primary table and the secondary tables totalling 340 further
	// entries) and 592 (512 + 80) for the Distance table.
	//
	// Copy/pasting one row of that output (and the column headers):
	//
	// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
	// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
	//
	// The "@1, @2, @3, ..." columns mean that one combination that hits that 852
	// worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0
	// 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a
	// total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols.
	//
	// See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual
	// example of valid Deflate-formatted data that exercises these worst-case code
	// lengths (for a primary table length of 9 bits).
	//
	// Hypothetically, suppose that we used a primLen of 13 for the Distance table.
	// Here is a code length assignment that produces 20 secondary table entries:
	//
	// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
	// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
	//
	// In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length
	// codes "f00", "f01", "f02", ..., "f13".
	//
	// Recall that secondary tables' lengths are (1 << (N - primLen)), where N is
	// the longest code for a given primLen length prefix. In this case, (N -
	// primLen) is (15 - 13) if the secondary table contains a "f??" code,
	// otherwise it N is (14 - 13). When the secondary table contains a mixture of
	// lengths, some of the shorter codes' entries are duplicated.
	//
	// Also, for canonical Huffman codes, the codes (bitstrings) for 14-length
	// codes are all assigned (sequentially) before any 15-length codes. There are
	// therefore 6 secondary tables:
	//
	// - length 2: e0, e1.
	// - length 2: e2, e3.
	// - length 4: e4, e4, f00, f01.
	// - length 4: f02, f03, f04, f05.
	// - length 4: f06, f07, f08, f09.
	// - length 4: f10, f11, f12, f13.
	//
	// The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20.

	import (
	"fmt"
	"os"
	"sort"
	)

	const (
	maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes.
	maxCodeLen = 15 // Each code's encoding is at most 15 bits.
	)

	func main() {
	if err := main1(); err != nil {
	os.Stderr.WriteString(err.Error() + "\n")
	os.Exit(1)
	}
	}

	func main1() error {
	do("Lit/Len", 286)
	do("Distance", 30)
	return nil
	}

	func do(name string, maxNSyms uint32) {
	ftab := &feasibleTable{}
	ftab.build(maxNSyms)

	// We can do the per-primLen computations concurrently, speeding up the
	// overall wall time taken.
	ch := make(chan string)
	numPending := 0
	for primLen := uint32(3); primLen <= maxCodeLen; primLen++ {
	go doPrimLen(ch, maxNSyms, ftab, primLen)
	numPending++
	}

	// Gather, sort and print the results for each primLen.
	results := make([]string, 0, numPending)
	for ; numPending > 0; numPending-- {
	results = append(results, <-ch)
	}
	sort.Strings(results)
	fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+
	" @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15\n",
	name, maxNSyms, ftab.count())
	for _, s := range results {
	fmt.Println(s)
	}
	}

	type state struct {
	ftab *feasibleTable
	primLen uint32

	currCase struct {
	nAssignedCodes nAssignedCodes
	}

	worstCase struct {
	nAssignedCodes nAssignedCodes
	nSecondary uint32
	}

	visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector
	}

	func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) {
	z := &state{
	ftab: ftab,
	primLen: primLen,
	}

	if z.primLen > maxCodeLen {
	panic("unreachable")

	} else if z.primLen == maxCodeLen {
	// There's only the primary table and no secondary tables. For an
	// example that fills that primary table, create a trivial encoding of
	// two symbols: one symbol's code is "0", the other's is "1".
	z.worstCase.nAssignedCodes[1] = 2
	for n := uint32(2); n <= maxCodeLen; n++ {
	z.worstCase.nAssignedCodes[n] = 0
	}

	} else {
	// Brute force search the tree of possible code length assignments.
	//
	// nCodes is always even: the number of unassigned Huffman codes at
	// length L is twice the number at length (L-1). At the start, there
	// are two unassigned codes of length 1: "0" and "1".
	//
	// nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there
	// will be unassigned codes.
	for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 {
	for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ {
	if z.ftab[z.primLen+1][nCodes][nSyms] {
	z.visit(z.primLen+1, nCodes, nSyms, 0, 0)
	}
	}
	}

	z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen)
	}

	// Collect the details: the number of codes assigned to each length in the
	// worst case.
	details := ""
	for n := uint32(1); n <= maxCodeLen; n++ {
	if n == z.primLen+1 {
	details += ";"
	} else {
	details += " "
	}
	details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n])
	}

	nPrimary := uint32(1) << z.primLen
	nSecondary := z.worstCase.nSecondary
	nEntries := nPrimary + nSecondary

	ch <- fmt.Sprintf("primLen %2d: %5d entries = %5d prim + %4d seco%s",
	z.primLen, nEntries, nPrimary, nSecondary, details)
	}

	// visit updates z.worstCase, starting with nCodes unassigned Huffman codes of
	// length codeLen for nSyms symbols. nSecondary is the total size (number of
	// entries) so far of all of the secondary tables.
	//
	// nRemain > 0 iff we have an incomplete secondary table, whose size was
	// partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If
	// positive, nRemain is the number of unassigned entries in that incomplete
	// secondary table.
	//
	// visit can recursively call itself with longer codeLen values. As it does so,
	// it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to
	// zero before it returns.
	func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) {
	if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) {
	// No-op.

	} else if nCodes > nSyms {
	// Infeasible: there will be unassigned codes.

	} else if nCodes == nSyms {
	// Fill in the incomplete secondary table (if present) and any new
	// secondary tables (if necessary) to assign the nCodes codes to the
	// nSyms symbols. At the end, we should have no remaining entries.
	n := nCodes
	for n > nRemain {
	n -= nRemain
	nRemain = 1 << (codeLen - z.primLen)
	nSecondary += nRemain
	}
	if n != nRemain {
	panic("inconsistent secondary table size")
	}

	// Update z.worstCase if we have a new record for nSecondary.
	if z.worstCase.nSecondary < nSecondary {
	z.worstCase.nSecondary = nSecondary
	z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes
	z.worstCase.nAssignedCodes[codeLen] = nCodes
	}

	} else { // nCodes < nSyms.
	codeLen1 := codeLen + 1
	// Brute force assigning n codes of this codeLen. The other (nCodes -
	// n) codes will have longer codes.
	for n := uint32(0); n < nCodes; n++ {
	nCodes1 := (nCodes - n) * 2
	nSyms1 := nSyms - n

	if nCodes1 > nSyms1 {
	// Infeasible: there will be unassigned codes.

	} else if z.ftab[codeLen1][nCodes1][nSyms1] {
	nSecondary1 := nSecondary
	nRemain1 := nRemain * 2

	if nRemain1 > 0 {
	// We have an incomplete secondary table, which has been
	// partially accounted for: nSecondary has previously been
	// incremented by X, where X equals (1 << (codeLen -
	// z.primLen)).
	//
	// As we recursively call visit, with codeLen increased by
	// 1, then we need to double the accounted size of that
	// secondary table to 2X. Since X out of that 2X has
	// already been added, we need to increment nSecondary1 by
	// just the difference, which is also X.
	nSecondary1 += 1 << (codeLen - z.primLen)
	}

	z.currCase.nAssignedCodes[codeLen] = n
	z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1)
	z.currCase.nAssignedCodes[codeLen] = 0
	}

	// Open a new secondary table if necessary: one whose size is (1 <<
	// (codeLen - z.primLen)).
	if nRemain == 0 {
	nRemain = 1 << (codeLen - z.primLen)
	nSecondary += nRemain
	}

	// Assign one of the incomplete secondary table's entries.
	nRemain--
	}
	}
	}

	// alreadyVisited returns whether z has already visited the (nSecondary,
	// nRemain) pair. As a side effect, it also marks that pair as visited.
	func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool {
	// Use the zig re-numbering to minimize the memory requirements for the
	// visited slice. There's an additional 8-fold memory reduction by using 1
	// bit per element, not 1 bool (1 byte) per element.
	i := zig(nSecondary/8, nRemain)
	mask := uint8(1) << (nSecondary & 7)

	visited := z.visited[codeLen][nCodes][nSyms]
	if uint64(i) < uint64(len(visited)) {
	x := visited[i]
	visited[i] = x \| mask
	return x&mask != 0
	}

	oldDone := visited
	visited = make(bitVector, i+1)
	copy(visited, oldDone)
	visited[i] = mask

	z.visited[codeLen][nCodes][nSyms] = visited
	return false
	}

	type bitVector []uint8

	// zig packs a pair of non-negative integers (i, j) into a single unique
	// non-negative integer, in a zig-zagging pattern:
	//
	// i=0 i=1 i=2 i=3 i=4 etc
	// j=0: 0 1 3 6 10 ...
	// j=1: 2 4 7 11 16 ...
	// j=2: 5 8 12 17 23 ...
	// j=3: 9 13 18 24 31 ...
	// j=4: 14 19 25 32 40 ...
	// etc: ... ... ... ... ... ...
	//
	// The closed from expression relies on the fact that the sum (0 + 1 + ... + n)
	// equals ((n * (n + 1)) / 2).
	//
	// This lets us minimize the memory requirements of a triangular array: one for
	// which a[i][j] is only accessed when ((i+j) < someBound).
	func zig(i uint32, j uint32) uint32 {
	if (i > 0x4000) \|\| (j > 0x4000) {
	panic("overflow")
	}
	n := i + j
	return ((n * (n + 1)) / 2) + j
	}

	// nAssignedCodes[i] holds the number of codes of length i.
	type nAssignedCodes [maxCodeLen + 1]uint32

	// fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes
	// that the caller has already filled in a[n] for n > primLen.
	func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) {
	// Looping backwards from the maxCodeLen, figure out how many unassigned
	// primLen length codes there were.
	//
	// For example, if there were 10 assigned 15 length codes, then there must
	// have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7
	// 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned
	// 13 length codes. Etcetera.
	nUnassignedPrimLen := uint32(0)
	for n := uint32(maxCodeLen); n > primLen; n-- {
	nUnassignedPrimLen += a[n]
	if nUnassignedPrimLen%2 != 0 {
	panic("cannot undo Huffman code split")
	}
	nUnassignedPrimLen /= 2
	}
	if nUnassignedPrimLen < 1 {
	panic("not enough unassigned primLen length codes")
	}

	// Pick a combination of assigning shorter length codes (i.e. setting a[n]
	// values) to reach that nUnassignedPrimLen target. There are many possible
	// ways to do so, but only one unique way with the additional constraint
	// that each value is either 0 or 1: unique because every positive number
	// has a unique binary representation.
	//
	// For example, suppose we need to have 12 unassigned primLen length codes.
	// With that additional constraint, the way to do this is:
	//
	// - 12 unassigned code of length primLen-0.
	// - 6 unassigned code of length primLen-1.
	// - 3 unassigned code of length primLen-2.
	// - 2 unassigned code of length primLen-3.
	// - 1 unassigned code of length primLen-4.
	// - 1 unassigned code of length primLen-5.
	// - 1 unassigned code of length primLen-6.
	// - etc
	//
	// Each left hand column value is either ((2b)-0) or ((2b)-1), where b is
	// the the number below. Subtracting 0 or 1 means that we assign 0 or 1
	// codes of that row's length:
	//
	// - 0 assigned code of length primLen-0.
	// - 0 assigned code of length primLen-1.
	// - 1 assigned code of length primLen-2.
	// - 0 assigned code of length primLen-3.
	// - 1 assigned code of length primLen-4.
	// - 1 assigned code of length primLen-5.
	// - 1 assigned code of length primLen-6.
	// - etc
	//
	// Reading upwards, this is the binary string "...1110100", which is the
	// two's complement representation of the decimal number -12: the negative
	// of nUnassignedPrimLen.
	bits := -int32(nUnassignedPrimLen)
	for n := primLen; n >= 1; n-- {
	a[n] = uint32(bits & 1)
	bits >>= 1
	}
	if bits != -1 {
	panic("did not finish with one unassigned zero-length code")
	}

	// As a sanity check of the "negative two's complement" theory, loop
	// forwards to calculate the number of unassigned primLen length codes,
	// which should match nUnassignedPrimLen.
	nUnassigned := uint32(1)
	for n := uint32(1); n <= primLen; n++ {
	nUnassigned *= 2
	if nUnassigned <= a[n] {
	panic("not enough unassigned codes")
	}
	nUnassigned -= a[n]
	}
	if nUnassigned != nUnassignedPrimLen {
	panic("inconsistent unassigned codes")
	}
	}

	// feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign
	// Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes
	// unassigned codes of length codeLen. Each unassigned code of length L can be
	// split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a
	// maximum code length of maxCodeLen. A feasible assignment ends with zero
	// unassigned codes, no more and no less.
	type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool

	func (f *feasibleTable) count() (ret uint64) {
	for i := range *f {
	for j := range f[i] {
	for k := range f[i][j] {
	if f[i][j][k] {
	ret++
	}
	}
	}
	}
	return ret
	}

	func (f *feasibleTable) build(maxNSyms uint32) {
	for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ {
	if f.buildMore(1, 2, nSyms) != bmResultOK {
	panic("infeasible [codeLen][nCodes][nSyms] combination")
	}
	}
	}

	const (
	bmResultOK = 0
	bmResultUseFewerCodes = 1
	bmResultUseMoreCodes = 2
	bmResultUnsatisfiable = 3
	)

	func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 {
	if nCodes == nSyms {
	if nCodes%2 == 1 {
	panic("odd nCodes declared feasible")
	}
	// This is trivial: assign every symbol a code of length codeLen.
	f[codeLen][nCodes][nSyms] = true
	return bmResultOK
	} else if nCodes > nSyms {
	// Infeasible: there will be unassigned codes.
	return bmResultUseMoreCodes
	}
	// From here onwards, we have (nCodes < nSyms).

	if (nCodes << (maxCodeLen - codeLen)) < nSyms {
	// Infeasible: there will be unassigned symbols, even if we extend the
	// codes out to maxCodeLen.
	return bmResultUseFewerCodes
	}

	// If we've already visited this [codeLen][nCodes][nSyms] combination,
	// there's no need to re-do the computation.
	if f[codeLen][nCodes][nSyms] {
	return bmResultOK
	}

	// Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering
	// that we have checked above that (nCodes < nSyms). The remaining (nCodes
	// - n) codes are lengthened by 1, doubling the number of them, and try to
	// assign those longer codes to the remaining (nSyms - n) symbols.
	ok := false
	codeLen1 := codeLen + 1
	for n := uint32(0); n < nCodes; n++ {
	nCodes1 := (nCodes - n) * 2
	nSyms1 := nSyms - n

	if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK {
	ok = true
	} else if x == bmResultUseFewerCodes {
	// This value of n didn't succeed, but also any larger n also won't
	// succeed, so we can break the loop early.
	break
	}
	}

	if !ok {
	return bmResultUnsatisfiable
	}
	if nCodes%2 == 1 {
	panic("odd nCodes declared feasible")
	}
	f[codeLen][nCodes][nSyms] = true
	return bmResultOK
	}