blob: 32cbe27f9a218e99a181bf4ffa81a7636fbbd1a6 [file] [log] [blame]
// Copyright 2019 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build ignore
// +build ignore
package main
// print-deflate-huff-table-size.go prints the worst-case number of elements in
// the std/deflate decoder's huffs tables.
//
// Usage: go run print-deflate-huff-table-size.go
//
// This program might take tens of seconds to run.
//
// It is algorithmically similar to examples/enough.c in the zlib C library.
// Much of the initial commentary in that file is also relevant for this
// program. Not all of it applies, though. For example, the Wuffs decoder
// implementation's primary (root) table length is fixed at compile time. It
// does not grow that at run-time, even if the shortest Huffman code is longer
// than that primary length.
//
// This program should print:
//
// ----------------
// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
// primLen 3: 4376 entries = 8 prim + 4368 seco 1 0 0; 1 1 1 17 1 1 257 1 1 1 1 2
// primLen 4: 2338 entries = 16 prim + 2322 seco 0 0 0 0; 3 1 1 177 97 1 1 1 1 1 2
// primLen 5: 1330 entries = 32 prim + 1298 seco 1 0 0 0 0; 3 1 1 177 97 1 1 1 1 2
// primLen 6: 852 entries = 64 prim + 788 seco 0 0 0 0 0 0; 1 229 49 1 1 1 1 1 2
// primLen 7: 660 entries = 128 prim + 532 seco 1 0 0 0 0 0 0; 1 229 49 1 1 1 1 2
// primLen 8: 660 entries = 256 prim + 404 seco 1 1 0 0 0 0 0 0; 1 229 49 1 1 1 2
// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
// primLen 10: 1332 entries = 1024 prim + 308 seco 1 1 1 1 0 0 0 0 0 0; 1 229 49 1 2
// primLen 11: 2340 entries = 2048 prim + 292 seco 1 1 1 1 1 0 0 0 0 0 0; 1 229 49 2
// primLen 12: 4380 entries = 4096 prim + 284 seco 1 1 1 1 1 1 0 0 0 0 0 0; 1 229 50
// primLen 13: 8472 entries = 8192 prim + 280 seco 1 1 1 1 1 1 0 1 0 0 0 0 0;105 174
// primLen 14: 16658 entries = 16384 prim + 274 seco 1 1 1 1 1 1 0 1 1 1 0 1 1 1;274
// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
//
// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
// primLen 3: 4120 entries = 8 prim + 4112 seco 1 0 0; 1 9 9 1 1 1 1 1 1 1 1 2
// primLen 4: 2080 entries = 16 prim + 2064 seco 1 1 0 0; 1 9 9 1 1 1 1 1 1 1 2
// primLen 5: 1072 entries = 32 prim + 1040 seco 1 1 1 0 0; 1 9 9 1 1 1 1 1 1 2
// primLen 6: 592 entries = 64 prim + 528 seco 1 1 1 1 0 0; 1 9 9 1 1 1 1 1 2
// primLen 7: 400 entries = 128 prim + 272 seco 1 1 1 1 1 0 0; 1 9 9 1 1 1 1 2
// primLen 8: 400 entries = 256 prim + 144 seco 1 1 1 1 1 1 0 0; 1 9 9 1 1 1 2
// primLen 9: 592 entries = 512 prim + 80 seco 1 1 1 1 1 1 1 0 0; 1 9 9 1 1 2
// primLen 10: 1072 entries = 1024 prim + 48 seco 1 1 1 1 1 1 1 1 0 0; 1 9 9 1 2
// primLen 11: 2080 entries = 2048 prim + 32 seco 1 1 1 1 1 1 1 1 1 0 0; 1 9 9 2
// primLen 12: 4120 entries = 4096 prim + 24 seco 1 1 1 1 1 1 1 1 1 1 0 0; 1 9 10
// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
// primLen 14: 16400 entries = 16384 prim + 16 seco 1 1 1 1 1 1 1 1 1 1 1 0 0 0; 16
// primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// ----------------
//
// Deflate's canonical Huffman codes are up to 15 bits long, but the decoder
// can use fewer than (1 << 15) entries in its look-up table, by splitting it
// into one primary table (of an at-compile-time fixed length <= 15) and zero
// or more secondary tables (of variable lengths, determined at-run-time).
//
// Symbols whose Huffman codes' length are less than or equal to the primary
// table length do not require any secondary tables. Longer codes are grouped
// by their primLen length prefix. Each group occupies one entry in the primary
// table, which redirects to a secondary table of size (1 << (N - primLen)),
// where N is the length of the longest code in that group.
//
// This program calculates that, for a primary table length of 9 bits, the
// worst case Huffman table size is 852 for the Literal/Length table (852 being
// a 512 entry primary table and the secondary tables totalling 340 further
// entries) and 592 (512 + 80) for the Distance table.
//
// Copy/pasting one row of that output (and the column headers):
//
// -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
// primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2
//
// The "@1, @2, @3, ..." columns mean that one combination that hits that 852
// worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0
// 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a
// total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols.
//
// See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual
// example of valid Deflate-formatted data that exercises these worst-case code
// lengths (for a primary table length of 9 bits).
//
// Hypothetically, suppose that we used a primLen of 13 for the Distance table.
// Here is a code length assignment that produces 20 secondary table entries:
//
// -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15
// primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14
//
// In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length
// codes "f00", "f01", "f02", ..., "f13".
//
// Recall that secondary tables' lengths are (1 << (N - primLen)), where N is
// the longest code for a given primLen length prefix. In this case, (N -
// primLen) is (15 - 13) if the secondary table contains a "f??" code,
// otherwise it N is (14 - 13). When the secondary table contains a mixture of
// lengths, some of the shorter codes' entries are duplicated.
//
// Also, for canonical Huffman codes, the codes (bitstrings) for 14-length
// codes are all assigned (sequentially) before any 15-length codes. There are
// therefore 6 secondary tables:
//
// - length 2: e0, e1.
// - length 2: e2, e3.
// - length 4: e4, e4, f00, f01.
// - length 4: f02, f03, f04, f05.
// - length 4: f06, f07, f08, f09.
// - length 4: f10, f11, f12, f13.
//
// The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20.
import (
"fmt"
"os"
"sort"
)
const (
maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes.
maxCodeLen = 15 // Each code's encoding is at most 15 bits.
)
func main() {
if err := main1(); err != nil {
os.Stderr.WriteString(err.Error() + "\n")
os.Exit(1)
}
}
func main1() error {
do("Lit/Len", 286)
do("Distance", 30)
return nil
}
func do(name string, maxNSyms uint32) {
ftab := &feasibleTable{}
ftab.build(maxNSyms)
// We can do the per-primLen computations concurrently, speeding up the
// overall wall time taken.
ch := make(chan string)
numPending := 0
for primLen := uint32(3); primLen <= maxCodeLen; primLen++ {
go doPrimLen(ch, maxNSyms, ftab, primLen)
numPending++
}
// Gather, sort and print the results for each primLen.
results := make([]string, 0, numPending)
for ; numPending > 0; numPending-- {
results = append(results, <-ch)
}
sort.Strings(results)
fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+
" @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15\n",
name, maxNSyms, ftab.count())
for _, s := range results {
fmt.Println(s)
}
}
type state struct {
ftab *feasibleTable
primLen uint32
currCase struct {
nAssignedCodes nAssignedCodes
}
worstCase struct {
nAssignedCodes nAssignedCodes
nSecondary uint32
}
visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector
}
func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) {
z := &state{
ftab: ftab,
primLen: primLen,
}
if z.primLen > maxCodeLen {
panic("unreachable")
} else if z.primLen == maxCodeLen {
// There's only the primary table and no secondary tables. For an
// example that fills that primary table, create a trivial encoding of
// two symbols: one symbol's code is "0", the other's is "1".
z.worstCase.nAssignedCodes[1] = 2
for n := uint32(2); n <= maxCodeLen; n++ {
z.worstCase.nAssignedCodes[n] = 0
}
} else {
// Brute force search the tree of possible code length assignments.
//
// nCodes is always even: the number of unassigned Huffman codes at
// length L is twice the number at length (L-1). At the start, there
// are two unassigned codes of length 1: "0" and "1".
//
// nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there
// will be unassigned codes.
for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 {
for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ {
if z.ftab[z.primLen+1][nCodes][nSyms] {
z.visit(z.primLen+1, nCodes, nSyms, 0, 0)
}
}
}
z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen)
}
// Collect the details: the number of codes assigned to each length in the
// worst case.
details := ""
for n := uint32(1); n <= maxCodeLen; n++ {
if n == z.primLen+1 {
details += ";"
} else {
details += " "
}
details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n])
}
nPrimary := uint32(1) << z.primLen
nSecondary := z.worstCase.nSecondary
nEntries := nPrimary + nSecondary
ch <- fmt.Sprintf("primLen %2d: %5d entries = %5d prim + %4d seco%s",
z.primLen, nEntries, nPrimary, nSecondary, details)
}
// visit updates z.worstCase, starting with nCodes unassigned Huffman codes of
// length codeLen for nSyms symbols. nSecondary is the total size (number of
// entries) so far of all of the secondary tables.
//
// nRemain > 0 iff we have an incomplete secondary table, whose size was
// partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If
// positive, nRemain is the number of unassigned entries in that incomplete
// secondary table.
//
// visit can recursively call itself with longer codeLen values. As it does so,
// it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to
// zero before it returns.
func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) {
if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) {
// No-op.
} else if nCodes > nSyms {
// Infeasible: there will be unassigned codes.
} else if nCodes == nSyms {
// Fill in the incomplete secondary table (if present) and any new
// secondary tables (if necessary) to assign the nCodes codes to the
// nSyms symbols. At the end, we should have no remaining entries.
n := nCodes
for n > nRemain {
n -= nRemain
nRemain = 1 << (codeLen - z.primLen)
nSecondary += nRemain
}
if n != nRemain {
panic("inconsistent secondary table size")
}
// Update z.worstCase if we have a new record for nSecondary.
if z.worstCase.nSecondary < nSecondary {
z.worstCase.nSecondary = nSecondary
z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes
z.worstCase.nAssignedCodes[codeLen] = nCodes
}
} else { // nCodes < nSyms.
codeLen1 := codeLen + 1
// Brute force assigning n codes of this codeLen. The other (nCodes -
// n) codes will have longer codes.
for n := uint32(0); n < nCodes; n++ {
nCodes1 := (nCodes - n) * 2
nSyms1 := nSyms - n
if nCodes1 > nSyms1 {
// Infeasible: there will be unassigned codes.
} else if z.ftab[codeLen1][nCodes1][nSyms1] {
nSecondary1 := nSecondary
nRemain1 := nRemain * 2
if nRemain1 > 0 {
// We have an incomplete secondary table, which has been
// partially accounted for: nSecondary has previously been
// incremented by X, where X equals (1 << (codeLen -
// z.primLen)).
//
// As we recursively call visit, with codeLen increased by
// 1, then we need to double the accounted size of that
// secondary table to 2*X. Since X out of that 2*X has
// already been added, we need to increment nSecondary1 by
// just the difference, which is also X.
nSecondary1 += 1 << (codeLen - z.primLen)
}
z.currCase.nAssignedCodes[codeLen] = n
z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1)
z.currCase.nAssignedCodes[codeLen] = 0
}
// Open a new secondary table if necessary: one whose size is (1 <<
// (codeLen - z.primLen)).
if nRemain == 0 {
nRemain = 1 << (codeLen - z.primLen)
nSecondary += nRemain
}
// Assign one of the incomplete secondary table's entries.
nRemain--
}
}
}
// alreadyVisited returns whether z has already visited the (nSecondary,
// nRemain) pair. As a side effect, it also marks that pair as visited.
func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool {
// Use the zig re-numbering to minimize the memory requirements for the
// visited slice. There's an additional 8-fold memory reduction by using 1
// bit per element, not 1 bool (1 byte) per element.
i := zig(nSecondary/8, nRemain)
mask := uint8(1) << (nSecondary & 7)
visited := z.visited[codeLen][nCodes][nSyms]
if uint64(i) < uint64(len(visited)) {
x := visited[i]
visited[i] = x | mask
return x&mask != 0
}
oldDone := visited
visited = make(bitVector, i+1)
copy(visited, oldDone)
visited[i] = mask
z.visited[codeLen][nCodes][nSyms] = visited
return false
}
type bitVector []uint8
// zig packs a pair of non-negative integers (i, j) into a single unique
// non-negative integer, in a zig-zagging pattern:
//
// i=0 i=1 i=2 i=3 i=4 etc
// j=0: 0 1 3 6 10 ...
// j=1: 2 4 7 11 16 ...
// j=2: 5 8 12 17 23 ...
// j=3: 9 13 18 24 31 ...
// j=4: 14 19 25 32 40 ...
// etc: ... ... ... ... ... ...
//
// The closed from expression relies on the fact that the sum (0 + 1 + ... + n)
// equals ((n * (n + 1)) / 2).
//
// This lets us minimize the memory requirements of a triangular array: one for
// which a[i][j] is only accessed when ((i+j) < someBound).
func zig(i uint32, j uint32) uint32 {
if (i > 0x4000) || (j > 0x4000) {
panic("overflow")
}
n := i + j
return ((n * (n + 1)) / 2) + j
}
// nAssignedCodes[i] holds the number of codes of length i.
type nAssignedCodes [maxCodeLen + 1]uint32
// fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes
// that the caller has already filled in a[n] for n > primLen.
func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) {
// Looping backwards from the maxCodeLen, figure out how many unassigned
// primLen length codes there were.
//
// For example, if there were 10 assigned 15 length codes, then there must
// have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7
// 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned
// 13 length codes. Etcetera.
nUnassignedPrimLen := uint32(0)
for n := uint32(maxCodeLen); n > primLen; n-- {
nUnassignedPrimLen += a[n]
if nUnassignedPrimLen%2 != 0 {
panic("cannot undo Huffman code split")
}
nUnassignedPrimLen /= 2
}
if nUnassignedPrimLen < 1 {
panic("not enough unassigned primLen length codes")
}
// Pick a combination of assigning shorter length codes (i.e. setting a[n]
// values) to reach that nUnassignedPrimLen target. There are many possible
// ways to do so, but only one unique way with the additional constraint
// that each value is either 0 or 1: unique because every positive number
// has a unique binary representation.
//
// For example, suppose we need to have 12 unassigned primLen length codes.
// With that additional constraint, the way to do this is:
//
// - 12 unassigned code of length primLen-0.
// - 6 unassigned code of length primLen-1.
// - 3 unassigned code of length primLen-2.
// - 2 unassigned code of length primLen-3.
// - 1 unassigned code of length primLen-4.
// - 1 unassigned code of length primLen-5.
// - 1 unassigned code of length primLen-6.
// - etc
//
// Each left hand column value is either ((2*b)-0) or ((2*b)-1), where b is
// the the number below. Subtracting 0 or 1 means that we assign 0 or 1
// codes of that row's length:
//
// - 0 assigned code of length primLen-0.
// - 0 assigned code of length primLen-1.
// - 1 assigned code of length primLen-2.
// - 0 assigned code of length primLen-3.
// - 1 assigned code of length primLen-4.
// - 1 assigned code of length primLen-5.
// - 1 assigned code of length primLen-6.
// - etc
//
// Reading upwards, this is the binary string "...1110100", which is the
// two's complement representation of the decimal number -12: the negative
// of nUnassignedPrimLen.
bits := -int32(nUnassignedPrimLen)
for n := primLen; n >= 1; n-- {
a[n] = uint32(bits & 1)
bits >>= 1
}
if bits != -1 {
panic("did not finish with one unassigned zero-length code")
}
// As a coherence check of the "negative two's complement" theory, loop
// forwards to calculate the number of unassigned primLen length codes,
// which should match nUnassignedPrimLen.
nUnassigned := uint32(1)
for n := uint32(1); n <= primLen; n++ {
nUnassigned *= 2
if nUnassigned <= a[n] {
panic("not enough unassigned codes")
}
nUnassigned -= a[n]
}
if nUnassigned != nUnassignedPrimLen {
panic("inconsistent unassigned codes")
}
}
// feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign
// Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes
// unassigned codes of length codeLen. Each unassigned code of length L can be
// split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a
// maximum code length of maxCodeLen. A feasible assignment ends with zero
// unassigned codes, no more and no less.
type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool
func (f *feasibleTable) count() (ret uint64) {
for i := range *f {
for j := range f[i] {
for k := range f[i][j] {
if f[i][j][k] {
ret++
}
}
}
}
return ret
}
func (f *feasibleTable) build(maxNSyms uint32) {
for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ {
if f.buildMore(1, 2, nSyms) != bmResultOK {
panic("infeasible [codeLen][nCodes][nSyms] combination")
}
}
}
const (
bmResultOK = 0
bmResultUseFewerCodes = 1
bmResultUseMoreCodes = 2
bmResultUnsatisfiable = 3
)
func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 {
if nCodes == nSyms {
if nCodes%2 == 1 {
panic("odd nCodes declared feasible")
}
// This is trivial: assign every symbol a code of length codeLen.
f[codeLen][nCodes][nSyms] = true
return bmResultOK
} else if nCodes > nSyms {
// Infeasible: there will be unassigned codes.
return bmResultUseMoreCodes
}
// From here onwards, we have (nCodes < nSyms).
if (nCodes << (maxCodeLen - codeLen)) < nSyms {
// Infeasible: there will be unassigned symbols, even if we extend the
// codes out to maxCodeLen.
return bmResultUseFewerCodes
}
// If we've already visited this [codeLen][nCodes][nSyms] combination,
// there's no need to re-do the computation.
if f[codeLen][nCodes][nSyms] {
return bmResultOK
}
// Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering
// that we have checked above that (nCodes < nSyms). The remaining (nCodes
// - n) codes are lengthened by 1, doubling the number of them, and try to
// assign those longer codes to the remaining (nSyms - n) symbols.
ok := false
codeLen1 := codeLen + 1
for n := uint32(0); n < nCodes; n++ {
nCodes1 := (nCodes - n) * 2
nSyms1 := nSyms - n
if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK {
ok = true
} else if x == bmResultUseFewerCodes {
// This value of n didn't succeed, but also any larger n also won't
// succeed, so we can break the loop early.
break
}
}
if !ok {
return bmResultUnsatisfiable
}
if nCodes%2 == 1 {
panic("odd nCodes declared feasible")
}
f[codeLen][nCodes][nSyms] = true
return bmResultOK
}