|  | // Copyright 2019 The Wuffs Authors. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //    https://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | // +build ignore | 
|  |  | 
|  | package main | 
|  |  | 
|  | // print-deflate-huff-table-size.go prints the worst-case number of elements in | 
|  | // the std/deflate decoder's huffs tables. | 
|  | // | 
|  | // Usage: go run print-deflate-huff-table-size.go | 
|  | // | 
|  | // This program might take tens of seconds to run. | 
|  | // | 
|  | // It is algorithmically similar to examples/enough.c in the zlib C library. | 
|  | // Much of the initial commentary in that file is also relevant for this | 
|  | // program. Not all of it applies, though. For example, the Wuffs decoder | 
|  | // implementation's primary (root) table length is fixed at compile time. It | 
|  | // does not grow that at run-time, even if the shortest Huffman code is longer | 
|  | // than that primary length. | 
|  | // | 
|  | // This program should print: | 
|  | // | 
|  | // ---------------- | 
|  | // --------  Lit/Len (up to 286 symbols, 149142 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15 | 
|  | // primLen  3:   4376  entries =     8 prim + 4368 seco   1   0   0;  1   1   1  17   1   1 257   1   1   1   1   2 | 
|  | // primLen  4:   2338  entries =    16 prim + 2322 seco   0   0   0   0;  3   1   1 177  97   1   1   1   1   1   2 | 
|  | // primLen  5:   1330  entries =    32 prim + 1298 seco   1   0   0   0   0;  3   1   1 177  97   1   1   1   1   2 | 
|  | // primLen  6:    852  entries =    64 prim +  788 seco   0   0   0   0   0   0;  1 229  49   1   1   1   1   1   2 | 
|  | // primLen  7:    660  entries =   128 prim +  532 seco   1   0   0   0   0   0   0;  1 229  49   1   1   1   1   2 | 
|  | // primLen  8:    660  entries =   256 prim +  404 seco   1   1   0   0   0   0   0   0;  1 229  49   1   1   1   2 | 
|  | // primLen  9:    852  entries =   512 prim +  340 seco   1   1   1   0   0   0   0   0   0;  1 229  49   1   1   2 | 
|  | // primLen 10:   1332  entries =  1024 prim +  308 seco   1   1   1   1   0   0   0   0   0   0;  1 229  49   1   2 | 
|  | // primLen 11:   2340  entries =  2048 prim +  292 seco   1   1   1   1   1   0   0   0   0   0   0;  1 229  49   2 | 
|  | // primLen 12:   4380  entries =  4096 prim +  284 seco   1   1   1   1   1   1   0   0   0   0   0   0;  1 229  50 | 
|  | // primLen 13:   8472  entries =  8192 prim +  280 seco   1   1   1   1   1   1   0   1   0   0   0   0   0;105 174 | 
|  | // primLen 14:  16658  entries = 16384 prim +  274 seco   1   1   1   1   1   1   0   1   1   1   0   1   1   1;274 | 
|  | // primLen 15:  32768  entries = 32768 prim +    0 seco   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0 | 
|  | // | 
|  | // -------- Distance (up to  30 symbols,   1666 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15 | 
|  | // primLen  3:   4120  entries =     8 prim + 4112 seco   1   0   0;  1   9   9   1   1   1   1   1   1   1   1   2 | 
|  | // primLen  4:   2080  entries =    16 prim + 2064 seco   1   1   0   0;  1   9   9   1   1   1   1   1   1   1   2 | 
|  | // primLen  5:   1072  entries =    32 prim + 1040 seco   1   1   1   0   0;  1   9   9   1   1   1   1   1   1   2 | 
|  | // primLen  6:    592  entries =    64 prim +  528 seco   1   1   1   1   0   0;  1   9   9   1   1   1   1   1   2 | 
|  | // primLen  7:    400  entries =   128 prim +  272 seco   1   1   1   1   1   0   0;  1   9   9   1   1   1   1   2 | 
|  | // primLen  8:    400  entries =   256 prim +  144 seco   1   1   1   1   1   1   0   0;  1   9   9   1   1   1   2 | 
|  | // primLen  9:    592  entries =   512 prim +   80 seco   1   1   1   1   1   1   1   0   0;  1   9   9   1   1   2 | 
|  | // primLen 10:   1072  entries =  1024 prim +   48 seco   1   1   1   1   1   1   1   1   0   0;  1   9   9   1   2 | 
|  | // primLen 11:   2080  entries =  2048 prim +   32 seco   1   1   1   1   1   1   1   1   1   0   0;  1   9   9   2 | 
|  | // primLen 12:   4120  entries =  4096 prim +   24 seco   1   1   1   1   1   1   1   1   1   1   0   0;  1   9  10 | 
|  | // primLen 13:   8212  entries =  8192 prim +   20 seco   1   1   1   1   1   1   1   1   1   1   0   1   0;  5  14 | 
|  | // primLen 14:  16400  entries = 16384 prim +   16 seco   1   1   1   1   1   1   1   1   1   1   1   0   0   0; 16 | 
|  | // primLen 15:  32768  entries = 32768 prim +    0 seco   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0 | 
|  | // ---------------- | 
|  | // | 
|  | // Deflate's canonical Huffman codes are up to 15 bits long, but the decoder | 
|  | // can use fewer than (1 << 15) entries in its look-up table, by splitting it | 
|  | // into one primary table (of an at-compile-time fixed length <= 15) and zero | 
|  | // or more secondary tables (of variable lengths, determined at-run-time). | 
|  | // | 
|  | // Symbols whose Huffman codes' length are less than or equal to the primary | 
|  | // table length do not require any secondary tables. Longer codes are grouped | 
|  | // by their primLen length prefix. Each group occupies one entry in the primary | 
|  | // table, which redirects to a secondary table of size (1 << (N - primLen)), | 
|  | // where N is the length of the longest code in that group. | 
|  | // | 
|  | // This program calculates that, for a primary table length of 9 bits, the | 
|  | // worst case Huffman table size is 852 for the Literal/Length table (852 being | 
|  | // a 512 entry primary table and the secondary tables totalling 340 further | 
|  | // entries) and 592 (512 + 80) for the Distance table. | 
|  | // | 
|  | // Copy/pasting one row of that output (and the column headers): | 
|  | // | 
|  | // --------  Lit/Len (up to 286 symbols, 149142 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15 | 
|  | // primLen  9:    852  entries =   512 prim +  340 seco   1   1   1   0   0   0   0   0   0;  1 229  49   1   1   2 | 
|  | // | 
|  | // The "@1, @2, @3, ..." columns mean that one combination that hits that 852 | 
|  | // worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0 | 
|  | // 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a | 
|  | // total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols. | 
|  | // | 
|  | // See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual | 
|  | // example of valid Deflate-formatted data that exercises these worst-case code | 
|  | // lengths (for a primary table length of 9 bits). | 
|  | // | 
|  | // Hypothetically, suppose that we used a primLen of 13 for the Distance table. | 
|  | // Here is a code length assignment that produces 20 secondary table entries: | 
|  | // | 
|  | // -------- Distance (up to  30 symbols,   1666 combos)  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15 | 
|  | // primLen 13:   8212  entries =  8192 prim +   20 seco   1   1   1   1   1   1   1   1   1   1   0   1   0;  5  14 | 
|  | // | 
|  | // In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length | 
|  | // codes "f00", "f01", "f02", ..., "f13". | 
|  | // | 
|  | // Recall that secondary tables' lengths are (1 << (N - primLen)), where N is | 
|  | // the longest code for a given primLen length prefix. In this case, (N - | 
|  | // primLen) is (15 - 13) if the secondary table contains a "f??" code, | 
|  | // otherwise it N is (14 - 13). When the secondary table contains a mixture of | 
|  | // lengths, some of the shorter codes' entries are duplicated. | 
|  | // | 
|  | // Also, for canonical Huffman codes, the codes (bitstrings) for 14-length | 
|  | // codes are all assigned (sequentially) before any 15-length codes. There are | 
|  | // therefore 6 secondary tables: | 
|  | // | 
|  | //  - length 2: e0,  e1. | 
|  | //  - length 2: e2,  e3. | 
|  | //  - length 4: e4,  e4,  f00, f01. | 
|  | //  - length 4: f02, f03, f04, f05. | 
|  | //  - length 4: f06, f07, f08, f09. | 
|  | //  - length 4: f10, f11, f12, f13. | 
|  | // | 
|  | // The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20. | 
|  |  | 
|  | import ( | 
|  | "fmt" | 
|  | "os" | 
|  | "sort" | 
|  | ) | 
|  |  | 
|  | const ( | 
|  | maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes. | 
|  | maxCodeLen  = 15  // Each code's encoding is at most 15 bits. | 
|  | ) | 
|  |  | 
|  | func main() { | 
|  | if err := main1(); err != nil { | 
|  | os.Stderr.WriteString(err.Error() + "\n") | 
|  | os.Exit(1) | 
|  | } | 
|  | } | 
|  |  | 
|  | func main1() error { | 
|  | do("Lit/Len", 286) | 
|  | do("Distance", 30) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | func do(name string, maxNSyms uint32) { | 
|  | ftab := &feasibleTable{} | 
|  | ftab.build(maxNSyms) | 
|  |  | 
|  | // We can do the per-primLen computations concurrently, speeding up the | 
|  | // overall wall time taken. | 
|  | ch := make(chan string) | 
|  | numPending := 0 | 
|  | for primLen := uint32(3); primLen <= maxCodeLen; primLen++ { | 
|  | go doPrimLen(ch, maxNSyms, ftab, primLen) | 
|  | numPending++ | 
|  | } | 
|  |  | 
|  | // Gather, sort and print the results for each primLen. | 
|  | results := make([]string, 0, numPending) | 
|  | for ; numPending > 0; numPending-- { | 
|  | results = append(results, <-ch) | 
|  | } | 
|  | sort.Strings(results) | 
|  | fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+ | 
|  | "  @1  @2  @3  @4  @5  @6  @7  @8  @9 @10 @11 @12 @13 @14 @15\n", | 
|  | name, maxNSyms, ftab.count()) | 
|  | for _, s := range results { | 
|  | fmt.Println(s) | 
|  | } | 
|  | } | 
|  |  | 
|  | type state struct { | 
|  | ftab    *feasibleTable | 
|  | primLen uint32 | 
|  |  | 
|  | currCase struct { | 
|  | nAssignedCodes nAssignedCodes | 
|  | } | 
|  |  | 
|  | worstCase struct { | 
|  | nAssignedCodes nAssignedCodes | 
|  | nSecondary     uint32 | 
|  | } | 
|  |  | 
|  | visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector | 
|  | } | 
|  |  | 
|  | func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) { | 
|  | z := &state{ | 
|  | ftab:    ftab, | 
|  | primLen: primLen, | 
|  | } | 
|  |  | 
|  | if z.primLen > maxCodeLen { | 
|  | panic("unreachable") | 
|  |  | 
|  | } else if z.primLen == maxCodeLen { | 
|  | // There's only the primary table and no secondary tables. For an | 
|  | // example that fills that primary table, create a trivial encoding of | 
|  | // two symbols: one symbol's code is "0", the other's is "1". | 
|  | z.worstCase.nAssignedCodes[1] = 2 | 
|  | for n := uint32(2); n <= maxCodeLen; n++ { | 
|  | z.worstCase.nAssignedCodes[n] = 0 | 
|  | } | 
|  |  | 
|  | } else { | 
|  | // Brute force search the tree of possible code length assignments. | 
|  | // | 
|  | // nCodes is always even: the number of unassigned Huffman codes at | 
|  | // length L is twice the number at length (L-1). At the start, there | 
|  | // are two unassigned codes of length 1: "0" and "1". | 
|  | // | 
|  | // nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there | 
|  | // will be unassigned codes. | 
|  | for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 { | 
|  | for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ { | 
|  | if z.ftab[z.primLen+1][nCodes][nSyms] { | 
|  | z.visit(z.primLen+1, nCodes, nSyms, 0, 0) | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen) | 
|  | } | 
|  |  | 
|  | // Collect the details: the number of codes assigned to each length in the | 
|  | // worst case. | 
|  | details := "" | 
|  | for n := uint32(1); n <= maxCodeLen; n++ { | 
|  | if n == z.primLen+1 { | 
|  | details += ";" | 
|  | } else { | 
|  | details += " " | 
|  | } | 
|  | details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n]) | 
|  | } | 
|  |  | 
|  | nPrimary := uint32(1) << z.primLen | 
|  | nSecondary := z.worstCase.nSecondary | 
|  | nEntries := nPrimary + nSecondary | 
|  |  | 
|  | ch <- fmt.Sprintf("primLen %2d:  %5d  entries = %5d prim + %4d seco%s", | 
|  | z.primLen, nEntries, nPrimary, nSecondary, details) | 
|  | } | 
|  |  | 
|  | // visit updates z.worstCase, starting with nCodes unassigned Huffman codes of | 
|  | // length codeLen for nSyms symbols. nSecondary is the total size (number of | 
|  | // entries) so far of all of the secondary tables. | 
|  | // | 
|  | // nRemain > 0 iff we have an incomplete secondary table, whose size was | 
|  | // partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If | 
|  | // positive, nRemain is the number of unassigned entries in that incomplete | 
|  | // secondary table. | 
|  | // | 
|  | // visit can recursively call itself with longer codeLen values. As it does so, | 
|  | // it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to | 
|  | // zero before it returns. | 
|  | func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) { | 
|  | if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) { | 
|  | // No-op. | 
|  |  | 
|  | } else if nCodes > nSyms { | 
|  | // Infeasible: there will be unassigned codes. | 
|  |  | 
|  | } else if nCodes == nSyms { | 
|  | // Fill in the incomplete secondary table (if present) and any new | 
|  | // secondary tables (if necessary) to assign the nCodes codes to the | 
|  | // nSyms symbols. At the end, we should have no remaining entries. | 
|  | n := nCodes | 
|  | for n > nRemain { | 
|  | n -= nRemain | 
|  | nRemain = 1 << (codeLen - z.primLen) | 
|  | nSecondary += nRemain | 
|  | } | 
|  | if n != nRemain { | 
|  | panic("inconsistent secondary table size") | 
|  | } | 
|  |  | 
|  | // Update z.worstCase if we have a new record for nSecondary. | 
|  | if z.worstCase.nSecondary < nSecondary { | 
|  | z.worstCase.nSecondary = nSecondary | 
|  | z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes | 
|  | z.worstCase.nAssignedCodes[codeLen] = nCodes | 
|  | } | 
|  |  | 
|  | } else { // nCodes < nSyms. | 
|  | codeLen1 := codeLen + 1 | 
|  | // Brute force assigning n codes of this codeLen. The other (nCodes - | 
|  | // n) codes will have longer codes. | 
|  | for n := uint32(0); n < nCodes; n++ { | 
|  | nCodes1 := (nCodes - n) * 2 | 
|  | nSyms1 := nSyms - n | 
|  |  | 
|  | if nCodes1 > nSyms1 { | 
|  | // Infeasible: there will be unassigned codes. | 
|  |  | 
|  | } else if z.ftab[codeLen1][nCodes1][nSyms1] { | 
|  | nSecondary1 := nSecondary | 
|  | nRemain1 := nRemain * 2 | 
|  |  | 
|  | if nRemain1 > 0 { | 
|  | // We have an incomplete secondary table, which has been | 
|  | // partially accounted for: nSecondary has previously been | 
|  | // incremented by X, where X equals (1 << (codeLen - | 
|  | // z.primLen)). | 
|  | // | 
|  | // As we recursively call visit, with codeLen increased by | 
|  | // 1, then we need to double the accounted size of that | 
|  | // secondary table to 2*X. Since X out of that 2*X has | 
|  | // already been added, we need to increment nSecondary1 by | 
|  | // just the difference, which is also X. | 
|  | nSecondary1 += 1 << (codeLen - z.primLen) | 
|  | } | 
|  |  | 
|  | z.currCase.nAssignedCodes[codeLen] = n | 
|  | z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1) | 
|  | z.currCase.nAssignedCodes[codeLen] = 0 | 
|  | } | 
|  |  | 
|  | // Open a new secondary table if necessary: one whose size is (1 << | 
|  | // (codeLen - z.primLen)). | 
|  | if nRemain == 0 { | 
|  | nRemain = 1 << (codeLen - z.primLen) | 
|  | nSecondary += nRemain | 
|  | } | 
|  |  | 
|  | // Assign one of the incomplete secondary table's entries. | 
|  | nRemain-- | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // alreadyVisited returns whether z has already visited the (nSecondary, | 
|  | // nRemain) pair. As a side effect, it also marks that pair as visited. | 
|  | func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool { | 
|  | // Use the zig re-numbering to minimize the memory requirements for the | 
|  | // visited slice. There's an additional 8-fold memory reduction by using 1 | 
|  | // bit per element, not 1 bool (1 byte) per element. | 
|  | i := zig(nSecondary/8, nRemain) | 
|  | mask := uint8(1) << (nSecondary & 7) | 
|  |  | 
|  | visited := z.visited[codeLen][nCodes][nSyms] | 
|  | if uint64(i) < uint64(len(visited)) { | 
|  | x := visited[i] | 
|  | visited[i] = x | mask | 
|  | return x&mask != 0 | 
|  | } | 
|  |  | 
|  | oldDone := visited | 
|  | visited = make(bitVector, i+1) | 
|  | copy(visited, oldDone) | 
|  | visited[i] = mask | 
|  |  | 
|  | z.visited[codeLen][nCodes][nSyms] = visited | 
|  | return false | 
|  | } | 
|  |  | 
|  | type bitVector []uint8 | 
|  |  | 
|  | // zig packs a pair of non-negative integers (i, j) into a single unique | 
|  | // non-negative integer, in a zig-zagging pattern: | 
|  | // | 
|  | //      i=0 i=1 i=2 i=3 i=4 etc | 
|  | // j=0:   0   1   3   6  10 ... | 
|  | // j=1:   2   4   7  11  16 ... | 
|  | // j=2:   5   8  12  17  23 ... | 
|  | // j=3:   9  13  18  24  31 ... | 
|  | // j=4:  14  19  25  32  40 ... | 
|  | // etc: ... ... ... ... ... ... | 
|  | // | 
|  | // The closed from expression relies on the fact that the sum (0 + 1 + ... + n) | 
|  | // equals ((n * (n + 1)) / 2). | 
|  | // | 
|  | // This lets us minimize the memory requirements of a triangular array: one for | 
|  | // which a[i][j] is only accessed when ((i+j) < someBound). | 
|  | func zig(i uint32, j uint32) uint32 { | 
|  | if (i > 0x4000) || (j > 0x4000) { | 
|  | panic("overflow") | 
|  | } | 
|  | n := i + j | 
|  | return ((n * (n + 1)) / 2) + j | 
|  | } | 
|  |  | 
|  | // nAssignedCodes[i] holds the number of codes of length i. | 
|  | type nAssignedCodes [maxCodeLen + 1]uint32 | 
|  |  | 
|  | // fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes | 
|  | // that the caller has already filled in a[n] for n > primLen. | 
|  | func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) { | 
|  | // Looping backwards from the maxCodeLen, figure out how many unassigned | 
|  | // primLen length codes there were. | 
|  | // | 
|  | // For example, if there were 10 assigned 15 length codes, then there must | 
|  | // have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7 | 
|  | // 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned | 
|  | // 13 length codes. Etcetera. | 
|  | nUnassignedPrimLen := uint32(0) | 
|  | for n := uint32(maxCodeLen); n > primLen; n-- { | 
|  | nUnassignedPrimLen += a[n] | 
|  | if nUnassignedPrimLen%2 != 0 { | 
|  | panic("cannot undo Huffman code split") | 
|  | } | 
|  | nUnassignedPrimLen /= 2 | 
|  | } | 
|  | if nUnassignedPrimLen < 1 { | 
|  | panic("not enough unassigned primLen length codes") | 
|  | } | 
|  |  | 
|  | // Pick a combination of assigning shorter length codes (i.e. setting a[n] | 
|  | // values) to reach that nUnassignedPrimLen target. There are many possible | 
|  | // ways to do so, but only one unique way with the additional constraint | 
|  | // that each value is either 0 or 1: unique because every positive number | 
|  | // has a unique binary representation. | 
|  | // | 
|  | // For example, suppose we need to have 12 unassigned primLen length codes. | 
|  | // With that additional constraint, the way to do this is: | 
|  | // | 
|  | //  - 12 unassigned code of length primLen-0. | 
|  | //  -  6 unassigned code of length primLen-1. | 
|  | //  -  3 unassigned code of length primLen-2. | 
|  | //  -  2 unassigned code of length primLen-3. | 
|  | //  -  1 unassigned code of length primLen-4. | 
|  | //  -  1 unassigned code of length primLen-5. | 
|  | //  -  1 unassigned code of length primLen-6. | 
|  | //  -  etc | 
|  | // | 
|  | // Each left hand column value is either ((2*b)-0) or ((2*b)-1), where b is | 
|  | // the the number below. Subtracting 0 or 1 means that we assign 0 or 1 | 
|  | // codes of that row's length: | 
|  | // | 
|  | //  - 0 assigned code of length primLen-0. | 
|  | //  - 0 assigned code of length primLen-1. | 
|  | //  - 1 assigned code of length primLen-2. | 
|  | //  - 0 assigned code of length primLen-3. | 
|  | //  - 1 assigned code of length primLen-4. | 
|  | //  - 1 assigned code of length primLen-5. | 
|  | //  - 1 assigned code of length primLen-6. | 
|  | //  - etc | 
|  | // | 
|  | // Reading upwards, this is the binary string "...1110100", which is the | 
|  | // two's complement representation of the decimal number -12: the negative | 
|  | // of nUnassignedPrimLen. | 
|  | bits := -int32(nUnassignedPrimLen) | 
|  | for n := primLen; n >= 1; n-- { | 
|  | a[n] = uint32(bits & 1) | 
|  | bits >>= 1 | 
|  | } | 
|  | if bits != -1 { | 
|  | panic("did not finish with one unassigned zero-length code") | 
|  | } | 
|  |  | 
|  | // As a sanity check of the "negative two's complement" theory, loop | 
|  | // forwards to calculate the number of unassigned primLen length codes, | 
|  | // which should match nUnassignedPrimLen. | 
|  | nUnassigned := uint32(1) | 
|  | for n := uint32(1); n <= primLen; n++ { | 
|  | nUnassigned *= 2 | 
|  | if nUnassigned <= a[n] { | 
|  | panic("not enough unassigned codes") | 
|  | } | 
|  | nUnassigned -= a[n] | 
|  | } | 
|  | if nUnassigned != nUnassignedPrimLen { | 
|  | panic("inconsistent unassigned codes") | 
|  | } | 
|  | } | 
|  |  | 
|  | // feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign | 
|  | // Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes | 
|  | // unassigned codes of length codeLen. Each unassigned code of length L can be | 
|  | // split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a | 
|  | // maximum code length of maxCodeLen. A feasible assignment ends with zero | 
|  | // unassigned codes, no more and no less. | 
|  | type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool | 
|  |  | 
|  | func (f *feasibleTable) count() (ret uint64) { | 
|  | for i := range *f { | 
|  | for j := range f[i] { | 
|  | for k := range f[i][j] { | 
|  | if f[i][j][k] { | 
|  | ret++ | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | return ret | 
|  | } | 
|  |  | 
|  | func (f *feasibleTable) build(maxNSyms uint32) { | 
|  | for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ { | 
|  | if f.buildMore(1, 2, nSyms) != bmResultOK { | 
|  | panic("infeasible [codeLen][nCodes][nSyms] combination") | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | const ( | 
|  | bmResultOK            = 0 | 
|  | bmResultUseFewerCodes = 1 | 
|  | bmResultUseMoreCodes  = 2 | 
|  | bmResultUnsatisfiable = 3 | 
|  | ) | 
|  |  | 
|  | func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 { | 
|  | if nCodes == nSyms { | 
|  | if nCodes%2 == 1 { | 
|  | panic("odd nCodes declared feasible") | 
|  | } | 
|  | // This is trivial: assign every symbol a code of length codeLen. | 
|  | f[codeLen][nCodes][nSyms] = true | 
|  | return bmResultOK | 
|  | } else if nCodes > nSyms { | 
|  | // Infeasible: there will be unassigned codes. | 
|  | return bmResultUseMoreCodes | 
|  | } | 
|  | // From here onwards, we have (nCodes < nSyms). | 
|  |  | 
|  | if (nCodes << (maxCodeLen - codeLen)) < nSyms { | 
|  | // Infeasible: there will be unassigned symbols, even if we extend the | 
|  | // codes out to maxCodeLen. | 
|  | return bmResultUseFewerCodes | 
|  | } | 
|  |  | 
|  | // If we've already visited this [codeLen][nCodes][nSyms] combination, | 
|  | // there's no need to re-do the computation. | 
|  | if f[codeLen][nCodes][nSyms] { | 
|  | return bmResultOK | 
|  | } | 
|  |  | 
|  | // Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering | 
|  | // that we have checked above that (nCodes < nSyms). The remaining (nCodes | 
|  | // - n) codes are lengthened by 1, doubling the number of them, and try to | 
|  | // assign those longer codes to the remaining (nSyms - n) symbols. | 
|  | ok := false | 
|  | codeLen1 := codeLen + 1 | 
|  | for n := uint32(0); n < nCodes; n++ { | 
|  | nCodes1 := (nCodes - n) * 2 | 
|  | nSyms1 := nSyms - n | 
|  |  | 
|  | if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK { | 
|  | ok = true | 
|  | } else if x == bmResultUseFewerCodes { | 
|  | // This value of n didn't succeed, but also any larger n also won't | 
|  | // succeed, so we can break the loop early. | 
|  | break | 
|  | } | 
|  | } | 
|  |  | 
|  | if !ok { | 
|  | return bmResultUnsatisfiable | 
|  | } | 
|  | if nCodes%2 == 1 { | 
|  | panic("odd nCodes declared feasible") | 
|  | } | 
|  | f[codeLen][nCodes][nSyms] = true | 
|  | return bmResultOK | 
|  | } |