blob: c5bb8d66b3f80007faaa5b87905c4011e17a7823 [file] [log] [blame]
// Copyright 2019 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// SPDX-License-Identifier: Apache-2.0 OR MIT
package rac_test
import (
"bytes"
"compress/zlib"
"encoding/binary"
"encoding/hex"
"fmt"
"hash/adler32"
"hash/crc32"
"io"
"log"
"os"
"github.com/google/wuffs/lib/rac"
)
// Example_indexLocationAtEnd demonstrates using the low level "rac" package to
// encode a RAC+Zlib formatted file with IndexLocationAtEnd.
//
// The sibling "raczlib" package provides a higher level API that is easier to
// use.
//
// See the RAC specification for an explanation of the file format.
func Example_indexLocationAtEnd() {
// Manually construct a zlib encoding of "More!\n", one that uses a literal
// block (that's easy to see in a hex dump) instead of a more compressible
// Huffman block.
const src = "More!\n"
hasher := adler32.New()
hasher.Write([]byte(src))
enc := []byte{ // See RFCs 1950 and 1951 for details.
0x78, // Deflate compression method; 32KiB window size.
0x9C, // Default encoding algorithm; FCHECK bits.
0x01, // Literal block (final).
0x06, 0x00, // Literal length.
0xF9, 0xFF, // Inverse of the literal length.
}
enc = append(enc, src...) // Literal bytes.
enc = hasher.Sum(enc) // Adler-32 hash.
// Check that we've constructed a valid zlib-formatted encoding, by
// checking that decoding enc produces src.
{
b := &bytes.Buffer{}
r, err := zlib.NewReader(bytes.NewReader(enc))
if err != nil {
log.Fatalf("NewReader: %v", err)
}
if _, err := io.Copy(b, r); err != nil {
log.Fatalf("Copy: %v", err)
}
if got := b.String(); got != src {
log.Fatalf("zlib check: got %q, want %q", got, src)
}
}
buf := &bytes.Buffer{}
w := &rac.ChunkWriter{
Writer: buf,
}
if err := w.AddChunk(uint64(len(src)), rac.CodecZlib, enc, 0, 0); err != nil {
log.Fatalf("AddChunk: %v", err)
}
if err := w.Close(); err != nil {
log.Fatalf("Close: %v", err)
}
fmt.Printf("RAC file:\n%s", hex.Dump(buf.Bytes()))
// Output:
// RAC file:
// 00000000 72 c3 63 00 78 9c 01 06 00 f9 ff 4d 6f 72 65 21 |r.c.x......More!|
// 00000010 0a 07 42 01 bf 72 c3 63 01 65 a9 00 ff 06 00 00 |..B..r.c.e......|
// 00000020 00 00 00 00 01 04 00 00 00 00 00 01 ff 35 00 00 |.............5..|
// 00000030 00 00 00 01 01 |.....|
}
// Example_indexLocationAtStart demonstrates using the low level "rac" package
// to encode and then decode a RAC+Zlib formatted file with
// IndexLocationAtStart.
//
// The sibling "raczlib" package provides a higher level API that is easier to
// use.
//
// See the RAC specification for an explanation of the file format.
func Example_indexLocationAtStart() {
buf := &bytes.Buffer{}
w := &rac.ChunkWriter{
Writer: buf,
IndexLocation: rac.IndexLocationAtStart,
TempFile: &bytes.Buffer{},
}
dict := []byte(" sheep.\n")
if len(dict) >= (1 << 30) {
log.Fatal("len(dict) is too large")
}
encodedDict := []byte{
uint8(len(dict) >> 0),
uint8(len(dict) >> 8),
uint8(len(dict) >> 16),
uint8(len(dict) >> 24),
}
encodedDict = append(encodedDict, dict...)
checksum := crc32.ChecksumIEEE(dict)
encodedDict = append(encodedDict,
uint8(checksum>>0),
uint8(checksum>>8),
uint8(checksum>>16),
uint8(checksum>>24),
)
fmt.Printf("Encoded dictionary resource:\n%s\n", hex.Dump(encodedDict))
dictResource, err := w.AddResource(encodedDict)
if err != nil {
log.Fatalf("AddResource: %v", err)
}
chunks := []string{
"One sheep.\n",
"Two sheep.\n",
"Three sheep.\n",
}
for i, chunk := range chunks {
b := &bytes.Buffer{}
if z, err := zlib.NewWriterLevelDict(b, zlib.BestCompression, dict); err != nil {
log.Fatalf("NewWriterLevelDict: %v", err)
} else if _, err := z.Write([]byte(chunk)); err != nil {
log.Fatalf("Write: %v", err)
} else if err := z.Close(); err != nil {
log.Fatalf("Close: %v", err)
}
encodedChunk := b.Bytes()
if err := w.AddChunk(uint64(len(chunk)), rac.CodecZlib, encodedChunk, dictResource, 0); err != nil {
log.Fatalf("AddChunk: %v", err)
}
fmt.Printf("Encoded chunk #%d:\n%s\n", i, hex.Dump(encodedChunk))
}
if err := w.Close(); err != nil {
log.Fatalf("Close: %v", err)
}
encoded := buf.Bytes()
fmt.Printf("RAC file:\n%s\n", hex.Dump(encoded))
// Decode the encoded bytes (the RAC-formatted bytes) to recover the
// original "One sheep.\nTwo sheep\.Three sheep.\n" source.
fmt.Printf("Decoded:\n")
r := &rac.ChunkReader{
ReadSeeker: bytes.NewReader(encoded),
CompressedSize: int64(len(encoded)),
}
zr := io.ReadCloser(nil)
for {
chunk, err := r.NextChunk()
if err == io.EOF {
break
} else if err != nil {
log.Fatalf("NextChunk: %v", err)
}
if chunk.Codec != rac.CodecZlib {
log.Fatalf("unexpected chunk codec")
}
fmt.Printf("[%2d, %2d): ", chunk.DRange[0], chunk.DRange[1])
// Parse the RAC+Zlib secondary data. For details, see
// https://github.com/google/wuffs/blob/main/doc/spec/rac-spec.md#rac--zlib
dict := []byte(nil)
if secondary := encoded[chunk.CSecondary[0]:chunk.CSecondary[1]]; len(secondary) > 0 {
if len(secondary) < 8 {
log.Fatalf("invalid secondary data")
}
dictLen := int(binary.LittleEndian.Uint32(secondary))
secondary = secondary[4:]
if (dictLen >= (1 << 30)) || ((dictLen + 4) > len(secondary)) {
log.Fatalf("invalid secondary data")
}
checksum := binary.LittleEndian.Uint32(secondary[dictLen:])
dict = secondary[:dictLen]
if checksum != crc32.ChecksumIEEE(dict) {
log.Fatalf("invalid checksum")
}
}
// Decompress the Zlib-encoded primary payload.
primary := encoded[chunk.CPrimary[0]:chunk.CPrimary[1]]
if zr == nil {
if zr, err = zlib.NewReaderDict(bytes.NewReader(primary), dict); err != nil {
log.Fatalf("zlib.NewReader: %v", err)
}
} else if err := zr.(zlib.Resetter).Reset(bytes.NewReader(primary), dict); err != nil {
log.Fatalf("zlib.Reader.Reset: %v", err)
}
if n, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatalf("io.Copy: %v", err)
} else if n != chunk.DRange.Size() {
log.Fatalf("inconsistent DRange size")
}
if err := zr.Close(); err != nil {
log.Fatalf("zlib.Reader.Close: %v", err)
}
}
// Note that these exact bytes depends on the zlib encoder's algorithm, but
// there is more than one valid zlib encoding of any given input. This
// "compare to golden output" test is admittedly brittle, as the standard
// library's zlib package's output isn't necessarily stable across Go
// releases.
// Output:
// Encoded dictionary resource:
// 00000000 08 00 00 00 20 73 68 65 65 70 2e 0a d0 8d 7a 47 |.... sheep....zG|
//
// Encoded chunk #0:
// 00000000 78 f9 0b e0 02 6e f2 cf 4b 85 31 01 01 00 00 ff |x....n..K.1.....|
// 00000010 ff 17 21 03 90 |..!..|
//
// Encoded chunk #1:
// 00000000 78 f9 0b e0 02 6e 0a 29 cf 87 31 01 01 00 00 ff |x....n.)..1.....|
// 00000010 ff 18 0c 03 a8 |.....|
//
// Encoded chunk #2:
// 00000000 78 f9 0b e0 02 6e 0a c9 28 4a 4d 85 71 00 01 00 |x....n..(JM.q...|
// 00000010 00 ff ff 21 6e 04 66 |...!n.f|
//
// RAC file:
// 00000000 72 c3 63 04 37 39 00 ff 00 00 00 00 00 00 00 ff |r.c.79..........|
// 00000010 0b 00 00 00 00 00 00 ff 16 00 00 00 00 00 00 ff |................|
// 00000020 23 00 00 00 00 00 00 01 50 00 00 00 00 00 01 ff |#.......P.......|
// 00000030 60 00 00 00 00 00 01 00 75 00 00 00 00 00 01 00 |`.......u.......|
// 00000040 8a 00 00 00 00 00 01 00 a1 00 00 00 00 00 01 04 |................|
// 00000050 08 00 00 00 20 73 68 65 65 70 2e 0a d0 8d 7a 47 |.... sheep....zG|
// 00000060 78 f9 0b e0 02 6e f2 cf 4b 85 31 01 01 00 00 ff |x....n..K.1.....|
// 00000070 ff 17 21 03 90 78 f9 0b e0 02 6e 0a 29 cf 87 31 |..!..x....n.)..1|
// 00000080 01 01 00 00 ff ff 18 0c 03 a8 78 f9 0b e0 02 6e |..........x....n|
// 00000090 0a c9 28 4a 4d 85 71 00 01 00 00 ff ff 21 6e 04 |..(JM.q......!n.|
// 000000a0 66 |f|
//
// Decoded:
// [ 0, 11): One sheep.
// [11, 22): Two sheep.
// [22, 35): Three sheep.
}