// Copyright 2019 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package raczlib

import (
"bytes"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"strings"
"testing"
"github.com/google/wuffs/lib/rac"
)
// These example RAC files come from "../rac/example_test.go".
//
// They are also presented in the RAC specification.
const (
decodedMore = "" +
"More!\n"
decodedSheep = "" +
"One sheep.\n" +
"Two sheep.\n" +
"Three sheep.\n"
encodedMore = "" +
"\x72\xC3\x63\x00\x78\x9C\x01\x06\x00\xF9\xFF\x4D\x6F\x72\x65\x21" +
"\x0A\x07\x42\x01\xBF\x72\xC3\x63\x01\x65\xA9\x00\xFF\x06\x00\x00" +
"\x00\x00\x00\x00\x01\x04\x00\x00\x00\x00\x00\x01\xFF\x35\x00\x00" +
"\x00\x00\x00\x01\x01"
encodedSheep = "" +
"\x72\xC3\x63\x04\x37\x39\x00\xFF\x00\x00\x00\x00\x00\x00\x00\xFF" +
"\x0B\x00\x00\x00\x00\x00\x00\xFF\x16\x00\x00\x00\x00\x00\x00\xFF" +
"\x23\x00\x00\x00\x00\x00\x00\x01\x50\x00\x00\x00\x00\x00\x01\xFF" +
"\x60\x00\x00\x00\x00\x00\x01\x00\x75\x00\x00\x00\x00\x00\x01\x00" +
"\x8A\x00\x00\x00\x00\x00\x01\x00\xA1\x00\x00\x00\x00\x00\x01\x04" +
"\x08\x00\x00\x00\x20\x73\x68\x65\x65\x70\x2E\x0A\xD0\x8D\x7A\x47" +
"\x78\xF9\x0B\xE0\x02\x6E\xF2\xCF\x4B\x85\x31\x01\x01\x00\x00\xFF" +
"\xFF\x17\x21\x03\x90\x78\xF9\x0B\xE0\x02\x6E\x0A\x29\xCF\x87\x31" +
"\x01\x01\x00\x00\xFF\xFF\x18\x0C\x03\xA8\x78\xF9\x0B\xE0\x02\x6E" +
"\x0A\xC9\x28\x4A\x4D\x85\x71\x00\x01\x00\x00\xFF\xFF\x21\x6E\x04" +
"\x66"
)
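
// racCompress returns original's RAC+Zlib encoded form, chunked according to
// cChunkSize (a target compressed chunk size) or dChunkSize (a target
// decompressed chunk size), with resourcesData providing any shared
// resources, such as dictionaries.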
func racCompress(original []byte, cChunkSize uint64, dChunkSize uint64, resourcesData [][]byte) ([]byte, error) {
buf := &bytes.Buffer{}
w := &rac.Writer{
Writer: buf,
CodecWriter: &CodecWriter{},
CChunkSize: cChunkSize,
DChunkSize: dChunkSize,
ResourcesData: resourcesData,
}
if _, err := w.Write(original); err != nil {
return nil, fmt.Errorf("Write: %v", err)
}
if err := w.Close(); err != nil {
return nil, fmt.Errorf("Close: %v", err)
}
return buf.Bytes(), nil
}
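
// racDecompress returns the decoded form of the RAC+Zlib data in compressed.
// A non-zero concurrency decodes chunks with that many worker goroutines.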
func racDecompress(compressed []byte, concurrency int) ([]byte, error) {
buf := &bytes.Buffer{}
r := &rac.Reader{
ReadSeeker: bytes.NewReader(compressed),
CompressedSize: int64(len(compressed)),
CodecReaders: []rac.CodecReader{&CodecReader{}},
Concurrency: concurrency,
}
defer r.Close()
if _, err := io.Copy(buf, r); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
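
// testReader checks that decoding encoded (with the given concurrency)
// produces exactly decoded.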
func testReader(tt *testing.T, decoded string, encoded string, concurrency int) {
g, err := racDecompress([]byte(encoded), concurrency)
if err != nil {
tt.Fatalf("racDecompress: %v", err)
}
if got, want := string(g), decoded; got != want {
tt.Fatalf("got:\n%s\nwant:\n%s", got, want)
}
}
func TestReaderSansDictionary(tt *testing.T) { testReader(tt, decodedMore, encodedMore, 0) }
func TestReaderWithDictionary(tt *testing.T) { testReader(tt, decodedSheep, encodedSheep, 0) }
func TestConcurrentReader(tt *testing.T) { testReader(tt, decodedSheep, encodedSheep, 2) }
func TestReaderConcatenation(tt *testing.T) {
// Create a RAC file whose decoded form is the concatenation of two other
// RAC files' decoded forms. The resultant RAC file's contents (the encoded
// form) are the concatenation of the two RAC files, plus a new root node.
const rootNodeRelativeOffset0 = 0x00 // Sheep's root node is at its start.
const rootNodeRelativeOffset1 = 0x15 // More's root node is at its end.
decLen0 := uint64(len(decodedSheep))
decLen1 := uint64(len(decodedMore))
encLen0 := uint64(len(encodedSheep))
encLen1 := uint64(len(encodedMore))
// Define a buffer to hold a new root node with 3 children: 1 metadata node
// and 2 branch nodes. The metadata node (one whose DRange is empty) is
// required because one of the original RAC files' root nodes is not located
// at that file's start. Walking to that child branch node needs two COffset
// values: one for the embedded RAC file's start and one for the embedded
// RAC file's root node.
//
// Whether the metadata node is the first, middle or last child is
// arbitrary. We choose to make it the first child.
const arity = 3
buf := [16 * (arity + 1)]byte{}
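// For arity == 3, this new root node occupies 16 * (3 + 1) = 64 bytes: four
// 8-byte DPtr|TTag slots followed by four 8-byte CPtr|STag slots. The magic,
// arity, checksum, CLen, codec and version fields are overlaid on individual
// bytes of those slots, as assigned below.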
// DPtr values.
binary.LittleEndian.PutUint64(buf[0x00:], 0)
binary.LittleEndian.PutUint64(buf[0x08:], 0)
binary.LittleEndian.PutUint64(buf[0x10:], 0+decLen0)
binary.LittleEndian.PutUint64(buf[0x18:], 0+decLen0+decLen1)
// CPtr values.
binary.LittleEndian.PutUint64(buf[0x20:], encLen0)
binary.LittleEndian.PutUint64(buf[0x28:], 0)
binary.LittleEndian.PutUint64(buf[0x30:], encLen0+rootNodeRelativeOffset1)
binary.LittleEndian.PutUint64(buf[0x38:], encLen0+encLen1+uint64(len(buf)))
// Magic and Arity.
buf[0x00] = 0x72
buf[0x01] = 0xC3
buf[0x02] = 0x63
buf[0x03] = arity
buf[0x3F] = arity
// TTag values.
buf[0x07] = 0xFF // Unused (metadata node).
buf[0x0F] = 0xFE // Branch node.
buf[0x17] = 0xFE // Branch node.
// CLen values.
buf[0x26] = 0x00 // Unused (metadata node).
buf[0x2E] = 0x04 // Branch node, which is always at most 4 KiB in size.
buf[0x36] = 0x04 // Branch node, which is always at most 4 KiB in size.
// STag values.
buf[0x27] = 0xFF // Unused (metadata node).
buf[0x2F] = 0x01 // CBiasing with COff[1], which is 0.
buf[0x37] = 0x00 // CBiasing with COff[0], which is encLen0.
// Codec and Version.
buf[0x1F] = byte(rac.CodecZlib >> 56)
buf[0x3E] = 0x01
// Checksum: the CRC-32/IEEE hash of everything after the checksum field
// itself (buf[6:]), with the high 16 bits folded into the low 16 bits.
checksum := crc32.ChecksumIEEE(buf[6:])
checksum ^= checksum >> 16
buf[0x04] = byte(checksum >> 0)
buf[0x05] = byte(checksum >> 8)
// Test the concatenation.
testReader(tt,
decodedSheep+decodedMore,
encodedSheep+encodedMore+string(buf[:]),
0,
)
}
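
// TestZeroedBytes checks that long runs of zero bytes survive the RAC round
// trip: after compressing original (once with a CChunkSize, once with a
// DChunkSize), seeking to every possible offset and reading to the end must
// reproduce the corresponding suffix of original.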
func TestZeroedBytes(tt *testing.T) {
original := make([]byte, 32)
original[0] = 'a'
original[1] = 'b'
original[2] = 'c'
original[20] = 'm'
original[31] = 'z'
for i := 0; i < 2; i++ {
cChunkSize, dChunkSize := uint64(0), uint64(0)
if i == 0 {
cChunkSize = 10
} else {
dChunkSize = 8
}
compressed, err := racCompress(original, cChunkSize, dChunkSize, nil)
if err != nil {
tt.Fatalf("i=%d: racCompress: %v", i, err)
}
r := &rac.Reader{
ReadSeeker: bytes.NewReader(compressed),
CompressedSize: int64(len(compressed)),
CodecReaders: []rac.CodecReader{&CodecReader{}},
}
defer r.Close()
for j := 0; j <= len(original); j++ {
want := original[j:]
got := make([]byte, len(want))
for k := range got {
got[k] = '?'
}
if _, err := r.Seek(int64(j), io.SeekStart); err != nil {
tt.Errorf("i=%d, j=%d: Seek: %v", i, j, err)
continue
}
if _, err := io.ReadFull(r, got); err != nil {
tt.Errorf("i=%d, j=%d: ReadFull: %v", i, j, err)
continue
}
if !bytes.Equal(got, want) {
tt.Errorf("i=%d, j=%d: got\n% 02x\nwant\n% 02x", i, j, got, want)
continue
}
}
}
}
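
// TestSharedDictionary checks that supplying a shared dictionary (via the
// rac.Writer's ResourcesData field) markedly shrinks the compressed form of
// highly repetitive input, and that decompression still round-trips.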
func TestSharedDictionary(tt *testing.T) {
// Make some "dictionary" data that, as an independent chunk, does not
// compress very well.
const n = 256
dictionary := make([]byte, n)
for i := range dictionary {
dictionary[i] = uint8(i)
}
// Replicate it 32 times.
original := make([]byte, 0, 32*n)
for len(original) < 32*n {
original = append(original, dictionary...)
}
// Measure the RAC-compressed form of that replicated data, without and
// with a shared dictionary.
compressedLengths := [2]int{}
for i := range compressedLengths {
resourcesData := [][]byte{}
if i > 0 {
resourcesData = [][]byte{dictionary}
}
// Compress.
compressed, err := racCompress(original, 0, n, resourcesData)
if err != nil {
tt.Fatalf("i=%d: racCompress: %v", i, err)
}
if len(compressed) == 0 {
tt.Fatalf("i=%d: compressed form is empty", i)
}
compressedLengths[i] = len(compressed)
// Decompress.
decompressed, err := racDecompress(compressed, 0)
if err != nil {
tt.Fatalf("i=%d: racDecompress: %v", i, err)
}
if !bytes.Equal(decompressed, original) {
tt.Fatalf("i=%d: racDecompress: round trip did not match original", i)
}
}
// Using a shared dictionary should improve the compression ratio. The
// exact value depends on the Zlib compression algorithm, but we should
// expect at least a 4x improvement.
if ratio := compressedLengths[0] / compressedLengths[1]; ratio < 4 {
tt.Fatalf("ratio: got %dx, want at least 4x", ratio)
}
}
// rsSansReadAt wraps a strings.Reader to have only Read and Seek methods.
type rsSansReadAt struct {
r *strings.Reader
}
func (r *rsSansReadAt) Read(p []byte) (int, error) { return r.r.Read(p) }
func (r *rsSansReadAt) Seek(o int64, w int) (int64, error) { return r.r.Seek(o, w) }
// rsWithReadAt wraps a strings.Reader to have Read, Seek and ReadAt methods.
// Technically, it satisfies the io.ReadSeeker interface, but calling Read or
// Seek will panic.
type rsWithReadAt struct {
r *strings.Reader
}
func (r *rsWithReadAt) Read(p []byte) (int, error) { panic("unimplemented") }
func (r *rsWithReadAt) Seek(o int64, w int) (int64, error) { panic("unimplemented") }
func (r *rsWithReadAt) ReadAt(p []byte, o int64) (int, error) { return r.r.ReadAt(p, o) }
// testReadSeeker tests that decoding from rs works, regardless of whether rs
// implements the optional ReadAt method. If rs does implement io.ReaderAt then
// its Read and Seek methods should never be called.
func testReadSeeker(tt *testing.T, rs io.ReadSeeker) {
buf := &bytes.Buffer{}
r := &rac.Reader{
ReadSeeker: rs,
CompressedSize: int64(len(encodedSheep)),
CodecReaders: []rac.CodecReader{&CodecReader{}},
}
defer r.Close()
if _, err := io.Copy(buf, r); err != nil {
tt.Fatalf("io.Copy: %v", err)
}
if got, want := buf.String(), decodedSheep; got != want {
tt.Fatalf("got:\n%s\nwant:\n%s", got, want)
}
}
func TestReadSeekerSansReadAt(tt *testing.T) {
testReadSeeker(tt, &rsSansReadAt{strings.NewReader(encodedSheep)})
}
func TestReadSeekerWithReadAt(tt *testing.T) {
testReadSeeker(tt, &rsWithReadAt{strings.NewReader(encodedSheep)})
}