lib/raczlib/reader.go - external/github.com/google/wuffs - Git at Google

 // Copyright 2019 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package raczlib

 import (
 	"compress/zlib"
 	"errors"
 	"hash/adler32"
 	"io"

 	"github.com/google/wuffs/lib/rac"
 )

 var (
 	errInvalidCodec      = errors.New("raczlib: invalid Codec (expected rac.CodecZlib)")
 	errInvalidReadSeeker = errors.New("raczlib: invalid ReadSeeker")
 	errInvalidDictionary = errors.New("raczlib: invalid dictionary")
 )

 func u32BE(b []byte) uint32 {
 	_ = b[3] // Early bounds check to guarantee safety of reads below.
 	return (uint32(b[0]) << 24) | (uint32(b[1]) << 16) | (uint32(b[2]) << 8) | (uint32(b[3]))
 }

 // readAt calls ReadAt if it is available, otherwise it falls back to calling
 // Seek and then ReadFull. Calling ReadAt is presumably slightly more
 // efficient, e.g. one syscall instead of two.
 func readAt(r io.ReadSeeker, p []byte, offset int64) error {
 	if a, ok := r.(io.ReaderAt); ok && false {
 		n, err := a.ReadAt(p, offset)
 		if (n == len(p)) && (err == io.EOF) {
 			err = nil
 		}
 		return err
 	}
 	if _, err := r.Seek(offset, io.SeekStart); err != nil {
 		return err
 	}
 	_, err := io.ReadFull(r, p)
 	return err
 }

 // Reader reads a RAC file.
 //
 // Do not modify its exported fields after calling any of its methods.
 type Reader struct {
 	// ReadSeeker is where the RAC-encoded data is read from.
 	//
 	// It may also implement io.ReaderAt, in which case its ReadAt method will
 	// be preferred over combining Read and Seek, as the former is presumably
 	// more efficient. This is optional: io.ReaderAt is a stronger contract
 	// than io.ReadSeeker, as multiple concurrent ReadAt calls must not
 	// interfere with each other.
 	//
 	// For example, this type itself only implements io.ReadSeeker, not
 	// io.ReaderAt, as it is not safe for concurrent use.
 	//
 	// Nil is an invalid value.
 	ReadSeeker io.ReadSeeker

 	// CompressedSize is the size of the RAC file.
 	//
 	// Zero is an invalid value, as an empty file is not a valid RAC file.
 	CompressedSize int64

 	// err is the first error encountered. It is sticky: once a non-nil error
 	// occurs, all public methods will return that error.
 	err error

 	// reader is the low-level (Codec-agnostic) RAC reader.
 	racReader rac.Reader

 	// buf is a scratch buffer.
 	buf [2]byte

 	// cachedZlibReader lets us re-use the memory allocated for a zlib reader,
 	// when decompressing multiple chunks.
 	cachedZlibReader zlib.Resetter

 	// These fields contain the most recently used shared dictionary.
 	cachedDictionary       []byte
 	cachedDictionaryCRange rac.Range
 }

 func (r *Reader) initialize() error {
 	if r.err != nil {
 		return r.err
 	}
 	if r.racReader.ReadSeeker != nil {
 		// We're already initialized.
 		return nil
 	}
 	if r.ReadSeeker == nil {
 		r.err = errInvalidReadSeeker
 		return r.err
 	}

 	r.racReader.ReadSeeker = r.ReadSeeker
 	r.racReader.CompressedSize = r.CompressedSize
 	r.racReader.MakeDecompressor = r.makeDecompressor
 	r.racReader.MakeReaderContext = r.makeReaderContext
 	return nil
 }

 // Read implements io.Reader.
 func (r *Reader) Read(p []byte) (int, error) {
 	if err := r.initialize(); err != nil {
 		return 0, err
 	}
 	n, err := r.racReader.Read(p)
 	if (err != nil) && (err != io.EOF) {
 		r.err = err
 	}
 	return n, err
 }

 // Seek implements io.Seeker.
 func (r *Reader) Seek(offset int64, whence int) (int64, error) {
 	if err := r.initialize(); err != nil {
 		return 0, err
 	}
 	n, err := r.racReader.Seek(offset, whence)
 	if (err != nil) && (err != io.EOF) {
 		r.err = err
 	}
 	return n, err
 }

 func (r *Reader) makeDecompressor(compressed io.Reader, rctx rac.ReaderContext) (io.Reader, error) {
 	if r.cachedZlibReader != nil {
 		if err := r.cachedZlibReader.Reset(compressed, rctx.Secondary); err != nil {
 			if err == io.EOF {
 				err = io.ErrUnexpectedEOF
 			}
 			r.err = err
 			return nil, r.err
 		}
 		return r.cachedZlibReader.(io.Reader), nil
 	}

 	zlibReader, err := zlib.NewReaderDict(compressed, rctx.Secondary)
 	if err != nil {
 		if err == io.EOF {
 			err = io.ErrUnexpectedEOF
 		}
 		r.err = err
 		return nil, r.err
 	}
 	r.cachedZlibReader = zlibReader.(zlib.Resetter)
 	return zlibReader, nil
 }

 func (r *Reader) makeReaderContext(chunk rac.Chunk) (rac.ReaderContext, error) {
 	// For a description of the RAC+Zlib secondary-data format, see
 	// https://github.com/google/wuffs/blob/master/doc/spec/rac-spec.md#rac--zlib

 	if chunk.Codec != rac.CodecZlib {
 		r.err = errInvalidCodec
 		return rac.ReaderContext{}, r.err
 	}
 	if !chunk.CTertiary.Empty() {
 		r.err = errInvalidDictionary
 		return rac.ReaderContext{}, r.err
 	}
 	if chunk.CSecondary.Empty() {
 		return rac.ReaderContext{}, nil
 	}
 	cRange := chunk.CSecondary

 	// Load from the MRU cache, if it was loaded from the same cRange.
 	if (cRange == r.cachedDictionaryCRange) && !cRange.Empty() {
 		return rac.ReaderContext{Secondary: r.cachedDictionary}, nil
 	}

 	// Check the cRange size and the tTag.
 	if (cRange.Size() < 6) || (cRange[1] > r.CompressedSize) || (chunk.TTag != 0xFF) {
 		r.err = errInvalidDictionary
 		return rac.ReaderContext{}, r.err
 	}

 	// Read the dictionary size.
 	if err := readAt(r.ReadSeeker, r.buf[:2], cRange[0]); err != nil {
 		r.err = err
 		return rac.ReaderContext{}, r.err
 	}
 	dictSize := int64(r.buf[0]) | (int64(r.buf[1]) << 8)

 	// Check the size. The +6 is for the 2 byte prefix (dictionary size) and
 	// the 4 byte suffix (checksum).
 	if (dictSize + 6) > cRange.Size() {
 		r.err = errInvalidDictionary
 		return rac.ReaderContext{}, r.err
 	}

 	// Allocate or re-use the cachedDictionary buffer.
 	buffer := []byte(nil)
 	if n := dictSize + 4; int64(cap(r.cachedDictionary)) >= n {
 		buffer = r.cachedDictionary[:n]
 	} else {
 		buffer = make([]byte, n)
 	}

 	// Read the dictionary and checksum.
 	if err := readAt(r.ReadSeeker, buffer, cRange[0]+2); err != nil {
 		r.err = err
 		return rac.ReaderContext{}, r.err
 	}

 	// Verify the checksum.
 	dict, checksum := buffer[:dictSize], buffer[dictSize:]
 	if u32BE(checksum) != adler32.Checksum(dict) {
 		r.err = errInvalidDictionary
 		return rac.ReaderContext{}, r.err
 	}

 	// Save to the MRU cache and return.
 	r.cachedDictionary = dict
 	r.cachedDictionaryCRange = cRange
 	return rac.ReaderContext{Secondary: dict}, nil
 }
	// Copyright 2019 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package raczlib

	import (
	"compress/zlib"
	"errors"
	"hash/adler32"
	"io"

	"github.com/google/wuffs/lib/rac"
	)

	var (
	errInvalidCodec = errors.New("raczlib: invalid Codec (expected rac.CodecZlib)")
	errInvalidReadSeeker = errors.New("raczlib: invalid ReadSeeker")
	errInvalidDictionary = errors.New("raczlib: invalid dictionary")
	)

	func u32BE(b []byte) uint32 {
	_ = b[3] // Early bounds check to guarantee safety of reads below.
	return (uint32(b[0]) << 24) \| (uint32(b[1]) << 16) \| (uint32(b[2]) << 8) \| (uint32(b[3]))
	}

	// readAt calls ReadAt if it is available, otherwise it falls back to calling
	// Seek and then ReadFull. Calling ReadAt is presumably slightly more
	// efficient, e.g. one syscall instead of two.
	func readAt(r io.ReadSeeker, p []byte, offset int64) error {
	if a, ok := r.(io.ReaderAt); ok && false {
	n, err := a.ReadAt(p, offset)
	if (n == len(p)) && (err == io.EOF) {
	err = nil
	}
	return err
	}
	if _, err := r.Seek(offset, io.SeekStart); err != nil {
	return err
	}
	_, err := io.ReadFull(r, p)
	return err
	}

	// Reader reads a RAC file.
	//
	// Do not modify its exported fields after calling any of its methods.
	type Reader struct {
	// ReadSeeker is where the RAC-encoded data is read from.
	//
	// It may also implement io.ReaderAt, in which case its ReadAt method will
	// be preferred over combining Read and Seek, as the former is presumably
	// more efficient. This is optional: io.ReaderAt is a stronger contract
	// than io.ReadSeeker, as multiple concurrent ReadAt calls must not
	// interfere with each other.
	//
	// For example, this type itself only implements io.ReadSeeker, not
	// io.ReaderAt, as it is not safe for concurrent use.
	//
	// Nil is an invalid value.
	ReadSeeker io.ReadSeeker

	// CompressedSize is the size of the RAC file.
	//
	// Zero is an invalid value, as an empty file is not a valid RAC file.
	CompressedSize int64

	// err is the first error encountered. It is sticky: once a non-nil error
	// occurs, all public methods will return that error.
	err error

	// reader is the low-level (Codec-agnostic) RAC reader.
	racReader rac.Reader

	// buf is a scratch buffer.
	buf [2]byte

	// cachedZlibReader lets us re-use the memory allocated for a zlib reader,
	// when decompressing multiple chunks.
	cachedZlibReader zlib.Resetter

	// These fields contain the most recently used shared dictionary.
	cachedDictionary []byte
	cachedDictionaryCRange rac.Range
	}

	func (r *Reader) initialize() error {
	if r.err != nil {
	return r.err
	}
	if r.racReader.ReadSeeker != nil {
	// We're already initialized.
	return nil
	}
	if r.ReadSeeker == nil {
	r.err = errInvalidReadSeeker
	return r.err
	}

	r.racReader.ReadSeeker = r.ReadSeeker
	r.racReader.CompressedSize = r.CompressedSize
	r.racReader.MakeDecompressor = r.makeDecompressor
	r.racReader.MakeReaderContext = r.makeReaderContext
	return nil
	}

	// Read implements io.Reader.
	func (r *Reader) Read(p []byte) (int, error) {
	if err := r.initialize(); err != nil {
	return 0, err
	}
	n, err := r.racReader.Read(p)
	if (err != nil) && (err != io.EOF) {
	r.err = err
	}
	return n, err
	}

	// Seek implements io.Seeker.
	func (r *Reader) Seek(offset int64, whence int) (int64, error) {
	if err := r.initialize(); err != nil {
	return 0, err
	}
	n, err := r.racReader.Seek(offset, whence)
	if (err != nil) && (err != io.EOF) {
	r.err = err
	}
	return n, err
	}

	func (r *Reader) makeDecompressor(compressed io.Reader, rctx rac.ReaderContext) (io.Reader, error) {
	if r.cachedZlibReader != nil {
	if err := r.cachedZlibReader.Reset(compressed, rctx.Secondary); err != nil {
	if err == io.EOF {
	err = io.ErrUnexpectedEOF
	}
	r.err = err
	return nil, r.err
	}
	return r.cachedZlibReader.(io.Reader), nil
	}

	zlibReader, err := zlib.NewReaderDict(compressed, rctx.Secondary)
	if err != nil {
	if err == io.EOF {
	err = io.ErrUnexpectedEOF
	}
	r.err = err
	return nil, r.err
	}
	r.cachedZlibReader = zlibReader.(zlib.Resetter)
	return zlibReader, nil
	}

	func (r *Reader) makeReaderContext(chunk rac.Chunk) (rac.ReaderContext, error) {
	// For a description of the RAC+Zlib secondary-data format, see
	// https://github.com/google/wuffs/blob/master/doc/spec/rac-spec.md#rac--zlib

	if chunk.Codec != rac.CodecZlib {
	r.err = errInvalidCodec
	return rac.ReaderContext{}, r.err
	}
	if !chunk.CTertiary.Empty() {
	r.err = errInvalidDictionary
	return rac.ReaderContext{}, r.err
	}
	if chunk.CSecondary.Empty() {
	return rac.ReaderContext{}, nil
	}
	cRange := chunk.CSecondary

	// Load from the MRU cache, if it was loaded from the same cRange.
	if (cRange == r.cachedDictionaryCRange) && !cRange.Empty() {
	return rac.ReaderContext{Secondary: r.cachedDictionary}, nil
	}

	// Check the cRange size and the tTag.
	if (cRange.Size() < 6) \|\| (cRange[1] > r.CompressedSize) \|\| (chunk.TTag != 0xFF) {
	r.err = errInvalidDictionary
	return rac.ReaderContext{}, r.err
	}

	// Read the dictionary size.
	if err := readAt(r.ReadSeeker, r.buf[:2], cRange[0]); err != nil {
	r.err = err
	return rac.ReaderContext{}, r.err
	}
	dictSize := int64(r.buf[0]) \| (int64(r.buf[1]) << 8)

	// Check the size. The +6 is for the 2 byte prefix (dictionary size) and
	// the 4 byte suffix (checksum).
	if (dictSize + 6) > cRange.Size() {
	r.err = errInvalidDictionary
	return rac.ReaderContext{}, r.err
	}

	// Allocate or re-use the cachedDictionary buffer.
	buffer := []byte(nil)
	if n := dictSize + 4; int64(cap(r.cachedDictionary)) >= n {
	buffer = r.cachedDictionary[:n]
	} else {
	buffer = make([]byte, n)
	}

	// Read the dictionary and checksum.
	if err := readAt(r.ReadSeeker, buffer, cRange[0]+2); err != nil {
	r.err = err
	return rac.ReaderContext{}, r.err
	}

	// Verify the checksum.
	dict, checksum := buffer[:dictSize], buffer[dictSize:]
	if u32BE(checksum) != adler32.Checksum(dict) {
	r.err = errInvalidDictionary
	return rac.ReaderContext{}, r.err
	}

	// Save to the MRU cache and return.
	r.cachedDictionary = dict
	r.cachedDictionaryCRange = cRange
	return rac.ReaderContext{Secondary: dict}, nil
	}