| // Copyright 2019 The Wuffs Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package raczlib |
| |
| import ( |
| "compress/zlib" |
| "errors" |
| "io" |
| |
| "github.com/google/wuffs/lib/rac" |
| ) |
| |
// Errors describing invalid input: either a malformed or unsupported RAC
// chunk, or a Reader whose exported fields were not set up correctly.
var (
	errInvalidChunk          = errors.New("raczlib: invalid chunk")
	errInvalidChunkTooLarge  = errors.New("raczlib: invalid chunk (too large)")
	errInvalidChunkTruncated = errors.New("raczlib: invalid chunk (truncated)")
	errInvalidCodec          = errors.New("raczlib: invalid Codec (expected rac.CodecZlib)")
	errInvalidReadSeeker     = errors.New("raczlib: invalid ReadSeeker")
)

// errInternalInconsistentPosition reports that this package's own position
// bookkeeping has been violated, as opposed to the input being invalid.
var errInternalInconsistentPosition = errors.New("raczlib: internal error: inconsistent position")
| |
| // Reader reads a RAC file. |
| // |
| // Do not modify its exported fields after calling any of its methods. |
| type Reader struct { |
| // ReadSeeker is where the RAC-encoded data is read from. |
| // |
| // It may also implement io.ReaderAt, in which case its ReadAt method will |
| // be preferred over combining Read and Seek, as the former is presumably |
| // more efficient. This is optional: io.ReaderAt is a stronger contract |
| // than io.ReadSeeker, as multiple concurrent ReadAt calls must not |
| // interfere with each other. |
| // |
| // For example, this type itself only implements io.ReadSeeker, not |
| // io.ReaderAt, as it is not safe for concurrent use. |
| // |
| // Nil is an invalid value. |
| ReadSeeker io.ReadSeeker |
| |
| // CompressedSize is the size of the RAC file. |
| // |
| // Zero is an invalid value, as an empty file is not a valid RAC file. |
| CompressedSize int64 |
| |
| // err is the first error encountered. It is sticky: once a non-nil error |
| // occurs, all public methods will return that error. |
| err error |
| |
| // racReader is the low-level (Codec-agnostic) RAC reader. |
| racReader rac.Reader |
| |
| // These two fields combine for a 3-state state machine: |
| // |
| // - "State A" (both fields are zero): no RAC chunk is loaded. |
| // |
| // - "State B" (zlibReader is non-zero, inImplicitZeroes is zero): a RAC |
| // chunk is loaded, but not fully exhausted: decompressing the zlib |
| // stream has not hit io.EOF yet. |
| // |
| // - "State C" (zlibReader is zero, inImplicitZeroes is non-zero): a RAC |
| // chunk was exhausted, and we now serve the implicit NUL bytes after a |
| // chunk's explicitly encoded data. The number of NUL bytes can be (and |
| // often is) zero. |
| // |
| // Calling Read may trigger state transitions (which form a cycle): "State |
| // A" -> "State B" -> "State C" -> "State A" -> "State B" -> etc. |
| // |
| // Calling Seek may reset the state machine to "State A". |
| // |
| // The initial state is "State A". |
| zlibReader io.ReadCloser |
| inImplicitZeroes bool |
| |
| // cachedZlibReader lets us re-use the memory allocated for a zlib reader, |
| // when decompressing multiple chunks. |
| cachedZlibReader zlib.Resetter |
| |
| // currChunk is an io.Reader for the current chunk, used while in "State |
| // B". It serves zlib-compressed data, which the (non-nil) zlibReader turns |
| // into decompressed data. |
| currChunk io.LimitedReader |
| |
| // pos is the current position, in DSpace. It is the base value when Seek |
| // is called with io.SeekCurrent. |
| pos int64 |
| |
| // dRange is, in "State B" and "State C", what part (in DSpace) of the |
| // current chunk has not yet been passed on (via this type's Read method). |
| // |
| // Within those states, dRange[0] increases over time, as parts of the |
| // chunk are decompressed and passed on, but dRange[1] does not change. |
| // |
| // An invariant is that ((dRange[0] <= pos) && (pos <= dRange[1])). |
| // |
| // If the first inequality is strict (i.e. dRange[0] < pos) then we have |
| // Seek'ed to a pos that is not a chunk boundary, and satisfying the Read |
| // method will first require decompressing and discarding some of the chunk |
| // data, until dRange[0] reaches pos. |
| // |
| // If the second inequality is strict (i.e. pos < dRange[1]) and we are in |
| // "State C" then we have a non-zero number of implicit NUL bytes left. |
| // |
| // In "State A", the dRange is empty and unused, other than trivially |
| // maintaining the invariant. |
| dRange rac.Range |
| } |
| |
| func (r *Reader) initialize() error { |
| if r.err != nil { |
| return r.err |
| } |
| if r.racReader.ReadSeeker != nil { |
| // We're already initialized. |
| return nil |
| } |
| if r.ReadSeeker == nil { |
| r.err = errInvalidReadSeeker |
| return r.err |
| } |
| |
| r.racReader.ReadSeeker = r.ReadSeeker |
| r.racReader.CompressedSize = r.CompressedSize |
| r.currChunk.R = r.ReadSeeker |
| return nil |
| } |
| |
| // Read implements io.Reader. |
| func (r *Reader) Read(p []byte) (int, error) { |
| if err := r.initialize(); err != nil { |
| return 0, err |
| } |
| numRead := 0 |
| |
| for len(p) > 0 { |
| if (r.pos < r.dRange[0]) || (r.dRange[1] < r.pos) { |
| r.err = errInternalInconsistentPosition |
| return numRead, r.err |
| } |
| |
| readFunc := (func(*Reader, []byte) (int, error))(nil) |
| switch { |
| default: // "State A". |
| if err := r.nextChunk(); err != nil { |
| return numRead, err |
| } |
| continue |
| |
| case r.zlibReader != nil: // "State B". |
| readFunc = (*Reader).readExplicitData |
| |
| case r.inImplicitZeroes: // "State C". |
| readFunc = (*Reader).readImplicitZeroes |
| } |
| |
| n, err := readFunc(r, p) |
| numRead += n |
| p = p[n:] |
| if err != nil { |
| return numRead, err |
| } |
| } |
| return numRead, nil |
| } |
| |
| // readExplicitData serves the zlib-compressed data in a chunk. |
| func (r *Reader) readExplicitData(p []byte) (int, error) { |
| // If the chunk started before r.pos, discard the opening bytes of the |
| // chunk's decompressed data. |
| for r.pos > r.dRange[0] { |
| discardBuffer := p |
| discardBufferLen := r.pos - r.dRange[0] |
| if int64(len(discardBuffer)) > discardBufferLen { |
| discardBuffer = discardBuffer[:discardBufferLen] |
| } |
| |
| n, err := r.zlibReader.Read(discardBuffer) |
| r.dRange[0] += int64(n) |
| if err == io.EOF { |
| return n, r.transitionFromStateBToStateC() |
| } |
| if err != nil { |
| r.err = err |
| return 0, r.err |
| } |
| } |
| |
| // Delegate to the zlib reader. |
| n, err := r.zlibReader.Read(p) |
| if size := r.dRange.Size(); int64(n) > size { |
| n = int(size) |
| err = errInvalidChunkTooLarge |
| } |
| r.pos += int64(n) |
| r.dRange[0] += int64(n) |
| if err == io.EOF { |
| return n, r.transitionFromStateBToStateC() |
| } else if err == io.ErrUnexpectedEOF { |
| err = errInvalidChunkTruncated |
| } |
| if err != nil { |
| r.err = err |
| } |
| return n, err |
| } |
| |
| func (r *Reader) transitionFromStateBToStateC() error { |
| if err := r.zlibReader.Close(); err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.err = err |
| return r.err |
| } |
| r.zlibReader = nil |
| r.inImplicitZeroes = true |
| return nil |
| } |
| |
| // readImplicitZeroes serves the implicit NUL bytes after a chunk's explicit |
| // data. As |
| // https://github.com/google/wuffs/blob/master/doc/spec/rac-spec.md#decompressing-a-leaf-node |
| // says, "The Codec may produce fewer bytes than the DRange size. In that case, |
| // the remaining bytes (in DSpace) are set to NUL (memset to zero)." |
| func (r *Reader) readImplicitZeroes(p []byte) (int, error) { |
| // If the chunk's explicit data finished before r.pos, discard some of the |
| // implicit NULs. |
| if r.dRange[0] < r.pos { |
| r.dRange[0] = r.pos |
| } |
| |
| // The next r.dRange.Size() bytes are all implicitly zero. |
| n := r.dRange.Size() |
| if int64(len(p)) > n { |
| p = p[:n] |
| } |
| for i := range p { |
| p[i] = 0 |
| } |
| |
| // Update the cursors, check for exhaustion and return. |
| r.pos += int64(len(p)) |
| r.dRange[0] += int64(len(p)) |
| if r.dRange.Empty() { |
| // Transition from "State C" to "State A". |
| r.inImplicitZeroes = false |
| } |
| return len(p), nil |
| } |
| |
| // nextChunk loads the next independently compressed chunk. It transitions from |
| // "State A" to "State B". |
| // |
| // It may return io.EOF, in which case the Reader stays in "State A", and the |
| // r.err "sticky error" field stays nil. |
| func (r *Reader) nextChunk() error { |
| chunk, err := r.racReader.NextChunk() |
| if err == io.EOF { |
| return io.EOF |
| } else if err != nil { |
| r.err = err |
| return r.err |
| } |
| if chunk.Codec != rac.CodecZlib { |
| r.err = errInvalidCodec |
| return r.err |
| } |
| if chunk.DRange.Empty() || chunk.CPrimary.Empty() || !chunk.CTertiary.Empty() { |
| r.err = errInvalidChunk |
| return r.err |
| } |
| |
| dict := []byte(nil) |
| if !chunk.CSecondary.Empty() { |
| panic("TODO: dictionary support") |
| } |
| |
| if _, err := r.ReadSeeker.Seek(chunk.CPrimary[0], io.SeekStart); err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.err = err |
| return r.err |
| } |
| r.currChunk.N = chunk.CPrimary.Size() |
| r.dRange = chunk.DRange |
| |
| if r.cachedZlibReader != nil { |
| if err := r.cachedZlibReader.Reset(&r.currChunk, dict); err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.err = err |
| return r.err |
| } |
| r.zlibReader = r.cachedZlibReader.(io.ReadCloser) |
| } else { |
| r.zlibReader, err = zlib.NewReaderDict(&r.currChunk, dict) |
| if err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.err = err |
| return r.err |
| } |
| r.cachedZlibReader = r.zlibReader.(zlib.Resetter) |
| } |
| return nil |
| } |
| |
| // Seek implements io.Seeker. |
| func (r *Reader) Seek(offset int64, whence int) (int64, error) { |
| if err := r.initialize(); err != nil { |
| return 0, err |
| } |
| |
| pos := r.pos |
| switch whence { |
| case io.SeekStart: |
| pos = offset |
| case io.SeekCurrent: |
| pos += offset |
| case io.SeekEnd: |
| end, err := r.racReader.DecompressedSize() |
| if err != nil { |
| r.err = err |
| return 0, r.err |
| } |
| pos = end + offset |
| default: |
| return 0, errors.New("raczlib.Reader.Seek: invalid whence") |
| } |
| |
| if r.pos != pos { |
| if pos < 0 { |
| r.err = errors.New("raczlib.Reader.Seek: negative position") |
| return 0, r.err |
| } |
| if err := r.racReader.SeekToChunkContaining(pos); err != nil { |
| r.err = err |
| return 0, r.err |
| } |
| r.pos = pos |
| |
| // Maintain the dRange/pos invariant. |
| r.dRange[0] = pos |
| r.dRange[1] = pos |
| |
| // Reset to "State A". |
| r.zlibReader = nil |
| r.inImplicitZeroes = false |
| } |
| return r.pos, nil |
| } |