Add dictionary API to cgo wrapper PiperOrigin-RevId: 698745795
diff --git a/go/cbrotli/cbrotli_test.go b/go/cbrotli/cbrotli_test.go index 254c0cd..5d15eaa 100644 --- a/go/cbrotli/cbrotli_test.go +++ b/go/cbrotli/cbrotli_test.go
@@ -375,3 +375,44 @@ } } } + +func TestEncodeDecodeWithDictionary(t *testing.T) { + q := 5 + l := 4096 + + input := make([]byte, l) + for i := 0; i < l; i++ { + input[i] = byte(i*7 + i*i*5) + } + // use dictionary same as input + pd := NewPreparedDictionary(input, DtRaw, q) + defer pd.Close() + + encoded, err := Encode(input, WriterOptions{Quality: q, Dictionary: pd}) + if err != nil { + t.Errorf("Encode: %v", err) + } + limit := 20 + if len(encoded) > limit { + t.Errorf("Output length exceeds expectations: %d > %d", len(encoded), limit) + } + + decoded, err := DecodeWithRawDictionary(encoded, input) + if err != nil { + t.Errorf("Decode: %v", err) + } + if !bytes.Equal(decoded, input) { + var want string + if len(input) > 320 { + want = fmt.Sprintf("<%d bytes>", len(input)) + } else { + want = fmt.Sprintf("%q", input) + } + t.Errorf(""+ + "Decode content:\n"+ + "%q\n"+ + "want:\n"+ + "%s", + decoded, want) + } +}
diff --git a/go/cbrotli/reader.go b/go/cbrotli/reader.go index 6e390c3..6a6aa49 100644 --- a/go/cbrotli/reader.go +++ b/go/cbrotli/reader.go
@@ -33,6 +33,7 @@ "errors" "io" "io/ioutil" + "runtime" ) type decodeError C.BrotliDecoderErrorCode @@ -49,10 +50,11 @@ // Reader implements io.ReadCloser by reading Brotli-encoded data from an // underlying Reader. type Reader struct { - src io.Reader - state *C.BrotliDecoderState - buf []byte // scratch space for reading from src - in []byte // current chunk to decode; usually aliases buf + src io.Reader + state *C.BrotliDecoderState + buf []byte // scratch space for reading from src + in []byte // current chunk to decode; usually aliases buf + pinner *runtime.Pinner // raw dictionary pinner } // readBufSize is a "good" buffer size that avoids excessive round-trips @@ -63,10 +65,26 @@ // NewReader initializes new Reader instance. // Close MUST be called to free resources. func NewReader(src io.Reader) *Reader { + return NewReaderWithRawDictionary(src, nil) +} + +// NewReaderWithRawDictionary initializes new Reader instance with shared dictionary. +// Close MUST be called to free resources. +func NewReaderWithRawDictionary(src io.Reader, dictionary []byte) *Reader { + s := C.BrotliDecoderCreateInstance(nil, nil, nil) + var p *runtime.Pinner + if dictionary != nil { + p = new(runtime.Pinner) + p.Pin(&dictionary[0]) + // TODO(eustas): use return value + C.BrotliDecoderAttachDictionary(s, C.BrotliSharedDictionaryType( /* RAW */ 0), + C.size_t(len(dictionary)), (*C.uint8_t)(&dictionary[0])) + } return &Reader{ - src: src, - state: C.BrotliDecoderCreateInstance(nil, nil, nil), - buf: make([]byte, readBufSize), + src: src, + state: s, + buf: make([]byte, readBufSize), + pinner: p, } } @@ -78,6 +96,10 @@ // Close despite the state; i.e. there might be some unread decoded data. C.BrotliDecoderDestroyInstance(r.state) r.state = nil + if r.pinner != nil { + r.pinner.Unpin() + r.pinner = nil + } return nil } @@ -153,11 +175,26 @@ // Decode decodes Brotli encoded data. 
func Decode(encodedData []byte) ([]byte, error) { + return DecodeWithRawDictionary(encodedData, nil) +} + +// DecodeWithRawDictionary decodes Brotli encoded data with shared dictionary. +func DecodeWithRawDictionary(encodedData []byte, dictionary []byte) ([]byte, error) { + s := C.BrotliDecoderCreateInstance(nil, nil, nil) + var p *runtime.Pinner + if dictionary != nil { + p = new(runtime.Pinner) + p.Pin(&dictionary[0]) + // TODO(eustas): use return value + C.BrotliDecoderAttachDictionary(s, C.BrotliSharedDictionaryType( /* RAW */ 0), + C.size_t(len(dictionary)), (*C.uint8_t)(&dictionary[0])) + } r := &Reader{ - src: bytes.NewReader(nil), - state: C.BrotliDecoderCreateInstance(nil, nil, nil), - buf: make([]byte, 4), // arbitrarily small but nonzero so that r.src.Read returns io.EOF - in: encodedData, + src: bytes.NewReader(nil), + state: s, + buf: make([]byte, 4), // arbitrarily small but nonzero so that r.src.Read returns io.EOF + in: encodedData, + pinner: p, } defer r.Close() return ioutil.ReadAll(r)
diff --git a/go/cbrotli/writer.go b/go/cbrotli/writer.go index 44575fc..e1ea467 100644 --- a/go/cbrotli/writer.go +++ b/go/cbrotli/writer.go
@@ -45,9 +45,54 @@ "bytes" "errors" "io" + "runtime" "unsafe" ) +// PreparedDictionary is a handle to a native object. +type PreparedDictionary struct { + opaque *C.BrotliEncoderPreparedDictionary + pinner *runtime.Pinner +} + +// DictionaryType is the type of a shared dictionary. +type DictionaryType int + +const ( + // DtRaw denotes LZ77 prefix dictionary + DtRaw DictionaryType = 0 + // DtSerialized denotes serialized format + DtSerialized DictionaryType = 1 +) + +// NewPreparedDictionary prepares dictionary data for encoder. +// Same instance can be used for multiple encoding sessions. +// Close MUST be called to free resources. +func NewPreparedDictionary(data []byte, dictionaryType DictionaryType, quality int) *PreparedDictionary { + var ptr *C.uint8_t + if len(data) != 0 { + ptr = (*C.uint8_t)(&data[0]) + } + p := new(runtime.Pinner) + p.Pin(&data[0]) + d := C.BrotliEncoderPrepareDictionary(C.BrotliSharedDictionaryType(dictionaryType), C.size_t(len(data)), ptr, C.int(quality), nil, nil, nil) + return &PreparedDictionary{ + opaque: d, + pinner: p, + } +} + +// Close frees C resources. +// IMPORTANT: calling Close before all encoders that use this dictionary are closed will +// cause a crash. +func (p *PreparedDictionary) Close() error { + // C-Brotli tolerates `nil` pointer here. + C.BrotliEncoderDestroyPreparedDictionary(p.opaque) + p.opaque = nil + p.pinner.Unpin() + return nil +} + // WriterOptions configures Writer. type WriterOptions struct { // Quality controls the compression-speed vs compression-density trade-offs. @@ -56,38 +101,56 @@ // LGWin is the base 2 logarithm of the sliding window size. // Range is 10 to 24. 0 indicates automatic configuration based on Quality. LGWin int + // Prepared shared dictionary + Dictionary *PreparedDictionary } // Writer implements io.WriteCloser by writing Brotli-encoded data to an // underlying Writer. 
type Writer struct { + healthy bool dst io.Writer state *C.BrotliEncoderState buf, encoded []byte } var ( - errEncode = errors.New("cbrotli: encode error") - errWriterClosed = errors.New("cbrotli: Writer is closed") + errEncode = errors.New("cbrotli: encode error") + errWriterClosed = errors.New("cbrotli: Writer is closed") + errWriterUnhealthy = errors.New("cbrotli: Writer is unhealthy") ) // NewWriter initializes new Writer instance. // Close MUST be called to free resources. func NewWriter(dst io.Writer, options WriterOptions) *Writer { state := C.BrotliEncoderCreateInstance(nil, nil, nil) - C.BrotliEncoderSetParameter( - state, C.BROTLI_PARAM_QUALITY, (C.uint32_t)(options.Quality)) + healthy := state != nil + if C.BrotliEncoderSetParameter( + state, C.BROTLI_PARAM_QUALITY, (C.uint32_t)(options.Quality)) == 0 { + healthy = false + } if options.LGWin > 0 { - C.BrotliEncoderSetParameter( - state, C.BROTLI_PARAM_LGWIN, (C.uint32_t)(options.LGWin)) + if C.BrotliEncoderSetParameter( + state, C.BROTLI_PARAM_LGWIN, (C.uint32_t)(options.LGWin)) == 0 { + healthy = false + } + } + if options.Dictionary != nil { + if C.BrotliEncoderAttachPreparedDictionary(state, options.Dictionary.opaque) == 0 { + healthy = false + } } return &Writer{ - dst: dst, - state: state, + healthy: healthy, + dst: dst, + state: state, } } func (w *Writer) writeChunk(p []byte, op C.BrotliEncoderOperation) (n int, err error) { + if !w.healthy { + return 0, errWriterUnhealthy + } if w.state == nil { return 0, errWriterClosed }