cmd/ractool/main.go - external/github.com/google/wuffs - Git at Google

 // Copyright 2019 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // ----------------

 //go:generate go run gen.go

 /*
 ractool manipulates Random Access Compression (RAC) files.

 See the RAC specification for more details:
 https://github.com/google/wuffs/blob/master/doc/spec/rac-spec.md

 Usage:

 ractool [flags] [input_filename]

 If no input_filename is given, stdin is used. Either way, output is written to
 stdout.

 The flags should include exactly one of -decode or -encode.

 When encoding, the input is partitioned into chunks and each chunk is
 compressed independently. You can specify the target chunk size in terms of
 either its compressed size or decompressed size. By default (if both
 -cchunksize and -dchunksize are zero), a 64KiB -dchunksize is used.

 You can also specify a -cpagesize, which is similar to but not exactly the same
 concept as alignment. If non-zero, padding is inserted into the output to
 minimize the number of pages that each chunk occupies. Look for "CPageSize" in
 the "package rac" documentation for more details:
 https://godoc.org/github.com/google/wuffs/lib/rac

 A RAC file consists of an index and the chunks. The index may be either at the
 start or at the end of the file. At the start results in slightly smaller and
 slightly more efficient RAC files, but the encoding process needs more memory
 or temporary disk space.

 Examples:

   ractool -decode foo.rac | sha256sum
   ractool -decode -drange=400:500 foo.rac
   ractool -encode foo.dat > foo.rac
   ractool -encode -codec=zlib -dchunksize=256k foo.dat > foo.rac

 The "400:500" flag value means the 100 bytes ranging from a DSpace offset
 (offset in terms of decompressed bytes, not compressed bytes) of 400
 (inclusive) to 500 (exclusive). Either or both bounds may be omitted, similar
 to Go slice syntax. A "400:" flag value would mean ranging from 400 (inclusive)
 to the end of the decompressed file.

 The "256k" flag value means 256 kibibytes (262144 bytes). Similarly, "1m" and
 "1M" both mean 1 mebibyte (1048576 bytes).

 General Flags:

 -decode
     whether to decode the input
 -encode
     whether to encode the input

 Decode-Related Flags:

 -drange
     the "i:j" range to decompress, ":8" means the first 8 bytes

 Encode-Related Flags:

 -cchunksize
     the chunk size (in CSpace)
 -codec
     the compression codec (default "zlib")
 -cpagesize
     the page size (in CSpace)
 -dchunksize
     the chunk size (in DSpace)
 -indexlocation
     the index location, "start" or "end" (default "start")
 -resources
     comma-separated list of resource files, such as shared dictionaries
 */
 package main

 import (
 	"bytes"
 	"errors"
 	"flag"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
 	"strconv"
 	"strings"

 	"github.com/google/wuffs/lib/rac"
 	"github.com/google/wuffs/lib/raczlib"
 )

 // TODO: a flag to use a disk-backed (not memory-backed) TempFile.

 // Command-line flags. The size-valued flags (-cpagesize, -cchunksize and
 // -dchunksize) are declared as strings rather than integers because encode
 // passes them through parseNumber, which accepts suffixes such as "256k".
 var (
 	// Mode selection: main1 requires exactly one of these two to be set.
 	decodeFlag = flag.Bool("decode", false, "whether to decode the input")
 	encodeFlag = flag.Bool("encode", false, "whether to encode the input")

 	// Decode-related flags.
 	// The default ":" omits both bounds, which decode interprets as the whole
 	// decompressed range.
 	drangeFlag = flag.String("drange", ":",
 		"the \"i:j\" range to decompress, \":8\" means the first 8 bytes")

 	// Encode-related flags.
 	// Note that -codec is also read by decode when it picks a CodecReader.
 	codecFlag         = flag.String("codec", "zlib", "the compression codec")
 	cpagesizeFlag     = flag.String("cpagesize", "0", "the page size (in CSpace)")
 	cchunksizeFlag    = flag.String("cchunksize", "0", "the chunk size (in CSpace)")
 	dchunksizeFlag    = flag.String("dchunksize", "0", "the chunk size (in DSpace)")
 	indexlocationFlag = flag.String("indexlocation", "start",
 		"the index location, \"start\" or \"end\"")
 	resourcesFlag = flag.String("resources", "",
 		"comma-separated list of resource files, such as shared dictionaries")
 )

 // usage writes the help text to stderr. main1 installs it as flag.Usage.
 //
 // usageStr is not declared in this file; presumably it is produced by the
 // "go:generate go run gen.go" step - TODO confirm.
 func usage() {
 	io.WriteString(os.Stderr, usageStr)
 }

 // main runs main1 and, if it reports an error, prints the error message
 // (followed by a newline) to stderr and exits with status 1.
 func main() {
 	err := main1()
 	if err == nil {
 		return
 	}
 	os.Stderr.WriteString(err.Error() + "\n")
 	os.Exit(1)
 }

 // main1 is the body of main, returning an error instead of exiting.
 //
 // It parses the command line, selects the input (stdin when no filename is
 // given, otherwise the single named file) and dispatches to decode or encode.
 // It is an error to pass more than one filename, or to pass anything other
 // than exactly one of -decode and -encode.
 func main1() error {
 	flag.Usage = usage
 	flag.Parse()

 	var r io.Reader = os.Stdin
 	usingStdin := true
 	switch n := flag.NArg(); {
 	case n > 1:
 		return errors.New("too many filenames; the maximum is one")
 	case n == 1:
 		f, err := os.Open(flag.Arg(0))
 		if err != nil {
 			return err
 		}
 		// The file stays open until main1 returns, i.e. until decode or
 		// encode has finished with it.
 		defer f.Close()
 		r, usingStdin = f, false
 	}

 	decoding, encoding := *decodeFlag, *encodeFlag
 	switch {
 	case decoding && !encoding:
 		return decode(r, usingStdin)
 	case encoding && !decoding:
 		return encode(r)
 	}
 	return errors.New("must specify exactly one of -decode or -encode")
 }

 // parseNumber converts strings like "3", "4k" and "0x50" to the integers 3,
 // 4096 and 80. It returns a negative value if and only if an error is
 // encountered.
 //
 // The digits are parsed by strconv.ParseInt with base 0, so Go's usual base
 // prefixes (such as "0x") are accepted. An optional final letter scales the
 // value by a power of 1024: k, m, g, t, p or e, in either case. The empty
 // string, negative numbers and values that do not fit in an int64 are errors.
 //
 // A string that is already a complete number is never treated as carrying a
 // suffix, so hexadecimal values whose last digit is 'e' or 'E' (such as
 // "0x1e", which is 30) keep their literal meaning instead of being read as
 // "0x1" scaled by 1<<60.
 func parseNumber(s string) int64 {
 	if s == "" {
 		return -1
 	}

 	// Whole-string parse first: this is what disambiguates a trailing hex
 	// digit 'e' from the exbi suffix.
 	if i, err := strconv.ParseInt(s, 0, 64); err == nil {
 		if i < 0 {
 			return -1
 		}
 		return i
 	}

 	shift := uint32(0)
 	switch n := len(s) - 1; s[n] {
 	case 'k', 'K':
 		shift, s = 10, s[:n]
 	case 'm', 'M':
 		shift, s = 20, s[:n]
 	case 'g', 'G':
 		shift, s = 30, s[:n]
 	case 't', 'T':
 		shift, s = 40, s[:n]
 	case 'p', 'P':
 		shift, s = 50, s[:n]
 	case 'e', 'E':
 		shift, s = 60, s[:n]
 	default:
 		// No suffix to strip, and the whole string already failed to parse.
 		return -1
 	}

 	i, err := strconv.ParseInt(s, 0, 64)
 	if (err != nil) || (i < 0) {
 		return -1
 	}
 	// Reject values whose scaled result would overflow an int64.
 	const int64Max = (1 << 63) - 1
 	if i > (int64Max >> shift) {
 		return -1
 	}
 	return i << shift
 }

 // parseRange splits a string such as "1:23" at its first colon and parses
 // both halves with parseNumber, giving i=1 and j=23 for that example.
 //
 // An empty half is allowed and is reported as a negative placeholder (-1) in
 // i and/or j; the caller decides what a missing bound means. ok is false (and
 // i and j are both zero) when there is no colon, when a non-empty half fails
 // to parse, or when both bounds are present and i is greater than j.
 func parseRange(s string) (i int64, j int64, ok bool) {
 	colon := strings.IndexByte(s, ':')
 	if colon < 0 {
 		return 0, 0, false
 	}
 	lower, upper := s[:colon], s[colon+1:]

 	// Start from "both bounds missing" and overwrite whichever is present.
 	i, j = -1, -1
 	if lower != "" {
 		if i = parseNumber(lower); i < 0 {
 			return 0, 0, false
 		}
 	}
 	if upper != "" {
 		if j = parseNumber(upper); j < 0 {
 			return 0, 0, false
 		}
 	}

 	// A missing lower bound is -1, which can never exceed a present upper
 	// bound, so only j's presence needs checking explicitly.
 	if (j >= 0) && (i > j) {
 		return 0, 0, false
 	}
 	return i, j, true
 }

 // decode reads a RAC file from r and writes the -drange portion of its
 // decompressed contents to stdout.
 //
 // r must implement io.ReadSeeker. When usingStdin is true, all of r is first
 // read into memory and decoding proceeds from that in-memory copy. Only the
 // "zlib" -codec is supported.
 //
 // A missing lower bound means 0. A missing upper bound, or one beyond the end
 // offset reported by the RAC reader, means that end offset. An empty range
 // writes nothing and returns nil.
 func decode(r io.Reader, usingStdin bool) error {
 	lo, hi, valid := parseRange(*drangeFlag)
 	if !valid {
 		return fmt.Errorf("invalid -drange")
 	}

 	seeker, seekable := r.(io.ReadSeeker)
 	if !seekable {
 		return fmt.Errorf("input is not seekable")
 	}

 	// Even if the os.File type is a ReadSeeker, it might not actually support
 	// seeking. Instead, read all of stdin into memory.
 	if usingStdin {
 		contents, err := ioutil.ReadAll(r)
 		if err != nil {
 			return err
 		}
 		seeker = bytes.NewReader(contents)
 	}

 	if *codecFlag != "zlib" {
 		return errors.New("unsupported -codec")
 	}
 	racReader := &rac.Reader{
 		ReadSeeker:   seeker,
 		CodecReaders: []rac.CodecReader{&raczlib.CodecReader{}},
 	}

 	// Seeking to the end yields the offset used to clamp the range.
 	size, err := racReader.Seek(0, io.SeekEnd)
 	if err != nil {
 		return err
 	}
 	if lo < 0 {
 		lo = 0
 	}
 	if (hi < 0) || (hi > size) {
 		hi = size
 	}
 	if hi <= lo {
 		return nil
 	}
 	if _, err := racReader.Seek(lo, io.SeekStart); err != nil {
 		return err
 	}

 	_, err = io.Copy(os.Stdout, io.LimitReader(racReader, hi-lo))
 	return err
 }

 // encode reads all of r and writes it to stdout through a rac.Writer that is
 // configured from the encode-related flags.
 //
 // -indexlocation must be "start" or "end" and -codec must be "zlib". The three
 // size flags are parsed with parseNumber. At most one of -cchunksize and
 // -dchunksize may be non-zero; if both are zero, a 64 KiB dchunksize is used.
 // Each file named in -resources is read in full and appended to the writer's
 // ResourcesData. The writer's TempFile is an in-memory bytes.Buffer.
 func encode(r io.Reader) error {
 	var indexLocation rac.IndexLocation
 	if loc := *indexlocationFlag; loc == "start" {
 		indexLocation = rac.IndexLocationAtStart
 	} else if loc == "end" {
 		indexLocation = rac.IndexLocationAtEnd
 	} else {
 		return errors.New("invalid -indexlocation")
 	}

 	// Parse the size flags in a fixed order so that the first invalid one
 	// determines the error message.
 	var cchunksize, cpagesize, dchunksize int64
 	for _, p := range []struct {
 		dst  *int64
 		text string
 		fail string
 	}{
 		{&cchunksize, *cchunksizeFlag, "invalid -cchunksize"},
 		{&cpagesize, *cpagesizeFlag, "invalid -cpagesize"},
 		{&dchunksize, *dchunksizeFlag, "invalid -dchunksize"},
 	} {
 		if *p.dst = parseNumber(p.text); *p.dst < 0 {
 			return errors.New(p.fail)
 		}
 	}

 	switch {
 	case (cchunksize != 0) && (dchunksize != 0):
 		return errors.New("must specify none or one of -cchunksize or -dchunksize")
 	case (cchunksize == 0) && (dchunksize == 0):
 		dchunksize = 65536 // 64 KiB.
 	}

 	if *codecFlag != "zlib" {
 		return errors.New("unsupported -codec")
 	}
 	w := &rac.Writer{
 		Writer:        os.Stdout,
 		CodecWriter:   &raczlib.CodecWriter{},
 		IndexLocation: indexLocation,
 		TempFile:      &bytes.Buffer{},
 		CPageSize:     uint64(cpagesize),
 		CChunkSize:    uint64(cchunksize),
 		DChunkSize:    uint64(dchunksize),
 	}

 	if names := *resourcesFlag; names != "" {
 		for _, name := range strings.Split(names, ",") {
 			data, err := ioutil.ReadFile(name)
 			if err != nil {
 				return err
 			}
 			w.ResourcesData = append(w.ResourcesData, data)
 		}
 	}

 	_, err := io.Copy(w, r)
 	if err != nil {
 		return err
 	}
 	return w.Close()
 }
	// Copyright 2019 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// ----------------

	//go:generate go run gen.go

	/*
	ractool manipulates Random Access Compression (RAC) files.

	See the RAC specification for more details:
	https://github.com/google/wuffs/blob/master/doc/spec/rac-spec.md

	Usage:

	ractool [flags] [input_filename]

	If no input_filename is given, stdin is used. Either way, output is written to
	stdout.

	The flags should include exactly one of -decode or -encode.

	When encoding, the input is partitioned into chunks and each chunk is
	compressed independently. You can specify the target chunk size in terms of
	either its compressed size or decompressed size. By default (if both
	-cchunksize and -dchunksize are zero), a 64KiB -dchunksize is used.

	You can also specify a -cpagesize, which is similar to but not exactly the same
	concept as alignment. If non-zero, padding is inserted into the output to
	minimize the number of pages that each chunk occupies. Look for "CPageSize" in
	the "package rac" documentation for more details:
	https://godoc.org/github.com/google/wuffs/lib/rac

	A RAC file consists of an index and the chunks. The index may be either at the
	start or at the end of the file. At the start results in slightly smaller and
	slightly more efficient RAC files, but the encoding process needs more memory
	or temporary disk space.

	Examples:

	ractool -decode foo.rac | sha256sum
	ractool -decode -drange=400:500 foo.rac
	ractool -encode foo.dat > foo.rac
	ractool -encode -codec=zlib -dchunksize=256k foo.dat > foo.rac

	The "400:500" flag value means the 100 bytes ranging from a DSpace offset
	(offset in terms of decompressed bytes, not compressed bytes) of 400
	(inclusive) to 500 (exclusive). Either or both bounds may be omitted, similar
	to Go slice syntax. A "400:" flag value would mean ranging from 400 (inclusive)
	to the end of the decompressed file.

	The "256k" flag value means 256 kibibytes (262144 bytes). Similarly, "1m" and
	"1M" both mean 1 mebibyte (1048576 bytes).

	General Flags:

	-decode
	whether to decode the input
	-encode
	whether to encode the input

	Decode-Related Flags:

	-drange
	the "i:j" range to decompress, ":8" means the first 8 bytes

	Encode-Related Flags:

	-cchunksize
	the chunk size (in CSpace)
	-codec
	the compression codec (default "zlib")
	-cpagesize
	the page size (in CSpace)
	-dchunksize
	the chunk size (in DSpace)
	-indexlocation
	the index location, "start" or "end" (default "start")
	-resources
	comma-separated list of resource files, such as shared dictionaries
	*/
	package main

	import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"strconv"
	"strings"

	"github.com/google/wuffs/lib/rac"
	"github.com/google/wuffs/lib/raczlib"
	)

	// TODO: a flag to use a disk-backed (not memory-backed) TempFile.

	// Command-line flags. The size-valued flags (-cpagesize, -cchunksize and
	// -dchunksize) are declared as strings rather than integers because encode
	// passes them through parseNumber, which accepts suffixes such as "256k".
	var (
	// Mode selection: main1 requires exactly one of these two to be set.
	decodeFlag = flag.Bool("decode", false, "whether to decode the input")
	encodeFlag = flag.Bool("encode", false, "whether to encode the input")

	// Decode-related flags.
	// The default ":" omits both bounds, which decode interprets as the whole
	// decompressed range.
	drangeFlag = flag.String("drange", ":",
	"the \"i:j\" range to decompress, \":8\" means the first 8 bytes")

	// Encode-related flags.
	// Note that -codec is also read by decode when it picks a CodecReader.
	codecFlag = flag.String("codec", "zlib", "the compression codec")
	cpagesizeFlag = flag.String("cpagesize", "0", "the page size (in CSpace)")
	cchunksizeFlag = flag.String("cchunksize", "0", "the chunk size (in CSpace)")
	dchunksizeFlag = flag.String("dchunksize", "0", "the chunk size (in DSpace)")
	indexlocationFlag = flag.String("indexlocation", "start",
	"the index location, \"start\" or \"end\"")
	resourcesFlag = flag.String("resources", "",
	"comma-separated list of resource files, such as shared dictionaries")
	)

	// usage writes the help text to stderr. main1 installs it as flag.Usage.
	//
	// usageStr is not declared in this file; presumably it is produced by the
	// "go:generate go run gen.go" step - TODO confirm.
	func usage() {
		io.WriteString(os.Stderr, usageStr)
	}

	// main runs main1 and, if it reports an error, prints the error message
	// (followed by a newline) to stderr and exits with status 1.
	func main() {
		err := main1()
		if err == nil {
			return
		}
		os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(1)
	}

	// main1 is the body of main, returning an error instead of exiting.
	//
	// It parses the command line, selects the input (stdin when no filename is
	// given, otherwise the single named file) and dispatches to decode or
	// encode. It is an error to pass more than one filename, or to pass
	// anything other than exactly one of -decode and -encode.
	func main1() error {
		flag.Usage = usage
		flag.Parse()

		r, usingStdin := io.Reader(os.Stdin), true
		switch flag.NArg() {
		case 0:
			// No-op.
		case 1:
			f, err := os.Open(flag.Arg(0))
			if err != nil {
				return err
			}
			// The file stays open until main1 returns, i.e. until decode or
			// encode has finished with it.
			defer f.Close()
			r, usingStdin = f, false
		default:
			return errors.New("too many filenames; the maximum is one")
		}

		// decodeFlag and encodeFlag are *bool (from flag.Bool), so they must be
		// dereferenced before being used as boolean operands.
		if *decodeFlag && !*encodeFlag {
			return decode(r, usingStdin)
		}
		if *encodeFlag && !*decodeFlag {
			return encode(r)
		}
		return errors.New("must specify exactly one of -decode or -encode")
	}

	// parseNumber converts strings like "3", "4k" and "0x50" to the integers 3,
	// 4096 and 80. It returns a negative value if and only if an error is
	// encountered.
	//
	// The digits are parsed by strconv.ParseInt with base 0, so Go's usual base
	// prefixes (such as "0x") are accepted. An optional final letter scales the
	// value by a power of 1024: k, m, g, t, p or e, in either case. The empty
	// string, negative numbers and values that do not fit in an int64 are
	// errors.
	//
	// A string that is already a complete number is never treated as carrying a
	// suffix, so hexadecimal values whose last digit is 'e' or 'E' (such as
	// "0x1e", which is 30) keep their literal meaning instead of being read as
	// "0x1" scaled by 1<<60.
	func parseNumber(s string) int64 {
		if s == "" {
			return -1
		}

		// Whole-string parse first: this is what disambiguates a trailing hex
		// digit 'e' from the exbi suffix.
		if i, err := strconv.ParseInt(s, 0, 64); err == nil {
			if i < 0 {
				return -1
			}
			return i
		}

		shift := uint32(0)
		switch n := len(s) - 1; s[n] {
		case 'k', 'K':
			shift, s = 10, s[:n]
		case 'm', 'M':
			shift, s = 20, s[:n]
		case 'g', 'G':
			shift, s = 30, s[:n]
		case 't', 'T':
			shift, s = 40, s[:n]
		case 'p', 'P':
			shift, s = 50, s[:n]
		case 'e', 'E':
			shift, s = 60, s[:n]
		default:
			// No suffix to strip, and the whole string already failed to parse.
			return -1
		}

		i, err := strconv.ParseInt(s, 0, 64)
		if (err != nil) || (i < 0) {
			return -1
		}
		// Reject values whose scaled result would overflow an int64.
		const int64Max = (1 << 63) - 1
		if i > (int64Max >> shift) {
			return -1
		}
		return i << shift
	}

	// parseRange splits a string such as "1:23" at its first colon and parses
	// both halves with parseNumber, giving i=1 and j=23 for that example.
	//
	// An empty half is allowed and is reported as a negative placeholder (-1)
	// in i and/or j; the caller decides what a missing bound means. ok is false
	// (and i and j are both zero) when there is no colon, when a non-empty half
	// fails to parse, or when both bounds are present and i is greater than j.
	func parseRange(s string) (i int64, j int64, ok bool) {
		colon := strings.IndexByte(s, ':')
		if colon < 0 {
			return 0, 0, false
		}
		lower, upper := s[:colon], s[colon+1:]

		// Start from "both bounds missing" and overwrite whichever is present.
		i, j = -1, -1
		if lower != "" {
			if i = parseNumber(lower); i < 0 {
				return 0, 0, false
			}
		}
		if upper != "" {
			if j = parseNumber(upper); j < 0 {
				return 0, 0, false
			}
		}

		// A missing lower bound is -1, which can never exceed a present upper
		// bound, so only j's presence needs checking explicitly.
		if (j >= 0) && (i > j) {
			return 0, 0, false
		}
		return i, j, true
	}

	func decode(r io.Reader, usingStdin bool) error {
	i, j, ok := parseRange(*drangeFlag)
	if !ok {
	return fmt.Errorf("invalid -drange")
	}

	rs, ok := r.(io.ReadSeeker)
	if !ok {
	return fmt.Errorf("input is not seekable")
	}

	// Even if the os.File type is a ReadSeeker, it might not actually support
	// seeking. Instead, read all of stdin into memory.
	if usingStdin {
	if input, err := ioutil.ReadAll(r); err != nil {
	return err
	} else {
	rs = bytes.NewReader(input)
	}
	}

	racReader := &rac.Reader{
	ReadSeeker: rs,
	}
	switch *codecFlag {
	case "zlib":
	racReader.CodecReaders = []rac.CodecReader{&raczlib.CodecReader{}}
	default:
	return errors.New("unsupported -codec")
	}

	decompressedSize, err := racReader.Seek(0, io.SeekEnd)
	if err != nil {
	return err
	}
	if i < 0 {
	i = 0
	}
	if (j < 0) \|\| (j > decompressedSize) {
	j = decompressedSize
	}
	if i >= j {
	return nil
	}
	if _, err := racReader.Seek(i, io.SeekStart); err != nil {
	return err
	}

	_, err = io.Copy(os.Stdout, &io.LimitedReader{
	R: racReader,
	N: j - i,
	})
	return err
	}

	// encode reads all of r and writes it to stdout through a rac.Writer that
	// is configured from the encode-related flags.
	//
	// -indexlocation must be "start" or "end" and -codec must be "zlib". The
	// three size flags are parsed with parseNumber. At most one of -cchunksize
	// and -dchunksize may be non-zero; if both are zero, a 64 KiB dchunksize is
	// used. Each file named in -resources is read in full and appended to the
	// writer's ResourcesData. The writer's TempFile is an in-memory
	// bytes.Buffer.
	func encode(r io.Reader) error {
		var indexLocation rac.IndexLocation
		if loc := *indexlocationFlag; loc == "start" {
			indexLocation = rac.IndexLocationAtStart
		} else if loc == "end" {
			indexLocation = rac.IndexLocationAtEnd
		} else {
			return errors.New("invalid -indexlocation")
		}

		// Parse the size flags in a fixed order so that the first invalid one
		// determines the error message.
		var cchunksize, cpagesize, dchunksize int64
		for _, p := range []struct {
			dst  *int64
			text string
			fail string
		}{
			{&cchunksize, *cchunksizeFlag, "invalid -cchunksize"},
			{&cpagesize, *cpagesizeFlag, "invalid -cpagesize"},
			{&dchunksize, *dchunksizeFlag, "invalid -dchunksize"},
		} {
			if *p.dst = parseNumber(p.text); *p.dst < 0 {
				return errors.New(p.fail)
			}
		}

		switch {
		case (cchunksize != 0) && (dchunksize != 0):
			return errors.New("must specify none or one of -cchunksize or -dchunksize")
		case (cchunksize == 0) && (dchunksize == 0):
			dchunksize = 65536 // 64 KiB.
		}

		if *codecFlag != "zlib" {
			return errors.New("unsupported -codec")
		}
		w := &rac.Writer{
			Writer:        os.Stdout,
			CodecWriter:   &raczlib.CodecWriter{},
			IndexLocation: indexLocation,
			TempFile:      &bytes.Buffer{},
			CPageSize:     uint64(cpagesize),
			CChunkSize:    uint64(cchunksize),
			DChunkSize:    uint64(dchunksize),
		}

		if names := *resourcesFlag; names != "" {
			for _, name := range strings.Split(names, ",") {
				data, err := ioutil.ReadFile(name)
				if err != nil {
					return err
				}
				w.ResourcesData = append(w.ResourcesData, data)
			}
		}

		_, err := io.Copy(w, r)
		if err != nil {
			return err
		}
		return w.Close()
	}