blob: f18b229939f65e9f9c99ab24f30dc72a57faa106 [file] [log] [blame]
// Copyright 2017 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build ignore
// +build ignore
package main
// extract-deflate-offsets.go extracts the start and end offsets of the
// deflate-compressed data wrapped in a .gz file.
//
// Usage: go run extract-deflate-offsets.go foo.gz bar.gz
//
// Alternatively: go run extract-deflate-offsets.go -write-deflate foo.gz
//
// Alternatively: go run extract-deflate-offsets.go -write-zlib foo.gz
import (
"bytes"
"compress/flate"
"compress/zlib"
"crypto/md5"
"flag"
"fmt"
"hash/adler32"
"io"
"os"
"strings"
)
// writeDeflate is the -write-deflate command-line flag. When set, each input
// .gz file is converted to a raw deflate file instead of having its offsets
// printed.
var writeDeflate = flag.Bool("write-deflate", false, "whether to convert gzip to raw deflate")

// writeZlib is the -write-zlib command-line flag. When set (and -write-deflate
// is not), each input .gz file is converted to a zlib file.
var writeZlib = flag.Bool("write-zlib", false, "whether to convert gzip to zlib")
// A gzip file is deflate data with a header in front and a footer behind it.
// The container format is specified by RFC 1952:
// https://www.ietf.org/rfc/rfc1952.txt
//
// These are the bit masks for the header's FLG byte, one bit per flag,
// starting from the least significant bit.
const (
	flagText    = 1 << iota // FTEXT: bit 0.
	flagHCRC                // FHCRC: bit 1.
	flagExtra               // FEXTRA: bit 2.
	flagName                // FNAME: bit 3.
	flagComment             // FCOMMENT: bit 4.
)
// main runs main1 and, if it fails, reports the error on standard error and
// exits with status 1.
func main() {
	err := main1()
	if err == nil {
		return
	}
	msg := err.Error() + "\n"
	os.Stderr.WriteString(msg)
	os.Exit(1)
}
// main1 parses the command-line flags and decodes every remaining argument as
// a gzip file name, in order. It stops at, and returns, the first error.
func main1() error {
	flag.Parse()
	args := flag.Args()
	for idx := 0; idx < len(args); idx++ {
		if err := decode(args[idx]); err != nil {
			return err
		}
	}
	return nil
}
// decode reads the named gzip file and locates the raw deflate payload that
// sits between the gzip header and footer.
//
// If -write-deflate is set, the payload is written out as a .deflate file.
// Otherwise, if -write-zlib is set, it is re-wrapped as a .zlib file.
// Otherwise, one line is printed holding the payload's start offset, its end
// offset, the MD5 of the uncompressed data and the file name.
//
// It returns an error if the file cannot be read, does not look like gzip, is
// 256 MiB or larger, has a malformed optional header field, or if the payload
// is not valid deflate.
func decode(filename string) error {
	src, err := os.ReadFile(filename)
	if err != nil {
		return err
	}

	const (
		headerSize = 10 // The fixed-size part of the header.
		footerSize = 8  // Stripped below and otherwise ignored.
	)
	// 0x1F 0x8B is the gzip magic number and 0x08 is the deflate compression
	// method.
	if len(src) < headerSize+footerSize || src[0] != 0x1F || src[1] != 0x8B || src[2] != 0x08 {
		return fmt.Errorf("not a GZIP")
	}
	if len(src) >= 0x10000000 {
		return fmt.Errorf("file too large")
	}
	flags := src[3]
	i := headerSize
	// Drop the footer so that len(src) is the payload's end offset and so that
	// no optional header field can run into the footer.
	src = src[:len(src)-footerSize]

	// The optional header fields appear in this fixed order: extra, name,
	// comment, header CRC.
	if flags&flagExtra != 0 {
		// A 2-byte little-endian length followed by that many bytes.
		if len(src)-i < 2 {
			return fmt.Errorf("bad GZIP extra field")
		}
		xlen := int(src[i]) | int(src[i+1])<<8
		i += 2
		if len(src)-i < xlen {
			return fmt.Errorf("bad GZIP extra field")
		}
		i += xlen
	}
	if flags&flagName != 0 {
		if i, err = readString(src, i); err != nil {
			return err
		}
	}
	if flags&flagComment != 0 {
		if i, err = readString(src, i); err != nil {
			return err
		}
	}
	if flags&flagHCRC != 0 {
		// A 2-byte header checksum. It is skipped without being verified, just
		// as the footer is: only the payload offsets matter here.
		if len(src)-i < 2 {
			return fmt.Errorf("bad GZIP header CRC")
		}
		i += 2
	}

	// As a coherence check, the result should be valid deflate.
	uncompressed, err := checkDeflate(src[i:])
	if err != nil {
		return err
	}

	switch {
	case *writeDeflate:
		return doWriteDeflate(src[i:], uncompressed, filename)
	case *writeZlib:
		return doWriteZlib(src[i:], uncompressed, filename)
	}
	fmt.Printf("%7d %7d %x %s\n", i, len(src), md5.Sum(uncompressed), filename)
	return nil
}
// doWriteDeflate writes deflateCompressed, unchanged, to a file named after
// filename with any trailing ".gz" removed and ".deflate" appended. On success
// it prints the name of the file written. The uncompressed argument is unused.
func doWriteDeflate(deflateCompressed []byte, uncompressed []byte, filename string) error {
	outName := strings.TrimSuffix(filename, ".gz") + ".deflate"
	err := os.WriteFile(outName, deflateCompressed, 0666)
	if err == nil {
		fmt.Printf("wrote %s\n", outName)
	}
	return err
}
func doWriteZlib(deflateCompressed []byte, uncompressed []byte, filename string) error {
buf := bytes.NewBuffer(nil)
// The ZLIB header (as per https://www.ietf.org/rfc/rfc1950.txt) is 2
// bytes.
//
// The first byte's low 4 bits is the compression method: 8 means deflate.
// The first byte's high 4 bits is the compression info: 7 means a 32KiB
// deflate window size.
//
// The second byte's low 5 bits are a parity check. The 5th bit (0 in this
// case) indicates a preset dictionary. The high 2 bits (2 in this case)
// means the default compression algorithm.
buf.WriteString("\x78\x9c")
// Write the payload.
buf.Write(deflateCompressed)
// The ZLIB footer is 4 bytes: a big-endian checksum.
checksum := adler32.Checksum(uncompressed)
buf.WriteByte(uint8(checksum >> 24))
buf.WriteByte(uint8(checksum >> 16))
buf.WriteByte(uint8(checksum >> 8))
buf.WriteByte(uint8(checksum >> 0))
asZlib := buf.Bytes()
// As a coherence check, the result should be valid zlib.
if _, err := checkZlib(asZlib); err != nil {
return err
}
if strings.HasSuffix(filename, ".gz") {
filename = filename[:len(filename)-3]
}
filename += ".zlib"
if err := os.WriteFile(filename, asZlib, 0666); err != nil {
return err
}
fmt.Printf("wrote %s\n", filename)
return nil
}
// readString scans src, starting at index i, for a zero byte. It returns the
// index just past that zero byte, or an error if the end of src is reached
// without finding one.
func readString(src []byte, i int) (int, error) {
	for ; i < len(src); i++ {
		if src[i] == 0 {
			return i + 1, nil
		}
	}
	return 0, fmt.Errorf("bad GZIP string")
}
// checkDeflate decompresses x as raw deflate data and returns the uncompressed
// bytes.
//
// If decompression fails, it returns a nil slice and an error that wraps the
// underlying cause, so callers can inspect it with errors.Is or errors.As.
func checkDeflate(x []byte) ([]byte, error) {
	rc := flate.NewReader(bytes.NewReader(x))
	defer rc.Close()

	decoded, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid deflate: %w", err)
	}
	return decoded, nil
}
// checkZlib decompresses x as zlib data and returns the uncompressed bytes.
//
// If the zlib header is rejected or decompression fails, it returns a nil
// slice and an error that wraps the underlying cause, so callers can inspect
// it with errors.Is or errors.As.
func checkZlib(x []byte) ([]byte, error) {
	rc, err := zlib.NewReader(bytes.NewReader(x))
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	defer rc.Close()

	decoded, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	return decoded, nil
}