blob: 544966a052f9fccaf1ac9c856e41907180fe4984 [file] [log] [blame]
// Copyright 2017 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//go:build ignore
// +build ignore
package main
// extract-giflzw.go extracts or analyzes the LZW-compressed block data in a
// GIF image.
//
// - Add the "-extract" flag to write the LZW data to a file. The initial
// byte in each written file is the LZW literal width.
//
// - Add the "-histogram" flag to print emission length histograms instead of
// writing out *.indexes.giflzw files.
//
// Add the optional "-allframes" flag to process all frames of an animated GIF,
// not just the first frame. It applies to both -extract and -histogram.
//
// Usage: go run extract-giflzw.go -extract foo.gif bar.gif
// go run extract-giflzw.go -histogram -allframes foo.gif bar.gif
import (
"bytes"
"compress/lzw"
"flag"
"fmt"
"io"
"os"
"path/filepath"
)
// Command line flags. Each one is a boolean switch that defaults to false.

// allframesFlag selects every frame of an animated GIF instead of only the
// first one.
var allframesFlag = flag.Bool("allframes", false, "process all frames, not just the first")

// extractFlag requests that the LZW data be written out to a file.
var extractFlag = flag.Bool("extract", false, "write LZW data to a file")

// histogramFlag requests that emission length histograms be printed.
var histogramFlag = flag.Bool("histogram", false, "print emission length histograms")
// main runs main1 and, if it fails, prints the error message (followed by a
// newline) to standard error and exits with status 1.
func main() {
	err := main1()
	if err == nil {
		return
	}
	os.Stderr.WriteString(err.Error() + "\n")
	os.Exit(1)
}
// main1 parses the command line flags and then processes each GIF file named
// by the remaining arguments. It returns the first error from reading a GIF
// or from writing an output file.
//
// For every input file, the LZW data of the first frame (or of every frame,
// if -allframes was given) is checked with checkLZW; a failed check only
// prints a warning. Depending on the flags, each frame's data is then written
// to a *.indexes.giflzw file and/or tallied into the global histogram, which
// is reset and printed once per input file.
func main1() error {
	flag.Parse()
	if !*extractFlag && !*histogramFlag {
		return fmt.Errorf("none of -extract or -histogram given")
	}

	for _, arg := range flag.Args() {
		frames, err := extractLZWFrames(arg)
		if err != nil {
			return err
		}

		if *histogramFlag {
			which := "first frame"
			if *allframesFlag {
				which = "all frames"
			}
			fmt.Printf("\n%s (%s)\n", arg, which)

			// The histogram is package-level state: start afresh for each
			// input file.
			histogramTotal = 0
			for i := range histogram {
				histogram[i] = 0
			}
		}

		// Output files are named after the input file, minus its extension.
		base := arg[:len(arg)-len(filepath.Ext(arg))]

		for frameIndex, frame := range frames {
			if err := checkLZW(frame); err != nil {
				fmt.Printf("Warning: frame #%d: %v\n", frameIndex, err)
			}

			if *extractFlag {
				filename := base
				if *allframesFlag {
					// Number the output by frame (not by argument), so that
					// the frames of one GIF do not overwrite each other.
					filename = fmt.Sprintf("%s-frame-%03d", filename, frameIndex)
				}
				filename += ".indexes.giflzw"
				if err := os.WriteFile(filename, frame, 0644); err != nil {
					return err
				}
			}

			if *histogramFlag {
				// Ignore any LZW format errors. Decode as much as we can.
				_ = buildHistogram(frame)
			}

			if !*allframesFlag {
				break
			}
		}

		if *histogramFlag {
			fmt.Printf(" Percent Count Emission_Length_Bucket\n")
			for i, h := range histogram {
				// Multiply in floating point so that 100*h cannot wrap
				// around, and report 0% (not NaN) when nothing was tallied.
				percent := 0.0
				if histogramTotal > 0 {
					percent = 100 * float64(h) / float64(histogramTotal)
				}
				fmt.Printf(" %6.2f %6d %s\n", percent, h, bucketNames[i])
			}
		}
	}
	return nil
}
// extractLZWFrames reads the named GIF file and returns one byte slice per
// image descriptor found, in file order. Each slice is what
// readImageDescriptor produces: the LZW literal width byte followed by the
// concatenated block data.
//
// It returns an error if the file cannot be read, does not start with a
// "GIF87a" or "GIF89a" header, contains an unsupported section, has data
// after the trailer, or has no trailer at all.
func extractLZWFrames(filename string) (ret [][]byte, err error) {
	contents, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	// The file starts with a 6 byte header and a 7 byte screen descriptor.
	const headerLen, screenDescLen = 6, 7
	if len(contents) < headerLen+screenDescLen {
		return nil, fmt.Errorf("not a GIF")
	}
	if magic := string(contents[:headerLen]); magic != "GIF87a" && magic != "GIF89a" {
		return nil, fmt.Errorf("not a GIF")
	}

	// Byte 10 holds the flags that say whether a global color table follows
	// the screen descriptor and, if so, how many entries it has.
	globalCTSize := 0
	if flags := contents[10]; flags&fColorTable != 0 {
		globalCTSize = 1 << (1 + flags&fColorTableBitsMask)
	}
	rest, err := skipColorTable(contents[headerLen+screenDescLen:], globalCTSize)
	if err != nil {
		return nil, err
	}

	// Walk the sections, each introduced by a one byte section marker.
	for len(rest) > 0 {
		switch section, body := rest[0], rest[1:]; section {
		case sTrailer:
			if len(body) != 0 {
				return nil, fmt.Errorf("extraneous data after GIF trailer section")
			}
			return ret, nil

		case sExtension:
			rest, err = skipExtension(body)
			if err != nil {
				return nil, err
			}

		case sImageDescriptor:
			var lzwData []byte
			rest, lzwData, err = readImageDescriptor(body)
			if err != nil {
				return nil, err
			}
			ret = append(ret, lzwData)

		default:
			return nil, fmt.Errorf("unsupported GIF section")
		}
	}
	return nil, fmt.Errorf("missing GIF trailer section")
}
// skipColorTable returns src with a leading color table of ctSize entries
// removed. Each entry occupies 3 bytes. A ctSize of zero means that there is
// no color table, so src is returned unchanged. It returns an error if src is
// too short to hold the whole table.
func skipColorTable(src []byte, ctSize int) (src1 []byte, err error) {
	tableLen := 3 * ctSize
	if tableLen > len(src) {
		return nil, fmt.Errorf("short color table")
	}
	return src[tableLen:], nil
}
// skipExtension skips over a GIF extension section. src starts just after the
// section marker byte, at the extension label. It returns the remainder of
// src after the extension's block terminator.
//
// It returns an error if the extension is truncated or if its label is not
// one of the plain text, graphic control, comment or application extensions.
func skipExtension(src []byte) (src1 []byte, err error) {
	if len(src) < 2 {
		return nil, fmt.Errorf("bad GIF extension")
	}
	ext := src[0]
	blockSize := int(src[1])
	if len(src) < 2+blockSize {
		return nil, fmt.Errorf("bad GIF extension")
	}
	src = src[2+blockSize:]

	switch ext {
	case ePlainText, eGraphicControl, eComment, eApplication:
		if blockSize == 0 {
			// A zero-sized first block is itself the block terminator (for
			// example, an empty comment extension). The extension ends here:
			// reading further blocks would misinterpret the next section's
			// bytes as block data.
			return src, nil
		}
		// Skip the remaining blocks, up to and including the terminator.
		src1, _, err = readBlockData(src, nil)
		return src1, err
	}
	return nil, fmt.Errorf("unsupported GIF extension")
}
// readImageDescriptor parses an image descriptor section. src starts just
// after the section marker byte. It skips the 9 byte descriptor and any local
// color table, and then reads the LZW literal width byte and the block data
// that follows it.
//
// On success, src1 is the remainder of src after the block terminator and
// ret1 is the literal width byte followed by the concatenated block data. It
// returns an error if the section is truncated or if the literal width is
// outside of the range 2 ..= 8.
func readImageDescriptor(src []byte) (src1 []byte, ret1 []byte, err error) {
	const descriptorLen = 9
	if len(src) < descriptorLen {
		return nil, nil, fmt.Errorf("bad GIF image descriptor")
	}

	// The descriptor's final byte holds the local color table flags.
	localCTSize := 0
	if flags := src[descriptorLen-1]; flags&fColorTable != 0 {
		localCTSize = 1 << (1 + flags&fColorTableBitsMask)
	}
	rest, err := skipColorTable(src[descriptorLen:], localCTSize)
	if err != nil {
		return nil, nil, err
	}

	if len(rest) == 0 {
		return nil, nil, fmt.Errorf("bad GIF image descriptor")
	}
	lw := rest[0]
	if lw > 8 || lw < 2 {
		return nil, nil, fmt.Errorf("bad GIF literal width")
	}
	return readBlockData(rest[1:], []byte{lw})
}
// readBlockData reads a sequence of blocks from src. Each block is a one byte
// size followed by that many bytes of payload. A block of size zero
// terminates the sequence.
//
// Every payload is appended to ret. On success, src1 is the remainder of src
// after the terminating block and ret1 is the extended ret. It returns an
// error if src ends before the terminating block is seen.
func readBlockData(src []byte, ret []byte) (src1 []byte, ret1 []byte, err error) {
	for len(src) > 0 {
		size := int(src[0])
		src = src[1:]
		if size > len(src) {
			break
		}
		if size == 0 {
			return src, ret, nil
		}
		ret = append(ret, src[:size]...)
		src = src[size:]
	}
	return nil, nil, fmt.Errorf("bad GIF block")
}
// checkLZW reports whether data decodes cleanly as LZW. The first byte of
// data is the literal width and the remaining bytes are the compressed
// stream, in LSB (least significant bit first) order. The decoded bytes are
// discarded: only the presence or absence of a decoding error matters.
func checkLZW(data []byte) error {
	if len(data) < 1 {
		return fmt.Errorf("missing GIF literal width")
	}
	litWidth, compressed := int(data[0]), data[1:]

	decoder := lzw.NewReader(bytes.NewReader(compressed), lzw.LSB, litWidth)
	defer decoder.Close()

	if _, err := io.ReadAll(decoder); err != nil {
		return fmt.Errorf("block data is not valid LZW: %v", err)
	}
	return nil
}
// numBuckets is the number of emission length buckets in the histogram.
const numBuckets = 14

var (
	// histogramTotal is the sum of all of the histogram's counts.
	histogramTotal = uint32(0)

	// histogram counts the number of LZW codes seen per emission length
	// bucket. It is indexed by the bucket function's return value.
	histogram = [numBuckets]uint32{}

	// bucketNames holds the printed label for each bucket. The ranges must
	// agree with the bucket function: the final two buckets hold the emission
	// lengths 129 ..= 256 and 257 or more.
	bucketNames = [numBuckets]string{
		" 1 (Literal Code)",
		" 2",
		" 3",
		" 4",
		" 5",
		" 6",
		" 7",
		" 8",
		" 9 .. 16",
		" 17 .. 32",
		" 33 .. 64",
		" 65 .. 128",
		"129 .. 256",
		"257+ ",
	}
)
// bucket maps an emission length to its histogram bucket index.
//
// Lengths 1 ..= 8 each get their own bucket (0 ..= 7). Longer lengths share
// buckets whose upper bounds double each time: up to 16, 32, 64, 128 and 256
// map to buckets 8 ..= 12. Anything longer maps to bucket 13.
//
// It panics if emissionLength is zero.
func bucket(emissionLength uint32) uint32 {
	if emissionLength == 0 {
		panic("bad emissionLength")
	}
	if emissionLength <= 8 {
		return emissionLength - 1
	}
	upperBound := uint32(16)
	for index := uint32(8); index <= 12; index++ {
		if emissionLength <= upperBound {
			return index
		}
		upperBound *= 2
	}
	return 13
}
// buildHistogram walks the LZW codes in data and, for every literal or copy
// code, adds one to the global histogram bucket for the number of bytes that
// the code would emit. It also increments histogramTotal once per such code.
//
// The first byte of data is the literal width, which must be in the range
// 2 ..= 8. The remaining bytes are the LZW stream, with codes packed least
// significant bit first.
//
// It returns nil on reaching the end code. It returns an error if data is
// empty, the literal width is out of range, the stream ends before an end
// code is seen or a code is larger than any code defined so far. Counts
// tallied before an error is detected are kept.
func buildHistogram(data []byte) error {
	if len(data) == 0 {
		return fmt.Errorf("missing GIF literal width")
	}
	literalWidth, data := uint32(data[0]), data[1:]
	// Reject widths that the GIF format does not allow. Without this check,
	// a large width would index past the end of the 4096 entry table below.
	if literalWidth < 2 || 8 < literalWidth {
		return fmt.Errorf("bad GIF literal width")
	}

	clearCode := uint32(1) << literalWidth
	endCode := clearCode + 1
	// saveCode is the next table slot to be defined. It starts at endCode
	// (not endCode + 1) so that the first code after a clear, which has no
	// predecessor, harmlessly defines a slot that is never looked up.
	saveCode := endCode
	prevCode := uint32(0)
	width := literalWidth + 1

	// emissionLengths[c] is the number of bytes that the code c emits.
	// Literal codes emit exactly one byte.
	emissionLengths := [4096]uint32{}
	for i := uint32(0); i < clearCode; i++ {
		emissionLengths[i] = 1
	}

	// bits holds the low nBits bits of input that are not yet consumed.
	bits := uint32(0)
	nBits := uint32(0)

	for {
		// Refill until there are enough bits for one code.
		for nBits < width {
			if len(data) == 0 {
				return fmt.Errorf("bad LZW data: short read")
			}
			bits |= uint32(data[0]) << nBits
			data = data[1:]
			nBits += 8
		}
		code := bits & ((uint32(1) << width) - 1)
		bits >>= width
		nBits -= width

		switch {
		case code < clearCode:
			// Literal, emitting exactly 1 byte.
		case code == clearCode:
			// Reset the table and the code width.
			saveCode = endCode
			prevCode = 0
			width = literalWidth + 1
			continue
		case code == endCode:
			return nil
		case code <= saveCode:
			// Copy, emitting 2 or more bytes.
		default:
			return fmt.Errorf("bad LZW data: code out of range")
		}

		n := emissionLengths[code]
		if code == saveCode {
			// The code refers to the slot that is about to be defined, whose
			// length is one more than the previous code's length.
			n = emissionLengths[prevCode] + 1
		}
		histogram[bucket(n)]++
		histogramTotal++

		// Define the next slot, unless the table is already full.
		if saveCode <= 4095 {
			emissionLengths[saveCode] = emissionLengths[prevCode] + 1
			saveCode++
			// Widen the codes once the table outgrows the current width, up
			// to the maximum of 12 bits.
			if (saveCode == (1 << width)) && (width < 12) {
				width++
			}
			prevCode = code
		}
	}
}
// The constants below are intrinsic to the GIF file format. The spec is
// http://www.w3.org/Graphics/GIF/spec-gif89a.txt

// Flags, as found in the packed fields byte of the screen descriptor and of
// the image descriptor.
const (
	fColorTableBitsMask = 0x07
	fColorTable         = 0x80
)

// Sections: the byte that introduces each top-level part of the file.
const (
	sExtension       = 0x21
	sImageDescriptor = 0x2C
	sTrailer         = 0x3B
)

// Extensions: the label byte that follows sExtension.
const (
	ePlainText      = 0x01
	eGraphicControl = 0xF9
	eComment        = 0xFE
	eApplication    = 0xFF
)