Add test/data/artificial-bzip2/abraca.txt.bz2
diff --git a/script/make-artificial.go b/script/make-artificial.go
index 24a4fcd..fe41d8c 100644
--- a/script/make-artificial.go
+++ b/script/make-artificial.go
@@ -88,7 +88,7 @@
const rep = "repeat "
if strings.HasPrefix(s, rep) {
args := s[len(rep):]
- count, args, ok := parseNum(args)
+ count, args, ok := parseNum32(args)
if !ok || count <= 0 || args != "[" {
return fmt.Errorf("bad repeat command: %q", s)
}
@@ -157,7 +157,7 @@
return -1
}
-func parseHex(s string) (num uint32, remaining string, ok bool) {
+func parseHex32(s string) (num uint32, remaining string, ok bool) {
if i := strings.IndexByte(s, ' '); i >= 0 {
s, remaining = s[:i], s[i+1:]
for len(remaining) > 0 && remaining[0] == ' ' {
@@ -177,7 +177,27 @@
return uint32(u), remaining, true
}
-func parseNum(s string) (num uint32, remaining string, ok bool) {
+func parseHex64(s string) (num uint64, remaining string, ok bool) {
+ if i := strings.IndexByte(s, ' '); i >= 0 {
+ s, remaining = s[:i], s[i+1:]
+ for len(remaining) > 0 && remaining[0] == ' ' {
+ remaining = remaining[1:]
+ }
+ }
+
+ if len(s) < 2 || s[0] != '0' || s[1] != 'x' {
+ return 0, "", false
+ }
+ s = s[2:]
+
+ u, err := strconv.ParseUint(s, 16, 64)
+ if err != nil {
+ return 0, "", false
+ }
+ return u, remaining, true
+}
+
+func parseNum32(s string) (num uint32, remaining string, ok bool) {
if i := strings.IndexByte(s, ' '); i >= 0 {
s, remaining = s[:i], s[i+1:]
for len(remaining) > 0 && remaining[0] == ' ' {
@@ -238,6 +258,70 @@
// ----
func init() {
+ formats["bzip2"] = stateBzip2
+}
+
+var bzip2Globals struct {
+ stream bzip2BitStream
+}
+
+func stateBzip2(line string) (stateFunc, error) {
+ g := &bzip2Globals
+ const (
+ cmdB = "bits "
+ )
+ switch {
+ case line == "":
+ g.stream.flush()
+ return stateBzip2, nil
+
+ case strings.HasPrefix(line, cmdB):
+ s := strings.TrimSpace(line[len(cmdB):])
+ n, s, ok := parseNum32(s)
+ if !ok || s == "" {
+ break
+ }
+ x, s, ok := parseHex64(s)
+ if !ok {
+ break
+ }
+ g.stream.writeBits64(x, n)
+ return stateBzip2, nil
+ }
+
+ return nil, fmt.Errorf("bad stateBzip2 command: %q", line)
+}
+
+type bzip2BitStream struct {
+ bits uint64
+ nBits uint32 // Always within [0, 7].
+}
+
+// writeBits64 writes the low n bits of b to z.
+func (z *bzip2BitStream) writeBits64(b uint64, n uint32) {
+ if n > 56 {
+ panic("writeBits64: n is too large")
+ }
+ z.bits |= b << (64 - n - z.nBits)
+ z.nBits += n
+ for z.nBits >= 8 {
+ out = append(out, uint8(z.bits>>56))
+ z.bits <<= 8
+ z.nBits -= 8
+ }
+}
+
+func (z *bzip2BitStream) flush() {
+ if z.nBits > 0 {
+ out = append(out, uint8(z.bits>>56))
+ z.bits = 0
+ z.nBits = 0
+ }
+}
+
+// ----
+
+func init() {
formats["deflate"] = stateDeflate
}
@@ -529,7 +613,7 @@
s := line[len(cmdB):]
for s != "" {
x, ok := uint32(0), false
- x, s, ok = parseHex(s)
+ x, s, ok = parseHex32(s)
if !ok {
return nil, fmt.Errorf("bad stateDeflate command: %q", line)
}
@@ -728,7 +812,7 @@
default:
s := line
- n, s, ok := parseNum(s)
+ n, s, ok := parseNum32(s)
if !ok || s == "" {
break
}
@@ -743,7 +827,7 @@
if g.etcetera {
g.etcetera = false
- n0, s0, ok := parseNum(g.prevLine)
+ n0, s0, ok := parseNum32(g.prevLine)
if !ok {
return nil, fmt.Errorf("bad etcetera command")
}
@@ -965,10 +1049,10 @@
s := line
if strings.HasPrefix(s, lStr) {
s = s[len(lStr):]
- if l, s, ok := parseNum(s); ok && 3 <= l && l <= 258 {
+ if l, s, ok := parseNum32(s); ok && 3 <= l && l <= 258 {
if strings.HasPrefix(s, dStr) {
s = s[len(dStr):]
- if d, s, ok := parseNum(s); ok && 1 <= d && d <= 32768 && s == "" {
+ if d, s, ok := parseNum32(s); ok && 1 <= d && d <= 32768 && s == "" {
return l, d, true
}
}
@@ -1035,7 +1119,7 @@
s := line[len(cmdB):]
for s != "" {
x, ok := uint32(0), false
- x, s, ok = parseHex(s)
+ x, s, ok = parseHex32(s)
if !ok {
break outer
}
@@ -1076,7 +1160,7 @@
flags |= 0x01
transparentIndex = uint8(num)
case strings.HasSuffix(term, ms):
- num, remaining, ok := parseNum(term[:len(term)-len(ms)])
+ num, remaining, ok := parseNum32(term[:len(term)-len(ms)])
if !ok || remaining != "" {
break outer
}
@@ -1098,7 +1182,7 @@
case strings.HasPrefix(line, cmdL):
s := line[len(cmdL):]
- litWidth, s, ok := parseNum(s)
+ litWidth, s, ok := parseNum32(s)
if !ok || litWidth < 2 || 8 < litWidth {
break
}
@@ -1107,7 +1191,7 @@
uncompressed := []byte(nil)
for s != "" {
x := uint32(0)
- x, s, ok = parseHex(s)
+ x, s, ok = parseHex32(s)
if !ok {
break outer
}
@@ -1140,7 +1224,7 @@
case strings.HasPrefix(line, cmdLC):
s := line[len(cmdLC):]
- loopCount, _, ok := parseNum(s)
+ loopCount, _, ok := parseNum32(s)
if !ok || 0xFFFF < loopCount {
break
}
@@ -1191,15 +1275,15 @@
switch {
case strings.HasPrefix(line, cmdBCI):
s := line[len(cmdBCI):]
- if i, _, ok := parseNum(s); ok {
+ if i, _, ok := parseNum32(s); ok {
g.imageBackgroundColorIndex = i
}
return stateGifImage, nil
case strings.HasPrefix(line, cmdIWH):
s := line[len(cmdIWH):]
- if w, s, ok := parseNum(s); ok {
- if h, _, ok := parseNum(s); ok {
+ if w, s, ok := parseNum32(s); ok {
+ if h, _, ok := parseNum32(s); ok {
g.imageWidth = w
g.imageHeight = h
return stateGifImage, nil
@@ -1248,10 +1332,10 @@
case strings.HasPrefix(line, cmdFLTWH):
s := line[len(cmdFLTWH):]
- if l, s, ok := parseNum(s); ok {
- if t, s, ok := parseNum(s); ok {
- if w, s, ok := parseNum(s); ok {
- if h, _, ok := parseNum(s); ok {
+ if l, s, ok := parseNum32(s); ok {
+ if t, s, ok := parseNum32(s); ok {
+ if w, s, ok := parseNum32(s); ok {
+ if h, _, ok := parseNum32(s); ok {
g.frameLeft = l
g.frameTop = t
g.frameWidth = w
@@ -1276,9 +1360,9 @@
}
s := line
- if rgb0, s, ok := parseHex(s); ok {
- if rgb1, s, ok := parseHex(s); ok {
- if rgb2, _, ok := parseHex(s); ok {
+ if rgb0, s, ok := parseHex32(s); ok {
+ if rgb1, s, ok := parseHex32(s); ok {
+ if rgb2, _, ok := parseHex32(s); ok {
g.globalPalette = append(g.globalPalette,
[4]uint8{uint8(rgb0), uint8(rgb1), uint8(rgb2), 0xFF})
return stateGifImagePalette, nil
@@ -1296,9 +1380,9 @@
}
s := line
- if rgb0, s, ok := parseHex(s); ok {
- if rgb1, s, ok := parseHex(s); ok {
- if rgb2, _, ok := parseHex(s); ok {
+ if rgb0, s, ok := parseHex32(s); ok {
+ if rgb1, s, ok := parseHex32(s); ok {
+ if rgb2, _, ok := parseHex32(s); ok {
g.localPalette = append(g.localPalette,
[4]uint8{uint8(rgb0), uint8(rgb1), uint8(rgb2), 0xFF})
return stateGifFramePalette, nil
@@ -1381,7 +1465,7 @@
}
for s := line; s != ""; {
- if x, remaining, ok := parseHex(s); ok {
+ if x, remaining, ok := parseHex32(s); ok {
g.chunkData.WriteByte(byte(x))
s = remaining
} else {
@@ -1401,7 +1485,7 @@
}
for s := line; s != ""; {
- if x, remaining, ok := parseHex(s); ok {
+ if x, remaining, ok := parseHex32(s); ok {
g.scratch[0] = byte(x)
g.zlibWriter.Write(g.scratch[:1])
s = remaining
diff --git a/test/data/artificial-bzip2/abraca.txt.bz2 b/test/data/artificial-bzip2/abraca.txt.bz2
new file mode 100644
index 0000000..c4931a7
--- /dev/null
+++ b/test/data/artificial-bzip2/abraca.txt.bz2
Binary files differ
diff --git a/test/data/artificial-bzip2/abraca.txt.bz2.make-artificial.txt b/test/data/artificial-bzip2/abraca.txt.bz2.make-artificial.txt
new file mode 100644
index 0000000..abfac12
--- /dev/null
+++ b/test/data/artificial-bzip2/abraca.txt.bz2.make-artificial.txt
@@ -0,0 +1,48 @@
+# Feed this file to script/make-artificial.go
+
+# This script generates test/data/abraca.txt.bz2 exactly, following the
+# description in std/bzip2/README.md and its sections (a) ..= (s).
+#
+# By itself, this isn't a very interesting foo.make-artificial.txt file, in
+# that it doesn't make *artificial* bzip2 test data (as opposed to test data
+# *naturally* created by the /usr/bin/bzip2 tool). But this file is a useful
+# diff-base for other bar.make-artificial.txt files in the same directory.
+
+make bzip2
+
+# Sections (a) ..= (g)
+bits 16 0x425A
+bits 8 0x68
+bits 8 0x39
+bits 48 0x314159265359
+bits 32 0x76A70995
+bits 1 0x0
+bits 24 0x000001
+
+# Sections (h) ..= (j)
+bits 16 0x0300
+bits 16 0x7000
+bits 16 0x2000
+
+# Sections (k) ..= (m)
+bits 3 0x2
+bits 15 0x0001
+bits 1 0x0
+
+# Sections (n) ..= (o)
+bits 19 0x0CD34
+bits 19 0x0CD34
+
+# Section (p)
+bits 3 0x6
+bits 2 0x0
+bits 2 0x1
+bits 2 0x0
+bits 3 0x4
+bits 2 0x1
+bits 3 0x7
+
+# Sections (q) ..= (s)
+bits 48 0x177245385090
+bits 32 0x76A70995
+bits 5 0x00