Add {cmd,lib}/dumbindent
diff --git a/cmd/dumbindent/main.go b/cmd/dumbindent/main.go
new file mode 100644
index 0000000..e19b135
--- /dev/null
+++ b/cmd/dumbindent/main.go
@@ -0,0 +1,180 @@
+// Copyright 2020 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ----------------
+
+// dumbindent formats C (and C-like) programs.
+//
+// Without explicit paths, it rewrites the standard input to standard output.
+// Otherwise, the -l or -w or both flags must be given. Given a file path, it
+// operates on that file; given a directory path, it operates on all *.{c,h}
+// files in that directory, recursively. File paths starting with a period are
+// ignored.
+//
+// It is similar in concept to pretty-printers like `indent` or `clang-format`.
+// It is much dumber (it will not add line breaks or otherwise re-flow lines of
+// code, not to fit within an 80 character limit nor for any other reason) but
+// it can therefore be much, much faster at the basic task of automatically
+// indenting nested blocks. The output isn't 'perfect', but it's usually
+// sufficiently readable if the input already has sensible line breaks.
+//
+// There are no configuration options (e.g. tabs versus spaces).
+//
+// Known bug: it cannot handle /* slash-star comments */ or multi-line strings
+// yet. This is tracked at https://github.com/google/wuffs/issues/31
+package main
+
+import (
+ "bytes"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+
+ "github.com/google/wuffs/lib/dumbindent"
+)
+
+// Command-line flags. They are registered on the default flag set here and
+// only hold meaningful values after main1 has called flag.Parse.
+var (
+ // lFlag (-l): do prints the name of each file whose formatted output
+ // differs from its current contents.
+ lFlag = flag.Bool("l", false, "list files whose formatting differs from dumbindent's")
+ // wFlag (-w): do rewrites each such file in place via writeFile.
+ wFlag = flag.Bool("w", false, "write result to (source) file instead of stdout")
+)
+
+// usage writes the one-line synopsis to standard error and then lets the flag
+// package print the default value and help text of every registered flag.
+// main1 installs it as flag.Usage.
+func usage() {
+	const synopsis = "usage: dumbindent [flags] [path ...]\n"
+	fmt.Fprint(os.Stderr, synopsis)
+	flag.PrintDefaults()
+}
+
+// main runs main1 and, if it fails, prints the error on its own line to
+// standard error and exits with status 1.
+func main() {
+	err := main1()
+	if err == nil {
+		return
+	}
+	msg := err.Error() + "\n"
+	os.Stderr.WriteString(msg)
+	os.Exit(1)
+}
+
+// main1 is the body of main, returning an error instead of exiting.
+//
+// With no path arguments it formats standard input to standard output; the -l
+// and -w flags are rejected in that mode. With path arguments, at least one of
+// -l or -w is required, and every argument is processed in order: directories
+// are walked recursively (see walk) and anything else is formatted as a single
+// file. Processing stops at the first error, which is returned.
+func main1() error {
+	flag.Usage = usage
+	flag.Parse()
+
+	if flag.NArg() == 0 {
+		if *lFlag {
+			return errors.New("cannot use -l with standard input")
+		}
+		if *wFlag {
+			return errors.New("cannot use -w with standard input")
+		}
+		return do(os.Stdin, "<standard input>")
+	}
+
+	if !*lFlag && !*wFlag {
+		return errors.New("must use -l or -w if paths are given")
+	}
+
+	for i := 0; i < flag.NArg(); i++ {
+		arg := flag.Arg(i)
+		info, err := os.Stat(arg)
+		if err != nil {
+			return err
+		}
+		// Only a failure ends the loop. Returning unconditionally after a
+		// successful directory walk would silently skip every argument that
+		// follows the first directory.
+		if info.IsDir() {
+			err = filepath.Walk(arg, walk)
+		} else {
+			err = do(nil, arg)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// isCHFile reports whether info describes a non-directory entry whose name
+// does not begin with a period and ends in ".c" or ".h".
+func isCHFile(info os.FileInfo) bool {
+	name := info.Name()
+	if info.IsDir() {
+		return false
+	}
+	if strings.HasPrefix(name, ".") {
+		return false
+	}
+	for _, ext := range [...]string{".c", ".h"} {
+		if strings.HasSuffix(name, ext) {
+			return true
+		}
+	}
+	return false
+}
+
+// walk is the filepath.WalkFunc used for directory arguments. It formats each
+// entry accepted by isCHFile and passes any other error through, so that the
+// walk stops on it.
+func walk(filename string, info os.FileInfo, err error) error {
+	if err == nil && isCHFile(info) {
+		err = do(nil, filename)
+	}
+	switch {
+	case err == nil:
+		return nil
+	case os.IsNotExist(err):
+		// The directory changed while this program was running and the file
+		// has been deleted in the meantime. That is not worth complaining
+		// about.
+		return nil
+	default:
+		return err
+	}
+}
+
+// do formats a single input.
+//
+// If r is non-nil, the source is read from r and the formatted result is
+// written to standard output; filename is not used to locate the data. If r is
+// nil, the source is read from the file called filename and, only when the
+// formatted result differs from it, the name is printed (with -l) and/or the
+// file is rewritten (with -w).
+func do(r io.Reader, filename string) error {
+	fromReader := r != nil
+
+	var (
+		src []byte
+		err error
+	)
+	if fromReader {
+		src, err = ioutil.ReadAll(r)
+	} else {
+		src, err = ioutil.ReadFile(filename)
+	}
+	if err != nil {
+		return err
+	}
+
+	dst, err := dumbindent.Format(src)
+	if err != nil {
+		return err
+	}
+
+	if fromReader {
+		_, err = os.Stdout.Write(dst)
+		return err
+	}
+
+	// Already formatted: nothing to list and nothing to write.
+	if bytes.Equal(dst, src) {
+		return nil
+	}
+	if *lFlag {
+		fmt.Println(filename)
+	}
+	if *wFlag {
+		return writeFile(filename, dst)
+	}
+	return nil
+}
+
+// chmodSupported is false on Windows, where writeFile does not try to copy
+// the original file's permission bits onto its temporary file.
+const chmodSupported = runtime.GOOS != "windows"
+
+// writeFile replaces the contents of filename with b.
+//
+// The data is first written to a temporary file created in the same directory
+// as filename, which is then renamed over filename, so a failed write never
+// leaves a truncated target behind. If any step fails, the temporary file is
+// removed and the first error encountered is returned.
+func writeFile(filename string, b []byte) error {
+	f, err := ioutil.TempFile(filepath.Dir(filename), filepath.Base(filename))
+	if err != nil {
+		return err
+	}
+	tmp := f.Name()
+
+	if chmodSupported {
+		if info, err := os.Stat(filename); err == nil {
+			// Best effort: carry the original permission bits over. A failure
+			// here is deliberately ignored rather than aborting the write.
+			_ = f.Chmod(info.Mode().Perm())
+		}
+	}
+
+	// Always close the file, but let a write error take precedence over a
+	// close error.
+	_, err = f.Write(b)
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err == nil {
+		err = os.Rename(tmp, filename)
+	}
+	if err != nil {
+		// Covers a failed rename too, which would otherwise leave the
+		// temporary file next to the target.
+		os.Remove(tmp)
+		return err
+	}
+	return nil
+}
diff --git a/internal/cgen/cgen.go b/internal/cgen/cgen.go
index 3c65529..0c51f32 100644
--- a/internal/cgen/cgen.go
+++ b/internal/cgen/cgen.go
@@ -30,6 +30,7 @@
"github.com/google/wuffs/lang/builtin"
"github.com/google/wuffs/lang/generate"
+ "github.com/google/wuffs/lib/dumbindent"
cf "github.com/google/wuffs/cmd/commonflags"
@@ -200,6 +201,8 @@
// C code, so further C formatting is unnecessary (and not costless).
if (*cformatterFlag == "") || (pkgName == "base") {
return unformatted, nil
+ } else if *cformatterFlag == "dumbindent" {
+ return dumbindent.Format(unformatted)
}
stdout := &bytes.Buffer{}
diff --git a/lib/dumbindent/dumbindent.go b/lib/dumbindent/dumbindent.go
new file mode 100644
index 0000000..0c2b01f
--- /dev/null
+++ b/lib/dumbindent/dumbindent.go
@@ -0,0 +1,225 @@
+// Copyright 2020 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ----------------
+
+// Package dumbindent formats C (and C-like) programs.
+//
+// It is similar in concept to pretty-printers like `indent` or `clang-format`.
+// It is much dumber (it will not add line breaks or otherwise re-flow lines of
+// code, not to fit within an 80 character limit nor for any other reason) but
+// it can therefore be much, much faster at the basic task of automatically
+// indenting nested blocks. The output isn't 'perfect', but it's usually
+// sufficiently readable if the input already has sensible line breaks.
+//
+// There are no configuration options (e.g. tabs versus spaces).
+//
+// Known bug: it cannot handle /* slash-star comments */ or multi-line strings
+// yet. This is tracked at https://github.com/google/wuffs/issues/31
+package dumbindent
+
+import (
+ "bytes"
+ "errors"
+)
+
+// 'Constants', but their type is []byte, not string.
+var (
+ // externC is matched against the start and the end of a line by Format,
+ // which emits `extern "C"` opening lines, and closing-brace lines ending
+ // in that text, without indentation.
+ externC = []byte("extern \"C\"")
+ // spaces is the buffer that Format copies indentation from, at most
+ // len(spaces) bytes per append.
+ spaces = []byte(" ")
+)
+
+// Format formats the C (or C-like) program in src.
+//
+// Each input line has its leading and trailing spaces and tabs removed and is
+// then re-indented according to the number of unbalanced braces seen so far,
+// with extra indentation while inside unbalanced parentheses or directly after
+// a line ending in '='. Runs of blank lines are collapsed into one and
+// trailing blank lines are dropped. Lines are never joined or split.
+//
+// It returns a nil dst and a non-nil error if it encounters a slash-star
+// comment, or a string or character literal that is not terminated on the
+// line it starts on.
+//
+// All indentation state is local to the call, so one call cannot influence
+// the result of another.
+func Format(src []byte) (dst []byte, retErr error) {
+	// Per-call state. Keeping this in package-level variables would let an
+	// input with unbalanced braces or parentheses corrupt the indentation of
+	// every later call, and would make concurrent calls race.
+	nBraces := 0       // The number of unbalanced '{'s.
+	nParens := 0       // The number of unbalanced '('s.
+	hangingEq := false // Whether the previous line ends with '='.
+
+	dst = make([]byte, 0, len(src)+(len(src)/2))
+	blankLine := false
+	for line, remaining := src, []byte(nil); len(src) > 0; src = remaining {
+		// Split off the next line, without its '\n'.
+		line, remaining = src, nil
+		if i := bytes.IndexByte(line, '\n'); i >= 0 {
+			line, remaining = line[:i], line[i+1:]
+		}
+		line = trimSpace(line)
+
+		// Collapse 2 or more consecutive blank lines into 1. Also strip any
+		// trailing blank lines.
+		if len(line) == 0 {
+			blankLine = true
+			continue
+		}
+		if blankLine {
+			blankLine = false
+			dst = append(dst, '\n')
+		}
+
+		// Preprocessor lines (#ifdef, #pragma, etc) are never indented, and
+		// neither are the lines that open or close an extern "C" block. Such
+		// lines do not update the state either.
+		if (line[0] == '#') ||
+			((line[0] == 'e') && bytes.HasPrefix(line, externC)) ||
+			((line[0] == '}') && bytes.HasSuffix(line, externC)) {
+			dst = append(dst, line...)
+			dst = append(dst, '\n')
+			continue
+		}
+
+		// Account for leading '}'s before we print the line's indentation.
+		closeBraces := 0
+		for ; (closeBraces < len(line)) && line[closeBraces] == '}'; closeBraces++ {
+		}
+		nBraces -= closeBraces
+
+		// When debugging, uncomment these (and import "strconv") to prefix
+		// every non-blank line with the current state.
+		//
+		// dst = append(dst, strconv.Itoa(nBraces)...)
+		// dst = append(dst, ',')
+		// dst = append(dst, strconv.Itoa(nParens)...)
+		// if hangingEq {
+		//   dst = append(dst, '=')
+		// } else {
+		//   dst = append(dst, ':')
+		// }
+
+		// Output a certain number of spaces to roughly approximate the
+		// "clang-format -style=Chromium" indentation style.
+		indent := 0
+		if nBraces > 0 {
+			indent += 2 * nBraces
+		}
+		if (nParens > 0) || hangingEq {
+			indent += 4
+		}
+		if (indent >= 2) && isLabel(line) {
+			indent -= 2
+		}
+		// Copy the indentation out of the spaces buffer, in as many chunks as
+		// its length requires.
+		for indent > 0 {
+			n := indent
+			if n > len(spaces) {
+				n = len(spaces)
+			}
+			dst = append(dst, spaces[:n]...)
+			indent -= n
+		}
+		hangingEq = false
+
+		// Output the line itself.
+		dst = append(dst, line...)
+		dst = append(dst, "\n"...)
+
+		// Adjust the state according to the braces and parentheses within the
+		// line (except for those in comments and strings). The leading '}'s
+		// have already been counted above.
+		last := lastNonWhiteSpace(line)
+	loop:
+		for s := line[closeBraces:]; ; {
+			for i, c := range s {
+				switch c {
+				case '{':
+					nBraces++
+				case '}':
+					nBraces--
+				case '(':
+					nParens++
+				case ')':
+					nParens--
+				case '/':
+					if (i + 1) >= len(s) {
+						// A lone trailing slash. This only leaves the switch.
+						break
+					}
+					if s[i+1] == '/' {
+						// A slash-slash comment. Skip the rest of the line,
+						// and look for a hanging '=' before the comment.
+						last = lastNonWhiteSpace(s[:i])
+						break loop
+					} else if s[i+1] == '*' {
+						return nil, errors.New("dumbindent: TODO: support slash-star comments")
+					}
+				case '"', '\'':
+					// Resume scanning after the closing quote.
+					if suffix, err := skipString(s[i+1:], c); err != nil {
+						return nil, err
+					} else {
+						s = suffix
+					}
+					continue loop
+				}
+			}
+			break loop
+		}
+		hangingEq = last == '='
+	}
+	return dst, nil
+}
+
+// trimSpace returns s without its leading and trailing spaces and tabs, so
+// that "\t foo bar " becomes "foo bar". No other bytes are treated as blank.
+// The result is a sub-slice of s.
+func trimSpace(s []byte) []byte {
+	lo, hi := 0, len(s)
+	for lo < hi {
+		if c := s[lo]; (c != ' ') && (c != '\t') {
+			break
+		}
+		lo++
+	}
+	for lo < hi {
+		if c := s[hi-1]; (c != ' ') && (c != '\t') {
+			break
+		}
+		hi--
+	}
+	return s[lo:hi]
+}
+
+// isLabel reports whether s looks like a label such as "foo:" or "bar_baz:;".
+//
+// Any trailing semicolons are ignored. What remains must end in a colon, and
+// the one or more bytes before that colon must all be ASCII letters, digits
+// or underscores.
+func isLabel(s []byte) bool {
+	end := len(s)
+	for (end > 0) && (s[end-1] == ';') {
+		end--
+	}
+	if (end < 2) || (s[end-1] != ':') {
+		return false
+	}
+	for _, c := range s[:end-1] {
+		isDigit := ('0' <= c) && (c <= '9')
+		isUpper := ('A' <= c) && (c <= 'Z')
+		isLower := ('a' <= c) && (c <= 'z')
+		if !(isDigit || isUpper || isLower || (c == '_')) {
+			return false
+		}
+	}
+	return true
+}
+
+// lastNonWhiteSpace returns the final byte of s that is neither a space nor a
+// tab: the 'z' in "abc xyz ". If there is no such byte (s is empty or holds
+// only spaces and tabs), it returns '\x00'.
+func lastNonWhiteSpace(s []byte) byte {
+	for n := len(s); n > 0; n-- {
+		switch c := s[n-1]; c {
+		case ' ', '\t':
+			// Keep scanning backwards.
+		default:
+			return c
+		}
+	}
+	return 0
+}
+
+// skipString returns what follows the closing quote of a string or character
+// literal, given s positioned just after the opening quote. For example, it
+// converts `ijk \" lmn" pqr` to ` pqr`.
+//
+// A backslash escapes the byte after it, so an escaped quote does not end the
+// literal. If s contains no unescaped closing quote, it returns an error.
+func skipString(s []byte, quote byte) (suffix []byte, retErr error) {
+	for i := 0; i < len(s); i++ {
+		switch s[i] {
+		case quote:
+			return s[i+1:], nil
+		case '\\':
+			// Step over the escaped byte too. A backslash that is the very
+			// last byte simply runs the loop off the end.
+			i++
+		}
+	}
+	return nil, errors.New("dumbindent: TODO: support multi-line strings")
+}