blob: 5b3fd366454c13b415d38cd8965601b5d3505d82 [file] [log] [blame]
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------
// Package dumbindent formats C (and C-like) programs.
//
// It is similar in concept to pretty-printers like `indent` or `clang-format`.
// It is much dumber (it will not add line breaks or otherwise re-flow lines of
// code just to fit within an 80 character limit) but it can therefore be much,
// much faster at the basic task of automatically indenting nested blocks. The
// output isn't 'perfect', but it's usually sufficiently readable if the input
// already has sensible line breaks.
//
// See `cmd/dumbindent/main.go` in this repository for an example where
// `dumbindent` was 80 times faster than `clang-format`.
//
// There are no configuration options (e.g. tabs versus spaces).
package dumbindent
import (
"bytes"
)
// 'Constants', but their type is []byte, not string.
var (
backTick = []byte("`")
externC = []byte("extern \"C\"")
spaces = []byte(" ")
starSlash = []byte("*/")
)
// hangingBytes is a look-up table for updating the hanging variable.
var hangingBytes = [256]bool{
'=': true,
'\\': true,
}
// FormatBytes formats the C (or C-like) program in src, appending the result
// to dst, and returns that longer slice.
//
// It is valid to pass a dst slice (such as nil) whose unused capacity
// (cap(dst) - len(dst)) is too short to hold the formatted program. In this
// case, a new slice will be allocated and returned.
func FormatBytes(dst []byte, src []byte) []byte {
src = trimLeadingWhiteSpaceAndNewLines(src)
if len(src) == 0 {
return dst
} else if len(dst) == 0 {
dst = make([]byte, 0, len(src)+(len(src)/2))
}
nBraces := 0 // The number of unbalanced '{'s.
nParens := 0 // The number of unbalanced '('s.
openBrace := false // Whether the previous non-blank line ends with '{'.
hanging := false // Whether the previous non-blank line ends with '=' or '\\'.
blankLine := false // Whether the previous line was blank.
outer:
for line, remaining := src, []byte(nil); len(src) > 0; src = remaining {
src = trimLeadingWhiteSpace(src)
line, remaining = src, nil
if i := bytes.IndexByte(line, '\n'); i >= 0 {
line, remaining = line[:i], line[i+1:]
}
lineLength := len(line)
// Collapse 2 or more consecutive blank lines into 1. Also strip any
// blank lines:
// - immediately after a '{',
// - immediately before a '}',
// - at the end of file.
if len(line) == 0 {
blankLine = true
continue
}
if blankLine {
blankLine = false
if !openBrace && (line[0] != '}') {
dst = append(dst, '\n')
}
}
// Preprocessor lines (#ifdef, #pragma, etc) are never indented.
//
// The '{' and '}' for an `extern "C"` are also special cased not to
// change indentation inside the block. This assumes that the closing
// brace is followed by a `// extern "C"` comment.
if (line[0] == '#') ||
((line[0] == 'e') && bytes.HasPrefix(line, externC)) ||
((line[0] == '}') && bytes.HasSuffix(line, externC)) {
line = trimTrailingWhiteSpace(line)
dst = append(dst, line...)
dst = append(dst, '\n')
openBrace = false
hanging = lastNonWhiteSpace(line) == '\\'
continue
}
// Account for leading '}'s before we print the line's indentation.
closeBraces := 0
for ; (closeBraces < len(line)) && line[closeBraces] == '}'; closeBraces++ {
}
nBraces -= closeBraces
// Output a certain number of spaces to rougly approximate the
// "clang-format -style=Chromium" indentation style.
indent := 0
if nBraces > 0 {
indent += 2 * nBraces
}
if (nParens > 0) || hanging {
indent += 4
}
for indent > 0 {
n := indent
if n > len(spaces) {
n = len(spaces)
}
dst = append(dst, spaces[:n]...)
indent -= n
}
// Output the leading '}'s.
dst = append(dst, line[:closeBraces]...)
line = line[closeBraces:]
// Adjust the state according to the braces and parentheses within the
// line (except for those in comments and strings).
last := lastNonWhiteSpace(line)
inner:
for {
for i, c := range line {
switch c {
case '{':
nBraces++
if l := lastNonWhiteSpace(line[:i]); (l != '=') && (l != ':') {
if breakAfterBrace(line[i+1:]) {
dst = append(dst, line[:i+1]...)
dst = append(dst, '\n')
restOfLine := line[i+1:]
remaining = src[lineLength-len(restOfLine):]
openBrace = true
hanging = false
continue outer
}
}
case '}':
nBraces--
case '(':
nParens++
case ')':
nParens--
case ';':
if (nParens == 0) && (breakAfterSemicolon(line[i+1:])) {
dst = append(dst, line[:i+1]...)
dst = append(dst, '\n')
restOfLine := line[i+1:]
remaining = src[lineLength-len(restOfLine):]
openBrace = false
hanging = false
continue outer
}
case '/':
if (i + 1) >= len(line) {
break
}
if line[i+1] == '/' {
// A slash-slash comment. Skip the rest of the line.
last = lastNonWhiteSpace(line[:i])
break inner
} else if line[i+1] == '*' {
// A slash-star comment.
dst = append(dst, line[:i+2]...)
restOfLine := line[i+2:]
restOfSrc := src[lineLength-len(restOfLine):]
dst, line, remaining = handleRaw(dst, restOfSrc, starSlash)
last = lastNonWhiteSpace(line)
continue inner
}
case '"', '\'':
// A cooked string, whose contents are backslash-escaped.
suffix := skipCooked(line[i+1:], c)
dst = append(dst, line[:len(line)-len(suffix)]...)
line = suffix
continue inner
case '`':
// A raw string.
dst = append(dst, line[:i+1]...)
restOfLine := line[i+1:]
restOfSrc := src[lineLength-len(restOfLine):]
dst, line, remaining = handleRaw(dst, restOfSrc, backTick)
last = lastNonWhiteSpace(line)
continue inner
}
}
break inner
}
openBrace = last == '{'
hanging = hangingBytes[last]
// Output the line (minus any trailing space).
line = trimTrailingWhiteSpace(line)
dst = append(dst, line...)
dst = append(dst, "\n"...)
}
return dst
}
// trimLeadingWhiteSpaceAndNewLines converts "\t\n foo bar " to "foo bar ".
func trimLeadingWhiteSpaceAndNewLines(s []byte) []byte {
for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t') || (s[0] == '\n')) {
s = s[1:]
}
return s
}
// trimLeadingWhiteSpace converts "\t\t foo bar " to "foo bar ".
func trimLeadingWhiteSpace(s []byte) []byte {
for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t')) {
s = s[1:]
}
return s
}
// trimTrailingWhiteSpace converts "\t\t foo bar " to "\t\t foo bar".
func trimTrailingWhiteSpace(s []byte) []byte {
for (len(s) > 0) && ((s[len(s)-1] == ' ') || (s[len(s)-1] == '\t')) {
s = s[:len(s)-1]
}
return s
}
// lastNonWhiteSpace returns the 'z' in "abc xyz ". It returns '\x00' if s
// consists entirely of spaces or tabs.
func lastNonWhiteSpace(s []byte) byte {
for i := len(s) - 1; i >= 0; i-- {
if x := s[i]; (x != ' ') && (x != '\t') {
return x
}
}
return 0
}
// skipCooked converts `ijk \" lmn" pqr` to ` pqr`.
func skipCooked(s []byte, quote byte) (suffix []byte) {
for i := 0; i < len(s); {
if x := s[i]; x == quote {
return s[i+1:]
} else if x != '\\' {
i += 1
} else if (i + 1) < len(s) {
i += 2
} else {
break
}
}
return nil
}
// handleRaw copies a raw string from restOfSrc to dst, re-calculating the
// (line, remaining) pair afterwards.
func handleRaw(dst []byte, restOfSrc []byte, endQuote []byte) (retDst []byte, line []byte, remaining []byte) {
end := bytes.Index(restOfSrc, endQuote)
if end < 0 {
end = len(restOfSrc)
} else {
end += len(endQuote)
}
dst = append(dst, restOfSrc[:end]...)
line, remaining = restOfSrc[end:], nil
if i := bytes.IndexByte(line, '\n'); i >= 0 {
line, remaining = line[:i], line[i+1:]
}
return dst, line, remaining
}
// breakAfterBrace returns whether the first non-space non-tab byte of s (if
// any) does not look like a comment or another open-brace.
func breakAfterBrace(s []byte) bool {
for _, c := range s {
if (c != ' ') && (c != '\t') {
return (c != '/') && (c != '{')
}
}
return false
}
// breakAfterBrace returns whether the first non-space non-tab byte of s (if
// any) does not look like a comment.
func breakAfterSemicolon(s []byte) bool {
for _, c := range s {
if (c != ' ') && (c != '\t') {
return c != '/'
}
}
return false
}