// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ----------------

// Package dumbindent formats C (and C-like) programs.
//
// It is similar in concept to pretty-printers like `indent` or `clang-format`.
// It is much dumber (it will not add or remove line breaks or otherwise
// re-flow lines of code just to fit within an 80 column limit) but it can
// therefore be much faster at the basic task of automatically indenting nested
// blocks. The output isn't 'perfect', but it's usually sufficiently readable
// if the input already has sensible line breaks.
//
// To quantify "much faster", on this one C file, `cmd/dumbindent` in this
// repository was 80 times faster than `clang-format`, even without a column
// limit:
//
//	$ wc release/c/wuffs-v0.2.c
//	 11858  35980 431885 release/c/wuffs-v0.2.c
//	$ time dumbindent                               < release/c/wuffs-v0.2.c > /dev/null
//	real    0m0.008s
//	user    0m0.005s
//	sys     0m0.005s
//	$ time clang-format-9                           < release/c/wuffs-v0.2.c > /dev/null
//	real    0m0.668s
//	user    0m0.618s
//	sys     0m0.032s
//	$ time clang-format-9 -style='{ColumnLimit: 0}' < release/c/wuffs-v0.2.c > /dev/null
//	real    0m0.641s
//	user    0m0.585s
//	sys     0m0.037s
//
// Apart from some rare and largely uninteresting exceptions, the dumbindent
// algorithm only considers:
//
//	∙ '{' and '}' curly braces,
//	∙ '(' and ')' round parentheses,
//	∙ '\n' line breaks,
//	∙ ' ' spaces and '\t' tabs that start or end a line, and
//	∙ strings, comments and preprocessor directives (in order to ignore any of
//	  the above special characters within them),
//
// Everything else is an opaque byte. Consider this input:
//
//	for (i = 0; i < 3; i++) {
//	j = 0;  // Ignore { in a comment.
//	if (i < j) { foo(); }
//	u = (v +
//	w);
//	}
//
// From the algorithm's point of view, this input is equivalent to:
//
//	....(.................).{
//	.................................
//	...(.....).{....()..}
//	....(...
//	.);
//	}
//
// The formatted output (using the default of 2 spaces per indent level) is:
//
//	....(.................).{
//	  .................................
//	  ...(.....).{....()..}
//	  ....(...
//	      .);
//	}
//
// Dumbindent adjusts lines horizontally (indenting) but not vertically (it
// does not break or un-break lines, or collapse consecutive blank lines),
// although it will remove blank lines at the end of the input. In the example
// above, it will not remove the "\n" between "u = (v +" and "w);", even though
// both lines are short.
//
// Each output line is indented according to the net number of open braces
// preceding it, although lines starting with close braces will outdent first,
// similar to `gofmt` style. A line which starts in a so-far-unbalanced open
// parenthesis, such as the "w);" line above, gets 2 additional indent levels.
//
// Horizontal adjustment only affects a line's leading white space (and will
// trim trailing white space). It does not affect white space within a line.
// Dumbindent does not parse the input as C/C++ source code.
//
// In particular, the algorithm does not solve C++'s "most vexing parse" or
// otherwise determine whether "x*y" is a multiplication or a type definition
// (where y is a pointer-to-x typed variable or function argument, such as
// "int*p"). For a type definition, where other formatting algorithms would
// re-write around the "*" as either "x* y" or "x *y", dumbindent will not
// insert spaces.
//
// Similarly, dumbindent will not correct this mis-indentation:
//
//	if (condition)
//	  goto fail;
//	  goto fail;
//
// Instead, when automatically or manually generating the input for dumbindent,
// it is recommended to always emit curly braces (again, similar to `gofmt`
// style), even for what would otherwise be 'one-liner' if statements.
//
// More commentary is at:
// https://nigeltao.github.io/blog/2020/dumbindent.html
package dumbindent

import (
	"bytes"
)

// 'Constants', but their type is []byte, not string.
var (
	backTick  = []byte("`")
	extern    = []byte("extern ")
	namespace = []byte("namespace ")
	starSlash = []byte("*/")

	newLines = []byte("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
	spaces   = []byte("                                ")
	tabs     = []byte("\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t")
)

// hangingBytes is a look-up table for updating the hanging variable.
var hangingBytes = [256]bool{
	'=':  true,
	'\\': true,
}

// Options are formatting options.
type Options struct {
	// Spaces, if positive, is the number of spaces per indent level. A
	// non-positive value means to use the default: 2 spaces per indent level.
	//
	// This field is ignored when Tabs is true.
	Spaces int

	// Tabs is whether to indent with tabs instead of spaces. If true, it's one
	// '\t' tab character per indent and the Spaces field is ignored.
	Tabs bool
}

// FormatBytes formats the C (or C-like) program in src, appending the result
// to dst, and returns that longer slice.
//
// It is valid to pass a dst slice (such as nil) whose unused capacity,
// cap(dst) - len(dst), is too short to hold the formatted program. In this
// case, a new slice will be allocated and returned.
//
// Passing a nil opts is valid and equivalent to passing &Options{}.
func FormatBytes(dst []byte, src []byte, opts *Options) []byte {
	indentBytes := spaces
	indentCount := 2
	if opts != nil {
		if opts.Tabs {
			indentBytes = tabs
			indentCount = 1
		} else if opts.Spaces > 0 {
			indentCount = opts.Spaces
		}
	}

	// Count the number of leading spaces (or tabs) on the first line. Every
	// output line starts with that much indent, before further indentation
	// from unbalanced braces and parentheses.
	initialIndent := countInitialOccurrences(src, indentBytes[0])

	src = trimLeadingWhiteSpaceAndNewLines(src)
	if len(src) == 0 {
		return dst
	} else if len(dst) == 0 {
		dst = make([]byte, 0, len(src)+(len(src)/2))
	}

	nBlankLines := 0 // The number of preceding blank lines.
	nBraces := 0     // The number of unbalanced '{'s.
	nParens := 0     // The number of unbalanced '('s.
	hanging := false // Whether the previous non-blank line ends with '=' or '\\'.
	preproc := false // Whether we're in a #preprocessor line.

	for line, remaining := src, []byte(nil); len(src) > 0; src = remaining {
		src = trimLeadingWhiteSpace(src)
		line, remaining = src, nil
		if i := bytes.IndexByte(line, '\n'); i >= 0 {
			line, remaining = line[:i], line[i+1:]
		}
		lineLength := len(line)

		// Strip any blank lines at the end of the file.
		if len(line) == 0 {
			nBlankLines++
			continue
		}
		if nBlankLines > 0 {
			dst = appendRepeatedBytes(dst, newLines, nBlankLines)
			nBlankLines = 0
		}

		// Handle preprocessor lines (#ifdef, #pragma, etc).
		if preproc || (line[0] == '#') {
			indent := initialIndent
			if preproc {
				indent += indentCount * 2
			}
			line = trimTrailingWhiteSpace(line)
			dst = appendRepeatedBytes(dst, indentBytes, indent)
			dst = append(dst, line...)
			dst = append(dst, '\n')
			hanging = false
			preproc = lastNonWhiteSpace(line) == '\\'
			continue
		}

		closeBraces := 0

		// Don't indent for `extern "C" {` or `namespace foo {`.
		if ((line[0] == 'e') && hasPrefixAndBrace(line, extern)) ||
			((line[0] == 'n') && hasPrefixAndBrace(line, namespace)) {
			nBraces--

		} else {
			// Account for leading '}'s before we print the line's indentation.
			for ; (closeBraces < len(line)) && line[closeBraces] == '}'; closeBraces++ {
			}
			nBraces -= closeBraces

			// Because the "{" in "extern .*{" and "namespace .*{" is had no
			// net effect on nBraces, the matching "}" can cause the nBraces
			// count to dip below zero. Correct for that here.
			if nBraces < 0 {
				nBraces = 0
			}
		}

		// Output indentation. Dumbindent's default, 2 spaces per indent level,
		// roughly approximates clang-format's default style.
		indent := initialIndent
		if nBraces > 0 {
			indent += indentCount * nBraces
		}
		if (nParens > 0) || hanging {
			indent += indentCount * 2
		}
		dst = appendRepeatedBytes(dst, indentBytes, indent)

		// Output the leading '}'s.
		dst = append(dst, line[:closeBraces]...)
		line = line[closeBraces:]

		// Adjust the state according to the braces and parentheses within the
		// line (except for those in comments and strings).
		last := lastNonWhiteSpace(line)
	loop:
		for {
			for i, c := range line {
				switch c {
				case '{':
					nBraces++
				case '}':
					nBraces--
				case '(':
					nParens++
				case ')':
					nParens--

				case '/':
					if (i + 1) >= len(line) {
						break
					}
					if line[i+1] == '/' {
						// A slash-slash comment. Skip the rest of the line.
						last = lastNonWhiteSpace(line[:i])
						break loop
					} else if line[i+1] == '*' {
						// A slash-star comment.
						dst = append(dst, line[:i+2]...)
						restOfLine := line[i+2:]
						restOfSrc := src[lineLength-len(restOfLine):]
						dst, line, remaining = handleRaw(dst, restOfSrc, starSlash)
						last = lastNonWhiteSpace(line)
						continue loop
					}

				case '"', '\'':
					// A cooked string, whose contents are backslash-escaped.
					suffix := skipCooked(line[i+1:], c)
					dst = append(dst, line[:len(line)-len(suffix)]...)
					line = suffix
					continue loop

				case '`':
					// A raw string.
					dst = append(dst, line[:i+1]...)
					restOfLine := line[i+1:]
					restOfSrc := src[lineLength-len(restOfLine):]
					dst, line, remaining = handleRaw(dst, restOfSrc, backTick)
					last = lastNonWhiteSpace(line)
					continue loop
				}
			}
			break loop
		}
		hanging = hangingBytes[last]

		// Output the line (minus any trailing space).
		line = trimTrailingWhiteSpace(line)
		dst = append(dst, line...)
		dst = append(dst, "\n"...)
	}
	return dst
}

// hasPrefixAndBrace returns whether line starts with prefix and after that
// contains a '{'.
func hasPrefixAndBrace(line []byte, prefix []byte) bool {
	return bytes.HasPrefix(line, prefix) &&
		bytes.IndexByte(line[len(prefix):], '{') >= 0
}

// trimLeadingWhiteSpaceAndNewLines converts "\t\n  foo bar " to "foo bar ".
func trimLeadingWhiteSpaceAndNewLines(s []byte) []byte {
	for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t') || (s[0] == '\n')) {
		s = s[1:]
	}
	return s
}

// trimLeadingWhiteSpace converts "\t\t  foo bar " to "foo bar ".
func trimLeadingWhiteSpace(s []byte) []byte {
	for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t')) {
		s = s[1:]
	}
	return s
}

// trimTrailingWhiteSpace converts "\t\t  foo bar " to "\t\t  foo bar".
func trimTrailingWhiteSpace(s []byte) []byte {
	for (len(s) > 0) && ((s[len(s)-1] == ' ') || (s[len(s)-1] == '\t')) {
		s = s[:len(s)-1]
	}
	return s
}

// appendRepeatedBytes appends number copies of a byte, assuming that
// repeatedBytes' elements are all the same byte.
func appendRepeatedBytes(dst []byte, repeatedBytes []byte, number int) []byte {
	for number > 0 {
		n := number
		if n > len(repeatedBytes) {
			n = len(repeatedBytes)
		}
		dst = append(dst, repeatedBytes[:n]...)
		number -= n
	}
	return dst
}

// countInitialOccurrences returns the length of the longest prefix of s that
// consists entirely of x. Passing ("aaabacdaa", 'a') returns 3.
func countInitialOccurrences(s []byte, x byte) int {
	for i, c := range s {
		if c != x {
			return i
		}
	}
	return len(s)
}

// lastNonWhiteSpace returns the 'z' in "abc xyz  ". It returns '\x00' if s
// consists entirely of spaces or tabs.
func lastNonWhiteSpace(s []byte) byte {
	for i := len(s) - 1; i >= 0; i-- {
		if x := s[i]; (x != ' ') && (x != '\t') {
			return x
		}
	}
	return 0
}

// skipCooked converts `ijk \" lmn" pqr` to ` pqr`.
func skipCooked(s []byte, quote byte) (suffix []byte) {
	for i := 0; i < len(s); {
		if x := s[i]; x == quote {
			return s[i+1:]
		} else if x != '\\' {
			i += 1
		} else if (i + 1) < len(s) {
			i += 2
		} else {
			break
		}
	}
	return nil
}

// handleRaw copies a raw string from restOfSrc to dst, re-calculating the
// (line, remaining) pair afterwards.
func handleRaw(dst []byte, restOfSrc []byte, endQuote []byte) (retDst []byte, line []byte, remaining []byte) {
	end := bytes.Index(restOfSrc, endQuote)
	if end < 0 {
		end = len(restOfSrc)
	} else {
		end += len(endQuote)
	}
	dst = append(dst, restOfSrc[:end]...)
	line, remaining = restOfSrc[end:], nil
	if i := bytes.IndexByte(line, '\n'); i >= 0 {
		line, remaining = line[:i], line[i+1:]
	}
	return dst, line, remaining
}
