Have dumbindent break after some '{' and ';'s

commit: 53d5f5155cdb51d4e6ed235b0c36533830110a5f [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Sun Jun 07 21:34:51 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Sun Jun 07 21:52:24 2020 +1000
tree: 9814bf90a99c8a226050d951ffbb19f55787b6ec
parent: 709e1699557baba658e3a68a455aa11323961df5 [diff]
diff --git a/cmd/dumbindent/main.go b/cmd/dumbindent/main.go
index d01652f..b171d5c 100644
--- a/cmd/dumbindent/main.go
+++ b/cmd/dumbindent/main.go

@@ -24,10 +24,10 @@
 //
 // It is similar in concept to pretty-printers like `indent` or `clang-format`.
 // It is much dumber (it will not add line breaks or otherwise re-flow lines of
-// code, not to fit within an 80 character limit nor for any other reason) but
-// it can therefore be much, much faster at the basic task of automatically
-// indenting nested blocks. The output isn't 'perfect', but it's usually
-// sufficiently readable if the input already has sensible line breaks.
+// code just to fit within an 80 character limit) but it can therefore be much,
+// much faster at the basic task of automatically indenting nested blocks. The
+// output isn't 'perfect', but it's usually sufficiently readable if the input
+// already has sensible line breaks.
 //
 // An example of "much, much faster", 80 times faster than clang-format:
 // ----

diff --git a/internal/cgen/cgen.go b/internal/cgen/cgen.go
index 2db9c38..b06f000 100644
--- a/internal/cgen/cgen.go
+++ b/internal/cgen/cgen.go

@@ -131,7 +131,6 @@
 					for _, n := range builtin.Interfaces {
 						buf.printf("const char* wuffs_base__%s__vtable_name = "+
 							"\"{vtable}wuffs_base__%s\";\n", n, n)
-
 					}
 					return nil
 				},
@@ -1383,10 +1382,10 @@
 		}
 
 		b.printf("{\n")
-		b.printf("wuffs_base__status z = %s%s__initialize("+
+		b.printf("wuffs_base__status z = %s%s__initialize(\n"+
 			"&self->private_data.%s%s, sizeof(self->private_data.%s%s), WUFFS_VERSION, initialize_flags);\n",
 			prefix, qid[1].Str(g.tm), fPrefix, f.Name().Str(g.tm), fPrefix, f.Name().Str(g.tm))
-		b.printf("if (z.repr) { return z; }\n")
+		b.printf("if (z.repr) {\nreturn z;\n}\n")
 		b.printf("}\n")
 	}
 

diff --git a/lib/dumbindent/dumbindent.go b/lib/dumbindent/dumbindent.go
index f1657f6..5b3fd36 100644
--- a/lib/dumbindent/dumbindent.go
+++ b/lib/dumbindent/dumbindent.go

@@ -18,10 +18,10 @@
 //
 // It is similar in concept to pretty-printers like `indent` or `clang-format`.
 // It is much dumber (it will not add line breaks or otherwise re-flow lines of
-// code, not to fit within an 80 character limit nor for any other reason) but
-// it can therefore be much, much faster at the basic task of automatically
-// indenting nested blocks. The output isn't 'perfect', but it's usually
-// sufficiently readable if the input already has sensible line breaks.
+// code just to fit within an 80 character limit) but it can therefore be much,
+// much faster at the basic task of automatically indenting nested blocks. The
+// output isn't 'perfect', but it's usually sufficiently readable if the input
+// already has sensible line breaks.
 //
 // See `cmd/dumbindent/main.go` in this repository for an example where
 // `dumbindent` was 80 times faster than `clang-format`.
@@ -50,10 +50,11 @@
 // FormatBytes formats the C (or C-like) program in src, appending the result
 // to dst, and returns that longer slice.
 //
-// It is valid to pass a dst slice (such as nil) whose spare capacity (not
-// including its existing elements) is too short to hold the formatted program.
-// In this case, a new slice will be allocated and returned.
+// It is valid to pass a dst slice (such as nil) whose unused capacity
+// (cap(dst) - len(dst)) is too short to hold the formatted program. In this
+// case, a new slice will be allocated and returned.
 func FormatBytes(dst []byte, src []byte) []byte {
+	src = trimLeadingWhiteSpaceAndNewLines(src)
 	if len(src) == 0 {
 		return dst
 	} else if len(dst) == 0 {
@@ -66,6 +67,7 @@
 	hanging := false   // Whether the previous non-blank line ends with '=' or '\\'.
 	blankLine := false // Whether the previous line was blank.
 
+outer:
 	for line, remaining := src, []byte(nil); len(src) > 0; src = remaining {
 		src = trimLeadingWhiteSpace(src)
 		line, remaining = src, nil
@@ -137,12 +139,23 @@
 		// Adjust the state according to the braces and parentheses within the
 		// line (except for those in comments and strings).
 		last := lastNonWhiteSpace(line)
-	loop:
+	inner:
 		for {
 			for i, c := range line {
 				switch c {
 				case '{':
 					nBraces++
+					if l := lastNonWhiteSpace(line[:i]); (l != '=') && (l != ':') {
+						if breakAfterBrace(line[i+1:]) {
+							dst = append(dst, line[:i+1]...)
+							dst = append(dst, '\n')
+							restOfLine := line[i+1:]
+							remaining = src[lineLength-len(restOfLine):]
+							openBrace = true
+							hanging = false
+							continue outer
+						}
+					}
 				case '}':
 					nBraces--
 				case '(':
@@ -150,6 +163,17 @@
 				case ')':
 					nParens--
 
+				case ';':
+					if (nParens == 0) && (breakAfterSemicolon(line[i+1:])) {
+						dst = append(dst, line[:i+1]...)
+						dst = append(dst, '\n')
+						restOfLine := line[i+1:]
+						remaining = src[lineLength-len(restOfLine):]
+						openBrace = false
+						hanging = false
+						continue outer
+					}
+
 				case '/':
 					if (i + 1) >= len(line) {
 						break
@@ -157,7 +181,7 @@
 					if line[i+1] == '/' {
 						// A slash-slash comment. Skip the rest of the line.
 						last = lastNonWhiteSpace(line[:i])
-						break loop
+						break inner
 					} else if line[i+1] == '*' {
 						// A slash-star comment.
 						dst = append(dst, line[:i+2]...)
@@ -165,7 +189,7 @@
 						restOfSrc := src[lineLength-len(restOfLine):]
 						dst, line, remaining = handleRaw(dst, restOfSrc, starSlash)
 						last = lastNonWhiteSpace(line)
-						continue loop
+						continue inner
 					}
 
 				case '"', '\'':
@@ -173,7 +197,7 @@
 					suffix := skipCooked(line[i+1:], c)
 					dst = append(dst, line[:len(line)-len(suffix)]...)
 					line = suffix
-					continue loop
+					continue inner
 
 				case '`':
 					// A raw string.
@@ -182,10 +206,10 @@
 					restOfSrc := src[lineLength-len(restOfLine):]
 					dst, line, remaining = handleRaw(dst, restOfSrc, backTick)
 					last = lastNonWhiteSpace(line)
-					continue loop
+					continue inner
 				}
 			}
-			break loop
+			break inner
 		}
 		openBrace = last == '{'
 		hanging = hangingBytes[last]
@@ -198,7 +222,15 @@
 	return dst
 }
 
-// trimLeadingWhiteSpace converts "\t  foo bar " to "foo bar ".
+// trimLeadingWhiteSpaceAndNewLines converts "\t\n  foo bar " to "foo bar ".
+func trimLeadingWhiteSpaceAndNewLines(s []byte) []byte {
+	for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t') || (s[0] == '\n')) {
+		s = s[1:]
+	}
+	return s
+}
+
+// trimLeadingWhiteSpace converts "\t\t  foo bar " to "foo bar ".
 func trimLeadingWhiteSpace(s []byte) []byte {
 	for (len(s) > 0) && ((s[0] == ' ') || (s[0] == '\t')) {
 		s = s[1:]
@@ -206,7 +238,7 @@
 	return s
 }
 
-// trimTrailingWhiteSpace converts "\t  foo bar " to "\t  foo bar".
+// trimTrailingWhiteSpace converts "\t\t  foo bar " to "\t\t  foo bar".
 func trimTrailingWhiteSpace(s []byte) []byte {
 	for (len(s) > 0) && ((s[len(s)-1] == ' ') || (s[len(s)-1] == '\t')) {
 		s = s[:len(s)-1]
@@ -257,3 +289,25 @@
 	}
 	return dst, line, remaining
 }
+
+// breakAfterBrace returns whether the first non-space non-tab byte of s (if
+// any) does not look like a comment or another open-brace.
+func breakAfterBrace(s []byte) bool {
+	for _, c := range s {
+		if (c != ' ') && (c != '\t') {
+			return (c != '/') && (c != '{')
+		}
+	}
+	return false
+}
+
+// breakAfterSemicolon returns whether the first non-space non-tab byte of s
+// (if any) does not look like a comment.
+func breakAfterSemicolon(s []byte) bool {
+	for _, c := range s {
+		if (c != ' ') && (c != '\t') {
+			return c != '/'
+		}
+	}
+	return false
+}

diff --git a/lib/dumbindent/dumbindent_test.go b/lib/dumbindent/dumbindent_test.go
index 45de5bf..b54a64c 100644
--- a/lib/dumbindent/dumbindent_test.go
+++ b/lib/dumbindent/dumbindent_test.go

@@ -15,6 +15,7 @@
 package dumbindent
 
 import (
+	"os"
 	"testing"
 )
 
@@ -66,6 +67,14 @@
 		// Nested blocks with label.
 		src:  "if (b) {\nlabel:\nswitch (i) {\ncase 0:\nj = k\nbreak;\n}\n}\n",
 		want: "if (b) {\n  label:\n  switch (i) {\n    case 0:\n    j = k\n    break;\n  }\n}\n",
+	}, {
+		// Inserted line breaks.
+		src:  "if (x) { goto fail; }\n",
+		want: "if (x) {\n  goto fail;\n}\n",
+	}, {
+		// Leading blank lines.
+		src:  "\n\n\n  x = y;",
+		want: "x = y;\n",
 	}}
 
 	for i, tc := range testCases {
@@ -74,3 +83,29 @@
 		}
 	}
 }
+
+func ExampleFormatBytes() {
+	const src = `
+// Blah blah blah.
+
+
+for (i = 0; i < 3; i++) {
+j = 0; j < 4; j++;
+if (i < j) { foo(); }
+}
+`
+
+	os.Stdout.Write(FormatBytes(nil, []byte(src)))
+
+	// Output:
+	// // Blah blah blah.
+	//
+	// for (i = 0; i < 3; i++) {
+	//   j = 0;
+	//   j < 4;
+	//   j++;
+	//   if (i < j) {
+	//     foo();
+	//   }
+	// }
+}

diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index ac55550..af8aab7 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c

@@ -17003,8 +17003,11 @@
   }
 
   {
-    wuffs_base__status z = wuffs_lzw__decoder__initialize(&self->private_data.f_lzw, sizeof(self->private_data.f_lzw), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_lzw__decoder__initialize(
+        &self->private_data.f_lzw, sizeof(self->private_data.f_lzw), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   self->private_impl.magic = WUFFS_BASE__MAGIC;
   self->private_impl.vtable_for__wuffs_base__image_decoder.vtable_name =
@@ -21193,12 +21196,18 @@
   }
 
   {
-    wuffs_base__status z = wuffs_crc32__ieee_hasher__initialize(&self->private_data.f_checksum, sizeof(self->private_data.f_checksum), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_crc32__ieee_hasher__initialize(
+        &self->private_data.f_checksum, sizeof(self->private_data.f_checksum), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   {
-    wuffs_base__status z = wuffs_deflate__decoder__initialize(&self->private_data.f_flate, sizeof(self->private_data.f_flate), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_deflate__decoder__initialize(
+        &self->private_data.f_flate, sizeof(self->private_data.f_flate), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   self->private_impl.magic = WUFFS_BASE__MAGIC;
   self->private_impl.vtable_for__wuffs_base__io_transformer.vtable_name =
@@ -24667,16 +24676,25 @@
   }
 
   {
-    wuffs_base__status z = wuffs_adler32__hasher__initialize(&self->private_data.f_checksum, sizeof(self->private_data.f_checksum), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_adler32__hasher__initialize(
+        &self->private_data.f_checksum, sizeof(self->private_data.f_checksum), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   {
-    wuffs_base__status z = wuffs_adler32__hasher__initialize(&self->private_data.f_dict_id_hasher, sizeof(self->private_data.f_dict_id_hasher), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_adler32__hasher__initialize(
+        &self->private_data.f_dict_id_hasher, sizeof(self->private_data.f_dict_id_hasher), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   {
-    wuffs_base__status z = wuffs_deflate__decoder__initialize(&self->private_data.f_flate, sizeof(self->private_data.f_flate), WUFFS_VERSION, initialize_flags);
-    if (z.repr) { return z; }
+    wuffs_base__status z = wuffs_deflate__decoder__initialize(
+        &self->private_data.f_flate, sizeof(self->private_data.f_flate), WUFFS_VERSION, initialize_flags);
+    if (z.repr) {
+      return z;
+    }
   }
   self->private_impl.magic = WUFFS_BASE__MAGIC;
   self->private_impl.vtable_for__wuffs_base__io_transformer.vtable_name =
commit	53d5f5155cdb51d4e6ed235b0c36533830110a5f	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Sun Jun 07 21:34:51 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Sun Jun 07 21:52:24 2020 +1000
tree	9814bf90a99c8a226050d951ffbb19f55787b6ec
parent	709e1699557baba658e3a68a455aa11323961df5 [diff]