blob: 31fc432e9672e9fb3551f1ec5aedbb2af77ef920 [file] [log] [blame]
package parsers
import (
"fmt"
"regexp"
"strings"
)
// SplitLinesAndRemoveComments takes a multiline string, splits it into lines, and returns two
// equal-length arrays of strings: one with the original lines, and another one with the original
// lines minus any "// line comments" and "/* block comments */".
//
// This should work for any language that uses C++-style comments, including TypeScript and Sass.
func SplitLinesAndRemoveComments(source string) ([]string, []string) {
verbatim := strings.Split(source, "\n")
noComments := stripBlockComments(verbatim)
noComments = stripCommentedOutLines(noComments)
return verbatim, noComments
}
var (
// singleLineBlockCommentRegexp matches a single-line /* block comment */, and captures any
// uncommented code before and after the block comment.
//
// Known limitation: This regexp ignores string literals.
singleLineBlockCommentRegexp = regexp.MustCompile(`(?P<uncommented_before>.*)/\*.*\*/(?P<uncommented_after>.*)`)
// blockCommentStartRegexp matches the "/*" at the beginning of a /* block comment */, and
// captures any uncommented code that precedes it.
//
// Known limitation: This regexp ignores the beginning of a block comment if it is preceded by a
// single or double quote, as the block comment itself might be part of a string literal.
blockCommentStartRegexp = regexp.MustCompile(fmt.Sprintf(`(?P<uncommented>[^'"%s]*)/\*`, "`"))
// blockCommentEndRegexp matches the "*/" at the end of a /* block comment */, and captures any
// uncommented code that succeeds it.
blockCommentEndRegexp = regexp.MustCompile(`\*/(?P<uncommented>.*)`)
)
// stripBlockComments strips /* block comments */ from the given lines of code. Any commented-out
// lines are replaced by blank lines. Thus, the returned array has the same length as the input
// array.
func stripBlockComments(lines []string) []string {
var outputLines []string
blockComment := false // Keeps track of whether we're currently inside a /* block comment */.
for _, line := range lines {
if !blockComment {
// We are not currently inside a /* block comment */. Does this line have one or more
// single-line block comments?
match := singleLineBlockCommentRegexp.FindStringSubmatch(line)
for len(match) > 0 {
// Remove the single-line block-comment and proceed as if it was never there.
line = match[1] + match[2]
match = singleLineBlockCommentRegexp.FindStringSubmatch(line)
}
// Does a multi-line block-comment start on the current line?
match = blockCommentStartRegexp.FindStringSubmatch(line)
if len(match) > 0 {
// Block comment found. Keep the portion of the line that precedes the
// "/*" characters.
blockComment = true
outputLines = append(outputLines, match[1])
} else {
// No block comment found. We can keep the current line as-is.
outputLines = append(outputLines, line)
}
} else {
// We are currently inside a /* block comment */. Does it end on the current line?
match := blockCommentEndRegexp.FindStringSubmatch(line)
if len(match) > 0 {
// Found the end of the block comment. Keep the portion of the line that succeeds
// the "*/" characters.
blockComment = false
outputLines = append(outputLines, match[1])
} else {
// We are still inside a block comment. The current line is replaced with a blank
// line.
outputLines = append(outputLines, "")
}
}
}
return outputLines
}
// commentedOutLineRegexp matches lines that are commented out via a single-line comment.
var commentedOutLineRegexp = regexp.MustCompile(`^\s*//`)
// stripCommentedOutLines replaces any lines beginning with a "//" comment with an empty line. The
// returned array has the same length as the input array.
func stripCommentedOutLines(lines []string) []string {
var outputLines []string
for _, line := range lines {
if commentedOutLineRegexp.MatchString(line) {
outputLines = append(outputLines, "")
} else {
outputLines = append(outputLines, line)
}
}
return outputLines
}