bazel/gazelle/parsers/parsers.go - buildbot - Git at Google

 package parsers

 import (
 	"fmt"
 	"regexp"
 	"strings"
 )

 // SplitLinesAndRemoveComments takes a multiline string, splits it into lines, and returns two
 // equal-length arrays of strings: one with the original lines, and another one with the original
 // lines minus any "// line comments" and "/* block comments */".
 //
 // This should work for any language that uses C++-style comments, including TypeScript and Sass.
 func SplitLinesAndRemoveComments(source string) ([]string, []string) {
 	verbatim := strings.Split(source, "\n")
 	noComments := stripBlockComments(verbatim)
 	noComments = stripCommentedOutLines(noComments)
 	return verbatim, noComments
 }

 var (
 	// singleLineBlockCommentRegexp matches a single-line /* block comment */, and captures any
 	// uncommented code before and after the block comment.
 	//
 	// Known limitation: This regexp ignores string literals.
 	singleLineBlockCommentRegexp = regexp.MustCompile(`(?P<uncommented_before>.*)/\*.*\*/(?P<uncommented_after>.*)`)

 	// blockCommentStartRegexp matches the "/*" at the beginning of a /* block comment */, and
 	// captures any uncommented code that precedes it.
 	//
 	// Known limitation: This regexp ignores the beginning of a block comment if it is preceded by a
 	// single or double quote, as the block comment itself might be part of a string literal.
 	blockCommentStartRegexp = regexp.MustCompile(fmt.Sprintf(`(?P<uncommented>[^'"%s]*)/\*`, "`"))

 	// blockCommentEndRegexp matches the "*/" at the end of a /* block comment */, and captures any
 	// uncommented code that succeeds it.
 	blockCommentEndRegexp = regexp.MustCompile(`\*/(?P<uncommented>.*)`)
 )

 // stripBlockComments strips /* block comments */ from the given lines of code. Any commented-out
 // lines are replaced by blank lines. Thus, the returned array has the same length as the input
 // array.
 func stripBlockComments(lines []string) []string {
 	var outputLines []string
 	blockComment := false // Keeps track of whether we're currently inside a /* block comment */.

 	for _, line := range lines {
 		if !blockComment {
 			// We are not currently inside a /* block comment */. Does this line have one or more
 			// single-line block comments?
 			match := singleLineBlockCommentRegexp.FindStringSubmatch(line)
 			for len(match) > 0 {
 				// Remove the single-line block-comment and proceed as if it was never there.
 				line = match[1] + match[2]
 				match = singleLineBlockCommentRegexp.FindStringSubmatch(line)
 			}

 			// Does a multi-line block-comment start on the current line?
 			match = blockCommentStartRegexp.FindStringSubmatch(line)
 			if len(match) > 0 {
 				// Block comment found. Keep the portion of the line that precedes the
 				// "/*" characters.
 				blockComment = true
 				outputLines = append(outputLines, match[1])
 			} else {
 				// No block comment found. We can keep the current line as-is.
 				outputLines = append(outputLines, line)
 			}
 		} else {
 			// We are currently inside a /* block comment */. Does it end on the current line?
 			match := blockCommentEndRegexp.FindStringSubmatch(line)
 			if len(match) > 0 {
 				// Found the end of the block comment. Keep the portion of the line that succeeds
 				// the "*/" characters.
 				blockComment = false
 				outputLines = append(outputLines, match[1])
 			} else {
 				// We are still inside a block comment. The current line is replaced with a blank
 				// line.
 				outputLines = append(outputLines, "")
 			}
 		}
 	}

 	return outputLines
 }

 // commentedOutLineRegexp matches lines that are commented out via a single-line comment.
 var commentedOutLineRegexp = regexp.MustCompile(`^\s*//`)

 // stripCommentedOutLines replaces any lines beginning with a "//" comment with an empty line. The
 // returned array has the same length as the input array.
 func stripCommentedOutLines(lines []string) []string {
 	var outputLines []string
 	for _, line := range lines {
 		if commentedOutLineRegexp.MatchString(line) {
 			outputLines = append(outputLines, "")
 		} else {
 			outputLines = append(outputLines, line)
 		}
 	}
 	return outputLines
 }
	package parsers

	import (
	"fmt"
	"regexp"
	"strings"
	)

	// SplitLinesAndRemoveComments takes a multiline string, splits it into lines, and returns two
	// equal-length arrays of strings: one with the original lines, and another one with the original
	// lines minus any "// line comments" and "/* block comments */".
	//
	// This should work for any language that uses C++-style comments, including TypeScript and Sass.
	func SplitLinesAndRemoveComments(source string) ([]string, []string) {
	verbatim := strings.Split(source, "\n")
	noComments := stripBlockComments(verbatim)
	noComments = stripCommentedOutLines(noComments)
	return verbatim, noComments
	}

	var (
	// singleLineBlockCommentRegexp matches a single-line /* block comment */, and captures any
	// uncommented code before and after the block comment.
	//
	// Known limitation: This regexp ignores string literals.
	singleLineBlockCommentRegexp = regexp.MustCompile(`(?P<uncommented_before>.)/\.\/(?P<uncommented_after>.*)`)

	// blockCommentStartRegexp matches the "/" at the beginning of a / block comment */, and
	// captures any uncommented code that precedes it.
	//
	// Known limitation: This regexp ignores the beginning of a block comment if it is preceded by a
	// single or double quote, as the block comment itself might be part of a string literal.
	blockCommentStartRegexp = regexp.MustCompile(fmt.Sprintf(`(?P<uncommented>[^'"%s])/\`, "`"))

	// blockCommentEndRegexp matches the "/" at the end of a / block comment */, and captures any
	// uncommented code that succeeds it.
	blockCommentEndRegexp = regexp.MustCompile(`\/(?P<uncommented>.)`)
	)

	// stripBlockComments strips /* block comments */ from the given lines of code. Any commented-out
	// lines are replaced by blank lines. Thus, the returned array has the same length as the input
	// array.
	func stripBlockComments(lines []string) []string {
	var outputLines []string
	blockComment := false // Keeps track of whether we're currently inside a /* block comment */.

	for _, line := range lines {
	if !blockComment {
	// We are not currently inside a /* block comment */. Does this line have one or more
	// single-line block comments?
	match := singleLineBlockCommentRegexp.FindStringSubmatch(line)
	for len(match) > 0 {
	// Remove the single-line block-comment and proceed as if it was never there.
	line = match[1] + match[2]
	match = singleLineBlockCommentRegexp.FindStringSubmatch(line)
	}

	// Does a multi-line block-comment start on the current line?
	match = blockCommentStartRegexp.FindStringSubmatch(line)
	if len(match) > 0 {
	// Block comment found. Keep the portion of the line that precedes the
	// "/*" characters.
	blockComment = true
	outputLines = append(outputLines, match[1])
	} else {
	// No block comment found. We can keep the current line as-is.
	outputLines = append(outputLines, line)
	}
	} else {
	// We are currently inside a /* block comment */. Does it end on the current line?
	match := blockCommentEndRegexp.FindStringSubmatch(line)
	if len(match) > 0 {
	// Found the end of the block comment. Keep the portion of the line that succeeds
	// the "*/" characters.
	blockComment = false
	outputLines = append(outputLines, match[1])
	} else {
	// We are still inside a block comment. The current line is replaced with a blank
	// line.
	outputLines = append(outputLines, "")
	}
	}
	}

	return outputLines
	}

	// commentedOutLineRegexp matches lines that are commented out via a single-line comment.
	var commentedOutLineRegexp = regexp.MustCompile(`^\s*//`)

	// stripCommentedOutLines replaces any lines beginning with a "//" comment with an empty line. The
	// returned array has the same length as the input array.
	func stripCommentedOutLines(lines []string) []string {
	var outputLines []string
	for _, line := range lines {
	if commentedOutLineRegexp.MatchString(line) {
	outputLines = append(outputLines, "")
	} else {
	outputLines = append(outputLines, line)
	}
	}
	return outputLines
	}