blob: 4ec8b6fadf34268e0ba0b64f307ac780d15cb6d5 [file] [log] [blame]
// Package parsers defines parsers for the paths of "import" statements found in TypeScript and Sass
// files.
//
// The ad-hoc parsers in this package utilize regular expressions to extract out import paths,
// filter out comments, etc. While these parsers do not capture every aspect of the TypeScript and
// Sass grammars, they are sufficient for the purpose of parsing the import statements in the
// TypeScript and Sass files found in our codebase.
//
// The following alternatives were ruled out because of their high implementation and maintenance
// cost:
//
// - Using third-party parsers written in Go (none exist at this time).
// - Generating real parsers using e.g. Goyacc (https://pkg.go.dev/golang.org/x/tools/cmd/goyacc).
// - Using the TypeScript compiler API to inspect the AST of a TypeScript file (requires calling
// Node.js code from Gazelle).
package parsers
import (
"fmt"
"regexp"
"sort"
"strings"
)
////////////////////////////////
// TypeScript imports parser. //
////////////////////////////////
// tsImportRegexps contains all the regular expressions necessary to extract imports from a
// TypeScript source file.
//
// Every regexp is anchored at the start of a line, and the import path is always the last capture
// group. The path group excludes the quote character that delimits it, so the capture ends at the
// closing quote of the import path even when more quotes follow on the same line (e.g. a second
// statement, or a trailing comment containing an apostrophe).
var tsImportRegexps = []*regexp.Regexp{
	// Matches the following styles of imports:
	//   import * from 'foo';
	//   export * from 'foo';
	//   import * as bar from 'foo';
	//   import { bar, baz } from 'foo';
	//   import { bar, baz as qux } from 'foo';
	regexp.MustCompile(`^\s*(import|export)\s*(\*|[[:alnum:]]|_|\$|,|\{|\}|\s)*\s*from\s*'(?P<path>[^']*)'`), // Single quotes.
	regexp.MustCompile(`^\s*(import|export)\s*(\*|[[:alnum:]]|_|\$|,|\{|\}|\s)*\s*from\s*"(?P<path>[^"]*)"`), // Double quotes.

	// Matches the last line of multiline imports, e.g.:
	//   import {
	//     bar,
	//     baz as qux,
	//   } from 'foo';
	regexp.MustCompile(`^\s*}?\s*from\s*'(?P<path>[^']*)'`), // Single quotes.
	regexp.MustCompile(`^\s*}?\s*from\s*"(?P<path>[^"]*)"`), // Double quotes.

	// Matches imports for side-effects only, e.g.:
	//   import 'foo';
	regexp.MustCompile(`^\s*import\s*'(?P<path>[^']*)'`), // Single quotes.
	regexp.MustCompile(`^\s*import\s*"(?P<path>[^"]*)"`), // Double quotes.
}
// ignoredTsImportsRegexp matches import paths that should be ignored, namely CSS and Sass imports.
// Importing CSS and Sass files from TypeScript files is a Webpack idiom that both the TypeScript
// compiler and our front-end BUILD rules ignore, in favor of other mechanisms such as the
// sass_deps and sk_element_deps in various rules, and "ghost" Sass imports.
//
// The pattern is anchored at the end of the path only, so it matches any path that ends in ".css"
// or ".scss".
//
// See the sk_element macro definition for more, or go/skia-infra-bazel-frontend for the design.
var ignoredTsImportsRegexp = regexp.MustCompile(`\.s?css$`)
// ParseTSImports returns the verbatim paths of the modules imported by the given TypeScript
// source.
//
// Comments are removed before matching. The returned paths are deduplicated, sorted
// lexicographically, and exclude any path matched by ignoredTsImportsRegexp. The result is nil
// when no imports are found.
func ParseTSImports(source string) []string {
	// Gather the distinct import paths found on the comment-free lines.
	seen := map[string]bool{}
	for _, line := range splitLinesAndRemoveComments(source) {
		for _, re := range tsImportRegexps {
			groups := re.FindStringSubmatch(line)
			if len(groups) == 0 {
				continue
			}
			// The path is the last capture group on all regexps.
			seen[groups[len(groups)-1]] = true
		}
	}

	// Drop the ignored imports and put the remaining ones in a deterministic order.
	var imports []string
	for path := range seen {
		if ignoredTsImportsRegexp.MatchString(path) {
			continue
		}
		imports = append(imports, path)
	}
	sort.Strings(imports)
	return imports
}
//////////////////////////
// Sass imports parser. //
//////////////////////////
// sassImportRegexps match the following kinds of Sass imports:
//
// @import 'foo';
// @use 'foo';
// @use 'foo' as bar;
// @use 'foo' with (
// $bar: 1px
// );
// @forward 'foo';
// @forward 'foo' as foo-*;
// @forward 'foo' hide $bar, $baz;
// @forward 'foo' with (
// $bar: 1px
// );
//
// Each regexp is anchored at the start of the line and only looks at the quoted path that
// immediately follows the at-rule; anything after the closing quote is not inspected. The path
// capture group accepts only word characters, "~", "/", "." and "-", so a quoted string that
// contains any other character does not match.
//
// See https://sass-lang.com/documentation/at-rules.
var sassImportRegexps = []*regexp.Regexp{
regexp.MustCompile(`^\s*@(import|use|forward)\s*'(?P<path>[\w~_/\.\-]+)'`), // Single quotes.
regexp.MustCompile(`^\s*@(import|use|forward)\s*"(?P<path>[\w~_/\.\-]+)"`), // Double quotes.
}
// ParseSassImports returns the verbatim paths of the modules imported by the given Sass source.
//
// Comments are removed before matching. Paths ending in ".css" are skipped, and the remaining
// paths are deduplicated and sorted lexicographically. The result is nil when no imports are
// found.
func ParseSassImports(source string) []string {
	// Gather the distinct import paths found on the comment-free lines.
	seen := map[string]bool{}
	for _, line := range splitLinesAndRemoveComments(source) {
		for _, re := range sassImportRegexps {
			groups := re.FindStringSubmatch(line)
			if len(groups) == 0 {
				continue
			}
			// The path is the last capture group on all regexps.
			path := groups[len(groups)-1]
			// Plain CSS imports are not Sass dependencies. See
			// https://sass-lang.com/documentation/at-rules/import#plain-css-imports.
			if !strings.HasSuffix(path, ".css") {
				seen[path] = true
			}
		}
	}

	// Put the imports in a deterministic order.
	var imports []string
	for path := range seen {
		imports = append(imports, path)
	}
	sort.Strings(imports)
	return imports
}
///////////////////////////////////////////
// Functions for filtering out comments. //
///////////////////////////////////////////
// splitLinesAndRemoveComments splits the given source on "\n", removes "/* block comments */"
// from the resulting lines, and then drops the lines that are commented out with a
// "// line comment".
//
// The same function serves both TypeScript and Sass because both languages use the same syntax
// for comments.
func splitLinesAndRemoveComments(source string) []string {
	// Block comments must be removed first, as doing so can uncover or create "//" lines.
	withoutBlockComments := stripBlockComments(strings.Split(source, "\n"))
	return stripCommentedOutLines(withoutBlockComments)
}
var (
	// singleLineBlockCommentRegexp matches the first /* block comment */ that starts and ends on the
	// same line, and captures any uncommented code before and after it. Both the leading capture
	// group and the comment body are lazy, so the match spans from the first "/*" on the line to the
	// first "*/" that follows it. The trailing capture group holds the rest of the line, which may
	// contain further block comments.
	//
	// Known limitation: This regexp ignores string literals.
	singleLineBlockCommentRegexp = regexp.MustCompile(`(?P<uncommented_before>.*?)/\*.*?\*/(?P<uncommented_after>.*)`)

	// blockCommentStartRegexp matches the first "/*" on a line, i.e. the beginning of a
	// /* block comment */, and captures any uncommented code that precedes it.
	//
	// The regexp is anchored at the start of the line and the captured prefix cannot contain
	// quotes or backticks, which gives the following known limitation: the beginning of a block
	// comment is ignored if it is preceded by a single quote, double quote or backtick, as the "/*"
	// might be part of a string literal (e.g. the glob 'src/*.ts').
	blockCommentStartRegexp = regexp.MustCompile(fmt.Sprintf(`^(?P<uncommented>[^'"%s]*?)/\*`, "`"))

	// blockCommentEndRegexp matches the first "*/" on a line, i.e. the end of a
	// /* block comment */, and captures any code that succeeds it.
	blockCommentEndRegexp = regexp.MustCompile(`\*/(?P<uncommented>.*)`)
)

// stripBlockComments strips /* block comments */ from the given lines of code.
//
// Lines that fall entirely inside a multi-line block comment are dropped from the output. Every
// other line is kept, minus any commented portions. A line may contain any number of single-line
// block comments, and a block comment may start on the same line where a previous one ends.
func stripBlockComments(lines []string) []string {
	var outputLines []string
	inBlockComment := false // Whether the current line starts inside a /* block comment */.
	for _, line := range lines {
		if inBlockComment {
			// Does the block comment end on the current line?
			match := blockCommentEndRegexp.FindStringSubmatch(line)
			if match == nil {
				// Still inside the block comment: the entire line is discarded.
				continue
			}
			// Keep what follows the "*/" and process it like any other piece of uncommented code, as
			// it may contain further comments.
			inBlockComment = false
			line = match[1]
		}

		// Remove every single-line block comment. Each iteration removes exactly one comment, which
		// shortens the line, so the loop always terminates.
		for {
			match := singleLineBlockCommentRegexp.FindStringSubmatch(line)
			if match == nil {
				break
			}
			line = match[1] + match[2]
		}

		// Any "/*" left at this point has no matching "*/" on this line. Does a multi-line block
		// comment start here?
		if match := blockCommentStartRegexp.FindStringSubmatch(line); match != nil {
			// Keep only the portion of the line that precedes the "/*" characters.
			inBlockComment = true
			line = match[1]
		}
		outputLines = append(outputLines, line)
	}
	return outputLines
}
// commentedOutLineRegexp matches lines whose first non-whitespace characters are "//", i.e. lines
// that are entirely commented out via a single-line comment.
var commentedOutLineRegexp = regexp.MustCompile(`^\s*//`)

// stripCommentedOutLines returns the given lines minus those that begin with a "//" single-line
// comment, optionally preceded by whitespace. Lines with code followed by a trailing "//" comment
// are kept untouched. The result is nil if no lines are kept.
func stripCommentedOutLines(lines []string) []string {
	var kept []string
	for _, candidate := range lines {
		if commentedOutLineRegexp.MatchString(candidate) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}