/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/lex/DFA.h"
#include "src/sksl/lex/LexUtil.h"
#include "src/sksl/lex/NFA.h"
#include "src/sksl/lex/NFAtoDFA.h"
#include "src/sksl/lex/RegexNode.h"
#include "src/sksl/lex/RegexParser.h"
#include "src/sksl/lex/TransitionTable.h"

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The
 * .lex file is a text file with one token definition per line. Each line is of the form:
 *     <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
 */
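
/*
 * For illustration only, a hypothetical .lex input (not the actual SkSL token list) might look
 * like:
 *
 *     WHILE       = "while"
 *     INT_LITERAL = [0-9]+
 *     WHITESPACE  = [ \t\r\n]+
 */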

static constexpr const char HEADER[] =
        "/*\n"
        " * Copyright 2017 Google Inc.\n"
        " *\n"
        " * Use of this source code is governed by a BSD-style license that can be\n"
        " * found in the LICENSE file.\n"
        " */\n"
        "/*****************************************************************************************\n"
        " ******************** This file was generated by sksllex. Do not edit. *******************\n"
        " *****************************************************************************************/\n";
static void writeH(const DFA& dfa, const char* lexer, const char* token,
                   const std::vector<std::string>& tokens, const char* hPath) {
    std::ofstream out(hPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#ifndef SKSL_" << lexer << "\n";
    out << "#define SKSL_" << lexer << "\n";
    out << "#include <cstdint>\n";
    out << "#include <string_view>\n";
    out << "namespace SkSL {\n";
    out << "\n";
    out << "struct " << token << " {\n";
    out << "    enum class Kind {\n";
    for (const std::string& t : tokens) {
        out << "        TK_" << t << ",\n";
    }
    out << "        TK_NONE,";
    out << R"(
    };

    )" << token << "() {}";

    out << "\n\n    " << token << R"((Kind kind, int32_t offset, int32_t length)
            : fKind(kind)
            , fOffset(offset)
            , fLength(length) {}

    Kind fKind = Kind::TK_NONE;
    int32_t fOffset = -1;
    int32_t fLength = -1;
};

class )" << lexer << R"( {
public:
    void start(std::string_view text) {
        fText = text;
        fOffset = 0;
    }

    )" << token << R"( next();

    struct Checkpoint {
        int32_t fOffset;
    };

    Checkpoint getCheckpoint() const {
        return {fOffset};
    }

    void rewindToCheckpoint(Checkpoint checkpoint) {
        fOffset = checkpoint.fOffset;
    }

private:
    std::string_view fText;
    int32_t fOffset;
};

} // namespace SkSL
#endif
)";
}
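
/*
 * A sketch of how the generated lexer is typically driven (hypothetical driver code; it assumes
 * the lexer and token names passed on the command line are "Lexer" and "Token"):
 *
 *     SkSL::Lexer lexer;
 *     lexer.start("int x = 1;");
 *     for (SkSL::Token t = lexer.next(); t.fKind != SkSL::Token::Kind::TK_END_OF_FILE;
 *          t = lexer.next()) {
 *         // t.fOffset and t.fLength locate the token's text within the input.
 *     }
 */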
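
// Emits the generated implementation: the character-class mapping table, the DFA transition
// table, the accept table, and the next() method that walks them.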
static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
                     const char* cppPath) {
    std::ofstream out(cppPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#include \"" << include << "\"\n";
    out << "\n";
    out << "namespace SkSL {\n";
    out << "\n";

    size_t states = 0;
    for (const auto& row : dfa.fTransitions) {
        states = std::max(states, row.size());
    }
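    // States are numbered 0 through states-1, so a uint8_t is wide enough whenever there are at
    // most 256 of them.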
| out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n"; |
| |
| // Find the first character mapped in our DFA. |
| size_t startChar = 0; |
| for (; startChar < dfa.fCharMappings.size(); ++startChar) { |
| if (dfa.fCharMappings[startChar] != 0) { |
| break; |
| } |
| } |
| |
| // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual |
| // input. |
| SkASSERT(startChar < 18); |
| out << "static constexpr uint8_t kInvalidChar = 18;"; |
| out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n" |
| " "; |
| const char* separator = ""; |
| for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) { |
| out << separator << std::to_string(dfa.fCharMappings[index]); |
| separator = ", "; |
| } |
| out << "\n};\n"; |
| |
| WriteTransitionTable(out, dfa, states); |
| |
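    // kAccepts maps each state to the token kind it accepts, or to INVALID (-1) for states which
    // do not accept anything; states past dfa.fAccepts.size() are padded with INVALID.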
| out << "static const int8_t kAccepts[" << states << "] = {"; |
| for (size_t i = 0; i < states; ++i) { |
| if (i < dfa.fAccepts.size()) { |
| out << " " << dfa.fAccepts[i] << ","; |
| } else { |
| out << " " << INVALID << ","; |
| } |
| } |
| out << " };\n"; |
| out << "\n"; |
| |
| out << token << " " << lexer << "::next() {"; |
| out << R"( |
| // note that we cheat here: normally a lexer needs to worry about the case |
| // where a token has a prefix which is not itself a valid token - for instance, |
| // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid |
| // tokens. Our grammar doesn't have this property, so we can simplify the logic |
| // a bit. |
| int32_t startOffset = fOffset; |
| State state = 1; |
| for (;;) { |
| if (fOffset >= (int32_t)fText.length()) { |
| if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) { |
| return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0); |
| } |
| break; |
| } |
| uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"(); |
| if (c >= )" << dfa.fCharMappings.size() - startChar << R"() { |
| c = kInvalidChar; |
| } |
| State newState = get_transition(kMappings[c], state); |
| if (!newState) { |
| break; |
| } |
| state = newState; |
| ++fOffset; |
| } |
| Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state]; |
| return )" << token << R"((kind, startOffset, fOffset - startOffset); |
| } |
| |
| } // namespace |
| )"; |
| } |

static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
                    const char* cppPath) {
    NFA nfa;
    std::vector<std::string> tokens;
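    // The first token kind is always END_OF_FILE, so TK_END_OF_FILE ends up with enum value 0;
    // the generated next() returns it when the input is exhausted.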
| tokens.push_back("END_OF_FILE"); |
| std::string line; |
| std::ifstream in(inPath); |
| while (std::getline(in, line)) { |
| if (line.length() == 0) { |
| continue; |
| } |
| if (line.length() >= 2 && line[0] == '/' && line[1] == '/') { |
| continue; |
| } |
| std::istringstream split(line); |
| std::string name, delimiter, pattern; |
| if (split >> name >> delimiter >> pattern) { |
| SkASSERT(split.eof()); |
| SkASSERT(name != ""); |
| SkASSERT(delimiter == "="); |
| SkASSERT(pattern != ""); |
| tokens.push_back(name); |
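            // A double-quoted pattern is a literal string: build it as a chain of concatenated
            // single-character regex nodes rather than handing it to the regex parser, so regex
            // metacharacters inside the quotes are matched literally.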
            if (pattern[0] == '"') {
                SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                for (size_t i = 2; i < pattern.size() - 1; ++i) {
                    node = RegexNode(RegexNode::kConcat_Kind, node,
                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
                }
                nfa.addRegex(node);
            } else {
                nfa.addRegex(RegexParser().parse(pattern));
            }
        }
    }
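    // All token regexes have now been merged into a single NFA; convert it to a DFA and emit the
    // header and implementation.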
    NFAtoDFA converter(&nfa);
    DFA dfa = converter.convert();
    writeH(dfa, lexer, token, tokens, hPath);
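    // Note that the #include path written into the generated .cpp is hardcoded to Skia's source
    // layout ("src/sksl/SkSL<lexer>.h"), independent of where hPath actually points.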
    writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
}

int main(int argc, const char** argv) {
    if (argc != 6) {
        printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
        exit(1);
    }
    process(argv[1], argv[2], argv[3], argv[4], argv[5]);
    return 0;
}