// src/sksl/lex/Main.cpp - skia - Git at Google

 /*
  * Copyright 2017 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "src/sksl/lex/DFA.h"
 #include "src/sksl/lex/LexUtil.h"
 #include "src/sksl/lex/NFA.h"
 #include "src/sksl/lex/NFAtoDFA.h"
 #include "src/sksl/lex/RegexNode.h"
 #include "src/sksl/lex/RegexParser.h"
 #include "src/sksl/lex/TransitionTable.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <algorithm>
 #include <sstream>
 #include <string>
 #include <vector>

 /**
  * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
  * file is a text file with one token definition per line. Each line is of the form:
  * <TOKEN_NAME> = <pattern>
  * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
  */

 // Banner text streamed first into every file this tool writes (both writeH and writeCPP begin
 // with `out << HEADER`): the license notice followed by a "generated, do not edit" warning.
 static constexpr const char HEADER[] =
     "/*\n"
     " * Copyright 2017 Google Inc.\n"
     " *\n"
     " * Use of this source code is governed by a BSD-style license that can be\n"
     " * found in the LICENSE file.\n"
     " */\n"
     "/*****************************************************************************************\n"
     " ******************** This file was generated by sksllex. Do not edit. *******************\n"
     " *****************************************************************************************/\n";

 /**
  * Writes the generated lexer header to hPath.
  *
  * The emitted file contains the HEADER banner, an include guard named SKSL_<lexer>, a struct
  * named <token> whose Kind enum holds one TK_<name> entry per element of `tokens` followed by
  * TK_NONE, and the declaration of the class named <lexer>. The `dfa` argument is not read here.
  */
 static void writeH(const DFA& dfa, const char* lexer, const char* token,
                    const std::vector<std::string>& tokens, const char* hPath) {
     std::ofstream header(hPath);
     SkASSERT(header.good());

     // Banner, include guard, standard includes and the namespace opener.
     header << HEADER
            << "#ifndef SKSL_" << lexer << "\n"
            << "#define SKSL_" << lexer << "\n"
            << "#include <cstdint>\n"
            << "#include <string_view>\n"
            << "namespace SkSL {\n"
            << "\n";

     // Token struct: the Kind enumerators appear in the same order as `tokens`.
     header << "struct " << token << " {\n"
            << "    enum class Kind {\n";
     for (const std::string& tokenName : tokens) {
         header << "        TK_" << tokenName << ",\n";
     }

     // Everything below is fixed text, apart from the spliced-in token and lexer names.
     header << "        TK_NONE," << R"(
     };

     )" << token << "() {}" << token << R"((Kind kind, int32_t offset, int32_t length)
     : fKind(kind)
     , fOffset(offset)
     , fLength(length) {}

     Kind fKind      = Kind::TK_NONE;
     int32_t fOffset = -1;
     int32_t fLength = -1;
 };

 class )" << lexer << R"( {
 public:
     void start(std::string_view text) {
         fText = text;
         fOffset = 0;
     }

     )" << token << R"( next();

     struct Checkpoint {
         int32_t fOffset;
     };

     Checkpoint getCheckpoint() const {
         return {fOffset};
     }

     void rewindToCheckpoint(Checkpoint checkpoint) {
         fOffset = checkpoint.fOffset;
     }

 private:
     std::string_view fText;
     int32_t fOffset;
 };

 } // namespace
 #endif
 )";
 }

 /**
  * Writes the generated lexer implementation to cppPath.
  *
  * The emitted file starts with the HEADER banner and an #include of `include`, then defines,
  * inside namespace SkSL: the State alias, kInvalidChar, the kMappings table, whatever
  * WriteTransitionTable emits, the kAccepts table, and the body of <lexer>::next().
  *
  * Every reference to the token type in the generated code is spelled with `token`, so the
  * output compiles for any token struct name rather than only for "Token".
  */
 static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
                      const char* cppPath) {
     std::ofstream out(cppPath);
     SkASSERT(out.good());
     out << HEADER;
     out << "#include \"" << include << "\"\n";
     out << "\n";
     out << "namespace SkSL {\n";
     out << "\n";

     // The state count is the length of the longest transition row; it picks the narrowest
     // unsigned type used for State in the generated code.
     size_t states = 0;
     for (const auto& row : dfa.fTransitions) {
         states = std::max(states, row.size());
     }
     out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";

     // Find the first character mapped in our DFA.
     size_t startChar = 0;
     for (; startChar < dfa.fCharMappings.size(); ++startChar) {
         if (dfa.fCharMappings[startChar] != 0) {
             break;
         }
     }

     // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual
     // input.
     SkASSERT(startChar < 18);
     out << "static constexpr uint8_t kInvalidChar = 18;";
     // Only the mappings from startChar onward are emitted; the generated next() subtracts
     // startChar from each input character before indexing kMappings.
     out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n"
            "    ";
     const char* separator = "";
     for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
         out << separator << std::to_string(dfa.fCharMappings[index]);
         separator = ", ";
     }
     out << "\n};\n";

     // NOTE(review): get_transition(), called by the generated next() below, is presumably
     // defined by the code this helper emits - confirm in TransitionTable.
     WriteTransitionTable(out, dfa, states);

     // One accept entry per state; states beyond dfa.fAccepts are padded with INVALID.
     out << "static const int8_t kAccepts[" << states << "] = {";
     for (size_t i = 0; i < states; ++i) {
         if (i < dfa.fAccepts.size()) {
             out << " " << dfa.fAccepts[i] << ",";
         } else {
             out << " " << INVALID << ",";
         }
     }
     out << " };\n";
     out << "\n";

     out << token << " " << lexer << "::next() {";
     out << R"(
     // note that we cheat here: normally a lexer needs to worry about the case
     // where a token has a prefix which is not itself a valid token - for instance,
     // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
     // tokens. Our grammar doesn't have this property, so we can simplify the logic
     // a bit.
     int32_t startOffset = fOffset;
     State   state = 1;
     for (;;) {
         if (fOffset >= (int32_t)fText.length()) {
             if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
                 return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
             }
             break;
         }
         uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
         if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
             c = kInvalidChar;
         }
         State newState = get_transition(kMappings[c], state);
         if (!newState) {
             break;
         }
         state = newState;
         ++fOffset;
     }
     )" << token << R"(::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
     return )" << token << R"((kind, startOffset, fOffset - startOffset);
 }

 } // namespace
 )";
 }

 /**
  * Reads the token definitions in inPath, builds an NFA from them, converts it to a DFA and
  * writes the generated header and implementation to hPath and cppPath.
  *
  * Empty lines and lines starting with "//" are skipped. Every other line is split on whitespace
  * into `<name> = <pattern>`; lines that do not yield all three fields are ignored. A pattern
  * starting with '"' is treated as a literal string, anything else is handed to RegexParser.
  */
 static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
                     const char* cppPath) {
     NFA nfa;
     std::vector<std::string> tokens;
     // END_OF_FILE is always the first token name, ahead of anything the .lex file declares.
     tokens.push_back("END_OF_FILE");
     std::string line;
     std::ifstream in(inPath);
     // An unreadable input would otherwise produce a lexer containing only END_OF_FILE with no
     // diagnostic; check it the same way writeH/writeCPP check their output streams.
     SkASSERT(in.good());
     while (std::getline(in, line)) {
         if (line.length() == 0) {
             continue;
         }
         if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
             continue;
         }
         std::istringstream split(line);
         std::string name, delimiter, pattern;
         if (split >> name >> delimiter >> pattern) {
             SkASSERT(split.eof());
             SkASSERT(name != "");
             SkASSERT(delimiter == "=");
             SkASSERT(pattern != "");
             tokens.push_back(name);
             if (pattern[0] == '"') {
                 // Literal: concatenate one kChar node per character between the quotes.
                 SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                 RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                 for (size_t i = 2; i < pattern.size() - 1; ++i) {
                     node = RegexNode(RegexNode::kConcat_Kind, node,
                                      RegexNode(RegexNode::kChar_Kind, pattern[i]));
                 }
                 nfa.addRegex(node);
             }
             else {
                 nfa.addRegex(RegexParser().parse(pattern));
             }
         }
     }
     NFAtoDFA converter(&nfa);
     DFA dfa = converter.convert();
     writeH(dfa, lexer, token, tokens, hPath);
     // The temporary std::string outlives the call, so the c_str() pointer stays valid inside
     // writeCPP.
     writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
 }

 /**
  * Entry point. Expects exactly five operands after the program name and forwards them, in order,
  * to process(); any other argument count prints the usage line and exits with status 1.
  */
 int main(int argc, const char** argv) {
     // argv[0] plus the five operands named in the usage string.
     constexpr int kExpectedArgc = 6;
     if (argc == kExpectedArgc) {
         process(argv[1], argv[2], argv[3], argv[4], argv[5]);
         return 0;
     }
     printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
     exit(1);
 }
	/*
	* Copyright 2017 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "src/sksl/lex/DFA.h"
	#include "src/sksl/lex/LexUtil.h"
	#include "src/sksl/lex/NFA.h"
	#include "src/sksl/lex/NFAtoDFA.h"
	#include "src/sksl/lex/RegexNode.h"
	#include "src/sksl/lex/RegexParser.h"
	#include "src/sksl/lex/TransitionTable.h"

	#include <stdio.h>
	#include <stdlib.h>
	#include <algorithm>
	#include <sstream>
	#include <string>
	#include <vector>

	/**
	* Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
	* file is a text file with one token definition per line. Each line is of the form:
	* <TOKEN_NAME> = <pattern>
	* where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
	*/

	// Banner text streamed first into every file this tool writes (both writeH and writeCPP begin
	// with `out << HEADER`): the license notice followed by a "generated, do not edit" warning.
	static constexpr const char HEADER[] =
	"/*\n"
	" * Copyright 2017 Google Inc.\n"
	" *\n"
	" * Use of this source code is governed by a BSD-style license that can be\n"
	" * found in the LICENSE file.\n"
	" */\n"
	"/*****************************************************************************************\n"
	" ****************** This file was generated by sksllex. Do not edit. *****************\n"
	" *****************************************************************************************/\n";

	/**
	 * Writes the generated lexer header to hPath.
	 *
	 * The emitted file contains the HEADER banner, an include guard named SKSL_<lexer>, a struct
	 * named <token> whose Kind enum holds one TK_<name> entry per element of `tokens` followed by
	 * TK_NONE, and the declaration of the class named <lexer>. The `dfa` argument is not read here.
	 */
	static void writeH(const DFA& dfa, const char* lexer, const char* token,
	const std::vector<std::string>& tokens, const char* hPath) {
	std::ofstream out(hPath);
	SkASSERT(out.good());
	// Banner, include guard, standard includes and the namespace opener.
	out << HEADER;
	out << "#ifndef SKSL_" << lexer << "\n";
	out << "#define SKSL_" << lexer << "\n";
	out << "#include <cstdint>\n";
	out << "#include <string_view>\n";
	out << "namespace SkSL {\n";
	out << "\n";
	// Token struct: the Kind enumerators appear in the same order as `tokens`.
	out << "struct " << token << " {\n";
	out << " enum class Kind {\n";
	for (const std::string& t : tokens) {
	out << " TK_" << t << ",\n";
	}
	out << " TK_NONE,";
	// Everything below is fixed text, apart from the spliced-in token and lexer names.
	out << R"(
	};

	)" << token << "() {}";

	out << token << R"((Kind kind, int32_t offset, int32_t length)
	: fKind(kind)
	, fOffset(offset)
	, fLength(length) {}

	Kind fKind = Kind::TK_NONE;
	int32_t fOffset = -1;
	int32_t fLength = -1;
	};

	class )" << lexer << R"( {
	public:
	void start(std::string_view text) {
	fText = text;
	fOffset = 0;
	}

	)" << token << R"( next();

	struct Checkpoint {
	int32_t fOffset;
	};

	Checkpoint getCheckpoint() const {
	return {fOffset};
	}

	void rewindToCheckpoint(Checkpoint checkpoint) {
	fOffset = checkpoint.fOffset;
	}

	private:
	std::string_view fText;
	int32_t fOffset;
	};

	} // namespace
	#endif
	)";
	}

	/**
	 * Writes the generated lexer implementation to cppPath.
	 *
	 * The emitted file starts with the HEADER banner and an #include of `include`, then defines,
	 * inside namespace SkSL: the State alias, kInvalidChar, the kMappings table, whatever
	 * WriteTransitionTable emits, the kAccepts table, and the body of <lexer>::next().
	 *
	 * Every reference to the token type in the generated code is spelled with `token`, so the
	 * output compiles for any token struct name rather than only for "Token".
	 */
	static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
	                     const char* cppPath) {
	    std::ofstream out(cppPath);
	    SkASSERT(out.good());
	    out << HEADER;
	    out << "#include \"" << include << "\"\n";
	    out << "\n";
	    out << "namespace SkSL {\n";
	    out << "\n";

	    // The state count is the length of the longest transition row; it picks the narrowest
	    // unsigned type used for State in the generated code.
	    size_t states = 0;
	    for (const auto& row : dfa.fTransitions) {
	        states = std::max(states, row.size());
	    }
	    out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";

	    // Find the first character mapped in our DFA.
	    size_t startChar = 0;
	    for (; startChar < dfa.fCharMappings.size(); ++startChar) {
	        if (dfa.fCharMappings[startChar] != 0) {
	            break;
	        }
	    }

	    // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual
	    // input.
	    SkASSERT(startChar < 18);
	    out << "static constexpr uint8_t kInvalidChar = 18;";
	    // Only the mappings from startChar onward are emitted; the generated next() subtracts
	    // startChar from each input character before indexing kMappings.
	    out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n"
	           " ";
	    const char* separator = "";
	    for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
	        out << separator << std::to_string(dfa.fCharMappings[index]);
	        separator = ", ";
	    }
	    out << "\n};\n";

	    // NOTE(review): get_transition(), called by the generated next() below, is presumably
	    // defined by the code this helper emits - confirm in TransitionTable.
	    WriteTransitionTable(out, dfa, states);

	    // One accept entry per state; states beyond dfa.fAccepts are padded with INVALID.
	    out << "static const int8_t kAccepts[" << states << "] = {";
	    for (size_t i = 0; i < states; ++i) {
	        if (i < dfa.fAccepts.size()) {
	            out << " " << dfa.fAccepts[i] << ",";
	        } else {
	            out << " " << INVALID << ",";
	        }
	    }
	    out << " };\n";
	    out << "\n";

	    out << token << " " << lexer << "::next() {";
	    out << R"(
	// note that we cheat here: normally a lexer needs to worry about the case
	// where a token has a prefix which is not itself a valid token - for instance,
	// maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
	// tokens. Our grammar doesn't have this property, so we can simplify the logic
	// a bit.
	int32_t startOffset = fOffset;
	State state = 1;
	for (;;) {
	if (fOffset >= (int32_t)fText.length()) {
	if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) {
	return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
	}
	break;
	}
	uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
	if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
	c = kInvalidChar;
	}
	State newState = get_transition(kMappings[c], state);
	if (!newState) {
	break;
	}
	state = newState;
	++fOffset;
	}
	)" << token << R"(::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
	return )" << token << R"((kind, startOffset, fOffset - startOffset);
	}

	} // namespace
	)";
	}

	/**
	 * Reads the token definitions in inPath, builds an NFA from them, converts it to a DFA and
	 * writes the generated header and implementation to hPath and cppPath.
	 *
	 * Empty lines and lines starting with "//" are skipped. Every other line is split on whitespace
	 * into `<name> = <pattern>`; lines that do not yield all three fields are ignored. A pattern
	 * starting with '"' is treated as a literal string, anything else is handed to RegexParser.
	 */
	static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
	                    const char* cppPath) {
	    NFA nfa;
	    std::vector<std::string> tokens;
	    // END_OF_FILE is always the first token name, ahead of anything the .lex file declares.
	    tokens.push_back("END_OF_FILE");
	    std::string line;
	    std::ifstream in(inPath);
	    // An unreadable input would otherwise produce a lexer containing only END_OF_FILE with no
	    // diagnostic; check it the same way writeH/writeCPP check their output streams.
	    SkASSERT(in.good());
	    while (std::getline(in, line)) {
	        if (line.length() == 0) {
	            continue;
	        }
	        if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
	            continue;
	        }
	        std::istringstream split(line);
	        std::string name, delimiter, pattern;
	        if (split >> name >> delimiter >> pattern) {
	            SkASSERT(split.eof());
	            SkASSERT(name != "");
	            SkASSERT(delimiter == "=");
	            SkASSERT(pattern != "");
	            tokens.push_back(name);
	            if (pattern[0] == '"') {
	                // Literal: concatenate one kChar node per character between the quotes.
	                SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
	                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
	                for (size_t i = 2; i < pattern.size() - 1; ++i) {
	                    node = RegexNode(RegexNode::kConcat_Kind, node,
	                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
	                }
	                nfa.addRegex(node);
	            }
	            else {
	                nfa.addRegex(RegexParser().parse(pattern));
	            }
	        }
	    }
	    NFAtoDFA converter(&nfa);
	    DFA dfa = converter.convert();
	    writeH(dfa, lexer, token, tokens, hPath);
	    // The temporary std::string outlives the call, so the c_str() pointer stays valid inside
	    // writeCPP.
	    writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
	}

	/**
	 * Entry point. Expects exactly five operands after the program name and forwards them, in order,
	 * to process(); any other argument count prints the usage line and exits with status 1.
	 */
	int main(int argc, const char** argv) {
	    // argv[0] plus the five operands named in the usage string.
	    constexpr int kExpectedArgc = 6;
	    if (argc == kExpectedArgc) {
	        process(argv[1], argv[2], argv[3], argv[4], argv[5]);
	        return 0;
	    }
	    printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
	    exit(1);
	}