src/sksl/lex/TransitionTable.cpp - skia - Git at Google

 /*
  * Copyright 2021 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "src/sksl/lex/DFA.h"
 #include "src/sksl/lex/TransitionTable.h"

 #include <array>
 #include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>

 namespace {

 // The number of bits to use per entry in our compact transition table. This is customizable:
 // - 1-bit: reasonable in theory. Doesn't actually pack many slices.
 // - 2-bit: best fit for our data. Packs extremely well.
 // - 4-bit: packs all but one slice, but doesn't save as much space overall.
 // - 8-bit: way too large (an 8-bit LUT plus an 8-bit data table is as big as a 16-bit table)
 // Other values don't divide cleanly into a byte and do not work.
 constexpr int kNumBits = 2;

 // These values are derived from kNumBits and shouldn't need to change.
 constexpr int kNumValues = (1 << kNumBits) - 1;
 constexpr int kDataPerByte = 8 / kNumBits;

 enum IndexType {
     kCompactEntry = 0,
     kFullEntry,
 };
 struct IndexEntry {
     IndexType type;
     int pos;
 };
 struct CompactEntry {
     std::array<int, kNumValues> v = {};
     std::vector<int> data;
 };
 struct FullEntry {
     std::vector<int> data;
 };

 using TransitionSet = std::unordered_set<int>;

 static int add_compact_entry(const TransitionSet& transitionSet,
                              const std::vector<int>& data,
                              std::vector<CompactEntry>* entries) {
     // Create a compact entry with the unique values from the transition set, padded out with zeros
     // and sorted.
     CompactEntry result{};
     assert(transitionSet.size() <= result.v.size());
     std::copy(transitionSet.begin(), transitionSet.end(), result.v.begin());
     std::sort(result.v.rbegin(), result.v.rend());

     // Create a mapping from real values to small values.
     std::unordered_map<int, int> translationTable;
     for (size_t index = 0; index < result.v.size(); ++index) {
         translationTable[result.v[index]] = index;
     }
     translationTable[0] = result.v.size();

     // Convert the real values into small values.
     for (size_t index = 0; index < data.size(); ++index) {
         int value = data[index];
         assert(translationTable.find(value) != translationTable.end());
         result.data.push_back(translationTable[value]);
     }

     // Look for an existing entry that exactly matches this one.
     for (size_t index = 0; index < entries->size(); ++index) {
         if (entries->at(index).v == result.v && entries->at(index).data == result.data) {
             return index;
         }
     }

     // Add this as a new entry.
     entries->push_back(std::move(result));
     return (int)(entries->size() - 1);
 }

 static int add_full_entry(const TransitionSet& transitionMap,
                           const std::vector<int>& data,
                           std::vector<FullEntry>* entries) {
     // Create a full entry with this data.
     FullEntry result{};
     result.data = std::vector<int>(data.begin(), data.end());

     // Look for an existing entry that exactly matches this one.
     for (size_t index = 0; index < entries->size(); ++index) {
         if (entries->at(index).data == result.data) {
             return index;
         }
     }

     // Add this as a new entry.
     entries->push_back(std::move(result));
     return (int)(entries->size() - 1);
 }

 }  // namespace

 void WriteTransitionTable(std::ofstream& out, const DFA& dfa, size_t states) {
     int numTransitions = dfa.fTransitions.size();

     // Assemble our compact and full data tables, and an index into them.
     std::vector<CompactEntry> compactEntries;
     std::vector<FullEntry> fullEntries;
     std::vector<IndexEntry> indices;
     for (size_t s = 0; s < states; ++s) {
         // Copy all the transitions for this state into a flat array, and into a histogram (counting
         // the number of unique state-transition values). Most states only transition to a few
         // possible new states.
         TransitionSet transitionSet;
         std::vector<int> data(numTransitions);
         for (int t = 0; t < numTransitions; ++t) {
             if ((size_t) t < dfa.fTransitions.size() && s < dfa.fTransitions[t].size()) {
                 int value = dfa.fTransitions[t][s];
                 assert(value >= 0 && value < (int)states);
                 data[t] = value;
                 transitionSet.insert(value);
             }
         }

         transitionSet.erase(0);
         if (transitionSet.size() <= kNumValues) {
             // This table only contained a small number of unique nonzero values.
             // Use a compact representation that squishes each value down to a few bits.
             int index = add_compact_entry(transitionSet, data, &compactEntries);
             indices.push_back(IndexEntry{kCompactEntry, index});
         } else {
             // This table contained a large number of values. We can't compact it.
             int index = add_full_entry(transitionSet, data, &fullEntries);
             indices.push_back(IndexEntry{kFullEntry, index});
         }
     }

     // Find the largest value for each compact-entry slot.
     int maxValue = 0;
     for (const CompactEntry& entry : compactEntries) {
         for (int index=0; index < kNumValues; ++index) {
             maxValue = std::max(maxValue, entry.v[index]);
         }
     }

     // Figure out how many bits we need to store our max value.
     int bitsPerValue = std::ceil(std::log2(maxValue));
     maxValue = (1 << bitsPerValue) - 1;

     // If we exceed 10 bits per value, three values would overflow 32 bits. If this happens, we'll
     // need to pack our values another way.
     assert(bitsPerValue <= 10);

     // Emit all the structs our transition table will use.
     out << "using IndexEntry = int16_t;\n"
         << "struct FullEntry {\n"
         << "    State data[" << numTransitions << "];\n"
         << "};\n";

     // Emit the compact-entry structure. We store all three values in `v`. If kNumBits were to
     // change, we would need to adjust the packing algorithm.
     static_assert(kNumBits == 2);
     out << "struct CompactEntry {\n"
         << "    uint32_t values;\n"
         << "    uint8_t data[" << std::ceil(float(numTransitions) / float(kDataPerByte)) << "];\n"
         << "};\n";

     // Emit the full-table data.
     out << "static constexpr FullEntry kFull[] = {\n";
     for (const FullEntry& entry : fullEntries) {
         out << "    {";
         for (int value : entry.data) {
             out << value << ", ";
         }
         out << "},\n";
     }
     out << "};\n";

     // Emit the compact-table data.
     out << "static constexpr CompactEntry kCompact[] = {\n";
     for (const CompactEntry& entry : compactEntries) {
         out << "    {";

         // We pack all three values into `v`. If kNumBits were to change, we would need to adjust
         // this packing algorithm.
         static_assert(kNumBits == 2);
         out << entry.v[0];
         if (entry.v[1]) {
             out << " | (" << entry.v[1] << " << " << bitsPerValue << ")";
         }
         if (entry.v[2]) {
             out << " | (" << entry.v[2] << " << " << (2 * bitsPerValue) << ")";
         }
         out << ", {";

         unsigned int shiftBits = 0, combinedBits = 0;
         for (int index = 0; index < numTransitions; index++) {
             combinedBits |= entry.data[index] << shiftBits;
             shiftBits += kNumBits;
             assert(shiftBits <= 8);
             if (shiftBits == 8) {
                 out << combinedBits << ", ";
                 shiftBits = 0;
                 combinedBits = 0;
             }
         }
         if (shiftBits > 0) {
             // Flush any partial values.
             out << combinedBits;
         }
         out << "}},\n";
     }
     out << "};\n"
         << "static constexpr IndexEntry kIndices[] = {\n";
     for (const IndexEntry& entry : indices) {
         if (entry.type == kFullEntry) {
             // Bit-not is used so that full entries start at -1 and go down from there.
             out << ~entry.pos << ", ";
         } else {
             // Compact entries start at 0 and go up from there.
             out << entry.pos << ", ";
         }
     }
     out << "};\n"
         << "State get_transition(int transition, int state) {\n"
         << "    IndexEntry index = kIndices[state];\n"
         << "    if (index < 0) { return kFull[~index].data[transition]; }\n"
         << "    const CompactEntry& entry = kCompact[index];\n"
         << "    int v = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n"
         << "    v >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n"
         << "    v &= " << kNumValues << ";\n"
         << "    v *= " << bitsPerValue << ";\n"
         << "    return (entry.values >> v) & " << maxValue << ";\n"
         << "}\n";
 }
	/*
	* Copyright 2021 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "src/sksl/lex/DFA.h"
	#include "src/sksl/lex/TransitionTable.h"

	#include <array>
	#include <algorithm>
	#include <cassert>
	#include <cmath>
	#include <unordered_map>
	#include <unordered_set>
	#include <utility>
	#include <vector>

	namespace {

	// The number of bits to use per entry in our compact transition table. This is customizable:
	// - 1-bit: reasonable in theory. Doesn't actually pack many slices.
	// - 2-bit: best fit for our data. Packs extremely well.
	// - 4-bit: packs all but one slice, but doesn't save as much space overall.
	// - 8-bit: way too large (an 8-bit LUT plus an 8-bit data table is as big as a 16-bit table)
	// Other values don't divide cleanly into a byte and do not work.
	constexpr int kNumBits = 2;

	// These values are derived from kNumBits and shouldn't need to change.
	constexpr int kNumValues = (1 << kNumBits) - 1;
	constexpr int kDataPerByte = 8 / kNumBits;

	enum IndexType {
	kCompactEntry = 0,
	kFullEntry,
	};
	struct IndexEntry {
	IndexType type;
	int pos;
	};
	struct CompactEntry {
	std::array<int, kNumValues> v = {};
	std::vector<int> data;
	};
	struct FullEntry {
	std::vector<int> data;
	};

	using TransitionSet = std::unordered_set<int>;

	static int add_compact_entry(const TransitionSet& transitionSet,
	const std::vector<int>& data,
	std::vector<CompactEntry>* entries) {
	// Create a compact entry with the unique values from the transition set, padded out with zeros
	// and sorted.
	CompactEntry result{};
	assert(transitionSet.size() <= result.v.size());
	std::copy(transitionSet.begin(), transitionSet.end(), result.v.begin());
	std::sort(result.v.rbegin(), result.v.rend());

	// Create a mapping from real values to small values.
	std::unordered_map<int, int> translationTable;
	for (size_t index = 0; index < result.v.size(); ++index) {
	translationTable[result.v[index]] = index;
	}
	translationTable[0] = result.v.size();

	// Convert the real values into small values.
	for (size_t index = 0; index < data.size(); ++index) {
	int value = data[index];
	assert(translationTable.find(value) != translationTable.end());
	result.data.push_back(translationTable[value]);
	}

	// Look for an existing entry that exactly matches this one.
	for (size_t index = 0; index < entries->size(); ++index) {
	if (entries->at(index).v == result.v && entries->at(index).data == result.data) {
	return index;
	}
	}

	// Add this as a new entry.
	entries->push_back(std::move(result));
	return (int)(entries->size() - 1);
	}

	static int add_full_entry(const TransitionSet& transitionMap,
	const std::vector<int>& data,
	std::vector<FullEntry>* entries) {
	// Create a full entry with this data.
	FullEntry result{};
	result.data = std::vector<int>(data.begin(), data.end());

	// Look for an existing entry that exactly matches this one.
	for (size_t index = 0; index < entries->size(); ++index) {
	if (entries->at(index).data == result.data) {
	return index;
	}
	}

	// Add this as a new entry.
	entries->push_back(std::move(result));
	return (int)(entries->size() - 1);
	}

	} // namespace

	void WriteTransitionTable(std::ofstream& out, const DFA& dfa, size_t states) {
	int numTransitions = dfa.fTransitions.size();

	// Assemble our compact and full data tables, and an index into them.
	std::vector<CompactEntry> compactEntries;
	std::vector<FullEntry> fullEntries;
	std::vector<IndexEntry> indices;
	for (size_t s = 0; s < states; ++s) {
	// Copy all the transitions for this state into a flat array, and into a histogram (counting
	// the number of unique state-transition values). Most states only transition to a few
	// possible new states.
	TransitionSet transitionSet;
	std::vector<int> data(numTransitions);
	for (int t = 0; t < numTransitions; ++t) {
	if ((size_t) t < dfa.fTransitions.size() && s < dfa.fTransitions[t].size()) {
	int value = dfa.fTransitions[t][s];
	assert(value >= 0 && value < (int)states);
	data[t] = value;
	transitionSet.insert(value);
	}
	}

	transitionSet.erase(0);
	if (transitionSet.size() <= kNumValues) {
	// This table only contained a small number of unique nonzero values.
	// Use a compact representation that squishes each value down to a few bits.
	int index = add_compact_entry(transitionSet, data, &compactEntries);
	indices.push_back(IndexEntry{kCompactEntry, index});
	} else {
	// This table contained a large number of values. We can't compact it.
	int index = add_full_entry(transitionSet, data, &fullEntries);
	indices.push_back(IndexEntry{kFullEntry, index});
	}
	}

	// Find the largest value for each compact-entry slot.
	int maxValue = 0;
	for (const CompactEntry& entry : compactEntries) {
	for (int index=0; index < kNumValues; ++index) {
	maxValue = std::max(maxValue, entry.v[index]);
	}
	}

	// Figure out how many bits we need to store our max value.
	int bitsPerValue = std::ceil(std::log2(maxValue));
	maxValue = (1 << bitsPerValue) - 1;

	// If we exceed 10 bits per value, three values would overflow 32 bits. If this happens, we'll
	// need to pack our values another way.
	assert(bitsPerValue <= 10);

	// Emit all the structs our transition table will use.
	out << "using IndexEntry = int16_t;\n"
	<< "struct FullEntry {\n"
	<< " State data[" << numTransitions << "];\n"
	<< "};\n";

	// Emit the compact-entry structure. We store all three values in `v`. If kNumBits were to
	// change, we would need to adjust the packing algorithm.
	static_assert(kNumBits == 2);
	out << "struct CompactEntry {\n"
	<< " uint32_t values;\n"
	<< " uint8_t data[" << std::ceil(float(numTransitions) / float(kDataPerByte)) << "];\n"
	<< "};\n";

	// Emit the full-table data.
	out << "static constexpr FullEntry kFull[] = {\n";
	for (const FullEntry& entry : fullEntries) {
	out << " {";
	for (int value : entry.data) {
	out << value << ", ";
	}
	out << "},\n";
	}
	out << "};\n";

	// Emit the compact-table data.
	out << "static constexpr CompactEntry kCompact[] = {\n";
	for (const CompactEntry& entry : compactEntries) {
	out << " {";

	// We pack all three values into `v`. If kNumBits were to change, we would need to adjust
	// this packing algorithm.
	static_assert(kNumBits == 2);
	out << entry.v[0];
	if (entry.v[1]) {
	out << " \| (" << entry.v[1] << " << " << bitsPerValue << ")";
	}
	if (entry.v[2]) {
	out << " \| (" << entry.v[2] << " << " << (2 * bitsPerValue) << ")";
	}
	out << ", {";

	unsigned int shiftBits = 0, combinedBits = 0;
	for (int index = 0; index < numTransitions; index++) {
	combinedBits \|= entry.data[index] << shiftBits;
	shiftBits += kNumBits;
	assert(shiftBits <= 8);
	if (shiftBits == 8) {
	out << combinedBits << ", ";
	shiftBits = 0;
	combinedBits = 0;
	}
	}
	if (shiftBits > 0) {
	// Flush any partial values.
	out << combinedBits;
	}
	out << "}},\n";
	}
	out << "};\n"
	<< "static constexpr IndexEntry kIndices[] = {\n";
	for (const IndexEntry& entry : indices) {
	if (entry.type == kFullEntry) {
	// Bit-not is used so that full entries start at -1 and go down from there.
	out << ~entry.pos << ", ";
	} else {
	// Compact entries start at 0 and go up from there.
	out << entry.pos << ", ";
	}
	}
	out << "};\n"
	<< "State get_transition(int transition, int state) {\n"
	<< " IndexEntry index = kIndices[state];\n"
	<< " if (index < 0) { return kFull[~index].data[transition]; }\n"
	<< " const CompactEntry& entry = kCompact[index];\n"
	<< " int v = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n"
	<< " v >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n"
	<< " v &= " << kNumValues << ";\n"
	<< " v *= " << bitsPerValue << ";\n"
	<< " return (entry.values >> v) & " << maxValue << ";\n"
	<< "}\n";
	}