UTF-8 support WIP

Implemented most of the functionality of UTF8. It needs testing.
Also, fixed a few bugs along the way.
diff --git a/TextEditor.cpp b/TextEditor.cpp
index c48e050..b78e831 100644
--- a/TextEditor.cpp
+++ b/TextEditor.cpp
@@ -11,8 +11,6 @@
 

 // TODO

 // - multiline comments vs single-line: latter is blocking start of a ML

-// - handle unicode/utf

-// - testing

 

 template<class InputIt1, class InputIt2, class BinaryPredicate>

 bool equals(InputIt1 first1, InputIt1 last1,

@@ -36,8 +34,10 @@
 	, mScrollToCursor(false)

 	, mScrollToTop(false)

 	, mTextChanged(false)

+	, mColorizerEnabled(true)

 	, mTextStart(20.0f)

 	, mLeftMargin(10)

+	, mCursorPositionChanged(false)

 	, mColorRangeMin(0)

 	, mColorRangeMax(0)

 	, mSelectionMode(SelectionMode::Normal)

@@ -63,6 +63,8 @@
 

 	for (auto& r : mLanguageDefinition.mTokenRegexStrings)

 		mRegexList.push_back(std::make_pair(std::regex(r.first, std::regex_constants::optimize), r.second));

+

+	Colorize();

 }

 

 void TextEditor::SetPalette(const Palette & aValue)

@@ -70,40 +72,38 @@
 	mPaletteBase = aValue;

 }

 

-int TextEditor::AppendBuffer(std::string& aBuffer, char chr, int aIndex)

-{

-	if (chr != '\t')

-	{

-		aBuffer.push_back(chr);

-		return aIndex + 1;

-	}

-	else

-	{

-		//auto num = mTabSize - aIndex % mTabSize;

-		//for (int j = num; j > 0; --j)

-		//	aBuffer.push_back(' ');

-		//return aIndex + num;

-		return aIndex;

-	}

-}

-

 std::string TextEditor::GetText(const Coordinates & aStart, const Coordinates & aEnd) const

 {

 	std::string result;

 

-	int prevLineNo = aStart.mLine;

-	for (auto it = aStart; it <= aEnd; Advance(it))

-	{

-		if (prevLineNo != it.mLine && it.mLine < (int)mLines.size())

-			result.push_back('\n');

+	auto lstart = aStart.mLine;

+	auto lend = aEnd.mLine;

+	auto istart = GetCharacterIndex(aStart);

+	auto iend = GetCharacterIndex(aEnd);

+	size_t s = 0;

 

-		if (it == aEnd)

+	for (size_t i = lstart; i < lend; i++)

+		s += mLines[i].size();

+

+	result.reserve(s + s / 8);

+

+	while (istart < iend || lstart < lend)

+	{

+		if (lstart >= (int)mLines.size())

 			break;

 

-		prevLineNo = it.mLine;

-		const auto& line = mLines[it.mLine];

-		if (!line.empty() && it.mColumn < (int)line.size())

-			result.push_back(line[it.mColumn].mChar);

+		auto& line = mLines[lstart];

+		if (istart < (int)line.size())

+		{

+			result += line[istart].mChar;

+			istart++;

+		}

+		else

+		{

+			istart = 0;

+			++lstart;

+			result += '\n';

+		}

 	}

 

 	return result;

@@ -118,7 +118,6 @@
 {

 	auto line = aValue.mLine;

 	auto column = aValue.mColumn;

-

 	if (line >= (int)mLines.size())

 	{

 		if (mLines.empty())

@@ -129,15 +128,70 @@
 		else

 		{

 			line = (int)mLines.size() - 1;

-			column = (int)mLines[line].size();

+			column = GetLineMaxColumn(line);

 		}

+		return Coordinates(line, column);

 	}

 	else

 	{

-		column = mLines.empty() ? 0 : std::min((int)mLines[line].size(), aValue.mColumn);

+		column = mLines.empty() ? 0 : std::min(column, GetLineMaxColumn(line));

+		return Coordinates(line, column);

 	}

+}

 

-	return Coordinates(line, column);

+// https://en.wikipedia.org/wiki/UTF-8

+// We assume that the char is a standalone character (<128) or a leading byte of an UTF-8 code sequence (non-10xxxxxx code)

+static int UTF8CharLength(TextEditor::Char c)

+{

+	if ((c & 0xFE) == 0xFC)

+		return 6;

+	if ((c & 0xFC) == 0xF8)

+		return 5;

+	if ((c & 0xF8) == 0xF0)

+		return 4;

+	else if ((c & 0xF0) == 0xE0)

+		return 3;

+	else if ((c & 0xE0) == 0xC0)

+		return 2;

+	return 1;

+}

+

+// "Borrowed" from ImGui source

+static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c)

+{

+	if (c < 0x80)

+	{

+		buf[0] = (char)c;

+		return 1;

+	}

+	if (c < 0x800)

+	{

+		if (buf_size < 2) return 0;

+		buf[0] = (char)(0xc0 + (c >> 6));

+		buf[1] = (char)(0x80 + (c & 0x3f));

+		return 2;

+	}

+	if (c >= 0xdc00 && c < 0xe000)

+	{

+		return 0;

+	}

+	if (c >= 0xd800 && c < 0xdc00)

+	{

+		if (buf_size < 4) return 0;

+		buf[0] = (char)(0xf0 + (c >> 18));

+		buf[1] = (char)(0x80 + ((c >> 12) & 0x3f));

+		buf[2] = (char)(0x80 + ((c >> 6) & 0x3f));

+		buf[3] = (char)(0x80 + ((c) & 0x3f));

+		return 4;

+	}

+	//else if (c < 0x10000)

+	{

+		if (buf_size < 3) return 0;

+		buf[0] = (char)(0xe0 + (c >> 12));

+		buf[1] = (char)(0x80 + ((c >> 6) & 0x3f));

+		buf[2] = (char)(0x80 + ((c) & 0x3f));

+		return 3;

+	}

 }

 

 void TextEditor::Advance(Coordinates & aCoordinates) const

@@ -145,14 +199,19 @@
 	if (aCoordinates.mLine < (int)mLines.size())

 	{

 		auto& line = mLines[aCoordinates.mLine];

+		auto cindex = GetCharacterIndex(aCoordinates);

 

-		if (aCoordinates.mColumn + 1 < (int)line.size())

-			++aCoordinates.mColumn;

+		if (cindex + 1 < (int)line.size())

+		{

+			auto delta = UTF8CharLength(line[cindex].mChar);

+			cindex = std::min(cindex + delta, (int)line.size() - 1);

+		}

 		else

 		{

 			++aCoordinates.mLine;

-			aCoordinates.mColumn = 0;

+			cindex = 0;

 		}

+		aCoordinates.mColumn = GetCharacterColumn(aCoordinates.mLine, cindex);

 	}

 }

 

@@ -161,24 +220,30 @@
 	assert(aEnd >= aStart);

 	assert(!mReadOnly);

 

+	//printf("D(%d.%d)-(%d.%d)\n", aStart.mLine, aStart.mColumn, aEnd.mLine, aEnd.mColumn);

+

 	if (aEnd == aStart)

 		return;

 

+	auto start = GetCharacterIndex(aStart);

+	auto end = GetCharacterIndex(aEnd);

+

 	if (aStart.mLine == aEnd.mLine)

 	{

 		auto& line = mLines[aStart.mLine];

-		if (aEnd.mColumn >= (int)line.size())

-			line.erase(line.begin() + aStart.mColumn, line.end());

+		auto n = GetLineMaxColumn(aStart.mLine);

+		if (aEnd.mColumn >= n)

+			line.erase(line.begin() + start, line.end());

 		else

-			line.erase(line.begin() + aStart.mColumn, line.begin() + aEnd.mColumn);

+			line.erase(line.begin() + start, line.begin() + end);

 	}

 	else

 	{

 		auto& firstLine = mLines[aStart.mLine];

 		auto& lastLine = mLines[aEnd.mLine];

 

-		firstLine.erase(firstLine.begin() + aStart.mColumn, firstLine.end());

-		lastLine.erase(lastLine.begin(), lastLine.begin() + aEnd.mColumn);

+		firstLine.erase(firstLine.begin() + start, firstLine.end());

+		lastLine.erase(lastLine.begin(), lastLine.begin() + end);

 

 		if (aStart.mLine < aEnd.mLine)

 			firstLine.insert(firstLine.end(), lastLine.begin(), lastLine.end());

@@ -194,24 +259,25 @@
 {

 	assert(!mReadOnly);

 

+	int cindex = GetCharacterIndex(aWhere);

 	int totalLines = 0;

-	auto chr = *aValue;

-	while (chr != '\0')

+	while (*aValue != '\0')

 	{

 		assert(!mLines.empty());

 

-		if (chr == '\r')

+		if (*aValue == '\r')

 		{

 			// skip

+			++aValue;

 		}

-		else if (chr == '\n')

+		else if (*aValue == '\n')

 		{

-			if (aWhere.mColumn < (int)mLines[aWhere.mLine].size())

+			if (cindex < (int)mLines[aWhere.mLine].size())

 			{

 				auto& newLine = InsertLine(aWhere.mLine + 1);

 				auto& line = mLines[aWhere.mLine];

-				newLine.insert(newLine.begin(), line.begin() + aWhere.mColumn, line.end());

-				line.erase(line.begin() + aWhere.mColumn, line.end());

+				newLine.insert(newLine.begin(), line.begin() + cindex, line.end());

+				line.erase(line.begin() + cindex, line.end());

 			}

 			else

 			{

@@ -219,15 +285,18 @@
 			}

 			++aWhere.mLine;

 			aWhere.mColumn = 0;

+			cindex = 0;

 			++totalLines;

+			++aValue;

 		}

 		else

 		{

 			auto& line = mLines[aWhere.mLine];

-			line.insert(line.begin() + aWhere.mColumn, Glyph(chr, PaletteIndex::Default));

+			auto d = UTF8CharLength(*aValue);

+			while (d-- > 0 && *aValue != '\0')

+				line.insert(line.begin() + cindex++, Glyph(*aValue++, PaletteIndex::Default));

 			++aWhere.mColumn;

 		}

-		chr = *(++aValue);

 

 		mTextChanged = true;

 	}

@@ -238,8 +307,14 @@
 void TextEditor::AddUndo(UndoRecord& aValue)

 {

 	assert(!mReadOnly);

+	//printf("AddUndo: (@%d.%d) +\'%s' [%d.%d .. %d.%d], -\'%s', [%d.%d .. %d.%d] (@%d.%d)\n",

+	//	aValue.mBefore.mCursorPosition.mLine, aValue.mBefore.mCursorPosition.mColumn,

+	//	aValue.mAdded.c_str(), aValue.mAddedStart.mLine, aValue.mAddedStart.mColumn, aValue.mAddedEnd.mLine, aValue.mAddedEnd.mColumn,

+	//	aValue.mRemoved.c_str(), aValue.mRemovedStart.mLine, aValue.mRemovedStart.mColumn, aValue.mRemovedEnd.mLine, aValue.mRemovedEnd.mColumn,

+	//	aValue.mAfter.mCursorPosition.mLine, aValue.mAfter.mCursorPosition.mColumn

+	//	);

 

-	mUndoBuffer.resize(mUndoIndex + 1);

+	mUndoBuffer.resize((size_t)(mUndoIndex + 1));

 	mUndoBuffer.back() = aValue;

 	++mUndoIndex;

 }

@@ -251,32 +326,46 @@
 

 	int lineNo = std::max(0, (int)floor(local.y / mCharAdvance.y));

 

-	/*

-		Compute columnCoord according to text size

-	*/

 	int columnCoord = 0;

-	float columnWidth = 0.0f;

-	std::string cumulatedString = "";

-	float cumulatedStringWidth[2] = { 0.0f, 0.0f }; //( [0] is the lastest, [1] is the previous. I use that trick to check where cursor is exactly (important for tabs)

 

 	if (lineNo >= 0 && lineNo < (int)mLines.size())

 	{

 		auto& line = mLines.at(lineNo);

 

+		int columnIndex = 0;

+		std::string cumulatedString = "";

+		float columnWidth = 0.0f;

+		float columnX = 0.0f;

+

 		// First we find the hovered column coord.

-		while (mTextStart + cumulatedStringWidth[0] < local.x &&

-			(size_t)columnCoord < line.size())

+		while (mTextStart + columnX < local.x && (size_t)columnIndex < line.size())

 		{

-			cumulatedStringWidth[1] = cumulatedStringWidth[0];

-			cumulatedString += line[columnCoord].mChar;

-			cumulatedStringWidth[0] = ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, cumulatedString.c_str(), nullptr, nullptr).x;

-			columnWidth = (cumulatedStringWidth[0] - cumulatedStringWidth[1]);

-			columnCoord++;

+			if (line[columnIndex].mChar == '\t')

+			{

+				auto fontScale = ImGui::GetFontSize() / ImGui::GetFont()->FontSize;

+				float spaceSize = ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, " ").x;

+				float oldX = columnX;

+				columnX = (1.0f * fontScale + std::floor((1.0f + columnX)) / (float(mTabSize) * spaceSize)) * (float(mTabSize) * spaceSize);

+				columnWidth = columnX - oldX;

+				columnCoord++;

+			}

+			else

+			{

+				char buf[7];

+				auto d = UTF8CharLength(line[columnIndex].mChar);

+				int i = 0;

+				while (i < 6 && d-- > 0)

+					buf[i++] = line[columnIndex++].mChar;

+				buf[i] = '\0';

+				columnWidth = ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, buf).x;

+				columnX += columnWidth;

+				columnCoord++;

+			}

 		}

 

 		// Then we reduce by 1 column coord if cursor is on the left side of the hovered column.

-		if (mTextStart + cumulatedStringWidth[0] - columnWidth / 2.0f > local.x)

-			columnCoord = std::max(0, columnCoord - 1);

+		if (mTextStart + columnX - columnWidth / 2.0f > local.x)

+			columnIndex = std::max(0, columnIndex - 1);

 	}

 

 	return SanitizeCoordinates(Coordinates(lineNo, columnCoord));

@@ -289,18 +378,31 @@
 		return at;

 

 	auto& line = mLines[at.mLine];

+	auto cindex = GetCharacterIndex(at);

 

-	if (at.mColumn >= (int)line.size())

+	if (cindex >= (int)line.size())

 		return at;

 

-	auto cstart = (PaletteIndex)line[at.mColumn].mColorIndex;

-	while (at.mColumn > 0)

+	while (cindex > 0 && isspace(line[cindex].mChar))

+		--cindex;

+

+	auto cstart = (PaletteIndex)line[cindex].mColorIndex;

+	while (cindex > 0)

 	{

-		if (cstart != (PaletteIndex)line[at.mColumn - 1].mColorIndex)

-			break;

-		--at.mColumn;

+		auto c = line[cindex].mChar;

+		if ((c & 0xC0) != 0x80)	// not UTF code sequence 10xxxxxx

+		{

+			if (c <= 32 && isspace(c))

+			{

+				cindex++;

+				break;

+			}

+			if (cstart != (PaletteIndex)line[size_t(cindex - 1)].mColorIndex)

+				break;

+		}

+		--cindex;

 	}

-	return at;

+	return Coordinates(at.mLine, GetCharacterColumn(at.mLine, cindex));

 }

 

 TextEditor::Coordinates TextEditor::FindWordEnd(const Coordinates & aFrom) const

@@ -310,18 +412,96 @@
 		return at;

 

 	auto& line = mLines[at.mLine];

+	auto cindex = GetCharacterIndex(at);

 

-	if (at.mColumn >= (int)line.size())

+	if (cindex >= (int)line.size())

 		return at;

 

-	auto cstart = (PaletteIndex)line[at.mColumn].mColorIndex;

-	while (at.mColumn < (int)line.size())

+	bool prevspace = (bool)isspace(line[cindex].mChar);

+	auto cstart = (PaletteIndex)line[cindex].mColorIndex;

+	while (cindex < (int)line.size())

 	{

-		if (cstart != (PaletteIndex)line[at.mColumn].mColorIndex)

+		auto c = line[cindex].mChar;

+		auto d = UTF8CharLength(c);

+		if (cstart != (PaletteIndex)line[cindex].mColorIndex)

 			break;

-		++at.mColumn;

+

+		if (prevspace != !!isspace(c))

+		{

+			if (isspace(c))

+				while (cindex < (int)line.size() && isspace(line[cindex].mChar))

+					++cindex;

+			break;

+		}

+		cindex += d;

 	}

-	return at;

+	return Coordinates(aFrom.mLine, GetCharacterColumn(aFrom.mLine, cindex));

+}

+

+int TextEditor::GetCharacterIndex(const Coordinates& aCoordinates) const

+{

+	if (aCoordinates.mLine >= mLines.size())

+		return -1;

+	auto& line = mLines[aCoordinates.mLine];

+	int c = 0;

+	int i = 0;

+	for (; i < line.size() && c < aCoordinates.mColumn;)

+	{

+		if (line[i].mChar == '\t')

+			c = (c / mTabSize) * mTabSize + mTabSize;

+		else

+			++c;

+		i += UTF8CharLength(line[i].mChar);

+	}

+	return i;

+}

+

+int TextEditor::GetCharacterColumn(int aLine, int aIndex) const

+{

+	if (aLine >= mLines.size())

+		return 0;

+	auto& line = mLines[aLine];

+	int col = 0;

+	int i = 0;

+	while (i < aIndex && i < (int)line.size())

+	{

+		auto c = line[i].mChar;

+		i += UTF8CharLength(c);

+		if (c == '\t')

+			col = (col / mTabSize) * mTabSize + mTabSize;

+		else

+			col++;

+	}

+	return col;

+}

+

+int TextEditor::GetLineCharacterCount(int aLine) const

+{

+	if (aLine >= mLines.size())

+		return 0;

+	auto& line = mLines[aLine];

+	int c = 0;

+	for (unsigned i = 0; i < line.size(); c++)

+		i += UTF8CharLength(line[i].mChar);

+	return c;

+}

+

+int TextEditor::GetLineMaxColumn(int aLine) const

+{

+	if (aLine >= mLines.size())

+		return 0;

+	auto& line = mLines[aLine];

+	int col = 0;

+	for (unsigned i = 0; i < line.size(); )

+	{

+		auto c = line[i].mChar;

+		if (c == '\t')

+			col = (col / mTabSize) * mTabSize + mTabSize;

+		else

+			col++;

+		i += UTF8CharLength(c);

+	}

+	return col;

 }

 

 bool TextEditor::IsOnWordBoundary(const Coordinates & aAt) const

@@ -330,10 +510,14 @@
 		return true;

 

 	auto& line = mLines[aAt.mLine];

-	if (aAt.mColumn >= (int)line.size())

+	auto cindex = GetCharacterIndex(aAt);

+	if (cindex >= (int)line.size())

 		return true;

 

-	return line[aAt.mColumn].mColorIndex != line[aAt.mColumn - 1].mColorIndex;

+	if (mColorizerEnabled)

+		return line[cindex].mColorIndex != line[size_t(cindex - 1)].mColorIndex;

+

+	return isspace(line[cindex].mChar) != isspace(line[cindex - 1].mChar);

 }

 

 void TextEditor::RemoveLine(int aStart, int aEnd)

@@ -429,14 +613,19 @@
 

 	std::string r;

 

-	for (auto it = start; it < end; Advance(it))

-		r.push_back(mLines[it.mLine][it.mColumn].mChar);

+	auto istart = GetCharacterIndex(start);

+	auto iend = GetCharacterIndex(end);

+

+	for (auto it = istart; it < iend; ++it)

+		r.push_back(mLines[aCoords.mLine][it].mChar);

 

 	return r;

 }

 

 ImU32 TextEditor::GetGlyphColor(const Glyph & aGlyph) const

 {

+	if (!mColorizerEnabled)

+		return mPalette[(int)PaletteIndex::Default];

 	if (aGlyph.mComment)

 		return mPalette[(int)PaletteIndex::Comment];

 	if (aGlyph.mMultiLineComment)

@@ -451,7 +640,6 @@
 		const int c3 = (((ppcolor >> 24) & 0xff) + ((color >> 24) & 0xff)) / 2;

 		return ImU32(c0 | (c1 << 8) | (c2 << 16) | (c3 << 24));

 	}

-

 	return color;

 }

 

@@ -464,7 +652,7 @@
 

 	if (ImGui::IsWindowFocused())

 	{

-		if (ImGui::IsWindowHovered() && !ImGui::IsAnyItemHovered() && !ImGui::IsAnyItemFocused())

+		if (ImGui::IsWindowHovered())

 			ImGui::SetMouseCursor(ImGuiMouseCursor_TextInput);

 		//ImGui::CaptureKeyboardFromApp(true);

 

@@ -500,7 +688,7 @@
 		else if (!IsReadOnly() && !ctrl && !shift && !alt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Delete)))

 			Delete();

 		else if (!IsReadOnly() && !ctrl && !shift && !alt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Backspace)))

-			BackSpace();

+			Backspace();

 		else if (!ctrl && !shift && !alt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Insert)))

 			mOverwrite ^= true;

 		else if (ctrl && !shift && !alt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Insert)))

@@ -521,16 +709,17 @@
 			EnterCharacter('\n', false);

 		else if (!IsReadOnly() && !ctrl && !alt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Tab)))

 			EnterCharacter('\t', shift);

-		else if (!IsReadOnly() && !ctrl && !alt)

+		

+		if (!IsReadOnly() && !io.InputQueueCharacters.empty())

 		{

 			for (int i = 0; i < io.InputQueueCharacters.Size; i++)

 			{

-				auto c = (unsigned char)io.InputQueueCharacters[i];

+				auto c = io.InputQueueCharacters[i];

 				if (c != 0)

 				{

-					if (isprint(c) || isspace(c))

+					if (c == '\t' || c == '\n' || c >= 32)

 					{

-						EnterCharacter((char)c, shift);

+						EnterCharacter(c, shift);

 					}

 				}

 			}

@@ -556,7 +745,7 @@
 			auto tripleClick = click && !doubleClick && (mLastClick != -1.0f && (t - mLastClick) < io.MouseDoubleClickTime);

 

 			/*

-				Left mouse button triple click

+			Left mouse button triple click

 			*/

 

 			if (tripleClick)

@@ -572,7 +761,7 @@
 			}

 

 			/*

-				Left mouse button double click

+			Left mouse button double click

 			*/

 

 			else if (doubleClick)

@@ -591,7 +780,7 @@
 			}

 

 			/*

-				Left mouse button click

+			Left mouse button click

 			*/

 			else if (click)

 			{

@@ -628,14 +817,14 @@
 		color.w *= ImGui::GetStyle().Alpha;

 		mPalette[i] = ImGui::ColorConvertFloat4ToU32(color);

 	}

-	

+

 	static std::string buffer;

 	assert(buffer.empty());

-	

+

 	auto contentSize = ImGui::GetWindowContentRegionMax();

 	auto drawList = ImGui::GetWindowDrawList();

 	float longest(mTextStart);

-	

+

 	if (mScrollToTop)

 	{

 		mScrollToTop = false;

@@ -666,10 +855,10 @@
 			ImVec2 textScreenPos = ImVec2(lineStartScreenPos.x + mTextStart, lineStartScreenPos.y);

 

 			auto& line = mLines[lineNo];

-			longest = std::max(mTextStart + TextDistanceToLineStart(Coordinates(lineNo, (int)line.size())), longest);

+			longest = std::max(mTextStart + TextDistanceToLineStart(Coordinates(lineNo, GetLineMaxColumn(lineNo))), longest);

 			auto columnNo = 0;

 			Coordinates lineStartCoord(lineNo, 0);

-			Coordinates lineEndCoord(lineNo, (int)line.size());

+			Coordinates lineEndCoord(lineNo, GetLineMaxColumn(lineNo));

 

 			// Draw selection for the current line

 			float sstart = -1.0f;

@@ -723,7 +912,7 @@
 

 			// Draw line number (right aligned)

 			snprintf(buf, 16, "%d  ", lineNo + 1);

-			

+

 			auto lineNoWidth = ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, buf, nullptr, nullptr).x;

 			drawList->AddText(ImVec2(lineStartScreenPos.x + mTextStart - lineNoWidth, lineStartScreenPos.y), mPalette[(int)PaletteIndex::LineNumber], buf);

 

@@ -762,8 +951,9 @@
 			auto prevColor = line.empty() ? mPalette[(int)PaletteIndex::Default] : GetGlyphColor(line[0]);

 			ImVec2 bufferOffset;

 

-			for (auto& glyph : line)

+			for (int i = 0; i < line.size();)

 			{

+				auto& glyph = line[i];

 				auto color = GetGlyphColor(glyph);

 

 				if ((color != prevColor || glyph.mChar == '\t') && !buffer.empty())

@@ -777,9 +967,16 @@
 				prevColor = color;

 

 				if (glyph.mChar == '\t')

+				{

 					bufferOffset.x = (1.0f * fontScale + std::floor((1.0f + bufferOffset.x)) / (float(mTabSize) * spaceSize)) * (float(mTabSize) * spaceSize);

+					++i;

+				}

 				else

-					AppendBuffer(buffer, glyph.mChar, 0);

+				{

+					auto l = UTF8CharLength(glyph.mChar);

+					while (l-- > 0)

+						buffer.push_back(line[i++].mChar);

+				}

 				++columnNo;

 			}

 

@@ -842,19 +1039,24 @@
 	if( !mIgnoreImGuiChild)

 		ImGui::BeginChild(aTitle, aSize, aBorder, ImGuiWindowFlags_HorizontalScrollbar | ImGuiWindowFlags_AlwaysHorizontalScrollbar | ImGuiWindowFlags_NoMove);

 	

-	if (mHandleKeyboardInputs) {

+	if (mHandleKeyboardInputs) 

+	{

 		HandleKeyboardInputs();

 		ImGui::PushAllowKeyboardFocus(true);

 	}

 

-	if( mHandleMouseInputs)    HandleMouseInputs();

+	if( mHandleMouseInputs)    

+		HandleMouseInputs();

 

 	ColorizeInternal();

 	Render();

 

-	if (mHandleKeyboardInputs) ImGui::PopAllowKeyboardFocus();

+	if (mHandleKeyboardInputs)

+		ImGui::PopAllowKeyboardFocus();

 

-	if( !mIgnoreImGuiChild) ImGui::EndChild();

+	if(!mIgnoreImGuiChild)

+		ImGui::EndChild();

+

 	ImGui::PopStyleVar();

 	ImGui::PopStyleColor();

 

@@ -878,7 +1080,7 @@
 			mLines.back().emplace_back(Glyph(chr, PaletteIndex::Default));

 		}

 	}

-	

+

 	mTextChanged = true;

 	mScrollToTop = true;

 

@@ -919,7 +1121,7 @@
 	Colorize();

 }

 

-void TextEditor::EnterCharacter(Char aChar, bool aShift)

+void TextEditor::EnterCharacter(ImWchar aChar, bool aShift)

 {

 	assert(!mReadOnly);

 

@@ -929,23 +1131,25 @@
 

 	if (HasSelection())

 	{

-		if (aChar == '\t')

+		if (aChar == '\t' && mState.mSelectionStart.mLine != mState.mSelectionEnd.mLine)

 		{

+

 			auto start = mState.mSelectionStart;

 			auto end = mState.mSelectionEnd;

+			auto originalEnd = end;

 

 			if (start > end)

 				std::swap(start, end);

 			start.mColumn = 0;

 			//			end.mColumn = end.mLine < mLines.size() ? mLines[end.mLine].size() : 0;

 			if (end.mColumn == 0 && end.mLine > 0)

-			{

 				--end.mLine;

-				end.mColumn = (int)mLines[end.mLine].size();

-			}

+			if (end.mLine >= (int)mLines.size())

+				end.mLine = mLines.empty() ? 0 : (int) mLines.size() - 1;

+			end.mColumn = GetLineMaxColumn(end.mLine);

 

-			if (end.mColumn >= (int)mLines[end.mLine].size())

-				end.mColumn = (int)mLines[end.mLine].size() - 1;

+			//if (end.mColumn >= GetLineMaxColumn(end.mLine))

+			//	end.mColumn = GetLineMaxColumn(end.mLine) - 1;

 

 			u.mRemovedStart = start;

 			u.mRemovedEnd = end;

@@ -958,52 +1162,62 @@
 				auto& line = mLines[i];

 				if (aShift)

 				{

-					if (line.empty() == false)

+					if (!line.empty())

 					{

 						if (line.front().mChar == '\t')

 						{

 							line.erase(line.begin());

-							if (i == end.mLine && end.mColumn > 0)

-								end.mColumn--;

 							modified = true;

 						}

-					}

-					else

-					{

-						for (int j = 0; j < mTabSize && line.empty() == false && line.front().mChar == ' '; j++)

+						else

 						{

-							line.erase(line.begin());

-							if (i == end.mLine && end.mColumn > 0)

-								end.mColumn--;

-							modified = true;

+							for (int j = 0; j < mTabSize && !line.empty() && line.front().mChar == ' '; j++)

+							{

+								line.erase(line.begin());

+								modified = true;

+							}

 						}

 					}

 				}

 				else

 				{

 					line.insert(line.begin(), Glyph('\t', TextEditor::PaletteIndex::Background));

-					if (i == end.mLine)

-						++end.mColumn;

 					modified = true;

 				}

 			}

 

 			if (modified)

 			{

-				assert(mLines.size() > start.mLine && mLines[start.mLine].size() > start.mColumn);

-				assert(mLines.size() > end.mLine && mLines[end.mLine].size() > end.mColumn);

+				start = Coordinates(start.mLine, GetCharacterColumn(start.mLine, 0));

+				Coordinates rangeEnd;

+				if (originalEnd.mColumn != 0)

+				{

+					end = Coordinates(end.mLine, GetLineMaxColumn(end.mLine));

+					rangeEnd = end;

+					u.mAdded = GetText(start, end);

+				}

+				else

+				{

+					end = Coordinates(originalEnd.mLine, 0);

+					rangeEnd = Coordinates(end.mLine - 1, GetLineMaxColumn(end.mLine - 1));

+					u.mAdded = GetText(start, rangeEnd);

+				}

+

 				u.mAddedStart = start;

-				u.mAddedEnd = end;

-				u.mAdded = GetText(start, end);

+				u.mAddedEnd = rangeEnd;

+				u.mAfter = mState;

+

+				mState.mSelectionStart = start;

+				mState.mSelectionEnd = end;

+				AddUndo(u);

 

 				mTextChanged = true;

 

-				AddUndo(u);

 				EnsureCursorVisible();

 			}

 

 			return;

-		}

+		} // c == '\t'

 		else

 		{

 			u.mRemoved = GetSelectedText();

@@ -1011,7 +1225,7 @@
 			u.mRemovedEnd = mState.mSelectionEnd;

 			DeleteSelection();

 		}

-	}

+	} // HasSelection

 

 	auto coord = GetActualCursorCoordinates();

 	u.mAddedStart = coord;

@@ -1025,29 +1239,52 @@
 		auto& newLine = mLines[coord.mLine + 1];

 

 		if (mLanguageDefinition.mAutoIndentation)

-		{

-			for (size_t it = 0; it < line.size() && isblank(line[it].mChar); ++it)

+			for (size_t it = 0; it < line.size() && isascii(line[it].mChar) && isblank(line[it].mChar); ++it)

 				newLine.push_back(line[it]);

-		}

 

 		const size_t whitespaceSize = newLine.size();

-		newLine.insert(newLine.end(), line.begin() + coord.mColumn, line.end());

-		line.erase(line.begin() + coord.mColumn, line.begin() + line.size());

-		SetCursorPosition(Coordinates(coord.mLine + 1, (int)whitespaceSize));

+		auto cindex = GetCharacterIndex(coord);

+		newLine.insert(newLine.end(), line.begin() + cindex, line.end());

+		line.erase(line.begin() + cindex, line.begin() + line.size());

+		SetCursorPosition(Coordinates(coord.mLine + 1, GetCharacterColumn(coord.mLine + 1, (int)whitespaceSize)));

+		u.mAdded = (char) aChar;

 	}

 	else

 	{

-		auto& line = mLines[coord.mLine];

-		if (mOverwrite && (int)line.size() > coord.mColumn)

-			line[coord.mColumn] = Glyph(aChar, PaletteIndex::Default);

+		char buf[7];

+		int e = ImTextCharToUtf8(buf, 7, aChar);

+		if (e > 0)

+		{

+			buf[e] = '\0';

+			auto& line = mLines[coord.mLine];

+			auto cindex = GetCharacterIndex(coord);

+

+			if (mOverwrite && cindex < (int)line.size())

+			{

+				auto d = UTF8CharLength(line[cindex].mChar);

+

+				u.mRemovedStart = mState.mCursorPosition;

+				u.mRemovedEnd = Coordinates(coord.mLine, GetCharacterColumn(coord.mLine, cindex + d));

+

+				while (d-- > 0 && cindex < (int) line.size())

+				{

+					u.mRemoved += line[cindex].mChar;

+					line.erase(line.begin() + cindex);

+				}

+			}

+			

+			for (auto p = buf; *p != '\0'; p++, ++cindex)

+				line.insert(line.begin() + cindex, Glyph(*p, PaletteIndex::Default));

+			u.mAdded = buf;

+

+			SetCursorPosition(Coordinates(coord.mLine, GetCharacterColumn(coord.mLine, cindex)));

+		}

 		else

-			line.insert(line.begin() + coord.mColumn, Glyph(aChar, PaletteIndex::Default));

-		SetCursorPosition(Coordinates(coord.mLine, coord.mColumn + 1));

+			return;

 	}

 

 	mTextChanged = true;

 

-	u.mAdded = aChar;

 	u.mAddedEnd = GetActualCursorCoordinates();

 	u.mAfter = mState;

 

@@ -1062,6 +1299,11 @@
 	mReadOnly = aValue;

 }

 

+void TextEditor::SetColorizerEnable(bool aValue)

+{

+	mColorizerEnabled = aValue;

+}

+

 void TextEditor::SetCursorPosition(const Coordinates & aPosition)

 {

 	if (mState.mCursorPosition != aPosition)

@@ -1093,7 +1335,7 @@
 

 	mState.mSelectionStart = SanitizeCoordinates(aStart);

 	mState.mSelectionEnd = SanitizeCoordinates(aEnd);

-	if (aStart > aEnd)

+	if (mState.mSelectionStart > mState.mSelectionEnd)

 		std::swap(mState.mSelectionStart, mState.mSelectionEnd);

 

 	switch (aMode)

@@ -1112,7 +1354,7 @@
 		const auto lineNo = mState.mSelectionEnd.mLine;

 		const auto lineSize = (size_t)lineNo < mLines.size() ? mLines[lineNo].size() : 0;

 		mState.mSelectionStart = Coordinates(mState.mSelectionStart.mLine, 0);

-		mState.mSelectionEnd = Coordinates(lineNo, (int)lineSize);

+		mState.mSelectionEnd = Coordinates(lineNo, GetLineMaxColumn(lineNo));

 		break;

 	}

 	default:

@@ -1213,6 +1455,11 @@
 	}

 }

 

+static bool IsUTFSequence(char c)

+{

+	return (c & 0xC0) == 0x80;

+}

+

 void TextEditor::MoveLeft(int aAmount, bool aSelect, bool aWordMode)

 {

 	if (mLines.empty())

@@ -1220,25 +1467,45 @@
 

 	auto oldPos = mState.mCursorPosition;

 	mState.mCursorPosition = GetActualCursorCoordinates();

+	auto line = mState.mCursorPosition.mLine;

+	auto cindex = GetCharacterIndex(mState.mCursorPosition);

 

 	while (aAmount-- > 0)

 	{

-		if (mState.mCursorPosition.mColumn == 0)

+		if (cindex == 0)

 		{

-			if (mState.mCursorPosition.mLine > 0)

+			if (line > 0)

 			{

-				--mState.mCursorPosition.mLine;

-				mState.mCursorPosition.mColumn = (int)mLines[mState.mCursorPosition.mLine].size();

+				--line;

+				if ((int)mLines.size() > line)

+					cindex = (int)mLines[line].size();

+				else

+					cindex = 0;

 			}

 		}

 		else

 		{

-			mState.mCursorPosition.mColumn = std::max(0, mState.mCursorPosition.mColumn - 1);

-			if (aWordMode)

-				mState.mCursorPosition = FindWordStart(mState.mCursorPosition);

+			--cindex;

+			if (cindex > 0)

+			{

+				if ((int)mLines.size() > line)

+				{

+					while (cindex > 0 && IsUTFSequence(mLines[line][cindex].mChar))

+						--cindex;

+				}

+			}

+		}

+

+		mState.mCursorPosition = Coordinates(line, GetCharacterColumn(line, cindex));

+		if (aWordMode)

+		{

+			mState.mCursorPosition = FindWordStart(mState.mCursorPosition);

+			cindex = GetCharacterIndex(mState.mCursorPosition);

 		}

 	}

 

+	mState.mCursorPosition = Coordinates(line, GetCharacterColumn(line, cindex));

+

 	assert(mState.mCursorPosition.mColumn >= 0);

 	if (aSelect)

 	{

@@ -1268,10 +1535,10 @@
 

 	while (aAmount-- > 0)

 	{

-		auto& line = mLines[mState.mCursorPosition.mLine];

-		if (mState.mCursorPosition.mColumn >= (int)line.size())

+		auto chars = GetLineMaxColumn(mState.mCursorPosition.mLine);

+		if (mState.mCursorPosition.mColumn >= chars)

 		{

-			if (mState.mCursorPosition.mLine < (int)mLines.size() - 1)

+			if (mState.mCursorPosition.mLine < mLines.size() - 1)

 			{

 				mState.mCursorPosition.mLine = std::max(0, std::min((int)mLines.size() - 1, mState.mCursorPosition.mLine + 1));

 				mState.mCursorPosition.mColumn = 0;

@@ -1279,7 +1546,7 @@
 		}

 		else

 		{

-			mState.mCursorPosition.mColumn = std::max(0, std::min((int)line.size(), mState.mCursorPosition.mColumn + 1));

+			mState.mCursorPosition.mColumn = std::max(0, std::min(chars, mState.mCursorPosition.mColumn + 1));

 			if (aWordMode)

 				mState.mCursorPosition = FindWordEnd(mState.mCursorPosition);

 		}

@@ -1365,7 +1632,7 @@
 void TextEditor::MoveEnd(bool aSelect)

 {

 	auto oldPos = mState.mCursorPosition;

-	SetCursorPosition(Coordinates(mState.mCursorPosition.mLine, (int)mLines[oldPos.mLine].size()));

+	SetCursorPosition(Coordinates(mState.mCursorPosition.mLine, GetLineMaxColumn(oldPos.mLine)));

 

 	if (mState.mCursorPosition != oldPos)

 	{

@@ -1411,7 +1678,7 @@
 		SetCursorPosition(pos);

 		auto& line = mLines[pos.mLine];

 

-		if (pos.mColumn == (int)line.size())

+		if (pos.mColumn == GetLineMaxColumn(pos.mLine))

 		{

 			if (pos.mLine == (int)mLines.size() - 1)

 				return;

@@ -1426,11 +1693,14 @@
 		}

 		else

 		{

-			u.mRemoved = line[pos.mColumn].mChar;

+			auto cindex = GetCharacterIndex(pos);

 			u.mRemovedStart = u.mRemovedEnd = GetActualCursorCoordinates();

 			u.mRemovedEnd.mColumn++;

+			u.mRemoved = GetText(u.mRemovedStart, u.mRemovedEnd);

 

-			line.erase(line.begin() + pos.mColumn);

+			auto d = UTF8CharLength(line[cindex].mChar);

+			while (d-- > 0 && cindex < (int)line.size())

+				line.erase(line.begin() + cindex);

 		}

 

 		mTextChanged = true;

@@ -1442,7 +1712,7 @@
 	AddUndo(u);

 }

 

-void TextEditor::BackSpace()

+void TextEditor::Backspace()

 {

 	assert(!mReadOnly);

 

@@ -1471,12 +1741,12 @@
 				return;

 

 			u.mRemoved = '\n';

-			u.mRemovedStart = u.mRemovedEnd = Coordinates(pos.mLine - 1, (int)mLines[pos.mLine - 1].size());

+			u.mRemovedStart = u.mRemovedEnd = Coordinates(pos.mLine - 1, GetLineMaxColumn(pos.mLine - 1));

 			Advance(u.mRemovedEnd);

 

 			auto& line = mLines[mState.mCursorPosition.mLine];

 			auto& prevLine = mLines[mState.mCursorPosition.mLine - 1];

-			auto prevSize = (int)prevLine.size();

+			auto prevSize = GetLineMaxColumn(mState.mCursorPosition.mLine - 1);

 			prevLine.insert(prevLine.end(), line.begin(), line.end());

 

 			ErrorMarkers etmp;

@@ -1491,14 +1761,23 @@
 		else

 		{

 			auto& line = mLines[mState.mCursorPosition.mLine];

+			auto cindex = GetCharacterIndex(pos) - 1;

+			auto cend = cindex + 1;

+			while (cindex > 0 && IsUTFSequence(line[cindex].mChar))

+				--cindex;

 

-			u.mRemoved = line[pos.mColumn - 1].mChar;

+			//if (cindex > 0 && UTF8CharLength(line[cindex].mChar) > 1)

+			//	--cindex;

+

 			u.mRemovedStart = u.mRemovedEnd = GetActualCursorCoordinates();

 			--u.mRemovedStart.mColumn;

-

 			--mState.mCursorPosition.mColumn;

-			if (mState.mCursorPosition.mColumn < (int)line.size())

-				line.erase(line.begin() + mState.mCursorPosition.mColumn);

+

+			while (cindex < line.size() && cend-- > cindex)

+			{

+				u.mRemoved += line[cindex].mChar;

+				line.erase(line.begin() + cindex);

+			}

 		}

 

 		mTextChanged = true;

@@ -1623,84 +1902,84 @@
 const TextEditor::Palette & TextEditor::GetDarkPalette()

 {

 	const static Palette p = { {

-		0xff7f7f7f,	// Default

-		0xffd69c56,	// Keyword	

-		0xff00ff00,	// Number

-		0xff7070e0,	// String

-		0xff70a0e0, // Char literal

-		0xffffffff, // Punctuation

-		0xff408080,	// Preprocessor

-		0xffaaaaaa, // Identifier

-		0xff9bc64d, // Known identifier

-		0xffc040a0, // Preproc identifier

-		0xff206020, // Comment (single line)

-		0xff406020, // Comment (multi line)

-		0xff101010, // Background

-		0xffe0e0e0, // Cursor

-		0x80a06020, // Selection

-		0x800020ff, // ErrorMarker

-		0x40f08000, // Breakpoint

-		0xff707000, // Line number

-		0x40000000, // Current line fill

-		0x40808080, // Current line fill (inactive)

-		0x40a0a0a0, // Current line edge

-	} };

+			0xff7f7f7f,	// Default

+			0xffd69c56,	// Keyword	

+			0xff00ff00,	// Number

+			0xff7070e0,	// String

+			0xff70a0e0, // Char literal

+			0xffffffff, // Punctuation

+			0xff408080,	// Preprocessor

+			0xffaaaaaa, // Identifier

+			0xff9bc64d, // Known identifier

+			0xffc040a0, // Preproc identifier

+			0xff206020, // Comment (single line)

+			0xff406020, // Comment (multi line)

+			0xff101010, // Background

+			0xffe0e0e0, // Cursor

+			0x80a06020, // Selection

+			0x800020ff, // ErrorMarker

+			0x40f08000, // Breakpoint

+			0xff707000, // Line number

+			0x40000000, // Current line fill

+			0x40808080, // Current line fill (inactive)

+			0x40a0a0a0, // Current line edge

+		} };

 	return p;

 }

 

 const TextEditor::Palette & TextEditor::GetLightPalette()

 {

 	const static Palette p = { {

-		0xff7f7f7f,	// None

-		0xffff0c06,	// Keyword	

-		0xff008000,	// Number

-		0xff2020a0,	// String

-		0xff304070, // Char literal

-		0xff000000, // Punctuation

-		0xff406060,	// Preprocessor

-		0xff404040, // Identifier

-		0xff606010, // Known identifier

-		0xffc040a0, // Preproc identifier

-		0xff205020, // Comment (single line)

-		0xff405020, // Comment (multi line)

-		0xffffffff, // Background

-		0xff000000, // Cursor

-		0x80600000, // Selection

-		0xa00010ff, // ErrorMarker

-		0x80f08000, // Breakpoint

-		0xff505000, // Line number

-		0x40000000, // Current line fill

-		0x40808080, // Current line fill (inactive)

-		0x40000000, // Current line edge

-	} };

+			0xff7f7f7f,	// None

+			0xffff0c06,	// Keyword	

+			0xff008000,	// Number

+			0xff2020a0,	// String

+			0xff304070, // Char literal

+			0xff000000, // Punctuation

+			0xff406060,	// Preprocessor

+			0xff404040, // Identifier

+			0xff606010, // Known identifier

+			0xffc040a0, // Preproc identifier

+			0xff205020, // Comment (single line)

+			0xff405020, // Comment (multi line)

+			0xffffffff, // Background

+			0xff000000, // Cursor

+			0x80600000, // Selection

+			0xa00010ff, // ErrorMarker

+			0x80f08000, // Breakpoint

+			0xff505000, // Line number

+			0x40000000, // Current line fill

+			0x40808080, // Current line fill (inactive)

+			0x40000000, // Current line edge

+		} };

 	return p;

 }

 

 const TextEditor::Palette & TextEditor::GetRetroBluePalette()

 {

 	const static Palette p = { {

-		0xff00ffff,	// None

-		0xffffff00,	// Keyword	

-		0xff00ff00,	// Number

-		0xff808000,	// String

-		0xff808000, // Char literal

-		0xffffffff, // Punctuation

-		0xff008000,	// Preprocessor

-		0xff00ffff, // Identifier

-		0xffffffff, // Known identifier

-		0xffff00ff, // Preproc identifier

-		0xff808080, // Comment (single line)

-		0xff404040, // Comment (multi line)

-		0xff800000, // Background

-		0xff0080ff, // Cursor

-		0x80ffff00, // Selection

-		0xa00000ff, // ErrorMarker

-		0x80ff8000, // Breakpoint

-		0xff808000, // Line number

-		0x40000000, // Current line fill

-		0x40808080, // Current line fill (inactive)

-		0x40000000, // Current line edge

-	} };

+			0xff00ffff,	// None

+			0xffffff00,	// Keyword	

+			0xff00ff00,	// Number

+			0xff808000,	// String

+			0xff808000, // Char literal

+			0xffffffff, // Punctuation

+			0xff008000,	// Preprocessor

+			0xff00ffff, // Identifier

+			0xffffffff, // Known identifier

+			0xffff00ff, // Preproc identifier

+			0xff808080, // Comment (single line)

+			0xff404040, // Comment (multi line)

+			0xff800000, // Background

+			0xff0080ff, // Cursor

+			0x80ffff00, // Selection

+			0xa00000ff, // ErrorMarker

+			0x80ff8000, // Breakpoint

+			0xff808000, // Line number

+			0x40000000, // Current line fill

+			0x40808080, // Current line fill (inactive)

+			0x40000000, // Current line edge

+		} };

 	return p;

 }

 

@@ -1738,8 +2017,10 @@
 

 std::string TextEditor::GetCurrentLineText()const

 {

-	auto lineLength = (int)mLines[mState.mCursorPosition.mLine].size();

-	return GetText(Coordinates(mState.mCursorPosition.mLine, 0), Coordinates(mState.mCursorPosition.mLine, lineLength));

+	auto lineLength = GetLineMaxColumn(mState.mCursorPosition.mLine);

+	return GetText(

+		Coordinates(mState.mCursorPosition.mLine, 0),

+		Coordinates(mState.mCursorPosition.mLine, lineLength));

 }

 

 void TextEditor::ProcessInputs()

@@ -1803,7 +2084,7 @@
 			if (hasTokenizeResult == false)

 			{

 				// todo : remove

-					//printf("using regex for %.*s\n", first + 10 < last ? 10 : int(last - first), first);

+				//printf("using regex for %.*s\n", first + 10 < last ? 10 : int(last - first), first);

 

 				for (auto& p : mRegexList)

 				{

@@ -1863,24 +2144,27 @@
 

 void TextEditor::ColorizeInternal()

 {

-	if (mLines.empty())

+	if (mLines.empty() || !mColorizerEnabled)

 		return;

 

 	if (mCheckComments)

 	{

-		auto end = Coordinates((int)mLines.size(), 0);

-		auto commentStart = end;

+		auto endLine = mLines.size();

+		auto endIndex = 0;

+		auto commentStartLine = endLine;

+		auto commentStartIndex = endIndex;

 		auto withinString = false;

 		auto withinSingleLineComment = false;

 		auto withinPreproc = false;

 		auto firstChar = true;			// there is no other non-whitespace characters in the line before

 		auto concatenate = false;		// '\' on the very end of the line

-

-		for (auto currentCoord = Coordinates(0, 0); currentCoord < end; Advance(currentCoord))

+		auto currentLine = 0;

+		auto currentIndex = 0;

+		while (currentLine < endLine || currentIndex < endIndex)

 		{

-			auto& line = mLines[currentCoord.mLine];

+			auto& line = mLines[currentLine];

 

-			if (currentCoord.mColumn == 0 && !concatenate)

+			if (currentIndex == 0 && !concatenate)

 			{

 				withinSingleLineComment = false;

 				withinPreproc = false;

@@ -1891,37 +2175,37 @@
 

 			if (!line.empty())

 			{

-				auto& g = line[currentCoord.mColumn];

+				auto& g = line[currentIndex];

 				auto c = g.mChar;

 

 				if (c != mLanguageDefinition.mPreprocChar && !isspace(c))

 					firstChar = false;

 

-				if (currentCoord.mColumn == line.size() - 1 && line[line.size() - 1].mChar == '\\')

+				if (currentIndex == (int)line.size() - 1 && line[line.size() - 1].mChar == '\\')

 					concatenate = true;

 

-				bool inComment = commentStart <= currentCoord;

+				bool inComment = (commentStartLine < currentLine || (commentStartLine == currentLine && commentStartIndex <= currentIndex));

 

 				if (withinString)

 				{

-					line[currentCoord.mColumn].mMultiLineComment = inComment;

+					line[currentIndex].mMultiLineComment = inComment;

 

 					if (c == '\"')

 					{

-						if (currentCoord.mColumn + 1 < (int)line.size() && line[currentCoord.mColumn + 1].mChar == '\"')

+						if (currentIndex + 1 < (int)line.size() && line[currentIndex + 1].mChar == '\"')

 						{

-							Advance(currentCoord);

-							if (currentCoord.mColumn < (int)line.size())

-								line[currentCoord.mColumn].mMultiLineComment = inComment;

+							currentIndex += 1;

+							if (currentIndex < (int)line.size())

+								line[currentIndex].mMultiLineComment = inComment;

 						}

 						else

 							withinString = false;

 					}

 					else if (c == '\\')

 					{

-						Advance(currentCoord);

-						if (currentCoord.mColumn < (int)line.size())

-							line[currentCoord.mColumn].mMultiLineComment = inComment;

+						currentIndex += 1;

+						if (currentIndex < (int)line.size())

+							line[currentIndex].mMultiLineComment = inComment;

 					}

 				}

 				else

@@ -1932,34 +2216,54 @@
 					if (c == '\"')

 					{

 						withinString = true;

-						line[currentCoord.mColumn].mMultiLineComment = inComment;

+						line[currentIndex].mMultiLineComment = inComment;

 					}

 					else

 					{

 						auto pred = [](const char& a, const Glyph& b) { return a == b.mChar; };

-						auto from = line.begin() + currentCoord.mColumn;

+						auto from = line.begin() + currentIndex;

 						auto& startStr = mLanguageDefinition.mCommentStart;

 						auto& singleStartStr = mLanguageDefinition.mSingleLineComment;

+

 						if (singleStartStr.size() > 0 &&

-							currentCoord.mColumn + singleStartStr.size() <= line.size() &&

+							currentIndex + singleStartStr.size() <= line.size() &&

 							equals(singleStartStr.begin(), singleStartStr.end(), from, from + singleStartStr.size(), pred))

+						{

 							withinSingleLineComment = true;

-						else if (!withinSingleLineComment && currentCoord.mColumn + startStr.size() <= line.size() &&

+						}

+						else if (!withinSingleLineComment && currentIndex + startStr.size() <= line.size() &&

 							equals(startStr.begin(), startStr.end(), from, from + startStr.size(), pred))

-							commentStart = currentCoord;

+						{

+							commentStartLine = currentLine;

+							commentStartIndex = currentIndex;

+						}

 

-						inComment = commentStart <= currentCoord;

+						inComment = inComment = (commentStartLine < currentLine || (commentStartLine == currentLine && commentStartIndex <= currentIndex));

 

-						line[currentCoord.mColumn].mMultiLineComment = inComment;

-						line[currentCoord.mColumn].mComment = withinSingleLineComment;

+						line[currentIndex].mMultiLineComment = inComment;

+						line[currentIndex].mComment = withinSingleLineComment;

 

 						auto& endStr = mLanguageDefinition.mCommentEnd;

-						if (currentCoord.mColumn + 1 >= (int)endStr.size() &&

+						if (currentIndex + 1 >= (int)endStr.size() &&

 							equals(endStr.begin(), endStr.end(), from + 1 - endStr.size(), from + 1, pred))

-							commentStart = end;

+						{

+							commentStartIndex = endIndex;

+							commentStartLine = endLine;

+						}

 					}

 				}

-				line[currentCoord.mColumn].mPreprocessor = withinPreproc;

+				line[currentIndex].mPreprocessor = withinPreproc;

+				currentIndex += UTF8CharLength(c);

+				if (currentIndex >= (int)line.size())

+				{

+					currentIndex = 0;

+					++currentLine;

+				}

+			}

+			else

+			{

+				currentIndex = 0;

+				++currentLine;

 			}

 		}

 		mCheckComments = false;

@@ -1987,17 +2291,23 @@
 	float distance = 0.0f;

 	auto fontScale = ImGui::GetFontSize() / ImGui::GetFont()->FontSize;

 	float spaceSize = ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, " ", nullptr, nullptr).x;

-	for (size_t it = 0u; it < line.size() && it < (unsigned)aFrom.mColumn; ++it)

+	int colIndex = GetCharacterIndex(aFrom);

+	for (size_t it = 0u; it < line.size() && it < colIndex; )

 	{

 		if (line[it].mChar == '\t')

 		{

 			distance = (1.0f * fontScale + std::floor((1.0f + distance)) / (float(mTabSize) * spaceSize)) * (float(mTabSize) * spaceSize);

+			++it;

 		}

 		else

 		{

-			char tempCString[2];

-			tempCString[0] = line[it].mChar;

-			tempCString[1] = '\0';

+			auto d = UTF8CharLength(line[it].mChar);

+			char tempCString[7];

+			int i = 0;

+			for (; i < 6 && d-- > 0 && it < (int) line.size(); i++, it++)

+				tempCString[i] = line[it].mChar;

+

+			tempCString[i] = '\0';

 			distance += ImGui::GetFont()->CalcTextSizeA(ImGui::GetFontSize(), FLT_MAX, -1.0f, tempCString, nullptr, nullptr).x;

 		}

 	}

@@ -2354,7 +2664,7 @@
 		{

 			paletteIndex = PaletteIndex::Max;

 

-			while (in_begin < in_end && isblank(*in_begin))

+			while (in_begin < in_end && isascii(*in_begin) && isblank(*in_begin))

 				in_begin++;

 

 			if (in_begin == in_end)

@@ -2541,7 +2851,7 @@
 		{

 			paletteIndex = PaletteIndex::Max;

 

-			while (in_begin < in_end && isblank(*in_begin))

+			while (in_begin < in_end && isascii(*in_begin) && isblank(*in_begin))

 				in_begin++;

 

 			if (in_begin == in_end)

@@ -2712,10 +3022,10 @@
 			"getupvalue", "upvaluejoin", "upvalueid", "setuservalue", "sethook", "setlocal", "setmetatable", "setupvalue", "traceback", "close", "flush", "input", "lines", "open", "output", "popen",

 			"read", "tmpfile", "type", "write", "close", "flush", "lines", "read", "seek", "setvbuf", "write", "__gc", "__tostring", "abs", "acos", "asin", "atan", "ceil", "cos", "deg", "exp", "tointeger",

 			"floor", "fmod", "ult", "log", "max", "min", "modf", "rad", "random", "randomseed", "sin", "sqrt", "string", "tan", "type", "atan2", "cosh", "sinh", "tanh",

-			 "pow", "frexp", "ldexp", "log10", "pi", "huge", "maxinteger", "mininteger", "loadlib", "searchpath", "seeall", "preload", "cpath", "path", "searchers", "loaded", "module", "require", "clock",

-			 "date", "difftime", "execute", "exit", "getenv", "remove", "rename", "setlocale", "time", "tmpname", "byte", "char", "dump", "find", "format", "gmatch", "gsub", "len", "lower", "match", "rep",

-			 "reverse", "sub", "upper", "pack", "packsize", "unpack", "concat", "maxn", "insert", "pack", "unpack", "remove", "move", "sort", "offset", "codepoint", "char", "len", "codes", "charpattern",

-			 "coroutine", "table", "io", "os", "string", "utf8", "bit32", "math", "debug", "package"

+			"pow", "frexp", "ldexp", "log10", "pi", "huge", "maxinteger", "mininteger", "loadlib", "searchpath", "seeall", "preload", "cpath", "path", "searchers", "loaded", "module", "require", "clock",

+			"date", "difftime", "execute", "exit", "getenv", "remove", "rename", "setlocale", "time", "tmpname", "byte", "char", "dump", "find", "format", "gmatch", "gsub", "len", "lower", "match", "rep",

+			"reverse", "sub", "upper", "pack", "packsize", "unpack", "concat", "maxn", "insert", "pack", "unpack", "remove", "move", "sort", "offset", "codepoint", "char", "len", "codes", "charpattern",

+			"coroutine", "table", "io", "os", "string", "utf8", "bit32", "math", "debug", "package"

 		};

 		for (auto& k : identifiers)

 		{

diff --git a/TextEditor.h b/TextEditor.h
index cfc68d5..9ae54af 100644
--- a/TextEditor.h
+++ b/TextEditor.h
@@ -57,7 +57,14 @@
 			, mEnabled(false)

 		{}

 	};

-	

+

+	// Represents a character coordinate from the user's point of view,

+	// i. e. consider an uniform grid (assuming fixed-width font) on the

+	// screen as it is rendered, and each cell has its own coordinate, starting from 0.

+	// Tabs are counted as [1..mTabSize] count empty spaces, depending on

+	// how many space is necessary to reach the next tab stop.

+	// For example, coordinate (1, 5) represents the character 'B' in a line "\tABC", when mTabSize = 4,

+	// because it is rendered as "    ABC" on the screen.

 	struct Coordinates

 	{

 		int mLine, mColumn;

@@ -124,8 +131,8 @@
 	typedef std::map<int, std::string> ErrorMarkers;

 	typedef std::unordered_set<int> Breakpoints;

 	typedef std::array<ImU32, (unsigned)PaletteIndex::Max> Palette;

-	typedef char Char;

-	

+	typedef uint8_t Char;

+

 	struct Glyph

 	{

 		Char mChar;

@@ -134,7 +141,7 @@
 		bool mMultiLineComment : 1;

 		bool mPreprocessor : 1;

 

-		Glyph(Char aChar, PaletteIndex aColorIndex) : mChar(aChar), mColorIndex(aColorIndex), 

+		Glyph(Char aChar, PaletteIndex aColorIndex) : mChar(aChar), mColorIndex(aColorIndex),

 			mComment(false), mMultiLineComment(false), mPreprocessor(false) {}

 	};

 

@@ -145,7 +152,7 @@
 	{

 		typedef std::pair<std::string, PaletteIndex> TokenRegexString;

 		typedef std::vector<TokenRegexString> TokenRegexStrings;

-		typedef bool (*TokenizeCallback)(const char * in_begin, const char * in_end, const char *& out_begin, const char *& out_end, PaletteIndex & paletteIndex);

+		typedef bool(*TokenizeCallback)(const char * in_begin, const char * in_end, const char *& out_begin, const char *& out_end, PaletteIndex & paletteIndex);

 

 		std::string mName;

 		Keywords mKeywords;

@@ -160,12 +167,12 @@
 		TokenRegexStrings mTokenRegexStrings;

 

 		bool mCaseSensitive;

-		

+

 		LanguageDefinition()

 			: mPreprocChar('#'), mAutoIndentation(true), mTokenize(nullptr), mCaseSensitive(true)

 		{

 		}

-		

+

 		static const LanguageDefinition& CPlusPlus();

 		static const LanguageDefinition& HLSL();

 		static const LanguageDefinition& GLSL();

@@ -194,7 +201,7 @@
 	std::vector<std::string> GetTextLines() const;

 	std::string GetSelectedText() const;

 	std::string GetCurrentLineText()const;

-	

+

 	int GetTotalLines() const { return (int)mLines.size(); }

 	bool IsOverwrite() const { return mOverwrite; }

 

@@ -203,12 +210,20 @@
 	bool IsTextChanged() const { return mTextChanged; }

 	bool IsCursorPositionChanged() const { return mCursorPositionChanged; }

 

+	bool IsColorizerEnabled() const { return mColorizerEnabled; }

+	void SetColorizerEnable(bool aValue);

+

 	Coordinates GetCursorPosition() const { return GetActualCursorCoordinates(); }

 	void SetCursorPosition(const Coordinates& aPosition);

 

-	void SetHandleMouseInputs    (bool aValue){ mHandleMouseInputs    = aValue;}

-	void SetHandleKeyboardInputs (bool aValue){ mHandleKeyboardInputs = aValue;}

-	void SetImGuiChildIgnored    (bool aValue){ mIgnoreImGuiChild     = aValue;}

+	inline void SetHandleMouseInputs    (bool aValue){ mHandleMouseInputs    = aValue;}

+	inline bool IsHandleMouseInputsEnabled() const { return mHandleKeyboardInputs; }

+

+	inline void SetHandleKeyboardInputs (bool aValue){ mHandleKeyboardInputs = aValue;}

+	inline bool IsHandleKeyboardInputsEnabled() const { return mHandleKeyboardInputs; }

+

+	inline void SetImGuiChildIgnored    (bool aValue){ mIgnoreImGuiChild     = aValue;}

+	inline bool IsImGuiChildIgnored() const { return mIgnoreImGuiChild; }

 

 	void InsertText(const std::string& aValue);

 	void InsertText(const char* aValue);

@@ -261,14 +276,14 @@
 

 		UndoRecord(

 			const std::string& aAdded,

-			const TextEditor::Coordinates aAddedStart, 

-			const TextEditor::Coordinates aAddedEnd, 

-			

-			const std::string& aRemoved, 

+			const TextEditor::Coordinates aAddedStart,

+			const TextEditor::Coordinates aAddedEnd,

+

+			const std::string& aRemoved,

 			const TextEditor::Coordinates aRemovedStart,

 			const TextEditor::Coordinates aRemovedEnd,

-			

-			TextEditor::EditorState& aBefore, 

+

+			TextEditor::EditorState& aBefore,

 			TextEditor::EditorState& aAfter);

 

 		void Undo(TextEditor* aEditor);

@@ -295,7 +310,6 @@
 	float TextDistanceToLineStart(const Coordinates& aFrom) const;

 	void EnsureCursorVisible();

 	int GetPageSize() const;

-	int AppendBuffer(std::string& aBuffer, char chr, int aIndex);

 	std::string GetText(const Coordinates& aStart, const Coordinates& aEnd) const;

 	Coordinates GetActualCursorCoordinates() const;

 	Coordinates SanitizeCoordinates(const Coordinates& aValue) const;

@@ -306,12 +320,16 @@
 	Coordinates ScreenPosToCoordinates(const ImVec2& aPosition) const;

 	Coordinates FindWordStart(const Coordinates& aFrom) const;

 	Coordinates FindWordEnd(const Coordinates& aFrom) const;

+	int GetCharacterIndex(const Coordinates& aCoordinates) const;

+	int GetCharacterColumn(int aLine, int aIndex) const;

+	int GetLineCharacterCount(int aLine) const;

+	int GetLineMaxColumn(int aLine) const;

 	bool IsOnWordBoundary(const Coordinates& aAt) const;

 	void RemoveLine(int aStart, int aEnd);

 	void RemoveLine(int aIndex);

 	Line& InsertLine(int aIndex);

-	void EnterCharacter(Char aChar, bool aShift);

-	void BackSpace();

+	void EnterCharacter(ImWchar aChar, bool aShift);

+	void Backspace();

 	void DeleteSelection();

 	std::string GetWordUnderCursor() const;

 	std::string GetWordAt(const Coordinates& aCoords) const;

@@ -326,23 +344,24 @@
 	EditorState mState;

 	UndoBuffer mUndoBuffer;

 	int mUndoIndex;

-	

+

 	int mTabSize;

 	bool mOverwrite;

 	bool mReadOnly;

 	bool mWithinRender;

-	bool mScrollToCursor;
+	bool mScrollToCursor;

 	bool mScrollToTop;

 	bool mTextChanged;

+	bool mColorizerEnabled;

 	float  mTextStart;                   // position (in pixels) where a code line starts relative to the left of the TextEditor.

 	int  mLeftMargin;

 	bool mCursorPositionChanged;

 	int mColorRangeMin, mColorRangeMax;

 	SelectionMode mSelectionMode;

-	bool mHandleKeyboardInputs;
-	bool mHandleMouseInputs;
-	bool mIgnoreImGuiChild;
-
+	bool mHandleKeyboardInputs;

+	bool mHandleMouseInputs;

+	bool mIgnoreImGuiChild;

+

 	Palette mPaletteBase;

 	Palette mPalette;

 	LanguageDefinition mLanguageDefinition;

@@ -353,6 +372,6 @@
 	ErrorMarkers mErrorMarkers;

 	ImVec2 mCharAdvance;

 	Coordinates mInteractiveStart, mInteractiveEnd;

-	

+

 	float mLastClick;

 };