Unicode: UTF-32 support improvements (#2541, #2538, #2815)
- Make ImWchar32 unsigned.
- Fix Win32 version of ImFileOpen by including windows.h sooner.
- Make ImGuiIO::AddInputCharacterUTF16() more robust by disallowing illegal
surrogate pairs.
- Allow pushing higher plane codepoints through ImGuiIO::AddInputCharacter().
- Minor cleanup of the high-plane Unicode support.
- Fix Clang -Wunreachable-code warning.
diff --git a/imgui.cpp b/imgui.cpp
index 9e7ac21..8b1bda5 100644
--- a/imgui.cpp
+++ b/imgui.cpp
@@ -1094,30 +1094,33 @@
// - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message
void ImGuiIO::AddInputCharacter(unsigned int c)
{
- if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX)
- InputQueueCharacters.push_back((ImWchar)c);
+ InputQueueCharacters.push_back(c > 0 && c <= IM_UNICODE_CODEPOINT_MAX ? (ImWchar)c : IM_UNICODE_CODEPOINT_INVALID);
}
-// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate.
+// UTF16 strings use surrogate pairs to encode codepoints >= 0x10000, so
+// we should save the high surrogate.
void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c)
{
- if (c >= 0xD800 && c <= 0xDBFF)
+ if ((c & 0xFC00) == 0xD800) // High surrogate, must save
{
- Surrogate = c;
+ if (InputQueueSurrogate != 0)
+ InputQueueCharacters.push_back(0xFFFD);
+ InputQueueSurrogate = c;
+ return;
}
- else
+
+ ImWchar cp = c;
+ if (InputQueueSurrogate != 0)
{
- ImWchar cp = c;
- if (c >= 0xDC00 && c <= 0xDFFF)
- {
- if (sizeof(ImWchar) == 2)
- cp = IM_UNICODE_CODEPOINT_INVALID;
- else
- cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000;
- Surrogate = 0;
- }
- InputQueueCharacters.push_back(cp);
+ if ((c & 0xFC00) != 0xDC00) // Invalid low surrogate
+ InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID);
+ else if (IM_UNICODE_CODEPOINT_MAX == (0xFFFF)) // Codepoint will not fit in ImWchar (extra parenthesis around 0xFFFF somehow fixes -Wunreachable-code with Clang)
+ cp = IM_UNICODE_CODEPOINT_INVALID;
+ else
+ cp = (ImWchar)(((InputQueueSurrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000);
+ InputQueueSurrogate = 0;
}
+ InputQueueCharacters.push_back(cp);
}
void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars)
@@ -1506,6 +1509,18 @@
// Default file functions
#ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS
+
+#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef __MINGW32__
+#include <Windows.h>
+#else
+#include <windows.h>
+#endif
+#endif
+
ImFileHandle ImFileOpen(const char* filename, const char* mode)
{
#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__)
@@ -1514,9 +1529,9 @@
const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0);
ImVector<ImWchar> buf;
buf.resize(filename_wsize + mode_wsize);
- ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize);
- ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize);
- return _wfopen(&buf[0], &buf[filename_wsize]);
+ ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, (wchar_t*)&buf[0], filename_wsize);
+ ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, (wchar_t*)&buf[filename_wsize], mode_wsize);
+ return _wfopen((const wchar_t*)&buf[0], (const wchar_t*)&buf[filename_wsize]);
#else
return fopen(filename, mode);
#endif
@@ -1628,8 +1643,8 @@
c += (*str++ & 0x3f);
// utf-8 encodings of values used in surrogate pairs are invalid
if ((c & 0xFFFFF800) == 0xD800) return 4;
- // If ImWchar is 16bit, use replacement character U+FFFD instead
- if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID;
+ // If codepoint does not fit in ImWchar, use replacement character U+FFFD instead
+ if (c > IM_UNICODE_CODEPOINT_MAX) c = IM_UNICODE_CODEPOINT_INVALID;
*out_char = c;
return 4;
}
diff --git a/imgui.h b/imgui.h
index 09160d1..cbed101 100644
--- a/imgui.h
+++ b/imgui.h
@@ -92,7 +92,7 @@
#else
#define IM_OFFSETOF(_TYPE,_MEMBER) ((size_t)&(((_TYPE*)0)->_MEMBER)) // Offset of _MEMBER within _TYPE. Old style macro.
#endif
-#define IM_UNICODE_CODEPOINT_MAX 0xFFFF // Last Unicode code point supported by this build.
+#define IM_UNICODE_CODEPOINT_MAX (sizeof(ImWchar) == 2 ? 0xFFFF : 0x10FFFF) // Last Unicode code point supported by this build.
#define IM_UNICODE_CODEPOINT_INVALID 0xFFFD // Standard invalid Unicode code point.
// Warnings
@@ -147,7 +147,7 @@
#define ImWchar ImWchar16
#endif
typedef unsigned short ImWchar16; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings.
-typedef int ImWchar32; // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h .
+typedef unsigned int ImWchar32; // A single U32 character for keyboard input/display. Define ImWchar to ImWchar32 to use it. See imconfig.h .
typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling
typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions
typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type
@@ -1512,7 +1512,7 @@
float KeysDownDurationPrev[512]; // Previous duration the key has been down
float NavInputsDownDuration[ImGuiNavInput_COUNT];
float NavInputsDownDurationPrev[ImGuiNavInput_COUNT];
- ImWchar16 Surrogate; // For AddInputCharacterUTF16
+ ImWchar16 InputQueueSurrogate; // For AddInputCharacterUTF16
ImVector<ImWchar> InputQueueCharacters; // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper.
IMGUI_API ImGuiIO();
@@ -2097,15 +2097,11 @@
{
ImVector<ImU32> UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used)
- ImFontGlyphRangesBuilder() { Clear(); }
- inline void Clear()
- {
- int MaxUnicode = sizeof(ImWchar) == 2 ? 0x10000 : 0x110000;
- UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int));
- }
- inline bool GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array
- inline void SetBit(int n) { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array
- inline void AddChar(ImWchar c) { SetBit(c); } // Add character
+ ImFontGlyphRangesBuilder() { Clear(); }
+ inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); }
+ inline bool GetBit(size_t n) const { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array
+ inline void SetBit(size_t n) { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array
+ inline void AddChar(ImWchar c) { SetBit(c); } // Add character
IMGUI_API void AddText(const char* text, const char* text_end = NULL); // Add string (each character of the UTF-8 string are added)
IMGUI_API void AddRanges(const ImWchar* ranges); // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext
IMGUI_API void BuildRanges(ImVector<ImWchar>* out_ranges); // Output new ranges
diff --git a/imgui_draw.cpp b/imgui_draw.cpp
index 28f7728..ae6b980 100644
--- a/imgui_draw.cpp
+++ b/imgui_draw.cpp
@@ -2724,7 +2724,7 @@
const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const
{
- if (c >= IndexLookup.Size)
+ if (c >= (size_t)IndexLookup.Size)
return FallbackGlyph;
const ImWchar i = IndexLookup.Data[c];
if (i == (ImWchar)-1)
@@ -2734,7 +2734,7 @@
const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const
{
- if (c >= IndexLookup.Size)
+ if (c >= (size_t)IndexLookup.Size)
return NULL;
const ImWchar i = IndexLookup.Data[c];
if (i == (ImWchar)-1)