document: Handle UTF16-LE annotations
I can produce such annotations when adding annotations to a PDF
attachment from the standard mail app on my iPhone (iOS 12.1).
They currently all show as "ÿþÚ" rather than the actual string content.
UTF16-BE vs UTF16-LE is detected by inferring the endianness from the
first two bytes of the string, i.e. the byte order mark (0xFE 0xFF for
big-endian, 0xFF 0xFE for little-endian).
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 9772c16..78a57b1 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -844,6 +844,10 @@
result = g_convert (s->c_str () + 2,
s->getLength () - 2,
"UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
+ } else if (s->hasUnicodeMarkerLE()) {
+ result = g_convert (s->c_str () + 2,
+ s->getLength () - 2,
+ "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr);
} else {
int len;
gunichar *ucs4_temp;
diff --git a/goo/GooString.h b/goo/GooString.h
index bae3a18..5b403e7 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -172,6 +172,7 @@
bool endsWith(const char *suffix) const;
bool hasUnicodeMarker() const { return size() >= 2 && (*this)[0] == char(0xfe) && (*this)[1] == char(0xff); }
+ bool hasUnicodeMarkerLE() const { return size() >= 2 && (*this)[0] == char(0xff) && (*this)[1] == char(0xfe); }
bool hasJustUnicodeMarker() const { return size() == 2 && hasUnicodeMarker(); }
void prependUnicodeMarker();