Fix UTF16LE support in TextStringToUCS4. Make the test a bit more complex by using a nice checkbox. Also copy the test to the qt6 folder.

commit: 969562d387b3791c7bc192a213e74049e08c9395 [log] [tgz]
author: Albert Astals Cid <aacid@kde.org> Sat Jul 11 00:41:13 2020 +0200
committer: Albert Astals Cid <aacid@kde.org> Sat Jul 11 18:27:24 2020 +0200
tree: 0f0e86e05075dba744cab612bfe25abed348f684
parent: 8ee6907bd64b0eb77997ca05c2fc910d5225f4b5 [diff]
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index ee0314f..9097b31 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc

@@ -119,7 +119,7 @@
                 if (isUnicode)
                     utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
                 else // UnicodeLE
-                    utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8;
+                    utf16[i] = (s[3 + i * 2] & 0xff) << 8 | (s[2 + i * 2] & 0xff);
             }
             len = UTF16toUCS4(utf16, len, &u);
             delete[] utf16;

diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp
index 1f04c2a..b153ae5 100644
--- a/qt5/tests/check_utf_conversion.cpp
+++ b/qt5/tests/check_utf_conversion.cpp

@@ -43,7 +43,17 @@
             return false;
     }
 
-    return *a == (Unicode)*b;
+    return true;
+}
+
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+    for (int i = 0; i < len; i++) {
+        if (a[i] != b[i])
+            return false;
+    }
+
+    return true;
 }
 
 void TestUTFConversion::testUTF_data()
@@ -147,32 +157,34 @@
 
 void TestUTFConversion::testUnicodeLittleEndian()
 {
-    uint16_t UTF16LE_hi[4] { 0xFFFE, 0x4800, 0x4900, 0x2100 }; // UTF16-LE "HI!"
-    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), 4 * 2);
+    uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
 
-    uint16_t UTF16BE_hi[4] { 0xFEFF, 0x0048, 0x0049, 0x0021 }; // UTF16-BE "HI!"
-    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), 4 * 2);
+    uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
 
     // Let's assert both GooString's are different
-    Q_ASSERT(GooUTF16LE.cmp(&GooUTF16BE) != 0);
+    QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
 
     Unicode *UCS4fromLE, *UCS4fromBE;
     const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
     const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
 
-    // 3 as TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points
-    Q_ASSERT(len1 == len2);
-    Q_ASSERT(len1 == 3);
+    // len is 4 because TextStringToUCS4() strips the leading Byte Order Mark (BOM), which occupies two bytes
+    QCOMPARE(len1, len2);
+    QCOMPARE(len1, 4);
 
     // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
     for (int i = 0; i < len1; i++) {
-        Q_ASSERT(UCS4fromLE[i] == UCS4fromBE[i]);
+        QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
     }
 
+    const QString expected = QStringLiteral("HI!☑");
+
     // Do some final verifications, checking the strings to be "HI!"
     QVERIFY(*UCS4fromLE == *UCS4fromBE);
-    QVERIFY(compare(UCS4fromLE, "HI!", 3));
-    QVERIFY(compare(UCS4fromBE, "HI!", 3));
+    QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+    QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
 }
 
 QTEST_GUILESS_MAIN(TestUTFConversion)

diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp
index f28829f..f2a6609 100644
--- a/qt6/tests/check_utf_conversion.cpp
+++ b/qt6/tests/check_utf_conversion.cpp

@@ -18,6 +18,7 @@
     void testUTF_data();
     void testUTF();
     void testUnicodeToAscii7();
+    void testUnicodeLittleEndian();
 };
 
 static bool compare(const char *a, const char *b)
@@ -41,9 +42,18 @@
             return false;
     }
 
-    return *a == (Unicode)*b;
+    return true;
 }
 
+static bool compare(const Unicode *a, const uint16_t *b, int len)
+{
+    for (int i = 0; i < len; i++) {
+        if (a[i] != b[i])
+            return false;
+    }
+
+    return true;
+}
 void TestUTFConversion::testUTF_data()
 {
     QTest::addColumn<QString>("s");
@@ -143,5 +153,37 @@
     free(out_ascii_idx);
 }
 
+void TestUTFConversion::testUnicodeLittleEndian()
+{
+    uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑"
+    GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi));
+
+    uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑"
+    GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi));
+
+    // Let's assert both GooString's are different
+    QVERIFY(GooUTF16LE.cmp(&GooUTF16BE));
+
+    Unicode *UCS4fromLE, *UCS4fromBE;
+    const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE);
+    const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE);
+
+    // len is 4 because TextStringToUCS4() strips the leading Byte Order Mark (BOM), which occupies two bytes
+    QCOMPARE(len1, len2);
+    QCOMPARE(len1, 4);
+
+    // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same
+    for (int i = 0; i < len1; i++) {
+        QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
+    }
+
+    const QString expected = QStringLiteral("HI!☑");
+
+    // Do some final verifications, checking the strings to be "HI!☑"
+    QVERIFY(*UCS4fromLE == *UCS4fromBE);
+    QVERIFY(compare(UCS4fromLE, expected.utf16(), len1));
+    QVERIFY(compare(UCS4fromBE, expected.utf16(), len1));
+}
+
 QTEST_GUILESS_MAIN(TestUTFConversion)
 #include "check_utf_conversion.moc"
commit	969562d387b3791c7bc192a213e74049e08c9395	[log] [tgz]
author	Albert Astals Cid <aacid@kde.org>	Sat Jul 11 00:41:13 2020 +0200
committer	Albert Astals Cid <aacid@kde.org>	Sat Jul 11 18:27:24 2020 +0200
tree	0f0e86e05075dba744cab612bfe25abed348f684
parent	8ee6907bd64b0eb77997ca05c2fc910d5225f4b5 [diff]