Use more vectors and fewer owning pointers + length. Also remove a couple of unreferenced functions.

commit: 925b104c33f5a1a546c72794d30a6deb918998f7 [log] [tgz]
author: Sune Vuorela <sune@vuorela.dk> Fri Feb 02 11:18:42 2024 +0100
committer: Albert Astals Cid <aacid@kde.org> Tue Feb 06 00:18:30 2024 +0000
tree: c9e6cb1f8c82bdbfc5e2421c982cbf59af536ad8
parent: d44142d3bb95997af339ebe7d30d2180fe74d6f4 [diff]
diff --git a/poppler/CharCodeToUnicode.cc b/poppler/CharCodeToUnicode.cc
index 94f03bb..6035a6a 100644
--- a/poppler/CharCodeToUnicode.cc
+++ b/poppler/CharCodeToUnicode.cc

@@ -120,19 +120,15 @@
 {
     CharCodeToUnicode *ctu = new CharCodeToUnicode();
     ctu->isIdentity = true;
-    ctu->mapLen = 1;
-    ctu->map = (Unicode *)gmallocn(ctu->mapLen, sizeof(Unicode));
+    ctu->map.resize(1, 0);
     return ctu;
 }
-
 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(const char *fileName, const GooString *collection)
 {
     FILE *f;
-    Unicode *mapA;
-    CharCode size, mapLenA;
+    CharCode size;
     char buf[64];
     Unicode u;
-    CharCodeToUnicode *ctu;
 
     if (!(f = openFile(fileName, "r"))) {
         error(errIO, -1, "Couldn't open cidToUnicode file '{0:s}'", fileName);
@@ -140,13 +136,14 @@
     }
 
     size = 32768;
-    mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
-    mapLenA = 0;
+    std::vector<Unicode> mapA;
+    mapA.resize(size, 0);
+    CharCode mapLenA = 0;
 
     while (getLine(buf, sizeof(buf), f)) {
         if (mapLenA == size) {
             size *= 2;
-            mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
+            mapA.resize(size);
         }
         if (sscanf(buf, "%x", &u) == 1) {
             mapA[mapLenA] = u;
@@ -157,95 +154,15 @@
         ++mapLenA;
     }
     fclose(f);
+    mapA.resize(mapLenA);
 
-    ctu = new CharCodeToUnicode(collection->toStr(), mapA, mapLenA, true, {});
-    gfree(mapA);
-    return ctu;
-}
-
-CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(const GooString *fileName)
-{
-    FILE *f;
-    Unicode *mapA;
-    CharCode size, oldSize, len;
-    char buf[256];
-    char *tok;
-    Unicode u0;
-    int uBufSize = 8;
-    Unicode *uBuf = (Unicode *)gmallocn(uBufSize, sizeof(Unicode));
-    CharCodeToUnicode *ctu;
-    int line, n, i;
-    char *tokptr;
-
-    if (!(f = openFile(fileName->c_str(), "r"))) {
-        gfree(uBuf);
-        error(errIO, -1, "Couldn't open unicodeToUnicode file '{0:t}'", fileName);
-        return nullptr;
-    }
-
-    size = 4096;
-    mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
-    memset(mapA, 0, size * sizeof(Unicode));
-    len = 0;
-    std::vector<CharCodeToUnicodeString> sMapA;
-
-    line = 0;
-    while (getLine(buf, sizeof(buf), f)) {
-        ++line;
-        if (!(tok = strtok_r(buf, " \t\r\n", &tokptr)) || !parseHex(tok, strlen(tok), &u0)) {
-            error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", line, fileName);
-            continue;
-        }
-        n = 0;
-        while ((tok = strtok_r(nullptr, " \t\r\n", &tokptr))) {
-            if (n >= uBufSize) {
-                uBufSize += 8;
-                uBuf = (Unicode *)greallocn(uBuf, uBufSize, sizeof(Unicode));
-            }
-            if (!parseHex(tok, strlen(tok), &uBuf[n])) {
-                error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", line, fileName);
-                break;
-            }
-            ++n;
-        }
-        if (n < 1) {
-            error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", line, fileName);
-            continue;
-        }
-        if (u0 >= size) {
-            oldSize = size;
-            while (u0 >= size) {
-                size *= 2;
-            }
-            mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
-            memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
-        }
-        if (n == 1) {
-            mapA[u0] = uBuf[0];
-        } else {
-            mapA[u0] = 0;
-            std::vector<Unicode> u;
-            u.reserve(n);
-            for (i = 0; i < n; ++i) {
-                u.push_back(uBuf[i]);
-            }
-            sMapA.push_back({ u0, std::move(u) });
-        }
-        if (u0 >= len) {
-            len = u0 + 1;
-        }
-    }
-    fclose(f);
-
-    ctu = new CharCodeToUnicode(fileName->toStr(), mapA, len, true, std::move(sMapA));
-    gfree(mapA);
-    gfree(uBuf);
-    return ctu;
+    return new CharCodeToUnicode(collection->toStr(), std::move(mapA), {});
 }
 
 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode)
 {
-    return new CharCodeToUnicode({}, toUnicode, 256, true, {});
+    std::vector<Unicode> data(toUnicode, toUnicode + 256);
+    return new CharCodeToUnicode({}, std::move(data), {});
 }
 
 CharCodeToUnicode *CharCodeToUnicode::parseCMap(const GooString *buf, int nBits)
@@ -466,7 +383,6 @@
 
 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n, int offset)
 {
-    CharCode oldLen, i;
     Unicode u;
     int j;
 
@@ -475,20 +391,17 @@
         // (I've seen CMaps with mappings for <ffffffff>.)
         return;
     }
-    if (code >= mapLen) {
-        oldLen = mapLen;
-        mapLen = mapLen ? 2 * mapLen : 256;
-        if (code >= mapLen) {
-            mapLen = (code + 256) & ~255;
+    if (code >= map.size()) {
+        size_t oldLen = map.size();
+        auto newLen = oldLen ? 2 * oldLen : 256;
+        if (code >= newLen) {
+            newLen = (code + 256) & ~255;
         }
-        if (unlikely(code >= mapLen)) {
+        if (unlikely(code >= newLen)) {
             error(errSyntaxWarning, -1, "Illegal code value in CharCodeToUnicode::addMapping");
             return;
         } else {
-            map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
-            for (i = oldLen; i < mapLen; ++i) {
-                map[i] = 0;
-            }
+            map.resize(newLen, 0);
         }
     }
     if (n <= 4) {
@@ -518,67 +431,42 @@
 
 void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u)
 {
-    CharCode oldLen, i;
-
     if (code > 0xffffff) {
         // This is an arbitrary limit to avoid integer overflow issues.
         // (I've seen CMaps with mappings for <ffffffff>.)
         return;
     }
-    if (code >= mapLen) {
-        oldLen = mapLen;
-        mapLen = mapLen ? 2 * mapLen : 256;
-        if (code >= mapLen) {
-            mapLen = (code + 256) & ~255;
+    if (code >= map.size()) {
+        size_t oldLen = map.size();
+        size_t newLen = oldLen ? 2 * oldLen : 256;
+        if (code >= newLen) {
+            newLen = (code + 256) & ~255;
         }
-        map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
-        for (i = oldLen; i < mapLen; ++i) {
-            map[i] = 0;
-        }
+        map.resize(newLen, 0);
     }
     map[code] = u;
 }
 
 CharCodeToUnicode::CharCodeToUnicode()
 {
-    map = nullptr;
-    mapLen = 0;
     refCnt = 1;
     isIdentity = false;
 }
 
 CharCodeToUnicode::CharCodeToUnicode(const std::optional<std::string> &tagA) : tag(tagA)
 {
-    CharCode i;
-
-    mapLen = 256;
-    map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
-    for (i = 0; i < mapLen; ++i) {
-        map[i] = 0;
-    }
+    map.resize(256, 0);
     refCnt = 1;
     isIdentity = false;
 }
-
-CharCodeToUnicode::CharCodeToUnicode(const std::optional<std::string> &tagA, Unicode *mapA, CharCode mapLenA, bool copyMap, std::vector<CharCodeToUnicodeString> &&sMapA) : tag(tagA)
+CharCodeToUnicode::CharCodeToUnicode(const std::optional<std::string> &tagA, std::vector<Unicode> &&mapA, std::vector<CharCodeToUnicodeString> &&sMapA) : tag(tagA)
 {
-    mapLen = mapLenA;
-    if (copyMap) {
-        map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
-        memcpy(map, mapA, mapLen * sizeof(Unicode));
-    } else {
-        map = mapA;
-    }
+    map = std::move(mapA);
     sMap = std::move(sMapA);
     refCnt = 1;
     isIdentity = false;
 }
 
-CharCodeToUnicode::~CharCodeToUnicode()
-{
-    gfree(map);
-}
-
 void CharCodeToUnicode::incRefCnt()
 {
     ++refCnt;
@@ -601,7 +489,7 @@
     size_t i;
     int j;
 
-    if (!map || isIdentity) {
+    if (map.empty() || isIdentity) {
         return;
     }
     if (len == 1) {
@@ -635,11 +523,12 @@
 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode const **u) const
 {
     if (isIdentity) {
-        map[0] = (Unicode)c;
-        *u = map;
+        auto that = const_cast<CharCodeToUnicode *>(this);
+        that->map[0] = (Unicode)c;
+        *u = map.data();
         return 1;
     }
-    if (c >= mapLen) {
+    if (c >= map.size()) {
         return 0;
     }
     if (map[c]) {
@@ -663,7 +552,7 @@
             *c = (CharCode)*u;
             return 1;
         }
-        for (CharCode i = 0; i < mapLen; i++) {
+        for (CharCode i = 0; i < map.size(); i++) {
             if (map[i] == *u) {
                 *c = i;
                 return 1;

diff --git a/poppler/CharCodeToUnicode.h b/poppler/CharCodeToUnicode.h
index 9d99833..dc2fa84 100644
--- a/poppler/CharCodeToUnicode.h
+++ b/poppler/CharCodeToUnicode.h

@@ -56,11 +56,6 @@
     // Returns NULL on failure.
     static CharCodeToUnicode *parseCIDToUnicode(const char *fileName, const GooString *collection);
 
-    // Create a Unicode-to-Unicode mapping from the file specified by
-    // <fileName>.  Sets the initial reference count to 1.  Returns NULL
-    // on failure.
-    static CharCodeToUnicode *parseUnicodeToUnicode(const GooString *fileName);
-
     // Create the CharCode-to-Unicode mapping for an 8-bit font.
     // <toUnicode> is an array of 256 Unicode indexes.  Sets the initial
     // reference count to 1.
@@ -74,7 +69,7 @@
     // <this>.
     void mergeCMap(const GooString *buf, int nBits);
 
-    ~CharCodeToUnicode();
+    ~CharCodeToUnicode() = default;
 
     CharCodeToUnicode(const CharCodeToUnicode &) = delete;
     CharCodeToUnicode &operator=(const CharCodeToUnicode &) = delete;
@@ -96,10 +91,6 @@
     // Map a Unicode to CharCode.
     int mapToCharCode(const Unicode *u, CharCode *c, int usize) const;
 
-    // Return the mapping's length, i.e., one more than the max char
-    // code supported by the mapping.
-    CharCode getLength() const { return mapLen; }
-
 private:
     struct CharCodeToUnicodeString
     {
@@ -111,11 +102,10 @@
     void addMappingInt(CharCode code, Unicode u);
     CharCodeToUnicode();
     explicit CharCodeToUnicode(const std::optional<std::string> &tagA);
-    CharCodeToUnicode(const std::optional<std::string> &tagA, Unicode *mapA, CharCode mapLenA, bool copyMap, std::vector<CharCodeToUnicodeString> &&sMapA);
+    CharCodeToUnicode(const std::optional<std::string> &tagA, std::vector<Unicode> &&mapA, std::vector<CharCodeToUnicodeString> &&sMapA);
 
     const std::optional<std::string> tag;
-    Unicode *map;
-    CharCode mapLen;
+    std::vector<Unicode> map;
     std::vector<CharCodeToUnicodeString> sMap;
     std::atomic_int refCnt;
     bool isIdentity;
commit	925b104c33f5a1a546c72794d30a6deb918998f7	[log] [tgz]
author	Sune Vuorela <sune@vuorela.dk>	Fri Feb 02 11:18:42 2024 +0100
committer	Albert Astals Cid <aacid@kde.org>	Tue Feb 06 00:18:30 2024 +0000
tree	c9e6cb1f8c82bdbfc5e2421c982cbf59af536ad8
parent	d44142d3bb95997af339ebe7d30d2180fe74d6f4 [diff]