// blob: 78c05d684366cc6bfd5e03a210ec809865d41282 [file] [log] [blame]
//========================================================================
//
// UnicodeMap.cc
//
// Copyright 2001-2003 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
// Copyright (C) 2017-2020, 2022 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2019 Volker Krause <vkrause@kde.org>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#include <config.h>
#include <cstdio>
#include <cstring>
#include "goo/glibc.h"
#include "goo/gmem.h"
#include "goo/gfile.h"
#include "goo/GooString.h"
#include "Error.h"
#include "GlobalParams.h"
#include "UnicodeMap.h"
// Type-dependent constant that is always false.  Intended for the final
// `else` of an `if constexpr` chain inside a std::visit visitor:
// static_assert(always_false_v<T>) only fires when that branch is
// instantiated, so adding a new alternative to a variant without handling
// it becomes a compile error.
template<typename T>
inline constexpr bool always_false_v = false;
//------------------------------------------------------------------------
// Builds a UnicodeMap for encodingNameA from the unicodeMap file that
// globalParams supplies for that encoding.
//
// Every line is expected to hold two or three whitespace-separated
// hexadecimal tokens:
//   <start> <end> <code>   range [start, end], encoded starting at <code>
//   <u> <code>             single character (start and end are both <u>)
// The output length is taken as half the number of characters in <code>.
// Codes of up to four bytes are stored as ranges; longer codes are
// accepted only in the two-token form and are stored as extended
// mappings.
//
// A line that does not match this shape, or whose tokens do not parse as
// hexadecimal, is reported through error() and skipped.  Returns an empty
// pointer when no map file is available for the encoding.
std::unique_ptr<UnicodeMap> UnicodeMap::parse(const std::string &encodingNameA)
{
    // Closes the map file on every exit path, including an exception
    // thrown while one of the vectors below grows.
    struct FileCloser
    {
        void operator()(FILE *fp) const { fclose(fp); }
    };

    std::unique_ptr<FILE, FileCloser> f(globalParams->getUnicodeMapFile(encodingNameA));
    if (!f) {
        error(errSyntaxError, -1, "Couldn't find unicodeMap file for the '{0:s}' encoding", encodingNameA.c_str());
        return {};
    }

    auto map = std::unique_ptr<UnicodeMap>(new UnicodeMap(encodingNameA));
    std::vector<UnicodeMapRange> customRanges;
    std::vector<UnicodeMapExt> eMap;

    char buf[256];
    int line = 1;
    while (getLine(buf, sizeof(buf), f.get())) {
        bool ok = false;
        char *tokptr;
        char *tok1 = strtok_r(buf, " \t\r\n", &tokptr);
        char *tok2 = tok1 ? strtok_r(nullptr, " \t\r\n", &tokptr) : nullptr;

        if (tok1 && tok2) {
            char *tok3 = strtok_r(nullptr, " \t\r\n", &tokptr);
            if (!tok3) {
                // Two-token form: a single character, so start == end.
                tok3 = tok2;
                tok2 = tok1;
            }

            const int nBytes = static_cast<int>(strlen(tok3) / 2);
            if (nBytes <= 4) {
                UnicodeMapRange range;
                // Only store the range if all three fields were really
                // converted; otherwise they would hold indeterminate
                // values.
                if (sscanf(tok1, "%x", &range.start) == 1 && sscanf(tok2, "%x", &range.end) == 1 && sscanf(tok3, "%x", &range.code) == 1) {
                    range.nBytes = nBytes;
                    customRanges.push_back(range);
                    ok = true;
                }
            } else if (tok2 == tok1) {
                // Long codes do not fit in an unsigned int, so they are
                // decoded two hex digits (one byte) at a time.
                UnicodeMapExt ext;
                bool parsed = sscanf(tok1, "%x", &ext.u) == 1;
                ext.code.reserve(nBytes);
                for (int i = 0; parsed && i < nBytes; ++i) {
                    unsigned int byte = 0;
                    parsed = sscanf(tok3 + i * 2, "%2x", &byte) == 1;
                    ext.code.push_back(static_cast<char>(byte));
                }
                if (parsed) {
                    eMap.push_back(std::move(ext));
                    ok = true;
                }
            }
        }

        if (!ok) {
            error(errSyntaxError, -1, "Bad line ({0:d}) in unicodeMap file for the '{1:s}' encoding", line, encodingNameA.c_str());
        }
        ++line;
    }

    map->eMaps = std::move(eMap);
    map->data = std::move(customRanges);
    return map;
}
// Creates a map that only carries the encoding name: unicodeOut is false
// and the mapping tables (data, eMaps) keep their default-constructed
// state.  parse() starts from this and then installs the tables it read.
UnicodeMap::UnicodeMap(const std::string &encodingNameA) : encodingName(encodingNameA), unicodeOut(false) { }
// Creates a map backed by a caller-supplied range table.  Only the span
// itself is stored, not a copy of the ranges it refers to.
UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA, std::span<const UnicodeMapRange> rangesA) : encodingName(encodingNameA), unicodeOut(unicodeOutA), data(rangesA) { }
// Creates a map whose conversions are delegated to funcA; mapUnicode()
// forwards every call to that function.
UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA, UnicodeMapFunc funcA) : encodingName(encodingNameA), unicodeOut(unicodeOutA), data(funcA) { }
// Defaulted destructor: each member is destroyed by its own destructor.
UnicodeMap::~UnicodeMap() = default;
// Move constructor: takes over the name and mapping tables of `other`
// member by member and copies its unicodeOut flag.
UnicodeMap::UnicodeMap(UnicodeMap &&other) noexcept
    : encodingName(std::move(other.encodingName)),
      unicodeOut(other.unicodeOut),
      data(std::move(other.data)),
      eMaps(std::move(other.eMaps))
{
}
// Move assignment, implemented as a swap: afterwards `other` holds the
// state this object had before the call.  Self-assignment is a no-op.
UnicodeMap &UnicodeMap::operator=(UnicodeMap &&other) noexcept
{
    if (this == &other) {
        return *this;
    }
    swap(other);
    return *this;
}
// Exchanges the complete state (name, output flag and both mapping
// tables) of this map with that of `other`.
void UnicodeMap::swap(UnicodeMap &other) noexcept
{
    encodingName.swap(other.encodingName);
    std::swap(unicodeOut, other.unicodeOut);
    data.swap(other.data);
    eMaps.swap(other.eMaps);
}
// Returns true if this map was created for exactly the given encoding
// name.
bool UnicodeMap::match(const std::string &encodingNameA) const
{
    const bool sameEncoding = (encodingNameA == encodingName);
    return sameEncoding;
}
// Converts the Unicode character u to this map's output encoding.
//
// The encoded bytes are written to the start of buf, which has room for
// bufSize bytes; no terminator is appended.  Returns the number of bytes
// written, or 0 if u has no mapping or its encoding is rejected for size.
//
// Function-backed maps forward the call to their function.  Table-backed
// maps first binary-search the range table, which therefore has to be
// sorted by `start`, and then fall back to a linear scan of the extended
// mappings.
int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) const
{
    return std::visit(
            [this, u, buf, bufSize](auto &&item) {
                using T = std::decay_t<decltype(item)>;
                if constexpr (std::is_same_v<T, UnicodeMapFunc>) {
                    return (*item)(u, buf, bufSize);
                } else if constexpr (std::is_same_v<T, std::span<const UnicodeMapRange>> || std::is_same_v<T, std::vector<UnicodeMapRange>>) {
                    // The table may be empty (for instance a map file with
                    // no valid range lines); it must not be indexed then.
                    if (!item.empty() && u >= item[0].start) {
                        int a = 0;
                        int b = static_cast<int>(item.size());
                        // invariant: item[a].start <= u < item[b].start
                        // (with item[size] treated as +infinity)
                        while (b - a > 1) {
                            const int m = (a + b) / 2;
                            if (u >= item[m].start) {
                                a = m;
                            } else {
                                b = m;
                            }
                        }
                        if (u <= item[a].end) {
                            const int n = static_cast<int>(item[a].nBytes);
                            if (n > bufSize) {
                                return 0;
                            }
                            // Emit the code big-endian, offset by u's
                            // position inside the range.
                            unsigned int code = item[a].code + (u - item[a].start);
                            for (int i = n - 1; i >= 0; --i) {
                                buf[i] = static_cast<char>(code & 0xff);
                                code >>= 8;
                            }
                            return n;
                        }
                    }

                    for (const UnicodeMapExt &ext : eMaps) {
                        if (ext.u != u) {
                            continue;
                        }
                        const int n = static_cast<int>(ext.code.size());
                        // NOTE(review): this also rejects a code that would
                        // fill buf exactly, whereas the range path above
                        // accepts n == bufSize -- confirm which is intended.
                        if (n >= bufSize) {
                            return 0;
                        }
                        for (int j = 0; j < n; ++j) {
                            buf[j] = ext.code[j];
                        }
                        return n;
                    }
                    return 0;
                } else {
                    static_assert(always_false_v<T>);
                }
            },
            data);
}
//------------------------------------------------------------------------
// Defaulted constructor: the members of the cache are default-initialized.
UnicodeMapCache::UnicodeMapCache() = default;
// Returns the map for encodingName, parsing and caching it on first use.
//
// Cached maps are searched in insertion order.  If none matches, the map
// is loaded with UnicodeMap::parse(); a successfully parsed map is
// appended to the cache, which keeps ownership of it.  Returns nullptr
// when parsing fails, and nothing is cached in that case.
const UnicodeMap *UnicodeMapCache::getUnicodeMap(const std::string &encodingName)
{
    for (const auto &cached : cache) {
        if (cached->match(encodingName)) {
            return cached.get();
        }
    }

    auto parsed = UnicodeMap::parse(encodingName);
    if (!parsed) {
        return nullptr;
    }
    cache.emplace_back(std::move(parsed));
    return cache.back().get();
}