// blob: 4c60ede5623f5e4c8aff33f66a7e5df0f76308ef (source-viewer residue; not part of the original file)
//========================================================================
//
// PDFDoc.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2005, 2007-2009, 2011-2024 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
// Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
// Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
// Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
// Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
// Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
// Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013, 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
// Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
// Copyright (C) 2015 Li Junling <lijunling@sina.com>
// Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
// Copyright (C) 2015 André Esser <bepandre@hotmail.com>
// Copyright (C) 2016, 2020 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
// Copyright (C) 2017 Fredrik Fornwall <fredrik@fornwall.net>
// Copyright (C) 2018 Ben Timby <btimby@gmail.com>
// Copyright (C) 2018 Evangelos Foutras <evangelos@foutrelis.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
// Copyright (C) 2018 Philipp Knechtges <philipp-dev@knechtges.com>
// Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
// Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
// Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
// Copyright (C) 2020 Adam Sampson <ats@offog.org>
// Copyright (C) 2021-2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
// Copyright (C) 2021 RM <rm+git@arcsin.org>
// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
// Copyright (C) 2021-2022 Marek Kasik <mkasik@redhat.com>
// Copyright (C) 2022 Felix Jung <fxjung@posteo.de>
// Copyright (C) 2022 crt <chluo@cse.cuhk.edu.hk>
// Copyright (C) 2022 Erich E. Hoover <erich.e.hoover@gmail.com>
// Copyright (C) 2023, 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
// Copyright (C) 2024 Vincent Lefevre <vincent@vinc17.net>
// Copyright (C) 2024 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#include <config.h>
#include <poppler-config.h>
#include <array>
#include <cctype>
#include <clocale>
#include <cstdio>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstddef>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <regex>
#include <sstream>
#include <sys/stat.h>
#include "CryptoSignBackend.h"
#include "goo/glibc.h"
#include "goo/gstrtod.h"
#include "goo/GooString.h"
#include "goo/gfile.h"
#include "GlobalParams.h"
#include "Page.h"
#include "Catalog.h"
#include "Stream.h"
#include "XRef.h"
#include "Linearization.h"
#include "Link.h"
#include "OutputDev.h"
#include "Error.h"
#include "Lexer.h"
#include "Parser.h"
#include "SecurityHandler.h"
#include "Decrypt.h"
#include "Outline.h"
#include "PDFDoc.h"
#include "Hints.h"
#include "UTF.h"
#include "FlateEncoder.h"
#include "JSInfo.h"
#include "ImageEmbeddingUtils.h"
//------------------------------------------------------------------------
// Deleter for std::unique_ptr<FILE, FILECloser>: closes the stream with
// fclose() when the owning pointer releases it.
struct FILECloser
{
    void operator()(FILE *f)
    {
        fclose(f);
    }
};
//------------------------------------------------------------------------
// Fixed sizes used while probing a document. The three *SearchSize values are
// byte counts; pdfIdLength is the length of the hexadecimal text form of a
// document ID (get_id() expects pdfIdLength / 2 raw bytes as input).
#define headerSearchSize \
1024 // read this many bytes at beginning of
// file to look for '%PDF'
#define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length
#define linearizationSearchSize \
1024 // read this many bytes at beginning of
// file to look for linearization
// dictionary
#define xrefSearchSize \
1024 // read this many bytes at end of file
// to look for 'startxref'
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
// Declares a const std::scoped_lock named `locker` on whatever `mutex` names at
// the point of use; the lock is held until the end of the enclosing scope.
#define pdfdocLocker() const std::scoped_lock locker(mutex)
// Default constructor: performs no work of its own.
PDFDoc::PDFDoc() = default;
// Opens the file named by fileNameA, wraps it in a FileStream and runs setup()
// on it. When the file cannot be opened, errno is saved in fopenErrno, errCode
// is set to errOpenFile and the document is left without a stream.
PDFDoc::PDFDoc(std::unique_ptr<GooString> &&fileNameA, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
    : fileName(std::move(fileNameA)), guiData(guiDataA)
{
#ifdef _WIN32
    // Keep a widened copy of the name: each byte is zero-extended to a wchar_t.
    const int nameLen = fileName->getLength();
    fileNameU = (wchar_t *)gmallocn(nameLen + 1, sizeof(wchar_t));
    for (int idx = 0; idx < nameLen; ++idx) {
        fileNameU[idx] = (wchar_t)(fileName->getChar(idx) & 0xff);
    }
    fileNameU[nameLen] = L'\0';

    // The file itself is opened through the utf8ToUtf16() form of the name.
    wchar_t *utf16Name = (wchar_t *)utf8ToUtf16(fileName->c_str());
    file = GooFile::open(utf16Name);
    gfree(utf16Name);
#else
    file = GooFile::open(fileName->toStr());
#endif

    if (file) {
        // create stream
        str = new FileStream(file.get(), 0, false, file->size(), Object(objNull));
        ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
        return;
    }

    // Opening failed. Keep a copy of errno so that it can be referred to later.
    fopenErrno = errno;
    error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName.get(), strerror(errno));
    errCode = errOpenFile;
}
#ifdef _WIN32
// Windows-only constructor taking a wide-character file name of fileNameLen
// characters. Stores both a wide and an 8-bit copy of the name, opens the file
// and runs setup() on it; on failure errCode is set to errOpenFile.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback) : guiData(guiDataA)
{
    OSVERSIONINFO osInfo;

    // save both Unicode and 8-bit copies of the file name
    GooString *narrowName = new GooString();
    fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
    for (int pos = 0; pos < fileNameLen; ++pos) {
        // the 8-bit copy keeps only what survives the cast to char
        narrowName->append((char)fileNameA[pos]);
        fileNameU[pos] = fileNameA[pos];
    }
    fileName.reset(narrowName);
    fileNameU[fileNameLen] = L'\0';

    // try to open file
    // NB: _wfopen is only available in NT
    osInfo.dwOSVersionInfoSize = sizeof(osInfo);
    GetVersionEx(&osInfo);
    if (osInfo.dwPlatformId != VER_PLATFORM_WIN32_NT) {
        file = GooFile::open(fileName->toStr());
    } else {
        file = GooFile::open(fileNameU);
    }

    if (file) {
        // create stream
        str = new FileStream(file.get(), 0, false, file->size(), Object(objNull));
        ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
        return;
    }

    error(errIO, -1, "Couldn't open file '{0:t}'", fileName.get());
    errCode = errOpenFile;
}
#endif
// Builds a document on top of an existing stream. If the stream reports a file
// name it is copied into fileName (and, on Windows, widened into fileNameU).
// The stream pointer is stored in str and setup() is run on it.
PDFDoc::PDFDoc(BaseStream *strA, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback) : guiData(guiDataA)
{
    if (strA->getFileName()) {
        fileName = strA->getFileName()->copy();
#ifdef _WIN32
        // widen the 8-bit name byte by byte
        const int nameLen = fileName->getLength();
        fileNameU = (wchar_t *)gmallocn(nameLen + 1, sizeof(wchar_t));
        int idx = 0;
        while (idx < nameLen) {
            fileNameU[idx] = (wchar_t)(fileName->getChar(idx) & 0xff);
            ++idx;
        }
        fileNameU[nameLen] = L'\0';
#endif
    }

    str = strA;
    ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
}
// Common initialisation shared by the constructors. Validates the stream,
// reads the header, builds the XRef (retrying once if it was reconstructed but
// is still not ok), checks encryption with the given passwords, builds the
// Catalog (retrying once with a freshly built XRef) and extracts the PDF
// subtype. Returns true on success; on failure sets errCode and returns false.
bool PDFDoc::setup(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, const std::function<void()> &xrefReconstructedCallback)
{
pdfdocLocker();
// an empty (or negative-length) stream cannot be a document
if (str->getLength() <= 0) {
error(errSyntaxError, -1, "Document stream is empty");
errCode = errDamaged;
return false;
}
// probe that the stream can be repositioned before going any further
str->setPos(0, -1);
if (str->getPos() < 0) {
error(errSyntaxError, -1, "Document base stream is not seekable");
errCode = errFileIO;
return false;
}
str->reset();
// check footer
// Adobe does not seem to enforce %%EOF, so we do the same
// if (!checkFooter()) return false;
// check header
checkHeader();
bool wasReconstructed = false;
// read xref table
xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed, false, xrefReconstructedCallback);
if (!xref->isOk()) {
if (wasReconstructed) {
// second attempt: forget the cached start position and pass `true` to
// both offset helpers
delete xref;
startXRefPos = -1;
xref = new XRef(str, getStartXRef(true), getMainXRefEntriesOffset(true), &wasReconstructed, false, xrefReconstructedCallback);
}
if (!xref->isOk()) {
error(errSyntaxError, -1, "Couldn't read xref table");
errCode = xref->getErrorCode();
return false;
}
}
// check for encryption
if (!checkEncryption(ownerPassword, userPassword)) {
errCode = errEncrypted;
return false;
}
// read catalog
catalog = new Catalog(this);
if (catalog && !catalog->isOk()) {
if (!wasReconstructed) {
// try one more time to construct the Catalog, maybe the problem is damaged XRef
delete catalog;
delete xref;
xref = new XRef(str, 0, 0, nullptr, true, xrefReconstructedCallback);
catalog = new Catalog(this);
}
if (catalog && !catalog->isOk()) {
error(errSyntaxError, -1, "Couldn't read page catalog");
errCode = errBadCatalog;
return false;
}
}
// Extract PDF Subtype information
extractPDFSubtype();
// done
return true;
}
// Releases everything the document owns: cached pages, the security handler,
// outline, catalog, xref, hints, linearization data and the base stream.
PDFDoc::~PDFDoc()
{
    if (pageCache != nullptr) {
        // deleting a null slot is a no-op, so empty cache entries need no test
        for (int pageIdx = 0; pageIdx < getNumPages(); pageIdx++) {
            delete pageCache[pageIdx];
        }
        gfree(static_cast<void *>(pageCache));
    }

    delete secHdlr;
    delete outline;
    delete catalog;
    delete xref;
    delete hints;
    delete linearization;
    delete str;
#ifdef _WIN32
    gfree(fileNameU);
#endif
}
// Check for a %%EOF at the end of this stream
bool PDFDoc::checkFooter()
{
// we look in the last 1024 chars because Adobe does the same
char *eof = new char[1025];
Goffset pos = str->getPos();
str->setPos(1024, -1);
int i, ch;
for (i = 0; i < 1024; i++) {
ch = str->getChar();
if (ch == EOF) {
break;
}
eof[i] = ch;
}
eof[i] = '\0';
bool found = false;
for (i = i - 5; i >= 0; i--) {
if (strncmp(&eof[i], "%%EOF", 5) == 0) {
found = true;
break;
}
}
if (!found) {
error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
errCode = errDamaged;
delete[] eof;
return false;
}
delete[] eof;
str->setPos(pos);
return true;
}
// Check for a PDF header on this stream. Skip past some garbage
// if necessary.
// Look for a "%PDF-" header within the first headerSearchSize bytes of the
// stream, skipping any garbage in front of it. When found, the stream start is
// moved to the header and the major/minor version that follows it is parsed
// into headerPdfMajorVersion / headerPdfMinorVersion. Both are reset to 0
// first and stay 0 when nothing can be parsed. A missing header only yields a
// warning.
void PDFDoc::checkHeader()
{
    headerPdfMajorVersion = 0;
    headerPdfMinorVersion = 0;

    // read up to headerSearchSize bytes from the beginning of the document
    char hdrBuf[headerSearchSize + 1];
    int bytesRead = 0;
    while (bytesRead < headerSearchSize) {
        const int c = str->getChar();
        if (c == EOF) {
            break;
        }
        hdrBuf[bytesRead++] = c;
    }
    hdrBuf[bytesRead] = '\0';

    // find the start of the PDF header if it exists
    int headerPos = -1;
    for (int i = 0; i < bytesRead - 5; ++i) {
        if (strncmp(&hdrBuf[i], "%PDF-", 5) == 0) {
            headerPos = i;
            break;
        }
    }
    if (headerPos < 0) {
        error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
        return;
    }
    str->moveStart(headerPos);

    // the version is the first whitespace-delimited token after "%PDF-"
    char *tokptr;
    char *const versionToken = strtok_r(&hdrBuf[headerPos + 5], " \t\n\r", &tokptr);
    if (versionToken == nullptr) {
        error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
        return;
    }
    sscanf(versionToken, "%d.%d", &headerPdfMajorVersion, &headerPdfMinorVersion);
    // We don't do the version check. Don't add it back in.
}
bool PDFDoc::checkEncryption(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword)
{
bool encrypted;
bool ret;
Object encrypt = xref->getTrailerDict()->dictLookup("Encrypt");
if ((encrypted = encrypt.isDict())) {
if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
if (secHdlr->isUnencrypted()) {
// no encryption
ret = true;
} else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
// authorization succeeded
xref->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), secHdlr->getFileKey(), secHdlr->getFileKeyLength(), secHdlr->getEncVersion(), secHdlr->getEncRevision(), secHdlr->getEncAlgorithm());
ret = true;
} else {
// authorization failed
ret = false;
}
} else {
// couldn't find the matching security handler
ret = false;
}
} else {
// document is not encrypted
ret = true;
}
return ret;
}
// Derives the subtype part from a version string such as "PDF/X-1a:2003".
// The regex captures the single digit after the dash and an optional
// four-digit year. For PDF/X the (digit, year) pair is mapped through a fixed
// table; for every other subtype the digit is cast directly to PDFSubtypePart.
// Returns subtypePartNone when the string does not match or the PDF/X digit is
// not one of 1-5.
static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, const std::string &pdfsubver)
{
    const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
    std::smatch match;
    if (!std::regex_search(pdfsubver, match, regex)) {
        return subtypePartNone;
    }

    const int part = std::stoi(match.str(1));
    const int date = match[2].matched ? std::stoi(match.str(2)) : 0;

    if (subtype != subtypePDFX) {
        return (PDFSubtypePart)part;
    }

    switch (part) {
    case 1:
        // every year other than 2003 (including a missing one) counts as 2001
        return date == 2003 ? subtypePart4 : subtypePart1;
    case 2:
        return subtypePart5;
    case 3:
        // every year other than 2003 (including a missing one) counts as 2002
        return date == 2003 ? subtypePart6 : subtypePart3;
    case 4:
        return subtypePart7;
    case 5:
        return subtypePart8;
    default:
        return subtypePartNone;
    }
}
// Derives the conformance level from a version string such as "PDF/A-1b".
// The regex captures the letters that directly follow the part digit; they are
// lower-cased and compared against a, b, g, n, p, pg and u. Returns
// subtypeConfNone when the string does not match or the letters are not one of
// those values.
static PDFSubtypeConformance pdfConformanceFromString(const std::string &pdfsubver)
{
    const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
    std::smatch match;

    // match contains the PDF conformance (A, B, G, N, P, PG or U)
    if (!std::regex_search(pdfsubver, match, regex)) {
        return subtypeConfNone;
    }

    // Automatic object instead of new/delete: nothing to release by hand.
    GooString conf(match.str(1));
    // Convert to lowercase as the conformance may appear in both cases
    conf.lowerCase();

    if (conf.cmp("a") == 0) {
        return subtypeConfA;
    }
    if (conf.cmp("b") == 0) {
        return subtypeConfB;
    }
    if (conf.cmp("g") == 0) {
        return subtypeConfG;
    }
    if (conf.cmp("n") == 0) {
        return subtypeConfN;
    }
    if (conf.cmp("p") == 0) {
        return subtypeConfP;
    }
    if (conf.cmp("pg") == 0) {
        return subtypeConfPG;
    }
    if (conf.cmp("u") == 0) {
        return subtypeConfU;
    }
    return subtypeConfNone;
}
void PDFDoc::extractPDFSubtype()
{
pdfSubtype = subtypeNull;
pdfPart = subtypePartNull;
pdfConformance = subtypeConfNull;
std::unique_ptr<GooString> pdfSubtypeVersion;
// Find PDF InfoDict subtype key if any
if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFA1Version"))) {
pdfSubtype = subtypePDFA;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFEVersion"))) {
pdfSubtype = subtypePDFE;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFUAVersion"))) {
pdfSubtype = subtypePDFUA;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFVTVersion"))) {
pdfSubtype = subtypePDFVT;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFXVersion"))) {
pdfSubtype = subtypePDFX;
} else {
pdfSubtype = subtypeNone;
pdfPart = subtypePartNone;
pdfConformance = subtypeConfNone;
return;
}
// Extract part from version string
pdfPart = pdfPartFromString(pdfSubtype, pdfSubtypeVersion->toStr());
// Extract conformance from version string
pdfConformance = pdfConformanceFromString(pdfSubtypeVersion->toStr());
}
// Walks the field tree rooted at ff depth-first and appends every childless
// field whose type is formSignature to res.
static void addSignatureFieldsToVector(FormField *ff, std::vector<FormFieldSignature *> &res)
{
    if (ff->getNumChildren() != 0) {
        // inner node: only its descendants can contribute
        for (int idx = 0; idx < ff->getNumChildren(); ++idx) {
            FormField *child = ff->getChildren(idx);
            addSignatureFieldsToVector(child, res);
        }
        return;
    }

    if (ff->getType() == formSignature) {
        res.push_back(static_cast<FormFieldSignature *>(ff));
    }
}
std::vector<FormFieldSignature *> PDFDoc::getSignatureFields()
{
// Unfortunately there's files with signatures in Forms but not in Annots
// and files with signatures in Annots but no in forms so we need to search both
std::vector<FormFieldSignature *> res;
// First search
const Form *f = catalog->getForm();
if (f) {
const int nRootFields = f->getNumFields();
for (int i = 0; i < nRootFields; ++i) {
FormField *ff = f->getRootField(i);
addSignatureFieldsToVector(ff, res);
}
}
// Second search
for (int page = 1; page <= getNumPages(); ++page) {
Page *p = getPage(page);
if (p) {
const std::unique_ptr<FormPageWidgets> pw = p->getFormWidgets();
for (int i = 0; i < pw->getNumWidgets(); ++i) {
FormWidget *fw = pw->getWidget(i);
if (fw->getType() == formSignature) {
assert(fw->getField()->getType() == formSignature);
FormFieldSignature *ffs = static_cast<FormFieldSignature *>(fw->getField());
if (std::ranges::find(res, ffs) == res.end()) {
res.push_back(ffs);
}
}
}
}
}
return res;
}
// Renders one page to `out`. If the global print-commands flag is set, a
// banner with the page number is printed to stdout first. Does nothing more
// when getPage() yields no page for this number.
void PDFDoc::displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
                         bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
{
    if (globalParams->getPrintCommands()) {
        printf("***** page %d *****\n", page);
    }

    if (!getPage(page)) {
        return;
    }
    getPage(page)->display(out, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
}
// Renders pages firstPage..lastPage (both inclusive) in ascending order by
// calling displayPage() for each of them.
void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
                          bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData)
{
    for (int current = firstPage; current <= lastPage; ++current) {
        displayPage(out, current, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData);
    }
}
// Renders the rectangle (sliceX, sliceY, sliceW, sliceH) of one page to `out`.
// Does nothing when getPage() yields no page for this number.
void PDFDoc::displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data),
                              void *abortCheckCbkData, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
{
    if (!getPage(page)) {
        return;
    }
    getPage(page)->displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop, sliceX, sliceY, sliceW, sliceH, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
}
// Returns the links of the given page, or a Links object constructed from
// nullptr when getPage() yields no page for this number.
std::unique_ptr<Links> PDFDoc::getLinks(int page)
{
    Page *p = getPage(page);
    if (p) {
        return p->getLinks();
    }
    return std::make_unique<Links>(nullptr);
}
// Forwards to Page::processLinks() for the given page; does nothing when
// getPage() yields no page for this number.
void PDFDoc::processLinks(OutputDev *out, int page)
{
    if (!getPage(page)) {
        return;
    }
    getPage(page)->processLinks(out);
}
// Returns the Linearization object for this document, creating it from the
// base stream on first use. Creating it also resets linearizationState to 0.
Linearization *PDFDoc::getLinearization()
{
    if (linearization) {
        return linearization;
    }
    linearization = new Linearization(str);
    linearizationState = 0;
    return linearization;
}
bool PDFDoc::checkLinearization()
{
if (linearization == nullptr) {
return false;
}
if (linearizationState == 1) {
return true;
}
if (linearizationState == 2) {
return false;
}
if (!hints) {
hints = new Hints(str, linearization, getXRef(), secHdlr);
}
if (!hints->isOk()) {
linearizationState = 2;
return false;
}
for (int page = 1; page <= linearization->getNumPages(); page++) {
Ref pageRef;
pageRef.num = hints->getPageObjectNum(page);
if (!pageRef.num) {
linearizationState = 2;
return false;
}
// check for bogus ref - this can happen in corrupted PDF files
if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
linearizationState = 2;
return false;
}
pageRef.gen = xref->getEntry(pageRef.num)->gen;
Object obj = xref->fetch(pageRef);
if (!obj.isDict("Page")) {
linearizationState = 2;
return false;
}
}
linearizationState = 1;
return true;
}
// A document counts as linearized when the stream length is non-zero and
// equals the length recorded in the linearization data. When
// tryingToReconstruct is set, any positive recorded length is accepted too.
bool PDFDoc::isLinearized(bool tryingToReconstruct)
{
    if ((str->getLength()) && (getLinearization()->getLength() == str->getLength())) {
        return true;
    }
    return tryingToReconstruct && getLinearization()->getLength() > 0;
}
// Sets or removes a string entry of the document info dictionary.
//
// A null, empty or BOM-only value means "remove the entry"; in that case the
// value is deleted here, otherwise the pointer is handed to the Object stored
// in the dictionary. The info dictionary is created when needed and removed
// again when it ends up empty.
void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value)
{
    const bool removeEntry = !value || value->getLength() == 0 || (value->toStr() == unicodeByteOrderMark);
    if (removeEntry) {
        delete value;
    }

    Object infoObj = getDocInfo();
    if (infoObj.isNull() && removeEntry) {
        // No info dictionary, so no entry to remove.
        return;
    }

    Ref infoObjRef;
    infoObj = xref->createDocInfoIfNeeded(&infoObjRef);
    if (!removeEntry) {
        infoObj.dictSet(key, Object(value));
    } else {
        infoObj.dictSet(key, Object(objNull));
    }

    if (infoObj.dictGetLength() != 0) {
        xref->setModifiedObject(&infoObj, infoObjRef);
    } else {
        // Info dictionary is empty. Remove it altogether.
        removeDocInfo();
    }
}
// Returns a copy of the string stored under `key` in the document info
// dictionary, or an empty pointer when there is no info dictionary or the
// entry is not a string.
std::unique_ptr<GooString> PDFDoc::getDocInfoStringEntry(const char *key)
{
    Object infoObj = getDocInfo();
    if (infoObj.isDict()) {
        const Object entryObj = infoObj.dictLookup(key);
        if (entryObj.isString()) {
            return entryObj.getString()->copy();
        }
    }
    return {};
}
// Converts a raw document ID of pdfIdLength / 2 bytes into its lowercase
// hexadecimal text form (pdfIdLength characters) and stores it in `id`.
// Returns false, leaving `id` untouched, when the input has a different length.
static bool get_id(const GooString *encodedidstring, GooString *id)
{
    if (encodedidstring->getLength() != pdfIdLength / 2) {
        return false;
    }

    static const char hexDigits[] = "0123456789abcdef";
    const char *encodedid = encodedidstring->c_str();
    char pdfid[pdfIdLength + 1];
    for (int byteIdx = 0; byteIdx < pdfIdLength / 2; ++byteIdx) {
        // go through unsigned char so bytes >= 0x80 do not sign-extend
        const unsigned char byte = static_cast<unsigned char>(encodedid[byteIdx]);
        pdfid[2 * byteIdx] = hexDigits[byte >> 4];
        pdfid[2 * byteIdx + 1] = hexDigits[byte & 0x0f];
    }
    pdfid[pdfIdLength] = '\0';

    id->Set(pdfid, pdfIdLength);
    return true;
}
// Reads the trailer's "ID" array and writes the hexadecimal form of its first
// element to permanent_id and of its second element to update_id. Either
// output pointer may be null to skip that element. Returns false when the
// trailer has no two-element ID array, when a requested element is not a
// string, or when get_id() rejects it.
bool PDFDoc::getID(GooString *permanent_id, GooString *update_id) const
{
    Object obj = xref->getTrailerDict()->dictLookup("ID");
    if (!obj.isArray() || obj.arrayGetLength() != 2) {
        return false;
    }

    if (permanent_id) {
        const Object first = obj.arrayGet(0);
        if (!first.isString()) {
            error(errSyntaxError, -1, "Invalid permanent ID");
            return false;
        }
        if (!get_id(first.getString(), permanent_id)) {
            return false;
        }
    }

    if (update_id) {
        const Object second = obj.arrayGet(1);
        if (!second.isString()) {
            error(errSyntaxError, -1, "Invalid update ID");
            return false;
        }
        if (!get_id(second.getString(), update_id)) {
            return false;
        }
    }

    return true;
}
// Returns the hint tables, creating them on first use if the document is
// linearized. Returns whatever `hints` holds otherwise.
Hints *PDFDoc::getHints()
{
    if (hints) {
        return hints;
    }
    if (isLinearized()) {
        hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
    }
    return hints;
}
// Writes a new PDF file `name` that contains only page pageNo of this
// document: the objects reachable from the trailer, info dictionary, catalog,
// AcroForm, resources, the page and its annotations are marked and written,
// followed by a fresh catalog, a one-entry page tree, the page object and an
// xref table with trailer.
//
// Returns errNone on success, errFileChangedSinceOpen when the source file was
// modified after opening, errOpenFile for an illegal page number, an
// unopenable output file or a non-dictionary catalog/pages/page object, and
// errDamaged when marking the catalog's objects fails.
int PDFDoc::savePageAs(const GooString &name, int pageNo)
{
FILE *f;
if (file && file->modificationTimeChangedSinceOpen()) {
return errFileChangedSinceOpen;
}
// object number for the new catalog; rootNum + 1 is the new page tree node
// and rootNum + 2 the page itself
int rootNum = getXRef()->getNumObjects() + 1;
// Make sure that special flags are set, because we are going to read
// all objects, including Unencrypted ones.
xref->scanSpecialFlags();
unsigned char *fileKey;
CryptAlgorithm encAlgorithm;
int keyLength;
xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages());
return errOpenFile;
}
// a crop box is only passed on when the page reports itself as cropped
const PDFRectangle *cropBox = nullptr;
if (getCatalog()->getPage(pageNo)->isCropped()) {
cropBox = getCatalog()->getPage(pageNo)->getCropBox();
}
replacePageDict(pageNo, getCatalog()->getPage(pageNo)->getRotate(), getCatalog()->getPage(pageNo)->getMediaBox(), cropBox);
Ref *refPage = getCatalog()->getPageRef(pageNo);
Object page = getXRef()->fetch(*refPage);
if (!(f = openFile(name.c_str(), "wb"))) {
error(errIO, -1, "Couldn't open file '{0:t}'", &name);
return errOpenFile;
}
// Calls fclose on f when the fileCloser is destroyed because it goes out of scope
const std::unique_ptr<FILE, FILECloser> fileCloser(f);
const std::unique_ptr<OutStream> outStr = std::make_unique<FileOutStream>(f, 0);
// yRef collects the entries of the objects to write; countRef is a second,
// initially empty XRef handed to the mark* helpers alongside it
const std::unique_ptr<XRef> yRef = std::make_unique<XRef>(getXRef()->getTrailerDict());
if (secHdlr != nullptr && !secHdlr->isUnencrypted()) {
yRef->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
}
const std::unique_ptr<XRef> countRef = std::make_unique<XRef>();
Object *trailerObj = getXRef()->getTrailerDict();
if (trailerObj->isDict()) {
markPageObjects(trailerObj->getDict(), yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
}
yRef->add(0, 65535, 0, false);
writeHeader(outStr.get(), getPDFMajorVersion(), getPDFMinorVersion());
// get and mark info dict
Object infoObj = getXRef()->getDocInfo();
if (infoObj.isDict()) {
Dict *infoDict = infoObj.getDict();
markPageObjects(infoDict, yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
if (trailerObj->isDict()) {
Dict *trailerDict = trailerObj->getDict();
const Object &ref = trailerDict->lookupNF("Info");
if (ref.isRef()) {
yRef->add(ref.getRef(), 0, true);
// keep the "compressed" entry type of the source xref entry
if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
}
}
}
}
// get and mark output intents etc.
Object catObj = getXRef()->getCatalog();
if (!catObj.isDict()) {
error(errSyntaxError, -1, "XRef's Catalog is not a dictionary");
return errOpenFile;
}
Dict *catDict = catObj.getDict();
Object pagesObj = catDict->lookup("Pages");
if (!pagesObj.isDict()) {
error(errSyntaxError, -1, "Catalog Pages is not a dictionary");
return errOpenFile;
}
Object afObj = catDict->lookupNF("AcroForm").copy();
if (!afObj.isNull()) {
markAcroForm(&afObj, yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
}
Dict *pagesDict = pagesObj.getDict();
Object resourcesObj = pagesDict->lookup("Resources");
if (resourcesObj.isDict()) {
markPageObjects(resourcesObj.getDict(), yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
}
if (!markPageObjects(catDict, yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2)) {
error(errSyntaxError, -1, "markPageObjects failed");
return errDamaged;
}
if (!page.isDict()) {
error(errSyntaxError, -1, "page is not a dictionary");
return errOpenFile;
}
Dict *pageDict = page.getDict();
// neither the page tree root nor the page has Resources: fall back to the
// resource dictionary object reported by the Page itself
if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
Object *resourceDictObject = getCatalog()->getPage(pageNo)->getResourceDictObject();
if (resourceDictObject->isDict()) {
resourcesObj = resourceDictObject->copy();
markPageObjects(resourcesObj.getDict(), yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
}
}
markPageObjects(pageDict, yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
Object annotsObj = pageDict->lookupNF("Annots").copy();
if (!annotsObj.isNull()) {
markAnnotations(&annotsObj, yRef.get(), countRef.get(), 0, refPage->num, rootNum + 2);
}
yRef->markUnencrypted();
writePageObjects(outStr.get(), yRef.get(), 0);
// new catalog: every key of the source catalog except Type, Catalog and
// Pages is copied; Pages is pointed at the new page tree node
yRef->add(rootNum, 0, outStr->getPos(), true);
outStr->printf("%d 0 obj\n", rootNum);
outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
for (int j = 0; j < catDict->getLength(); j++) {
const char *key = catDict->getKey(j);
if (strcmp(key, "Type") != 0 && strcmp(key, "Catalog") != 0 && strcmp(key, "Pages") != 0) {
if (j > 0) {
outStr->printf(" ");
}
Object value = catDict->getValNF(j).copy();
outStr->printf("/%s ", key);
writeObject(&value, outStr.get(), getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
}
}
outStr->printf(">>\nendobj\n");
// new page tree node with the single page as its only kid
yRef->add(rootNum + 1, 0, outStr->getPos(), true);
outStr->printf("%d 0 obj\n", rootNum + 1);
outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
if (resourcesObj.isDict()) {
outStr->printf("/Resources ");
writeObject(&resourcesObj, outStr.get(), getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
}
outStr->printf(">>\n");
outStr->printf("endobj\n");
// the page object itself: copied key by key, with Parent redirected to the
// new page tree node
yRef->add(rootNum + 2, 0, outStr->getPos(), true);
outStr->printf("%d 0 obj\n", rootNum + 2);
outStr->printf("<< ");
for (int n = 0; n < pageDict->getLength(); n++) {
if (n > 0) {
outStr->printf(" ");
}
const char *key = pageDict->getKey(n);
Object value = pageDict->getValNF(n).copy();
if (strcmp(key, "Parent") == 0) {
outStr->printf("/Parent %d 0 R", rootNum + 1);
} else {
outStr->printf("/%s ", key);
writeObject(&value, outStr.get(), getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
}
}
outStr->printf(" >>\nendobj\n");
// xref table and trailer, with the new catalog as root
Goffset uxrefOffset = outStr->getPos();
Ref ref;
ref.num = rootNum;
ref.gen = 0;
Object trailerDict = createTrailerDict(rootNum + 3, false, 0, &ref, getXRef(), name.c_str(), uxrefOffset);
writeXRefTableTrailer(std::move(trailerDict), yRef.get(), false /* do not write unnecessary entries */, uxrefOffset, outStr.get(), getXRef());
outStr->close();
return errNone;
}
int PDFDoc::saveAs(const GooString &name, PDFWriteMode mode)
{
FILE *f;
OutStream *outStr;
int res;
if (!(f = openFile(name.c_str(), "wb"))) {
error(errIO, -1, "Couldn't open file '{0:t}'", &name);
return errOpenFile;
}
outStr = new FileOutStream(f, 0);
res = saveAs(outStr, mode);
delete outStr;
fclose(f);
return res;
}
// Saves the document to outStr.
//
// - unmodified document in writeStandard mode: the original bytes are copied
//   and the result of that copy (e.g. errFileIO on a short write) is returned;
// - writeForceRewrite: the whole document is rewritten;
// - otherwise: an incremental update is appended to a copy of the original.
//
// Returns errFileChangedSinceOpen when the source file was modified after it
// was opened. The rewrite and incremental paths report no status and yield
// errNone.
int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode)
{
    if (file && file->modificationTimeChangedSinceOpen()) {
        return errFileChangedSinceOpen;
    }

    if (!xref->isModified() && mode == writeStandard) {
        // simply copy the original file, and do not hide a failed copy
        return saveWithoutChangesAs(outStr);
    }

    if (mode == writeForceRewrite) {
        saveCompleteRewrite(outStr);
    } else {
        saveIncrementalUpdate(outStr);
    }
    return errNone;
}
int PDFDoc::saveWithoutChangesAs(const GooString &name)
{
FILE *f;
OutStream *outStr;
int res;
if (!(f = openFile(name.c_str(), "wb"))) {
error(errIO, -1, "Couldn't open file '{0:t}'", &name);
return errOpenFile;
}
outStr = new FileOutStream(f, 0);
res = saveWithoutChangesAs(outStr);
delete outStr;
fclose(f);
return res;
}
// Copies the bytes of the base stream to outStr in 4096-byte chunks.
// Returns errFileChangedSinceOpen when the source file was modified after it
// was opened, errFileIO when a chunk could not be written completely, and
// errNone otherwise. The temporary stream copy is closed and released on every
// path, including the write-error one.
int PDFDoc::saveWithoutChangesAs(OutStream *outStr)
{
    if (file && file->modificationTimeChangedSinceOpen()) {
        return errFileChangedSinceOpen;
    }

    const std::unique_ptr<BaseStream> copyStr(str->copy());
    copyStr->reset();

    int result = errNone;
    while (copyStr->lookChar() != EOF) {
        std::array<unsigned char, 4096> array;
        const size_t size = copyStr->doGetChars(array.size(), array.data());
        const auto sizeWritten = outStr->write(std::span(array.data(), size));
        if (size != sizeWritten) {
            // short write: stop copying but still fall through to the cleanup
            result = errFileIO;
            break;
        }
    }

    copyStr->close();
    return result;
}
// Writes the document as an incremental update: the original bytes are copied
// to outStr, then every xref entry flagged as Updated is appended as a new
// object (or recorded as freed), followed by a new xref section and trailer.
// The xref section is written as an xref stream if the source xref is one,
// and as a classic table otherwise. Nothing is appended when no entry was
// flagged as Updated.
void PDFDoc::saveIncrementalUpdate(OutStream *outStr)
{
// copy the original file
BaseStream *copyStr = str->copy();
copyStr->reset();
while (copyStr->lookChar() != EOF) {
std::array<unsigned char, 4096> array;
size_t size = copyStr->doGetChars(array.size(), array.data());
auto sizeWritten = outStr->write(std::span(array.data(), size));
if (size != sizeWritten) {
// Write error of some sort
// NOTE(review): the short write is detected but not acted upon, and this
// function has no way to report it to its caller.
}
}
copyStr->close();
delete copyStr;
unsigned char *fileKey;
CryptAlgorithm encAlgorithm;
int keyLength;
xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
// uxref collects the entries of the update section
XRef *uxref = new XRef();
uxref->add(0, 65535, 0, false);
xref->lock();
for (int i = 0; i < xref->getNumObjects(); i++) {
if ((xref->getEntry(i)->type == xrefEntryFree) && (xref->getEntry(i)->gen == 0)) { // we skip the irrelevant free objects
continue;
}
if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { // we have an updated object
Ref ref;
ref.num = i;
// compressed entries are written with generation 0
ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
if (xref->getEntry(i)->type != xrefEntryFree) {
Object obj1 = xref->fetch(ref, 1 /* recursion */);
Goffset offset = writeObjectHeader(&ref, outStr);
writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
writeObjectFooter(outStr);
uxref->add(ref, offset, true);
} else {
// updated entry that is now free: recorded without an object body
uxref->add(ref, 0, false);
}
}
}
xref->unlock();
// because of "uxref->add(0, 65535, 0, false);" uxref->getNumObjects() will
// always be >= 1; if it is 1, it means there is nothing to update
if (uxref->getNumObjects() == 1) {
delete uxref;
return;
}
Goffset uxrefOffset = outStr->getPos();
int numobjects = xref->getNumObjects();
const char *fileNameA = fileName ? fileName->c_str() : nullptr;
Ref rootRef, uxrefStreamRef;
rootRef.num = getXRef()->getRootNum();
rootRef.gen = getXRef()->getRootGen();
// Output a xref stream if there is a xref stream already
bool xRefStream = xref->isXRefStream();
if (xRefStream) {
// Append an entry for the xref stream itself
uxrefStreamRef.num = numobjects++;
uxrefStreamRef.gen = 0;
uxref->add(uxrefStreamRef, uxrefOffset, true);
}
Object trailerDict = createTrailerDict(numobjects, true, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
if (xRefStream) {
writeXRefStreamTrailer(std::move(trailerDict), uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
} else {
writeXRefTableTrailer(std::move(trailerDict), uxref, false, uxrefOffset, outStr, getXRef());
}
delete uxref;
}
// Rewrites the whole document to outStr: a fresh header, every object that
// is in use, then a newly built xref table and trailer.
void PDFDoc::saveCompleteRewrite(OutStream *outStr)
{
    // Every object is read below, including the ones flagged Unencrypted,
    // so the special flags have to be in place first.
    xref->scanSpecialFlags();

    unsigned char *fileKey;
    CryptAlgorithm encAlgorithm;
    int keyLength;
    xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);

    writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());

    XRef *uxref = new XRef();
    uxref->add(0, 65535, 0, false);

    xref->lock();
    for (int num = 0; num < xref->getNumObjects(); num++) {
        const XRefEntryType type = xref->getEntry(num)->type;
        Ref ref;
        ref.num = num;

        if (type == xrefEntryFree) {
            ref.gen = xref->getEntry(num)->gen;
            /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
               and we don't want the one with num=0 because it has already been added (gen = 65535)*/
            if (ref.gen > 0 && ref.num > 0) {
                uxref->add(ref, 0, false);
            }
            continue;
        }

        if (xref->getEntry(num)->getFlag(XRefEntry::DontRewrite)) {
            // This entry must not be written, put a free entry instead (with incremented gen)
            ref.gen = xref->getEntry(num)->gen + 1;
            uxref->add(ref, 0, false);
            continue;
        }

        const bool compressed = (type == xrefEntryCompressed);
        if (!compressed && type != xrefEntryUncompressed) {
            // any other entry type is not written
            continue;
        }

        // compressed entries have gen == 0
        ref.gen = compressed ? 0 : xref->getEntry(num)->gen;
        Object obj1 = xref->fetch(ref, 1 /* recursion */);
        const Goffset offset = writeObjectHeader(&ref, outStr);
        // Uncompressed objects flagged Unencrypted are written in unencrypted form
        if (!compressed && xref->getEntry(num)->getFlag(XRefEntry::Unencrypted)) {
            writeObject(&obj1, outStr, nullptr, cryptRC4, 0, 0, 0);
        } else {
            writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
        }
        writeObjectFooter(outStr);
        uxref->add(ref, offset, true);
    }
    xref->unlock();

    const Goffset uxrefOffset = outStr->getPos();
    writeXRefTableTrailer(uxrefOffset, uxref, true /* write all entries */, uxref->getNumObjects(), outStr, false /* complete rewrite */);
    delete uxref;
}
// Returns name with every byte that cannot appear literally in a PDF name
// replaced by '#' followed by its two-digit lowercase hexadecimal code.
// Escaped are: bytes up to 0x20, bytes from 0x7f upwards, and the characters
// ( ) < > [ ] { } / % #.
std::string PDFDoc::sanitizedName(const std::string &name)
{
    static const char hexDigits[] = "0123456789abcdef";

    std::string result;
    for (const char ch : name) {
        const unsigned int code = ch & 0xff;

        bool mustEscape = code <= 0x20 || code >= 0x7f;
        if (!mustEscape) {
            switch (ch) {
            case '(':
            case ')':
            case '<':
            case '>':
            case '[':
            case ']':
            case '{':
            case '}':
            case '/':
            case '%':
            case '#':
                mustEscape = true;
                break;
            default:
                break;
            }
        }

        if (mustEscape) {
            result.push_back('#');
            result.push_back(hexDigits[code >> 4]);
            result.push_back(hexDigits[code & 0x0f]);
        } else {
            result.push_back(ch);
        }
    }
    return result;
}
// Writes dict to outStr as "<<" /Key value ... ">> ".
//
// alreadyWrittenDicts holds the dictionaries written so far in the current
// traversal; when dict is already in it, a warning is emitted and nothing is
// written. Callers starting a traversal pass nullptr and a set that lives for
// the duration of this call is used instead.
void PDFDoc::writeDictionary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
{
    std::set<Dict *> localWrittenDicts;
    std::set<Dict *> *writtenDicts = alreadyWrittenDicts ? alreadyWrittenDicts : &localWrittenDicts;

    // insert() reports through .second whether dict was new to the set
    if (!writtenDicts->insert(dict).second) {
        error(errSyntaxWarning, -1, "PDFDoc::writeDictionary: Found recursive dicts");
        return;
    }

    outStr->printf("<<");
    const int numEntries = dict->getLength();
    for (int idx = 0; idx < numEntries; idx++) {
        const GooString keyName(dict->getKey(idx));
        const std::string escapedKey = sanitizedName(keyName.toStr());
        outStr->printf("/%s ", escapedKey.c_str());

        Object value = dict->getValNF(idx).copy();
        writeObject(&value, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, writtenDicts);
    }
    outStr->printf(">> ");
}
// Writes the data read from str (after a reset) to outStr, framed by the
// "stream" and "endstream" keywords.
void PDFDoc::writeStream(Stream *str, OutStream *outStr)
{
    outStr->printf("stream\r\n");
    str->reset();

    int c;
    while ((c = str->getChar()) != EOF) {
        outStr->printf("%c", c);
    }

    outStr->printf("\r\nendstream\r\n");
}
// Copies the unfiltered bytes of str to outStr, framed by the "stream" and
// "endstream" keywords. The number of bytes copied is the integer Length
// entry of the stream dictionary; without one an error is reported and
// nothing is written. Copying stops early, with an error, on premature EOF.
void PDFDoc::writeRawStream(Stream *str, OutStream *outStr)
{
    Object lengthObj = str->getDict()->lookup("Length");

    Goffset length;
    if (lengthObj.isInt()) {
        length = lengthObj.getInt();
    } else if (lengthObj.isInt64()) {
        length = lengthObj.getInt64();
    } else {
        error(errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
        return;
    }

    outStr->printf("stream\r\n");
    str->unfilteredReset();
    for (Goffset remaining = length; remaining > 0; --remaining) {
        const int c = str->getUnfilteredChar();
        if (unlikely(c == EOF)) {
            error(errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
            break;
        }
        outStr->printf("%c", c);
    }
    str->reset();
    outStr->printf("\r\nendstream\r\n");
}
void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref)
{
// Encrypt string if encryption is enabled
GooString *sEnc = nullptr;
if (fileKey) {
EncryptStream *enc = new EncryptStream(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, ref);
sEnc = new GooString();
int c;
enc->reset();
while ((c = enc->getChar()) != EOF) {
sEnc->append((char)c);
}
delete enc;
s = sEnc;
}
// Write data
if (hasUnicodeByteOrderMark(s->toStr())) {
// unicode string don't necessary end with \0
const char *c = s->c_str();
std::stringstream stream;
stream << std::setfill('0') << std::hex;
for (int i = 0; i < s->getLength(); i++) {
stream << std::setw(2) << (0xff & (unsigned int)*(c + i));
}
outStr->printf("<");
outStr->printf("%s", stream.str().c_str());
outStr->printf("> ");
} else {
const char *c = s->c_str();
outStr->printf("(");
for (int i = 0; i < s->getLength(); i++) {
char unescaped = *(c + i) & 0x000000ff;
// escape if needed
if (unescaped == '\r') {
outStr->printf("\\r");
} else if (unescaped == '\n') {
outStr->printf("\\n");
} else {
if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
outStr->printf("%c", '\\');
}
outStr->printf("%c", unescaped);
}
}
outStr->printf(") ");
}
delete sEnc;
}
// Writes the "<num> <gen> obj" line that opens an indirect object and
// returns the position outStr reported just before that line was written.
Goffset PDFDoc::writeObjectHeader(Ref *ref, OutStream *outStr)
{
    const Goffset headerPos = outStr->getPos();
    outStr->printf("%i %i obj\r\n", ref->num, ref->gen);
    return headerPos;
}
// Convenience overload: packs objNum and objGen into a Ref and forwards all
// arguments to the Ref-based writeObject.
void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts)
{
    Ref ref;
    ref.num = objNum;
    ref.gen = objGen;
    writeObject(obj, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
}
// Serializes obj to outStr in PDF syntax, dispatching on the object's type.
//
// - xRef is forwarded to nested writeObject/writeDictionary calls and is used
//   by the raw stream copy path to look up where a stream ends.
// - numOffset is added to the object number of every indirect reference written.
// - fileKey, encAlgorithm, keyLength and ref are handed to writeString and to
//   EncryptStream; with a null fileKey no EncryptStream is created here.
// - alreadyWrittenDicts is forwarded to writeDictionary for dictionaries and
//   stream dictionaries; array elements are written without it.
void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
{
Array *array;
switch (obj->getType()) {
case objBool:
outStr->printf("%s ", obj->getBool() ? "true" : "false");
break;
case objInt:
outStr->printf("%i ", obj->getInt());
break;
case objInt64:
outStr->printf("%lli ", obj->getInt64());
break;
case objReal: {
// reals are formatted with GooString's "{0:.10g}" format
GooString s;
s.appendf("{0:.10g}", obj->getReal());
outStr->printf("%s ", s.c_str());
break;
}
case objString:
writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, ref);
break;
case objHexString: {
// hex strings are written as-is between angle brackets, two hex digits per byte
const GooString *s = obj->getHexString();
outStr->printf("<");
for (int i = 0; i < s->getLength(); i++) {
outStr->printf("%02x", s->getChar(i) & 0xff);
}
outStr->printf("> ");
break;
}
case objName: {
GooString name(obj->getName());
outStr->printf("/%s ", sanitizedName(name.toStr()).c_str());
break;
}
case objNull:
outStr->printf("null ");
break;
case objArray:
// elements are fetched with getNF(), so indirect references are written as references
array = obj->getArray();
outStr->printf("[");
for (int i = 0; i < array->getLength(); i++) {
Object obj1 = array->getNF(i).copy();
writeObject(&obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref);
}
outStr->printf("] ");
break;
case objDict:
writeDictionary(obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
break;
case objStream: {
// We can't modify stream with the current implementation (no write functions in Stream API)
// => the only type of streams which that have been modified are internal streams (=strWeird)
Stream *stream = obj->getStream();
if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
// we write the stream unencoded => TODO: write stream encoder
// Encrypt stream
// removeFilter: drop the Filter entry from the written dictionary
// addEncryptstream: wrap the data in an EncryptStream before writing
bool removeFilter = true;
bool addEncryptstream = false;
if (stream->getKind() == strWeird && fileKey) {
// an internal stream that names a Crypt filter keeps its Filter entry
// and is not wrapped in an EncryptStream here
Object filter = stream->getDict()->lookup("Filter");
if (!filter.isName("Crypt")) {
if (filter.isArray()) {
for (int i = 0; i < filter.arrayGetLength(); i++) {
Object filterEle = filter.arrayGet(i);
if (filterEle.isName("Crypt")) {
removeFilter = false;
break;
}
}
if (removeFilter) {
addEncryptstream = true;
}
} else {
addEncryptstream = true;
}
} else {
removeFilter = false;
}
} else if (fileKey != nullptr) { // Encrypt stream
addEncryptstream = true;
}
std::unique_ptr<EncryptStream> encStream;
std::unique_ptr<Stream> compressStream;
// a stream whose Filter is exactly FlateDecode is re-encoded through a
// FlateEncoder and keeps its Filter entry
Object filter = stream->getDict()->lookup("Filter");
if (filter.isName("FlateDecode")) {
compressStream = std::make_unique<FlateEncoder>(stream);
stream = compressStream.get();
removeFilter = false;
}
if (addEncryptstream) {
encStream = std::make_unique<EncryptStream>(stream, fileKey, encAlgorithm, keyLength, ref);
encStream->setAutoDelete(false);
stream = encStream.get();
}
stream->reset();
// recalculate stream length
// (done by reading the final stream once to the end and counting the bytes)
Goffset tmp = 0;
for (int c = stream->getChar(); c != EOF; c = stream->getChar()) {
tmp++;
}
stream->getDict()->set("Length", Object(tmp));
// Remove Stream encoding
// NOTE(review): the cast is applied to `stream`, which by now may point to
// the encoder/encrypt wrapper rather than the original stream - confirm
// that this is intended.
AutoFreeMemStream *internalStream = dynamic_cast<AutoFreeMemStream *>(stream);
if (internalStream && internalStream->isFilterRemovalForbidden()) {
removeFilter = false;
}
if (removeFilter) {
stream->getDict()->remove("Filter");
}
stream->getDict()->remove("DecodeParms");
writeDictionary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
writeStream(stream, outStr);
} else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream *>(stream)->getNeedsEncryptionOnSave()) {
// file stream flagged as needing encryption on save: write it through an EncryptStream
EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
encStream->setAutoDelete(false);
writeDictionary(encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
writeStream(encStream, outStr);
delete encStream;
} else {
// raw stream copy
// when the xref knows where the base stream ends, Length is rewritten
// from that end offset before the unfiltered bytes are copied
FilterStream *fs = dynamic_cast<FilterStream *>(stream);
if (fs) {
BaseStream *bs = fs->getBaseStream();
if (bs) {
Goffset streamEnd;
if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
Goffset val = streamEnd - bs->getStart();
stream->getDict()->set("Length", Object(val));
}
}
}
writeDictionary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
writeRawStream(stream, outStr);
}
break;
}
case objRef:
// indirect reference: the object number is shifted by numOffset
outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
break;
case objCmd:
outStr->printf("%s\n", obj->getCmd());
break;
case objError:
outStr->printf("error\r\n");
break;
case objEOF:
outStr->printf("eof\r\n");
break;
case objNone:
outStr->printf("none\r\n");
break;
default:
error(errUnimplemented, -1, "Unhandled objType : {0:d}, please report a bug with a testcase", obj->getType());
break;
}
}
// Writes the "endobj" keyword, on a line of its own, to outStr.
void PDFDoc::writeObjectFooter(OutStream *outStr)
{
    outStr->printf("\r\n"
                   "endobj\r\n");
}
// Builds the trailer dictionary for a file that is about to be written.
//
// - uxrefSize becomes the Size entry.
// - incrUpdate selects incremental-update behaviour: a Prev entry holding
//   startxRef is added, and only the second element of the ID array changes.
// - root becomes the Root entry.
// - xRef provides the original trailer, from which Encrypt, ID and Info are
//   taken when available.
// - fileName (may be null) and fileSize only feed the MD5 digest from which
//   new ID elements are generated.
//
// Returns the new dictionary wrapped in an Object.
Object PDFDoc::createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
{
    Dict *trailerDict = new Dict(xRef);
    trailerDict->set("Size", Object(uxrefSize));

    // build a new ID, as recommended in the reference, uses:
    // - current time
    // - file name
    // - file size
    // - values of entry in information dictionary
    GooString message;
    char buffer[256];
    snprintf(buffer, sizeof(buffer), "%i", (int)time(nullptr));
    message.append(buffer);

    if (fileName) {
        message.append(fileName);
    }

    snprintf(buffer, sizeof(buffer), "%lli", (long long)fileSize);
    message.append(buffer);

    // info dict -- only use text string
    if (!xRef->getTrailerDict()->isNone()) {
        Object docInfo = xRef->getDocInfo();
        if (docInfo.isDict()) {
            for (int i = 0; i < docInfo.getDict()->getLength(); i++) {
                Object obj2 = docInfo.getDict()->getVal(i);
                if (obj2.isString()) {
                    message.append(obj2.getString());
                }
            }
        }
    }

    bool hasEncrypt = false;
    if (!xRef->getTrailerDict()->isNone()) {
        Object obj2 = xRef->getTrailerDict()->dictLookupNF("Encrypt").copy();
        if (!obj2.isNull()) {
            trailerDict->set("Encrypt", std::move(obj2));
            hasEncrypt = true;
        }
    }

    // calculate md5 digest
    unsigned char digest[16];
    md5((unsigned char *)message.c_str(), message.getLength(), digest);

    // create ID array
    // In case of encrypted files, the ID must not be changed because it's used to calculate the key
    if (incrUpdate || hasEncrypt) {
        // only update the second part of the array
        // The original trailer is only dereferenced when it really is a
        // dictionary, like every other access to it in this function; without
        // one there is no ID to carry over.
        Object obj4 = xRef->getTrailerDict()->isDict() ? xRef->getTrailerDict()->getDict()->lookup("ID") : Object(objNull);
        if (!obj4.isArray()) {
            if (hasEncrypt) {
                error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
            }
        } else {
            Array *array = new Array(xRef);
            // Get the first part of the ID
            array->add(obj4.arrayGet(0));
            array->add(Object(new GooString((const char *)digest, 16)));
            trailerDict->set("ID", Object(array));
        }
    } else {
        // new file => same values for the two identifiers
        Array *array = new Array(xRef);
        array->add(Object(new GooString((const char *)digest, 16)));
        array->add(Object(new GooString((const char *)digest, 16)));
        trailerDict->set("ID", Object(array));
    }

    trailerDict->set("Root", Object(*root));

    if (incrUpdate) {
        trailerDict->set("Prev", Object(startxRef));
    }

    if (!xRef->getTrailerDict()->isNone()) {
        Object obj5 = xRef->getDocInfoNF();
        if (!obj5.isNull()) {
            trailerDict->set("Info", std::move(obj5));
        }
    }

    return Object(trailerDict);
}
// Writes a classic xref section to outStr: the table held by uxref, the
// "trailer" keyword with trailerDict, the startxref offset and the %%EOF marker.
void PDFDoc::writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
{
    uxref->writeTableToFile(outStr, writeAllEntries);

    outStr->printf("trailer\r\n");
    // the trailer dictionary is written without a file key and with a zero reference
    const Ref zeroRef = { 0, 0 };
    Dict *trailer = trailerDict.getDict();
    writeDictionary(trailer, outStr, xRef, 0, nullptr, cryptRC4, 0, zeroRef, nullptr);

    outStr->printf("\r\nstartxref\r\n");
    outStr->printf("%lli\r\n", uxrefOffset);
    outStr->printf("%%%%EOF\r\n");
}
// Writes the xref information as a cross-reference stream object: uxref fills
// the stream data and parts of trailerDict, the result is written as the
// indirect object uxrefStreamRef, followed by the startxref offset and %%EOF.
void PDFDoc::writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
{
    // Fill the stream data and some trailerDict fields
    GooString streamData;
    uxref->writeStreamToBuffer(&streamData, trailerDict.getDict(), xRef);

    // Wrap the data and the trailer dictionary into a stream object
    Object xrefStreamObj(static_cast<Stream *>(new MemStream(streamData.c_str(), 0, streamData.getLength(), std::move(trailerDict))));

    // Write it as an indirect object, without a file key
    writeObjectHeader(uxrefStreamRef, outStr);
    writeObject(&xrefStreamObj, outStr, xRef, 0, nullptr, cryptRC4, 0, 0, 0);
    writeObjectFooter(outStr);

    outStr->printf("startxref\r\n");
    outStr->printf("%lli\r\n", uxrefOffset);
    outStr->printf("%%%%EOF\r\n");
}
// Convenience overload: builds the trailer dictionary for this document
// (root reference, file name and size of the loaded stream) and forwards to
// the overload that writes the xref table and trailer.
void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate)
{
    // file size (doesn't include the trailer), obtained by reading the
    // loaded stream to its end
    unsigned int fileSize = 0;
    str->reset();
    for (int c = str->getChar(); c != EOF; c = str->getChar()) {
        ++fileSize;
    }
    str->close();

    Ref rootRef;
    rootRef.num = getXRef()->getRootNum();
    rootRef.gen = getXRef()->getRootGen();

    const char *fileNameA = nullptr;
    if (fileName) {
        fileNameA = fileName->c_str();
    }

    Object trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &rootRef, getXRef(), fileNameA, fileSize);
    writeXRefTableTrailer(std::move(trailerDict), uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
}
// Writes the file header: the "%PDF-major.minor" line followed by a comment
// line made of four bytes with the high bit set.
void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
{
    outStr->printf("%%PDF-%d.%d\n", major, minor);

    const int markerBytes[4] = { 0xE2, 0xE3, 0xCF, 0xD3 };
    outStr->printf("%%%c%c%c%c\n", markerBytes[0], markerBytes[1], markerBytes[2], markerBytes[3]);
}
// Marks every object reachable from the values of dict.
//
// All values are handed to markObject, except the value of the "Annots" key,
// which goes to markAnnotations when it is not null. alreadyMarkedDicts holds
// the dictionaries visited so far; when dict is already in it, a warning is
// emitted and the dictionary is not walked again. Callers starting a
// traversal pass nullptr.
//
// Returns false as soon as markObject fails for one of the values, true
// otherwise (including the already-visited case).
bool PDFDoc::markDictionary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
{
    // When the caller supplies no set, visited dictionaries are tracked in a
    // local one. Being an automatic object it is released on every return
    // path, including the early "return false" below.
    std::set<Dict *> localMarkedDicts;
    if (!alreadyMarkedDicts) {
        alreadyMarkedDicts = &localMarkedDicts;
    }

    // insert() reports through .second whether dict was new to the set
    if (!alreadyMarkedDicts->insert(dict).second) {
        error(errSyntaxWarning, -1, "PDFDoc::markDictionary: Found recursive dicts");
        return true;
    }

    for (int i = 0; i < dict->getLength(); i++) {
        const char *key = dict->getKey(i);
        if (strcmp(key, "Annots") != 0) {
            Object obj1 = dict->getValNF(i).copy();
            const bool success = markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
            if (unlikely(!success)) {
                return false;
            }
        } else {
            Object annotsObj = dict->getValNF(i).copy();
            if (!annotsObj.isNull()) {
                // NOTE(review): annotations are marked with an offset of 0 rather than numOffset - confirm this is intended
                markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum, alreadyMarkedDicts);
            }
        }
    }

    return true;
}
// Walks obj recursively and registers every indirect object it references.
//
// Arrays, dictionaries and stream dictionaries are descended into. For an
// indirect reference, the object number shifted by numOffset is added to xRef
// as a used entry (when not already present there), countRef's entry for it
// is created or has its gen field incremented, and the referenced object is
// fetched from this document's xref and walked in turn.
//
// Returns false when adding an entry to xRef fails or a nested call fails;
// true otherwise.
bool PDFDoc::markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
{
Array *array;
switch (obj->getType()) {
case objArray:
array = obj->getArray();
for (int i = 0; i < array->getLength(); i++) {
Object obj1 = array->getNF(i).copy();
const bool success = markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
if (unlikely(!success)) {
return false;
}
}
break;
case objDict: {
const bool success = markDictionary(obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
if (unlikely(!success)) {
return false;
}
} break;
case objStream: {
// only the stream's dictionary is walked
Stream *stream = obj->getStream();
const bool success = markDictionary(stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
if (unlikely(!success)) {
return false;
}
} break;
case objRef: {
// not yet present in xRef (beyond its size, or still a free entry there)
if (obj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
return true; // already marked as free => should be replaced
}
const bool success = xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, true);
if (unlikely(!success)) {
return false;
}
// carry over the compressed type from this document's own entry
if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
}
}
// countRef: first sighting adds an entry with gen 1, later sightings increment gen
if (obj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
countRef->add(obj->getRef().num + numOffset, 1, 0, true);
} else {
XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
entry->gen++;
// once the counter exceeds 9 the referenced object is not walked again
// (presumably a guard against endless recursion - confirm)
if (entry->gen > 9) {
break;
}
}
// NOTE(review): the nested call does not pass alreadyMarkedDicts on, so
// each followed reference starts a fresh set of visited dictionaries
Object obj1 = getXRef()->fetch(obj->getRef());
const bool success = markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
if (unlikely(!success)) {
return false;
}
} break;
default:
break;
}
return true;
}
// Replaces the box and rotation entries of page pageNo.
//
// MediaBox, CropBox, ArtBox, BleedBox, TrimBox and Rotate are removed from
// the page dictionary; MediaBox is then set from mediaBox, CropBox from
// cropBox when one is given, TrimBox to the crop box if given and to the
// media box otherwise, and Rotate to rotate. The page object is registered
// as modified in the xref.
//
// Returns false when no page reference is available or the page object is
// not a dictionary, true otherwise.
bool PDFDoc::replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox)
{
    Ref *refPage = getCatalog()->getPageRef(pageNo);
    // NOTE(review): assumes getPageRef reports an unknown page with a null
    // pointer; dereferencing it unchecked would crash in that case.
    if (refPage == nullptr) {
        return false;
    }

    Object page = getXRef()->fetch(*refPage);
    if (!page.isDict()) {
        return false;
    }

    Dict *pageDict = page.getDict();
    pageDict->remove("MediaBox");
    pageDict->remove("CropBox");
    pageDict->remove("ArtBox");
    pageDict->remove("BleedBox");
    pageDict->remove("TrimBox");
    pageDict->remove("Rotate");

    Array *mediaBoxArray = new Array(getXRef());
    mediaBoxArray->add(Object(mediaBox->x1));
    mediaBoxArray->add(Object(mediaBox->y1));
    mediaBoxArray->add(Object(mediaBox->x2));
    mediaBoxArray->add(Object(mediaBox->y2));
    Object mediaBoxObject(mediaBoxArray);
    // TrimBox defaults to the media box and is overridden by the crop box below
    Object trimBoxObject = mediaBoxObject.copy();
    pageDict->add("MediaBox", std::move(mediaBoxObject));

    if (cropBox != nullptr) {
        Array *cropBoxArray = new Array(getXRef());
        cropBoxArray->add(Object(cropBox->x1));
        cropBoxArray->add(Object(cropBox->y1));
        cropBoxArray->add(Object(cropBox->x2));
        cropBoxArray->add(Object(cropBox->y2));
        Object cropBoxObject(cropBoxArray);
        trimBoxObject = cropBoxObject.copy();
        pageDict->add("CropBox", std::move(cropBoxObject));
    }

    pageDict->add("TrimBox", std::move(trimBoxObject));
    pageDict->add("Rotate", Object(rotate));
    getXRef()->setModifiedObject(&page, *refPage);
    return true;
}
// Marks the objects referenced from pageDict.
//
// The OpenAction, Outlines and StructTreeRoot entries are removed first.
// Every remaining value is handed to markObject, except those stored under
// Parent, Pages, AcroForm, Annots, P and Root.
//
// Returns false as soon as markObject fails, true otherwise.
bool PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
{
    static const char *const removedKeys[] = { "OpenAction", "Outlines", "StructTreeRoot" };
    static const char *const skippedKeys[] = { "Parent", "Pages", "AcroForm", "Annots", "P", "Root" };

    for (const char *removedKey : removedKeys) {
        pageDict->remove(removedKey);
    }

    for (int n = 0; n < pageDict->getLength(); n++) {
        const char *key = pageDict->getKey(n);
        Object value = pageDict->getValNF(n).copy();

        bool skip = false;
        for (const char *skippedKey : skippedKeys) {
            if (strcmp(key, skippedKey) == 0) {
                skip = true;
                break;
            }
        }
        if (skip) {
            continue;
        }

        if (unlikely(!markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts))) {
            return false;
        }
    }
    return true;
}
// Processes an annotation array (annotsObj may be the array itself or a
// reference to it).
//
// For each dictionary element of type Annot whose P entry is a reference:
// - P pointing at oldPageNum is rewritten to point at newPageNum (gen 0);
// - P pointing at newPageNum leaves the element untouched;
// - P pointing anywhere else removes the element from the array, unless the
//   target is a dictionary whose Type is not Page, in which case the element
//   is skipped.
// Dictionary elements that are kept are walked with markPageObjects, and
// elements stored as references are registered in xRef and countRef. When
// annotsObj is a reference, it is registered in the same way and the array is
// recorded as modified in this document's xref.
//
// Returns true when at least one element was removed from the array.
bool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts)
{
bool modified = false;
Object annots = annotsObj->fetch(getXRef());
if (annots.isArray()) {
Array *array = annots.getArray();
// iterate backwards so that removing element i does not shift the elements still to be visited
for (int i = array->getLength() - 1; i >= 0; i--) {
Object obj1 = array->get(i);
if (obj1.isDict()) {
Dict *dict = obj1.getDict();
Object type = dict->lookup("Type");
if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
const Object &obj2 = dict->lookupNF("P");
if (obj2.isRef()) {
if (obj2.getRef().num == oldPageNum) {
// the annotation belongs to the old page: point it at the new one
const Object &obj3 = array->getNF(i);
if (obj3.isRef()) {
Ref r;
r.num = newPageNum;
r.gen = 0;
dict->set("P", Object(r));
getXRef()->setModifiedObject(&obj1, obj3.getRef());
}
} else if (obj2.getRef().num == newPageNum) {
continue;
} else {
// P points at some other object
Object page = getXRef()->fetch(obj2.getRef());
if (page.isDict()) {
Dict *pageDict = page.getDict();
Object pagetype = pageDict->lookup("Type");
if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
continue;
}
}
array->remove(i);
modified = true;
continue;
}
}
}
// NOTE(review): the return value of markPageObjects is not checked here
markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum, alreadyMarkedDicts);
}
// register the element itself when it is stored as an indirect reference
obj1 = array->getNF(i).copy();
if (obj1.isRef()) {
if (obj1.getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
continue; // already marked as free => should be replaced
}
xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, true);
if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
}
}
// countRef: first sighting adds an entry with gen 1, later sightings increment gen
if (obj1.getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
countRef->add(obj1.getRef().num + numOffset, 1, 0, true);
} else {
XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
entry->gen++;
}
}
}
}
// register the array object itself when it was given as an indirect reference
if (annotsObj->isRef()) {
if (annotsObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
return modified; // already marked as free => should be replaced
}
xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, true);
if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
}
}
if (annotsObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
countRef->add(annotsObj->getRef().num + numOffset, 1, 0, true);
} else {
XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
entry->gen++;
}
getXRef()->setModifiedObject(&annots, annotsObj->getRef());
}
return modified;
}
// Marks the objects referenced from the AcroForm object afObj (the dictionary
// itself or a reference to it).
//
// The Fields entry is processed with markAnnotations; every other entry is
// walked with markObject. When afObj is a reference, it is registered in xRef
// and countRef, and the dictionary is recorded as modified in this document's
// xref if markAnnotations reported a change.
void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum)
{
bool modified = false;
Object acroform = afObj->fetch(getXRef());
if (acroform.isDict()) {
Dict *dict = acroform.getDict();
for (int i = 0; i < dict->getLength(); i++) {
if (strcmp(dict->getKey(i), "Fields") == 0) {
Object fields = dict->getValNF(i).copy();
modified = markAnnotations(&fields, xRef, countRef, numOffset, oldRefNum, newRefNum);
} else {
// NOTE(review): the return value of markObject is not checked here
Object obj = dict->getValNF(i).copy();
markObject(&obj, xRef, countRef, numOffset, oldRefNum, newRefNum);
}
}
}
// register the AcroForm object itself when it was given as an indirect reference
if (afObj->isRef()) {
if (afObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
return; // already marked as free => should be replaced
}
xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, true);
if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
}
}
// countRef: first sighting adds an entry with gen 1, later sightings increment gen
if (afObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
countRef->add(afObj->getRef().num + numOffset, 1, 0, true);
} else {
XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
entry->gen++;
}
if (modified) {
getXRef()->setModifiedObject(&acroform, afObj->getRef());
}
}
}
// Writes every non-free entry of xRef, starting at index numOffset, to outStr
// and stores the offset each object was written at back into xRef.
//
// The object written for entry n is fetched from this document's own xref
// under the number n - numOffset. With combine set, objects are written
// without a file key and references are shifted by numOffset; otherwise
// entries flagged Unencrypted are written without a file key and all others
// with xRef's encryption parameters.
//
// Returns the number of objects written.
unsigned int PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine)
{
    unsigned char *fileKey;
    CryptAlgorithm encAlgorithm;
    int keyLength;
    xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);

    unsigned int written = 0; // count the number of objects in the XRef(s)
    for (int n = numOffset; n < xRef->getNumObjects(); n++) {
        if (xRef->getEntry(n)->type == xrefEntryFree) {
            continue;
        }

        Ref ref;
        ref.num = n;
        ref.gen = xRef->getEntry(n)->gen;
        written++;

        Object obj = getXRef()->fetch(ref.num - numOffset, ref.gen);
        const Goffset offset = writeObjectHeader(&ref, outStr);
        if (combine) {
            writeObject(&obj, outStr, getXRef(), numOffset, nullptr, cryptRC4, 0, 0, 0);
        } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
            writeObject(&obj, outStr, nullptr, cryptRC4, 0, 0, 0);
        } else {
            writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref);
        }
        writeObjectFooter(outStr);
        xRef->add(ref, offset, true);
    }
    return written;
}
// Returns the document outline, creating it from the catalog on first use.
Outline *PDFDoc::getOutline()
{
    if (outline) {
        return outline;
    }

    pdfdocLocker();
    // read outline
    outline = new Outline(catalog->getOutline(), xref, this);
    return outline;
}
std::unique_ptr<PDFDoc> PDFDoc::ErrorPDFDoc(int errorCode, std::unique_ptr<GooString> &&fileNameA)
{
// We cannot call std::make_unique here because the PDFDoc constructor is private
PDFDoc *doc = new PDFDoc();
doc->errCode = errorCode;
doc->fileName = std::move(fileNameA);
return std::unique_ptr<PDFDoc>(doc);
}
// Parses the run of decimal digits at the start of s.
// Parsing stops at the first non-digit, or just before the digit that would
// make the value exceed LLONG_MAX; the value accumulated so far is returned
// (0 when s does not start with a digit).
long long PDFDoc::strToLongLong(const char *s)
{
    long long value = 0;
    const char *cur = s;
    while (*cur && isdigit(*cur & 0xff)) {
        const long long digit = *cur - '0';
        if (value > (LLONG_MAX - digit) / 10) {
            break;
        }
        value = 10 * value + digit;
        ++cur;
    }
    return value;
}
// Read the 'startxref' position.
//
// The result is cached in startXRefPos; the file is only searched while that
// member still is -1.
// - Linearized file: the first linearizationSearchSize bytes are scanned for
//   the "endobj" that closes the first object, and the position after the
//   whitespace following it is used (0 if none is found).
// - Otherwise: the end of the file is searched backwards, in overlapping
//   segments of xrefSearchSize bytes, for the last "startxref" keyword, and
//   the number following it is used (0 if none is found).
Goffset PDFDoc::getStartXRef(bool tryingToReconstruct)
{
    if (startXRefPos == -1) {

        if (isLinearized(tryingToReconstruct)) {
            char buf[linearizationSearchSize + 1];
            int c, n, i;

            str->setPos(0);
            for (n = 0; n < linearizationSearchSize; ++n) {
                if ((c = str->getChar()) == EOF) {
                    break;
                }
                buf[n] = c;
            }
            buf[n] = '\0';

            // find end of first obj (linearization dictionary)
            startXRefPos = 0;
            for (i = 0; i < n; i++) {
                if (!strncmp("endobj", &buf[i], 6)) {
                    i += 6;
                    // skip whitespace
                    while (buf[i] && Lexer::isSpace(buf[i])) {
                        ++i;
                    }
                    startXRefPos = i;
                    break;
                }
            }
        } else {
            char buf[xrefSearchSize + 1];
            const char *p;
            int c, n, i;

            // read last xrefSearchSize bytes
            int segnum = 0;
            int maxXRefSearch = 24576;
            if (str->getLength() < maxXRefSearch) {
                maxXRefSearch = static_cast<int>(str->getLength());
            }
            // consecutive segments overlap by 16 bytes, so a keyword that
            // straddles a segment boundary is still found
            for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
                str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
                for (n = 0; n < xrefSearchSize; ++n) {
                    if ((c = str->getChar()) == EOF) {
                        break;
                    }
                    buf[n] = c;
                }
                buf[n] = '\0';

                // find startxref
                for (i = n - 9; i >= 0; --i) {
                    if (!strncmp(&buf[i], "startxref", 9)) {
                        break;
                    }
                }
                if (i < 0) {
                    startXRefPos = 0;
                } else {
                    // The byte is masked before being classified: buf holds
                    // raw file bytes, and isspace() must not be handed a
                    // negative value.
                    for (p = &buf[i + 9]; isspace(*p & 0xff); ++p) {
                        ;
                    }
                    startXRefPos = strToLongLong(p);
                    break;
                }
            }
        }
    }

    return startXRefPos;
}
// Returns the main xref entries offset recorded in the linearization
// dictionary, or 0 when the document is not linearized.
Goffset PDFDoc::getMainXRefEntriesOffset(bool tryingToReconstruct)
{
    if (!isLinearized(tryingToReconstruct)) {
        return 0;
    }
    const unsigned int offset = getLinearization()->getMainXRefEntriesOffset();
    return offset;
}
// Returns the number of pages: the non-zero count from the linearization
// dictionary for a linearized document, otherwise the catalog's count.
int PDFDoc::getNumPages()
{
    if (isLinearized()) {
        const int linearizedCount = getLinearization()->getNumPages();
        if (linearizedCount) {
            return linearizedCount;
        }
    }

    return catalog->getNumPages();
}
// Builds the Page object for page from the object number given by the hint
// tables. Returns nullptr, after emitting a warning, when the hint tables
// give no object number, the number lies outside the xref, or the object is
// not a Page dictionary.
Page *PDFDoc::parsePage(int page)
{
    const int objNum = getHints()->getPageObjectNum(page);
    if (!objNum) {
        error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
        return nullptr;
    }

    // check for bogus ref - this can happen in corrupted PDF files
    if (objNum < 0 || objNum >= xref->getNumObjects()) {
        error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", objNum, page);
        return nullptr;
    }

    Ref pageRef;
    pageRef.num = objNum;
    pageRef.gen = xref->getEntry(objNum)->gen;

    Object pageObj = xref->fetch(pageRef);
    if (!pageObj.isDict("Page")) {
        error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
        return nullptr;
    }

    // take the dictionary pointer before pageObj is moved into the Page
    Dict *pageDict = pageObj.getDict();
    PageAttrs *attrs = new PageAttrs(nullptr, pageDict);
    return new Page(this, page, std::move(pageObj), pageRef, attrs, catalog->getForm());
}
// Returns page number page (1-based), or nullptr when the number is out of
// range. For a linearized document that passes checkLinearization(), the page
// is parsed via the hint tables and kept in pageCache; if that fails, or for
// any other document, the page comes from the catalog.
Page *PDFDoc::getPage(int page)
{
    if (page < 1 || page > getNumPages()) {
        return nullptr;
    }

    if (isLinearized() && checkLinearization()) {
        pdfdocLocker();

        // allocate the cache on first use, with every slot empty
        if (!pageCache) {
            pageCache = (Page **)gmallocn(getNumPages(), sizeof(Page *));
            for (int idx = 0; idx < getNumPages(); idx++) {
                pageCache[idx] = nullptr;
            }
        }

        Page *&cached = pageCache[page - 1];
        if (!cached) {
            cached = parsePage(page);
        }
        if (cached) {
            return cached;
        }
        error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
    }

    return catalog->getPage(page);
}
bool PDFDoc::hasJavascript()
{
JSInfo jsInfo(this);
jsInfo.scanJS(getNumPages(), true);
return jsInfo.containsJS();
}
// Builds a new signature form field together with its widget annotation and
// registers the field in the document's AcroForm.
//
// destPage is only checked for null here; the annotation is NOT added to the
// page by this function, that is left to the caller.
//
// Parameters:
//   partialFieldName   - stored as the "T" entry of the new field (moved from)
//   rect               - stored as the "Rect" entry of the new field
//   signatureText      - custom appearance content of the field
//   signatureTextLeft  - custom appearance left content of the field
//   fontSize           - size used for the default appearance ("DA") string
//   leftFontSize       - custom appearance left font size of the field
//   fontColor          - colour used for the default appearance (moved from)
//   borderWidth        - width of the annotation border
//   borderColor        - border colour of the appearance characteristics (moved from)
//   backgroundColor    - back colour of the appearance characteristics (moved from)
//   imagePath          - when not empty, the image is embedded and set as the
//                        field's image resource
//
// Returns std::nullopt when destPage is null, when the image cannot be embedded
// (ImageEmbeddingUtils::embed yields Ref::INVALID()), or when no font name for
// signing is found. Otherwise returns the SignatureData describing the new
// field reference, annotation, form widget and field.
std::optional<PDFDoc::SignatureData> PDFDoc::createSignature(::Page *destPage, std::unique_ptr<GooString> &&partialFieldName, const PDFRectangle &rect, const GooString &signatureText, const GooString &signatureTextLeft, double fontSize,
                                                             double leftFontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor,
                                                             const std::string &imagePath)
{
    if (!destPage) {
        return std::nullopt;
    }

    // Embed the optional image first; a failed embed aborts the whole operation.
    Ref imageRef = Ref::INVALID();
    if (!imagePath.empty()) {
        imageRef = ImageEmbeddingUtils::embed(xref, imagePath);
        if (imageRef == Ref::INVALID()) {
            return std::nullopt;
        }
    }

    // Without a usable font name no appearance can be produced.
    Form *const acroForm = catalog->getCreateForm();
    const std::string signingFont = acroForm->findPdfFontNameToUseForSigning();
    if (signingFont.empty()) {
        return std::nullopt;
    }

    const DefaultAppearance defaultAppearance { { objName, signingFont.c_str() }, fontSize, std::move(fontColor) };

    // Assemble the dictionary that acts as both signature field and widget annotation.
    Object fieldDict(new Dict(getXRef()));
    fieldDict.dictSet("Type", Object(objName, "Annot"));
    fieldDict.dictSet("Subtype", Object(objName, "Widget"));
    fieldDict.dictSet("FT", Object(objName, "Sig"));
    fieldDict.dictSet("T", Object(std::move(partialFieldName)));

    Array *const rectCoords = new Array(getXRef());
    for (const double coord : { rect.x1, rect.y1, rect.x2, rect.y2 }) {
        rectCoords->add(Object(coord));
    }
    fieldDict.dictSet("Rect", Object(rectCoords));

    const std::string appearanceString = defaultAppearance.toAppearanceString();
    fieldDict.dictSet("DA", Object(new GooString(appearanceString)));

    // Register the dictionary as an indirect object and hook it into the AcroForm.
    const Ref fieldRef = getXRef()->addIndirectObject(fieldDict);
    catalog->addFormToAcroForm(fieldRef);
    catalog->setAcroFormModified();

    acroForm->ensureFontsForAllCharacters(&signatureText, signingFont);
    acroForm->ensureFontsForAllCharacters(&signatureTextLeft, signingFont);

    // The field takes over the dictionary and carries the custom appearance settings.
    auto field = std::make_unique<::FormFieldSignature>(this, std::move(fieldDict), fieldRef, nullptr, nullptr);
    field->setCustomAppearanceContent(signatureText);
    field->setCustomAppearanceLeftContent(signatureTextLeft);
    field->setCustomAppearanceLeftFontSize(leftFontSize);
    field->setImageResource(imageRef);

    // Create the widget annotation that renders the field.
    Object fieldRefObj(fieldRef);
    AnnotWidget *const widgetAnnot = new AnnotWidget(this, field->getObj(), &fieldRefObj, field.get());
    widgetAnnot->setFlags(widgetAnnot->getFlags() | Annot::flagPrint | /*Annot::flagLocked | TODO */ Annot::flagNoRotate);

    // Appearance characteristics start from an empty dictionary and only get the two colours.
    Dict emptyDict(getXRef());
    auto characteristics = std::make_unique<AnnotAppearanceCharacs>(&emptyDict);
    characteristics->setBorderColor(std::move(borderColor));
    characteristics->setBackColor(std::move(backgroundColor));
    widgetAnnot->setAppearCharacs(std::move(characteristics));

    std::unique_ptr<AnnotBorder> frame = std::make_unique<AnnotBorderArray>();
    frame->setWidth(borderWidth);
    widgetAnnot->setBorder(std::move(frame));

    widgetAnnot->generateFieldAppearance();
    widgetAnnot->updateAppearanceStream();

    // Attach the annotation to the field's last widget.
    FormWidget *const lastWidget = field->getWidget(field->getNumWidgets() - 1);
    lastWidget->setWidgetAnnotation(widgetAnnot);

    return SignatureData { { fieldRef.num, fieldRef.gen }, widgetAnnot, lastWidget, std::move(field) };
}
std::optional<CryptoSign::SigningError> PDFDoc::sign(const std::string &saveFilename, const std::string &certNickname, const std::string &password, std::unique_ptr<GooString> &&partialFieldName, int page, const PDFRectangle &rect,
const GooString &signatureText, const GooString &signatureTextLeft, double fontSize, double leftFontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth,
std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor, const GooString *reason, const GooString *location, const std::string &imagePath,
const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword)
{
::Page *destPage = getPage(page);
if (destPage == nullptr) {
return CryptoSign::SigningError::InternalError;
}
std::optional<SignatureData> sig =
createSignature(destPage, std::move(partialFieldName), rect, signatureText, signatureTextLeft, fontSize, leftFontSize, std::move(fontColor), borderWidth, std::move(borderColor), std::move(backgroundColor), imagePath);
if (!sig) {
return CryptoSign::SigningError::GenericError; /*This should probably be expanded with error handling from createSignature*/
}
sig->annotWidget->setFlags(sig->annotWidget->getFlags() | Annot::flagLocked);
// say that there a now signatures and that we should append only
catalog->getAcroForm()->dictSet("SigFlags", Object(3));
destPage->addAnnot(sig->annotWidget);
FormWidgetSignature *fws = dynamic_cast<FormWidgetSignature *>(sig->formWidget);
if (fws) {
const auto res = fws->signDocument(saveFilename, certNickname, password, reason, location, ownerPassword, userPassword);
// Now remove the signature stuff in case the user wants to continue editing stuff
// So the document object is clean
const Object &vRefObj = sig->field->getObj()->dictLookupNF("V");
if (vRefObj.isRef()) {
getXRef()->removeIndirectObject(vRefObj.getRef());
}
destPage->removeAnnot(sig->annotWidget);
catalog->removeFormFromAcroForm(sig->ref);
getXRef()->removeIndirectObject(sig->ref);
return res;
}
return CryptoSign::SigningError::InternalError;
}