blob: ba64296eb4d5e50f1e05bfbb210f013f190b6899 [file] [log] [blame]
//========================================================================
//
// PDFDoc.h
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2005, 2009, 2014, 2015, 2017-2019 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008 Pino Toscano <pino@kde.org>
// Copyright (C) 2008 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
// Copyright (C) 2010, 2014 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
// Copyright (C) 2011, 2013, 2014, 2016 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
// Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
// Copyright (C) 2015 André Esser <bepandre@hotmail.com>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifndef PDFDOC_H
#define PDFDOC_H
#include <mutex>
#include "poppler-config.h"
#include <stdio.h>
#include "XRef.h"
#include "Catalog.h"
#include "Page.h"
#include "Annot.h"
#include "Form.h"
#include "OptionalContent.h"
#include "Stream.h"
class GooString;
class GooFile;
class BaseStream;
class OutputDev;
class Links;
class LinkAction;
class LinkDest;
class Outline;
class Linearization;
class SecurityHandler;
class Hints;
class StructTreeRoot;
enum PDFWriteMode {
writeStandard,
writeForceRewrite,
writeForceIncremental
};
enum PDFSubtype {
subtypeNull,
subtypePDFA,
subtypePDFE,
subtypePDFUA,
subtypePDFVT,
subtypePDFX,
subtypeNone
};
enum PDFSubtypePart {
subtypePartNull,
subtypePart1,
subtypePart2,
subtypePart3,
subtypePart4,
subtypePart5,
subtypePart6,
subtypePart7,
subtypePart8,
subtypePartNone
};
enum PDFSubtypeConformance {
subtypeConfNull,
subtypeConfA,
subtypeConfB,
subtypeConfG,
subtypeConfN,
subtypeConfP,
subtypeConfPG,
subtypeConfU,
subtypeConfNone
};
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
class PDFDoc {
public:
PDFDoc(const GooString *fileNameA, const GooString *ownerPassword = nullptr,
const GooString *userPassword = nullptr, void *guiDataA = nullptr);
#ifdef _WIN32
PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword = nullptr,
GooString *userPassword = nullptr, void *guiDataA = nullptr);
#endif
PDFDoc(BaseStream *strA, const GooString *ownerPassword = nullptr,
const GooString *userPassword = nullptr, void *guiDataA = nullptr);
~PDFDoc();
PDFDoc(const PDFDoc &) = delete;
PDFDoc& operator=(const PDFDoc &) = delete;
static PDFDoc *ErrorPDFDoc(int errorCode, const GooString *fileNameA = nullptr);
// Was PDF document successfully opened?
bool isOk() const { return ok; }
// Get the error code (if isOk() returns false).
int getErrorCode() const { return errCode; }
// Get the error code returned by fopen() (if getErrorCode() ==
// errOpenFile).
int getFopenErrno() const { return fopenErrno; }
// Get file name.
const GooString *getFileName() const { return fileName; }
#ifdef _WIN32
wchar_t *getFileNameU() { return fileNameU; }
#endif
// Get the linearization table.
Linearization *getLinearization();
bool checkLinearization();
// Get the xref table.
XRef *getXRef() const { return xref; }
// Get catalog.
Catalog *getCatalog() const { return catalog; }
// Get optional content configuration
OCGs *getOptContentConfig() const { return catalog->getOptContentConfig(); }
// Get base stream.
BaseStream *getBaseStream() const { return str; }
// Get page parameters.
double getPageMediaWidth(int page)
{ return getPage(page) ? getPage(page)->getMediaWidth() : 0.0 ; }
double getPageMediaHeight(int page)
{ return getPage(page) ? getPage(page)->getMediaHeight() : 0.0 ; }
double getPageCropWidth(int page)
{ return getPage(page) ? getPage(page)->getCropWidth() : 0.0 ; }
double getPageCropHeight(int page)
{ return getPage(page) ? getPage(page)->getCropHeight() : 0.0 ; }
int getPageRotate(int page)
{ return getPage(page) ? getPage(page)->getRotate() : 0 ; }
// Get number of pages.
int getNumPages();
// Return the contents of the metadata stream, or nullptr if there is
// no metadata.
const GooString *readMetadata() const { return catalog->readMetadata(); }
// Return the structure tree root object.
const StructTreeRoot *getStructTreeRoot() const { return catalog->getStructTreeRoot(); }
// Get page.
Page *getPage(int page);
// Display a page.
void displayPage(OutputDev *out, int page,
double hDPI, double vDPI, int rotate,
bool useMediaBox, bool crop, bool printing,
bool (*abortCheckCbk)(void *data) = nullptr,
void *abortCheckCbkData = nullptr,
bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr,
void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
// Display a range of pages.
void displayPages(OutputDev *out, int firstPage, int lastPage,
double hDPI, double vDPI, int rotate,
bool useMediaBox, bool crop, bool printing,
bool (*abortCheckCbk)(void *data) = nullptr,
void *abortCheckCbkData = nullptr,
bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr,
void *annotDisplayDecideCbkData = nullptr);
// Display part of a page.
void displayPageSlice(OutputDev *out, int page,
double hDPI, double vDPI, int rotate,
bool useMediaBox, bool crop, bool printing,
int sliceX, int sliceY, int sliceW, int sliceH,
bool (*abortCheckCbk)(void *data) = nullptr,
void *abortCheckCbkData = nullptr,
bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr,
void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
// Find a page, given its object ID. Returns page number, or 0 if
// not found.
int findPage(const Ref ref) { return catalog->findPage(ref); }
// Returns the links for the current page, transferring ownership to
// the caller.
Links *getLinks(int page);
// Find a named destination. Returns the link destination, or
// nullptr if <name> is not a destination.
LinkDest *findDest(const GooString *name)
{ return catalog->findDest(name); }
// Process the links for a page.
void processLinks(OutputDev *out, int page);
// Return the outline object.
Outline *getOutline();
// Is the file encrypted?
bool isEncrypted() { return xref->isEncrypted(); }
std::vector<FormWidgetSignature*> getSignatureWidgets();
// Check various permissions.
bool okToPrint(bool ignoreOwnerPW = false)
{ return xref->okToPrint(ignoreOwnerPW); }
bool okToPrintHighRes(bool ignoreOwnerPW = false)
{ return xref->okToPrintHighRes(ignoreOwnerPW); }
bool okToChange(bool ignoreOwnerPW = false)
{ return xref->okToChange(ignoreOwnerPW); }
bool okToCopy(bool ignoreOwnerPW = false)
{ return xref->okToCopy(ignoreOwnerPW); }
bool okToAddNotes(bool ignoreOwnerPW = false)
{ return xref->okToAddNotes(ignoreOwnerPW); }
bool okToFillForm(bool ignoreOwnerPW = false)
{ return xref->okToFillForm(ignoreOwnerPW); }
bool okToAccessibility(bool ignoreOwnerPW = false)
{ return xref->okToAccessibility(ignoreOwnerPW); }
bool okToAssemble(bool ignoreOwnerPW = false)
{ return xref->okToAssemble(ignoreOwnerPW); }
// Is this document linearized?
bool isLinearized(bool tryingToReconstruct = false);
// Return the document's Info dictionary (if any).
Object getDocInfo() { return xref->getDocInfo(); }
Object getDocInfoNF() { return xref->getDocInfoNF(); }
// Create and return the document's Info dictionary if none exists.
// Otherwise return the existing one.
Object createDocInfoIfNoneExists() { return xref->createDocInfoIfNoneExists(); }
// Remove the document's Info dictionary and update the trailer dictionary.
void removeDocInfo() { xref->removeDocInfo(); }
// Set doc info string entry. nullptr or empty value will cause a removal.
// Takes ownership of value.
void setDocInfoStringEntry(const char *key, GooString *value);
// Set document's properties in document's Info dictionary.
// nullptr or empty value will cause a removal.
// Takes ownership of value.
void setDocInfoTitle(GooString *title) { setDocInfoStringEntry("Title", title); }
void setDocInfoAuthor(GooString *author) { setDocInfoStringEntry("Author", author); }
void setDocInfoSubject(GooString *subject) { setDocInfoStringEntry("Subject", subject); }
void setDocInfoKeywords(GooString *keywords) { setDocInfoStringEntry("Keywords", keywords); }
void setDocInfoCreator(GooString *creator) { setDocInfoStringEntry("Creator", creator); }
void setDocInfoProducer(GooString *producer) { setDocInfoStringEntry("Producer", producer); }
void setDocInfoCreatDate(GooString *creatDate) { setDocInfoStringEntry("CreationDate", creatDate); }
void setDocInfoModDate(GooString *modDate) { setDocInfoStringEntry("ModDate", modDate); }
// Get document's properties from document's Info dictionary.
// Returns nullptr on fail.
// Returned GooStrings should be freed by the caller.
GooString *getDocInfoStringEntry(const char *key);
GooString *getDocInfoTitle() { return getDocInfoStringEntry("Title"); }
GooString *getDocInfoAuthor() { return getDocInfoStringEntry("Author"); }
GooString *getDocInfoSubject() { return getDocInfoStringEntry("Subject"); }
GooString *getDocInfoKeywords() { return getDocInfoStringEntry("Keywords"); }
GooString *getDocInfoCreator() { return getDocInfoStringEntry("Creator"); }
GooString *getDocInfoProducer() { return getDocInfoStringEntry("Producer"); }
GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
// Return the PDF subtype, part, and conformance
PDFSubtype getPDFSubtype() const { return pdfSubtype; }
PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
// Return the PDF version specified by the file.
int getPDFMajorVersion() const { return pdfMajorVersion; }
int getPDFMinorVersion() const { return pdfMinorVersion; }
//Return the PDF ID in the trailer dictionary (if any).
bool getID(GooString *permanent_id, GooString *update_id) const;
// Save one page with another name.
int savePageAs(const GooString *name, int pageNo);
// Save this file with another name.
int saveAs(const GooString *name, PDFWriteMode mode=writeStandard);
// Save this file in the given output stream.
int saveAs(OutStream *outStr, PDFWriteMode mode=writeStandard);
// Save this file with another name without saving changes
int saveWithoutChangesAs(const GooString *name);
// Save this file in the given output stream without saving changes
int saveWithoutChangesAs(OutStream *outStr);
// Return a pointer to the GUI (XPDFCore or WinPDFCore object).
void *getGUIData() { return guiData; }
// rewrite pageDict with MediaBox, CropBox and new page CTM
void replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox);
void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
bool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
void markAcroForm(Object *acrpForm, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum);
// write all objects used by pageDict to outStr
unsigned int writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine = false);
static void writeObject (Object *obj, OutStream* outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey,
CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts = nullptr);
static void writeObject (Object *obj, OutStream* outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey,
CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict*> *alreadyWrittenDicts = nullptr);
static void writeHeader(OutStream *outStr, int major, int minor);
static Object createTrailerDict (int uxrefSize, bool incrUpdate, Goffset startxRef,
Ref *root, XRef *xRef, const char *fileName, Goffset fileSize);
static void writeXRefTableTrailer (Object &&trailerDict, XRef *uxref, bool writeAllEntries,
Goffset uxrefOffset, OutStream* outStr, XRef *xRef);
static void writeXRefStreamTrailer (Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef,
Goffset uxrefOffset, OutStream* outStr, XRef *xRef);
private:
// insert referenced objects in XRef
void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts);
void markObject (Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey,
CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict*> *alreadyWrittenDicts);
// Write object header to current file stream and return its offset
static Goffset writeObjectHeader (Ref *ref, OutStream* outStr);
static void writeObjectFooter (OutStream* outStr);
inline void writeObject (Object *obj, OutStream* outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm,
int keyLength, int objNum, int objGen)
{ writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, {objNum, objGen}); }
inline void writeObject (Object *obj, OutStream* outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm,
int keyLength, Ref ref)
{ writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, ref); }
static void writeStream (Stream* str, OutStream* outStr);
static void writeRawStream (Stream* str, OutStream* outStr);
void writeXRefTableTrailer (Goffset uxrefOffset, XRef *uxref, bool writeAllEntries,
int uxrefSize, OutStream* outStr, bool incrUpdate);
static void writeString (const GooString* s, OutStream* outStr, const unsigned char *fileKey,
CryptAlgorithm encAlgorithm, int keyLength, Ref ref);
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);
Page *parsePage(int page);
// Get hints.
Hints *getHints();
PDFDoc();
void init();
bool setup(const GooString *ownerPassword, const GooString *userPassword);
bool checkFooter();
void checkHeader();
bool checkEncryption(const GooString *ownerPassword, const GooString *userPassword);
void extractPDFSubtype();
// Get the offset of the start xref table.
Goffset getStartXRef(bool tryingToReconstruct = false);
// Get the offset of the entries in the main XRef table of a
// linearized document (0 for non linearized documents).
Goffset getMainXRefEntriesOffset(bool tryingToReconstruct = false);
long long strToLongLong(const char *s);
// Mark the document's Info dictionary as modified.
void setDocInfoModified(Object *infoObj);
const GooString *fileName;
#ifdef _WIN32
wchar_t *fileNameU;
#endif
GooFile *file;
BaseStream *str;
void *guiData;
int pdfMajorVersion;
int pdfMinorVersion;
PDFSubtype pdfSubtype;
PDFSubtypePart pdfPart;
PDFSubtypeConformance pdfConformance;
Linearization *linearization;
// linearizationState = 0: unchecked
// linearizationState = 1: checked and valid
// linearizationState = 2: checked and invalid
int linearizationState;
XRef *xref;
SecurityHandler *secHdlr;
Catalog *catalog;
Hints *hints;
Outline *outline;
Page **pageCache;
bool ok;
int errCode;
//If there is an error opening the PDF file with fopen() in the constructor,
//then the POSIX errno will be here.
int fopenErrno;
Goffset startXRefPos; // offset of last xref table
mutable std::recursive_mutex mutex;
};
#endif