blob: 1c8538534d52b919866439526f1c3317ae935d3c [file] [log] [blame] [edit]
//========================================================================
//
// PDFDoc.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2005, 2007-2009, 2011 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
// Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
// Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
// Copyright (C) 2010, 2011 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Jakub Wilk <ubanus@users.sf.net>
// Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
// Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
// Copyright (C) 2011, 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#include <config.h>
#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif
#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#ifdef _WIN32
# include <windows.h>
#endif
#include <sys/stat.h>
#include "goo/gstrtod.h"
#include "goo/GooString.h"
#include "poppler-config.h"
#include "GlobalParams.h"
#include "Page.h"
#include "Catalog.h"
#include "Stream.h"
#include "XRef.h"
#include "Linearization.h"
#include "Link.h"
#include "OutputDev.h"
#include "Error.h"
#include "ErrorCodes.h"
#include "Lexer.h"
#include "Parser.h"
#include "SecurityHandler.h"
#include "Decrypt.h"
#ifndef DISABLE_OUTLINE
#include "Outline.h"
#endif
#include "PDFDoc.h"
#include "Hints.h"
//------------------------------------------------------------------------
// Number of bytes read from the beginning of the file when looking
// for the '%PDF' header marker.
#define headerSearchSize 1024
// Length, in hexadecimal characters, of a PDF document ID
// (PermanentId, UpdateId) once formatted as text.
#define pdfIdLength 32
// Number of bytes read from the beginning of the file when looking
// for the linearization dictionary.
#define linearizationSearchSize 1024
// Number of bytes read from the end of the file when looking for
// 'startxref'.
#define xrefSearchSize 1024
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
// Put every member into its "nothing loaded yet" state.  Each
// constructor calls this before acquiring any resource.
void PDFDoc::init()
{
  // status
  ok = gFalse;
  errCode = errNone;
  startXRefPos = ~(Guint)0;   // sentinel: 'startxref' not looked up yet

  // input source
  fileName = NULL;
  file = NULL;
  str = NULL;

  // document structures, filled in later
  xref = NULL;
  catalog = NULL;
  linearization = NULL;
  hints = NULL;
  secHdlr = NULL;
  pageCache = NULL;
#ifndef DISABLE_OUTLINE
  outline = NULL;
#endif
}
// Default constructor: builds an empty document object with every
// member reset by init() and no stream attached.
PDFDoc::PDFDoc() { init(); }
// Open the PDF file named by fileNameA (stored in the fileName member)
// and run setup() on it.  On fopen() failure the errno value is kept
// in fopenErrno and errCode is set to errOpenFile.
PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
               GooString *userPassword, void *guiDataA) {
  init();

  fileName = fileNameA;
  guiData = guiDataA;

  // Ask the file system for the file length; it stays 0 if stat() fails.
  int length = 0;
  struct stat info;
  if (stat(fileName->getCString(), &info) == 0) {
    length = info.st_size;
  }

  // try to open file
#ifdef VMS
  file = fopen(fileName->getCString(), "rb", "ctx=stm");
#else
  file = fopen(fileName->getCString(), "rb");
#endif
  if (!file) {
    // Remember why fopen() failed so that it can be referred to later.
    fopenErrno = errno;
    error(-1, "Couldn't open file '%s': %s.", fileName->getCString(),
          strerror(errno));
    errCode = errOpenFile;
    return;
  }

  // create stream
  Object nullObj;
  nullObj.initNull();
  str = new FileStream(file, 0, gFalse, length, &nullObj);

  ok = setup(ownerPassword, userPassword);
}
#ifdef _WIN32
// Windows-only constructor taking a wide-character file name.
//
// fileNameA/fileNameLen: the (not necessarily zero-terminated) wide
//   file name.  A narrowed copy is stored in the fileName member.
// ownerPassword/userPassword: passed through to setup().
// guiDataA: stored in guiData.
//
// On failure to open the file, errCode is set to errOpenFile and the
// object is left with ok == gFalse.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
               GooString *userPassword, void *guiDataA) {
  OSVERSIONINFO version;
  wchar_t fileName2[MAX_PATH + 1];
  Object obj;
  int i;

  init();

  guiData = guiDataA;

  //~ file name should be stored in Unicode (?)
  fileName = new GooString();
  for (i = 0; i < fileNameLen; ++i) {
    fileName->append((char)fileNameA[i]);
  }

  // zero-terminate the file name string
  for (i = 0; i < fileNameLen && i < MAX_PATH; ++i) {
    fileName2[i] = fileNameA[i];
  }
  fileName2[i] = 0;

  // try to open file
  // NB: _wfopen is only available in NT
  struct _stat buf;
  // Start at 0 so that a failed stat call does not hand an
  // indeterminate length to FileStream (matches the GooString
  // constructor, which also defaults to 0).
  int size = 0;
  version.dwOSVersionInfoSize = sizeof(version);
  GetVersionEx(&version);
  if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
    if (_wstat(fileName2, &buf) == 0) {
      size = buf.st_size;
    }
    file = _wfopen(fileName2, L"rb");
  } else {
    if (_stat(fileName->getCString(), &buf) == 0) {
      size = buf.st_size;
    }
    file = fopen(fileName->getCString(), "rb");
  }
  if (!file) {
    error(-1, "Couldn't open file '%s'", fileName->getCString());
    errCode = errOpenFile;
    return;
  }

  // create stream
  obj.initNull();
  str = new FileStream(file, 0, gFalse, size, &obj);

  ok = setup(ownerPassword, userPassword);
}
#endif
// Build a document on top of an already-created base stream.  If the
// stream reports a file name, a copy of it is stored in fileName;
// otherwise fileName stays NULL.  The stream pointer is stored in str.
PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
               GooString *userPassword, void *guiDataA) {
  init();

  guiData = guiDataA;
  fileName = strA->getFileName() ? strA->getFileName()->copy() : NULL;
  str = strA;

  ok = setup(ownerPassword, userPassword);
}
// Common second-stage initialisation shared by the constructors:
// verifies the stream is seekable, scans the header, reads the xref
// table, checks encryption and loads the catalog.
//
// Returns gTrue on success.  On failure returns gFalse; errCode is set
// for xref, encryption and catalog failures.
GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
  // Seek relative to the end; a negative resulting position means the
  // stream cannot be repositioned.
  str->setPos(0, -1);
  if (str->getPos() < 0) {
    error(-1, "Document base stream is not seekable");
    return gFalse;
  }
  str->reset();

  // The footer is deliberately not checked: Adobe does not seem to
  // enforce %%EOF, so we do the same.
  // if (!checkFooter()) return gFalse;

  // check header
  checkHeader();

  // read xref table
  GBool wasReconstructed = false;
  xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
  if (!xref->isOk()) {
    error(-1, "Couldn't read xref table");
    errCode = xref->getErrorCode();
    return gFalse;
  }

  // check for encryption
  if (!checkEncryption(ownerPassword, userPassword)) {
    errCode = errEncrypted;
    return gFalse;
  }

  // read catalog
  catalog = new Catalog(xref);
  GBool catalogBroken = catalog && !catalog->isOk();
  if (catalogBroken && !wasReconstructed) {
    // Try one more time to construct the Catalog; maybe the problem is
    // a damaged XRef, so rebuild it from scratch first.
    delete catalog;
    delete xref;
    xref = new XRef(str, 0, 0, NULL, true);
    catalog = new Catalog(xref);
    catalogBroken = catalog && !catalog->isOk();
  }
  if (catalogBroken) {
    error(-1, "Couldn't read page catalog");
    errCode = errBadCatalog;
    return gFalse;
  }

  // done
  return gTrue;
}
// Release everything the document owns: cached pages, the security
// handler, outline, catalog, xref, hints, linearization data, the
// stream, the underlying FILE and the file name.
PDFDoc::~PDFDoc() {
  if (pageCache) {
    // deleting a NULL cache slot is a no-op
    for (int slot = 0; slot < getNumPages(); slot++) {
      delete pageCache[slot];
    }
    gfree(pageCache);
  }

  delete secHdlr;
#ifndef DISABLE_OUTLINE
  delete outline;
#endif
  delete catalog;
  delete xref;
  delete hints;
  delete linearization;
  delete str;

  // fclose() must not be handed a NULL pointer
  if (file) {
    fclose(file);
  }
  delete fileName;
}
// Check for a %%EOF at the end of this stream
// Check for a %%EOF marker at the end of this stream.
//
// Reads up to the last 1024 bytes of the stream (Adobe looks in the
// same window) and searches them backwards for the five characters
// "%%EOF".  The stream position that was current on entry is restored
// before returning, whether or not the marker was found.
//
// Returns gTrue if the marker is present; otherwise reports an error,
// sets errCode to errDamaged and returns gFalse.
GBool PDFDoc::checkFooter() {
  char eof[1025];
  const int pos = str->getPos();
  int i;

  // read the tail of the stream
  str->setPos(1024, -1);
  for (i = 0; i < 1024; i++) {
    const int ch = str->getChar();
    if (ch == EOF)
      break;
    eof[i] = ch;
  }
  eof[i] = '\0';

  // scan backwards, starting at the last offset where 5 bytes still fit
  GBool found = gFalse;
  for (i = i - 5; i >= 0; i--) {
    if (strncmp (&eof[i], "%%EOF", 5) == 0) {
      found = gTrue;
      break;
    }
  }

  // put the stream back where the caller left it on every path
  str->setPos(pos);

  if (!found) {
    // error() takes a printf-style format, so each literal '%' is doubled
    error(-1, "Document has not the mandatory ending %%%%EOF");
    errCode = errDamaged;
    return gFalse;
  }
  return gTrue;
}
// Check for a PDF header on this stream. Skip past some garbage
// if necessary.
void PDFDoc::checkHeader() {
char hdrBuf[headerSearchSize+1];
char *p;
char *tokptr;
int i;
pdfMajorVersion = 0;
pdfMinorVersion = 0;
for (i = 0; i < headerSearchSize; ++i) {
hdrBuf[i] = str->getChar();
}
hdrBuf[headerSearchSize] = '\0';
for (i = 0; i < headerSearchSize - 5; ++i) {
if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
break;
}
}
if (i >= headerSearchSize - 5) {
error(-1, "May not be a PDF file (continuing anyway)");
return;
}
str->moveStart(i);
if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
error(-1, "May not be a PDF file (continuing anyway)");
return;
}
sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
// We don't do the version check. Don't add it back in.
}
// Inspect the trailer's Encrypt entry.  If it is a dictionary, create
// the matching security handler (kept in secHdlr) and try to authorize
// with the given passwords; on success the decryption parameters are
// handed to the xref.
//
// Returns gTrue when the document is not encrypted or authorization
// succeeded; gFalse when no handler could be made or the passwords
// were rejected.
GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
  Object encryptObj;
  GBool authorized = gTrue;   // default: document is not encrypted

  xref->getTrailerDict()->dictLookup("Encrypt", &encryptObj);
  if (encryptObj.isDict()) {
    // encrypted: access is denied until a handler accepts the passwords
    authorized = gFalse;
    secHdlr = SecurityHandler::make(this, &encryptObj);
    if (secHdlr && secHdlr->checkEncryption(ownerPassword, userPassword)) {
      // authorization succeeded
      xref->setEncryption(secHdlr->getPermissionFlags(),
                          secHdlr->getOwnerPasswordOk(),
                          secHdlr->getFileKey(),
                          secHdlr->getFileKeyLength(),
                          secHdlr->getEncVersion(),
                          secHdlr->getEncRevision(),
                          secHdlr->getEncAlgorithm());
      authorized = gTrue;
    }
  }
  encryptObj.free();
  return authorized;
}
// Render one page on the given output device.  Prints a page banner
// first when the global "print commands" option is on.  Nothing is
// rendered if the page cannot be obtained.
void PDFDoc::displayPage(OutputDev *out, int page,
                         double hDPI, double vDPI, int rotate,
                         GBool useMediaBox, GBool crop, GBool printing,
                         GBool (*abortCheckCbk)(void *data),
                         void *abortCheckCbkData,
                         GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
                         void *annotDisplayDecideCbkData) {
  if (globalParams->getPrintCommands()) {
    printf("***** page %d *****\n", page);
  }

  Page *target = getPage(page);
  if (!target) {
    return;
  }
  target->display(out, hDPI, vDPI,
                  rotate, useMediaBox, crop, printing, catalog,
                  abortCheckCbk, abortCheckCbkData,
                  annotDisplayDecideCbk, annotDisplayDecideCbkData);
}
// Render every page from firstPage to lastPage (inclusive) by calling
// displayPage() on each in ascending order.
void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
                          double hDPI, double vDPI, int rotate,
                          GBool useMediaBox, GBool crop, GBool printing,
                          GBool (*abortCheckCbk)(void *data),
                          void *abortCheckCbkData,
                          GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
                          void *annotDisplayDecideCbkData) {
  int current = firstPage;
  while (current <= lastPage) {
    displayPage(out, current, hDPI, vDPI, rotate, useMediaBox, crop, printing,
                abortCheckCbk, abortCheckCbkData,
                annotDisplayDecideCbk, annotDisplayDecideCbkData);
    ++current;
  }
}
// Render the rectangle (sliceX, sliceY, sliceW, sliceH) of one page on
// the given output device.  Nothing is rendered if the page cannot be
// obtained.
void PDFDoc::displayPageSlice(OutputDev *out, int page,
                              double hDPI, double vDPI, int rotate,
                              GBool useMediaBox, GBool crop, GBool printing,
                              int sliceX, int sliceY, int sliceW, int sliceH,
                              GBool (*abortCheckCbk)(void *data),
                              void *abortCheckCbkData,
                              GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
                              void *annotDisplayDecideCbkData) {
  Page *target = getPage(page);
  if (!target) {
    return;
  }
  target->displaySlice(out, hDPI, vDPI,
                       rotate, useMediaBox, crop,
                       sliceX, sliceY, sliceW, sliceH,
                       printing, catalog,
                       abortCheckCbk, abortCheckCbkData,
                       annotDisplayDecideCbk, annotDisplayDecideCbkData);
}
// Return the links of the given page.  When the page cannot be
// obtained, a Links object built from NULL is returned instead, so the
// result is never a null pointer.
Links *PDFDoc::getLinks(int page) {
  Page *target = getPage(page);
  return target ? target->getLinks(catalog) : new Links (NULL);
}
// Forward the links of the given page to the output device; does
// nothing if the page cannot be obtained.
void PDFDoc::processLinks(OutputDev *out, int page) {
  Page *target = getPage(page);
  if (target) {
    target->processLinks(out, catalog);
  }
}
// Return the Linearization object for this document, creating it from
// the stream on first use and caching it for later calls.
Linearization *PDFDoc::getLinearization()
{
  if (linearization) {
    return linearization;
  }
  linearization = new Linearization(str);
  return linearization;
}
// A document is treated as linearized when the stream length is
// non-zero and equals the length recorded in the linearization data.
GBool PDFDoc::isLinearized() {
  const GBool lengthsAgree =
      (str->getLength()) &&
      (getLinearization()->getLength() == str->getLength());
  return lengthsAgree ? gTrue : gFalse;
}
// Convert a raw document ID (exactly pdfIdLength / 2 bytes) into its
// lowercase hexadecimal text form and store it in id.
// Returns gFalse, leaving id untouched, if the input has the wrong
// length.
static GBool
get_id (GooString *encodedidstring, GooString *id) {
  static const char hexDigits[] = "0123456789abcdef";
  const int rawLength = pdfIdLength / 2;

  if (encodedidstring->getLength() != rawLength)
    return gFalse;

  const char *raw = encodedidstring->getCString();
  char hex[pdfIdLength + 1];
  for (int i = 0; i < rawLength; ++i) {
    // mask to 0..255 so that bytes >= 0x80 are not sign-extended
    const unsigned int byte = raw[i] & 0xff;
    hex[2 * i]     = hexDigits[byte >> 4];
    hex[2 * i + 1] = hexDigits[byte & 0x0f];
  }
  hex[pdfIdLength] = '\0';

  id->Set(hex, pdfIdLength);
  return gTrue;
}
// Read the document IDs from the trailer's ID array.
//
// permanent_id: if non-NULL, receives the hex form of the first ID.
// update_id:    if non-NULL, receives the hex form of the second ID.
//
// Returns gTrue only when the trailer has a two-element ID array and
// every requested element is a string that get_id() accepts.  The
// update ID is not examined once the permanent ID has failed.
// The looked-up trailer object is released on every path.
GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
  Object obj;
  GBool success = gFalse;

  xref->getTrailerDict()->dictLookup ("ID", &obj);

  if (obj.isArray() && obj.arrayGetLength() == 2) {
    Object obj2;
    success = gTrue;

    if (permanent_id) {
      if (obj.arrayGet(0, &obj2)->isString()) {
        success = get_id (obj2.getString(), permanent_id);
      } else {
        error(-1, "Invalid permanent ID");
        success = gFalse;
      }
      obj2.free();
    }

    if (success && update_id) {
      if (obj.arrayGet(1, &obj2)->isString()) {
        success = get_id (obj2.getString(), update_id);
      } else {
        error(-1, "Invalid update ID");
        success = gFalse;
      }
      obj2.free();
    }
  }

  // single exit: the array reference is always dropped
  obj.free();
  return success;
}
// Return the cached Hints object, building it on first use.  Hints are
// only created for linearized documents, so the result can be NULL.
Hints *PDFDoc::getHints()
{
  const GBool needBuild = !hints && isLinearized();
  if (needBuild) {
    hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
  }
  return hints;
}
// Write a single page of this document to a new PDF file.
//
// name:   path of the file to create.
// pageNo: 1-based number of the page to extract.
//
// The page dictionary is first rewritten by replacePageDict() with
// explicit MediaBox / CropBox / Rotate entries.  Every object reachable
// from the page (and from the catalog's OCProperties, when the catalog
// reports optional content) is marked and written, followed by a new
// catalog, a one-page Pages node, the page itself, the xref table and
// the trailer.
//
// Returns errNone on success, errOpenFile when pageNo is out of range
// or the output file cannot be opened.
int PDFDoc::savePageAs(GooString *name, int pageNo)
{
  FILE *f;
  OutStream *outStr;
  XRef *yRef, *countRef;
  // object numbers for the new catalog / Pages / page objects are taken
  // from past the end of the existing xref
  int rootNum = getXRef()->getSize() + 1;

  if (pageNo < 1 || pageNo > getNumPages()) {
    error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() );
    return errOpenFile;
  }
  PDFRectangle *cropBox = NULL;
  if (getCatalog()->getPage(pageNo)->isCropped()) {
    cropBox = getCatalog()->getPage(pageNo)->getCropBox();
  }
  replacePageDict(pageNo,
    getCatalog()->getPage(pageNo)->getRotate(),
    getCatalog()->getPage(pageNo)->getMediaBox(),
    cropBox, NULL);
  Ref *refPage = getCatalog()->getPageRef(pageNo);
  Object page;
  getXRef()->fetch(refPage->num, refPage->gen, &page);

  if (!(f = fopen(name->getCString(), "wb"))) {
    error(-1, "Couldn't open file '%s'", name->getCString());
    page.free();   // do not leak the fetched page object
    return errOpenFile;
  }
  outStr = new FileOutStream(f,0);

  yRef = new XRef();
  countRef = new XRef();
  yRef->add(0, 65535, 0, gFalse);
  writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());

  // get and mark optional content groups
  OCGs *ocgs = getCatalog()->getOptContentConfig();
  if (ocgs != NULL) {
    Object catDict, optContentProps;
    getXRef()->getCatalog(&catDict);
    catDict.dictLookup("OCProperties", &optContentProps);
    Dict *ocDict = optContentProps.getDict();
    markPageObjects(ocDict, yRef, countRef, 0);
    catDict.free();
    optContentProps.free();
  }

  Dict *pageDict = page.getDict();
  markPageObjects(pageDict, yRef, countRef, 0);
  Guint objectsCount = writePageObjects(outStr, yRef, 0);

  // new catalog
  yRef->add(rootNum,0,outStr->getPos(),gTrue);
  outStr->printf("%d 0 obj\n", rootNum);
  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
  if (ocgs != NULL) {
    Object catDict, optContentProps;
    getXRef()->getCatalog(&catDict);
    catDict.dictLookup("OCProperties", &optContentProps);
    outStr->printf(" /OCProperties <<");
    Dict *ocDict = optContentProps.getDict();
    for (int n = 0; n < ocDict->getLength(); n++) {
      if (n > 0) outStr->printf(" ");
      const char *key = ocDict->getKey(n);
      Object value; ocDict->getValNF(n, &value);
      outStr->printf("/%s ", key);
      writeObject(&value, NULL, outStr, getXRef(), 0);
      value.free();
    }
    outStr->printf(" >> ");
    catDict.free();
    optContentProps.free();
  }
  outStr->printf(">>\nendobj\n");
  objectsCount++;

  // new one-page Pages node
  yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
  outStr->printf("%d 0 obj\n", rootNum + 1);
  outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 2);
  outStr->printf("endobj\n");
  objectsCount++;

  // the page itself, re-parented to the new Pages node
  yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
  outStr->printf("%d 0 obj\n", rootNum + 2);
  outStr->printf("<< ");
  for (int n = 0; n < pageDict->getLength(); n++) {
    if (n > 0) outStr->printf(" ");
    const char *key = pageDict->getKey(n);
    Object value; pageDict->getValNF(n, &value);
    if (strcmp(key, "Parent") == 0) {
      outStr->printf("/Parent %d 0 R", rootNum + 1);
    } else {
      outStr->printf("/%s ", key);
      writeObject(&value, NULL, outStr, getXRef(), 0);
    }
    value.free();
  }
  outStr->printf(" >>\nendobj\n");
  objectsCount++;
  page.free();

  Guint uxrefOffset = outStr->getPos();
  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);

  Ref ref;
  ref.num = rootNum;
  ref.gen = 0;
  writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos());

  outStr->close();
  // release the stream wrapper before closing the FILE, the same order
  // the saveAs(GooString *, ...) overload uses
  delete outStr;
  fclose(f);
  delete yRef;
  delete countRef;

  return errNone;
}
// Save the document to the file called name, using the given write
// mode.  Returns errOpenFile if the file cannot be created, otherwise
// whatever the stream-based saveAs() returns.
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
  FILE *out = fopen(name->getCString(), "wb");
  if (out == NULL) {
    error(-1, "Couldn't open file '%s'", name->getCString());
    return errOpenFile;
  }

  OutStream *sink = new FileOutStream(out,0);
  const int status = saveAs(sink, mode);
  delete sink;
  fclose(out);
  return status;
}
// Save the document to outStr.
//
// Encrypted documents (trailer has an Encrypt entry) can only be
// copied verbatim: that happens when nothing was updated and mode is
// writeStandard; any other combination returns errEncrypted.
// Otherwise mode selects a complete rewrite, an incremental update,
// or - for any other mode - an incremental update if some object was
// updated and a verbatim copy if not.
// Returns errNone unless the encrypted case is refused.
int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
  // find if we have updated objects
  GBool updated = gFalse;
  for (int i = 0; !updated && i < xref->getNumObjects(); i++) {
    if (xref->getEntry(i)->updated) {
      updated = gTrue;
    }
  }

  // we don't support rewriting files with Encrypt at the moment
  Object encryptEntry;
  xref->getTrailerDict()->getDict()->lookupNF("Encrypt", &encryptEntry);
  const GBool encrypted = !encryptEntry.isNull();
  encryptEntry.free();

  if (encrypted) {
    if (updated || mode != writeStandard) {
      return errEncrypted;
    }
    // simply copy the original file
    saveWithoutChangesAs (outStr);
    return errNone;
  }

  switch (mode) {
  case writeForceRewrite:
    saveCompleteRewrite(outStr);
    break;
  case writeForceIncremental:
    saveIncrementalUpdate(outStr);
    break;
  default: // let poppler decide
    if (updated) {
      saveIncrementalUpdate(outStr);
    } else {
      // simply copy the original file
      saveWithoutChangesAs (outStr);
    }
    break;
  }
  return errNone;
}
// Copy the original document byte for byte into the file called name.
// Returns errOpenFile if the file cannot be created, otherwise the
// result of the stream-based overload.
int PDFDoc::saveWithoutChangesAs(GooString *name) {
  FILE *out = fopen(name->getCString(), "wb");
  if (out == NULL) {
    error(-1, "Couldn't open file '%s'", name->getCString());
    return errOpenFile;
  }

  OutStream *sink = new FileOutStream(out,0);
  const int status = saveWithoutChangesAs(sink);
  delete sink;
  fclose(out);
  return status;
}
// Copy every byte of the document stream to outStr, then close the
// document stream.  Always returns errNone.
int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
  str->reset();
  for (int ch = str->getChar(); ch != EOF; ch = str->getChar()) {
    outStr->put(ch);
  }
  str->close();
  return errNone;
}
// Write the original file followed by an incremental update section:
// every xref entry flagged as updated is written as a new object, then
// a new xref table and a trailer pointing back at the previous one.
void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
{
  // copy the original file
  str->reset();
  for (int ch = str->getChar(); ch != EOF; ch = str->getChar()) {
    outStr->put(ch);
  }
  str->close();

  XRef *updates = new XRef();
  updates->add(0, 65535, 0, gFalse);
  for (int num = 0; num < xref->getNumObjects(); num++) {
    XRefEntry *entry = xref->getEntry(num);

    // we skip the irrelevant free objects and anything not updated
    const GBool irrelevantFree =
        (entry->type == xrefEntryFree) && (entry->gen == 0);
    if (irrelevantFree || !entry->updated) {
      continue;
    }

    // we have an updated object; compressed entries are written with gen 0
    Ref ref;
    ref.num = num;
    ref.gen = entry->type == xrefEntryCompressed ? 0 : entry->gen;

    Object changed;
    xref->fetch(ref.num, ref.gen, &changed);
    Guint offset = writeObject(&changed, &ref, outStr);
    updates->add(ref.num, ref.gen, offset, gTrue);
    changed.free();
  }

  if (updates->getSize() == 0) { //we have nothing to update
    delete updates;
    return;
  }

  Guint uxrefOffset = outStr->getPos();
  updates->writeToFile(outStr, gFalse /* do not write unnecessary entries */);

  writeTrailer(uxrefOffset, xref->getSize(), outStr, gTrue);

  delete updates;
}
// Write the whole document from scratch: header, every in-use object,
// a fresh xref table and a non-incremental trailer.
void PDFDoc::saveCompleteRewrite (OutStream* outStr)
{
  outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);

  XRef *rebuilt = new XRef();
  rebuilt->add(0, 65535, 0, gFalse);
  for (int num = 0; num < xref->getNumObjects(); num++) {
    const XRefEntryType type = xref->getEntry(num)->type;
    Ref ref;
    ref.num = num;

    if (type == xrefEntryFree) {
      ref.gen = xref->getEntry(num)->gen;
      /* the XRef class adds a lot of irrelevant free entries, we only
         want the significant one, and we don't want the one with num=0
         because it has already been added (gen = 65535) */
      if (ref.gen > 0 && ref.num > 0)
        rebuilt->add(ref.num, ref.gen, 0, gFalse);
    } else if (type == xrefEntryUncompressed || type == xrefEntryCompressed) {
      // compressed entries have gen == 0
      ref.gen = (type == xrefEntryCompressed) ? 0 : xref->getEntry(num)->gen;

      Object current;
      xref->fetch(ref.num, ref.gen, &current);
      Guint offset = writeObject(&current, &ref, outStr);
      rebuilt->add(ref.num, ref.gen, offset, gTrue);
      current.free();
    }
  }

  Guint uxrefOffset = outStr->getPos();
  rebuilt->writeToFile(outStr, gTrue /* write all entries */);

  writeTrailer(uxrefOffset, rebuilt->getSize(), outStr, gFalse);

  delete rebuilt;
}
// Emit dict as "<</Key value ...>> ".  Keys are passed through
// sanitizedName() (non-PS mode) first; values are written unresolved
// via writeObject(), with numOffset forwarded to it.
void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
{
  outStr->printf("<<");
  for (int idx = 0; idx < dict->getLength(); idx++) {
    // key
    GooString rawKey(dict->getKey(idx));
    GooString *safeKey = rawKey.sanitizedName(gFalse /* non ps mode */);
    outStr->printf("/%s ", safeKey->getCString());
    delete safeKey;

    // value
    Object entry;
    writeObject(dict->getValNF(idx, &entry), NULL, outStr, xRef, numOffset);
    entry.free();
  }
  outStr->printf(">> ");
}
// Write the bytes str yields after reset(), wrapped between the
// "stream" and "endstream" keywords.
void PDFDoc::writeStream (Stream* str, OutStream* outStr)
{
  outStr->printf("stream\r\n");
  str->reset();
  int ch;
  while ((ch = str->getChar()) != EOF) {
    outStr->printf("%c", ch);
  }
  outStr->printf("\r\nendstream\r\n");
}
// Copy the unfiltered bytes of str to outStr, wrapped between the
// "stream" and "endstream" keywords.  The number of bytes copied is
// the integer Length entry of the stream dictionary; if that entry is
// not an integer an error is reported and nothing is written.
void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
{
  Object obj1;
  str->getDict()->lookup("Length", &obj1);
  if (!obj1.isInt()) {
    error (-1, "PDFDoc::writeRawStream, no Length in stream dict");
    obj1.free();   // release whatever the lookup returned
    return;
  }

  const int length = obj1.getInt();
  obj1.free();

  outStr->printf("stream\r\n");
  str->unfilteredReset();
  for (int i=0; i<length; i++) {
    int c = str->getUnfilteredChar();
    outStr->printf("%c", c);
  }
  // leave the stream in its normal reset state
  str->reset();
  outStr->printf("\r\nendstream\r\n");
}
// Write s as a PDF literal string "(...) ".  Parentheses and
// backslashes are always preceded by a backslash.  For strings without
// a Unicode marker, CR and LF are additionally written as the two
// character sequences \r and \n; strings with the marker keep those
// bytes verbatim.
void PDFDoc::writeString (GooString* s, OutStream* outStr)
{
  // unicode strings don't necessarily end with \0, so always go by length
  const GBool escapeLineBreaks = !s->hasUnicodeMarker();
  const char *bytes = s->getCString();

  outStr->printf("(");
  for (int pos = 0; pos < s->getLength(); pos++) {
    const char ch = bytes[pos];

    if (escapeLineBreaks && ch == '\r') {
      outStr->printf("\\r");
      continue;
    }
    if (escapeLineBreaks && ch == '\n') {
      outStr->printf("\\n");
      continue;
    }

    // escape if needed
    if (ch == '(' || ch == ')' || ch == '\\') {
      outStr->printf("%c", '\\');
    }
    outStr->printf("%c", ch);
  }
  outStr->printf(") ");
}
// Serialise obj to outStr in PDF syntax.
//
// obj:       the object to write.
// ref:       if non-NULL, the object is wrapped in
//            "<num> <gen> obj ... endobj"; if NULL only the bare value
//            is written.
// outStr:    destination stream.
// xRef:      forwarded to nested writes; also asked for the stream end
//            position when copying a raw stream.
// numOffset: added to the object number of every reference written.
//
// Returns the position outStr reported before anything was written.
Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset)
{
Array *array;
Object obj1;
// remember where this object starts; this is the value returned
Guint offset = outStr->getPos();
int tmp;
if(ref)
outStr->printf("%i %i obj ", ref->num, ref->gen);
switch (obj->getType()) {
case objBool:
outStr->printf("%s ", obj->getBool()?"true":"false");
break;
case objInt:
outStr->printf("%i ", obj->getInt());
break;
case objReal:
{
// reals are formatted through GooString with up to 10 significant digits
GooString s;
s.appendf("{0:.10g}", obj->getReal());
outStr->printf("%s ", s.getCString());
break;
}
case objString:
writeString(obj->getString(), outStr);
break;
case objName:
{
// names go through sanitizedName() (non-PS mode) before being written
GooString name(obj->getName());
GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
outStr->printf("/%s ", nameToPrint->getCString());
delete nameToPrint;
break;
}
case objNull:
outStr->printf( "null ");
break;
case objArray:
// elements are written unresolved (getNF), each as a bare value
array = obj->getArray();
outStr->printf("[");
for (int i=0; i<array->getLength(); i++) {
writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
obj1.free();
}
outStr->printf("] ");
break;
case objDict:
writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
break;
case objStream:
{
// We can't modify streams with the current implementation (no write functions in Stream API)
// => the only streams that can have been modified are internal streams (=strWeird)
Stream *stream = obj->getStream();
if (stream->getKind() == strWeird) {
//we write the stream unencoded => TODO: write stream encoder
stream->reset();
//recalculate stream length by reading the stream to its end
tmp = 0;
for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
tmp++;
}
obj1.initInt(tmp);
stream->getDict()->set("Length", &obj1);
//Remove Stream encoding; the dictionary must be updated before it is written
stream->getDict()->remove("Filter");
stream->getDict()->remove("DecodeParms");
writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
writeStream (stream,outStr);
obj1.free();
} else {
//raw stream copy
FilterStream *fs = dynamic_cast<FilterStream*>(stream);
if (fs) {
BaseStream *bs = fs->getBaseStream();
if (bs) {
// when the xref can report where the stream ends, overwrite Length
// with the distance from the base stream start to that end
Guint streamEnd;
if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
Object val;
val.initInt(streamEnd - bs->getStart());
stream->getDict()->set("Length", &val);
}
}
}
writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
writeRawStream (stream, outStr);
}
break;
}
case objRef:
// references are renumbered by numOffset; the generation is kept
outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
break;
case objCmd:
outStr->printf("%s\n", obj->getCmd());
break;
case objError:
outStr->printf("error\r\n");
break;
case objEOF:
outStr->printf("eof\r\n");
break;
case objNone:
outStr->printf("none\r\n");
break;
default:
error(-1,"Unhandled objType : %i, please report a bug with a testcase\r\n", obj->getType());
break;
}
if (ref)
outStr->printf("endobj\r\n");
return offset;
}
// Write the "trailer" dictionary, the "startxref" offset and the
// closing %%EOF marker.
//
// uxrefOffset: offset written after "startxref".
// uxrefSize:   value of the trailer's Size entry.
// outStr:      destination stream.
// incrUpdate:  gTrue when appending an incremental update; the first
//              ID element is then taken from the original trailer and
//              a Prev entry (startxRef) is added.
// startxRef:   value of the Prev entry (used only when incrUpdate).
// root:        reference written as the Root entry.
// xRef:        xref providing the original trailer and info dictionary.
// fileName, fileSize: mixed into the generated document ID.
void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize,
                          OutStream* outStr, GBool incrUpdate,
                          Guint startxRef, Ref *root, XRef *xRef, const char *fileName,
                          Guint fileSize)
{
  Dict *trailerDict = new Dict(xRef);
  Object obj1;
  obj1.initInt(uxrefSize);
  trailerDict->set("Size", &obj1);
  obj1.free();

  //build a new ID, as recommended in the reference, uses:
  // - current time
  // - file name
  // - file size
  // - values of entry in information dictionnary
  GooString message;
  char buffer[256];
  sprintf(buffer, "%i", (int)time(NULL));
  message.append(buffer);
  message.append(fileName);
  // fileSize is unsigned, so it is formatted with %u
  sprintf(buffer, "%u", fileSize);
  message.append(buffer);

  //info dict -- only use text string
  if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
    for(int i=0; i<obj1.getDict()->getLength(); i++) {
      Object obj2;
      obj1.getDict()->getVal(i, &obj2);
      if (obj2.isString()) {
        message.append(obj2.getString());
      }
      obj2.free();
    }
  }
  obj1.free();

  //calculate md5 digest
  Guchar digest[16];
  Decrypt::md5((Guchar*)message.getCString(), message.getLength(), digest);
  obj1.initString(new GooString((const char*)digest, 16));

  //create ID array
  Object obj2,obj3,obj5;
  obj2.initArray(xRef);

  if (incrUpdate) {
    Object obj4;
    //only update the second part of the array
    xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
    if (!obj4.isArray()) {
      error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
      // neither the new ID string nor the empty array ends up in the
      // trailer on this path, so release them here
      obj1.free();
      obj2.free();
    } else {
      //Get the first part of the ID
      obj4.arrayGet(0,&obj3);

      obj2.arrayAdd(&obj3);
      obj2.arrayAdd(&obj1);
      trailerDict->set("ID", &obj2);
    }
    obj4.free();
  } else {
    //new file => same values for the two identifiers
    obj2.arrayAdd(&obj1);
    obj1.initString(new GooString((const char*)digest, 16));
    obj2.arrayAdd(&obj1);
    trailerDict->set("ID", &obj2);
  }

  obj1.initRef(root->num, root->gen);
  trailerDict->set("Root", &obj1);

  if (incrUpdate) {
    obj1.initInt(startxRef);
    trailerDict->set("Prev", &obj1);
  }

  // carry over the (unresolved) Info entry when there is one
  if (!xRef->getTrailerDict()->isNone()) {
    xRef->getDocInfoNF(&obj5);
    if (!obj5.isNull()) {
      trailerDict->set("Info", &obj5);
    }
  }

  outStr->printf( "trailer\r\n");
  writeDictionnary(trailerDict, outStr, xRef, 0);
  outStr->printf( "\r\nstartxref\r\n");
  outStr->printf( "%i\r\n", uxrefOffset);
  outStr->printf( "%%%%EOF\r\n");

  delete trailerDict;
}
// Convenience overload: fills in the document-specific arguments
// (start xref position, root reference, xref, file name and file size)
// and calls the full writeTrailer().  The file size is obtained by
// reading the document stream to its end; a placeholder name is used
// when the document has no file name.
void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
{
  // const: one of the two sources is a string literal
  const char *fileNameA;
  if (fileName)
    fileNameA = fileName->getCString();
  else
    fileNameA = "streamwithoutfilename.pdf";

  // file size
  unsigned int fileSize = 0;
  int c;
  str->reset();
  while ((c = str->getChar()) != EOF) {
    fileSize++;
  }
  str->close();

  Ref ref;
  ref.num = getXRef()->getRootNum();
  ref.gen = getXRef()->getRootGen();
  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
}
// Write the two header lines of a PDF file: the "%PDF-major.minor"
// version line, then a comment line made of four bytes >= 0x80.
void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
{
  // version line
  outStr->printf("%%PDF-%d.%d\n", major, minor);
  // high-bit comment line
  outStr->printf("%%\xE2\xE3\xCF\xD3\n");
}
// Run markObject() on every (unresolved) value stored in dict.
void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset)
{
  int idx = 0;
  while (idx < dict->getLength()) {
    Object entry;
    markObject(dict->getValNF(idx, &entry), xRef, countRef, numOffset);
    entry.free();
    ++idx;
  }
}
// Recursively walk obj and record every indirect object it reaches.
//
// obj:       object to inspect; arrays, dictionaries, stream
//            dictionaries and references are followed, all other types
//            are ignored.
// xRef:      xref in which reached objects are registered, at index
//            (object number + numOffset).
// countRef:  second xref whose entries' gen field is used here as a
//            per-object visit counter.
// numOffset: added to each referenced object number.
void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset)
{
Array *array;
Object obj1;
switch (obj->getType()) {
case objArray:
// visit each element without resolving references
array = obj->getArray();
for (int i=0; i<array->getLength(); i++) {
markObject(array->getNF(i, &obj1), xRef, countRef, numOffset);
obj1.free();
}
break;
case objDict:
markDictionnary (obj->getDict(), xRef, countRef, numOffset);
break;
case objStream:
{
// only the stream's dictionary is walked
Stream *stream = obj->getStream();
markDictionnary (stream->getDict(), xRef, countRef, numOffset);
}
break;
case objRef:
{
// register the target in xRef if it is not there yet (index out of range or still free)
if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
return; // already marked as free => should be replaced
}
xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
// carry over the "compressed" entry type from the document's own xref
if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
}
}
// count visits: first visit adds an entry with gen 1, later visits increment gen
if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
{
countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
} else {
XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
entry->gen++;
// this break leaves the switch, so once the counter exceeds 9 the
// target is no longer fetched and walked again
if (entry->gen > 9)
break;
}
// follow the reference and mark everything the target contains
Object obj1;
getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
markObject(&obj1, xRef, countRef, numOffset);
obj1.free();
}
break;
default:
break;
}
}
void PDFDoc::replacePageDict(int pageNo, int rotate,
PDFRectangle *mediaBox,
PDFRectangle *cropBox, Object *pageCTM)
{
Ref *refPage = getCatalog()->getPageRef(pageNo);
Object page;
getXRef()->fetch(refPage->num, refPage->gen, &page);
Dict *pageDict = page.getDict();
pageDict->remove("MediaBox");
pageDict->remove("CropBox");
pageDict->remove("ArtBox");
pageDict->remove("BleedBox");
pageDict->remove("TrimBox");
pageDict->remove("Rotate");
Object *mediaBoxObj = new Object();
mediaBoxObj->initArray(getXRef());
Object *murx = new Object();
murx->initReal(mediaBox->x1);
Object *mury = new Object();
mury->initReal(mediaBox->y1);
Object *mllx = new Object();
mllx->initReal(mediaBox->x2);
Object *mlly = new Object();
mlly->initReal(mediaBox->y2);
mediaBoxObj->arrayAdd(murx);
mediaBoxObj->arrayAdd(mury);
mediaBoxObj->arrayAdd(mllx);
mediaBoxObj->arrayAdd(mlly);
pageDict->add(copyString("MediaBox"), mediaBoxObj);
if (cropBox != NULL) {
Object *cropBoxObj = new Object();
cropBoxObj->initArray(getXRef());
Object *curx = new Object();
curx->initReal(cropBox->x1);
Object *cury = new Object();
cury->initReal(cropBox->y1);
Object *cllx = new Object();
cllx->initReal(cropBox->x2);
Object *clly = new Object();
clly->initReal(cropBox->y2);
cropBoxObj->arrayAdd(curx);
cropBoxObj->arrayAdd(cury);
cropBoxObj->arrayAdd(cllx);
cropBoxObj->arrayAdd(clly);
pageDict->add(copyString("CropBox"), cropBoxObj);
}
Object *rotateObj = new Object();
rotateObj->initInt(rotate);
pageDict->add(copyString("Rotate"), rotateObj);
if (pageCTM != NULL) {
Object *contents = new Object();
Ref cmRef = getXRef()->addIndirectObject(pageCTM);
Object *ref = new Object();
ref->initRef(cmRef.num, cmRef.gen);
pageDict->lookupNF("Contents", contents);
Object *newContents = new Object();
newContents->initArray(getXRef());
if (contents->getType() == objRef) {
newContents->arrayAdd(ref);
newContents->arrayAdd(contents);
} else {
newContents->arrayAdd(ref);
for (int i = 0; i < contents->arrayGetLength(); i++) {
Object *contentEle = new Object();
contents->arrayGetNF(i, contentEle);
newContents->arrayAdd(contentEle);
}
}
pageDict->remove("Contents");
pageDict->add(copyString("Contents"), newContents);
}
getXRef()->setModifiedObject(&page, *refPage);
page.free();
}
// Passes the (unresolved) value of every entry in pageDict to markObject(),
// except for the entry whose key is "Parent".
//
// pageDict  - dictionary whose entries are walked
// xRef, countRef, numOffset - forwarded unchanged to markObject()
//
// NOTE(review): skipping "Parent" presumably keeps the traversal from
// climbing back up into the page tree - confirm against markObject().
void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset)
{
  int idx = 0;
  // The length is re-read on every pass, exactly as a for-loop condition would.
  while (idx < pageDict->getLength()) {
    const char *entryName = pageDict->getKey(idx);
    Object entry;
    // getValNF: fetch the value without following indirect references.
    pageDict->getValNF(idx, &entry);
    const bool isParentLink = (strcmp(entryName, "Parent") == 0);
    if (!isParentLink) {
      markObject(&entry, xRef, countRef, numOffset);
    }
    entry.free();
    ++idx;
  }
}
// Writes every non-free entry of xRef, starting at index numOffset, to outStr.
//
// For each such entry the object is fetched from this document's own XRef
// under number (n - numOffset), written with writeObject(), and the offset
// returned by writeObject() is recorded back into xRef via add().
//
// Returns the number of entries that were written.
Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset)
{
  Guint written = 0; // number of non-free entries processed so far

  for (int num = numOffset; num < xRef->getNumObjects(); ++num) {
    if (xRef->getEntry(num)->type == xrefEntryFree) {
      continue; // nothing to emit for a free slot
    }

    Ref target;
    target.num = num;
    target.gen = xRef->getEntry(num)->gen;
    ++written;

    // The object itself lives in this document's XRef under the
    // un-shifted number.
    Object fetched;
    getXRef()->fetch(target.num - numOffset, target.gen, &fetched);

    const Guint where = writeObject(&fetched, &target, outStr, xRef, numOffset);
    xRef->add(target.num, target.gen, where, gTrue);
    fetched.free();
  }

  return written;
}
#ifndef DISABLE_OUTLINE
// Returns the document's Outline object. It is constructed from the
// catalog's outline entry on the first call and the same pointer is
// handed back on every later call.
Outline *PDFDoc::getOutline()
{
  if (outline) {
    return outline;
  }

  // First request: read the outline now.
  outline = new Outline(catalog->getOutline(), xref);
  return outline;
}
#endif
// Builds a default-constructed PDFDoc and stamps it with the given error
// code and file name.
//
// errorCode - value stored in the new document's errCode
// fileNameA - pointer stored as-is in the new document's fileName
//             (no copy is made here)
//
// Returns the newly allocated document.
PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
{
  PDFDoc *failed = new PDFDoc();

  failed->fileName = fileNameA;
  failed->errCode = errorCode;

  return failed;
}
// Parses the leading run of decimal digits in s.
//
// s - NUL-terminated string; must not be NULL.
//
// At most 10 digits are consumed. Parsing stops at the first non-digit
// character or at the terminating NUL. Returns the accumulated value, or 0
// when s does not start with a digit.
//
// NOTE(review): a 10-digit input can exceed the range of Guint; assuming
// Guint is an unsigned type (as its name suggests - confirm), the result
// then wraps rather than saturating.
Guint PDFDoc::strToUnsigned(char *s) {
  Guint x = 0;
  int i = 0;

  // isdigit() requires an argument representable as unsigned char (or EOF).
  // Where plain char is signed, bytes >= 0x80 would arrive as negative
  // values, which is undefined behaviour - hence the cast.
  for (char *p = s;
       *p && isdigit(static_cast<unsigned char>(*p)) && i < 10;
       ++p, ++i) {
    x = 10 * x + (*p - '0');
  }
  return x;
}
// Read the 'startxref' position.
//
// The result is computed once and cached in startXRefPos; the value
// ~(Guint)0 marks "not computed yet".
//
// Linearized documents: the first linearizationSearchSize bytes of the
// stream are scanned for the first "endobj" (end of the linearization
// dictionary). The result is the buffer index of the first non-whitespace
// byte after it, or 0 when no "endobj" is found.
//
// Other documents: the last xrefSearchSize bytes are scanned backwards for
// "startxref". The result is the unsigned number that follows the keyword
// (after optional whitespace), or 0 when the keyword is not found.
Guint PDFDoc::getStartXRef()
{
  if (startXRefPos == ~(Guint)0) {

    if (isLinearized()) {
      char buf[linearizationSearchSize+1];
      int c, n, i;

      // read the first linearizationSearchSize bytes
      str->setPos(0);
      for (n = 0; n < linearizationSearchSize; ++n) {
        if ((c = str->getChar()) == EOF) {
          break;
        }
        buf[n] = c;
      }
      buf[n] = '\0';

      // find end of first obj (linearization dictionary)
      startXRefPos = 0;
      for (i = 0; i < n; i++) {
        // buf is NUL-terminated, so strncmp never reads past buf[n]
        if (!strncmp("endobj", &buf[i], 6)) {
          i += 6;
          //skip whitespace
          while (buf[i] && Lexer::isSpace(buf[i])) ++i;
          startXRefPos = i;
          break;
        }
      }
    } else {
      char buf[xrefSearchSize+1];
      char *p;
      int c, n, i;

      // read last xrefSearchSize bytes
      str->setPos(xrefSearchSize, -1);
      for (n = 0; n < xrefSearchSize; ++n) {
        if ((c = str->getChar()) == EOF) {
          break;
        }
        buf[n] = c;
      }
      buf[n] = '\0';

      // find startxref, searching from the end so the last one wins
      for (i = n - 9; i >= 0; --i) {
        if (!strncmp(&buf[i], "startxref", 9)) {
          break;
        }
      }
      if (i < 0) {
        startXRefPos = 0;
      } else {
        // buf holds raw file bytes; isspace() has undefined behaviour for
        // negative arguments other than EOF, so convert to unsigned char
        // before classifying. The loop ends at the terminating NUL at the
        // latest.
        for (p = &buf[i+9]; isspace(static_cast<unsigned char>(*p)); ++p) ;
        startXRefPos = strToUnsigned(p);
      }
    }

  }

  return startXRefPos;
}
// Returns the main xref entries offset reported by the linearization
// object when the document is linearized, and 0 otherwise.
Guint PDFDoc::getMainXRefEntriesOffset()
{
  if (!isLinearized()) {
    return 0;
  }
  return getLinearization()->getMainXRefEntriesOffset();
}
// Returns the number of pages in the document.
//
// For a linearized document the count reported by the linearization object
// is used when it is non-zero; in every other case the catalog's page count
// is returned.
int PDFDoc::getNumPages()
{
  if (isLinearized()) {
    const int hinted = getLinearization()->getNumPages();
    if (hinted != 0) {
      return hinted;
    }
  }

  return catalog->getNumPages();
}
// Builds a Page object for the given page number using the object number
// supplied by the hint tables.
//
// page - page number, passed to getHints()->getPageObjectNum()
//
// Returns a newly allocated Page, or NULL (after reporting an error) when
// the hint tables give no object number, the number lies outside the xref
// table, or the fetched object is not a dictionary of type "Page".
Page *PDFDoc::parsePage(int page)
{
  Ref pageRef;

  pageRef.num = getHints()->getPageObjectNum(page);
  if (pageRef.num == 0) {
    error(-1, "Failed to get object num from hint tables for page %d", page);
    return NULL;
  }

  // check for bogus ref - this can happen in corrupted PDF files
  const bool numInRange =
      pageRef.num >= 0 && pageRef.num < xref->getNumObjects();
  if (!numInRange) {
    error(-1, "Invalid object num (%d) for page %d", pageRef.num, page);
    return NULL;
  }

  pageRef.gen = xref->getEntry(pageRef.num)->gen;

  Object fetched;
  xref->fetch(pageRef.num, pageRef.gen, &fetched);
  if (!fetched.isDict("Page")) {
    fetched.free();
    error(-1, "Object (%d %d) is not a pageDict", pageRef.num, pageRef.gen);
    return NULL;
  }

  Dict *pageDict = fetched.getDict();
  Page *result = new Page(xref, page, pageDict, pageRef,
                          new PageAttrs(NULL, pageDict), catalog->getForm());
  fetched.free();

  return result;
}
// Returns the Page for the given 1-based page number, or NULL when the
// number is below 1 or above getNumPages().
//
// Non-linearized documents are served directly by the catalog. For
// linearized documents a per-document cache (pageCache) is created on first
// use and filled through parsePage(); if parsePage() fails, an error is
// reported and the catalog is used as a fallback.
Page *PDFDoc::getPage(int page)
{
  if (page < 1 || page > getNumPages()) {
    return NULL;
  }

  if (!isLinearized()) {
    return catalog->getPage(page);
  }

  if (!pageCache) {
    // One slot per page, all initially empty.
    pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
    int slot = 0;
    while (slot < getNumPages()) {
      pageCache[slot] = NULL;
      ++slot;
    }
  }

  const int cacheIdx = page - 1;
  if (!pageCache[cacheIdx]) {
    pageCache[cacheIdx] = parsePage(page);
  }
  if (pageCache[cacheIdx]) {
    return pageCache[cacheIdx];
  }

  error(-1, "Failed parsing page %d using hint tables", page);
  return catalog->getPage(page);
}