Rework writing of PDF files
Makes it more compatible with other PDF readers
See "Creating PDF with poppler ?" thread in the mailing list for more info
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index aa52140..01d2759 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -26,6 +26,7 @@
// Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
// Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -573,6 +574,121 @@
return hints;
}
+// Save a single page of this document as a stand-alone PDF file.
+//
+//   name   - path of the file to create (opened with mode "wb")
+//   pageNo - 1-based page number, must lie in [1, getNumPages()]
+//
+// Returns errNone on success, or errOpenFile when pageNo is out of range
+// or the output file cannot be opened.
+//
+// Side effect: replacePageDict() runs before the output file is opened, so
+// the page dictionary held by this document's XRef is rewritten with
+// explicit MediaBox/CropBox/Rotate entries even if the save fails later.
+int PDFDoc::savePageAs(GooString *name, int pageNo)
+{
+  FILE *f;
+  OutStream *outStr;
+  XRef *yRef, *countRef;
+  // The three objects synthesized below (Catalog, Pages, Page) are numbered
+  // rootNum, rootNum + 1 and rootNum + 2. Copied objects keep their original
+  // numbers (numOffset is 0), so the new ones start past the source xref size.
+  int rootNum = getXRef()->getSize() + 1;
+
+  if (pageNo < 1 || pageNo > getNumPages()) {
+    error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() );
+    return errOpenFile;
+  }
+  PDFRectangle *cropBox = NULL;
+  if (getCatalog()->getPage(pageNo)->isCropped()) {
+    cropBox = getCatalog()->getPage(pageNo)->getCropBox();
+  }
+  replacePageDict(pageNo,
+    getCatalog()->getPage(pageNo)->getRotate(),
+    getCatalog()->getPage(pageNo)->getMediaBox(),
+    cropBox, NULL);
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+
+  if (!(f = fopen(name->getCString(), "wb"))) {
+    error(-1, "Couldn't open file '%s'", name->getCString());
+    page.free(); // do not leak the fetched page object on the error path
+    return errOpenFile;
+  }
+  outStr = new FileOutStream(f,0);
+
+  // yRef collects the objects that go into the new file; countRef only
+  // receives the per-object reference counts maintained by markObject().
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
+
+  // get and mark optional content groups
+  OCGs *ocgs = getCatalog()->getOptContentConfig();
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    Dict *ocPropsDict = optContentProps.getDict();
+    markPageObjects(ocPropsDict, yRef, countRef, 0);
+    catDict.free();
+    optContentProps.free();
+  }
+
+  // mark everything the page refers to (except its Parent), then copy it
+  Dict *pageDict = page.getDict();
+  markPageObjects(pageDict, yRef, countRef, 0);
+  Guint objectsCount = writePageObjects(outStr, yRef, 0);
+
+  // new catalog, carrying over the OCProperties entries if there are any
+  yRef->add(rootNum,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    outStr->printf(" /OCProperties <<");
+    Dict *ocPropsDict = optContentProps.getDict();
+    for (int n = 0; n < ocPropsDict->getLength(); n++) {
+      if (n > 0) outStr->printf(" ");
+      const char *key = ocPropsDict->getKey(n);
+      Object value; ocPropsDict->getValNF(n, &value);
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+      value.free();
+    }
+    outStr->printf(" >> ");
+    catDict.free();
+    optContentProps.free();
+  }
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  // new page tree root with exactly one kid
+  yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 2);
+  outStr->printf("endobj\n");
+  objectsCount++;
+
+  // the page itself: copy every entry, but point Parent at the new tree root
+  yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 2);
+  outStr->printf("<< ");
+  for (int n = 0; n < pageDict->getLength(); n++) {
+    if (n > 0) outStr->printf(" ");
+    const char *key = pageDict->getKey(n);
+    Object value; pageDict->getValNF(n, &value);
+    if (strcmp(key, "Parent") == 0) {
+      outStr->printf("/Parent %d 0 R", rootNum + 1);
+    } else {
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+    }
+    value.free();
+  }
+  outStr->printf(" >>\nendobj\n");
+  objectsCount++;
+  page.free();
+
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos());
+
+  outStr->close();
+  // release the stream wrapper before closing the FILE it writes to
+  delete outStr;
+  fclose(f);
+  delete yRef;
+  delete countRef;
+
+  return errNone;
+}
+
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
FILE *f;
OutStream *outStr;
@@ -740,7 +856,7 @@
}
-void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
+void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
{
Object obj1;
outStr->printf("<<");
@@ -749,7 +865,7 @@
GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
outStr->printf("/%s ", keyNameToPrint->getCString());
delete keyNameToPrint;
- writeObject(dict->getValNF(i, &obj1), NULL, outStr);
+ writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset);
obj1.free();
}
outStr->printf(">> ");
@@ -805,18 +921,24 @@
const char* c = s->getCString();
outStr->printf("(");
for(int i=0; i<s->getLength(); i++) {
- char unescaped = (*c)&0x000000ff;
+ char unescaped = *(c+i)&0x000000ff;
//escape if needed
- if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
- outStr->printf("%c", '\\');
- outStr->printf("%c", unescaped);
- c++;
+ if (unescaped == '\r')
+ outStr->printf("\\r");
+ else if (unescaped == '\n')
+ outStr->printf("\\n");
+ else {
+ if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
+ outStr->printf("%c", '\\');
+ }
+ outStr->printf("%c", unescaped);
+ }
}
outStr->printf(") ");
}
}
-Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset)
{
Array *array;
Object obj1;
@@ -858,13 +980,13 @@
array = obj->getArray();
outStr->printf("[");
for (int i=0; i<array->getLength(); i++) {
- writeObject(array->getNF(i, &obj1), NULL,outStr);
+ writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
obj1.free();
}
outStr->printf("] ");
break;
case objDict:
- writeDictionnary (obj->getDict(),outStr);
+ writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
break;
case objStream:
{
@@ -886,7 +1008,7 @@
stream->getDict()->remove("Filter");
stream->getDict()->remove("DecodeParms");
- writeDictionnary (stream->getDict(),outStr);
+ writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
writeStream (stream,outStr);
obj1.free();
} else {
@@ -896,23 +1018,23 @@
BaseStream *bs = fs->getBaseStream();
if (bs) {
Guint streamEnd;
- if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
+ if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
Object val;
val.initInt(streamEnd - bs->getStart());
stream->getDict()->set("Length", &val);
}
}
}
- writeDictionnary (stream->getDict(), outStr);
+ writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
writeRawStream (stream, outStr);
}
break;
}
case objRef:
- outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
+ outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
break;
case objCmd:
- outStr->printf("cmd\r\n");
+ outStr->printf("%s\n", obj->getCmd());
break;
case objError:
outStr->printf("error\r\n");
@@ -932,9 +1054,12 @@
return offset;
}
-void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize,
+ OutStream* outStr, GBool incrUpdate,
+ Guint startxRef, Ref *root, XRef *xRef, const char *fileName,
+ Guint fileSize)
{
- Dict *trailerDict = new Dict(xref);
+ Dict *trailerDict = new Dict(xRef);
Object obj1;
obj1.initInt(uxrefSize);
trailerDict->set("Size", &obj1);
@@ -950,23 +1075,13 @@
char buffer[256];
sprintf(buffer, "%i", (int)time(NULL));
message.append(buffer);
- if (fileName)
- message.append(fileName);
- else
- message.append("streamwithoutfilename.pdf");
- // file size
- unsigned int fileSize = 0;
- int c;
- str->reset();
- while ((c = str->getChar()) != EOF) {
- fileSize++;
- }
- str->close();
+ message.append(fileName);
+
sprintf(buffer, "%i", fileSize);
message.append(buffer);
//info dict -- only use text string
- if (xref->getDocInfo(&obj1)->isDict()) {
+ if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
for(int i=0; i<obj1.getDict()->getLength(); i++) {
Object obj2;
obj1.getDict()->getVal(i, &obj2);
@@ -985,12 +1100,12 @@
//create ID array
Object obj2,obj3,obj5;
- obj2.initArray(xref);
+ obj2.initArray(xRef);
if (incrUpdate) {
Object obj4;
//only update the second part of the array
- xref->getTrailerDict()->getDict()->lookup("ID", &obj4);
+ xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
if (!obj4.isArray()) {
error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
} else {
@@ -1010,22 +1125,23 @@
trailerDict->set("ID", &obj2);
}
-
- obj1.initRef(xref->getRootNum(), xref->getRootGen());
+ obj1.initRef(root->num, root->gen);
trailerDict->set("Root", &obj1);
if (incrUpdate) {
- obj1.initInt(getStartXRef());
+ obj1.initInt(startxRef);
trailerDict->set("Prev", &obj1);
}
- xref->getDocInfoNF(&obj5);
- if (!obj5.isNull()) {
- trailerDict->set("Info", &obj5);
+ if (!xRef->getTrailerDict()->isNone()) {
+ xRef->getDocInfoNF(&obj5);
+ if (!obj5.isNull()) {
+ trailerDict->set("Info", &obj5);
+ }
}
outStr->printf( "trailer\r\n");
- writeDictionnary(trailerDict, outStr);
+ writeDictionnary(trailerDict, outStr, xRef, 0);
outStr->printf( "\r\nstartxref\r\n");
outStr->printf( "%i\r\n", uxrefOffset);
outStr->printf( "%%%%EOF\r\n");
@@ -1033,6 +1149,201 @@
delete trailerDict;
}
+// Convenience overload that writes the trailer for this document itself.
+// It gathers the document's file name (or a fixed placeholder when there is
+// none), the byte length of the input stream, the root reference and the
+// startxref offset, then forwards to the static writeTrailer() overload.
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+{
+  // const: one branch points at a string literal, and the static overload
+  // takes a const char * anyway
+  const char *fileNameA;
+  if (fileName)
+    fileNameA = fileName->getCString();
+  else
+    fileNameA = "streamwithoutfilename.pdf";
+  // file size: count the bytes of the input stream by reading it to EOF
+  unsigned int fileSize = 0;
+  int c;
+  str->reset();
+  while ((c = str->getChar()) != EOF) {
+    fileSize++;
+  }
+  str->close();
+  Ref ref;
+  ref.num = getXRef()->getRootNum();
+  ref.gen = getXRef()->getRootGen();
+  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
+}
+
+// Writes the PDF file header to outStr: the "%PDF-<major>.<minor>" version
+// line followed by a second '%' comment line made of four bytes >= 0x80.
+// NOTE(review): the second line is presumably the usual "binary file"
+// marker recommended for PDF headers - confirm against the PDF spec.
+void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
+{
+ outStr->printf("%%PDF-%d.%d\n", major, minor);
+ outStr->printf("%%\xE2\xE3\xCF\xD3\n");
+}
+
+// Runs markObject() on every value stored in dict. Values are taken
+// without resolving references (getValNF), so markObject() sees the
+// references themselves.
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset)
+{
+  int idx = 0;
+  while (idx < dict->getLength()) {
+    Object entryVal;
+    dict->getValNF(idx, &entryVal);
+    markObject(&entryVal, xRef, countRef, numOffset);
+    entryVal.free();
+    ++idx;
+  }
+}
+
+// Recursively walks obj and registers every indirect object it refers to.
+//
+//   obj       - object to inspect; arrays, dictionaries and stream
+//               dictionaries are traversed, references are followed
+//   xRef      - table receiving one in-use entry per reachable object, at
+//               index (object number + numOffset)
+//   countRef  - table whose entries' gen field is used as a counter of how
+//               often each reference was encountered
+//   numOffset - value added to every source object number
+//
+// A referenced object is fetched and descended into only the first time it
+// is marked in xRef. Descending again on later encounters marks nothing
+// new and never terminates on cyclic reference graphs.
+void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset)
+{
+  Array *array;
+  Object obj1;
+
+  switch (obj->getType()) {
+    case objArray:
+      array = obj->getArray();
+      for (int i=0; i<array->getLength(); i++) {
+        markObject(array->getNF(i, &obj1), xRef, countRef, numOffset);
+        obj1.free();
+      }
+      break;
+    case objDict:
+      markDictionnary (obj->getDict(), xRef, countRef, numOffset);
+      break;
+    case objStream:
+      {
+        Stream *stream = obj->getStream();
+        markDictionnary (stream->getDict(), xRef, countRef, numOffset);
+      }
+      break;
+    case objRef:
+      {
+        const Ref srcRef = obj->getRef();
+        const int dstNum = srcRef.num + (int) numOffset;
+        GBool firstVisit = gFalse;
+
+        if (dstNum >= xRef->getNumObjects() || xRef->getEntry(dstNum)->type == xrefEntryFree) {
+          if (getXRef()->getEntry(srcRef.num)->type == xrefEntryFree) {
+            return;  // already marked as free => should be replaced
+          }
+          xRef->add(dstNum, srcRef.gen, 0, gTrue);
+          if (getXRef()->getEntry(srcRef.num)->type == xrefEntryCompressed) {
+            xRef->getEntry(dstNum)->type = xrefEntryCompressed;
+          }
+          firstVisit = gTrue;
+        }
+
+        // count this encounter of the reference (gen doubles as the counter)
+        if (dstNum >= countRef->getNumObjects() ||
+            countRef->getEntry(dstNum)->type == xrefEntryFree)
+        {
+          countRef->add(dstNum, 1, 0, gTrue);
+        } else {
+          XRefEntry *entry = countRef->getEntry(dstNum);
+          entry->gen++;
+        }
+
+        // descend only once per object; this is what breaks reference cycles
+        if (firstVisit) {
+          Object target;
+          getXRef()->fetch(srcRef.num, srcRef.gen, &target);
+          markObject(&target, xRef, countRef, numOffset);
+          target.free();
+        }
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Initializes box as a four-element array of reals holding rect's
+// x1, y1, x2 and y2, in that order.
+static void initBoxArray(Object *box, XRef *xref, PDFRectangle *rect)
+{
+  Object coord;
+  box->initArray(xref);
+  coord.initReal(rect->x1);
+  box->arrayAdd(&coord);
+  coord.initReal(rect->y1);
+  box->arrayAdd(&coord);
+  coord.initReal(rect->x2);
+  box->arrayAdd(&coord);
+  coord.initReal(rect->y2);
+  box->arrayAdd(&coord);
+}
+
+// Rewrites the dictionary of page pageNo and registers the result as a
+// modified object in this document's XRef.
+//
+//   rotate   - value written as the page's Rotate entry
+//   mediaBox - written as MediaBox; must not be NULL
+//   cropBox  - written as CropBox when not NULL
+//   pageCTM  - when not NULL, added as a new indirect object and put in
+//              front of the page's existing Contents
+//
+// Existing MediaBox, CropBox, ArtBox, BleedBox, TrimBox and Rotate entries
+// are removed first. All temporaries live on the stack. This assumes
+// Array::add and Dict::add copy the Object they are given (not visible
+// here), in which case heap-allocated wrappers would simply leak.
+void PDFDoc::replacePageDict(int pageNo, int rotate,
+                             PDFRectangle *mediaBox,
+                             PDFRectangle *cropBox, Object *pageCTM)
+{
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+  Dict *pageDict = page.getDict();
+  pageDict->remove("MediaBox");
+  pageDict->remove("CropBox");
+  pageDict->remove("ArtBox");
+  pageDict->remove("BleedBox");
+  pageDict->remove("TrimBox");
+  pageDict->remove("Rotate");
+
+  Object mediaBoxObj;
+  initBoxArray(&mediaBoxObj, getXRef(), mediaBox);
+  pageDict->add(copyString("MediaBox"), &mediaBoxObj);
+
+  if (cropBox != NULL) {
+    Object cropBoxObj;
+    initBoxArray(&cropBoxObj, getXRef(), cropBox);
+    pageDict->add(copyString("CropBox"), &cropBoxObj);
+  }
+
+  Object rotateObj;
+  rotateObj.initInt(rotate);
+  pageDict->add(copyString("Rotate"), &rotateObj);
+
+  if (pageCTM != NULL) {
+    Ref cmRef = getXRef()->addIndirectObject(pageCTM);
+    Object ref;
+    ref.initRef(cmRef.num, cmRef.gen);
+
+    Object contents;
+    pageDict->lookupNF("Contents", &contents);
+
+    // the new Contents array always starts with the CTM object
+    Object newContents;
+    newContents.initArray(getXRef());
+    newContents.arrayAdd(&ref);
+    if (contents.getType() == objRef) {
+      newContents.arrayAdd(&contents);
+    } else if (contents.isArray()) {
+      for (int i = 0; i < contents.arrayGetLength(); i++) {
+        Object contentEle;
+        contents.arrayGetNF(i, &contentEle);
+        newContents.arrayAdd(&contentEle);
+      }
+      // drop our own copy of the old array; its elements were copied above
+      contents.free();
+    }
+    pageDict->remove("Contents");
+    pageDict->add(copyString("Contents"), &newContents);
+  }
+  getXRef()->setModifiedObject(&page, *refPage);
+  page.free();
+}
+
+// Marks, via markObject(), everything referenced from the entries of
+// pageDict. The "Parent" entry is left out.
+void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset)
+{
+  for (int idx = 0; idx < pageDict->getLength(); ++idx) {
+    if (strcmp(pageDict->getKey(idx), "Parent") == 0) {
+      continue; // the parent link is deliberately not followed
+    }
+    Object entry;
+    pageDict->getValNF(idx, &entry);
+    markObject(&entry, xRef, countRef, numOffset);
+    entry.free();
+  }
+}
+
+// Writes every non-free entry of xRef, starting at index numOffset, to
+// outStr. Each object is fetched from this document's own XRef under the
+// number (index - numOffset), and the offset returned by writeObject() is
+// stored back into xRef. Returns the number of objects written.
+Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset)
+{
+  Guint written = 0;
+
+  for (int num = numOffset; num < xRef->getNumObjects(); ++num) {
+    if (xRef->getEntry(num)->type == xrefEntryFree) {
+      continue;
+    }
+    Ref target;
+    target.num = num;
+    target.gen = xRef->getEntry(num)->gen;
+    ++written;
+
+    Object fetched;
+    getXRef()->fetch(target.num - numOffset, target.gen, &fetched);
+    Guint startPos = writeObject(&fetched, &target, outStr, xRef, numOffset);
+    xRef->add(target.num, target.gen, startPos, gTrue);
+    fetched.free();
+  }
+  return written;
+}
+
#ifndef DISABLE_OUTLINE
Outline *PDFDoc::getOutline()
{
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index a7113c8..92cee78 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -22,6 +22,7 @@
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -219,6 +220,8 @@
//Return the PDF ID in the trailer dictionary (if any).
GBool getID(GooString *permanent_id, GooString *update_id);
+ // Save one page with another name.
+ int savePageAs(GooString *name, int pageNo);
// Save this file with another name.
int saveAs(GooString *name, PDFWriteMode mode=writeStandard);
// Save this file in the given output stream.
@@ -231,14 +234,31 @@
// Return a pointer to the GUI (XPDFCore or WinPDFCore object).
void *getGUIData() { return guiData; }
+ // rewrite pageDict with MediaBox, CropBox and new page CTM
+ // (cropBox and pageCTM may be NULL, in which case they are not written)
+ void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM);
+ // mark in xRef all objects referenced from pageDict (its Parent entry is
+ // skipped); countRef receives per-object reference counts and numOffset
+ // is added to every object number
+ void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset);
+ // write all objects used by pageDict to outStr
+ Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset);
+ // write obj to outStr; numOffset is added to the object number of every
+ // reference that gets written
+ static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset);
+ // write the "%PDF-major.minor" header lines
+ static void writeHeader(OutStream *outStr, int major, int minor);
+ // write the trailer dictionary, startxref and %%EOF; root becomes the Root
+ // entry and startxRef the Prev entry (only when incrUpdate is set)
+ static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate,
+ Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize);
+
private:
+ // insert referenced objects in XRef
+ void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset);
+ void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset);
+ static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset);
+
// Add object to current file stream and return the offset of the beginning of the object
- Guint writeObject (Object *obj, Ref *ref, OutStream* outStr);
- void writeDictionnary (Dict* dict, OutStream* outStr);
- void writeStream (Stream* str, OutStream* outStr);
- void writeRawStream (Stream* str, OutStream* outStr);
+ Guint writeObject (Object *obj, Ref *ref, OutStream* outStr)
+ { return writeObject(obj, ref, outStr, getXRef(), 0); }
+ void writeDictionnary (Dict* dict, OutStream* outStr)
+ { writeDictionnary(dict, outStr, getXRef(), 0); }
+ static void writeStream (Stream* str, OutStream* outStr);
+ static void writeRawStream (Stream* str, OutStream* outStr);
void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate);
- void writeString (GooString* s, OutStream* outStr);
+ static void writeString (GooString* s, OutStream* outStr);
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);