Add pdfextract and pdfmerge
See "Creating PDF with poppler ?" thread for more info
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index beeedb8..90030cd 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -102,3 +102,18 @@
install(TARGETS pdftohtml DESTINATION bin)
install(FILES pdftohtml.1 DESTINATION share/man/man1)
+# pdfextract: executable built from pdfextract.cc together with ${common_srcs}
+set(pdfextract_SOURCES ${common_srcs}
+ pdfextract.cc
+)
+add_executable(pdfextract ${pdfextract_SOURCES})
+target_link_libraries(pdfextract ${common_libs})
+install(TARGETS pdfextract DESTINATION bin)  # only the binary is installed; no man page for this tool
+
+# pdfmerge: executable built from pdfmerge.cc together with ${common_srcs}
+set(pdfmerge_SOURCES ${common_srcs}
+ pdfmerge.cc
+)
+add_executable(pdfmerge ${pdfmerge_SOURCES})
+target_link_libraries(pdfmerge ${common_libs})
+install(TARGETS pdfmerge DESTINATION bin)  # only the binary is installed; no man page for this tool
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 4faddad..30328f2 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -50,6 +50,8 @@
pdftops \
pdftotext \
pdftohtml \
+ pdfextract \
+ pdfmerge \
$(pdftoppm_binary) \
$(pdftocairo_binary)
@@ -102,6 +104,14 @@
HtmlUtils.h \
$(common)
+pdfextract_SOURCES = \
+ pdfextract.cc \
+ $(common)
+
+pdfmerge_SOURCES = \
+ pdfmerge.cc \
+ $(common)
+
# Yay, automake! It should be able to figure out that it has to dist
# pdftoppm.1, but nooo. So we just add it here.
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
new file mode 100644
index 0000000..c8c4749
--- /dev/null
+++ b/utils/pdfextract.cc
@@ -0,0 +1,111 @@
+//========================================================================
+//
+// pdfextract.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0;  // value of -f; 0 means "not given", extractPages() then starts at page 1
+static int lastPage = 0;  // value of -l; 0 means "not given", extractPages() then uses the page count
+static GBool printVersion = gFalse;  // flag for -v
+static GBool printHelp = gFalse;  // flag for -h, -help, --help and -?
+
+static const ArgDesc argDesc[] = {  // option table passed to parseArgs() and printUsage() in main()
+ {"-f", argInt, &firstPage, 0,
+ "first page to extract"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to extract"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}  // terminating entry
+};
+
+// Saves pages firstPage..lastPage of srcFileName to one file per page, named by
+// formatting the page number into the printf-style pattern destFileName. A page
+// option left at 0 defaults to 1 / the page count. Returns true on success.
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  // doc is a local so it is destroyed on every return path (it used to leak).
+  // NOTE(review): assumes PDFDoc owns and frees the file-name GooString -- confirm in PDFDoc.h.
+  PDFDoc doc (new GooString (srcFileName), NULL, NULL, NULL);
+  if (!doc.isOk()) {
+    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+    return false;
+  }
+  if (doc.isEncrypted()) {
+    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+    return false;
+  }
+  if (firstPage == 0)
+    firstPage = 1;
+  if (lastPage == 0)
+    lastPage = doc.getNumPages();
+  if (firstPage < 1 || firstPage > lastPage || lastPage > doc.getNumPages()) {
+    error(-1, "Invalid page range %d-%d for file '%s'", firstPage, lastPage, srcFileName);
+    return false;
+  }
+  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+    // Bounded formatting; a negative result becomes a huge size_t and is rejected too.
+    if ((size_t) snprintf (pathName, sizeof (pathName), destFileName, pageNo) >= sizeof (pathName)) {
+      error(-1, "Destination file name is too long for page %d", pageNo);
+      return false;
+    }
+    GooString gpageName (pathName);
+    if (doc.savePageAs(&gpageName, pageNo) != errNone)
+      return false;
+  }
+  return true;
+}
+
+// Entry point. Exits with 0 when the pages were extracted or when version/help
+// output was requested, and with 99 on a usage error or a failed extraction.
+int
+main (int argc, char *argv[])
+{
+  int exitCode = 99;
+
+  // parse args
+  const GBool ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+  {
+    fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
+    fprintf (stderr, "%s\n", popplerCopyright);
+    fprintf (stderr, "%s\n", xpdfCopyright);
+    if (!printVersion)
+    {
+      printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
+                  argDesc);
+    }
+    // Asking for the version or for help is not an error.
+    if (printVersion || printHelp)
+      exitCode = 0;
+    return exitCode;
+  }
+
+  // The result of extractPages() used to be dropped, so even a successful run
+  // exited with 99; report success explicitly.
+  if (extractPages (argv[1], argv[2]))
+    exitCode = 0;
+  return exitCode;
+}
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
new file mode 100644
index 0000000..28f7265
--- /dev/null
+++ b/utils/pdfmerge.cc
@@ -0,0 +1,176 @@
+//========================================================================
+//
+// pdfmerge.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse;  // flag for -v
+static GBool printHelp = gFalse;  // flag for -h, -help, --help and -?
+
+static const ArgDesc argDesc[] = {  // option table listed by printUsage() in main()
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}  // terminating entry
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+// Returns 0 on success and for the version/help flags, 99 on a usage error,
+// -1 if an input is damaged or encrypted or the output cannot be opened.
+///////////////////////////////////////////////////////////////////////////
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;   // fetched page dictionaries, in output order
+  std::vector<Guint> offsets;  // numOffset in effect for the matching entry of pages
+  std::vector<PDFDoc *> docs;
+  int i, j;
+  int majorVersion = 0;
+  int minorVersion = 0;
+
+  // Parse the option flags first: without this call printVersion/printHelp
+  // were never set and "-v" or "-h" was treated as an input file name.
+  const GBool ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+                 argDesc);
+    }
+    return (printVersion || printHelp) ? 0 : 99;
+  }
+  // Read only now: parseArgs() receives &argc and may have dropped the flags.
+  char *fileName = argv[argc - 1];
+
+  // Open every input and remember the highest PDF version among them.
+  for (i = 1; i < argc - 1; i++) {
+    GooString *gfileName = new GooString(argv[i]);
+    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+    if (doc->isOk() && !doc->isEncrypted()) {
+      docs.push_back(doc);
+      if (doc->getPDFMajorVersion() > majorVersion) {
+        majorVersion = doc->getPDFMajorVersion();
+        minorVersion = doc->getPDFMinorVersion();
+      } else if (doc->getPDFMajorVersion() == majorVersion) {
+        if (doc->getPDFMinorVersion() > minorVersion) {
+          minorVersion = doc->getPDFMinorVersion();
+        }
+      }
+    } else if (doc->isOk()) {
+      error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+      return -1;
+    } else {
+      error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+      return -1;
+    }
+  }
+
+  FILE *f = fopen(fileName, "wb");
+  if (!f) {
+    error(-1, "Could not open file '%s'", fileName);
+    return -1;
+  }
+  OutStream *outStr = new FileOutStream(f, 0);
+  XRef *yRef = new XRef();
+  XRef *countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+        docs[i]->getCatalog()->getPage(j)->getRotate(),
+        docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;
+  }
+
+  // Catalog object; its /Pages entry points at the object written next.
+  int rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  // size() returns size_t; cast so the argument matches the %d conversion.
+  outStr->printf(" ] /Count %d >>\nendobj\n", (int) pages.size());
+  objectsCount++;
+
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0) outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+    &ref, yRef, fileName, outStr->getPos());
+
+  outStr->close();
+  delete outStr;  // released before the underlying FILE is closed; it used to leak
+  fclose(f);
+  delete yRef;
+  delete countRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  return 0;
+}