// blob: 6739da4a0ee70f578c0758516b1c75a12c350921 [file] [log] [blame]
//========================================================================
//
// pdf-fullrewrite.cc
//
// Copyright 2007 Julien Rebetez
// Copyright 2012 Fabio D'Urso
// Copyright 2022 Albert Astals Cid <aacid@kde.org>
//
//========================================================================
#include "GlobalParams.h"
#include "Error.h"
#include "Object.h"
#include "PDFDoc.h"
#include "XRef.h"
#include "goo/GooString.h"
#include "utils/parseargs.h"
// Forward declarations: both helpers are defined after main(), and
// compareObjects() is also needed by compareDictionaries() before its definition.
static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc);
static bool compareObjects(const Object *objA, const Object *objB);
// Password buffers. They are initialised to "\001"; main() treats a different
// first byte as "a password was supplied on the command line".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
// -i: save with writeForceIncremental instead of writeForceRewrite; also
// changes what compareDocuments() expects from the generated XRef table.
static bool forceIncremental = false;
// -check: reopen the generated document and compare it with the input.
static bool checkOutput = false;
// -h / -help / --help / -?: print the usage text and exit.
static bool printHelp = false;
// Command-line option table handed to parseArgs() and printUsage().
// The trailing empty entry presumably terminates the table (confirm against
// utils/parseargs.h).
static const ArgDesc argDesc[] = { { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
{ "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
{ "-i", argFlag, &forceIncremental, 0, "incremental update mode" },
{ "-check", argFlag, &checkOutput, 0, "verify the generated document" },
{ "-h", argFlag, &printHelp, 0, "print usage information" },
{ "-help", argFlag, &printHelp, 0, "print usage information" },
{ "--help", argFlag, &printHelp, 0, "print usage information" },
{ "-?", argFlag, &printHelp, 0, "print usage information" },
{} };
int main(int argc, char *argv[])
{
PDFDoc *doc = nullptr;
PDFDoc *docOut = nullptr;
std::optional<GooString> ownerPW;
std::optional<GooString> userPW;
int res = 0;
// parse args
bool ok = parseArgs(argDesc, &argc, argv);
if (!ok || (argc < 3) || printHelp) {
printUsage(argv[0], "INPUT-FILE OUTPUT-FILE", argDesc);
if (!printHelp) {
res = 1;
}
goto done;
}
if (ownerPassword[0] != '\001') {
ownerPW = GooString(ownerPassword);
}
if (userPassword[0] != '\001') {
userPW = GooString(userPassword);
}
// load input document
globalParams = std::make_unique<GlobalParams>();
doc = new PDFDoc(std::make_unique<GooString>(argv[1]), ownerPW, userPW);
if (!doc->isOk()) {
fprintf(stderr, "Error loading input document\n");
res = 1;
goto done;
}
// save it back (in rewrite or incremental update mode)
if (doc->saveAs(*doc->getFileName(), forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) {
fprintf(stderr, "Error saving document\n");
res = 1;
goto done;
}
if (checkOutput) {
// open the generated document to verify it
docOut = new PDFDoc(std::make_unique<GooString>(argv[2]), ownerPW, userPW);
if (!docOut->isOk()) {
fprintf(stderr, "Error loading generated document\n");
res = 1;
} else if (!compareDocuments(doc, docOut)) {
fprintf(stderr, "Verification failed\n");
res = 1;
}
}
done:
delete docOut;
delete doc;
return res;
}
// Returns true when dictA and dictB hold the same number of entries and every
// key of dictA maps, in dictB, to a value that compareObjects() accepts as
// equal. Values are taken with getValNF()/lookupNF() and compared as-is.
static bool compareDictionaries(Dict *dictA, Dict *dictB)
{
    const int entryCount = dictA->getLength();
    if (entryCount != dictB->getLength()) {
        return false;
    }

    // Walking dictA alone is enough: keys are unique and both dictionaries
    // have the same size, so a full match of dictA's keys leaves no room for
    // a key that exists only in dictB.
    int idx = 0;
    while (idx < entryCount) {
        const char *name = dictA->getKey(idx);
        const Object &lhs = dictA->getValNF(idx);
        const Object &rhs = dictB->lookupNF(name);
        if (!compareObjects(&lhs, &rhs)) {
            return false;
        }
        ++idx;
    }
    return true;
}
static bool compareObjects(const Object *objA, const Object *objB)
{
switch (objA->getType()) {
case objBool: {
if (objB->getType() != objBool) {
return false;
} else {
return (objA->getBool() == objB->getBool());
}
}
case objInt:
case objInt64:
case objReal: {
if (!objB->isNum()) {
return false;
} else {
// Fuzzy comparison
const double diff = objA->getNum() - objB->getNum();
return (-0.01 < diff) && (diff < 0.01);
}
}
case objString: {
if (objB->getType() != objString) {
return false;
} else {
const GooString *strA = objA->getString();
const GooString *strB = objB->getString();
return (strA->cmp(strB) == 0);
}
}
case objName: {
if (objB->getType() != objName) {
return false;
} else {
GooString nameA(objA->getName());
GooString nameB(objB->getName());
return (nameA.cmp(&nameB) == 0);
}
}
case objNull: {
if (objB->getType() != objNull) {
return false;
} else {
return true;
}
}
case objArray: {
if (objB->getType() != objArray) {
return false;
} else {
Array *arrayA = objA->getArray();
Array *arrayB = objB->getArray();
const int length = arrayA->getLength();
if (arrayB->getLength() != length) {
return false;
} else {
for (int i = 0; i < length; ++i) {
const Object &elemA = arrayA->getNF(i);
const Object &elemB = arrayB->getNF(i);
if (!compareObjects(&elemA, &elemB)) {
return false;
}
}
return true;
}
}
}
case objDict: {
if (objB->getType() != objDict) {
return false;
} else {
Dict *dictA = objA->getDict();
Dict *dictB = objB->getDict();
return compareDictionaries(dictA, dictB);
}
}
case objStream: {
if (objB->getType() != objStream) {
return false;
} else {
Stream *streamA = objA->getStream();
Stream *streamB = objB->getStream();
if (!compareDictionaries(streamA->getDict(), streamB->getDict())) {
return false;
} else {
int c;
streamA->reset();
streamB->reset();
do {
c = streamA->getChar();
if (c != streamB->getChar()) {
return false;
}
} while (c != EOF);
return true;
}
}
return true;
}
case objRef: {
if (objB->getType() != objRef) {
return false;
} else {
const Ref refA = objA->getRef();
const Ref refB = objB->getRef();
return refA == refB;
}
}
default: {
fprintf(stderr, "compareObjects failed: unexpected object type %u\n", objA->getType());
return false;
}
}
}
// Compares the XRef tables of the original and the regenerated document.
//
// Every discrepancy is reported on stderr and the scan continues, so a single
// run lists all problems found. Returns true only if nothing was reported.
//
// Checks, in order: the number of XRef entries, then for each entry index
// present in both tables the DontRewrite handling (full rewrite mode only),
// the generation number, the flags set by scanSpecialFlags(), the free/in-use
// status and finally the fetched object contents.
static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc)
{
    XRef *const xrefBefore = origDoc->getXRef();
    XRef *const xrefAfter = newDoc->getXRef();
    bool allMatch = true;

    // Make sure that special flags are set in both documents
    xrefBefore->scanSpecialFlags();
    xrefAfter->scanSpecialFlags();

    // Compare XRef tables' size
    const int countBefore = xrefBefore->getNumObjects();
    const int countAfter = xrefAfter->getNumObjects();
    // An incremental update of a document that uses an XRef stream is
    // expected to append one entry for the new XRef stream; in all other
    // cases the number of entries must be the same
    const bool expectExtraEntry = forceIncremental && xrefBefore->isXRefStream();
    if (expectExtraEntry) {
        if (countBefore + 1 != countAfter) {
            fprintf(stderr, "XRef table: Unexpected number of entries (%d+1 != %d)\n", countBefore, countAfter);
            allMatch = false;
        }
    } else if (countBefore != countAfter) {
        fprintf(stderr, "XRef table: Different number of entries (%d != %d)\n", countBefore, countAfter);
        allMatch = false;
    }

    // Compare each XRef entry, limited to the indices both tables have
    const int sharedCount = (countAfter < countBefore) ? countAfter : countBefore;
    for (int num = 0; num < sharedCount; ++num) {
        auto *const entryBefore = xrefBefore->getEntry(num);
        auto *const entryAfter = xrefAfter->getEntry(num);

        const XRefEntryType typeBefore = entryBefore->type;
        const XRefEntryType typeAfter = entryAfter->type;
        // For compressed entries the generation number is taken to be 0
        const int genBefore = (typeBefore == xrefEntryCompressed) ? 0 : entryBefore->gen;
        const int genAfter = (typeAfter == xrefEntryCompressed) ? 0 : entryAfter->gen;

        // Check that DontRewrite entries are freed in full rewrite mode
        if (!forceIncremental && entryBefore->getFlag(XRefEntry::DontRewrite)) {
            const bool freedCorrectly = (typeAfter == xrefEntryFree) && (genBefore + 1 == genAfter);
            if (!freedCorrectly) {
                fprintf(stderr, "XRef entry %u: DontRewrite entry was not freed correctly\n", num);
                allMatch = false;
            }
            continue; // There's nothing left to check for this entry
        }

        // Compare generation numbers
        // Object num 0 should always have gen 65535 according to specs, but some
        // documents have it set to 0. We always write 65535 in output
        if (num == 0) {
            if (genAfter != 65535) {
                fprintf(stderr, "XRef entry %u: generation number was expected to be 65535 (%d != 65535)\n", num, genAfter);
                allMatch = false;
                continue;
            }
        } else if (genBefore != genAfter) {
            fprintf(stderr, "XRef entry %u: generation numbers differ (%d != %d)\n", num, genBefore, genAfter);
            allMatch = false;
            continue;
        }

        // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags()
        if (entryBefore->flags != entryAfter->flags) {
            fprintf(stderr, "XRef entry %u: flags detected by scanSpecialFlags differ (%d != %d)\n", num, entryBefore->flags, entryAfter->flags);
            allMatch = false;
        }

        // Check that either both are free or both are in use
        const bool freeBefore = (typeBefore == xrefEntryFree);
        const bool freeAfter = (typeAfter == xrefEntryFree);
        if (freeBefore != freeAfter) {
            const char *statusBefore = freeBefore ? "free" : "in use";
            const char *statusAfter = freeAfter ? "free" : "in use";
            fprintf(stderr, "XRef entry %u: usage status differs (%s != %s)\n", num, statusBefore, statusAfter);
            allMatch = false;
            continue;
        }

        // Skip free entries
        if (freeBefore) {
            continue;
        }

        // Compare contents
        Object objBefore = xrefBefore->fetch(num, genBefore);
        Object objAfter = xrefAfter->fetch(num, genAfter);
        if (!compareObjects(&objBefore, &objAfter)) {
            fprintf(stderr, "XRef entry %u: contents differ\n", num);
            allMatch = false;
        }
    }

    return allMatch;
}