blob: 99ddbfe548c6ca0b7702f66d8cd1ba73ace33a39 [file] [log] [blame]
//========================================================================
//
// StructTreeRoot.cc
//
// This file is licensed under the GPLv2 or later
//
// Copyright 2013, 2014 Igalia S.L.
// Copyright 2014 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright 2017 Jan-Erik S <janerik234678@gmail.com>
// Copyright 2017-2019, 2023 Albert Astals Cid <aacid@kde.org>
// Copyright 2017, 2018 Adrian Johnson <ajohnson@redneon.com>
// Copyright 2018, Adam Reichold <adam.reichold@t-online.de>
//
//========================================================================
#include "goo/GooString.h"
#include "StructTreeRoot.h"
#include "StructElement.h"
#include "PDFDoc.h"
#include "Object.h"
#include "Dict.h"
#include <set>
#include <cassert>
StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict *structTreeRootDict) : doc(docA)
{
assert(doc);
assert(structTreeRootDict);
parse(structTreeRootDict);
}
StructTreeRoot::~StructTreeRoot()
{
for (StructElement *element : elements) {
delete element;
}
}
void StructTreeRoot::parse(Dict *root)
{
// The RoleMap/ClassMap dictionaries are needed by all the parsing
// functions, which will resolve the custom names to canonical
// standard names.
roleMap = root->lookup("RoleMap");
classMap = root->lookup("ClassMap");
// ParentTree (optional). If present, it must be a number tree,
// otherwise it is not possible to map stream objects to their
// corresponding structure element. Here only the references are
// loaded into the array, the pointers to the StructElements will
// be filled-in later when parsing them.
const Object parentTreeObj = root->lookup("ParentTree");
if (parentTreeObj.isDict()) {
parseNumberTreeNode(parentTreeObj.getDict());
}
RefRecursionChecker seenElements;
// Parse the children StructElements
const bool marked = doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked;
Object kids = root->lookup("K");
if (kids.isArray()) {
if (marked && kids.arrayGetLength() > 1) {
error(errSyntaxWarning, -1, "K in StructTreeRoot has more than one children in a tagged PDF");
}
for (int i = 0; i < kids.arrayGetLength(); i++) {
const Object &ref = kids.arrayGetNF(i);
if (ref.isRef()) {
seenElements.insert(ref.getRef());
}
Object obj = kids.arrayGet(i);
if (obj.isDict()) {
StructElement *child = new StructElement(obj.getDict(), this, nullptr, seenElements);
if (child->isOk()) {
if (marked && !(child->getType() == StructElement::Document || child->getType() == StructElement::Part || child->getType() == StructElement::Art || child->getType() == StructElement::Div)) {
error(errSyntaxWarning, -1, "StructTreeRoot element of tagged PDF is wrong type ({0:s})", child->getTypeName());
}
appendChild(child);
if (ref.isRef()) {
parentTreeAdd(ref.getRef(), child);
}
} else {
error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
delete child;
}
} else {
error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", obj.getTypeName());
}
}
} else if (kids.isDict()) {
StructElement *child = new StructElement(kids.getDict(), this, nullptr, seenElements);
if (child->isOk()) {
appendChild(child);
const Object &ref = root->lookupNF("K");
if (ref.isRef()) {
parentTreeAdd(ref.getRef(), child);
}
} else {
error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
delete child;
}
} else if (!kids.isNull()) {
error(errSyntaxWarning, -1, "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName());
}
// refToParentMap is only used during parsing. Ensure all memory used by it is freed.
std::multimap<Ref, Parent *>().swap(refToParentMap);
}
void StructTreeRoot::parseNumberTreeNode(Dict *node)
{
Object kids = node->lookup("Kids");
if (kids.isArray()) {
for (int i = 0; i < kids.arrayGetLength(); i++) {
Object obj = kids.arrayGet(i);
if (obj.isDict()) {
parseNumberTreeNode(obj.getDict());
} else {
error(errSyntaxError, -1, "Kids item at position {0:d} is wrong type ({1:s})", i, obj.getTypeName());
}
}
return;
} else if (!kids.isNull()) {
error(errSyntaxError, -1, "Kids object is wrong type ({0:s})", kids.getTypeName());
}
Object nums = node->lookup("Nums");
if (nums.isArray()) {
if (nums.arrayGetLength() % 2 == 0) {
// keys in even positions, references in odd ones
for (int i = 0; i < nums.arrayGetLength(); i += 2) {
Object key = nums.arrayGet(i);
if (!key.isInt()) {
error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i, key.getTypeName());
continue;
}
int keyVal = key.getInt();
std::vector<Parent> &vec = parentTree[keyVal];
Object valueArray = nums.arrayGet(i + 1);
if (valueArray.isArray()) {
vec.resize(valueArray.arrayGetLength());
for (int j = 0; j < valueArray.arrayGetLength(); j++) {
const Object &itemvalue = valueArray.arrayGetNF(j);
if (itemvalue.isRef()) {
Ref ref = itemvalue.getRef();
vec[j].ref = ref;
refToParentMap.insert(std::pair<Ref, Parent *>(ref, &vec[j]));
} else if (!itemvalue.isNull()) {
error(errSyntaxError, -1, "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName());
}
}
} else {
const Object &valueRef = nums.arrayGetNF(i + 1);
if (valueRef.isRef()) {
Ref ref = valueRef.getRef();
vec.resize(1);
vec[0].ref = ref;
refToParentMap.insert(std::pair<Ref, Parent *>(ref, &vec[0]));
} else {
error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i + 1, valueRef.getTypeName());
}
}
}
} else {
error(errSyntaxError, -1, "Nums array length is not a even ({0:d})", nums.arrayGetLength());
}
} else {
error(errSyntaxError, -1, "Nums object is wrong type ({0:s})", nums.getTypeName());
}
}
void StructTreeRoot::parentTreeAdd(const Ref objectRef, StructElement *element)
{
auto range = refToParentMap.equal_range(objectRef);
for (auto it = range.first; it != range.second; ++it) {
it->second->element = element;
}
}