blob: 24366ab014c1c8e222ae8020dbbf85367e69cdd8 [file] [log] [blame]
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkPDFDocumentPriv.h"
#include "SkPDFTag.h"
// Table 333 in PDF 32000-1:2008
//
// Maps a SkPDF::DocumentStructureType enumerator to the string that is written
// as the structure element's "S" name. The string is the enumerator's
// identifier with the leading 'k' removed (kDocument -> "Document").
// Falls through to SK_ABORT("bad tag") if the value matches none of the cases.
static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
#define TAG_CASE(Name) \
    case SkPDF::DocumentStructureType::k##Name: return #Name
    switch (type) {
        // Grouping elements.
        TAG_CASE(Document);
        TAG_CASE(Part);
        TAG_CASE(Art);
        TAG_CASE(Sect);
        TAG_CASE(Div);
        TAG_CASE(BlockQuote);
        TAG_CASE(Caption);
        TAG_CASE(TOC);
        TAG_CASE(TOCI);
        TAG_CASE(Index);
        TAG_CASE(NonStruct);
        TAG_CASE(Private);
        // Headings and paragraphs.
        TAG_CASE(H);
        TAG_CASE(H1);
        TAG_CASE(H2);
        TAG_CASE(H3);
        TAG_CASE(H4);
        TAG_CASE(H5);
        TAG_CASE(H6);
        TAG_CASE(P);
        // Lists.
        TAG_CASE(L);
        TAG_CASE(LI);
        TAG_CASE(Lbl);
        TAG_CASE(LBody);
        // Tables.
        TAG_CASE(Table);
        TAG_CASE(TR);
        TAG_CASE(TH);
        TAG_CASE(TD);
        TAG_CASE(THead);
        TAG_CASE(TBody);
        TAG_CASE(TFoot);
        // Inline-level elements.
        TAG_CASE(Span);
        TAG_CASE(Quote);
        TAG_CASE(Note);
        TAG_CASE(Reference);
        TAG_CASE(BibEntry);
        TAG_CASE(Code);
        TAG_CASE(Link);
        TAG_CASE(Annot);
        // Ruby and Warichu.
        TAG_CASE(Ruby);
        TAG_CASE(RB);
        TAG_CASE(RT);
        TAG_CASE(RP);
        TAG_CASE(Warichu);
        TAG_CASE(WT);
        TAG_CASE(WP);
        // Illustrations.
        TAG_CASE(Figure);
        TAG_CASE(Formula);
        TAG_CASE(Form);
    }
#undef TAG_CASE
    // Only reached when `type` holds a value with no matching enumerator.
    SK_ABORT("bad tag");
    return "";
}
// One node of the tag tree kept by SkPDFTagTree. Nodes are filled in by copy()
// from the caller's SkPDF::StructureElementNode tree and later turned into
// "StructElem" dictionaries by prepare_tag_tree_to_emit().
struct SkPDFTagNode {
    // One piece of marked content attributed to this node: the page it was
    // drawn on and the mark id handed out for it on that page.
    struct MarkedContentInfo {
        unsigned fPageIndex;
        int fMarkId;
    };

    // Tri-state cache for can_discard(); kUnknown means "not computed yet".
    enum State {
        kUnknown,
        kYes,
        kNo,
    };

    // Child nodes, stored as a contiguous array of fChildCount elements.
    SkPDFTagNode* fChildren = nullptr;
    size_t fChildCount = 0;

    // Every mark recorded for this node by getMarkIdForNodeId().
    SkTArray<MarkedContentInfo> fMarkedContent;

    // Identifier and structure type copied from the source node.
    int fNodeId;
    SkPDF::DocumentStructureType fType;

    // Reference assigned to this node when it is emitted.
    SkPDFIndirectReference fRef;

    State fCanDiscard = kUnknown;
};
// Constructs an empty tag tree. The arena is given a size argument equal to
// four SkPDFTagNode objects (presumably an initial-allocation hint; confirm
// against SkArenaAlloc).
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
// Defaulted destructor, defined in this file where SkPDFTagNode is a complete
// type.
SkPDFTagTree::~SkPDFTagTree() = default;
// Recursively mirrors `node` and all of its descendants into `dst`.
//
//   node    - source structure element supplied by the client.
//   dst     - already-allocated destination node to fill in.
//   arena   - allocator used for each node's array of children.
//   nodeMap - receives a node-id -> destination-node entry for every node
//             visited, so marks can later be attached by id.
static void copy(const SkPDF::StructureElementNode& node,
                 SkPDFTagNode* dst,
                 SkArenaAlloc* arena,
                 SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
    nodeMap->set(node.fNodeId, dst);

    // Scalar fields first.
    dst->fNodeId = node.fNodeId;
    dst->fType = node.fType;

    // Then the children: one arena array per node, filled in recursively.
    const size_t count = node.fChildCount;
    SkPDFTagNode* kids = arena->makeArray<SkPDFTagNode>(count);
    dst->fChildren = kids;
    dst->fChildCount = count;
    for (size_t idx = 0; idx != count; ++idx) {
        copy(node.fChildren[idx], kids + idx, arena, nodeMap);
    }
}
// Builds the internal tag tree from the client's structure tree rooted at
// `node`. A null `node` leaves this object untouched.
void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
    if (node == nullptr) {
        return;
    }
    fRoot = fArena.make<SkPDFTagNode>();
    copy(*node, fRoot, &fArena, &fNodeMap);
}
// Returns the tree to its empty state: forgets the root, clears the id map and
// the per-page mark lists, and resets the arena the nodes were allocated from.
void SkPDFTagTree::reset() {
    fRoot = nullptr;
    fMarksPerPage.reset();
    fNodeMap.reset();
    fArena.reset();
}
int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
if (!fRoot) {
return -1;
}
SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
if (!tagPtr) {
return -1;
}
SkPDFTagNode* tag = *tagPtr;
SkASSERT(tag);
while (fMarksPerPage.size() < pageIndex + 1) {
fMarksPerPage.push_back();
}
SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
int markId = pageMarks.count();
tag->fMarkedContent.push_back({pageIndex, markId});
pageMarks.push_back(tag);
return markId;
}
// Reports whether `node` may be left out of the emitted structure tree.
// A node is discardable only if it has no marked content of its own and every
// one of its children is discardable too. The answer is cached in
// node->fCanDiscard so each subtree is evaluated at most once.
static bool can_discard(SkPDFTagNode* node) {
    switch (node->fCanDiscard) {
        case SkPDFTagNode::kYes:
            return true;
        case SkPDFTagNode::kNo:
            return false;
        case SkPDFTagNode::kUnknown:
            break;
    }

    // Not cached yet: start from the node's own content, then consult the
    // children, stopping at the first one that must be kept.
    bool discardable = node->fMarkedContent.empty();
    for (size_t i = 0; discardable && i < node->fChildCount; ++i) {
        discardable = can_discard(&node->fChildren[i]);
    }

    node->fCanDiscard = discardable ? SkPDFTagNode::kYes : SkPDFTagNode::kNo;
    return discardable;
}
// Emits a "StructElem" dictionary for `node` (and, recursively, for every
// child that cannot be discarded) into `doc`.
//
//   parent - reference written as this element's "P" entry.
//   node   - tag node to emit; its fRef is set to the reference reserved here.
//   doc    - document that reserves references and receives the objects.
//
// Returns the reference under which the element was emitted.
SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
                                                SkPDFTagNode* node,
                                                SkPDFDocument* doc) {
    // Reserve our own reference up front: the children need it as their parent.
    const SkPDFIndirectReference selfRef = doc->reserveRef();
    node->fRef = selfRef;

    std::unique_ptr<SkPDFArray> kidArray = SkPDFMakeArray();

    // Child structure elements come first; discardable subtrees are skipped.
    SkPDFTagNode* const firstChild = node->fChildren;
    SkPDFTagNode* const endChild = firstChild + node->fChildCount;
    for (SkPDFTagNode* child = firstChild; child != endChild; ++child) {
        if (can_discard(child)) {
            continue;
        }
        kidArray->appendRef(prepare_tag_tree_to_emit(selfRef, child, doc));
    }

    // Then one marked-content reference per mark recorded on this node.
    for (const SkPDFTagNode::MarkedContentInfo& mark : node->fMarkedContent) {
        std::unique_ptr<SkPDFDict> markRef = SkPDFMakeDict("MCR");
        markRef->insertRef("Pg", doc->getPage(mark.fPageIndex));
        markRef->insertInt("MCID", mark.fMarkId);
        kidArray->appendObject(std::move(markRef));
    }

    SkPDFDict element("StructElem");
    element.insertName("S", tag_name_from_type(node->fType));
    element.insertRef("P", parent);
    element.insertObject("K", std::move(kidArray));
    return doc->emit(element, selfRef);
}
// Emits the document's logical structure into `doc` and returns a reference
// to the resulting "StructTreeRoot" dictionary.
//
// Returns a default-constructed reference, emitting nothing, when there is no
// tag tree or when no node in the tree has any marked content.
SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
    // A tag tree with no marked content would previously abort debug builds
    // and, in release builds, produce a StructTreeRoot whose root element has
    // an empty "K" array. Treat it the same as having no tree at all.
    if (!fRoot || can_discard(fRoot)) {
        return SkPDFIndirectReference();
    }

    SkPDFIndirectReference ref = doc->reserveRef();
    unsigned pageCount = SkToUInt(doc->pageCount());

    // Build the StructTreeRoot.
    SkPDFDict structTreeRoot("StructTreeRoot");
    structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
    structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));

    // Build the parent tree, which is a mapping from the marked
    // content IDs on each page to their corresponding tags.
    SkPDFDict parentTree("ParentTree");
    auto parentTreeNums = SkPDFMakeArray();

    SkASSERT(fMarksPerPage.size() <= pageCount);
    for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
        const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
        // Entry i of this array is the structure element that owns mark id i
        // on page j.
        SkPDFArray markToTagArray;
        for (SkPDFTagNode* mark : pageMarks) {
            // Every marked node was emitted by prepare_tag_tree_to_emit()
            // above, so it must have a reference by now.
            SkASSERT(mark->fRef);
            markToTagArray.appendRef(mark->fRef);
        }
        // "Nums" is a flat list of (key, value) pairs keyed by page index.
        parentTreeNums->appendInt(SkToInt(j));
        parentTreeNums->appendRef(doc->emit(markToTagArray));
    }
    parentTree.insertObject("Nums", std::move(parentTreeNums));
    structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
    return doc->emit(structTreeRoot, ref);
}