| /* |
| * Copyright 2018 Google LLC |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "src/pdf/SkPDFTag.h" |
| |
| #include "include/core/SkPoint.h" |
| #include "include/core/SkScalar.h" |
| #include "include/private/base/SkAssert.h" |
| #include "include/private/base/SkDebug.h" |
| #include "include/private/base/SkTo.h" |
| #include "src/base/SkZip.h" |
| #include "src/pdf/SkPDFDocumentPriv.h" |
| |
| #include <algorithm> |
| #include <compare> |
| #include <memory> |
| #include <ranges> |
| #include <utility> |
| #include <vector> |
| |
| using namespace skia_private; |
| |
| namespace { |
| struct Location { |
| SkPoint fPoint{SK_ScalarNaN, SK_ScalarNaN}; |
| unsigned fPageIndex{0}; |
| |
| void accumulate(Location const& child) { |
| if (!child.fPoint.isFinite()) { |
| return; |
| } |
| if (!fPoint.isFinite()) { |
| *this = child; |
| return; |
| } |
| if (child.fPageIndex < fPageIndex) { |
| *this = child; |
| return; |
| } |
| if (child.fPageIndex == fPageIndex) { |
| fPoint.fX = std::min(child.fPoint.fX, fPoint.fX); |
| fPoint.fY = std::max(child.fPoint.fY, fPoint.fY); // PDF y-up |
| return; |
| } |
| } |
| }; |
| } // namespace |
| |
| struct SkPDFStructElem { |
| // Structure elements (/StructElem) may have an element identifier (/ID) which is a byte string. |
| // Element identifiers are used by attributes (/StructElem /A) to refer to structure elements. |
| // The mapping from element identifier to structure element is emitted in the /IDTree. |
| // Element identifiers are stored as an integer (elemId) and this method creates a byte string. |
| // Since the /IDTree is a name tree the element identifier keys must be ordered; |
| // the digits are zero-padded so that lexicographic order matches numeric order. |
| static SkString StringFromElemId(int elemId) { |
| SkString elemIdString; |
| elemIdString.printf("node%08d", elemId); |
| return elemIdString; |
| } |
| |
| SkPDFStructElem* fParent = nullptr; |
| SkSpan<SkPDFStructElem> fChildren; |
| struct MarkedContentInfo { |
| Location fLocation; |
| int fMcid; |
| SkPDFParentTreeKey fStructParentsKey; |
| }; |
| std::vector<MarkedContentInfo> fMarkedContent; |
| int fElemId = 0; |
| bool fWantTitle = false; |
| bool fUsed = false; |
| bool fUsedInIDTree = false; |
| SkString fStructType; |
| SkString fTitle; |
| SkString fAlt; |
| SkString fLang; |
| SkPDFIndirectReference fRef; |
| std::unique_ptr<SkPDFArray> fAttributes; |
| std::vector<int> fAttributeElemIds; |
| struct ContentItemInfo { |
| unsigned fPageIndex; |
| SkPDFParentTreeKey fStructParentKey; |
| }; |
| std::vector<ContentItemInfo> fContentItems; |
| |
| void setUsed(const THashMap<int, SkPDFStructElem*>& structElemForElemId) { |
| if (fUsed) { |
| return; |
| } |
| // First to avoid possible cycles. |
| fUsed = true; |
| // Any StructElem referenced by an attribute is used. |
| for (int elemId : fAttributeElemIds) { |
| SkPDFStructElem** structElemPtr = structElemForElemId.find(elemId); |
| if (!structElemPtr) { |
| continue; |
| } |
| SkPDFStructElem* structElem = *structElemPtr; |
| SkASSERT(structElem); |
| structElem->setUsed(structElemForElemId); |
| structElem->fUsedInIDTree = true; |
| } |
| // The parent StructElem is used. |
| if (fParent) { |
| fParent->setUsed(structElemForElemId); |
| } |
| } |
| |
| class ContentIndex { |
| SkPDFParentTreeKey fParentId; |
| int fMcid; |
| public: |
| ContentIndex() : fParentId(), fMcid(0) {} |
| ContentIndex(const MarkedContentInfo& mci) |
| : fParentId(mci.fStructParentsKey), fMcid(mci.fMcid) {} |
| ContentIndex(const ContentItemInfo& cii) |
| : fParentId(cii.fStructParentKey), fMcid(0) {} |
| bool valid() const { return static_cast<bool>(fParentId); } |
| std::strong_ordering operator<=>(const ContentIndex&) const = default; |
| }; |
| class ContentSpan { |
| struct Data { |
| ContentIndex fFirst; |
| ContentIndex fLast; |
| bool operator==(const Data&) const = default; |
| }; |
| std::optional<Data> fData; |
| public: |
| ContentSpan() = default; |
| ContentSpan(const ContentSpan&) = default; |
| ContentSpan& operator=(const ContentSpan&) = default; |
| bool operator==(const ContentSpan& that) const = default; |
| bool empty() const { return !fData.has_value(); } |
| const ContentIndex& first() const { return fData->fFirst; } |
| const ContentIndex& last() const { return fData->fLast; } |
| void accumulate(const ContentIndex& ci) { |
| if (!ci.valid()) { |
| return; |
| } |
| if (this->empty()) { |
| fData.emplace(ci, ci); |
| return; |
| } |
| if (ci < fData->fFirst) { |
| fData->fFirst = ci; |
| } |
| if (fData->fLast < ci) { |
| fData->fLast = ci; |
| } |
| } |
| void accumulate(const ContentSpan& cs) { |
| if (cs.empty()) { |
| return; |
| } |
| this->accumulate(cs.first()); |
| this->accumulate(cs.last()); |
| } |
| }; |
| SkPDFIndirectReference emitStructElem(const SkPDFStructTree& structTree, |
| SkPDFIndirectReference parent, |
| std::vector<SkPDFStructTree::IDTreeEntry>* idTree, |
| SkPDFDocument* doc, |
| ContentSpan&); |
| }; |
| |
// The constructor and destructor are defaulted here, out of line, rather than in the header.
// NOTE(review): presumably so the header need not see the complete member types - confirm.
SkPDF::AttributeList::AttributeList() = default;

SkPDF::AttributeList::~AttributeList() = default;
| |
| void SkPDF::AttributeList::appendInt(const char* owner, const char* name, int value) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| attrDict->insertInt(name, value); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendFloat(const char* owner, const char* name, float value) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| attrDict->insertScalar(name, value); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendName(const char* owner, const char* name, const char* value) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| attrDict->insertName(name, value); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendTextString(const char* owner, const char* name, const char* value){ |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| attrDict->insertTextString(name, value); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendTextString(const char* owner, const char* name, SkString value) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| attrDict->insertTextString(name, std::move(value)); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendFloatArray(const char* owner, const char* name, |
| SkSpan<const float> value) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray(); |
| for (float element : value) { |
| pdfArray->appendScalar(element); |
| } |
| attrDict->insertObject(name, std::move(pdfArray)); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| void SkPDF::AttributeList::appendNodeIdArray(const char* owner, const char* name, |
| SkSpan<const int> elemIds) { |
| if (!fAttrs) { |
| fAttrs = SkPDFMakeArray(); |
| } |
| // Keep the element identifiers so we can mark their targets as used (and needing /ID) later. |
| fElemIds.insert(fElemIds.end(), elemIds.begin(), elemIds.end()); |
| std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
| attrDict->insertName("O", owner); |
| std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray(); |
| for (int elemId : elemIds) { |
| pdfArray->appendByteString(SkPDFStructElem::StringFromElemId(elemId)); |
| } |
| attrDict->insertObject(name, std::move(pdfArray)); |
| fAttrs->appendObject(std::move(attrDict)); |
| } |
| |
| SkPDFStructTree::SkPDFStructTree(SkPDF::StructureElementNode* node, |
| SkPDF::Metadata::Outline outline) |
| : fArena(4 * sizeof(SkPDFStructElem)) |
| { |
| if (node) { |
| fRoot = fArena.make<SkPDFStructElem>(); |
| fOutline = outline; |
| this->move(*node, fRoot, false); |
| } |
| } |
| |
// Defaulted out of line in this file, where SkPDFStructElem is a complete type.
SkPDFStructTree::~SkPDFStructTree() = default;
| |
| void SkPDFStructTree::move(SkPDF::StructureElementNode& node, |
| SkPDFStructElem* structElem, |
| bool wantTitle) { |
| constexpr bool kDumpStructureTree = false; |
| if constexpr (kDumpStructureTree) { |
| int indent = 0; |
| for (SkPDFStructElem* parent = structElem->fParent; parent; parent = parent->fParent) { |
| ++indent; |
| } |
| SkString attrIds; |
| if (!node.fAttributes.fElemIds.empty()) { |
| attrIds.append(" ["); |
| for (int attrId : node.fAttributes.fElemIds) { |
| attrIds.appendS32(attrId); |
| attrIds.append(","); |
| } |
| *(attrIds.end() - 1) = ']'; |
| } |
| SkDebugf("%.*s %d %s%s\n", |
| indent, " ", node.fNodeId, node.fTypeString.c_str(), attrIds.c_str()); |
| } |
| |
| structElem->fElemId = node.fNodeId; |
| fStructElemForElemId.set(structElem->fElemId, structElem); |
| |
| // Accumulate title text, need to be in sync with create_outline_from_headers |
| const SkString& type = node.fTypeString; |
| wantTitle |= fOutline == SkPDF::Metadata::Outline::StructureElementHeaders && |
| type.size() == 2 && type[0] == 'H' && '1' <= type[1] && type[1] <= '6'; |
| structElem->fWantTitle = wantTitle; |
| |
| static SkString nonStruct("NonStruct"); |
| structElem->fStructType = node.fTypeString.isEmpty() ? nonStruct : std::move(node.fTypeString); |
| structElem->fAlt = std::move(node.fAlt); |
| structElem->fLang = std::move(node.fLang); |
| |
| size_t childCount = node.fChildVector.size(); |
| structElem->fChildren = SkSpan(fArena.makeArray<SkPDFStructElem>(childCount), childCount); |
| for (auto&& [nodeChild, elemChild] : SkMakeZip(node.fChildVector, structElem->fChildren)) { |
| elemChild.fParent = structElem; |
| this->move(*nodeChild, &elemChild, wantTitle); |
| } |
| |
| structElem->fAttributes = std::move(node.fAttributes.fAttrs); |
| structElem->fAttributeElemIds = std::move(node.fAttributes.fElemIds); |
| } |
| |
| int SkPDFStructTree::Mark::elemId() const { |
| return fStructElem ? fStructElem->fElemId : 0; |
| } |
| |
| SkString SkPDFStructTree::Mark::structType() const { |
| SkASSERT(bool(*this)); |
| return fStructElem->fStructType; |
| } |
| |
| int SkPDFStructTree::Mark::mcid() const { |
| return fStructElem ? fStructElem->fMarkedContent[fMarkIndex].fMcid : -1; |
| } |
| |
| void SkPDFStructTree::Mark::accumulate(SkPoint point) { |
| SkASSERT(bool(*this)); |
| Location& location = fStructElem->fMarkedContent[fMarkIndex].fLocation; |
| return location.accumulate({{point}, location.fPageIndex}); |
| } |
| |
| auto SkPDFStructTree::createMarkForElemId(int elemId, unsigned pageIndex, |
| SkPDFParentTreeKey& structParentsKey) -> Mark |
| { |
| if (!fRoot) { |
| return Mark(); |
| } |
| SkPDFStructElem** structElemPtr = fStructElemForElemId.find(elemId); |
| if (!structElemPtr) { |
| return Mark(); |
| } |
| SkPDFStructElem* structElem = *structElemPtr; |
| SkASSERT(structElem); |
| |
| if (fParentTree.size() <= structParentsKey.fValue) { |
| return Mark(); |
| } |
| if (!structParentsKey) { |
| structParentsKey.fValue = fParentTree.size(); |
| fParentTree.push_back(Stream()); |
| } |
| ParentTreeEntry& entry = fParentTree[structParentsKey.fValue]; |
| if (!std::holds_alternative<Stream>(entry)) { |
| return Mark(); |
| } |
| |
| structElem->setUsed(fStructElemForElemId); |
| |
| TArray<SkPDFStructElem*>& structElemForMcid = std::get<Stream>(entry).fChildren; |
| int mcid = structElemForMcid.size(); |
| SkASSERT(structElem->fMarkedContent.empty() || |
| structElem->fMarkedContent.back().fLocation.fPageIndex <= pageIndex); |
| structElem->fMarkedContent.push_back({{{SK_ScalarNaN, SK_ScalarNaN}, pageIndex}, |
| mcid, structParentsKey}); |
| structElemForMcid.push_back(structElem); |
| return Mark(structElem, structElem->fMarkedContent.size() - 1); |
| } |
| |
| void SkPDFStructTree::setContentStreamRefForStructParentsKey( |
| SkPDFParentTreeKey structParentsKey, SkPDFIndirectReference contentStreamRef) |
| { |
| if (structParentsKey.fValue < 0 || fParentTree.size() <= structParentsKey.fValue) { |
| return; |
| } |
| ParentTreeEntry& entry = fParentTree[structParentsKey.fValue]; |
| if (!std::holds_alternative<Stream>(entry)) { |
| return; |
| } |
| std::get<Stream>(entry).fContentStreamRef = contentStreamRef; |
| } |
| |
| SkPDFIndirectReference SkPDFStructTree::getContentStreamRefForStructParentsKey( |
| SkPDFParentTreeKey structParentsKey) const |
| { |
| if (structParentsKey.fValue < 0 || fParentTree.size() <= structParentsKey.fValue) { |
| return SkPDFIndirectReference(); |
| } |
| const ParentTreeEntry& entry = fParentTree[structParentsKey.fValue]; |
| if (!std::holds_alternative<Stream>(entry)) { |
| return SkPDFIndirectReference(); |
| } |
| return std::get<Stream>(entry).fContentStreamRef; |
| } |
| |
| SkPDFParentTreeKey SkPDFStructTree::createStructParentKeyForElemId( |
| int elemId, unsigned pageIndex, SkPDFIndirectReference contentItemRef) |
| { |
| if (!fRoot) { |
| return SkPDFParentTreeKey(); |
| } |
| SkPDFStructElem** structElemPtr = fStructElemForElemId.find(elemId); |
| if (!structElemPtr) { |
| return SkPDFParentTreeKey(); |
| } |
| SkPDFStructElem* structElem = *structElemPtr; |
| SkASSERT(structElem); |
| |
| structElem->setUsed(fStructElemForElemId); |
| |
| SkPDFParentTreeKey structParentKey{fParentTree.size()}; |
| SkPDFStructElem::ContentItemInfo contentItemInfo = {pageIndex, structParentKey}; |
| structElem->fContentItems.push_back(contentItemInfo); |
| fParentTree.emplace_back(Item{structElem, contentItemRef}); |
| return structParentKey; |
| } |
| |
| SkPDFIndirectReference SkPDFStructTree::getContentItemRefForStructParentKey( |
| SkPDFParentTreeKey structParentKey) const |
| { |
| if (structParentKey.fValue < 0 || fParentTree.size() <= structParentKey.fValue) { |
| return SkPDFIndirectReference(); |
| } |
| const ParentTreeEntry& entry = fParentTree[structParentKey.fValue]; |
| if (!std::holds_alternative<Item>(entry)) { |
| return SkPDFIndirectReference(); |
| } |
| return std::get<Item>(entry).fContentItemRef; |
| } |
| |
| SkPDFIndirectReference SkPDFStructElem::emitStructElem( |
| const SkPDFStructTree& structTree, |
| SkPDFIndirectReference parent, |
| std::vector<SkPDFStructTree::IDTreeEntry>* idTree, |
| SkPDFDocument* doc, |
| ContentSpan& contentSpan) |
| { |
| fRef = doc->reserveRef(); |
| |
| SkPDFDict dict("StructElem"); |
| dict.insertName("S", fStructType); |
| |
| if (!fAlt.isEmpty()) { |
| dict.insertTextString("Alt", fAlt); |
| } |
| if (!fLang.isEmpty()) { |
| dict.insertTextString("Lang", fLang); |
| } |
| dict.insertRef("P", parent); |
| |
| { // K |
| // Need to emit the kids in order. There are three kinds of kids: |
| // 1. children (structure elements, in user order, have marked content and content items) |
| // 2. marked content (drawing, sort by {struct parent key, marked content id}) |
| // 3. content items (currently just annotations, {struct parent key, 0}) |
| // The children must be emitted in the order specified by the user. |
| // The marked content and content items must be emitted in the order they were drawn. |
| // If all the kid content is well ordered (no child span overlapping with anything else) and |
| // that order matches the user specified order of children then there is a "good" order. |
| // But any form of overlap is possible so there may not be a "good" order. |
| // In other words, the structure tree is an ordered hierarchy but the user can draw items |
| // and associate them with structure tree entries in any order. If the content isn't |
| // hierarchical it won't fit well into the structure tree. So try to find a least-bad order. |
| // |
| // The strategy used here is: |
| // 1. Merge all overlapping child spans to order the children. |
| // 2. Emit the each next child, marked content, or content item. |
| // Empty children are emitted first then compare by ContentIndex. |
| |
| // Emit the children, collect their spans, then adjust the spans |
| struct ChildSpan { |
| ContentSpan fContentSpan; |
| SkPDFIndirectReference fRef; |
| }; |
| std::vector<ChildSpan> childSpans; |
| for (auto&& child : fChildren) { |
| if (child.fUsed) { |
| ChildSpan& childSpan = childSpans.emplace_back(); |
| childSpan.fRef = child.emitStructElem(structTree, fRef, idTree, doc, |
| childSpan.fContentSpan); |
| } |
| } |
| if (childSpans.size() > 1) { |
| std::optional<ContentIndex> minFirstAfter; |
| for (auto&& childSpan : std::views::reverse(childSpans)) { |
| if (childSpan.fContentSpan.empty()) { |
| // Let empty child spans remain empty |
| continue; |
| } |
| if (!minFirstAfter.has_value() || childSpan.fContentSpan.first() <= minFirstAfter) { |
| // This child span starts before all subsequent child spans, everything is fine. |
| minFirstAfter = childSpan.fContentSpan.first(); |
| continue; |
| } |
| // This is a non-empty span which currently starts after a subsequent child span. |
| childSpan.fContentSpan.accumulate(minFirstAfter.value()); |
| } |
| } |
| SkDEBUGCODE( |
| // Postcondition: spans are empty or start after the all previous spans. |
| std::optional<ContentIndex> maxFirstSeenSoFar; |
| for (auto&& childSpan : childSpans) { |
| if (childSpan.fContentSpan.empty()) { |
| continue; |
| } |
| if (!maxFirstSeenSoFar.has_value()) { |
| maxFirstSeenSoFar = childSpan.fContentSpan.first(); |
| } |
| SkASSERT( |
| childSpan.fContentSpan.empty() || |
| maxFirstSeenSoFar <= childSpan.fContentSpan.first() |
| ); |
| maxFirstSeenSoFar = std::max(maxFirstSeenSoFar.value(), |
| childSpan.fContentSpan.first()); |
| } |
| ) |
| |
| // Setup the marked content |
| unsigned longestPage = 0; |
| if (!fMarkedContent.empty()) { |
| // Use the mode page as /Pg and use integer mcid for marks on that page. |
| // SkPDFStructElem::fMarkedContent is already sorted by page, since it is append only in |
| // createMarkForElemId where pageIndex is the monotonically increasing current page. |
| size_t longestRun = 0; |
| size_t currentRun = 0; |
| unsigned currentPage = 0; |
| for (const SkPDFStructElem::MarkedContentInfo& info : fMarkedContent) { |
| unsigned thisPage = info.fLocation.fPageIndex; |
| if (currentPage != thisPage) { |
| SkASSERT(currentPage < thisPage); |
| currentPage = thisPage; |
| currentRun = 0; |
| } |
| ++currentRun; |
| if (longestRun < currentRun) { |
| longestRun = currentRun; |
| longestPage = currentPage; |
| } |
| } |
| dict.insertRef("Pg", doc->getPage(longestPage)); |
| } |
| |
| std::unique_ptr<SkPDFArray> kids(new SkPDFOptionalArray()); |
| auto markedContent = fMarkedContent.begin(); |
| auto contentItem = fContentItems.begin(); |
| auto childSpan = childSpans.begin(); |
| while (markedContent != fMarkedContent.end() || |
| contentItem != fContentItems.end() || |
| childSpan != childSpans.end()) |
| { |
| ContentIndex mci = markedContent == fMarkedContent.end() |
| ? ContentIndex() |
| : ContentIndex(*markedContent); |
| ContentIndex cii = contentItem == fContentItems.end() |
| ? ContentIndex() |
| : ContentIndex(*contentItem); |
| |
| if (childSpan != childSpans.end() && ( |
| childSpan->fContentSpan.empty() || |
| ((!mci.valid() || childSpan->fContentSpan.first() <= mci) && |
| (!cii.valid() || childSpan->fContentSpan.first() <= cii)))) |
| { |
| kids->appendRef(childSpan->fRef); |
| contentSpan.accumulate(childSpan->fContentSpan); |
| ++childSpan; |
| continue; |
| } |
| |
| if (mci.valid() && (!cii.valid() || mci <= cii)) { |
| const SkPDFStructElem::MarkedContentInfo& info = *markedContent; |
| SkPDFIndirectReference contentStreamRef = |
| structTree.getContentStreamRefForStructParentsKey(info.fStructParentsKey); |
| if (info.fLocation.fPageIndex == longestPage && |
| contentStreamRef == SkPDFStructTree::kPageContentStreamRef) |
| { |
| kids->appendInt(info.fMcid); |
| contentSpan.accumulate(info); |
| } else if (contentStreamRef || |
| contentStreamRef == SkPDFStructTree::kPageContentStreamRef) |
| { |
| std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR"); |
| if (info.fLocation.fPageIndex != longestPage) { |
| mcr->insertRef("Pg", doc->getPage(info.fLocation.fPageIndex)); |
| } |
| if (contentStreamRef) { |
| mcr->insertRef("Stm", contentStreamRef); |
| } |
| mcr->insertInt("MCID", info.fMcid); |
| kids->appendObject(std::move(mcr)); |
| contentSpan.accumulate(info); |
| } |
| |
| ++markedContent; |
| continue; |
| } |
| |
| if (cii.valid() && (!mci.valid() || cii <= mci)) { |
| const SkPDFStructElem::ContentItemInfo& info = *contentItem; |
| SkPDFIndirectReference contentItemRef = |
| structTree.getContentItemRefForStructParentKey(info.fStructParentKey); |
| std::unique_ptr<SkPDFDict> contentItemDict = SkPDFMakeDict("OBJR"); |
| contentItemDict->insertRef("Obj", contentItemRef); |
| contentItemDict->insertRef("Pg", doc->getPage(info.fPageIndex)); |
| kids->appendObject(std::move(contentItemDict)); |
| contentSpan.accumulate(info); |
| |
| ++contentItem; |
| continue; |
| } |
| |
| SkASSERT(false); |
| } |
| dict.insertObject("K", std::move(kids)); |
| } |
| |
| if (fAttributes) { |
| dict.insertObject("A", std::move(fAttributes)); |
| } |
| |
| // If this StructElem ID was referenced, add /ID and add it to the IDTree. |
| if (fUsedInIDTree) { |
| dict.insertByteString("ID", SkPDFStructElem::StringFromElemId(fElemId)); |
| idTree->push_back({fElemId, fRef}); |
| } |
| |
| return doc->emit(dict, fRef); |
| } |
| |
| void SkPDFStructTree::addStructElemTitle(int elemId, SkSpan<const char> title) { |
| if (!fRoot) { |
| return; |
| } |
| SkPDFStructElem** structElemPtr = fStructElemForElemId.find(elemId); |
| if (!structElemPtr) { |
| return; |
| } |
| SkPDFStructElem* structElem = *structElemPtr; |
| SkASSERT(structElem); |
| |
| if (structElem->fWantTitle) { |
| structElem->fTitle.append(title.data(), title.size()); |
| // Arbitrary cutoff for size. |
| if (structElem->fTitle.size() > 1023) { |
| structElem->fWantTitle = false; |
| } |
| } |
| } |
| |
| SkPDFIndirectReference SkPDFStructTree::emitStructTreeRoot(SkPDFDocument* doc) const { |
| if (!fRoot || !fRoot->fUsed) { |
| return SkPDFIndirectReference(); |
| } |
| |
| SkPDFIndirectReference structTreeRootRef = doc->reserveRef(); |
| |
| // Build the StructTreeRoot. |
| SkPDFDict structTreeRoot("StructTreeRoot"); |
| std::vector<IDTreeEntry> idTree; |
| SkPDFStructElem::ContentSpan rootContentSpan; |
| structTreeRoot.insertRef("K", fRoot->emitStructElem(*this, structTreeRootRef, &idTree, doc, |
| rootContentSpan)); |
| structTreeRoot.insertInt("ParentTreeNextKey", fParentTree.size()); |
| |
| // Build the parent tree, a number tree which consists of two things: |
| // For each Page or FormXObject with marked content: |
| // key: ?::StructParents |
| // value: array of structure element ref indexed by the page's marked-content identifiers |
| // For each content item (usually an annotation) |
| // key: ?::StructParent |
| // value: structure element ref |
| SkPDFDict parentTree("ParentTree"); |
| auto parentTreeNums = SkPDFMakeArray(); |
| |
| for (int structParentKey = 0; structParentKey < fParentTree.size(); ++structParentKey) { |
| const ParentTreeEntry& entry = fParentTree[structParentKey]; |
| if (std::holds_alternative<Item>(entry)) { |
| parentTreeNums->appendInt(structParentKey); // /StructParent |
| parentTreeNums->appendRef(std::get<Item>(entry).fStructElem->fRef); |
| } else { |
| const Stream& stream = std::get<Stream>(entry); |
| if (stream.fContentStreamRef || stream.fContentStreamRef == kPageContentStreamRef) { |
| SkPDFArray structElemForMcidArray; |
| for (const SkPDFStructElem* structElem : stream.fChildren) { |
| SkASSERT(structElem->fRef); |
| structElemForMcidArray.appendRef(structElem->fRef); |
| } |
| parentTreeNums->appendInt(structParentKey); // /StructParents |
| parentTreeNums->appendRef(doc->emit(structElemForMcidArray)); |
| } |
| } |
| } |
| |
| parentTree.insertObject("Nums", std::move(parentTreeNums)); |
| structTreeRoot.insertRef("ParentTree", doc->emit(parentTree)); |
| |
| // Build the IDTree, a mapping from every unique element identifier byte string to |
| // a reference to its corresponding structure element. |
| if (!idTree.empty()) { |
| std::sort(idTree.begin(), idTree.end(), |
| [](const IDTreeEntry& a, const IDTreeEntry& b) { |
| return a.elemId < b.elemId; |
| }); |
| |
| SkPDFDict idTreeLeaf; |
| auto limits = SkPDFMakeArray(); |
| SkString lowestElemIdString = SkPDFStructElem::StringFromElemId(idTree.begin()->elemId); |
| limits->appendByteString(lowestElemIdString); |
| SkString highestElemIdString = SkPDFStructElem::StringFromElemId(idTree.rbegin()->elemId); |
| limits->appendByteString(highestElemIdString); |
| idTreeLeaf.insertObject("Limits", std::move(limits)); |
| auto names = SkPDFMakeArray(); |
| for (const IDTreeEntry& entry : idTree) { |
| names->appendByteString(SkPDFStructElem::StringFromElemId(entry.elemId)); |
| names->appendRef(entry.structElemRef); |
| } |
| idTreeLeaf.insertObject("Names", std::move(names)); |
| auto idTreeKids = SkPDFMakeArray(); |
| idTreeKids->appendRef(doc->emit(idTreeLeaf)); |
| |
| SkPDFDict idTreeRoot; |
| idTreeRoot.insertObject("Kids", std::move(idTreeKids)); |
| structTreeRoot.insertRef("IDTree", doc->emit(idTreeRoot)); |
| } |
| |
| return doc->emit(structTreeRoot, structTreeRootRef); |
| } |
| |
| namespace header_outline { |
| namespace { |
| struct Entry { |
| struct Content { |
| SkString fText; |
| Location fLocation; |
| void accumulate(Content const& child) { |
| fText += child.fText; |
| fLocation.accumulate(child.fLocation); |
| } |
| }; |
| |
| Content fContent; |
| int fHeaderLevel; |
| SkPDFIndirectReference fStructureRef; |
| SkPDFIndirectReference fRef = {}; |
| std::vector<Entry> fChildren = {}; |
| size_t fDescendentsEmitted = 0; |
| |
| void setAllRefs(SkPDFDocument* const doc, SkPDFIndirectReference ref) { |
| fRef = ref; |
| for (auto&& child : fChildren) { |
| child.setAllRefs(doc, doc->reserveRef()); |
| } |
| } |
| |
| void emitDescendents(SkPDFDocument* const doc) { |
| fDescendentsEmitted = fChildren.size(); |
| for (size_t i = 0; i < fChildren.size(); ++i) { |
| auto&& child = fChildren[i]; |
| child.emitDescendents(doc); |
| fDescendentsEmitted += child.fDescendentsEmitted; |
| |
| SkPDFDict entry; |
| entry.insertTextString("Title", child.fContent.fText); |
| |
| auto destination = SkPDFMakeArray(); |
| destination->appendRef(doc->getPage(child.fContent.fLocation.fPageIndex)); |
| destination->appendName("XYZ"); |
| destination->appendScalar(child.fContent.fLocation.fPoint.x()); |
| destination->appendScalar(child.fContent.fLocation.fPoint.y()); |
| destination->appendInt(0); |
| entry.insertObject("Dest", std::move(destination)); |
| |
| entry.insertRef("Parent", fRef); |
| if (child.fStructureRef) { |
| entry.insertRef("SE", child.fStructureRef); |
| } |
| if (0 < i) { |
| entry.insertRef("Prev", fChildren[i-1].fRef); |
| } |
| if (i < fChildren.size()-1) { |
| entry.insertRef("Next", fChildren[i+1].fRef); |
| } |
| if (!child.fChildren.empty()) { |
| entry.insertRef("First", child.fChildren.front().fRef); |
| entry.insertRef("Last", child.fChildren.back().fRef); |
| entry.insertInt("Count", child.fDescendentsEmitted); |
| } |
| doc->emit(entry, child.fRef); |
| } |
| } |
| }; |
| |
| Entry::Content create_header_content(SkPDFStructElem* const structElem) { |
| SkString text; |
| if (!structElem->fTitle.isEmpty()) { |
| text = structElem->fTitle; |
| } else if (!structElem->fAlt.isEmpty()) { |
| text = structElem->fAlt; |
| } |
| |
| // The uppermost/leftmost point on the earliest page of this StructElem's marks. |
| Location structElemLocation; |
| for (auto&& mark : structElem->fMarkedContent) { |
| structElemLocation.accumulate(mark.fLocation); |
| } |
| |
| Entry::Content content{std::move(text), std::move(structElemLocation)}; |
| |
| // Accumulate children |
| for (auto&& child : structElem->fChildren) { |
| if (child.fUsed) { |
| content.accumulate(create_header_content(&child)); |
| } |
| } |
| return content; |
| } |
| |
| void make(SkPDFDocument* const doc, SkPDFStructElem* const structElem, STArray<7, Entry*>& stack) { |
| const SkString& type = structElem->fStructType; |
| if (type.size() == 2 && type[0] == 'H' && '1' <= type[1] && type[1] <= '6') { |
| int level = type[1] - '0'; |
| while (level <= stack.back()->fHeaderLevel) { |
| stack.pop_back(); |
| } |
| Entry::Content content = create_header_content(structElem); |
| if (!content.fText.isEmpty()) { |
| Entry e{std::move(content), level, structElem->fRef}; |
| stack.push_back(&stack.back()->fChildren.emplace_back(std::move(e))); |
| return; |
| } |
| } |
| |
| for (auto&& child : structElem->fChildren) { |
| if (child.fUsed) { |
| make(doc, &child, stack); |
| } |
| } |
| } |
| } // namespace |
| } // namespace header_outline |
| |
| namespace structelem_outline { |
| namespace { |
| struct Entry { |
| size_t fDescendantCount = 0; |
| Location fLocation; |
| void accumulate(Entry const& child) { |
| fDescendantCount += child.fDescendantCount; |
| fLocation.accumulate(child.fLocation); |
| } |
| }; |
| Entry emit(SkPDFDocument* const doc, |
| SkPDFStructElem* const structElem, |
| SkPDFIndirectReference const parentRef, |
| SkPDFIndirectReference const prevSiblingRef, |
| SkPDFIndirectReference const selfRef, |
| SkPDFIndirectReference const nextSiblingRef) { |
| Entry self; |
| |
| // Emit any child entries. |
| STArray<20, SkPDFIndirectReference> childRefs; |
| for (auto&& child : structElem->fChildren) { |
| if (!child.fUsed) { |
| continue; |
| } |
| childRefs.emplace_back(doc->reserveRef()); |
| } |
| int childRefsIndex = 0; |
| SkPDFIndirectReference prevChildRef; // Starts out as "none". |
| childRefs.emplace_back(); // Put an extra "none" on the end for the last "next". |
| for (auto&& child : structElem->fChildren) { |
| if (!child.fUsed) { |
| continue; |
| } |
| SkPDFIndirectReference currChildRef = childRefs[childRefsIndex]; |
| SkPDFIndirectReference nextChildRef = childRefs[childRefsIndex+1]; |
| self.accumulate(emit(doc, &child, selfRef, prevChildRef, currChildRef, nextChildRef)); |
| prevChildRef = currChildRef; |
| ++childRefsIndex; |
| } |
| childRefs.pop_back(); // Remove the "none" on the end. |
| |
| // Emit self entry. |
| SkPDFDict entry; |
| if (!structElem->fTitle.isEmpty()) { |
| entry.insertTextString("Title", structElem->fTitle); |
| } else if (!structElem->fAlt.isEmpty()) { |
| entry.insertTextString("Title", structElem->fAlt); |
| } else { |
| entry.insertTextString("Title", structElem->fStructType); |
| } |
| |
| // The uppermost/leftmost point on the earliest page of this structure element's marks. |
| Location structElemLocation; |
| for (auto&& mark : structElem->fMarkedContent) { |
| structElemLocation.accumulate(mark.fLocation); |
| } |
| if (structElemLocation.fPoint.isFinite()) { |
| auto destination = SkPDFMakeArray(); |
| destination->appendRef(doc->getPage(structElemLocation.fPageIndex)); |
| destination->appendName("XYZ"); |
| destination->appendScalar(structElemLocation.fPoint.x()); |
| destination->appendScalar(structElemLocation.fPoint.y()); |
| destination->appendInt(0); |
| entry.insertObject("Dest", std::move(destination)); |
| |
| self.fLocation.accumulate(structElemLocation); |
| } else if (self.fLocation.fPoint.isFinite()) { |
| // The uppermost/leftmost point on the earliest page of any child. |
| auto destination = SkPDFMakeArray(); |
| destination->appendRef(doc->getPage(self.fLocation.fPageIndex)); |
| destination->appendName("XYZ"); |
| destination->appendScalar(self.fLocation.fPoint.x()); |
| destination->appendScalar(self.fLocation.fPoint.y()); |
| destination->appendInt(0); |
| entry.insertObject("Dest", std::move(destination)); |
| } |
| if (structElem->fRef) { |
| entry.insertRef("SE", structElem->fRef); |
| } |
| entry.insertRef("Parent", parentRef); |
| if (prevSiblingRef) { |
| entry.insertRef("Prev", prevSiblingRef); |
| } |
| if (nextSiblingRef) { |
| entry.insertRef("Next", nextSiblingRef); |
| } |
| if (!childRefs.empty()) { |
| entry.insertRef("First", childRefs.front()); |
| entry.insertRef("Last", childRefs.back()); |
| entry.insertInt("Count", self.fDescendantCount); |
| } |
| doc->emit(entry, selfRef); |
| ++self.fDescendantCount; |
| return self; |
| } |
| } // namespace |
| } // namespace structelem_outline |
| |
| SkPDFIndirectReference SkPDFStructTree::makeOutline(SkPDFDocument* doc) const { |
| if (!fRoot || !fRoot->fUsed || fOutline == SkPDF::Metadata::Outline::None) { |
| return SkPDFIndirectReference(); |
| } |
| |
| SkPDFIndirectReference outlineRef; |
| SkPDFDict outline("Outlines"); |
| if (fOutline == SkPDF::Metadata::Outline::StructureElements) { |
| outlineRef = doc->reserveRef(); |
| SkPDFIndirectReference entryRef = doc->reserveRef(); |
| SkPDFIndirectReference none; |
| structelem_outline::Entry entry = structelem_outline::emit(doc, fRoot, outlineRef, |
| none, entryRef, none); |
| outline.insertRef("First", entryRef); |
| outline.insertRef("Last", entryRef); |
| outline.insertInt("Count", entry.fDescendantCount); |
| } else { |
| STArray<7, header_outline::Entry*> stack; |
| header_outline::Entry top{{SkString(), Location()}, 0, {}}; |
| stack.push_back(&top); |
| header_outline::make(doc, fRoot, stack); |
| if (top.fChildren.empty()) { |
| return SkPDFIndirectReference(); |
| } |
| outlineRef = doc->reserveRef(); |
| top.setAllRefs(doc, outlineRef); |
| top.emitDescendents(doc); |
| outline.insertRef("First", top.fChildren.front().fRef); |
| outline.insertRef("Last", top.fChildren.back().fRef); |
| outline.insertInt("Count", top.fDescendentsEmitted); |
| } |
| |
| return doc->emit(outline, outlineRef); |
| } |
| |
| SkString SkPDFStructTree::getRootLanguage() { |
| return fRoot ? fRoot->fLang : SkString(); |
| } |