Re-land: Support adding attributes to PDF document structure nodes.
Originally landed: https://skia-review.googlesource.com/c/skia/+/268878
Reverted: https://skia-review.googlesource.com/c/skia/+/271858
The issue was with compilation when PDF support is disabled. See
the diff between patchsets 1 and 2.
This is an important part of writing a tagged PDF. Many of the nodes
in the document structure tree need additional attributes, just like
in HTML.
This change aims to add support for a few useful attributes, not to
be comprehensive.
Bug: chromium:1039816
Change-Id: I15f8b6c41d4fdaa4b6e21775ab6d26ec57eb0f5d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/271916
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
Reviewed-by: Mike Reed <reed@google.com>
diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt
index 40dfa5c..da64a10 100644
--- a/RELEASE_NOTES.txt
+++ b/RELEASE_NOTES.txt
@@ -23,6 +23,9 @@
related calls is treated as a request to do no color correction at decode
time.
+ * Add new APIs to add attributes to document structure nodes when
+ creating a tagged PDF.
+
* Remove CGFontRef parameter from SkCreateTypefaceFromCTFont.
Use CTFontManagerCreateFontDescriptorFromData instead of
CGFontCreateWithDataProvider to create CTFonts to avoid memory use issues.
diff --git a/gn/tests.gni b/gn/tests.gni
index 3c35994..c2f772a 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -169,6 +169,7 @@
"$_tests/PDFMetadataAttributeTest.cpp",
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
"$_tests/PDFPrimitivesTest.cpp",
+ "$_tests/PDFTaggedTableTest.cpp",
"$_tests/PDFTaggedTest.cpp",
"$_tests/PackBitsTest.cpp",
"$_tests/PackedConfigsTextureTest.cpp",
diff --git a/include/docs/SkPDFDocument.h b/include/docs/SkPDFDocument.h
index 892489e..ae78306 100644
--- a/include/docs/SkPDFDocument.h
+++ b/include/docs/SkPDFDocument.h
@@ -5,15 +5,21 @@
#include "include/core/SkDocument.h"
+#include <vector>
+
+#include "include/core/SkColor.h"
#include "include/core/SkMilestone.h"
#include "include/core/SkScalar.h"
#include "include/core/SkString.h"
#include "include/core/SkTime.h"
+#include "include/private/SkNoncopyable.h"
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X
class SkExecutor;
+class SkPDFArray;
+class SkPDFTagTree;
namespace SkPDF {
@@ -71,16 +77,42 @@
kForm, //!< Form control (not like an HTML FORM element)
};
+/** Ordered list of (owner, name, value) attributes for one StructureElementNode. */
+class SK_API AttributeList : SkNoncopyable {
+public:
+ AttributeList();
+ ~AttributeList();
+
+ // Each attribute must have an owner (e.g. "Layout", "List", "Table", etc)
+ // and an attribute name (e.g. "BBox", "RowSpan", etc.) from PDF32000_2008 14.8.5,
+ // and then a value of the proper type according to the spec.
+ void appendInt(const char* owner, const char* name, int value);
+ void appendFloat(const char* owner, const char* name, float value);
+ void appendString(const char* owner, const char* name, const char* value);  // stored via insertName()
+ void appendFloatArray(const char* owner,
+ const char* name,
+ const std::vector<float>& value);
+ void appendStringArray(const char* owner,
+ const char* name,
+ const std::vector<SkString>& value);  // each element is added via appendName()
+
+private:
+ friend class ::SkPDFTagTree;  // SkPDFTagTree::Copy() moves fAttrs out of this list.
+ // Null until the first append*() call; every call appends one dictionary to the array.
+ std::unique_ptr<SkPDFArray> fAttrs;  // NOTE(review): needs <memory>; confirm it is included.
+};
+
/** A node in a PDF structure tree, giving a semantic representation
of the content. Each node ID is associated with content
by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
NodeIDs should be unique within each tree.
*/
struct StructureElementNode {
- const StructureElementNode* fChildren = nullptr;
- size_t fChildCount;
- int fNodeId;
- DocumentStructureType fType;
+ StructureElementNode* fChildren = nullptr;  // array of fChildCount nodes; non-const, see fAttributes
+ size_t fChildCount = 0;
+ int fNodeId = 0;
+ DocumentStructureType fType = DocumentStructureType::kNonStruct;
+ AttributeList fAttributes;  // moved out of the node (left empty) by SkPDFTagTree::Copy()
};
/** Optional metadata to be passed into the PDF factory function.
@@ -149,7 +181,7 @@
a semantic representation of the content. The caller
should retain ownership.
*/
- const StructureElementNode* fStructureElementTreeRoot = nullptr;
+ StructureElementNode* fStructureElementTreeRoot = nullptr;
/** Executor to handle threaded work within PDF Backend. If this is nullptr,
then all work will be done serially on the main thread. To have worker
diff --git a/src/pdf/SkDocument_PDF_None.cpp b/src/pdf/SkDocument_PDF_None.cpp
index 2dc580c..1b482ef 100644
--- a/src/pdf/SkDocument_PDF_None.cpp
+++ b/src/pdf/SkDocument_PDF_None.cpp
@@ -8,8 +8,14 @@
#include "include/core/SkCanvas.h"
#include "include/docs/SkPDFDocument.h"
+class SkPDFArray {};  // Stand-in so unique_ptr<SkPDFArray> in AttributeList has a complete type here.
+
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream*, const SkPDF::Metadata&) { return nullptr; }
void SkPDF::SetNodeId(SkCanvas* c, int n) {
c->drawAnnotation({0, 0, 0, 0}, "PDF_Node_Key", SkData::MakeWithCopy(&n, sizeof(n)).get());
}
+// NOTE(review): no append*() definitions appear in this hunk; confirm none are needed without PDF.
+SkPDF::AttributeList::AttributeList() = default;
+
+SkPDF::AttributeList::~AttributeList() = default;
diff --git a/src/pdf/SkPDFTag.cpp b/src/pdf/SkPDFTag.cpp
index a224807..776ec36 100644
--- a/src/pdf/SkPDFTag.cpp
+++ b/src/pdf/SkPDFTag.cpp
@@ -66,6 +66,70 @@
SK_ABORT("bad tag");
}
+SkPDF::AttributeList::AttributeList() = default;  // fAttrs starts out null.
+// Defaulted in this file rather than the header, where SkPDFArray is only forward-declared.
+SkPDF::AttributeList::~AttributeList() = default;
+
+// Appends a dictionary with "O" set to the name `owner` and key `name` set to the integer
+// `value`. The backing array is only created once the first attribute is appended.
+void SkPDF::AttributeList::appendInt(const char* owner, const char* name, int value) {
+    std::unique_ptr<SkPDFDict> entry = SkPDFMakeDict();
+    entry->insertName("O", owner);
+    entry->insertInt(name, value);
+    if (!fAttrs) { fAttrs = SkPDFMakeArray(); }
+    fAttrs->appendObject(std::move(entry));
+}
+
+// Appends a dictionary with "O" set to the name `owner` and key `name` set to the scalar
+// `value`. The backing array is only created once the first attribute is appended.
+void SkPDF::AttributeList::appendFloat(const char* owner, const char* name, float value) {
+    std::unique_ptr<SkPDFDict> entry = SkPDFMakeDict();
+    entry->insertName("O", owner);
+    entry->insertScalar(name, value);
+    if (!fAttrs) { fAttrs = SkPDFMakeArray(); }
+    fAttrs->appendObject(std::move(entry));
+}
+
+// Appends a dictionary with "O" set to the name `owner` and key `name` set to `value`.
+void SkPDF::AttributeList::appendString(
+        const char* owner, const char* name, const char* value) {
+    std::unique_ptr<SkPDFDict> entry = SkPDFMakeDict();
+    entry->insertName("O", owner);
+    entry->insertName(name, value);  // Stored via insertName(), i.e. as a name.
+    if (!fAttrs) { fAttrs = SkPDFMakeArray(); }
+    fAttrs->appendObject(std::move(entry));
+}
+
+// Appends {"O": owner, name: [value...]}; elements keep their order and are added as scalars.
+void SkPDF::AttributeList::appendFloatArray(
+        const char* owner, const char* name, const std::vector<float>& value) {
+    std::unique_ptr<SkPDFArray> scalars = SkPDFMakeArray();
+    for (size_t i = 0; i < value.size(); ++i) {
+        scalars->appendScalar(value[i]);
+    }
+    std::unique_ptr<SkPDFDict> entry = SkPDFMakeDict();
+    entry->insertName("O", owner);
+    entry->insertObject(name, std::move(scalars));
+    if (!fAttrs) { fAttrs = SkPDFMakeArray(); }
+    fAttrs->appendObject(std::move(entry));
+}
+
+// Appends a dictionary with "O" set to the name `owner` and key `name` set to an array
+// holding every element of `value`, in order, each added through appendName().
+void SkPDF::AttributeList::appendStringArray(
+        const char* owner, const char* name, const std::vector<SkString>& value) {
+    if (!fAttrs) { fAttrs = SkPDFMakeArray(); }
+    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
+    attrDict->insertName("O", owner);
+    std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
+    // Bind by const reference: iterating by value made a needless SkString copy per element.
+    for (const SkString& element : value) {
+        pdfArray->appendName(element);
+    }
+    attrDict->insertObject(name, std::move(pdfArray));
+    fAttrs->appendObject(std::move(attrDict));
+}
+
struct SkPDFTagNode {
SkPDFTagNode* fChildren = nullptr;
size_t fChildCount = 0;
@@ -82,16 +146,18 @@
kYes,
kNo,
} fCanDiscard = kUnknown;
+ std::unique_ptr<SkPDFArray> fAttributes;
};
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
SkPDFTagTree::~SkPDFTagTree() = default;
-static void copy(const SkPDF::StructureElementNode& node,
- SkPDFTagNode* dst,
- SkArenaAlloc* arena,
- SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
+// static
+void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
+ SkPDFTagNode* dst,
+ SkArenaAlloc* arena,
+ SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
nodeMap->set(node.fNodeId, dst);
size_t childCount = node.fChildCount;
SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
@@ -100,14 +166,15 @@
dst->fType = node.fType;
dst->fChildren = children;
for (size_t i = 0; i < childCount; ++i) {
- copy(node.fChildren[i], &children[i], arena, nodeMap);
+ Copy(node.fChildren[i], &children[i], arena, nodeMap);
}
+ dst->fAttributes = std::move(node.fAttributes.fAttrs);
}
-void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
+void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {  // *node is mutated, see Copy().
if (node) {
fRoot = fArena.make<SkPDFTagNode>();
- copy(*node, fRoot, &fArena, &fNodeMap);
+ Copy(*node, fRoot, &fArena, &fNodeMap);  // Moves each node's fAttrs into the tag tree.
}
}
@@ -184,6 +251,13 @@
dict.insertName("S", tag_name_from_type(node->fType));
dict.insertRef("P", parent);
dict.insertObject("K", std::move(kids));
+ SkString idString;
+ idString.printf("%d", node->fNodeId);
+ dict.insertName("ID", idString.c_str());
+ if (node->fAttributes) {
+ dict.insertObject("A", std::move(node->fAttributes));
+ }
+
return doc->emit(dict, ref);
}
@@ -223,4 +297,3 @@
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
return doc->emit(structTreeRoot, ref);
}
-
diff --git a/src/pdf/SkPDFTag.h b/src/pdf/SkPDFTag.h
index d0718d3..c9bd71e 100644
--- a/src/pdf/SkPDFTag.h
+++ b/src/pdf/SkPDFTag.h
@@ -21,12 +21,17 @@
public:
SkPDFTagTree();
~SkPDFTagTree();
- void init(const SkPDF::StructureElementNode*);
+ void init(SkPDF::StructureElementNode*);
void reset();
int getMarkIdForNodeId(int nodeId, unsigned pageIndex);
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
private:
+ static void Copy(SkPDF::StructureElementNode& node,
+ SkPDFTagNode* dst,
+ SkArenaAlloc* arena,
+ SkTHashMap<int, SkPDFTagNode*>* nodeMap);
+
SkArenaAlloc fArena;
SkTHashMap<int, SkPDFTagNode*> fNodeMap;
SkPDFTagNode* fRoot = nullptr;
diff --git a/tests/PDFTaggedTableTest.cpp b/tests/PDFTaggedTableTest.cpp
new file mode 100644
index 0000000..4bc1497
--- /dev/null
+++ b/tests/PDFTaggedTableTest.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2020 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "tests/Test.h"
+
+#include "include/core/SkCanvas.h"
+#include "include/core/SkFont.h"
+#include "include/core/SkStream.h"
+#include "include/docs/SkPDFDocument.h"
+
+using PDFTag = SkPDF::StructureElementNode;
+
+// Builds a tagged PDF holding a 5x4 table with Scope/RowSpan/ColSpan attributes.
+// Add this to args.gn to output the PDF to a file:
+//   extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/table.pdf\"" ]
+DEF_TEST(SkPDF_tagged_table, r) {
+    REQUIRE_PDF_DOCUMENT(SkPDF_tagged_table, r);
+#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
+    SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
+#else
+    SkDynamicMemoryWStream outputStream;
+#endif
+
+    SkSize pageSize = SkSize::Make(612, 792);  // U.S. Letter
+
+    SkPDF::Metadata metadata;
+    metadata.fTitle = "Example Tagged Table PDF";
+    metadata.fCreator = "Skia";
+    SkTime::DateTime now;
+    SkTime::GetDateTime(&now);
+    metadata.fCreation = now;
+    metadata.fModified = now;
+
+    constexpr int kRowCount = 5;
+    constexpr int kColCount = 4;
+    const char* cellData[kRowCount * kColCount] = {  // Row-major; nullptr marks an empty cell.
+        "Car",                  "Engine",   "City MPG", "Highway MPG",
+        "Mitsubishi Mirage ES", "Gas",      "28",       "47",
+        "Toyota Prius Three",   "Hybrid",   "43",       "59",
+        "Nissan Leaf SL",       "Electric", "N/A",      nullptr,
+        "Tesla Model 3",        nullptr,    "N/A",      nullptr
+    };
+
+    // The document tag.
+    PDFTag root;
+    root.fNodeId = 1;
+    root.fType = SkPDF::DocumentStructureType::kDocument;
+    root.fChildCount = 2;
+    PDFTag rootChildren[2];
+
+    // Heading.
+    PDFTag& h1 = rootChildren[0];
+    h1.fNodeId = 2;
+    h1.fType = SkPDF::DocumentStructureType::kH1;
+    h1.fChildCount = 0;
+
+    // Table.
+    PDFTag& table = rootChildren[1];
+    table.fNodeId = 3;
+    table.fType = SkPDF::DocumentStructureType::kTable;
+    table.fChildCount = kRowCount;  // One child per entry of rows[] below.
+    table.fAttributes.appendFloatArray("Layout", "BBox", {72, 72, 360, 360});
+
+    PDFTag rows[kRowCount];
+    PDFTag all_cells[kRowCount * kColCount];
+    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
+        PDFTag& row = rows[rowIndex];
+        row.fNodeId = 4 + rowIndex;  // Rows use ids 4..8; cell ids start at 10.
+        row.fType = SkPDF::DocumentStructureType::kTR;
+        row.fChildCount = kColCount;
+        PDFTag* cells = &all_cells[rowIndex * kColCount];
+
+        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
+            int cellIndex = rowIndex * kColCount + colIndex;
+            PDFTag& cell = cells[colIndex];
+            cell.fNodeId = 10 + cellIndex;  // Must match the SetNodeId() calls made while drawing.
+            if (!cellData[cellIndex])
+                cell.fType = SkPDF::DocumentStructureType::kNonStruct;
+            else if (rowIndex == 0 || colIndex == 0)
+                cell.fType = SkPDF::DocumentStructureType::kTH;
+            else
+                cell.fType = SkPDF::DocumentStructureType::kTD;
+            cell.fChildCount = 0;
+
+            if (cellIndex == 13) {  // Row 3, col 1 ("Electric"): the cell below it is empty.
+                cell.fAttributes.appendInt("Table", "RowSpan", 2);
+            } else if (cellIndex == 14 || cellIndex == 18) {  // "N/A" cells followed by an empty cell.
+                cell.fAttributes.appendInt("Table", "ColSpan", 2);
+            } else if (cell.fType == SkPDF::DocumentStructureType::kTH) {
+                cell.fAttributes.appendString(
+                        "Table", "Scope", rowIndex == 0 ? "Column" : "Row");
+            }
+        }
+        row.fChildren = cells;
+    }
+    table.fChildren = rows;
+    root.fChildren = rootChildren;
+
+    metadata.fStructureElementTreeRoot = &root;
+    sk_sp<SkDocument> document = SkPDF::MakeDocument(
+            &outputStream, metadata);
+
+    SkPaint paint;
+    paint.setColor(SK_ColorBLACK);
+
+    SkCanvas* canvas =
+            document->beginPage(pageSize.width(),
+                                pageSize.height());
+    SkPDF::SetNodeId(canvas, 2);  // Node id of the H1 heading.
+    SkFont font(nullptr, 36);
+    canvas->drawString("Tagged PDF Table", 72, 72, font, paint);
+
+    font.setSize(14);
+    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
+        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
+            int cellIndex = rowIndex * kColCount + colIndex;
+            const char* str = cellData[cellIndex];
+            if (!str)
+                continue;
+
+            int x = 72 + colIndex * 108 + (colIndex > 0 ? 72 : 0);
+            int y = 144 + rowIndex * 48;
+
+            SkPDF::SetNodeId(canvas, 10 + cellIndex);
+            canvas->drawString(str, x, y, font, paint);
+        }
+    }
+
+    document->endPage();
+    document->close();
+    outputStream.flush();
+}