// src/pdf/SkPDFMakeToUnicodeCmap.cpp - skia - Git at Google

 /*
  * Copyright 2011 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "src/pdf/SkPDFMakeToUnicodeCmap.h"

 #include "include/private/SkTo.h"
 #include "src/pdf/SkPDFUtils.h"
 #include "src/utils/SkUTF.h"

 // Writes the fixed opening of a ToUnicode CMap to |cmap|: the CIDInit
 // preamble, the /CIDSystemInfo dictionary, the CMap name and type, and one
 // codespace range. |multibyte| picks the two-byte range <0000>-<FFFF>;
 // otherwise the one-byte range <00>-<FF> is written.
 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
                                     bool multibyte) {
     // "12 dict begin": 12 is the value Adobe suggests and must not be changed;
     // it keeps old versions of Adobe Reader from malfunctioning.
     cmap->writeText(
         "/CIDInit /ProcSet findresource begin\n"
         "12 dict begin\n"
         "begincmap\n");

     // This /CIDSystemInfo has to stay consistent with the one in
     // SkPDFFont::populateCIDFont(). The system info object cannot simply be
     // passed in because the format here differs: this is not a reference
     // object.
     cmap->writeText(
         "/CIDSystemInfo\n"
         "<<  /Registry (Adobe)\n"
         "/Ordering (UCS)\n"
         "/Supplement 0\n"
         ">> def\n");

     // The CMapName has to stay consistent with /CIDSystemInfo above, and
     // /CMapType 2 marks this as a ToUnicode CMap.
     cmap->writeText(
         "/CMapName /Adobe-Identity-UCS def\n"
         "/CMapType 2 def\n"
         "1 begincodespacerange\n");

     // The codespace range only tells the PDF processor which codes are valid.
     const char* const codespace = multibyte ? "<0000> <FFFF>\n" : "<00> <FF>\n";
     cmap->writeText(codespace);
     cmap->writeText("endcodespacerange\n");
 }

 // Writes the fixed CMap trailer text to |cmap|. Nothing is written after the
 // final "end" (no trailing newline).
 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
     cmap->writeText(
         "endcmap\n"
         "CMapName currentdict /CMap defineresource pop\n"
         "end\n"
         "end");
 }

 namespace {
 // One bfchar entry: a single source code mapped to a single Unicode value.
 // Built with brace initialization, so the member order matters.
 struct BFChar {
     SkGlyphID fGlyphId;  // Source code, written as the first <...> operand.
     SkUnichar fUnicode;  // Unicode value, written as the second operand.
 };

 // One bfrange entry: the inclusive run of source codes [fStart, fEnd].
 // fStart maps to fUnicode and each following code maps to the next
 // consecutive Unicode value. Built with brace initialization, so the member
 // order matters.
 struct BFRange {
     SkGlyphID fStart;    // First source code of the run.
     SkGlyphID fEnd;      // Last source code of the run (inclusive).
     SkUnichar fUnicode;  // Unicode value that fStart maps to.
 };
 }  // namespace

 // Writes one source code to |cmap|. Two-byte codes go through
 // SkPDFUtils::WriteUInt16BE; one-byte codes are narrowed with SkToU8 and go
 // through SkPDFUtils::WriteUInt8.
 // NOTE(review): both helpers presumably emit hex digits, since callers wrap
 // the output in <...> -- confirm in SkPDFUtils.
 static void write_glyph(SkDynamicMemoryWStream* cmap,
                         bool multiByte,
                         SkGlyphID gid) {
     if (!multiByte) {
         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
         return;
     }
     SkPDFUtils::WriteUInt16BE(cmap, gid);
 }

 static void append_bfchar_section(const std::vector<BFChar>& bfchar,
                                   bool multiByte,
                                   SkDynamicMemoryWStream* cmap) {
     // PDF spec defines that every bf* list can have at most 100 entries.
     for (size_t i = 0; i < bfchar.size(); i += 100) {
         int count = SkToInt(bfchar.size() - i);
         count = SkMin32(count, 100);
         cmap->writeDecAsText(count);
         cmap->writeText(" beginbfchar\n");
         for (int j = 0; j < count; ++j) {
             cmap->writeText("<");
             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
             cmap->writeText("> <");
             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
             cmap->writeText(">\n");
         }
         cmap->writeText("endbfchar\n");
     }
 }

 static void append_bfrange_section(const std::vector<BFRange>& bfrange,
                                    bool multiByte,
                                    SkDynamicMemoryWStream* cmap) {
     // PDF spec defines that every bf* list can have at most 100 entries.
     for (size_t i = 0; i < bfrange.size(); i += 100) {
         int count = SkToInt(bfrange.size() - i);
         count = SkMin32(count, 100);
         cmap->writeDecAsText(count);
         cmap->writeText(" beginbfrange\n");
         for (int j = 0; j < count; ++j) {
             cmap->writeText("<");
             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
             cmap->writeText("> <");
             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
             cmap->writeText("> <");
             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
             cmap->writeText(">\n");
         }
         cmap->writeText("endbfrange\n");
     }
 }

 // Generates the bfchar and bfrange tables of a ToUnicode CMap according to
 // PDF spec 1.4 and Adobe Technote 5014, and appends them to |cmap|.
 // The function is not static so that unit tests can call it.
 //
 //   glyphToUnicode   Unicode value of each glyph, indexed by glyph ID.
 //   subset           Glyphs to emit; nullptr emits every glyph in the range.
 //   cmap             Stream that receives the sections.
 //   multiByteGlyphs  If true, source codes are the glyph IDs themselves. If
 //                    false, codes are rebased so firstGlyphID becomes code 1.
 //   firstGlyphID,
 //   lastGlyphID      Inclusive range of glyph IDs to consider.
 //
 // The implementation guarantees that bfchar and bfrange entries do not
 // overlap.
 //
 // It does not attempt the aggressive optimization shown below, because the
 // specification is not clear about it:
 //
 // 4 beginbfchar          1 beginbfchar
 // <0003> <0013>          <0020> <0014>
 // <0005> <0015>    to    endbfchar
 // <0007> <0017>          1 beginbfrange
 // <0020> <0014>          <0003> <0007> <0013>
 // endbfchar              endbfrange
 //
 // Adobe Technote 5014 says: "Code mappings (unlike codespace ranges) may
 // overlap, but succeeding maps supersede preceding maps."
 //
 // When searching text in a PDF, bfrange would take precedence, so typing char
 // id 0x0014 in the search box would find glyph id 0x0004 first. However, the
 // spec does not say how this kind of conflict is resolved.
 //
 // In the worst case (65536 contiguous Unicode values of which every other one
 // is used), the aggressive optimization would save 416KB before compression,
 // which is not enough motivation to implement it.
 void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
                              const SkPDFGlyphUse* subset,
                              SkDynamicMemoryWStream* cmap,
                              bool multiByteGlyphs,
                              SkGlyphID firstGlyphID,
                              SkGlyphID lastGlyphID) {
     // Single-byte codes are relative to firstGlyphID and start at 1.
     const int glyphOffset = multiByteGlyphs ? 0 : firstGlyphID - 1;

     std::vector<BFChar> bfcharEntries;
     std::vector<BFRange> bfrangeEntries;

     BFRange currentRangeEntry = {0, 0, 0};
     bool rangeEmpty = true;
     const int limit = static_cast<int>(lastGlyphID) + 1 - glyphOffset;

     // The loop deliberately runs one step past the last code (i == limit) so
     // that a range still open at the end is flushed.
     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
         SkGlyphID gid = i + glyphOffset;
         bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
         if (!rangeEmpty) {
             // PDF spec requires bfrange not changing the higher byte,
             // e.g. <1035> <10FF> <2222> is ok, but
             //      <1035> <1100> <2222> is no good
             //
             // The same holds for the destination: the last byte of the
             // Unicode string must not be incremented past 255 within a range
             // (otherwise the mapping is undefined), so the range is only
             // extended while the Unicode value keeps the same upper bits.
             // |i < limit| is tested before glyphToUnicode[gid] is read so the
             // table is never indexed one past lastGlyphID.
             bool inRange =
                 i == currentRangeEntry.fEnd + 1 &&
                 i >> 8 == currentRangeEntry.fStart >> 8 &&
                 i < limit &&
                 glyphToUnicode[gid] ==
                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart &&
                 (glyphToUnicode[gid] >> 8) == (currentRangeEntry.fUnicode >> 8);
             if (!inSubset || !inRange) {
                 // Close the open run: more than one code becomes a bfrange,
                 // a single code becomes a bfchar.
                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
                     bfrangeEntries.push_back(currentRangeEntry);
                 } else {
                     bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
                 }
                 rangeEmpty = true;
             }
         }
         if (inSubset) {
             // Either extend the open run or start a new one at this code.
             currentRangeEntry.fEnd = i;
             if (rangeEmpty) {
                 currentRangeEntry.fStart = i;
                 currentRangeEntry.fUnicode = glyphToUnicode[gid];
                 rangeEmpty = false;
             }
         }
     }

     // The spec requires all bfchar entries for a font must come before bfrange
     // entries.
     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
 }

 std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
         const SkUnichar* glyphToUnicode,
         const SkPDFGlyphUse* subset,
         bool multiByteGlyphs,
         SkGlyphID firstGlyphID,
         SkGlyphID lastGlyphID) {
     SkDynamicMemoryWStream cmap;
     append_tounicode_header(&cmap, multiByteGlyphs);
     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
                             firstGlyphID, lastGlyphID);
     append_cmap_footer(&cmap);
     return cmap.detachAsStream();
 }
	/*
	* Copyright 2011 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "src/pdf/SkPDFMakeToUnicodeCmap.h"

	#include "include/private/SkTo.h"
	#include "src/pdf/SkPDFUtils.h"
	#include "src/utils/SkUTF.h"

	// Writes the fixed opening of a ToUnicode CMap to |cmap|: the CIDInit
	// preamble, the /CIDSystemInfo dictionary, the CMap name and type, and one
	// codespace range. |multibyte| picks the two-byte range <0000>-<FFFF>;
	// otherwise the one-byte range <00>-<FF> is written.
	static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
	                                    bool multibyte) {
	    // "12 dict begin": 12 is the value Adobe suggests and must not be
	    // changed; it keeps old versions of Adobe Reader from malfunctioning.
	    cmap->writeText(
	        "/CIDInit /ProcSet findresource begin\n"
	        "12 dict begin\n"
	        "begincmap\n");

	    // This /CIDSystemInfo has to stay consistent with the one in
	    // SkPDFFont::populateCIDFont(). The system info object cannot simply
	    // be passed in because the format here differs: this is not a
	    // reference object.
	    cmap->writeText(
	        "/CIDSystemInfo\n"
	        "<< /Registry (Adobe)\n"
	        "/Ordering (UCS)\n"
	        "/Supplement 0\n"
	        ">> def\n");

	    // The CMapName has to stay consistent with /CIDSystemInfo above, and
	    // /CMapType 2 marks this as a ToUnicode CMap.
	    cmap->writeText(
	        "/CMapName /Adobe-Identity-UCS def\n"
	        "/CMapType 2 def\n"
	        "1 begincodespacerange\n");

	    // The codespace range only tells the PDF processor which codes are
	    // valid.
	    const char* const codespace = multibyte ? "<0000> <FFFF>\n" : "<00> <FF>\n";
	    cmap->writeText(codespace);
	    cmap->writeText("endcodespacerange\n");
	}

	// Writes the fixed CMap trailer text to |cmap|. Nothing is written after
	// the final "end" (no trailing newline).
	static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
	    cmap->writeText(
	        "endcmap\n"
	        "CMapName currentdict /CMap defineresource pop\n"
	        "end\n"
	        "end");
	}

	namespace {
	// One bfchar entry: a single source code mapped to a single Unicode value.
	// Built with brace initialization, so the member order matters.
	struct BFChar {
	SkGlyphID fGlyphId;  // Source code, written as the first <...> operand.
	SkUnichar fUnicode;  // Unicode value, written as the second operand.
	};

	// One bfrange entry: the inclusive run of source codes [fStart, fEnd].
	// fStart maps to fUnicode and each following code maps to the next
	// consecutive Unicode value. Built with brace initialization, so the member
	// order matters.
	struct BFRange {
	SkGlyphID fStart;  // First source code of the run.
	SkGlyphID fEnd;  // Last source code of the run (inclusive).
	SkUnichar fUnicode;  // Unicode value that fStart maps to.
	};
	} // namespace

	// Writes one source code to |cmap|. Two-byte codes go through
	// SkPDFUtils::WriteUInt16BE; one-byte codes are narrowed with SkToU8 and go
	// through SkPDFUtils::WriteUInt8.
	// NOTE(review): both helpers presumably emit hex digits, since callers wrap
	// the output in <...> -- confirm in SkPDFUtils.
	static void write_glyph(SkDynamicMemoryWStream* cmap,
	                        bool multiByte,
	                        SkGlyphID gid) {
	    if (!multiByte) {
	        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
	        return;
	    }
	    SkPDFUtils::WriteUInt16BE(cmap, gid);
	}

	static void append_bfchar_section(const std::vector<BFChar>& bfchar,
	bool multiByte,
	SkDynamicMemoryWStream* cmap) {
	// PDF spec defines that every bf* list can have at most 100 entries.
	for (size_t i = 0; i < bfchar.size(); i += 100) {
	int count = SkToInt(bfchar.size() - i);
	count = SkMin32(count, 100);
	cmap->writeDecAsText(count);
	cmap->writeText(" beginbfchar\n");
	for (int j = 0; j < count; ++j) {
	cmap->writeText("<");
	write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
	cmap->writeText("> <");
	SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
	cmap->writeText(">\n");
	}
	cmap->writeText("endbfchar\n");
	}
	}

	static void append_bfrange_section(const std::vector<BFRange>& bfrange,
	bool multiByte,
	SkDynamicMemoryWStream* cmap) {
	// PDF spec defines that every bf* list can have at most 100 entries.
	for (size_t i = 0; i < bfrange.size(); i += 100) {
	int count = SkToInt(bfrange.size() - i);
	count = SkMin32(count, 100);
	cmap->writeDecAsText(count);
	cmap->writeText(" beginbfrange\n");
	for (int j = 0; j < count; ++j) {
	cmap->writeText("<");
	write_glyph(cmap, multiByte, bfrange[i + j].fStart);
	cmap->writeText("> <");
	write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
	cmap->writeText("> <");
	SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
	cmap->writeText(">\n");
	}
	cmap->writeText("endbfrange\n");
	}
	}

	// Generates the bfchar and bfrange tables of a ToUnicode CMap according to
	// PDF spec 1.4 and Adobe Technote 5014, and appends them to |cmap|.
	// The function is not static so that unit tests can call it.
	//
	//   glyphToUnicode   Unicode value of each glyph, indexed by glyph ID.
	//   subset           Glyphs to emit; nullptr emits every glyph in the range.
	//   cmap             Stream that receives the sections.
	//   multiByteGlyphs  If true, source codes are the glyph IDs themselves. If
	//                    false, codes are rebased so firstGlyphID becomes code 1.
	//   firstGlyphID,
	//   lastGlyphID      Inclusive range of glyph IDs to consider.
	//
	// The implementation guarantees that bfchar and bfrange entries do not
	// overlap.
	//
	// It does not attempt the aggressive optimization shown below, because the
	// specification is not clear about it:
	//
	// 4 beginbfchar          1 beginbfchar
	// <0003> <0013>          <0020> <0014>
	// <0005> <0015>    to    endbfchar
	// <0007> <0017>          1 beginbfrange
	// <0020> <0014>          <0003> <0007> <0013>
	// endbfchar              endbfrange
	//
	// Adobe Technote 5014 says: "Code mappings (unlike codespace ranges) may
	// overlap, but succeeding maps supersede preceding maps."
	//
	// When searching text in a PDF, bfrange would take precedence, so typing
	// char id 0x0014 in the search box would find glyph id 0x0004 first.
	// However, the spec does not say how this kind of conflict is resolved.
	//
	// In the worst case (65536 contiguous Unicode values of which every other
	// one is used), the aggressive optimization would save 416KB before
	// compression, which is not enough motivation to implement it.
	void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
	                             const SkPDFGlyphUse* subset,
	                             SkDynamicMemoryWStream* cmap,
	                             bool multiByteGlyphs,
	                             SkGlyphID firstGlyphID,
	                             SkGlyphID lastGlyphID) {
	    // Single-byte codes are relative to firstGlyphID and start at 1.
	    const int glyphOffset = multiByteGlyphs ? 0 : firstGlyphID - 1;

	    std::vector<BFChar> bfcharEntries;
	    std::vector<BFRange> bfrangeEntries;

	    BFRange currentRangeEntry = {0, 0, 0};
	    bool rangeEmpty = true;
	    const int limit = static_cast<int>(lastGlyphID) + 1 - glyphOffset;

	    // The loop deliberately runs one step past the last code (i == limit)
	    // so that a range still open at the end is flushed.
	    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
	        SkGlyphID gid = i + glyphOffset;
	        bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
	        if (!rangeEmpty) {
	            // PDF spec requires bfrange not changing the higher byte,
	            // e.g. <1035> <10FF> <2222> is ok, but
	            //      <1035> <1100> <2222> is no good
	            //
	            // The same holds for the destination: the last byte of the
	            // Unicode string must not be incremented past 255 within a
	            // range (otherwise the mapping is undefined), so the range is
	            // only extended while the Unicode value keeps the same upper
	            // bits. |i < limit| is tested before glyphToUnicode[gid] is
	            // read so the table is never indexed one past lastGlyphID.
	            bool inRange =
	                i == currentRangeEntry.fEnd + 1 &&
	                i >> 8 == currentRangeEntry.fStart >> 8 &&
	                i < limit &&
	                glyphToUnicode[gid] ==
	                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart &&
	                (glyphToUnicode[gid] >> 8) == (currentRangeEntry.fUnicode >> 8);
	            if (!inSubset || !inRange) {
	                // Close the open run: more than one code becomes a
	                // bfrange, a single code becomes a bfchar.
	                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
	                    bfrangeEntries.push_back(currentRangeEntry);
	                } else {
	                    bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
	                }
	                rangeEmpty = true;
	            }
	        }
	        if (inSubset) {
	            // Either extend the open run or start a new one at this code.
	            currentRangeEntry.fEnd = i;
	            if (rangeEmpty) {
	                currentRangeEntry.fStart = i;
	                currentRangeEntry.fUnicode = glyphToUnicode[gid];
	                rangeEmpty = false;
	            }
	        }
	    }

	    // The spec requires all bfchar entries for a font must come before
	    // bfrange entries.
	    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
	    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
	}

	std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
	const SkUnichar* glyphToUnicode,
	const SkPDFGlyphUse* subset,
	bool multiByteGlyphs,
	SkGlyphID firstGlyphID,
	SkGlyphID lastGlyphID) {
	SkDynamicMemoryWStream cmap;
	append_tounicode_header(&cmap, multiByteGlyphs);
	SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
	firstGlyphID, lastGlyphID);
	append_cmap_footer(&cmap);
	return cmap.detachAsStream();
	}