ICU project: text break iterators in SkShaper
Change-Id: I8a0dd71298331b608fbe874cc610a80fc7815b0e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/313082
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp
index 4ff3903..ff5da5d 100644
--- a/modules/skshaper/src/SkShaper_harfbuzz.cpp
+++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp
@@ -35,23 +35,12 @@
#include <hb.h>
#include <hb-icu.h>
#include <hb-ot.h>
-#include <unicode/ubrk.h>
-#include <unicode/umachine.h>
-#include <unicode/urename.h>
#include <unicode/uscript.h>
-#include <unicode/ustring.h>
-#include <unicode/utext.h>
-#include <unicode/utypes.h>
-
#include <cstring>
#include <memory>
#include <type_traits>
#include <utility>
-#if defined(SK_USING_THIRD_PARTY_ICU)
-#include "SkLoadICU.h"
-#endif
-
// HB_FEATURE_GLOBAL_START and HB_FEATURE_GLOBAL_END were not added until HarfBuzz 2.0
// They would have always worked, they just hadn't been named yet.
#if !defined(HB_FEATURE_GLOBAL_START)
@@ -71,10 +60,9 @@
using HBFace = resource<hb_face_t , decltype(hb_face_destroy) , hb_face_destroy >;
using HBFont = resource<hb_font_t , decltype(hb_font_destroy) , hb_font_destroy >;
using HBBuffer = resource<hb_buffer_t , decltype(hb_buffer_destroy), hb_buffer_destroy>;
-using ICUBrk = resource<UBreakIterator, decltype(ubrk_close) , ubrk_close >;
-using ICUUText = resource<UText , decltype(utext_close) , utext_close >;
using SkUnicodeBidi = std::unique_ptr<SkBidiIterator>;
+using SkUnicodeBreak = std::unique_ptr<SkBreakIterator>;  // takes over the role of the removed ICUBrk alias
hb_position_t skhb_position(SkScalar value) {
// Treat HarfBuzz hb_position_t as 16.16 fixed-point.
@@ -653,11 +641,16 @@
class ShaperHarfBuzz : public SkShaper {
public:
- ShaperHarfBuzz(HBBuffer, ICUBrk line, ICUBrk grapheme, sk_sp<SkFontMgr>);
+ ShaperHarfBuzz(std::unique_ptr<SkUnicode>,
+ SkUnicodeBreak line,
+ SkUnicodeBreak grapheme,
+ HBBuffer,
+ sk_sp<SkFontMgr>);
protected:
- ICUBrk fLineBreakIterator;
- ICUBrk fGraphemeBreakIterator;
+ std::unique_ptr<SkUnicode> fUnicode;
+ SkUnicodeBreak fLineBreakIterator;
+ SkUnicodeBreak fGraphemeBreakIterator;
ShapedRun shape(const char* utf8, size_t utf8Bytes,
const char* utf8Start,
@@ -668,7 +661,6 @@
const FontRunIterator&,
const Feature*, size_t featuresSize) const;
private:
- std::unique_ptr<SkUnicode> fUnicode = SkUnicode::Make();
const sk_sp<SkFontMgr> fFontMgr;
HBBuffer fBuffer;
hb_language_t fUndefinedLanguage;
@@ -753,52 +745,43 @@
};
static std::unique_ptr<SkShaper> MakeHarfBuzz(sk_sp<SkFontMgr> fontmgr, bool correct) {
- #if defined(SK_USING_THIRD_PARTY_ICU)
- if (!SkLoadICU()) {
- SkDEBUGF("SkLoadICU() failed!\n");
- return nullptr;
- }
- #endif
HBBuffer buffer(hb_buffer_create());
if (!buffer) {
SkDEBUGF("Could not create hb_buffer");
return nullptr;
}
- UErrorCode status = U_ZERO_ERROR;
- ICUBrk lineBreakIterator(ubrk_open(UBRK_LINE, "th", nullptr, 0, &status));
- if (!lineBreakIterator || U_FAILURE(status)) {
- SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
+ auto unicode = SkUnicode::Make();  // stands in for the direct SkLoadICU()/ubrk_open() calls removed here
+ if (!unicode) {  // no SkUnicode instance: no shaper can be built
return nullptr;
}
-
- ICUBrk graphemeBreakIterator(ubrk_open(UBRK_CHARACTER, "th", nullptr, 0, &status));
- if (!graphemeBreakIterator || U_FAILURE(status)) {
- SkDEBUGF("Could not create grapheme break iterator: %s", u_errorName(status));
+ auto lineIter = unicode->makeBreakIterator("th", SkUnicode::BreakType::kLines);  // same "th" locale as the removed UBRK_LINE call
+ if (!lineIter) {
+ return nullptr;
+ }
+ auto graphIter = unicode->makeBreakIterator("th", SkUnicode::BreakType::kGraphemes);  // same "th" locale as the removed UBRK_CHARACTER call
+ if (!graphIter) {
return nullptr;
}
if (correct) {
- return std::make_unique<ShaperDrivenWrapper>(std::move(buffer),
- std::move(lineBreakIterator),
- std::move(graphemeBreakIterator),
- std::move(fontmgr));
+ return std::make_unique<ShaperDrivenWrapper>(std::move(unicode),  // order follows ShaperHarfBuzz(unicode, line, grapheme, buffer, fontmgr)
+ std::move(lineIter), std::move(graphIter), std::move(buffer), std::move(fontmgr));
} else {
- return std::make_unique<ShapeThenWrap>(std::move(buffer),
- std::move(lineBreakIterator),
- std::move(graphemeBreakIterator),
- std::move(fontmgr));
+ return std::make_unique<ShapeThenWrap>(std::move(unicode),  // same argument order as the branch above
+ std::move(lineIter), std::move(graphIter), std::move(buffer), std::move(fontmgr));
}
}
-ShaperHarfBuzz::ShaperHarfBuzz(HBBuffer buffer, ICUBrk line, ICUBrk grapheme,
- sk_sp<SkFontMgr> fontmgr)
- : fLineBreakIterator(std::move(line))
- , fGraphemeBreakIterator(std::move(grapheme))
+ShaperHarfBuzz::ShaperHarfBuzz(std::unique_ptr<SkUnicode> unicode,  // every argument is moved into a member
+ SkUnicodeBreak lineIter, SkUnicodeBreak graphIter, HBBuffer buffer, sk_sp<SkFontMgr> fontmgr)
+ : fUnicode(std::move(unicode))  // declared ahead of both iterators in the class, so it is built first and outlives them
+ , fLineBreakIterator(std::move(lineIter))
+ , fGraphemeBreakIterator(std::move(graphIter))
, fFontMgr(std::move(fontmgr))
, fBuffer(std::move(buffer))
, fUndefinedLanguage(hb_language_from_string("und", -1))
-{}
+{ }
void ShaperHarfBuzz::shape(const char* utf8, size_t utf8Bytes,
const SkFont& srcFont,
@@ -929,21 +912,10 @@
// TODO: break iterator per item, but just reset position if needed?
// Maybe break iterator with model?
- UBreakIterator& breakIterator = *fLineBreakIterator;
- {
- UErrorCode status = U_ZERO_ERROR;
- UText sUtf8UText = UTEXT_INITIALIZER;
- ICUUText utf8UText(utext_openUTF8(&sUtf8UText, utf8Start, utf8runLength, &status));
- if (U_FAILURE(status)) {
- SkDebugf("Could not create utf8UText: %s", u_errorName(status));
- return;
- }
- ubrk_setUText(&breakIterator, utf8UText.get(), &status);
- if (U_FAILURE(status)) {
- SkDebugf("Could not setText on break iterator: %s", u_errorName(status));
- return;
- }
+ if (!fLineBreakIterator->setText(utf8Start, utf8runLength)) {
+ return;
}
+ SkBreakIterator& breakIterator = *fLineBreakIterator;
ShapedRun best(RunHandler::Range(), SkFont(), 0, nullptr, 0,
{ SK_ScalarNegativeInfinity, SK_ScalarNegativeInfinity });
@@ -951,9 +923,9 @@
bool bestUsesModelForGlyphs = false;
SkScalar widthLeft = width - line.fAdvance.fX;
- for (int32_t breakIteratorCurrent = ubrk_next(&breakIterator);
- breakIteratorCurrent != UBRK_DONE;
- breakIteratorCurrent = ubrk_next(&breakIterator))
+ for (int32_t breakIteratorCurrent = breakIterator.next();
+ !breakIterator.isDone();
+ breakIteratorCurrent = breakIterator.next())
{
// TODO: if past a safe to break, future safe to break will be at least as long
@@ -1032,29 +1004,15 @@
{
SkTArray<ShapedRun> runs;
{
- UBreakIterator& lineBreakIterator = *fLineBreakIterator;
- UBreakIterator& graphemeBreakIterator = *fGraphemeBreakIterator;
- {
- UErrorCode status = U_ZERO_ERROR;
- UText sUtf8UText = UTEXT_INITIALIZER;
- ICUUText utf8UText(utext_openUTF8(&sUtf8UText, utf8, utf8Bytes, &status));
- if (U_FAILURE(status)) {
- SkDebugf("Could not create utf8UText: %s", u_errorName(status));
- return;
- }
-
- ubrk_setUText(&lineBreakIterator, utf8UText.get(), &status);
- if (U_FAILURE(status)) {
- SkDebugf("Could not setText on line break iterator: %s", u_errorName(status));
- return;
- }
- ubrk_setUText(&graphemeBreakIterator, utf8UText.get(), &status);
- if (U_FAILURE(status)) {
- SkDebugf("Could not setText on grapheme break iterator: %s", u_errorName(status));
- return;
- }
+ if (!fLineBreakIterator->setText(utf8, utf8Bytes)) {
+ return;
+ }
+ if (!fGraphemeBreakIterator->setText(utf8, utf8Bytes)) {
+ return;
}
+ SkBreakIterator& lineBreakIterator = *fLineBreakIterator;
+ SkBreakIterator& graphemeBreakIterator = *fGraphemeBreakIterator;
const char* utf8Start = nullptr;
const char* utf8End = utf8;
while (runSegmenter.advanceRuns()) {
@@ -1072,20 +1030,18 @@
ShapedGlyph& glyph = run.fGlyphs[i];
int32_t glyphCluster = glyph.fCluster;
- int32_t lineBreakIteratorCurrent = ubrk_current(&lineBreakIterator);
- while (lineBreakIteratorCurrent != UBRK_DONE &&
- lineBreakIteratorCurrent < glyphCluster)
+ int32_t lineBreakIteratorCurrent = lineBreakIterator.current();
+ while (!lineBreakIterator.isDone() && lineBreakIteratorCurrent < glyphCluster)
{
- lineBreakIteratorCurrent = ubrk_next(&lineBreakIterator);
+ lineBreakIteratorCurrent = lineBreakIterator.next();
}
glyph.fMayLineBreakBefore = glyph.fCluster != previousCluster &&
lineBreakIteratorCurrent == glyphCluster;
- int32_t graphemeBreakIteratorCurrent = ubrk_current(&graphemeBreakIterator);
- while (graphemeBreakIteratorCurrent != UBRK_DONE &&
- graphemeBreakIteratorCurrent < glyphCluster)
+ int32_t graphemeBreakIteratorCurrent = graphemeBreakIterator.current();
+ while (!graphemeBreakIterator.isDone() && graphemeBreakIteratorCurrent < glyphCluster)
{
- graphemeBreakIteratorCurrent = ubrk_next(&graphemeBreakIterator);
+ graphemeBreakIteratorCurrent = graphemeBreakIterator.next();
}
glyph.fGraphemeBreakBefore = glyph.fCluster != previousCluster &&
graphemeBreakIteratorCurrent == glyphCluster;
@@ -1492,18 +1448,17 @@
return MakeHarfBuzz(std::move(fontmgr), false);
}
std::unique_ptr<SkShaper> SkShaper::MakeShapeDontWrapOrReorder(sk_sp<SkFontMgr> fontmgr) {
- #if defined(SK_USING_THIRD_PARTY_ICU)
- if (!SkLoadICU()) {
- SkDEBUGF("SkLoadICU() failed!\n");
- return nullptr;
- }
- #endif
HBBuffer buffer(hb_buffer_create());
if (!buffer) {
SkDEBUGF("Could not create hb_buffer");
return nullptr;
}
- return std::make_unique<ShapeDontWrapOrReorder>(std::move(buffer), nullptr, nullptr,
- std::move(fontmgr));
+ auto unicode = SkUnicode::Make();  // new in this change: the shaper is handed its own SkUnicode
+ if (!unicode) {
+ return nullptr;
+ }
+ // The two nullptr arguments fill the line and grapheme break-iterator slots.
+ return std::make_unique<ShapeDontWrapOrReorder>
+ (std::move(unicode), nullptr, nullptr, std::move(buffer), std::move(fontmgr));
}
diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h
index 5414ed2..07b0904 100644
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@@ -47,12 +47,27 @@
kLTR,
kRTL,
};
- virtual ~SkBidiIterator() {}
+ virtual ~SkBidiIterator() = default;
virtual Position getLength() = 0;
virtual Level getLevelAt(Position) = 0;
static void ReorderVisual(const Level runLevels[], int levelsCount, int32_t logicalFromVisual[]);
};
+class SKUNICODE_API SkBreakIterator {  // abstract; instances come from SkUnicode::makeBreakIterator()
+public:
+ typedef int32_t Position;
+ typedef int32_t Status;
+ virtual ~SkBreakIterator() = default;
+ virtual Position first() = 0;  // ICU backend forwards to ubrk_first()
+ virtual Position current() = 0;  // ICU backend forwards to ubrk_current()
+ virtual Position next() = 0;  // ICU backend forwards to ubrk_next()
+ virtual Position preceding(Position offset) = 0;  // ICU backend forwards to ubrk_preceding()
+ virtual Position following(Position offset) = 0;  // ICU backend forwards to ubrk_following()
+ virtual Status status() = 0;  // ICU backend forwards to ubrk_getRuleStatus()
+ virtual bool isDone() = 0;  // ICU backend: true when the latest position it returned was UBRK_DONE
+ virtual bool setText(const char utftext8[], int utf8Units) = 0;  // ICU backend returns false when ICU reports an error
+};
+
class SKUNICODE_API SkUnicode {
public:
typedef uint32_t ScriptID;
@@ -76,7 +91,7 @@
kHardLineBreak
};
- enum class UBreakType {
+ enum class BreakType {
kWords,
kGraphemes,
kLines
@@ -94,11 +109,13 @@
virtual bool isWhitespace(SkUnichar utf8) = 0;
virtual SkString convertUtf16ToUtf8(const std::u16string& utf16) = 0;
- // Iterators (used in SkShaper)
+ // Methods used in SkShaper
virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
(const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
(const char text[], int count, SkBidiIterator::Direction) = 0;
+ virtual std::unique_ptr<SkBreakIterator> makeBreakIterator  // ICU backend returns nullptr when ubrk_open fails
+ (const char locale[], BreakType breakType) = 0;
// High level methods (that we actually use somewhere=SkParagraph)
virtual bool getBidiRegions
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index 13de14d..7eb0f86 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -118,20 +118,84 @@
ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
}
+// ICU-backed SkBreakIterator: each call forwards to the matching ubrk_* function.
+class SkBreakIterator_icu : public SkBreakIterator {
+    ICUBreakIterator fBreakIterator;
+    Position fLastResult;  // last position ICU returned; isDone() compares it to UBRK_DONE
+public:
+    explicit SkBreakIterator_icu(ICUBreakIterator iter)
+        : fBreakIterator(std::move(iter)), fLastResult(0) {}
+    Position first() override { return fLastResult = ubrk_first(fBreakIterator.get()); }
+    Position current() override { return fLastResult = ubrk_current(fBreakIterator.get()); }
+    Position next() override { return fLastResult = ubrk_next(fBreakIterator.get()); }
+    Position preceding(Position offset) override
+        { return fLastResult = ubrk_preceding(fBreakIterator.get(), offset); }
+    Position following(Position offset) override
+        { return fLastResult = ubrk_following(fBreakIterator.get(), offset); }
+    Status status() override { return ubrk_getRuleStatus(fBreakIterator.get()); }
+    bool isDone() override { return fLastResult == UBRK_DONE; }
+
+    // Attaches utf8Units code units of UTF-8 text; returns false if ICU reports an error.
+    bool setText(const char utftext8[], int utf8Units) override {
+        UErrorCode status = U_ZERO_ERROR;
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText text(utext_openUTF8(&sUtf8UText, utftext8, utf8Units, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+        // `text` ends with this scope; fine, since ubrk_setUText shallow-clones it (ICU docs).
+        ubrk_setUText(fBreakIterator.get(), text.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        fLastResult = 0;  // drop any UBRK_DONE left over from the previous text
+        return true;
+    }
+
+    // Maps SkUnicode::BreakType to the ICU type; unknown values fall back to UBRK_CHARACTER.
+    static UBreakIteratorType convertType(SkUnicode::BreakType type) {
+        switch (type) {
+            case SkUnicode::BreakType::kLines: return UBRK_LINE;
+            case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
+            case SkUnicode::BreakType::kWords: return UBRK_WORD;
+            default:
+                return UBRK_CHARACTER;
+        }
+    }
+
+    // Opens an ICU iterator for locale/type; nullptr on an ICU error or a null handle.
+    static std::unique_ptr<SkBreakIterator> makeUtf8BreakIterator
+            (const char locale[], SkUnicode::BreakType type) {
+        UErrorCode status = U_ZERO_ERROR;
+        ICUBreakIterator iterator(ubrk_open(convertType(type), locale, nullptr, 0, &status));
+        if (U_FAILURE(status) || !iterator) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return nullptr;
+        }
+        return std::make_unique<SkBreakIterator_icu>(std::move(iterator));
+    }
+};
+
class SkUnicode_icu : public SkUnicode {
- static UBreakIteratorType convertType(UBreakType type) {
+ static UBreakIteratorType convertType(BreakType type) {  // BreakType -> ICU type; unknown values hit SkDEBUGF and fall back to UBRK_CHARACTER
switch (type) {
- case UBreakType::kLines: return UBRK_LINE;
- case UBreakType::kGraphemes: return UBRK_CHARACTER;
- case UBreakType::kWords: return UBRK_WORD;
+ case BreakType::kLines: return UBRK_LINE;
+ case BreakType::kGraphemes: return UBRK_CHARACTER;
+ case BreakType::kWords: return UBRK_WORD;
default:
SkDEBUGF("Convert error: wrong break type");
return UBRK_CHARACTER;
}
}
- static bool extractBidi(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* bidiRegions) {
+ static bool extractBidi(const char utf8[],
+ int utf8Units,
+ TextDirection dir,
+ std::vector<BidiRegion>* bidiRegions) {
// Convert to UTF16 since for now bidi iterator only operates on utf16
std::unique_ptr<uint16_t[]> utf16;
@@ -189,7 +253,7 @@
UErrorCode status = U_ZERO_ERROR;
- UBreakIteratorType breakType = convertType(UBreakType::kWords);
+ UBreakIteratorType breakType = convertType(BreakType::kWords);
ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
if (U_FAILURE(status)) {
SkDEBUGF("Break error: %s", u_errorName(status));
@@ -220,7 +284,8 @@
return true;
}
- static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
+ static bool extractPositions
+ (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> add) {
UErrorCode status = U_ZERO_ERROR;
UText sUtf8UText = UTEXT_INITIALIZER;
@@ -252,7 +317,9 @@
return true;
}
- static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
+ static bool extractWhitespaces(const char utf8[],
+ int utf8Units,
+ std::vector<Position>* whitespaces) {
const char* start = utf8;
const char* end = utf8 + utf8Units;
@@ -293,16 +360,22 @@
SkASSERT(dstLen == utf8Units);
return utf8Units;
}
+
public:
~SkUnicode_icu() override { }
std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
SkBidiIterator::Direction dir) override {
return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
}
- std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[], int count,
+ std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],  // char-text overload; delegates to SkBidiIterator_icu
+ int count,
SkBidiIterator::Direction dir) override {
return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
}
+ std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],  // nullptr when ubrk_open reports a failure
+ BreakType breakType) override {
+ return SkBreakIterator_icu::makeUtf8BreakIterator(locale, breakType);
+ }
// TODO: Use ICU data file to detect controls and whitespaces
bool isControl(SkUnichar utf8) override {
@@ -323,13 +396,18 @@
}
}
- bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* results) override {
+ bool getBidiRegions(const char utf8[],  // thin wrapper: passes every argument straight to extractBidi()
+ int utf8Units,
+ TextDirection dir,
+ std::vector<BidiRegion>* results) override {
return extractBidi(utf8, utf8Units, dir, results);
}
- bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
+ bool getLineBreaks(const char utf8[],
+ int utf8Units,
+ std::vector<LineBreakBefore>* results) override {
- return extractPositions(utf8, utf8Units, UBreakType::kLines,
+ return extractPositions(utf8, utf8Units, BreakType::kLines,
[results](int pos, int status) {
results->emplace_back(pos,status == UBRK_LINE_HARD
? LineBreakType::kHardLineBreak
@@ -351,7 +429,7 @@
bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
- return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
+ return extractPositions(utf8, utf8Units, BreakType::kGraphemes,  // the callback below stores pos and ignores status
[results](int pos, int status) { results->emplace_back(pos);
});
}
@@ -361,7 +439,9 @@
return extractWhitespaces(utf8, utf8Units, results);
}
- void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
+ void reorderVisual(const BidiLevel runLevels[],  // forwards unchanged to ubidi_reorderVisual()
+ int levelsCount,
+ int32_t logicalFromVisual[]) override {
ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
}
};