ICU project: text break iterators in SkShaper

Change-Id: I8a0dd71298331b608fbe874cc610a80fc7815b0e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/313082
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp
index 4ff3903..ff5da5d 100644
--- a/modules/skshaper/src/SkShaper_harfbuzz.cpp
+++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp
@@ -35,23 +35,12 @@
 #include <hb.h>
 #include <hb-icu.h>
 #include <hb-ot.h>
-#include <unicode/ubrk.h>
-#include <unicode/umachine.h>
-#include <unicode/urename.h>
 #include <unicode/uscript.h>
-#include <unicode/ustring.h>
-#include <unicode/utext.h>
-#include <unicode/utypes.h>
-
 #include <cstring>
 #include <memory>
 #include <type_traits>
 #include <utility>
 
-#if defined(SK_USING_THIRD_PARTY_ICU)
-#include "SkLoadICU.h"
-#endif
-
 // HB_FEATURE_GLOBAL_START and HB_FEATURE_GLOBAL_END were not added until HarfBuzz 2.0
 // They would have always worked, they just hadn't been named yet.
 #if !defined(HB_FEATURE_GLOBAL_START)
@@ -71,10 +60,9 @@
 using HBFace   = resource<hb_face_t     , decltype(hb_face_destroy)  , hb_face_destroy  >;
 using HBFont   = resource<hb_font_t     , decltype(hb_font_destroy)  , hb_font_destroy  >;
 using HBBuffer = resource<hb_buffer_t   , decltype(hb_buffer_destroy), hb_buffer_destroy>;
-using ICUBrk   = resource<UBreakIterator, decltype(ubrk_close)       , ubrk_close       >;
-using ICUUText = resource<UText         , decltype(utext_close)      , utext_close      >;
 
 using SkUnicodeBidi = std::unique_ptr<SkBidiIterator>;
+using SkUnicodeBreak = std::unique_ptr<SkBreakIterator>;
 
 hb_position_t skhb_position(SkScalar value) {
     // Treat HarfBuzz hb_position_t as 16.16 fixed-point.
@@ -653,11 +641,16 @@
 
 class ShaperHarfBuzz : public SkShaper {
 public:
-    ShaperHarfBuzz(HBBuffer, ICUBrk line, ICUBrk grapheme, sk_sp<SkFontMgr>);
+    ShaperHarfBuzz(std::unique_ptr<SkUnicode>,
+                   SkUnicodeBreak line,
+                   SkUnicodeBreak grapheme,
+                   HBBuffer,
+                   sk_sp<SkFontMgr>);
 
 protected:
-    ICUBrk fLineBreakIterator;
-    ICUBrk fGraphemeBreakIterator;
+    std::unique_ptr<SkUnicode> fUnicode;
+    SkUnicodeBreak fLineBreakIterator;
+    SkUnicodeBreak fGraphemeBreakIterator;
 
     ShapedRun shape(const char* utf8, size_t utf8Bytes,
                     const char* utf8Start,
@@ -668,7 +661,6 @@
                     const FontRunIterator&,
                     const Feature*, size_t featuresSize) const;
 private:
-    std::unique_ptr<SkUnicode> fUnicode = SkUnicode::Make();
     const sk_sp<SkFontMgr> fFontMgr;
     HBBuffer               fBuffer;
     hb_language_t          fUndefinedLanguage;
@@ -753,52 +745,43 @@
 };
 
 static std::unique_ptr<SkShaper> MakeHarfBuzz(sk_sp<SkFontMgr> fontmgr, bool correct) {
-    #if defined(SK_USING_THIRD_PARTY_ICU)
-    if (!SkLoadICU()) {
-        SkDEBUGF("SkLoadICU() failed!\n");
-        return nullptr;
-    }
-    #endif
     HBBuffer buffer(hb_buffer_create());
     if (!buffer) {
         SkDEBUGF("Could not create hb_buffer");
         return nullptr;
     }
 
-    UErrorCode status = U_ZERO_ERROR;
-    ICUBrk lineBreakIterator(ubrk_open(UBRK_LINE, "th", nullptr, 0, &status));
-    if (!lineBreakIterator || U_FAILURE(status)) {
-        SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
+    auto unicode = SkUnicode::Make();  // ownership is handed to the shaper built below
+    if (!unicode) {  // Make() returned null: no shaper can be built
         return nullptr;
     }
-
-    ICUBrk graphemeBreakIterator(ubrk_open(UBRK_CHARACTER, "th", nullptr, 0, &status));
-    if (!graphemeBreakIterator || U_FAILURE(status)) {
-        SkDEBUGF("Could not create grapheme break iterator: %s", u_errorName(status));
+    auto lineIter = unicode->makeBreakIterator("th", SkUnicode::BreakType::kLines);
+    if (!lineIter) {  // NOTE(review): the "th" locale above is hard-coded -- confirm intent
+        return nullptr;
+    }
+    auto graphIter = unicode->makeBreakIterator("th", SkUnicode::BreakType::kGraphemes);
+    if (!graphIter) {  // same hard-coded "th" locale as the line iterator
         return nullptr;
     }
 
     if (correct) {
-        return std::make_unique<ShaperDrivenWrapper>(std::move(buffer),
-                                                       std::move(lineBreakIterator),
-                                                       std::move(graphemeBreakIterator),
-                                                       std::move(fontmgr));
+        return std::make_unique<ShaperDrivenWrapper>(std::move(unicode),
+            std::move(lineIter), std::move(graphIter), std::move(buffer), std::move(fontmgr));
     } else {
-        return std::make_unique<ShapeThenWrap>(std::move(buffer),
-                                                 std::move(lineBreakIterator),
-                                                 std::move(graphemeBreakIterator),
-                                                 std::move(fontmgr));
+        return std::make_unique<ShapeThenWrap>(std::move(unicode),
+            std::move(lineIter), std::move(graphIter), std::move(buffer), std::move(fontmgr));
     }
 }
 
-ShaperHarfBuzz::ShaperHarfBuzz(HBBuffer buffer, ICUBrk line, ICUBrk grapheme,
-                               sk_sp<SkFontMgr> fontmgr)
-    : fLineBreakIterator(std::move(line))
-    , fGraphemeBreakIterator(std::move(grapheme))
+ShaperHarfBuzz::ShaperHarfBuzz(std::unique_ptr<SkUnicode> unicode,  // every argument is moved in
+    SkUnicodeBreak lineIter, SkUnicodeBreak graphIter, HBBuffer buffer, sk_sp<SkFontMgr> fontmgr)
+    : fUnicode(std::move(unicode))  // initializers are listed in member declaration order
+    , fLineBreakIterator(std::move(lineIter))  // may be null (see MakeShapeDontWrapOrReorder)
+    , fGraphemeBreakIterator(std::move(graphIter))  // may be null, likewise
     , fFontMgr(std::move(fontmgr))
     , fBuffer(std::move(buffer))
     , fUndefinedLanguage(hb_language_from_string("und", -1))
-{}
+{ }
 
 void ShaperHarfBuzz::shape(const char* utf8, size_t utf8Bytes,
                            const SkFont& srcFont,
@@ -929,21 +912,10 @@
 
             // TODO: break iterator per item, but just reset position if needed?
             // Maybe break iterator with model?
-            UBreakIterator& breakIterator = *fLineBreakIterator;
-            {
-                UErrorCode status = U_ZERO_ERROR;
-                UText sUtf8UText = UTEXT_INITIALIZER;
-                ICUUText utf8UText(utext_openUTF8(&sUtf8UText, utf8Start, utf8runLength, &status));
-                if (U_FAILURE(status)) {
-                    SkDebugf("Could not create utf8UText: %s", u_errorName(status));
-                    return;
-                }
-                ubrk_setUText(&breakIterator, utf8UText.get(), &status);
-                if (U_FAILURE(status)) {
-                    SkDebugf("Could not setText on break iterator: %s", u_errorName(status));
-                    return;
-                }
+            if (!fLineBreakIterator->setText(utf8Start, utf8runLength)) {
+                return;
             }
+            SkBreakIterator& breakIterator = *fLineBreakIterator;
 
             ShapedRun best(RunHandler::Range(), SkFont(), 0, nullptr, 0,
                            { SK_ScalarNegativeInfinity, SK_ScalarNegativeInfinity });
@@ -951,9 +923,9 @@
             bool bestUsesModelForGlyphs = false;
             SkScalar widthLeft = width - line.fAdvance.fX;
 
-            for (int32_t breakIteratorCurrent = ubrk_next(&breakIterator);
-                 breakIteratorCurrent != UBRK_DONE;
-                 breakIteratorCurrent = ubrk_next(&breakIterator))
+            for (int32_t breakIteratorCurrent = breakIterator.next();
+                 !breakIterator.isDone();
+                 breakIteratorCurrent = breakIterator.next())
             {
                 // TODO: if past a safe to break, future safe to break will be at least as long
 
@@ -1032,29 +1004,15 @@
 {
     SkTArray<ShapedRun> runs;
 {
-    UBreakIterator& lineBreakIterator = *fLineBreakIterator;
-    UBreakIterator& graphemeBreakIterator = *fGraphemeBreakIterator;
-    {
-        UErrorCode status = U_ZERO_ERROR;
-        UText sUtf8UText = UTEXT_INITIALIZER;
-        ICUUText utf8UText(utext_openUTF8(&sUtf8UText, utf8, utf8Bytes, &status));
-        if (U_FAILURE(status)) {
-            SkDebugf("Could not create utf8UText: %s", u_errorName(status));
-            return;
-        }
-
-        ubrk_setUText(&lineBreakIterator, utf8UText.get(), &status);
-        if (U_FAILURE(status)) {
-            SkDebugf("Could not setText on line break iterator: %s", u_errorName(status));
-            return;
-        }
-        ubrk_setUText(&graphemeBreakIterator, utf8UText.get(), &status);
-        if (U_FAILURE(status)) {
-            SkDebugf("Could not setText on grapheme break iterator: %s", u_errorName(status));
-            return;
-        }
+    if (!fLineBreakIterator->setText(utf8, utf8Bytes)) {
+        return;
+    }
+    if (!fGraphemeBreakIterator->setText(utf8, utf8Bytes)) {
+        return;
     }
 
+    SkBreakIterator& lineBreakIterator = *fLineBreakIterator;
+    SkBreakIterator& graphemeBreakIterator = *fGraphemeBreakIterator;
     const char* utf8Start = nullptr;
     const char* utf8End = utf8;
     while (runSegmenter.advanceRuns()) {
@@ -1072,20 +1030,18 @@
             ShapedGlyph& glyph = run.fGlyphs[i];
             int32_t glyphCluster = glyph.fCluster;
 
-            int32_t lineBreakIteratorCurrent = ubrk_current(&lineBreakIterator);
-            while (lineBreakIteratorCurrent != UBRK_DONE &&
-                   lineBreakIteratorCurrent < glyphCluster)
+            int32_t lineBreakIteratorCurrent = lineBreakIterator.current();
+            while (!lineBreakIterator.isDone() && lineBreakIteratorCurrent < glyphCluster)
             {
-                lineBreakIteratorCurrent = ubrk_next(&lineBreakIterator);
+                lineBreakIteratorCurrent = lineBreakIterator.next();
             }
             glyph.fMayLineBreakBefore = glyph.fCluster != previousCluster &&
                                         lineBreakIteratorCurrent == glyphCluster;
 
-            int32_t graphemeBreakIteratorCurrent = ubrk_current(&graphemeBreakIterator);
-            while (graphemeBreakIteratorCurrent != UBRK_DONE &&
-                   graphemeBreakIteratorCurrent < glyphCluster)
+            int32_t graphemeBreakIteratorCurrent = graphemeBreakIterator.current();
+            while (!graphemeBreakIterator.isDone() && graphemeBreakIteratorCurrent < glyphCluster)
             {
-                graphemeBreakIteratorCurrent = ubrk_next(&graphemeBreakIterator);
+                graphemeBreakIteratorCurrent = graphemeBreakIterator.next();
             }
             glyph.fGraphemeBreakBefore = glyph.fCluster != previousCluster &&
                                          graphemeBreakIteratorCurrent == glyphCluster;
@@ -1492,18 +1448,17 @@
     return MakeHarfBuzz(std::move(fontmgr), false);
 }
 std::unique_ptr<SkShaper> SkShaper::MakeShapeDontWrapOrReorder(sk_sp<SkFontMgr> fontmgr) {
-    #if defined(SK_USING_THIRD_PARTY_ICU)
-    if (!SkLoadICU()) {
-        SkDEBUGF("SkLoadICU() failed!\n");
-        return nullptr;
-    }
-    #endif
     HBBuffer buffer(hb_buffer_create());
     if (!buffer) {
         SkDEBUGF("Could not create hb_buffer");
         return nullptr;
     }
 
-    return std::make_unique<ShapeDontWrapOrReorder>(std::move(buffer), nullptr, nullptr,
-                                                      std::move(fontmgr));
+    auto unicode = SkUnicode::Make();  // ownership is handed to the shaper built below
+    if (!unicode) {  // Make() returned null: no shaper can be built
+        return nullptr;
+    }
+    // No break iterators are created here; presumably the two nullptrs fill the line/grapheme slots.
+    return std::make_unique<ShapeDontWrapOrReorder>
+        (std::move(unicode), nullptr, nullptr, std::move(buffer), std::move(fontmgr));
 }
diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h
index 5414ed2..07b0904 100644
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@@ -47,12 +47,27 @@
         kLTR,
         kRTL,
     };
-    virtual ~SkBidiIterator() {}
+    virtual ~SkBidiIterator() = default;
     virtual Position getLength() = 0;
     virtual Level getLevelAt(Position) = 0;
     static void ReorderVisual(const Level runLevels[], int levelsCount, int32_t logicalFromVisual[]);
 };
 
+class SKUNICODE_API SkBreakIterator {  // abstract iterator over text break positions
+public:
+    typedef int32_t Position;  // break position returned by first()/current()/next()/...
+    typedef int32_t Status;  // value returned by status()
+    virtual ~SkBreakIterator() = default;
+    virtual Position first() = 0;  // ICU backend: ubrk_first()
+    virtual Position current() = 0;  // ICU backend: ubrk_current()
+    virtual Position next() = 0;  // ICU backend: ubrk_next()
+    virtual Position preceding(Position offset) = 0;  // ICU backend: ubrk_preceding()
+    virtual Position following(Position offset) = 0;  // ICU backend: ubrk_following()
+    virtual Status status() = 0;  // ICU backend: ubrk_getRuleStatus()
+    virtual bool isDone() = 0;  // ICU backend: true when the last position was UBRK_DONE
+    virtual bool setText(const char utftext8[], int utf8Units) = 0;  // false on failure
+};
+
 class SKUNICODE_API SkUnicode {
     public:
         typedef uint32_t ScriptID;
@@ -76,7 +91,7 @@
             kHardLineBreak
         };
 
-        enum class UBreakType {
+        enum class BreakType {
             kWords,
             kGraphemes,
             kLines
@@ -94,11 +109,13 @@
         virtual bool isWhitespace(SkUnichar utf8) = 0;
         virtual SkString convertUtf16ToUtf8(const std::u16string& utf16) = 0;
 
-        // Iterators (used in SkShaper)
+        // Methods used in SkShaper
         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
             (const char text[], int count, SkBidiIterator::Direction) = 0;
+        virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
+            (const char locale[], BreakType breakType) = 0;
 
         // High level methods (that we actually use somewhere=SkParagraph)
         virtual bool getBidiRegions
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index 13de14d..7eb0f86 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -118,20 +118,84 @@
     ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
 }
 
+class SkBreakIterator_icu : public SkBreakIterator {  // ICU-backed SkBreakIterator
+    ICUBreakIterator fBreakIterator;
+    Position fLastResult;  // last position returned (0 after setText); read by isDone()
+ public:
+    explicit SkBreakIterator_icu(ICUBreakIterator iter)
+        : fBreakIterator(std::move(iter)), fLastResult(0) {}
+    // Each positional call forwards to ICU and remembers the result for isDone().
+    Position first() override { return fLastResult = ubrk_first(fBreakIterator.get()); }
+    Position current() override { return fLastResult = ubrk_current(fBreakIterator.get()); }
+    Position next() override { return fLastResult = ubrk_next(fBreakIterator.get()); }
+    Position preceding(Position offset) override
+        { return fLastResult = ubrk_preceding(fBreakIterator.get(), offset); }
+    Position following(Position offset) override
+        { return fLastResult = ubrk_following(fBreakIterator.get(), offset); }
+    Status status() override { return ubrk_getRuleStatus(fBreakIterator.get()); }
+    bool isDone() override { return fLastResult == UBRK_DONE; }
+
+    // Retargets the iterator at utf8Units units of UTF-8 text starting at utftext8.
+    // Returns false, after SkDEBUGF-ing the ICU error name, if either ICU call fails.
+    bool setText(const char utftext8[], int utf8Units) override {
+        UErrorCode status = U_ZERO_ERROR;
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText text(utext_openUTF8(&sUtf8UText, &utftext8[0], utf8Units, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+        ubrk_setUText(fBreakIterator.get(), text.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        fLastResult = 0;  // forget any UBRK_DONE left over from the previous text
+        return true;
+    }
+
+    // Maps SkUnicode::BreakType to ICU's enum; anything else falls back to UBRK_CHARACTER.
+    static UBreakIteratorType convertType(SkUnicode::BreakType type) {
+        switch (type) {
+            case SkUnicode::BreakType::kLines: return UBRK_LINE;
+            case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
+            case SkUnicode::BreakType::kWords: return UBRK_WORD;
+            default: return UBRK_CHARACTER;
+        }
+    }
+
+    // Opens an ICU break iterator for locale/type; logs and returns nullptr on failure.
+    static std::unique_ptr<SkBreakIterator> makeUtf8BreakIterator
+        (const char locale[], SkUnicode::BreakType type) {
+        UErrorCode status = U_ZERO_ERROR;
+        ICUBreakIterator iterator(ubrk_open(convertType(type), locale, nullptr, 0, &status));
+        // Never wrap a null handle: every method above passes it straight to ICU.
+        if (!iterator || U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return nullptr;
+        }
+        return std::make_unique<SkBreakIterator_icu>(std::move(iterator));
+    }
+};
+
 class SkUnicode_icu : public SkUnicode {
 
-    static UBreakIteratorType convertType(UBreakType type) {
+    static UBreakIteratorType convertType(BreakType type) {
         switch (type) {
-            case UBreakType::kLines: return UBRK_LINE;
-            case UBreakType::kGraphemes: return UBRK_CHARACTER;
-            case UBreakType::kWords: return UBRK_WORD;
+            case BreakType::kLines: return UBRK_LINE;
+            case BreakType::kGraphemes: return UBRK_CHARACTER;
+            case BreakType::kWords: return UBRK_WORD;
             default:
               SkDEBUGF("Convert error: wrong break type");
               return UBRK_CHARACTER;
         }
     }
 
-    static bool extractBidi(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* bidiRegions) {
+    static bool extractBidi(const char utf8[],
+                            int utf8Units,
+                            TextDirection dir,
+                            std::vector<BidiRegion>* bidiRegions) {
 
         // Convert to UTF16 since for now bidi iterator only operates on utf16
         std::unique_ptr<uint16_t[]> utf16;
@@ -189,7 +253,7 @@
 
         UErrorCode status = U_ZERO_ERROR;
 
-        UBreakIteratorType breakType = convertType(UBreakType::kWords);
+        UBreakIteratorType breakType = convertType(BreakType::kWords);
         ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
         if (U_FAILURE(status)) {
             SkDEBUGF("Break error: %s", u_errorName(status));
@@ -220,7 +284,8 @@
         return true;
     }
 
-    static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
+    static bool extractPositions
+        (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> add) {
 
         UErrorCode status = U_ZERO_ERROR;
         UText sUtf8UText = UTEXT_INITIALIZER;
@@ -252,7 +317,9 @@
         return true;
     }
 
-    static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
+    static bool extractWhitespaces(const char utf8[],
+                                   int utf8Units,
+                                   std::vector<Position>* whitespaces) {
 
         const char* start = utf8;
         const char* end = utf8 + utf8Units;
@@ -293,16 +360,22 @@
         SkASSERT(dstLen == utf8Units);
         return utf8Units;
    }
+
 public:
     ~SkUnicode_icu() override { }
     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
                                                      SkBidiIterator::Direction dir) override {
         return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
     }
-    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[], int count,
+    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
+                                                     int count,
                                                      SkBidiIterator::Direction dir) override {
         return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
     }
+    std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
+                                                       BreakType breakType) override {
+        return SkBreakIterator_icu::makeUtf8BreakIterator(locale, breakType);  // null on failure
+    }
 
     // TODO: Use ICU data file to detect controls and whitespaces
     bool isControl(SkUnichar utf8) override {
@@ -323,13 +396,18 @@
         }
     }
 
-    bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* results) override {
+    bool getBidiRegions(const char utf8[],
+                        int utf8Units,
+                        TextDirection dir,
+                        std::vector<BidiRegion>* results) override {
         return extractBidi(utf8, utf8Units, dir, results);
     }
 
-    bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
+    bool getLineBreaks(const char utf8[],
+                       int utf8Units,
+                       std::vector<LineBreakBefore>* results) override {
 
-        return extractPositions(utf8, utf8Units, UBreakType::kLines,
+        return extractPositions(utf8, utf8Units, BreakType::kLines,
             [results](int pos, int status) {
                     results->emplace_back(pos,status == UBRK_LINE_HARD
                                                         ? LineBreakType::kHardLineBreak
@@ -351,7 +429,7 @@
 
     bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
 
-        return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
+        return extractPositions(utf8, utf8Units, BreakType::kGraphemes,
             [results](int pos, int status) { results->emplace_back(pos);
         });
     }
@@ -361,7 +439,9 @@
         return extractWhitespaces(utf8, utf8Units, results);
     }
 
-    void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
+    void reorderVisual(const BidiLevel runLevels[],
+                       int levelsCount,
+                       int32_t logicalFromVisual[]) override {
         ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
     }
 };