// blob: 055828d4779da195f3d0428185b14bcae90f0ff5 [file] [log] [blame]
/*
* Copyright 2022 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "include/core/SkSpan.h"
#include "include/core/SkString.h"
#include "include/core/SkTypes.h"
#include "include/private/SkBitmaskEnum.h"
#include "include/private/base/SkTArray.h"
#include "include/private/base/SkTo.h"
#include "modules/skunicode/include/SkUnicode.h"
#include "src/base/SkUTF.h"
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef SK_UNICODE_CLIENT_IMPLEMENTATION
#include "modules/skunicode/src/SkUnicode_client.h"
#else
#include <unicode/ubidi.h>
#endif
/**
 * SkUnicode implementation that answers queries from data handed in by the client
 * (bidi regions, word boundaries, grapheme breaks and line breaks) instead of
 * computing them from the text.
 */
class SkUnicode_client : public SkUnicode {
public:
    // Client-supplied text views and precomputed boundaries. One instance is shared
    // (through std::shared_ptr) by this object, the objects returned from copy() and
    // the iterators created by makeBidiIterator()/makeBreakIterator().
    struct Data {
        SkSpan<const char> fText8;
        SkSpan<const char16_t> fText16;
        std::vector<BidiRegion> fBidiRegions;
        std::vector<Position> fWords;
        std::vector<SkUnicode::Position> fGraphemeBreaks;
        std::vector<SkUnicode::LineBreakBefore> fLineBreaks;

        // Stores a view of `text` and takes ownership of the vectors. The UTF-16
        // view starts out empty.
        Data(SkSpan<char> text,
             std::vector<SkUnicode::BidiRegion> bidiRegions,
             std::vector<SkUnicode::Position> words,
             std::vector<SkUnicode::Position> graphemeBreaks,
             std::vector<SkUnicode::LineBreakBefore> lineBreaks)
                : fText8(text)
                , fText16(SkSpan<const char16_t>(nullptr, 0))
                , fBidiRegions(std::move(bidiRegions))
                , fWords(std::move(words))
                , fGraphemeBreaks(std::move(graphemeBreaks))
                , fLineBreaks(std::move(lineBreaks)) {
        }

        // Drops both text views and every precomputed boundary list.
        void reset() {
            fText8 = SkSpan<const char>(nullptr, 0);
            fText16 = SkSpan<const char16_t>(nullptr, 0);
            fBidiRegions.clear();
            fWords.clear();  // was left populated before; stale words must not outlive the text
            fGraphemeBreaks.clear();
            fLineBreaks.clear();
        }
    };

    SkUnicode_client() = delete;

    SkUnicode_client(SkSpan<char> text,
                     std::vector<SkUnicode::BidiRegion> bidiRegions,
                     std::vector<SkUnicode::Position> words,
                     std::vector<SkUnicode::Position> graphemeBreaks,
                     std::vector<SkUnicode::LineBreakBefore> lineBreaks)
            : fData(std::make_shared<Data>(text,
                                           std::move(bidiRegions),
                                           std::move(words),
                                           std::move(graphemeBreaks),
                                           std::move(lineBreaks))) {}

    // Creates an object that shares origin's Data (no deep copy).
    SkUnicode_client(const SkUnicode_client* origin)
            : fData(origin->fData) {}

    // Returns a new SkUnicode_client that shares this object's Data.
    std::unique_ptr<SkUnicode> copy() override {
        return std::make_unique<SkUnicode_client>(this);
    }

    ~SkUnicode_client() override = default;

    // Clears the shared Data; every object sharing it observes the change.
    void reset() { fData->reset(); }

    // For SkShaper
    std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
                                                     SkBidiIterator::Direction dir) override;
    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
                                                     int count,
                                                     SkBidiIterator::Direction dir) override;
    std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
                                                       BreakType breakType) override;
    std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;

    // For SkParagraph

    // Copies the client-supplied bidi regions into *results. The text and direction
    // arguments are not consulted. Always returns true.
    bool getBidiRegions(const char utf8[],
                        int utf8Units,
                        TextDirection dir,
                        std::vector<BidiRegion>* results) override {
        *results = fData->fBidiRegions;
        return true;
    }

    // TODO: Take it from the Client or hard code here?
    // True for C0 controls (and anything below U+0020), U+007F..U+009F,
    // U+200D..U+200F and U+202A..U+202E.
    static bool isControl(SkUnichar utf8) {
        return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) ||
               (utf8 >= 0x200D && utf8 <= 0x200F) ||
               (utf8 >= 0x202A && utf8 <= 0x202E);
    }

    // True for the breaking white space code points listed below. The non-breaking
    // ones (U+0085, U+00A0, U+2007, U+202F) are deliberately excluded; see isSpace().
    // The comparison is done on the full code point: narrowing to char16_t would make
    // supplementary code points such as U+12000 alias BMP spaces such as U+2000.
    static bool isWhitespace(SkUnichar unichar) {
        switch (unichar) {
            case 0x0009:  // character tabulation
            case 0x000A:  // line feed
            case 0x000B:  // line tabulation
            case 0x000C:  // form feed
            case 0x000D:  // carriage return
            case 0x0020:  // space
            case 0x1680:  // ogham space mark
            case 0x2000:  // en quad
            case 0x2001:  // em quad
            case 0x2002:  // en space
            case 0x2003:  // em space
            case 0x2004:  // three-per-em space
            case 0x2005:  // four-per-em space
            case 0x2006:  // six-per-em space
            case 0x2008:  // punctuation space
            case 0x2009:  // thin space
            case 0x200A:  // hair space
            case 0x2028:  // line separator
            case 0x2029:  // paragraph separator
            case 0x205F:  // medium mathematical space
            case 0x3000:  // ideographic space
                return true;
            default:
                return false;
        }
    }

    // True for everything isWhitespace() accepts plus the non-breaking spaces.
    static bool isSpace(SkUnichar unichar) {
        switch (unichar) {
            case 0x0085:  // next line
            case 0x00A0:  // no-break space
            case 0x2007:  // figure space
            case 0x202F:  // narrow no-break space
                return true;
            default:
                return isWhitespace(unichar);
        }
    }

    static bool isTabulation(SkUnichar utf8) {
        return utf8 == '\t';
    }

    static bool isHardBreak(SkUnichar utf8) {
        return utf8 == '\n';
    }

    // Fills *results with utf8Units + 1 entries: client line/grapheme breaks first,
    // then per-code-unit space/whitespace/control flags derived from decoding utf8.
    // When replaceTabs is true, each tab is flagged kTabulation on its code unit and
    // overwritten in place with a space (the buffer is modified). Invalid UTF-8 is
    // treated as U+FFFD. Always returns true.
    bool computeCodeUnitFlags(char utf8[],
                              int utf8Units,
                              bool replaceTabs,
                              SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
        results->clear();
        results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
        this->applyClientBreaks(results);

        const char* current = utf8;
        const char* end = utf8 + utf8Units;
        while (current < end) {
            auto before = current - utf8;
            SkUnichar unichar = SkUTF::NextUTF8(&current, end);
            if (unichar < 0) unichar = 0xFFFD;
            auto after = current - utf8;
            if (replaceTabs && SkUnicode_client::isTabulation(unichar)) {
                results->at(before) |= SkUnicode::kTabulation;
                // Classify the replaced tab as a plain space from here on.
                unichar = ' ';
                utf8[before] = ' ';
            }
            // The classification depends only on the code point, so compute it once
            // and stamp it on every code unit of the sequence.
            const bool space = SkUnicode_client::isSpace(unichar);
            const bool whitespace = SkUnicode_client::isWhitespace(unichar);
            const bool control = SkUnicode_client::isControl(unichar);
            for (auto i = before; i < after; ++i) {
                if (space) {
                    results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
                }
                if (whitespace) {
                    results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
                }
                if (control) {
                    results->at(i) |= SkUnicode::kControl;
                }
            }
        }
        return true;
    }

    // UTF-16 variant: fills *results with utf16Units + 1 entries carrying only the
    // client line/grapheme breaks. The text itself is not scanned and replaceTabs is
    // ignored, so no space/control/tab flags are produced.
    // NOTE(review): unlike the UTF-8 overload - confirm this asymmetry is intended.
    bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
                              SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
        results->clear();
        results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
        this->applyClientBreaks(results);
        return true;
    }

    // Copies the client-supplied word positions into *results; the text and locale
    // arguments are not consulted. Always returns true.
    bool getWords(const char utf8[], int utf8Units, const char* locale,
                  std::vector<Position>* results) override {
        *results = fData->fWords;
        return true;
    }

    // Unsupported here: trips SkASSERT and returns a copy of the stored UTF-8 text
    // rather than an upper-cased version of str.
    SkString toUpper(const SkString& str) override {
        SkASSERT(false);
        return SkString(fData->fText8.data(), fData->fText8.size());
    }

    // Forwards to ubidi_reorderVisual, or to ubidi_reorderVisual_skia when
    // SK_UNICODE_ICU_IMPLEMENTATION is not defined.
    void reorderVisual(const BidiLevel runLevels[],
                       int levelsCount,
                       int32_t logicalFromVisual[]) override {
#ifdef SK_UNICODE_ICU_IMPLEMENTATION
        ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
#else
        ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual);
#endif
    }

private:
    friend class SkBidiIterator_client;
    friend class SkBreakIterator_client;

    // ORs the client-supplied line-break and grapheme-start flags into *results,
    // using each stored position directly as an index (no bounds adjustment).
    void applyClientBreaks(SkTArray<SkUnicode::CodeUnitFlags, true>* results) const {
        for (const auto& lineBreak : fData->fLineBreaks) {
            (*results)[lineBreak.pos] |=
                    lineBreak.breakType == LineBreakType::kHardLineBreak
                            ? CodeUnitFlags::kHardLineBreakBefore
                            : CodeUnitFlags::kSoftLineBreakBefore;
        }
        for (const auto& grapheme : fData->fGraphemeBreaks) {
            (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart;
        }
    }

    std::shared_ptr<Data> fData;
};
// Bidi iterator that looks levels up in the client-supplied bidi regions held by the
// shared SkUnicode_client::Data.
class SkBidiIterator_client : public SkBidiIterator {
    std::shared_ptr<SkUnicode_client::Data> fData;
public:
    explicit SkBidiIterator_client(std::shared_ptr<SkUnicode_client::Data> data)
            : fData(std::move(data)) { }

    // Returns the number of stored bidi regions.
    // NOTE(review): this is a region count, not a text length - confirm callers expect that.
    Position getLength() override { return fData->fBidiRegions.size(); }

    // Returns the level of the first region for which "start <= pos && end <= pos" is
    // false, found by binary search (so regions are presumably sorted by position -
    // verify against the client contract). If every region ends at or before pos, or
    // there are no regions at all, returns level 0 instead of dereferencing end().
    Level getLevelAt(Position pos) override {
        const auto& regions = fData->fBidiRegions;
        auto found = std::lower_bound(
                regions.begin(),
                regions.end(),
                SkUnicode::BidiRegion(pos, pos, 0),
                [](const SkUnicode::BidiRegion& a, const SkUnicode::BidiRegion& b) {
                    return a.start <= b.start && a.end <= b.end;
                });
        if (found == regions.end()) {
            return 0;
        }
        return found->level;
    }
};
// Break iterator that reads positions and break types from the client-supplied
// fLineBreaks list in the shared SkUnicode_client::Data.
class SkBreakIterator_client: public SkBreakIterator {
    std::shared_ptr<SkUnicode_client::Data> fData;
    // Zero-initialized so that first()/current()/isDone() called before setText()
    // read defined values instead of indeterminate ones.
    Position fLastResult = 0;
    Position fStart = 0;
    Position fEnd = 0;
public:
    explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data)
            : fData(std::move(data)) { }

    // Rewinds to the start and returns the position stored at index fStart.
    Position first() override {
        fLastResult = 0;
        return fData->fLineBreaks[fStart].pos;
    }

    // Returns the position stored at index fStart + fLastResult.
    Position current() override
      { return fData->fLineBreaks[fStart + fLastResult].pos; }

    // Returns the position stored one entry past the current one.
    // NOTE(review): fLastResult is not advanced here, so repeated calls return the
    // same entry and isDone() never changes - confirm whether this is intended.
    Position next() override
      { return fData->fLineBreaks[fStart + fLastResult + 1].pos; }

    // Maps the current entry's break type to the hard/soft line-break flag value.
    Status status() override {
        return fData->fLineBreaks[fStart + fLastResult].breakType ==
                       SkUnicode::LineBreakType::kHardLineBreak
                       ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
                       : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
    }

    bool isDone() override { return fStart + fLastResult == fEnd; }

    // Records where utftext8 lies inside the stored UTF-8 text (as an offset from
    // its start) and resets the cursor. Always returns true.
    bool setText(const char utftext8[], int utf8Units) override {
        // Compare against the full size: narrowing it to int16 would reject any
        // text of 32768 bytes or more.
        SkASSERT(utftext8 >= fData->fText8.data() &&
                 utf8Units >= 0 &&
                 static_cast<size_t>(utf8Units) <= fData->fText8.size());
        fStart = utftext8 - fData->fText8.data();
        fEnd = fStart + utf8Units;
        fLastResult = 0;
        return true;
    }

    // UTF-16 counterpart of setText above, measured against the stored UTF-16 text.
    // NOTE(review): Data's constructor leaves fText16 empty - confirm something sets
    // it before this overload is used.
    bool setText(const char16_t utftext16[], int utf16Units) override {
        SkASSERT(utftext16 >= fData->fText16.data() &&
                 utf16Units >= 0 &&
                 static_cast<size_t>(utf16Units) <= fData->fText16.size());
        fStart = utftext16 - fData->fText16.data();
        fEnd = fStart + utf16Units;
        fLastResult = 0;
        return true;
    }
};
std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count,
SkBidiIterator::Direction dir) {
return std::make_unique<SkBidiIterator_client>(fData);
}
std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[],
int count,
SkBidiIterator::Direction dir) {
return std::make_unique<SkBidiIterator_client>(fData);
}
std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[],
BreakType breakType) {
return std::make_unique<SkBreakIterator_client>(fData);
}
// Locale-less overload: hands back a break iterator over the shared client data.
// The break type is not consulted.
std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(
        BreakType breakType) {
    std::unique_ptr<SkBreakIterator> iterator =
            std::make_unique<SkBreakIterator_client>(this->fData);
    return iterator;
}
// Factory for the client-data-backed SkUnicode. `text` is passed through as a view;
// the boundary vectors are taken by value and moved into the new object so that they
// are not copied a second time on the way in.
std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode(
        SkSpan<char> text,
        std::vector<SkUnicode::BidiRegion> bidiRegions,
        std::vector<SkUnicode::Position> words,
        std::vector<SkUnicode::Position> graphemeBreaks,
        std::vector<SkUnicode::LineBreakBefore> lineBreaks) {
    return std::make_unique<SkUnicode_client>(text,
                                              std::move(bidiRegions),
                                              std::move(words),
                                              std::move(graphemeBreaks),
                                              std::move(lineBreaks));
}