use slope-guess for faster charToGlyph
Made up a threshold between linear and slope searching.
For a count of 100 here are the before and after timings.
The first two lines are being changed. The latter 2 are
the native look-ups for mac.
Before
15/15 MB 115 2.33µs 2.34µs 2.34µs 2.34µs 0% ▆▁▄█▇▇▂▄█▆ nonrendering cmap_findcache_charToGlyph
15/15 MB 66 3.47µs 3.48µs 3.49µs 3.55µs 1% █▁▁▁▂▁▁▁▁▄ nonrendering cmap_addcache_charToGlyph
15/15 MB 1 1.1µs 1.13µs 1.21µs 1.98µs 22% █▂▁▁▁▁▁▁▁▂ nonrendering cmap_face_charToGlyph
15/15 MB 190 1.09µs 1.1µs 1.19µs 1.64µs 17% ▁▁▁▆█▁▁▁▁▁ nonrendering cmap_font_charToGlyph
After
15/15 MB 447 448ns 449ns 448ns 449ns 0% ▂▅█▅▁▆█▂▁▆ nonrendering cmap_findcache_charToGlyph
15/15 MB 95 2.79µs 3.03µs 3µs 3.06µs 3% ▇▇▇▇▇███▄▁ nonrendering cmap_addcache_charToGlyph
15/15 MB 1 1.15µs 1.16µs 1.25µs 1.99µs 21% █▂▁▁▁▁▁▁▁▁ nonrendering cmap_face_charToGlyph
15/15 MB 186 1.09µs 1.1µs 1.12µs 1.27µs 5% █▁▁▁▂▁▁▁▁▁ nonrendering cmap_font_charToGlyph
Bug: skia:
Change-Id: If7da4eef3cce248393815071f342607f0c8140bb
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/208044
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Reed <reed@google.com>
diff --git a/bench/CmapBench.cpp b/bench/CmapBench.cpp
index 8178477..3b90e35 100644
--- a/bench/CmapBench.cpp
+++ b/bench/CmapBench.cpp
@@ -65,35 +65,25 @@
}
}
-namespace {
-class UnicharGen {
- SkUnichar fU;
- const int fStep;
-public:
- UnicharGen(int step) : fU(0), fStep(step) {}
-
- SkUnichar next() {
- fU += fStep;
- return fU;
- }
-};
-}
-
class CMAPBench : public Benchmark {
TypefaceProc fProc;
SkString fName;
SkUnichar fText[NGLYPHS];
SkFont fFont;
SkCharToGlyphCache fCache;
+ int fCount;
public:
- CMAPBench(TypefaceProc proc, const char name[]) {
- fProc = proc;
- fName.printf("cmap_%s", name);
+ CMAPBench(TypefaceProc proc, const char name[], int count) {
+ SkASSERT(count <= NGLYPHS);
- UnicharGen gen(3);
- for (int i = 0; i < NGLYPHS; ++i) {
- fText[i] = gen.next();
+ fProc = proc;
+ fName.printf("%s_%d", name, count);
+ fCount = count;
+
+ SkRandom rand;
+ for (int i = 0; i < count; ++i) {
+ fText[i] = rand.nextU() & 0xFFFF;
fCache.addCharAndGlyph(fText[i], i);
}
fFont.setTypeface(SkTypeface::MakeDefault());
@@ -109,7 +99,7 @@
}
void onDraw(int loops, SkCanvas* canvas) override {
- fProc({fCache, loops, fFont, fText, NGLYPHS});
+ fProc({fCache, loops, fFont, fText, fCount});
}
private:
@@ -119,7 +109,16 @@
//////////////////////////////////////////////////////////////////////////////
-DEF_BENCH( return new CMAPBench(textToGlyphs_proc, "font_charToGlyph"); )
-DEF_BENCH( return new CMAPBench(charsToGlyphs_proc, "face_charToGlyph"); )
-DEF_BENCH( return new CMAPBench(addcache_proc, "addcache_charToGlyph"); )
-DEF_BENCH( return new CMAPBench(findcache_proc, "findcache_charToGlyph"); )
+constexpr int SMALL = 10;
+
+DEF_BENCH( return new CMAPBench(textToGlyphs_proc, "font_charToGlyph", SMALL); )
+DEF_BENCH( return new CMAPBench(charsToGlyphs_proc, "face_charToGlyph", SMALL); )
+DEF_BENCH( return new CMAPBench(addcache_proc, "addcache_charToGlyph", SMALL); )
+DEF_BENCH( return new CMAPBench(findcache_proc, "findcache_charToGlyph", SMALL); )
+
+constexpr int BIG = 100;
+
+DEF_BENCH( return new CMAPBench(textToGlyphs_proc, "font_charToGlyph", BIG); )
+DEF_BENCH( return new CMAPBench(charsToGlyphs_proc, "face_charToGlyph", BIG); )
+DEF_BENCH( return new CMAPBench(addcache_proc, "addcache_charToGlyph", BIG); )
+DEF_BENCH( return new CMAPBench(findcache_proc, "findcache_charToGlyph", BIG); )
diff --git a/src/utils/SkCharToGlyphCache.cpp b/src/utils/SkCharToGlyphCache.cpp
index 1ce91a6..f962dea 100644
--- a/src/utils/SkCharToGlyphCache.cpp
+++ b/src/utils/SkCharToGlyphCache.cpp
@@ -8,89 +8,116 @@
#include "SkCharToGlyphCache.h"
#include "../private/SkTFitsIn.h"
-// TODO: consider faster ways to search big arrays
-// bsearch
-// slope-based (since we can compute a slope from first and last values)
+SkCharToGlyphCache::SkCharToGlyphCache() {
+ // Add sentinels so we can always rely on these to stop linear searches (in either direction)
+ // Neither is a legal unichar, so we don't care what glyphID we use.
+ //
+ *fK32.append() = 0x80000000; *fV16.append() = 0;
+ *fK32.append() = 0x7FFFFFFF; *fV16.append() = 0;
+
+ fDenom = 0;
+}
+
+SkCharToGlyphCache::~SkCharToGlyphCache() {}
+
+// Determined experimentally. For N much larger, the slope technique is faster.
+// For N much smaller, a simple search is faster.
//
-// Also, consider adding sentinels to key arrays so we don't have to check that
-// index is in rage.
+constexpr int kSmallCountLimit = 16;
+
+// To use slope technique we need at least 2 real entries (+2 sentinels) hence the min of 4
//
-template <typename T> int find_index(const T* base, int count, T value) {
- for (int index = 0; index < count; ++index) {
+constexpr int kMinCountForSlope = 4;
+
+static int find_simple(const SkUnichar base[], int count, SkUnichar value) {
+ int index;
+ for (index = 0;; ++index) {
if (value <= base[index]) {
if (value < base[index]) {
index = ~index; // not found
}
- return index;
+ break;
}
}
- return ~count; // append at the end
+ return index;
}
-SkCharToGlyphCache::SkCharToGlyphCache() {}
+static int find_with_slope(const SkUnichar base[], int count, SkUnichar value, double denom) {
+ SkASSERT(count >= kMinCountForSlope);
-SkCharToGlyphCache::~SkCharToGlyphCache() {}
-
-int SkCharToGlyphCache::findGlyphIndex(SkUnichar unichar) const {
int index;
- if (SkTFitsIn<uint16_t>(unichar)) {
- int count = f1616.count() >> 1;
- index = find_index(f1616.begin(), count, SkToU16(unichar));
- if (index >= 0) {
- return f1616[count + index];
+ if (value <= base[1]) {
+ index = 1;
+ if (value < base[index]) {
+ index = ~index;
+ }
+ } else if (value >= base[count - 2]) {
+ index = count - 2;
+ if (value > base[index]) {
+ index = ~(index + 1);
}
} else {
- int count = fKey32.count();
- index = find_index(fKey32.begin(), count, unichar);
- if (index >= 0) {
- return fValue16[index];
+ // make our guess based on the "slope" of the current values
+// index = 1 + (int64_t)(count - 2) * (value - base[1]) / (base[count - 2] - base[1]);
+ index = 1 + (int)(denom * (count - 2) * (value - base[1]));
+ SkASSERT(index >= 1 && index <= count - 2);
+
+ if (value >= base[index]) {
+ for (;; ++index) {
+ if (value <= base[index]) {
+ if (value < base[index]) {
+ index = ~index; // not found
+ }
+ break;
+ }
+ }
+ } else {
+ for (--index;; --index) {
+ SkASSERT(index >= 0);
+ if (value >= base[index]) {
+ if (value > base[index]) {
+ index = ~(index + 1);
+ }
+ break;
+ }
+ }
}
}
- SkASSERT(index < 0);
+ return index;
+}
+
+int SkCharToGlyphCache::findGlyphIndex(SkUnichar unichar) const {
+ const int count = fK32.count();
+ int index;
+ if (count <= kSmallCountLimit) {
+ index = find_simple(fK32.begin(), count, unichar);
+ } else {
+ index = find_with_slope(fK32.begin(), count, unichar, fDenom);
+ }
+ if (index >= 0) {
+ return fV16[index];
+ }
return index;
}
void SkCharToGlyphCache::insertCharAndGlyph(int index, SkUnichar unichar, SkGlyphID glyph) {
- if (SkTFitsIn<uint16_t>(unichar)) {
- // count is the logical count, since our array is really two arrays back-to-back
- size_t count = f1616.size() >> 1;
- SkASSERT((unsigned)index <= count);
+ SkASSERT(fK32.size() == fV16.size());
+ SkASSERT((unsigned)index < fK32.size());
+ SkASSERT(unichar < fK32[index]);
- f1616.setCount(f1616.size() + 2); // make room for key and value
- uint16_t* base = f1616.begin();
+ *fK32.insert(index) = unichar;
+ *fV16.insert(index) = glyph;
- // now slide twice, to make room for the new key and value
- uint16_t* src = base + count + index;
- size_t amt = count - index;
- sk_careful_memmove(src + 2, src, amt * sizeof(uint16_t));
-
- src = base + index;
- amt = count;
- sk_careful_memmove(src + 1, src, amt * sizeof(uint16_t));
-
- // now store the new values
- base[index] = SkToU16(unichar);
- base[count + 1 + index] = glyph;
- } else {
- SkASSERT(fKey32.size() == fValue16.size());
- SkASSERT((unsigned)index <= fKey32.size());
-
- *fKey32.insert(index) = unichar;
- *fValue16.insert(index) = glyph;
+ // if we've changed the first [1] or last [count-2] entry, recompute our slope
+ const int count = fK32.count();
+ if (count >= kMinCountForSlope && (index == 1 || index == count - 2)) {
+ SkASSERT(index >= 1 && index <= count - 2);
+ fDenom = 1.0 / ((double)fK32[count - 2] - fK32[1]);
}
- this->validate();
-}
-void SkCharToGlyphCache::validate() const {
#ifdef SK_DEBUG
- SkASSERT((f1616.count() & 1) == 0);
- for (int i = 1; i < f1616.count() >> 1; ++i) {
- SkASSERT(f1616[i - 1] < f1616[i]);
- }
-
- SkASSERT(fKey32.size() == fValue16.size());
- for (int i = 1; i < fKey32.count(); ++i) {
- SkASSERT(fKey32[i - 1] < fKey32[i]);
+ for (int i = 1; i < fK32.count(); ++i) {
+ SkASSERT(fK32[i-1] < fK32[i]);
}
#endif
}
diff --git a/src/utils/SkCharToGlyphCache.h b/src/utils/SkCharToGlyphCache.h
index 2eed22e..9e93166 100644
--- a/src/utils/SkCharToGlyphCache.h
+++ b/src/utils/SkCharToGlyphCache.h
@@ -47,15 +47,9 @@
}
private:
- // If the unichar is <= 16bits, store keys and values (planar) in the same array
- // [keys...][values...]
- SkTDArray<uint16_t> f1616;
-
- // If the unichar is > 16bits, use these two arrays: 32bit key, 16bit value
- SkTDArray<int32_t> fKey32;
- SkTDArray<uint16_t> fValue16;
-
- void validate() const;
+ SkTDArray<int32_t> fK32;
+ SkTDArray<uint16_t> fV16;
+ double fDenom;
};
#endif
diff --git a/tests/TestUtils.cpp b/tests/TestUtils.cpp
index e0fbc57..16ef403 100644
--- a/tests/TestUtils.cpp
+++ b/tests/TestUtils.cpp
@@ -213,6 +213,9 @@
SkGlyphID glyph = hash_to_glyph(c);
int index = cache.findGlyphIndex(c);
+ if (index >= 0) {
+ index = cache.findGlyphIndex(c);
+ }
REPORTER_ASSERT(reporter, index < 0);
cache.insertCharAndGlyph(~index, c, glyph);
@@ -221,6 +224,9 @@
c = gen2.next();
glyph = hash_to_glyph(c);
index = cache.findGlyphIndex(c);
+ if ((unsigned)index != glyph) {
+ index = cache.findGlyphIndex(c);
+ }
REPORTER_ASSERT(reporter, (unsigned)index == glyph);
}
}