add utility for caching char2glyph

Try: out/release/nanobench --match _charToGlyph

Pseudo plan to use this:
- attach to whatever typeface backends need it (probably just freetype)
- have a purge/limiting scheme (e.g. only cache N entries)
- if we care, make the search fancier (e.g. binary, slope, etc.)

Bug: 951647
Change-Id: Ib1042ca5891d2742499faf1314579c402121a855
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207703
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Reed <reed@google.com>
diff --git a/bench/CmapBench.cpp b/bench/CmapBench.cpp
index 3fcda85..8178477 100644
--- a/bench/CmapBench.cpp
+++ b/bench/CmapBench.cpp
@@ -7,70 +7,109 @@
 
 #include "Benchmark.h"
 #include "SkCanvas.h"
+#include "SkCharToGlyphCache.h"
 #include "SkFont.h"
+#include "SkRandom.h"
 #include "SkTypeface.h"
+#include "SkUTF.h"
 
 enum {
     NGLYPHS = 100
 };
 
-typedef void (*TypefaceProc)(int loops, const SkFont&, const void* text, size_t len,
-                             int glyphCount);
+namespace {
+struct Rec {
+    const SkCharToGlyphCache&   fCache;
+    int                         fLoops;
+    const SkFont&               fFont;
+    const SkUnichar*            fText;
+    int                         fCount;
+};
+}
 
-static void textToGlyphs_proc(int loops, const SkFont& font, const void* text, size_t len,
-                              int glyphCount) {
+typedef void (*TypefaceProc)(const Rec& r);
+
+static void textToGlyphs_proc(const Rec& r) {
     uint16_t glyphs[NGLYPHS];
-    SkASSERT(glyphCount <= NGLYPHS);
+    SkASSERT(r.fCount <= NGLYPHS);
 
-    for (int i = 0; i < loops; ++i) {
-        font.textToGlyphs(text, len, kUTF8_SkTextEncoding, glyphs, NGLYPHS);
+    for (int i = 0; i < r.fLoops; ++i) {
+        r.fFont.textToGlyphs(r.fText, r.fCount*4, kUTF32_SkTextEncoding, glyphs, NGLYPHS);
     }
 }
 
-static void charsToGlyphs_proc(int loops, const SkFont& font, const void* text,
-                               size_t len, int glyphCount) {
+static void charsToGlyphs_proc(const Rec& r) {
     uint16_t glyphs[NGLYPHS];
-    SkASSERT(glyphCount <= NGLYPHS);
+    SkASSERT(r.fCount <= NGLYPHS);
 
-    SkTypeface* face = font.getTypefaceOrDefault();
-    for (int i = 0; i < loops; ++i) {
-        face->charsToGlyphs(text, SkTypeface::kUTF8_Encoding, glyphs, glyphCount);
+    SkTypeface* face = r.fFont.getTypefaceOrDefault();
+    for (int i = 0; i < r.fLoops; ++i) {
+        face->charsToGlyphs(r.fText, SkTypeface::kUTF32_Encoding, glyphs, r.fCount);
     }
 }
 
-static void charsToGlyphsNull_proc(int loops, const SkFont& font, const void* text,
-                                   size_t len, int glyphCount) {
-    SkTypeface* face = font.getTypefaceOrDefault();
-    for (int i = 0; i < loops; ++i) {
-        face->charsToGlyphs(text, SkTypeface::kUTF8_Encoding, nullptr, glyphCount);
+static void addcache_proc(const Rec& r) {
+    for (int i = 0; i < r.fLoops; ++i) {
+        SkCharToGlyphCache cache;
+        for (int i = 0; i < r.fCount; ++i) {
+            cache.addCharAndGlyph(r.fText[i], i);
+        }
     }
 }
 
+static void findcache_proc(const Rec& r) {
+    for (int i = 0; i < r.fLoops; ++i) {
+        for (int i = 0; i < r.fCount; ++i) {
+            r.fCache.findGlyphIndex(r.fText[i]);
+        }
+    }
+}
+
+namespace {
+class UnicharGen {
+    SkUnichar fU;
+    const int fStep;
+public:
+    UnicharGen(int step) : fU(0), fStep(step) {}
+
+    SkUnichar next() {
+        fU += fStep;
+        return fU;
+    }
+};
+}
+
 class CMAPBench : public Benchmark {
     TypefaceProc fProc;
     SkString     fName;
-    char         fText[NGLYPHS];
+    SkUnichar    fText[NGLYPHS];
     SkFont       fFont;
+    SkCharToGlyphCache fCache;
 
 public:
     CMAPBench(TypefaceProc proc, const char name[]) {
         fProc = proc;
         fName.printf("cmap_%s", name);
 
+        UnicharGen gen(3);
         for (int i = 0; i < NGLYPHS; ++i) {
-            // we're jamming values into utf8, so we must keep it legal utf8
-            fText[i] = 'A' + (i & 31);
+            fText[i] = gen.next();
+            fCache.addCharAndGlyph(fText[i], i);
         }
         fFont.setTypeface(SkTypeface::MakeDefault());
     }
 
+    bool isSuitableFor(Backend backend) override {
+        return backend == kNonRendering_Backend;
+    }
+
 protected:
     const char* onGetName() override {
         return fName.c_str();
     }
 
     void onDraw(int loops, SkCanvas* canvas) override {
-        fProc(loops, fFont, fText, sizeof(fText), NGLYPHS);
+        fProc({fCache, loops, fFont, fText, NGLYPHS});
     }
 
 private:
@@ -80,6 +119,7 @@
 
 //////////////////////////////////////////////////////////////////////////////
 
-DEF_BENCH( return new CMAPBench(textToGlyphs_proc, "paint_textToGlyphs"); )
-DEF_BENCH( return new CMAPBench(charsToGlyphs_proc, "face_charsToGlyphs"); )
-DEF_BENCH( return new CMAPBench(charsToGlyphsNull_proc, "face_charsToGlyphs_null"); )
+DEF_BENCH( return new CMAPBench(textToGlyphs_proc, "font_charToGlyph"); )
+DEF_BENCH( return new CMAPBench(charsToGlyphs_proc, "face_charToGlyph"); )
+DEF_BENCH( return new CMAPBench(addcache_proc, "addcache_charToGlyph"); )
+DEF_BENCH( return new CMAPBench(findcache_proc, "findcache_charToGlyph"); )
diff --git a/gn/utils.gni b/gn/utils.gni
index 74d890c..4a44408 100644
--- a/gn/utils.gni
+++ b/gn/utils.gni
@@ -22,7 +22,7 @@
   "$_include/utils/SkParsePath.h",
   "$_include/utils/SkRandom.h",
   "$_include/utils/SkShadowUtils.h",
-  
+
   #mac
   "$_include/utils/mac/SkCGUtils.h",
 ]
@@ -38,6 +38,8 @@
   "$_src/utils/SkCanvasStack.h",
   "$_src/utils/SkCanvasStack.cpp",
   "$_src/utils/SkCanvasStateUtils.cpp",
+  "$_src/utils/SkCharToGlyphCache.cpp",
+  "$_src/utils/SkCharToGlyphCache.h",
   "$_src/utils/SkDashPath.cpp",
   "$_src/utils/SkDashPathPriv.h",
   "$_src/utils/SkEventTracer.cpp",
diff --git a/include/private/SkMalloc.h b/include/private/SkMalloc.h
index 0e41073..9c899b3 100644
--- a/include/private/SkMalloc.h
+++ b/include/private/SkMalloc.h
@@ -124,4 +124,13 @@
     return dst;
 }
 
+static inline void* sk_careful_memmove(void* dst, const void* src, size_t len) {
+    // When we pass >0 len we had better already be passing valid pointers.
+    // So we just need to skip calling memcpy when len == 0.
+    if (len) {
+        memmove(dst,src,len);
+    }
+    return dst;
+}
+
 #endif  // SkMalloc_DEFINED
diff --git a/src/utils/SkCharToGlyphCache.cpp b/src/utils/SkCharToGlyphCache.cpp
new file mode 100644
index 0000000..1ce91a6
--- /dev/null
+++ b/src/utils/SkCharToGlyphCache.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkCharToGlyphCache.h"
+#include "../private/SkTFitsIn.h"
+
+// TODO: consider faster ways to search big arrays
+//       bsearch
+//       slope-based (since we can compute a slope from first and last values)
+//
+//       Also, consider adding sentinels to key arrays so we don't have to check that
+//       index is in rage.
+//
+template <typename T> int find_index(const T* base, int count, T value) {
+    for (int index = 0; index < count; ++index) {
+        if (value <= base[index]) {
+            if (value < base[index]) {
+                index = ~index; // not found
+            }
+            return index;
+        }
+    }
+    return ~count;    // append at the end
+}
+
+SkCharToGlyphCache::SkCharToGlyphCache() {}
+
+SkCharToGlyphCache::~SkCharToGlyphCache() {}
+
+int SkCharToGlyphCache::findGlyphIndex(SkUnichar unichar) const {
+    int index;
+    if (SkTFitsIn<uint16_t>(unichar)) {
+        int count = f1616.count() >> 1;
+        index = find_index(f1616.begin(), count, SkToU16(unichar));
+        if (index >= 0) {
+            return f1616[count + index];
+        }
+    } else {
+        int count = fKey32.count();
+        index = find_index(fKey32.begin(), count, unichar);
+        if (index >= 0) {
+            return fValue16[index];
+        }
+    }
+    SkASSERT(index < 0);
+    return index;
+}
+
+void SkCharToGlyphCache::insertCharAndGlyph(int index, SkUnichar unichar, SkGlyphID glyph) {
+    if (SkTFitsIn<uint16_t>(unichar)) {
+        // count is the logical count, since our array is really two arrays back-to-back
+        size_t count = f1616.size() >> 1;
+        SkASSERT((unsigned)index <= count);
+
+        f1616.setCount(f1616.size() + 2); // make room for key and value
+        uint16_t* base = f1616.begin();
+
+        // now slide twice, to make room for the new key and value
+        uint16_t* src = base + count + index;
+        size_t    amt = count - index;
+        sk_careful_memmove(src + 2, src, amt * sizeof(uint16_t));
+
+        src = base + index;
+        amt = count;
+        sk_careful_memmove(src + 1, src, amt * sizeof(uint16_t));
+
+        // now store the new values
+        base[index] = SkToU16(unichar);
+        base[count + 1 + index] = glyph;
+    } else {
+        SkASSERT(fKey32.size() == fValue16.size());
+        SkASSERT((unsigned)index <= fKey32.size());
+
+        *fKey32.insert(index) = unichar;
+        *fValue16.insert(index) = glyph;
+    }
+    this->validate();
+}
+
+void SkCharToGlyphCache::validate() const {
+#ifdef SK_DEBUG
+    SkASSERT((f1616.count() & 1) == 0);
+    for (int i = 1; i < f1616.count() >> 1; ++i) {
+        SkASSERT(f1616[i - 1] < f1616[i]);
+    }
+
+    SkASSERT(fKey32.size() == fValue16.size());
+    for (int i = 1; i < fKey32.count(); ++i) {
+        SkASSERT(fKey32[i - 1] < fKey32[i]);
+    }
+#endif
+}
diff --git a/src/utils/SkCharToGlyphCache.h b/src/utils/SkCharToGlyphCache.h
new file mode 100644
index 0000000..2eed22e
--- /dev/null
+++ b/src/utils/SkCharToGlyphCache.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkCharToGlyphCache_DEFINED
+#define SkCharToGlyphCache_DEFINED
+
+#include "SkTypes.h"
+#include "../private/SkTDArray.h"
+
+class SkCharToGlyphCache {
+public:
+    SkCharToGlyphCache();
+    ~SkCharToGlyphCache();
+
+    /**
+     *  Given a unichar, return its glyphID (if the return value is positive), else return
+     *  ~index of where to insert the computed glyphID.
+     *
+     *  int result = cache.charToGlyph(unichar);
+     *  if (result >= 0) {
+     *      glyphID = result;
+     *  } else {
+     *      glyphID = compute_glyph_using_typeface(unichar);
+     *      cache.insertCharAndGlyph(~result, unichar, glyphID);
+     *  }
+     */
+    int findGlyphIndex(SkUnichar c) const;
+
+    /**
+     *  Insert a new char/glyph pair into the cache at the specified index.
+     *  See charToGlyph() for how to compute the bit-not of the index.
+     */
+    void insertCharAndGlyph(int index, SkUnichar, SkGlyphID);
+
+    // helper to pre-seed an entry in the cache
+    void addCharAndGlyph(SkUnichar unichar, SkGlyphID glyph) {
+        int index = this->findGlyphIndex(unichar);
+        if (index >= 0) {
+            SkASSERT(SkToU16(index) == glyph);
+        } else {
+            this->insertCharAndGlyph(~index, unichar, glyph);
+        }
+    }
+
+private:
+    // If the unichar is <= 16bits, store keys and values (planar) in the same array
+    // [keys...][values...]
+    SkTDArray<uint16_t> f1616;
+
+    // If the unichar is > 16bits, use these two arrays: 32bit key, 16bit value
+    SkTDArray<int32_t>   fKey32;
+    SkTDArray<uint16_t>  fValue16;
+
+    void validate() const;
+};
+
+#endif
diff --git a/tests/TestUtils.cpp b/tests/TestUtils.cpp
index 560894f..e0fbc57 100644
--- a/tests/TestUtils.cpp
+++ b/tests/TestUtils.cpp
@@ -182,3 +182,46 @@
     dst->prepend("data:image/png;base64,");
     return true;
 }
+
+#include "SkCharToGlyphCache.h"
+
+static SkGlyphID hash_to_glyph(uint32_t value) {
+    return SkToU16(((value >> 16) ^ value) & 0xFFFF);
+}
+
+namespace {
+class UnicharGen {
+    SkUnichar fU;
+    const int fStep;
+public:
+    UnicharGen(int step) : fU(0), fStep(step) {}
+
+    SkUnichar next() {
+        fU += fStep;
+        return fU;
+    }
+};
+}
+
+DEF_TEST(chartoglyph_cache, reporter) {
+    SkCharToGlyphCache cache;
+    const int step = 3;
+
+    UnicharGen gen(step);
+    for (int i = 0; i < 500; ++i) {
+        SkUnichar c = gen.next();
+        SkGlyphID glyph = hash_to_glyph(c);
+
+        int index = cache.findGlyphIndex(c);
+        REPORTER_ASSERT(reporter, index < 0);
+        cache.insertCharAndGlyph(~index, c, glyph);
+
+        UnicharGen gen2(step);
+        for (int j = 0; j <= i; ++j) {
+            c = gen2.next();
+            glyph = hash_to_glyph(c);
+            index = cache.findGlyphIndex(c);
+            REPORTER_ASSERT(reporter, (unsigned)index == glyph);
+        }
+    }
+}