Hand-code bsearch in the hot inner loop.

Saves another 3 / 4 percent with Amiri.
diff --git a/src/hb-open-type-private.hh b/src/hb-open-type-private.hh
index cd1163e..6a8b98b 100644
--- a/src/hb-open-type-private.hh
+++ b/src/hb-open-type-private.hh
@@ -949,21 +949,22 @@
 struct SortedArrayOf : ArrayOf<Type> {
 
   template <typename SearchType>
-  inline int search (const SearchType &x) const {
-    unsigned int count = this->len;
-    /* Linear search is *much* faster for small counts. */
-    if (likely (count < 32)) {
-      for (unsigned int i = 0; i < count; i++)
-	if (this->array[i].cmp (x) == 0)
-	  return i;
-      return -1;
-    } else {
-      struct Cmp {
-	static int cmp (const SearchType *a, const Type *b) { return b->cmp (*a); }
-      };
-      const Type *p = (const Type *) bsearch (&x, this->array, this->len, sizeof (this->array[0]), (hb_compare_func_t) Cmp::cmp);
-      return p ? p - this->array : -1;
+  inline int search (const SearchType &x) const
+  {
+    /* Hand-coded bsearch here since this is in the hot inner loop. */
+    int min = 0, max = (int) this->len - 1;
+    while (min <= max)
+    {
+      int mid = (min + max) / 2;
+      int c = this->array[mid].cmp (x);
+      if (c < 0)
+        max = mid - 1;
+      else if (c > 0)
+        min = mid + 1;
+      else
+        return mid;
     }
+    return -1;
   }
 };