Rearrange append to multiply by sizeOfT in the templated code

Multiplying by sizeOfT is very expensive in the common code: the
compiler must generate a multiply instruction. When sizeof(T) is
known at compile time, the compiler can strength-reduce the
multiply to shifts and adds. This CL provides a large performance
improvement for the system.

Change the two most common append calls to use this technique.

Bug: b/249254511
Bug: chromium:1369069
Change-Id: I9bc7bbdb007a31357581426336b96f7cfc4eaa1b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/587896
Reviewed-by: John Stiles <johnstiles@google.com>
Commit-Queue: Herb Derby <herb@google.com>
diff --git a/include/private/SkTDArray.h b/include/private/SkTDArray.h
index aaadffa..7a1b5db 100644
--- a/include/private/SkTDArray.h
+++ b/include/private/SkTDArray.h
@@ -59,19 +59,14 @@
 
     // Insertion routines
     void* prepend();
-    void* append();
+
+    void append();
+    void append(int count);
     void* append(const void* src, int count);
+
     void* insert(int index);
     void* insert(int index, int count, const void* src);
 
-    // Stack routines
-    void* push_back() {
-        if (fCount < fReserve) {
-            return this->address(fCount++);
-        } else {
-            return this->append();
-        }
-    }
     void pop_back() {
         SkASSERT(fCount > 0);
         fCount--;
@@ -201,9 +196,14 @@
     }
 
     T* append() {
-        return static_cast<T*>(fStorage.append());
+        fStorage.append();
+        return this->end() - 1;
     }
-    T* append(int count, const T* src = nullptr) {
+    T* append(int count) {
+        fStorage.append(count);
+        return this->end() - count;
+    }
+    T* append(int count, const T* src) {
         return static_cast<T*>(fStorage.append(src, count));
     }
 
@@ -235,7 +235,10 @@
     }
 
     // routines to treat the array like a stack
-    void push_back(const T& v) { *static_cast<T*>(fStorage.push_back()) = v; }
+    void push_back(const T& v) {
+        this->append();
+        this->back() = v;
+    }
     void pop_back() { fStorage.pop_back(); }
 
     void deleteAll() {
diff --git a/src/core/SkTDArray.cpp b/src/core/SkTDArray.cpp
index 856fe85..b78f649 100644
--- a/src/core/SkTDArray.cpp
+++ b/src/core/SkTDArray.cpp
@@ -148,8 +148,22 @@
     return this->insert(/*index=*/0);
 }
 
-void* SkTDStorage::append() {
-    return this->insert(fCount);
+void SkTDStorage::append() {
+    if (fCount < fReserve) {
+        fCount++;
+    } else {
+        this->insert(fCount);
+    }
+}
+
+void SkTDStorage::append(int count) {
+    SkASSERT(count >= 0);
+    // Read as: if (fCount + count <= fReserve) {...}. This is a UB safe way to avoid the add.
+    if (fReserve - fCount >= count) {
+        fCount += count;
+    } else {
+        this->insert(fCount, count, nullptr);
+    }
 }
 
 void* SkTDStorage::append(const void* src, int count) {