SkVarAlloc

Like SkChunkAlloc, but
  - does its allocation with better sympathy for malloc granularity;
  - the fast path inlines entirely;
  - smaller per-block overhead;
  - smaller per-SkVarAlloc overhead;
  - growth parameters are a little more tunable.

Its main downside is less flexibility; it supports fewer methods than SkChunkAlloc.

The current parameters bring the first allocation down from 4K to 1K,
without affecting recording time on my desktop.  skiaperf.com will tell the
whole story.

BUG=skia:

Review URL: https://codereview.chromium.org/674263002
diff --git a/gyp/core.gypi b/gyp/core.gypi
index 2239058..01838a3 100644
--- a/gyp/core.gypi
+++ b/gyp/core.gypi
@@ -209,6 +209,7 @@
         '<(skia_src_path)/core/SkUnPreMultiply.cpp',
         '<(skia_src_path)/core/SkUtils.cpp',
         '<(skia_src_path)/core/SkValidatingReadBuffer.cpp',
+        '<(skia_src_path)/core/SkVarAlloc.cpp',
         '<(skia_src_path)/core/SkVertState.cpp',
         '<(skia_src_path)/core/SkWriteBuffer.cpp',
         '<(skia_src_path)/core/SkWriter32.cpp',
diff --git a/src/core/SkRecord.h b/src/core/SkRecord.h
index 5362d91..8302db4 100644
--- a/src/core/SkRecord.h
+++ b/src/core/SkRecord.h
@@ -8,10 +8,10 @@
 #ifndef SkRecord_DEFINED
 #define SkRecord_DEFINED
 
-#include "SkChunkAlloc.h"
 #include "SkRecords.h"
 #include "SkTLogic.h"
 #include "SkTemplates.h"
+#include "SkVarAlloc.h"
 
 // SkRecord (REC-ord) represents a sequence of SkCanvas calls, saved for future use.
 // These future uses may include: replay, optimization, serialization, or combinations of those.
@@ -27,11 +27,10 @@
 
 class SkRecord : SkNoncopyable {
     enum {
-        kChunkBytes = 4096,
         kFirstReserveCount = 64 / sizeof(void*),
     };
 public:
-    SkRecord() : fAlloc(kChunkBytes), fCount(0), fReserved(0) {}
+    SkRecord() : fAlloc(1024, 2.0f), fCount(0), fReserved(0) {}
 
     ~SkRecord() {
         Destroyer destroyer;
@@ -69,7 +68,7 @@
     template <typename T>
     T* alloc(size_t count = 1) {
         // Bump up to the next pointer width if needed, so all allocations start pointer-aligned.
-        return (T*)fAlloc.allocThrow(SkAlignPtr(sizeof(T) * count));
+        return (T*)fAlloc.alloc(sizeof(T) * count, SK_MALLOC_THROW);
     }
 
     // Add a new command of type T to the end of this SkRecord.
@@ -226,7 +225,7 @@
     // fRecords and fTypes need to be data structures that can append fixed length data, and need to
     // support efficient random access and forward iteration.  (They don't need to be contiguous.)
 
-    SkChunkAlloc fAlloc;
+    SkVarAlloc fAlloc;
     SkAutoTMalloc<Record> fRecords;
     SkAutoTMalloc<Type8> fTypes;
     // fCount and fReserved measure both fRecords and fTypes, which always grow in lock step.
diff --git a/src/core/SkVarAlloc.cpp b/src/core/SkVarAlloc.cpp
new file mode 100644
index 0000000..5c3a41c
--- /dev/null
+++ b/src/core/SkVarAlloc.cpp
@@ -0,0 +1,56 @@
+#include "SkVarAlloc.h"
+#include "SkScalar.h"
+
+// We use non-standard malloc diagnostic methods to make sure our allocations are sized well.
+#if defined(SK_BUILD_FOR_MAC)
+    #include <malloc/malloc.h>
+#elif defined(SK_BUILD_FOR_LINUX)
+    #include <malloc.h>
+#endif
+
+// A raw malloc'd chunk.  Blocks form a singly-linked list through 'prev'
+// (newest block first); user data lives immediately after the header.
+struct SkVarAlloc::Block {
+    Block* prev;
+    // Usable bytes begin right past the Block header itself.
+    char* data() { return (char*)(this + 1); }
+
+    // Allocate 'size' total bytes (header included) and chain the new block
+    // in front of 'prev'.  'flags' is forwarded to sk_malloc_flags.
+    // NOTE(review): if 'flags' omits SK_MALLOC_THROW, sk_malloc_flags may
+    // return NULL and the 'b->prev' write below would crash — confirm all
+    // callers either pass SK_MALLOC_THROW or can tolerate this.
+    static Block* Alloc(Block* prev, size_t size, unsigned flags) {
+        SkASSERT(size >= sizeof(Block));
+        Block* b = (Block*)sk_malloc_flags(size, flags);
+        b->prev = prev;
+        return b;
+    }
+};
+
+// No block is allocated up front: fByte == fLimit == NULL, so the very first
+// alloc() call fails its capacity check and falls through to makeSpace().
+SkVarAlloc::SkVarAlloc(size_t smallest, float growth)
+    : fByte(NULL)
+    , fLimit(NULL)
+    , fSmallest(SkToUInt(smallest))
+    , fGrowth(growth)
+    , fBlock(NULL) {}
+
+// Free every block, walking the linked list from the newest block (fBlock)
+// back to the oldest.
+SkVarAlloc::~SkVarAlloc() {
+    Block* b = fBlock;
+    while (b) {
+        Block* prev = b->prev;  // Read before sk_free invalidates 'b'.
+        sk_free(b);
+        b = prev;
+    }
+}
+
+// Slow path for alloc(): grab a fresh block with room for 'bytes' plus the
+// Block header, then bump fSmallest by fGrowth so the next block is bigger.
+void SkVarAlloc::makeSpace(size_t bytes, unsigned flags) {
+    SkASSERT(SkIsAlignPtr(bytes));
+
+    // Double from fSmallest until the request (plus header) fits, keeping
+    // block sizes power-of-two multiples of fSmallest.
+    size_t alloc = fSmallest;
+    while (alloc < bytes + sizeof(Block)) {
+        alloc *= 2;
+    }
+    fBlock = Block::Alloc(fBlock, alloc, flags);
+    fByte = fBlock->data();
+    fLimit = fByte + alloc - sizeof(Block);
+    // Scale the floor for the next block.  Truncation means a growth < 1.0f
+    // could drive fSmallest toward zero — presumably callers always pass
+    // growth >= 1.0f (SkRecord uses 2.0f); TODO confirm.
+    fSmallest = SkToUInt(SkScalarTruncToInt(fSmallest * fGrowth));
+
+    // Debug-only sanity check that 'alloc' is a size malloc actually likes,
+    // i.e. we are not leaving usable slop on the table.
+#if defined(SK_BUILD_FOR_MAC)
+    SkASSERT(alloc == malloc_good_size(alloc));
+#elif defined(SK_BUILD_FOR_LINUX)
+    SkASSERT(alloc == malloc_usable_size(fByte - sizeof(Block)));
+#endif
+}
diff --git a/src/core/SkVarAlloc.h b/src/core/SkVarAlloc.h
new file mode 100644
index 0000000..0a7864b
--- /dev/null
+++ b/src/core/SkVarAlloc.h
@@ -0,0 +1,40 @@
+#ifndef SkVarAlloc_DEFINED
+#define SkVarAlloc_DEFINED
+
+#include "SkTypes.h"
+
+// A fast, grow-only bump allocator: hands out pointer-aligned byte ranges
+// from a chain of malloc'd blocks and frees them all at once in the dtor.
+// Individual allocations cannot be freed.
+class SkVarAlloc : SkNoncopyable {
+public:
+    // SkVarAlloc will never allocate less than smallest bytes at a time.
+    // When it allocates a new block, it will be at least growth times bigger than the last.
+    SkVarAlloc(size_t smallest, float growth);
+    ~SkVarAlloc();
+
+    // Returns contiguous bytes aligned at least for pointers.  You may pass SK_MALLOC_THROW, etc.
+    char* alloc(size_t bytes, unsigned sk_malloc_flags) {
+        // Round the request up so every allocation starts pointer-aligned.
+        bytes = SkAlignPtr(bytes);
+
+        // Fast path: bump-allocate from the current block.  Only the rare
+        // refill goes out of line, keeping this method fully inlinable.
+        if (fByte + bytes > fLimit) {
+            this->makeSpace(bytes, sk_malloc_flags);
+        }
+        SkASSERT(fByte + bytes <= fLimit);
+
+        char* ptr = fByte;
+        fByte += bytes;
+        return ptr;
+    }
+
+private:
+    // Out-of-line slow path: allocates a new block big enough for 'bytes'.
+    void makeSpace(size_t bytes, unsigned flags);
+
+    char* fByte;          // Next free byte in the current block.
+    const char* fLimit;   // One past the last usable byte of the current block.
+
+    unsigned fSmallest;   // Minimum size of the next block; scaled by fGrowth.
+    const float fGrowth;
+
+    struct Block;
+    Block* fBlock;        // Newest block; heads the linked list of all blocks.
+};
+
+#endif//SkVarAlloc_DEFINED