Revert "Redesign program key construction"

This reverts commit bbbf1a7f50a303bd76163793bd5968c72f5f4432.

Reason for revert: D3D (Direct3D) failures

Original change's description:
> Redesign program key construction
>
> This does two things:
> 1) Moves responsibility for bit-packing portions of the key into the key
>    itself. A new GrKeyBuilder type manages adding bits, with asserts to
>    ensure a value always fits in the requested number. In theory this
>    will let us generate smaller keys overall, at the expense of slightly
>    more complex code during construction.
> 2) Adds a string label parameter for key methods that fold in data. For
>    new methods, the label is required. To ease migration, the old add32
>    does not require a label (yet). This will let us generate detailed,
>    human readable keys, either based on SK_DEBUG, or a runtime option
>    (if we're comfortable paying the cost).
>
> Bug: skia:11372
> Change-Id: Ib0f941551e0dbadabbd2a7de912b00e9e766b166
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/377876
> Commit-Queue: Brian Osman <brianosman@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>

TBR=bsalomon@google.com,robertphillips@google.com,brianosman@google.com

Change-Id: I7bfb20905c87083e84a1ea21bc53d63e882e2c68
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:11372
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/378777
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index e1c6ee0..21fd738 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -1685,7 +1685,6 @@
     auto precompileShaders = [&memoryCache](GrDirectContext* dContext) {
         memoryCache.foreach([dContext](sk_sp<const SkData> key,
                                        sk_sp<SkData> data,
-                                       const SkString& /*description*/,
                                        int /*count*/) {
             SkAssertResult(dContext->precompileShader(*key, *data));
         });
diff --git a/include/gpu/GrContextOptions.h b/include/gpu/GrContextOptions.h
index 7e9e724..c7b9aab 100644
--- a/include/gpu/GrContextOptions.h
+++ b/include/gpu/GrContextOptions.h
@@ -9,7 +9,6 @@
 #define GrContextOptions_DEFINED
 
 #include "include/core/SkData.h"
-#include "include/core/SkString.h"
 #include "include/core/SkTypes.h"
 #include "include/gpu/GrDriverBugWorkarounds.h"
 #include "include/gpu/GrTypes.h"
@@ -52,17 +51,7 @@
          */
         virtual sk_sp<SkData> load(const SkData& key) = 0;
 
-        // Placeholder until all clients override the 3-parameter store(), then remove this, and
-        // make that version pure virtual.
-        virtual void store(const SkData& /*key*/, const SkData& /*data*/) { SkASSERT(false); }
-
-        /**
-         * Stores data in the cache, indexed by key. description provides a human-readable
-         * version of the key.
-         */
-        virtual void store(const SkData& key, const SkData& data, const SkString& /*description*/) {
-            this->store(key, data);
-        }
+        virtual void store(const SkData& key, const SkData& data) = 0;
 
     protected:
         PersistentCache() = default;
diff --git a/src/gpu/GrPrimitiveProcessor.cpp b/src/gpu/GrPrimitiveProcessor.cpp
index 5730d82..9299f71 100644
--- a/src/gpu/GrPrimitiveProcessor.cpp
+++ b/src/gpu/GrPrimitiveProcessor.cpp
@@ -32,7 +32,7 @@
     return this->onTextureSampler(i);
 }
 
-uint32_t GrPrimitiveProcessor::ComputeCoordTransformsKey(const GrFragmentProcessor& fp) {
+uint32_t GrPrimitiveProcessor::computeCoordTransformsKey(const GrFragmentProcessor& fp) const {
     // This is highly coupled with the code in GrGLSLGeometryProcessor::collectTransforms().
 
     uint32_t key = 0;
diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h
index 729e518..104c508 100644
--- a/src/gpu/GrPrimitiveProcessor.h
+++ b/src/gpu/GrPrimitiveProcessor.h
@@ -181,7 +181,7 @@
      * Computes a key for the transforms owned by an FP based on the shader code that will be
      * emitted by the primitive processor to implement them.
      */
-    static uint32_t ComputeCoordTransformsKey(const GrFragmentProcessor& fp);
+    uint32_t computeCoordTransformsKey(const GrFragmentProcessor& fp) const;
 
     /**
      * Sets a unique key on the GrProcessorKeyBuilder that is directly associated with this geometry
diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h
index 97cf9a1..2ec0646 100644
--- a/src/gpu/GrProcessor.h
+++ b/src/gpu/GrProcessor.h
@@ -13,7 +13,6 @@
 #include "src/gpu/GrColor.h"
 #include "src/gpu/GrGpuBuffer.h"
 #include "src/gpu/GrProcessorUnitTest.h"
-#include "src/gpu/GrProgramDesc.h"
 #include "src/gpu/GrSamplerState.h"
 #include "src/gpu/GrShaderVar.h"
 #include "src/gpu/GrSurfaceProxyPriv.h"
@@ -27,33 +26,28 @@
  */
 class GrProcessorKeyBuilder {
 public:
-    GrProcessorKeyBuilder(GrKeyBuilder* key) : fKey(key) {
-        SkASSERT(0 == fKey->size() % sizeof(uint32_t));
+    GrProcessorKeyBuilder(SkTArray<unsigned char, true>* data) : fData(data), fCount(0) {
+        SkASSERT(0 == fData->count() % sizeof(uint32_t));
     }
 
-    ~GrProcessorKeyBuilder() { fKey->flush(); }
-
-    void addBits(uint32_t numBits, uint32_t val, const char* label) {
-        fKey->addBits(numBits, val, label);
+    void add32(uint32_t v) {
+        ++fCount;
+        fData->push_back_n(4, reinterpret_cast<uint8_t*>(&v));
     }
 
-    void addBytes(uint32_t numBytes, const void* data, const char* label) {
-        fKey->addBytes(numBytes, data, label);
+    /** Inserts count uint32_ts into the key. The returned pointer is only valid until the next
+        add*() call. */
+    uint32_t* SK_WARN_UNUSED_RESULT add32n(int count) {
+        SkASSERT(count > 0);
+        fCount += count;
+        return reinterpret_cast<uint32_t*>(fData->push_back_n(4 * count));
     }
 
-    void add32(uint32_t v, const char* label = "unknown") {
-        this->addBits(32, v, label);
-    }
-
-    template <typename StringFunc>
-    void addString(StringFunc&& sf) {
-        fKey->addString(std::move(sf));
-    }
-
-    size_t sizeInBits() const { return fKey->sizeInBits(); }
+    size_t size() const { return sizeof(uint32_t) * fCount; }
 
 private:
-    GrKeyBuilder* fKey;    // unowned ptr to the larger key.
+    SkTArray<uint8_t, true>* fData; // unowned ptr to the larger key.
+    int fCount;                     // number of uint32_ts added to fData by the processor.
 };
 
 /** Provides custom shader code to the Ganesh shading pipeline. GrProcessor objects *must* be
diff --git a/src/gpu/GrProgramDesc.cpp b/src/gpu/GrProgramDesc.cpp
index 9dbbadb..e11d06a 100644
--- a/src/gpu/GrProgramDesc.cpp
+++ b/src/gpu/GrProgramDesc.cpp
@@ -70,15 +70,6 @@
     }
 }
 
-// Currently we allow 8 bits for the class id and 24 bits for the overall processor key size
-// (as measured in bits, so the byte count of the processor key must be < 2^21).
-static constexpr uint32_t kClassIDBits = 8;
-static constexpr uint32_t kKeySizeBits = 24;
-
-static bool processor_meta_data_fits(uint32_t classID, size_t keySize) {
-    return (classID < (1u << kClassIDBits)) && (keySize < (1u << kKeySizeBits));
-}
-
 /**
  * A function which emits a meta key into the key builder.  This is required because shader code may
  * be dependent on properties of the effect that the effect itself doesn't use
@@ -92,10 +83,12 @@
                             const GrCaps& caps,
                             uint32_t transformKey,
                             GrProcessorKeyBuilder* b) {
-    size_t processorKeySize = b->sizeInBits();
+    size_t processorKeySize = b->size();
     uint32_t classID = fp.classID();
 
-    if (!processor_meta_data_fits(classID, processorKeySize)) {
+    // Currently we allow 16 bits for the class id and the overall processor key size.
+    static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
+    if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
         return false;
     }
 
@@ -106,48 +99,54 @@
         caps.addExtraSamplerKey(b, te.samplerState(), backendFormat);
     });
 
-    b->addBits(kClassIDBits, classID,          "fpClassID");
-    b->addBits(kKeySizeBits, processorKeySize, "fpKeySize");
-    b->add32(transformKey,                     "fpTransforms");
+    uint32_t* key = b->add32n(2);
+    key[0] = (classID << 16) | SkToU32(processorKeySize);
+    key[1] = transformKey;
     return true;
 }
 
 static bool gen_pp_meta_key(const GrPrimitiveProcessor& pp,
                             const GrCaps& caps,
+                            uint32_t transformKey,
                             GrProcessorKeyBuilder* b) {
-    size_t processorKeySize = b->sizeInBits();
+    size_t processorKeySize = b->size();
     uint32_t classID = pp.classID();
 
-    if (!processor_meta_data_fits(classID, processorKeySize)) {
+    // Currently we allow 16 bits for the class id and the overall processor key size.
+    static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
+    if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
         return false;
     }
 
     add_pp_sampler_keys(b, pp, caps);
 
-    b->addBits(kClassIDBits, classID,          "ppClassID");
-    b->addBits(kKeySizeBits, processorKeySize, "ppKeySize");
+    uint32_t* key = b->add32n(2);
+    key[0] = (classID << 16) | SkToU32(processorKeySize);
+    key[1] = transformKey;
     return true;
 }
 
 static bool gen_xp_meta_key(const GrXferProcessor& xp, GrProcessorKeyBuilder* b) {
-    size_t processorKeySize = b->sizeInBits();
+    size_t processorKeySize = b->size();
     uint32_t classID = xp.classID();
 
-    if (!processor_meta_data_fits(classID, processorKeySize)) {
+    // Currently we allow 16 bits for the class id and the overall processor key size.
+    static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
+    if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
         return false;
     }
 
-    b->addBits(kClassIDBits, classID,          "xpClassID");
-    b->addBits(kKeySizeBits, processorKeySize, "xpKeySize");
+    b->add32((classID << 16) | SkToU32(processorKeySize));
     return true;
 }
 
-static bool gen_frag_proc_and_meta_keys(const GrFragmentProcessor& fp,
+static bool gen_frag_proc_and_meta_keys(const GrPrimitiveProcessor& primProc,
+                                        const GrFragmentProcessor& fp,
                                         const GrCaps& caps,
                                         GrProcessorKeyBuilder* b) {
     for (int i = 0; i < fp.numChildProcessors(); ++i) {
         if (auto child = fp.childProcessor(i)) {
-            if (!gen_frag_proc_and_meta_keys(*child, caps, b)) {
+            if (!gen_frag_proc_and_meta_keys(primProc, *child, caps, b)) {
                 return false;
             }
         } else {
@@ -156,10 +155,9 @@
         }
     }
 
-    b->addString([&](){ return fp.name(); });
     fp.getGLSLProcessorKey(*caps.shaderCaps(), b);
 
-    return gen_fp_meta_key(fp, caps, GrPrimitiveProcessor::ComputeCoordTransformsKey(fp), b);
+    return gen_fp_meta_key(fp, caps, primProc.computeCoordTransformsKey(fp), b);
 }
 
 bool GrProgramDesc::Build(GrProgramDesc* desc,
@@ -181,10 +179,9 @@
     GrProcessorKeyBuilder b(&desc->key());
 
     const GrPrimitiveProcessor& primitiveProcessor = programInfo.primProc();
-    b.addString([&](){ return primitiveProcessor.name(); });
     primitiveProcessor.getGLSLProcessorKey(*caps.shaderCaps(), &b);
     primitiveProcessor.getAttributeKey(&b);
-    if (!gen_pp_meta_key(primitiveProcessor, caps, &b)) {
+    if (!gen_pp_meta_key(primitiveProcessor, caps, 0, &b)) {
         desc->key().reset();
         return false;
     }
@@ -193,7 +190,7 @@
     int numColorFPs = 0, numCoverageFPs = 0;
     for (int i = 0; i < pipeline.numFragmentProcessors(); ++i) {
         const GrFragmentProcessor& fp = pipeline.getFragmentProcessor(i);
-        if (!gen_frag_proc_and_meta_keys(fp, caps, &b)) {
+        if (!gen_frag_proc_and_meta_keys(primitiveProcessor, fp, caps, &b)) {
             desc->key().reset();
             return false;
         }
@@ -211,7 +208,6 @@
         origin = pipeline.dstProxyView().origin();
         originIfDstTexture = &origin;
     }
-    b.addString([&](){ return xp.name(); });
     xp.getGLSLProcessorKey(*caps.shaderCaps(), &b, originIfDstTexture, pipeline.dstSampleType());
     if (!gen_xp_meta_key(xp, &b)) {
         desc->key().reset();
@@ -224,22 +220,27 @@
     }
 
     // Add "header" metadata
-    b.addBits(16, pipeline.writeSwizzle().asKey(), "writeSwizzle");
-    b.addBits( 1, numColorFPs,    "numColorFPs");
-    b.addBits( 2, numCoverageFPs, "numCoverageFPs");
+    uint32_t header = 0;
+    SkDEBUGCODE(uint32_t header_bits = 0);
+    auto add_bits = [&](uint32_t nbits, uint32_t val) {
+        SkASSERT(val < (1u << nbits));
+        SkASSERT((header_bits += nbits) <= 32);
+        header = (header << nbits) | val;
+    };
+    add_bits(16, pipeline.writeSwizzle().asKey());
+    add_bits( 1, numColorFPs);
+    add_bits( 2, numCoverageFPs);
     // If we knew the shader won't depend on origin, we could skip this (and use the same program
     // for both origins). Instrumenting all fragment processors would be difficult and error prone.
-    b.addBits( 2, GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(programInfo.origin()), "origin");
-    b.addBits( 1, static_cast<uint32_t>(programInfo.requestedFeatures()), "requestedFeatures");
-    b.addBits( 1, pipeline.snapVerticesToPixelCenters(), "snapVertices");
+    add_bits( 2, GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(programInfo.origin()));
+    add_bits( 1, static_cast<uint32_t>(programInfo.requestedFeatures()));
+    add_bits( 1, pipeline.snapVerticesToPixelCenters());
     // The base descriptor only stores whether or not the primitiveType is kPoints. Backend-
     // specific versions (e.g., Vulkan) require more detail
-    b.addBits( 1, (programInfo.primitiveType() == GrPrimitiveType::kPoints), "isPoints");
+    add_bits( 1, (programInfo.primitiveType() == GrPrimitiveType::kPoints));
 
-    // This keyLength includes any partial uint32_t that's been written (rounded up).
-    // The GrProcessorKeyBuilder destructor will call flush() when we exit this function, putting
-    // a clean break between the "common" data written by this function, and any backend-specific
-    // data appended later.
+    b.add32(header);
+
     desc->fInitialKeyLength = desc->keyLength();
 
     return true;
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index c691e79..7eaf880 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -8,126 +8,15 @@
 #ifndef GrProgramDesc_DEFINED
 #define GrProgramDesc_DEFINED
 
-#include "include/core/SkString.h"
 #include "include/private/GrTypesPriv.h"
 #include "include/private/SkTArray.h"
 #include "include/private/SkTo.h"
 
-#include <limits.h>
-
 class GrCaps;
 class GrProgramInfo;
 class GrRenderTarget;
 class GrShaderCaps;
 
-class GrKeyBuilder {
-public:
-    GrKeyBuilder() = default;
-    GrKeyBuilder(const GrKeyBuilder& other) = default;
-
-    void reset() { *this = GrKeyBuilder{}; }
-
-    void addBits(uint32_t numBits, uint32_t val, const char* label) {
-        SkASSERT(numBits > 0 && numBits <= 32);
-        SkASSERT(numBits == 32 || (val < (1u << numBits)));
-
-        SkDEBUGCODE(fDescription.appendf("%s: %u\n", label, val);)
-
-        fCurValue |= (val << fBitsUsed);
-        fBitsUsed += numBits;
-
-        if (fBitsUsed >= 32) {
-            // Overflow, start a new working value
-            fData.push_back(fCurValue);
-            uint32_t excess = fBitsUsed - 32;
-            fCurValue = excess ? (val >> (numBits - excess)) : 0;
-            fBitsUsed = excess;
-        }
-
-        SkASSERT(fCurValue < (1u << fBitsUsed));
-    }
-
-    void addBytes(uint32_t numBytes, const void* data, const char* label) {
-        // TODO: Make this smarter/faster?
-        const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
-        for (; numBytes --> 0; bytes++) {
-            this->addBits(8, *bytes, label);
-        }
-    }
-
-    template <typename StringFunc>
-    void addString(StringFunc&& sf) {
-        #ifdef SK_DEBUG
-            fDescription.append(sf());
-            fDescription.append("\n");
-        #endif
-    }
-
-    void flush() {
-        if (fBitsUsed) {
-            fData.push_back(fCurValue);
-            fCurValue = 0;
-            fBitsUsed = 0;
-        }
-    }
-
-    bool empty() const { return fData.empty() && !fBitsUsed; }
-
-    const uint32_t* data() const {
-        SkASSERT(fBitsUsed == 0);  // flush() must be called when construction is complete
-        return fData.begin();
-    }
-
-    size_t size() const {
-        return (fData.count() + (fBitsUsed ? 1 : 0)) * sizeof(uint32_t);
-    }
-
-    size_t sizeInBits() const {
-        return (fData.count() * sizeof(uint32_t) * CHAR_BIT) + fBitsUsed;
-    }
-
-    GrKeyBuilder& operator=(const GrKeyBuilder& other) = default;
-
-    bool operator==(const GrKeyBuilder& that) const {
-        return fBitsUsed == that.fBitsUsed &&
-               fCurValue == that.fCurValue &&
-               fData == that.fData;
-    }
-
-    bool operator!= (const GrKeyBuilder& other) const {
-        return !(*this == other);
-    }
-
-    void setData(const void* data, size_t length) {
-        SkASSERT(SkIsAlign4(length));
-        fData.reset(length / 4);
-        memcpy(fData.begin(), data, length);
-    }
-
-    SkString description() const {
-        #ifdef SK_DEBUG
-            return fDescription;
-        #else
-            return SkString{};
-        #endif
-    }
-
-private:
-    enum {
-        kHeaderSize            = 1,    // "header" in ::Build
-        kMaxPreallocProcessors = 8,
-        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
-        kPreAllocSize = kHeaderSize +
-                        kMaxPreallocProcessors * kIntsPerProcessor,
-    };
-
-    SkSTArray<kPreAllocSize, uint32_t, true> fData;
-    uint32_t fCurValue = 0;
-    uint32_t fBitsUsed = 0;  // ... in current value
-
-    SkDEBUGCODE(SkString fDescription;)
-};
-
 /** This class is used to generate a generic program cache key. The Dawn, Metal and Vulkan
  *  backends derive backend-specific versions which add additional information.
  */
@@ -139,21 +28,38 @@
 
     // Returns this as a uint32_t array to be used as a key in the program cache.
     const uint32_t* asKey() const {
-        return fKey.data();
+        return reinterpret_cast<const uint32_t*>(fKey.begin());
     }
 
     // Gets the number of bytes in asKey(). It will be a 4-byte aligned value.
     uint32_t keyLength() const {
-        SkASSERT(0 == (fKey.size() % 4));
-        return fKey.size();
+        SkASSERT(0 == (fKey.count() % 4));
+        return fKey.count();
     }
 
-    SkString description() const { return fKey.description(); }
-
-    GrProgramDesc& operator= (const GrProgramDesc& other) = default;
+    GrProgramDesc& operator= (const GrProgramDesc& other) {
+        uint32_t keyLength = other.keyLength();
+        fKey.reset(SkToInt(keyLength));
+        memcpy(fKey.begin(), other.fKey.begin(), keyLength);
+        fInitialKeyLength = other.fInitialKeyLength;
+        return *this;
+    }
 
     bool operator== (const GrProgramDesc& that) const {
-        return this->fKey == that.fKey;
+        if (this->keyLength() != that.keyLength()) {
+            return false;
+        }
+
+        SkASSERT(SkIsAlign4(this->keyLength()));
+        int l = this->keyLength() >> 2;
+        const uint32_t* aKey = this->asKey();
+        const uint32_t* bKey = that.asKey();
+        for (int i = 0; i < l; ++i) {
+            if (aKey[i] != bKey[i]) {
+                return false;
+            }
+        }
+        return true;
     }
 
     bool operator!= (const GrProgramDesc& other) const {
@@ -190,14 +96,23 @@
         if (!SkTFitsIn<int>(keyLength)) {
             return false;
         }
-        desc->fKey.setData(keyData, keyLength);
+        desc->fKey.reset(SkToInt(keyLength));
+        memcpy(desc->fKey.begin(), keyData, keyLength);
         return true;
     }
 
-    GrKeyBuilder& key() { return fKey; }
+    enum {
+        kHeaderSize            = 4,    // "header" in ::Build
+        kMaxPreallocProcessors = 8,
+        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
+        kPreAllocSize = kHeaderSize +
+                        kMaxPreallocProcessors * sizeof(uint32_t) * kIntsPerProcessor,
+    };
+
+    SkSTArray<kPreAllocSize, uint8_t, true>& key() { return fKey; }
 
 private:
-    GrKeyBuilder fKey;
+    SkSTArray<kPreAllocSize, uint8_t, true> fKey;
     uint32_t fInitialKeyLength = 0;
 };
 
diff --git a/src/gpu/GrStencilSettings.cpp b/src/gpu/GrStencilSettings.cpp
index 7313858..f57b7a4 100644
--- a/src/gpu/GrStencilSettings.cpp
+++ b/src/gpu/GrStencilSettings.cpp
@@ -244,30 +244,39 @@
 }
 
 void GrStencilSettings::genKey(GrProcessorKeyBuilder* b, bool includeRefs) const {
-    b->addBits(6, fFlags, "stencilFlags");
+    b->add32(fFlags);
     if (this->isDisabled()) {
         return;
     }
     if (!this->isTwoSided()) {
+        constexpr int kCount16 = sizeof(Face) / sizeof(uint16_t);
+        static_assert(0 == sizeof(Face) % sizeof(uint16_t));
+        uint16_t* key = reinterpret_cast<uint16_t*>(b->add32n((kCount16 + 1) / 2));
         if (includeRefs) {
-            b->addBytes(sizeof(Face), &fCWFace, "stencilCWFace");
+            memcpy(key, &fCWFace, sizeof(Face));
         } else {
             Face tempFace = fCWFace;
             tempFace.fRef = 0;
-            b->addBytes(sizeof(Face), &tempFace, "stencilCWFace");
+            memcpy(key, &tempFace, sizeof(Face));
         }
+        key[kCount16] = 0;
+        static_assert(1 == kCount16 % 2);
     } else {
+        constexpr int kCount32 = (2 * sizeof(Face)) / sizeof(uint32_t);
+        static_assert(0 == (2 * sizeof(Face)) % sizeof(uint32_t));
+        uint32_t* key = b->add32n(kCount32);
         if (includeRefs) {
-            b->addBytes(sizeof(Face), &fCWFace, "stencilCWFace");
-            b->addBytes(sizeof(Face), &fCCWFace, "stencilCCWFace");
+            memcpy(key, &fCWFace, 2 * sizeof(Face));
+            static_assert(
+                    sizeof(Face) ==
+                    offsetof(GrStencilSettings, fCCWFace) - offsetof(GrStencilSettings, fCWFace));
         } else {
             Face tempFaces[2];
             tempFaces[0] = fCWFace;
             tempFaces[0].fRef = 0;
             tempFaces[1] = fCCWFace;
             tempFaces[1].fRef = 0;
-            b->addBytes(sizeof(Face), &tempFaces[0], "stencilCWFace");
-            b->addBytes(sizeof(Face), &tempFaces[1], "stencilCCWFace");
+            memcpy(key, &tempFaces, 2 * sizeof(Face));
         }
     }
     // We rely on GrStencilSettings::Face being tightly packed for the key to be reliable.
diff --git a/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp b/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
index acecfcf..0361673 100644
--- a/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
+++ b/src/gpu/d3d/GrD3DPipelineStateBuilder.cpp
@@ -630,10 +630,9 @@
             }
             sk_sp<SkData> key =
                     SkData::MakeWithoutCopy(this->desc().asKey(), this->desc().initialKeyLength());
-            const SkString& description = this->desc().description();
             sk_sp<SkData> data = GrPersistentCacheUtils::PackCachedShaders(
                     cacheSkSL ? kSKSL_Tag : kHLSL_Tag, hlsl, inputs, kGrShaderTypeCount);
-            persistentCache->store(*key, *data, description);
+            persistentCache->store(*key, *data);
         }
     }
 
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index 63d4c35..cb14885 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -166,7 +166,6 @@
         return;
     }
     sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(), this->desc().keyLength());
-    const SkString& description = this->desc().description();
     if (fGpu->glCaps().programBinarySupport()) {
         // binary cache
         GrGLsizei length = 0;
@@ -187,7 +186,7 @@
             writer.writePad32(binary.get(), length);
 
             auto data = writer.snapshotAsData();
-            this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data, description);
+            this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
         }
     } else {
         // source cache, plus metadata to allow for a complete precompile
@@ -204,7 +203,7 @@
 
         auto data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kGLSL_Tag,
                                                               shaders, &inputs, 1, &meta);
-        this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data, description);
+        this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
     }
 }
 
diff --git a/src/gpu/mtl/GrMtlPipelineStateBuilder.mm b/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
index 0a43cbf..4de002a 100644
--- a/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
+++ b/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
@@ -79,7 +79,6 @@
     // program, and that only depends on the base GrProgramDesc data.
     sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(),
                                                 this->desc().initialKeyLength());
-    const SkString& description = this->desc().description();
     sk_sp<SkData> data;
     if (isSkSL) {
         // source cache, plus metadata to allow for a complete precompile
@@ -91,7 +90,7 @@
         data = GrPersistentCacheUtils::PackCachedShaders(kMSL_Tag, shaders, inputs,
                                                          kGrShaderTypeCount);
     }
-    fGpu->getContext()->priv().getPersistentCache()->store(*key, *data, description);
+    fGpu->getContext()->priv().getPersistentCache()->store(*key, *data);
 }
 
 id<MTLLibrary> GrMtlPipelineStateBuilder::compileMtlShaderLibrary(
diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp
index c3c7a23..d07ae65 100644
--- a/src/gpu/vk/GrVkCaps.cpp
+++ b/src/gpu/vk/GrVkCaps.cpp
@@ -1754,13 +1754,12 @@
 
     GrVkSampler::Key key = GrVkSampler::GenerateKey(samplerState, *ycbcrInfo);
 
-    constexpr size_t numInts = (sizeof(key) + 3) / 4;
-    uint32_t tmp[numInts];
-    memcpy(tmp, &key, sizeof(key));
+    size_t numInts = (sizeof(key) + 3) / 4;
 
-    for (size_t i = 0; i < numInts; ++i) {
-        b->add32(tmp[i]);
-    }
+    uint32_t* tmp = b->add32n(numInts);
+
+    tmp[numInts - 1] = 0;
+    memcpy(tmp, &key, sizeof(key));
 }
 
 /**
diff --git a/src/gpu/vk/GrVkPipelineStateBuilder.cpp b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
index a12a77e..52d0f34 100644
--- a/src/gpu/vk/GrVkPipelineStateBuilder.cpp
+++ b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
@@ -160,13 +160,11 @@
     // to the key right after the base key.
     sk_sp<SkData> key = SkData::MakeWithoutCopy(this->desc().asKey(),
                                                 this->desc().initialKeyLength()+4);
-    const SkString& description = this->desc().description();
 
     sk_sp<SkData> data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kSPIRV_Tag,
                                                                    shaders,
                                                                    inputs, kGrShaderTypeCount);
-
-    this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data, description);
+    this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
 }
 
 GrVkPipelineState* GrVkPipelineStateBuilder::finalize(const GrProgramDesc& desc,
diff --git a/src/gpu/vk/GrVkResourceProvider.cpp b/src/gpu/vk/GrVkResourceProvider.cpp
index e3150c1..bcf2768 100644
--- a/src/gpu/vk/GrVkResourceProvider.cpp
+++ b/src/gpu/vk/GrVkResourceProvider.cpp
@@ -576,7 +576,7 @@
     sk_sp<SkData> keyData = SkData::MakeWithoutCopy(&key, sizeof(uint32_t));
 
     fGpu->getContext()->priv().getPersistentCache()->store(
-            *keyData, *SkData::MakeWithoutCopy(data.get(), dataSize), SkString("VkPipelineCache"));
+            *keyData, *SkData::MakeWithoutCopy(data.get(), dataSize));
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tools/gpu/MemoryCache.cpp b/tools/gpu/MemoryCache.cpp
index 65f08b7..93ef2b1 100644
--- a/tools/gpu/MemoryCache.cpp
+++ b/tools/gpu/MemoryCache.cpp
@@ -52,13 +52,13 @@
     return result->second.fData;
 }
 
-void MemoryCache::store(const SkData& key, const SkData& data, const SkString& description) {
+void MemoryCache::store(const SkData& key, const SkData& data) {
     if (LOG_MEMORY_CACHE) {
         SkDebugf("Store Key: %s\n\tData: %s\n\n", data_to_str(key).c_str(),
                  data_to_str(data).c_str());
     }
     ++fCacheStoreCnt;
-    fMap[Key(key)] = Value(data, description);
+    fMap[Key(key)] = Value(data);
 }
 
 void MemoryCache::writeShadersToDisk(const char* path, GrBackendApi api) {
diff --git a/tools/gpu/MemoryCache.h b/tools/gpu/MemoryCache.h
index cf7d284..f70cf13 100644
--- a/tools/gpu/MemoryCache.h
+++ b/tools/gpu/MemoryCache.h
@@ -33,7 +33,7 @@
     }
 
     sk_sp<SkData> load(const SkData& key) override;
-    void store(const SkData& key, const SkData& data, const SkString& description) override;
+    void store(const SkData& key, const SkData& data) override;
     int numCacheMisses() const { return fCacheMissCnt; }
     int numCacheStores() const { return fCacheStoreCnt; }
     void resetCacheStats() {
@@ -46,7 +46,7 @@
     template <typename Fn>
     void foreach(Fn&& fn) {
         for (auto it = fMap.begin(); it != fMap.end(); ++it) {
-            fn(it->first.fKey, it->second.fData, it->second.fDescription, it->second.fHitCount);
+            fn(it->first.fKey, it->second.fData, it->second.fHitCount);
         }
     }
 
@@ -65,16 +65,14 @@
 
     struct Value {
         Value() = default;
-        Value(const SkData& data, const SkString& description)
+        Value(const SkData& data)
             : fData(SkData::MakeWithCopy(data.data(), data.size()))
-            , fDescription(description)
             , fHitCount(1) {}
         Value(const Value& that) = default;
         Value& operator=(const Value&) = default;
 
         sk_sp<SkData> fData;
-        SkString      fDescription;
-        int           fHitCount;
+        int fHitCount;
     };
 
     struct Hash {
diff --git a/tools/viewer/Viewer.cpp b/tools/viewer/Viewer.cpp
index e3d09e1..18af842 100644
--- a/tools/viewer/Viewer.cpp
+++ b/tools/viewer/Viewer.cpp
@@ -2349,7 +2349,7 @@
                 static bool gLoadPending = false;
                 if (gLoadPending) {
                     auto collectShaders = [this](sk_sp<const SkData> key, sk_sp<SkData> data,
-                                                 const SkString& description, int hitCount) {
+                                                 int hitCount) {
                         CachedShader& entry(fCachedShaders.push_back());
                         entry.fKey = key;
                         SkMD5 hash;
@@ -2358,7 +2358,6 @@
                         for (int i = 0; i < 16; ++i) {
                             entry.fKeyString.appendf("%02x", digest.data[i]);
                         }
-                        entry.fKeyDescription = description;
 
                         SkReadBuffer reader(data->data(), data->size());
                         entry.fShaderType = GrPersistentCacheUtils::GetType(&reader);
@@ -2415,10 +2414,6 @@
                             ImVec2 boxSize(-1.0f, ImGui::GetTextLineHeight() * std::min(lines, 30));
                             ImGui::InputTextMultiline(label, str, boxSize);
                         };
-                        if (ImGui::TreeNode("Key")) {
-                            ImGui::TextWrapped("%s", entry.fKeyDescription.c_str());
-                            ImGui::TreePop();
-                        }
                         stringBox("##VP", &entry.fShader[kVertex_GrShaderType]);
                         stringBox("##FP", &entry.fShader[kFragment_GrShaderType]);
                         ImGui::TreePop();
@@ -2466,7 +2461,7 @@
                                                                               entry.fShader,
                                                                               entry.fInputs,
                                                                               kGrShaderTypeCount);
-                        fPersistentCache.store(*entry.fKey, *data, entry.fKeyDescription);
+                        fPersistentCache.store(*entry.fKey, *data);
 
                         entry.fShader[kFragment_GrShaderType] = backup;
                     }
diff --git a/tools/viewer/Viewer.h b/tools/viewer/Viewer.h
index bdb4703..b4ac70d 100644
--- a/tools/viewer/Viewer.h
+++ b/tools/viewer/Viewer.h
@@ -220,7 +220,6 @@
 
         sk_sp<const SkData> fKey;
         SkString            fKeyString;
-        SkString            fKeyDescription;
 
         SkFourByteTag         fShaderType;
         SkSL::String          fShader[kGrShaderTypeCount];