SkRuntimeEffect: Fix 'in' variables in CPU backend

We were never calling specialize() to bake in the values of ins,
so do that. Add uniformSize() to get the size of just the uniform
values. (The interpreter asserts that the size of the uniforms
being passed in matches the expected size from the ByteCode,
so these need to match up).

Added a unit test that uses both 'in' and 'uniform'.

Change-Id: I595822171211d35a17d5977fa790de0d1bbd6c78
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/263519
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/include/effects/SkArithmeticImageFilter.h b/include/effects/SkArithmeticImageFilter.h
index d7c8c8d..2945a13 100644
--- a/include/effects/SkArithmeticImageFilter.h
+++ b/include/effects/SkArithmeticImageFilter.h
@@ -15,8 +15,8 @@
         memset(this, 0, sizeof(*this));
     }
 
-    bool enforcePMColor;
     float k[4];
+    bool enforcePMColor;
 };
 
 // DEPRECATED: Use include/effects/SkImageFilters::Arithmetic
diff --git a/include/effects/SkRuntimeEffect.h b/include/effects/SkRuntimeEffect.h
index 5a39edc..1495e13 100644
--- a/include/effects/SkRuntimeEffect.h
+++ b/include/effects/SkRuntimeEffect.h
@@ -102,7 +102,13 @@
         const std::vector<T>& fVec;
     };
 
+    // Combined size of all 'in' and 'uniform' variables. When calling makeColorFilter or
+    // makeShader, provide an SkData of this size, containing values for all of those variables.
     size_t inputSize() const;
+
+    // Combined size of just the 'uniform' variables.
+    size_t uniformSize() const { return fUniformSize; }
+
     ConstIterable<Variable> inputs() const { return ConstIterable<Variable>(fInAndUniformVars); }
     ConstIterable<SkString> children() const { return ConstIterable<SkString>(fChildren); }
 
@@ -117,12 +123,16 @@
     // If successful, ByteCode != nullptr, otherwise, ErrorText contains the reason for failure.
     using ByteCodeResult = std::tuple<std::unique_ptr<SkSL::ByteCode>, SkString>;
 
-    ByteCodeResult toByteCode();
+    ByteCodeResult toByteCode(const void* inputs);
 
 private:
     SkRuntimeEffect(SkString sksl, std::unique_ptr<SkSL::Compiler> compiler,
                     std::unique_ptr<SkSL::Program> baseProgram,
-                    std::vector<Variable>&& inAndUniformVars, std::vector<SkString>&& children);
+                    std::vector<Variable>&& inAndUniformVars, std::vector<SkString>&& children,
+                    size_t uniformSize);
+
+    using SpecializeResult = std::tuple<std::unique_ptr<SkSL::Program>, SkString>;
+    SpecializeResult specialize(SkSL::Program& baseProgram, const void* inputs);
 
     int fIndex;
     SkString fSkSL;
@@ -131,6 +141,8 @@
     std::unique_ptr<SkSL::Program> fBaseProgram;
     std::vector<Variable> fInAndUniformVars;
     std::vector<SkString> fChildren;
+
+    size_t fUniformSize;
 };
 
 #endif
diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp
index 320fede..26836d4 100644
--- a/src/core/SkColorFilter.cpp
+++ b/src/core/SkColorFilter.cpp
@@ -417,12 +417,12 @@
         auto ctx = rec.fAlloc->make<SkRasterPipeline_InterpreterCtx>();
         // don't need to set ctx->paintColor
         ctx->inputs = fInputs->data();
-        ctx->ninputs = fInputs->size() / 4;
+        ctx->ninputs = fEffect->uniformSize() / 4;
         ctx->shaderConvention = false;
 
         SkAutoMutexExclusive ama(fByteCodeMutex);
         if (!fByteCode) {
-            auto [byteCode, errorText] = fEffect->toByteCode();
+            auto [byteCode, errorText] = fEffect->toByteCode(fInputs->data());
             if (!byteCode) {
                 SkDebugf("%s\n", errorText.c_str());
                 return false;
diff --git a/src/core/SkRuntimeEffect.cpp b/src/core/SkRuntimeEffect.cpp
index 17b1ce9..a3ab9a4 100644
--- a/src/core/SkRuntimeEffect.cpp
+++ b/src/core/SkRuntimeEffect.cpp
@@ -32,13 +32,18 @@
     }
     SkASSERT(!compiler->errorCount());
 
-    size_t offset = 0;
+    size_t offset = 0, uniformSize = 0;
     std::vector<Variable> inAndUniformVars;
     std::vector<SkString> children;
     const SkSL::Context& ctx(compiler->context());
 
-    // Gather the inputs in two passes, to de-interleave them in our input layout
-    for (auto flag : { SkSL::Modifiers::kIn_Flag, SkSL::Modifiers::kUniform_Flag }) {
+    // Gather the inputs in two passes, to de-interleave them in our input layout.
+    // We put the uniforms *first*, so that the CPU backend can alias the combined input block as
+    // the uniform block when calling the interpreter.
+    for (auto flag : { SkSL::Modifiers::kUniform_Flag, SkSL::Modifiers::kIn_Flag }) {
+        if (flag == SkSL::Modifiers::kIn_Flag) {
+            uniformSize = offset;
+        }
         for (const auto& e : *program) {
             if (e.fKind == SkSL::ProgramElement::kVar_Kind) {
                 SkSL::VarDeclarations& v = (SkSL::VarDeclarations&) e;
@@ -164,7 +169,8 @@
                                                       std::move(compiler),
                                                       std::move(program),
                                                       std::move(inAndUniformVars),
-                                                      std::move(children)));
+                                                      std::move(children),
+                                                      uniformSize));
     return std::make_pair(std::move(effect), SkString());
 }
 
@@ -190,14 +196,18 @@
 SkRuntimeEffect::SkRuntimeEffect(SkString sksl, std::unique_ptr<SkSL::Compiler> compiler,
                                  std::unique_ptr<SkSL::Program> baseProgram,
                                  std::vector<Variable>&& inAndUniformVars,
-                                 std::vector<SkString>&& children)
+                                 std::vector<SkString>&& children,
+                                 size_t uniformSize)
         : fIndex(new_sksl_index())
         , fSkSL(std::move(sksl))
         , fCompiler(std::move(compiler))
         , fBaseProgram(std::move(baseProgram))
         , fInAndUniformVars(std::move(inAndUniformVars))
-        , fChildren(std::move(children)) {
+        , fChildren(std::move(children))
+        , fUniformSize(uniformSize) {
     SkASSERT(fCompiler && fBaseProgram);
+    SkASSERT(SkIsAlign4(fUniformSize));
+    SkASSERT(fUniformSize <= this->inputSize());
 }
 
 size_t SkRuntimeEffect::inputSize() const {
@@ -206,23 +216,8 @@
         : fInAndUniformVars.back().fOffset + fInAndUniformVars.back().sizeInBytes();
 }
 
-#if SK_SUPPORT_GPU
-bool SkRuntimeEffect::toPipelineStage(const void* inputs, const GrShaderCaps* shaderCaps,
-                                      SkSL::PipelineStageArgs* outArgs) {
-    // This function is used by the GPU backend, and can't reuse our previously built fBaseProgram.
-    // If the supplied shaderCaps have any non-default values, we have baked in the wrong settings.
-    SkSL::Program::Settings settings;
-    settings.fCaps = shaderCaps;
-
-    auto baseProgram = fCompiler->convertProgram(SkSL::Program::kPipelineStage_Kind,
-                                                 SkSL::String(fSkSL.c_str(), fSkSL.size()),
-                                                 settings);
-    if (!baseProgram) {
-        SkDebugf("%s\n", fCompiler->errorText().c_str());
-        SkASSERT(false);
-        return false;
-    }
-
+SkRuntimeEffect::SpecializeResult SkRuntimeEffect::specialize(SkSL::Program& baseProgram,
+                                                              const void* inputs) {
     std::unordered_map<SkSL::String, SkSL::Program::Settings::Value> inputMap;
     for (const auto& v : fInAndUniformVars) {
         if (v.fQualifier != Variable::Qualifier::kIn) {
@@ -249,18 +244,40 @@
             }
             default:
                 SkDEBUGFAIL("Unsupported input variable type");
-                return false;
+                return SpecializeResult{nullptr, SkString("Unsupported input variable type")};
         }
     }
 
-    auto specialized = fCompiler->specialize(*baseProgram, inputMap);
+    auto specialized = fCompiler->specialize(baseProgram, inputMap);
     bool optimized = fCompiler->optimize(*specialized);
     if (!optimized) {
+        return SpecializeResult{nullptr, SkString(fCompiler->errorText().c_str())};
+    }
+    return SpecializeResult{std::move(specialized), SkString()};
+}
+
+#if SK_SUPPORT_GPU
+bool SkRuntimeEffect::toPipelineStage(const void* inputs, const GrShaderCaps* shaderCaps,
+                                      SkSL::PipelineStageArgs* outArgs) {
+    // This function is used by the GPU backend, and can't reuse our previously built fBaseProgram.
+    // If the supplied shaderCaps have any non-default values, we have baked in the wrong settings.
+    SkSL::Program::Settings settings;
+    settings.fCaps = shaderCaps;
+
+    auto baseProgram = fCompiler->convertProgram(SkSL::Program::kPipelineStage_Kind,
+                                                 SkSL::String(fSkSL.c_str(), fSkSL.size()),
+                                                 settings);
+    if (!baseProgram) {
         SkDebugf("%s\n", fCompiler->errorText().c_str());
         SkASSERT(false);
         return false;
     }
 
+    auto specialized = std::get<0>(this->specialize(*baseProgram, inputs));
+    if (!specialized) {
+        return false;
+    }
+
     if (!fCompiler->toPipelineStage(*specialized, outArgs)) {
         SkDebugf("%s\n", fCompiler->errorText().c_str());
         SkASSERT(false);
@@ -271,9 +288,13 @@
 }
 #endif
 
-SkRuntimeEffect::ByteCodeResult SkRuntimeEffect::toByteCode() {
-    auto byteCode = fCompiler->toByteCode(*fBaseProgram);
-    return std::make_tuple(std::move(byteCode), SkString(fCompiler->errorText().c_str()));
+SkRuntimeEffect::ByteCodeResult SkRuntimeEffect::toByteCode(const void* inputs) {
+    auto [specialized, errorText] = this->specialize(*fBaseProgram, inputs);
+    if (!specialized) {
+        return ByteCodeResult{nullptr, errorText};
+    }
+    auto byteCode = fCompiler->toByteCode(*specialized);
+    return ByteCodeResult(std::move(byteCode), SkString(fCompiler->errorText().c_str()));
 }
 
 sk_sp<SkShader> SkRuntimeEffect::makeShader(sk_sp<SkData> inputs,
diff --git a/src/shaders/SkRTShader.cpp b/src/shaders/SkRTShader.cpp
index dcc6c4e..3c817ba 100644
--- a/src/shaders/SkRTShader.cpp
+++ b/src/shaders/SkRTShader.cpp
@@ -41,12 +41,12 @@
     auto ctx = rec.fAlloc->make<SkRasterPipeline_InterpreterCtx>();
     ctx->paintColor = rec.fPaint.getColor4f();
     ctx->inputs = fInputs->data();
-    ctx->ninputs = fInputs->size() / 4;
+    ctx->ninputs = fEffect->uniformSize() / 4;
     ctx->shaderConvention = true;
 
     SkAutoMutexExclusive ama(fByteCodeMutex);
     if (!fByteCode) {
-        auto [byteCode, errorText] = fEffect->toByteCode();
+        auto [byteCode, errorText] = fEffect->toByteCode(fInputs->data());
         if (!byteCode) {
             SkDebugf("%s\n", errorText.c_str());
             return false;
diff --git a/tests/SkRuntimeEffectTest.cpp b/tests/SkRuntimeEffectTest.cpp
index ed017af..c31407a 100644
--- a/tests/SkRuntimeEffectTest.cpp
+++ b/tests/SkRuntimeEffectTest.cpp
@@ -142,14 +142,24 @@
     TestEffect xy(r, "", "color = half4(half(x - 0.5), half(y - 0.5), 0, 1);");
     xy.test(r, surface, 0xFF000000, 0xFF0000FF, 0xFF00FF00, 0xFF00FFFF);
 
+    using float4 = std::array<float, 4>;
+
     // NOTE: For now, we always emit valid premul colors, until CPU and GPU agree on clamping
     TestEffect uniformColor(r, "uniform float4 gColor;", "color = half4(gColor);");
 
-    uniformColor["gColor"] = std::array<float, 4>{ 0.0f, 0.25f, 0.75f, 1.0f };
+    uniformColor["gColor"] = float4{ 0.0f, 0.25f, 0.75f, 1.0f };
     uniformColor.test(r, surface, 0xFFBF4000);
 
-    uniformColor["gColor"] = std::array<float, 4>{ 0.75f, 0.25f, 0.0f, 1.0f };
+    uniformColor["gColor"] = float4{ 0.75f, 0.25f, 0.0f, 1.0f };
     uniformColor.test(r, surface, 0xFF0040BF);
+
+    TestEffect pickColor(r, "in int flag; uniform half4 gColors[2];", "color = gColors[flag];");
+    pickColor["gColors"] =
+            std::array<float4, 2>{float4{1.0f, 0.0f, 0.0f, 1.0f}, float4{0.0f, 1.0f, 0.0f, 1.0f}};
+    pickColor["flag"] = 0;
+    pickColor.test(r, surface, 0xFF0000FF);
+    pickColor["flag"] = 1;
+    pickColor.test(r, surface, 0xFF00FF00);
 }
 
 DEF_TEST(SkRuntimeEffectSimple, r) {