[graphite] Logical viewport is (0,0), adjust intrinsic uniforms to match

The CommandBuffer public functions now just take a viewport size to
enforce the logical viewport starts at (0,0). Any non-zero origin
for the final viewport is solely from the replay translation.

It also updates the intrinsic uniforms to include the inverse
dst copy dimensions for later normalization, and adjusts the
`rtAdjust` uniform to a `viewport` uniform that holds both the
replay translation and the inverse viewport dimensions. Since we can
assume that the logical viewport origin (what defines the mapping
from device coords to NDC) is (0,0), the equations in the vertex
shader are slightly adjusted as well.

The vertex shader does not rely on `viewport.xy`, these will be used
in the fragment shader to adjust sk_FragCoord to reflect replays in
a later CL.

Bug: b/367490053
Change-Id: Id9cf265ccdcc76bf502c955e5ddb3c696afa48ff
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/901696
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
diff --git a/src/gpu/graphite/CommandBuffer.cpp b/src/gpu/graphite/CommandBuffer.cpp
index de41bf5..301bf40 100644
--- a/src/gpu/graphite/CommandBuffer.cpp
+++ b/src/gpu/graphite/CommandBuffer.cpp
@@ -81,7 +81,7 @@
                                   sk_sp<Texture> depthStencilTexture,
                                   const Texture* dstCopy,
                                   SkIRect dstCopyBounds,
-                                  SkIRect viewport,
+                                  SkISize viewportDims,
                                   const DrawPassList& drawPasses) {
     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
 
@@ -102,8 +102,6 @@
         return true;
     }
 
-    viewport.offset(fReplayTranslation.x(), fReplayTranslation.y());
-
     dstCopyBounds.offset(fReplayTranslation.x(), fReplayTranslation.y());
     if (!dstCopyBounds.intersect(colorAttachmentBounds)) {
         // The draws within the RenderPass that would sample from the dstCopy have been translated
@@ -111,8 +109,10 @@
         dstCopyBounds = SkIRect::MakeEmpty();
     }
     // Save the dstCopy texture so that it can be embedded into texture bind commands later on.
+    // Stash the texture's full dimensions on the rect so we can calculate normalized coords later.
     fDstCopy.first = dstCopy;
-    fDstCopyOffset = dstCopyBounds.topLeft();
+    fDstCopyBounds = dstCopy ? SkIRect::MakePtSize(dstCopyBounds.topLeft(), dstCopy->dimensions())
+                             : SkIRect::MakeEmpty();
     if (dstCopy && !fDstCopy.second) {
         // Only lookup the sampler the first time we require a dstCopy. The texture can change
         // on subsequent passes but it will always use the same nearest neighbor sampling.
@@ -122,6 +122,10 @@
         this->trackResource(std::move(nearestNeighbor));
     }
 
+    // We don't intersect the viewport with the render pass bounds or target size because it just
+    // defines a linear transform, which we don't want to change just because a portion of it maps
+    // to a region that gets clipped.
+    SkIRect viewport = SkIRect::MakePtSize(fReplayTranslation, viewportDims);
     if (!this->onAddRenderPass(renderPassDesc,
                                renderPassBounds,
                                colorTexture.get(),
diff --git a/src/gpu/graphite/CommandBuffer.h b/src/gpu/graphite/CommandBuffer.h
index b54633f..2c576dd 100644
--- a/src/gpu/graphite/CommandBuffer.h
+++ b/src/gpu/graphite/CommandBuffer.h
@@ -75,15 +75,19 @@
     SkSpan<const sk_sp<Buffer>> buffersToAsyncMapOnSubmit() const;
 
     // If any recorded draw requires a dst texture copy for blending, that texture must be provided
-    // in `dstCopy`; otherwise it should be null. The `dstCopyBounds` and `viewport` are in the
-    // same coordinate space of the logical target *before* any replay translation is applied.
+    // in `dstCopy`; otherwise it should be null. The `dstCopyBounds` are in the same coordinate
+    // space of the logical viewport *before* any replay translation is applied.
+    //
+    // The logical viewport is always (0,0,viewportDims) and matches the "device" coordinate space
+    // of the higher-level SkDevices that recorded the rendering operations. The actual viewport
+    // is automatically adjusted by the replay translation.
     bool addRenderPass(const RenderPassDesc&,
                        sk_sp<Texture> colorTexture,
                        sk_sp<Texture> resolveTexture,
                        sk_sp<Texture> depthStencilTexture,
                        const Texture* dstCopy,
                        SkIRect dstCopyBounds,
-                       SkIRect viewport,
+                       SkISize viewportDims,
                        const DrawPassList& drawPasses);
 
     bool addComputePass(DispatchGroupSpan dispatchGroups);
@@ -122,13 +126,16 @@
     CommandBuffer();
 
     SkISize fColorAttachmentSize;
+    // This is also the origin of the logical viewport relative to the target texture's (0,0) pixel
     SkIVector fReplayTranslation;
+
     // The texture to use for implementing DstReadRequirement::kTextureCopy for the current render
     // pass. This is a bare pointer since the CopyTask that initializes the texture's contents
     // will have tracked the resource on the CommandBuffer already.
     std::pair<const Texture*, const Sampler*> fDstCopy;
-    // Already includes replay translation and respects final color attachment bounds.
-    SkIVector fDstCopyOffset;
+    // Already includes replay translation and respects final color attachment bounds, but with
+    // dimensions that equal fDstCopy's width and height.
+    SkIRect fDstCopyBounds;
 
 private:
     // Release all tracked Resources
diff --git a/src/gpu/graphite/ContextUtils.cpp b/src/gpu/graphite/ContextUtils.cpp
index 978dae1..c99f0fe 100644
--- a/src/gpu/graphite/ContextUtils.cpp
+++ b/src/gpu/graphite/ContextUtils.cpp
@@ -277,9 +277,8 @@
     return result;
 }
 
-static constexpr Uniform kIntrinsicUniforms[] = { {"rtAdjust",          SkSLType::kFloat4},
-                                                  {"replayTranslation", SkSLType::kFloat2},
-                                                  {"dstCopyOffset",     SkSLType::kFloat2} };
+static constexpr Uniform kIntrinsicUniforms[] = { {"viewport",      SkSLType::kFloat4},
+                                                  {"dstCopyBounds", SkSLType::kFloat4} };
 
 std::string emit_intrinsic_uniforms(int bufferID, Layout layout) {
     auto offsetter = UniformOffsetCalculator::ForTopLevel(layout);
@@ -298,31 +297,39 @@
 
 void CollectIntrinsicUniforms(const Caps* caps,
                               SkIRect viewport,
-                              SkIPoint replayTranslation,
-                              SkIPoint dstCopyOffset,
+                              SkIRect dstCopyBounds,
                               UniformManager* uniforms) {
     SkDEBUGCODE(uniforms->setExpectedUniforms(kIntrinsicUniforms, /*isSubstruct=*/false);)
 
-    // rtAdjust
+    // viewport
     {
-        // The rtAdjust defines the linear transform from logical pixel space (before any replay
-        // translation) to the NDC space. So we have to subtract off the replay offset.
-        const float x = viewport.left() - replayTranslation.x();
-        const float y = viewport.top()  - replayTranslation.y();
-        const float invTwoW = 2.f / viewport.width();
-        const float invTwoH = 2.f / viewport.height();
-        // Depending on how the backend defines its NDC space, we may have to flip the Y axis
-        // even though all logical rendering and actual pixel storage is assumed to be top-left.
-        const float yFlip = caps->ndcYAxisPointsDown() ? 1.f : -1.f;
-        SkV4 rtAdjust = {invTwoW, yFlip*invTwoH, -1.f - x*invTwoW, yFlip*(-1.f - y*invTwoH)};
-        uniforms->write(rtAdjust);
+        // The vertex shader needs to divide by the dimension and then multiply by 2, so do this
+        // once on the CPU. This is because viewport normalization wants to range from -1 to 1, and
+        // not 0 to 1. If any other user of the viewport uniform requires the true reciprocal or
+        // original dimensions, this can be adjusted.
+        SkASSERT(!viewport.isEmpty());
+        float invTwoW = 2.f / viewport.width();
+        float invTwoH = 2.f / viewport.height();
+
+        // If the NDC Y axis points up (opposite normal skia convention and the underlying view
+        // convention), upload the inverse height as a negative value. See BuildVertexSkSL
+        // for how this is used.
+        if (!caps->ndcYAxisPointsDown()) {
+            invTwoH *= -1.f;
+        }
+        uniforms->write(SkV4{(float) viewport.left(), (float) viewport.top(), invTwoW, invTwoH});
     }
 
-    // replayTranslation
-    uniforms->write(SkV2{(float) replayTranslation.fX, (float) replayTranslation.fY});
-
-    // dstCopyOffset
-    uniforms->write(SkV2{(float) dstCopyOffset.fX, (float) dstCopyOffset.fY});
+    // dstCopyBounds
+    {
+        // Unlike viewport, dstCopyBounds can be empty so check for 0 dimensions and set the
+        // reciprocal to 0. It is also not doubled since its purpose is to normalize texture coords
+        // to 0 to 1, and not -1 to 1.
+        int width = dstCopyBounds.width();
+        int height = dstCopyBounds.height();
+        uniforms->write(SkV4{(float) dstCopyBounds.left(), (float) dstCopyBounds.top(),
+                             width ? 1.f / width : 0.f, height ? 1.f / height : 0.f});
+    }
 
     SkDEBUGCODE(uniforms->doneWithExpectedUniforms());
 }
@@ -576,7 +583,17 @@
     }
 
     sksl += step->vertexSkSL();
-    sksl += "sk_Position = float4(devPosition.xy * rtAdjust.xy + devPosition.ww * rtAdjust.zw,"
+
+    // We want to map the rectangle of logical device pixels from (0,0) to (viewWidth, viewHeight)
+    // to normalized device coordinates: (-1,-1) to (1,1) (actually -w to w since it's before
+    // homogenous division).
+    //
+    // For efficiency, this assumes viewport.zw holds the reciprocol of twice the viewport width and
+    // height. On some backends the NDC Y axis is flipped relative to the device and
+    // viewport coords (i.e. it points up instead of down). In those cases, it's also assumed that
+    // viewport.w holds a negative value. In that case the sign(viewport.zw) changes from
+    // subtracting w to adding w.
+    sksl += "sk_Position = float4(viewport.zw*devPosition.xy - sign(viewport.zw)*devPosition.ww,"
             "devPosition.zw);";
 
     if (useShadingStorageBuffer) {
diff --git a/src/gpu/graphite/ContextUtils.h b/src/gpu/graphite/ContextUtils.h
index 453ece8..829be6f 100644
--- a/src/gpu/graphite/ContextUtils.h
+++ b/src/gpu/graphite/ContextUtils.h
@@ -90,14 +90,19 @@
         const DrawParams& params);
 
 // `viewport` should hold the actual viewport set as backend state (defining the NDC -> pixel
-// transform).
-// `replayTranslation` should hold the replay translation provided on insertRecording().
-// It is assumed that `dstCopyOffset` has already accounted for the replay translation.
+// transform). The viewport's dimensions are used to define the SkDevice->NDC transform applied in
+// the vertex shader, but this assumes that the (0,0) device coordinate maps to the corner of the
+// top-left of the NDC cube. The viewport's origin is used in the fragment shader to reconstruct
+// the logical fragment coordinate from the target's current frag coord (which are not relative to
+// active viewport).
+//
+// It is assumed that `dstCopyBounds` is in the same coordinate space as the `viewport` (e.g.
+// final backing target's pixel coords) and that its width and height match the dimensions of the
+// texture to be sampled for dst reads.
 void CollectIntrinsicUniforms(
         const Caps* caps,
         SkIRect viewport,
-        SkIPoint replayTranslation,
-        SkIPoint dstCopyOffset,
+        SkIRect dstCopyBounds,
         UniformManager*);
 
 DstReadRequirement GetDstReadRequirement(const Caps*, std::optional<SkBlendMode>, Coverage);
diff --git a/src/gpu/graphite/dawn/DawnCommandBuffer.cpp b/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
index 5eeac3a..84685ab 100644
--- a/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
+++ b/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
@@ -823,8 +823,7 @@
 
 bool DawnCommandBuffer::updateIntrinsicUniforms(SkIRect viewport) {
     UniformManager intrinsicValues{Layout::kStd140};
-    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fReplayTranslation, fDstCopyOffset,
-                             &intrinsicValues);
+    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fDstCopyBounds, &intrinsicValues);
 
     BindBufferInfo binding =
             fIntrinsicConstants->add(this, UniformDataBlock::Wrap(&intrinsicValues));
diff --git a/src/gpu/graphite/mtl/MtlCommandBuffer.mm b/src/gpu/graphite/mtl/MtlCommandBuffer.mm
index 89d76d6..89796fa 100644
--- a/src/gpu/graphite/mtl/MtlCommandBuffer.mm
+++ b/src/gpu/graphite/mtl/MtlCommandBuffer.mm
@@ -614,8 +614,7 @@
 
 void MtlCommandBuffer::updateIntrinsicUniforms(SkIRect viewport) {
     UniformManager intrinsicValues{Layout::kMetal};
-    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fReplayTranslation, fDstCopyOffset,
-                             &intrinsicValues);
+    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fDstCopyBounds, &intrinsicValues);
     SkSpan<const char> bytes = intrinsicValues.finish();
     fActiveRenderCommandEncoder->setVertexBytes(bytes.data(), bytes.size_bytes(),
                                                 MtlGraphicsPipeline::kIntrinsicUniformBufferIndex);
diff --git a/src/gpu/graphite/task/RenderPassTask.cpp b/src/gpu/graphite/task/RenderPassTask.cpp
index cea979f..a35c7b7 100644
--- a/src/gpu/graphite/task/RenderPassTask.cpp
+++ b/src/gpu/graphite/task/RenderPassTask.cpp
@@ -150,7 +150,7 @@
                                      std::move(depthStencilAttachment),
                                      fDstCopy ? fDstCopy->texture() : nullptr,
                                      fDstCopyBounds,
-                                     SkIRect::MakeSize(fTarget->dimensions()),
+                                     fTarget->dimensions(),
                                      fDrawPasses)) {
         return Status::kSuccess;
     } else {
diff --git a/src/gpu/graphite/vk/VulkanCommandBuffer.cpp b/src/gpu/graphite/vk/VulkanCommandBuffer.cpp
index 7d3ac05..8d587c7 100644
--- a/src/gpu/graphite/vk/VulkanCommandBuffer.cpp
+++ b/src/gpu/graphite/vk/VulkanCommandBuffer.cpp
@@ -371,8 +371,7 @@
     // The SkSL has declared these as a top-level interface block, which will use std140 in Vulkan.
     // If we switch to supporting push constants here, it would be std430 instead.
     UniformManager intrinsicValues{Layout::kStd140};
-    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fReplayTranslation, fDstCopyOffset,
-                             &intrinsicValues);
+    CollectIntrinsicUniforms(fSharedContext->caps(), viewport, fDstCopyBounds, &intrinsicValues);
     SkSpan<const char> bytes = intrinsicValues.finish();
     SkASSERT(bytes.size_bytes() == VulkanResourceProvider::kIntrinsicConstantSize);