[graphite] MtlComputeCommandEncoder

Added MtlComputeCommandEncoder and a WorkgroupSize data structure to
represent global and local work group sizes.
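
A sketch of how the encoder is meant to be driven (illustrative only:
the MtlCommandBuffer integration is not part of this change, and gpu,
cmdBuffer, pso, and buffer below are hypothetical):

    sk_sp<MtlComputeCommandEncoder> encoder =
            MtlComputeCommandEncoder::Make(gpu, cmdBuffer, MTLDispatchTypeSerial);
    encoder->setComputePipelineState(pso);
    encoder->setBuffer(buffer, /*offset=*/0, /*index=*/0);
    encoder->dispatchThreadgroups(WorkgroupSize(8, 8, 1),    // global size
                                  WorkgroupSize(16, 16, 1)); // local size
    encoder->endEncoding();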

Bug: b/240604572
Change-Id: I71e2af983411e6c02e6b326315b88084e9c1af40
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/563518
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Arman Uguray <armansito@google.com>
Reviewed-by: Jim Van Verth <jvanverth@google.com>
diff --git a/gn/graphite.gni b/gn/graphite.gni
index a582940..0284105 100644
--- a/gn/graphite.gni
+++ b/gn/graphite.gni
@@ -34,6 +34,7 @@
   "$_src/CommandTypes.h",
   "$_src/ComputePipeline.cpp",
   "$_src/ComputePipeline.h",
+  "$_src/ComputeTypes.h",
   "$_src/Context.cpp",
   "$_src/ContextPriv.cpp",
   "$_src/ContextPriv.h",
@@ -162,6 +163,7 @@
   "$_src/mtl/MtlCaps.mm",
   "$_src/mtl/MtlCommandBuffer.h",
   "$_src/mtl/MtlCommandBuffer.mm",
+  "$_src/mtl/MtlComputeCommandEncoder.h",
   "$_src/mtl/MtlComputePipeline.h",
   "$_src/mtl/MtlComputePipeline.mm",
   "$_src/mtl/MtlGpu.h",
diff --git a/src/gpu/graphite/ComputeTypes.h b/src/gpu/graphite/ComputeTypes.h
new file mode 100644
index 0000000..3b5d865
--- /dev/null
+++ b/src/gpu/graphite/ComputeTypes.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef skgpu_graphite_ComputeTypes_DEFINED
+#define skgpu_graphite_ComputeTypes_DEFINED
+
+#include "src/gpu/graphite/Buffer.h"
+
+namespace skgpu::graphite {
+
+/**
+ * Defines the space that a compute shader operates on. A problem space is logically divided into
+ * abstract "work groups" (or "thread groups" in Metal/D3D12).
+ *
+ * The "work group count" or "global size" of the work group is a 3-dimensional number that defines
+ * the size of the problem space. The user must provide the global size to define the number of
+ * work groups that execute as part of a dispatch.
+ *
+ * The local size of a work group defines the number of parallel execution units that run in that
+ * group (these are called "threads" in Metal/D3D12, "wavefronts" in OpenCL, "warps" in CUDA). The
+ * local size is defined in 3 dimensions and must be determined based on hardware limitations, which
+ * can be queried via Caps::maxComputeWorkgroupSize() (for each individual dimension) and
+ * Caps::maxComputeInvocationsPerWorkgroup().
+ *
+ * The WorkgroupSize type is used to represent both global size and local size.
+ */
+struct WorkgroupSize {
+    WorkgroupSize() = default;
+    WorkgroupSize(uint32_t width, uint32_t height, uint32_t depth)
+            : fWidth(width)
+            , fHeight(height)
+            , fDepth(depth) {}
+
+    uint32_t fWidth = 1;
+    uint32_t fHeight = 1;
+    uint32_t fDepth = 1;
+};
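+
+// Example (illustrative, not part of the API; imageWidth/imageHeight are
+// hypothetical): covering a 2D image with a fixed 16x16 local size. The
+// global size is the ceil-divided work group count, so partial tiles at the
+// right/bottom edges still get a work group (the kernel is then expected to
+// bounds-check):
+//
+//     constexpr uint32_t kLocalDim = 16;
+//     WorkgroupSize localSize(kLocalDim, kLocalDim, 1);
+//     WorkgroupSize globalSize((imageWidth  + kLocalDim - 1) / kLocalDim,
+//                              (imageHeight + kLocalDim - 1) / kLocalDim,
+//                              1);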
+
+}  // namespace skgpu::graphite
+
+#endif  // skgpu_graphite_ComputeTypes_DEFINED
diff --git a/src/gpu/graphite/mtl/MtlComputeCommandEncoder.h b/src/gpu/graphite/mtl/MtlComputeCommandEncoder.h
new file mode 100644
index 0000000..d0c310d
--- /dev/null
+++ b/src/gpu/graphite/mtl/MtlComputeCommandEncoder.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef skgpu_graphite_MtlComputeCommandEncoder_DEFINED
+#define skgpu_graphite_MtlComputeCommandEncoder_DEFINED
+
+#include "include/core/SkRefCnt.h"
+#include "include/ports/SkCFObject.h"
+#include "src/gpu/graphite/ComputeTypes.h"
+#include "src/gpu/graphite/Resource.h"
+
+#import <Metal/Metal.h>
+
+namespace skgpu::graphite {
+
+/**
+ * Wraps a MTLComputeCommandEncoder object and associated tracked state.
+ */
+class MtlComputeCommandEncoder : public Resource {
+public:
+    static sk_sp<MtlComputeCommandEncoder> Make(const Gpu* gpu,
+                                                id<MTLCommandBuffer> commandBuffer,
+                                                MTLDispatchType dispatchType) {
+        // Adding a retain here to keep our own ref separate from the autorelease pool
+        sk_cfp<id<MTLComputeCommandEncoder>> encoder =
+                sk_ret_cfp([commandBuffer computeCommandEncoderWithDispatchType:dispatchType]);
+        return sk_sp<MtlComputeCommandEncoder>(
+                new MtlComputeCommandEncoder(gpu, std::move(encoder)));
+    }
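+
+    // Note on dispatchType: MTLDispatchTypeSerial encodes dispatches that
+    // execute one after another, while MTLDispatchTypeConcurrent lets them
+    // overlap, leaving the caller responsible for any memory barriers between
+    // dependent dispatches.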
+
+    void setLabel(NSString* label) { [(*fCommandEncoder) setLabel:label]; }
+
+    void pushDebugGroup(NSString* string) { [(*fCommandEncoder) pushDebugGroup:string]; }
+    void popDebugGroup() { [(*fCommandEncoder) popDebugGroup]; }
+    void insertDebugSignpost(NSString* string) { [(*fCommandEncoder) insertDebugSignpost:string]; }
+
+    void setComputePipelineState(id<MTLComputePipelineState> pso) {
+        if (fCurrentComputePipelineState != pso) {
+            [(*fCommandEncoder) setComputePipelineState:pso];
+            fCurrentComputePipelineState = pso;
+        }
+    }
+
+    void setBuffer(id<MTLBuffer> buffer, NSUInteger offset, NSUInteger index) {
+        SkASSERT(buffer != nil);
+        // TODO(skia:13580): As with the setVertexBufferOffset:atIndex: and
+        // setFragmentBufferOffset:atIndex: methods of MTLRenderCommandEncoder,
+        // Apple recommends using setBufferOffset:atIndex: to avoid rebinding a buffer when only
+        // updating its offset. Consider tracking buffers/offsets by index and limiting calls to
+        // setBuffer:offset:atIndex:.
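+        //
+        // One possible shape for that tracking (an illustrative sketch, not
+        // part of this change; fBufferBindings would be a new member array):
+        //
+        //     if (fBufferBindings[index].fBuffer == buffer) {
+        //         if (fBufferBindings[index].fOffset != offset) {
+        //             [(*fCommandEncoder) setBufferOffset:offset atIndex:index];
+        //             fBufferBindings[index].fOffset = offset;
+        //         }
+        //         return;
+        //     }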
+        [(*fCommandEncoder) setBuffer:buffer offset:offset atIndex:index];
+    }
+
+    void setTexture(id<MTLTexture> texture, NSUInteger index) {
+        SkASSERT(texture != nil);
+        [(*fCommandEncoder) setTexture:texture atIndex:index];
+    }
+
+    void setSamplerState(id<MTLSamplerState> sampler, NSUInteger index) {
+        SkASSERT(sampler != nil);
+        [(*fCommandEncoder) setSamplerState:sampler atIndex:index];
+    }
+
+    void dispatchThreadgroups(const WorkgroupSize& globalSize, const WorkgroupSize& localSize) {
+        MTLSize threadgroupCount =
+                MTLSizeMake(globalSize.fWidth, globalSize.fHeight, globalSize.fDepth);
+        MTLSize threadsPerThreadgroup =
+                MTLSizeMake(localSize.fWidth, localSize.fHeight, localSize.fDepth);
+        [(*fCommandEncoder) dispatchThreadgroups:threadgroupCount
+                           threadsPerThreadgroup:threadsPerThreadgroup];
+    }
+
+    void endEncoding() { [(*fCommandEncoder) endEncoding]; }
+
+private:
+    MtlComputeCommandEncoder(const Gpu* gpu, sk_cfp<id<MTLComputeCommandEncoder>> encoder)
+            : Resource(gpu, Ownership::kOwned, SkBudgeted::kYes)
+            , fCommandEncoder(std::move(encoder)) {}
+
+    void freeGpuData() override { fCommandEncoder.reset(); }
+
+    sk_cfp<id<MTLComputeCommandEncoder>> fCommandEncoder;
+
+    id<MTLComputePipelineState> fCurrentComputePipelineState = nil;
+
+    // TODO(skia:13580): Keep track of texture/sampler and buffer resources?
+};
+
+}  // namespace skgpu::graphite
+
+#endif  // skgpu_graphite_MtlComputeCommandEncoder_DEFINED