| /* |
| * Copyright 2023 Google LLC |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #ifndef skgpu_graphite_ComputeStep_DEFINED |
| #define skgpu_graphite_ComputeStep_DEFINED |
| |
| #include "include/core/SkSpan.h" |
| #include "src/core/SkEnumBitMask.h" |
| #include "src/gpu/graphite/ComputeTypes.h" |
| |
| #include <optional> |
| #include <string> |
| #include <string_view> |
| #include <vector> |
| |
| namespace skgpu::graphite { |
| |
| class DrawParams; |
| struct ResourceBindingRequirements; |
| |
| /** |
| * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep` |
| * implementation describes an invocation of a compute program and its data binding layout. |
| * |
| * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including |
| * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or |
| * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via |
| * "slots"), transient storage buffers/textures that are only used within an individual dispatch, |
| * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster |
| * pipeline stage, as well as texture outputs. |
| * |
| * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by |
| * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot |
| * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will |
| * have access to the same backing resource object through that slot: |
| * |
| * _______________ _______________ |
| * | | | | |
| * | ---[Slot 0]--- | |
| * | | | | |
| * | ---[Slot 1]--- | |
| * | ComputeStep 1 | | ComputeStep 2 | |
| * | ---[Slot 2] | | |
| * | | | | |
| * | | [Slot 3]--- | |
| * | | | | |
| * --------------- --------------- |
| * |
| * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are |
| * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be |
| * declared as "private" resources which are visible to a single ComputeStep. |
| * |
| * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be |
| * used to assign the draw buffers of a RenderStep. |
| * |
| * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of |
| * ComputeStep and RenderStep invocations have a compatible resource and data-flow layout. |
| */ |
| class ComputeStep { |
| public: |
| enum class DataFlow { |
| // A set of writable Buffer bindings that the `ComputeStep` will write vertex and instance |
| // attributes to. If present, these buffers can be used to encode the draw command for a |
| // subsequent `RenderStep`. |
| kVertexOutput, |
| kIndexOutput, |
| kInstanceOutput, |
| kIndirectDrawOutput, |
| |
| // A private binding is a resource that is only visible to a single ComputeStep invocation. |
| kPrivate, |
| |
| // Bindings with a slot number that can be used to forward data between a series of |
| // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be |
| // shared by multiple `ComputeStep`s in a group. |
| kShared, |
| }; |
| |
| enum class ResourceType { |
| kUniformBuffer, |
| kStorageBuffer, |
| |
| // TODO(b/238794438): Support sampled and storage texture types. |
| }; |
| |
| enum class ResourcePolicy { |
| kNone, |
| |
| // The memory of the resource will be initialized to 0 |
| kClear, |
| |
| // The ComputeStep will be asked to initialize the memory on the CPU via |
| // `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost |
| // on platforms that do not allow buffers to be mapped in shared memory. |
| // |
| // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same |
| // shared slot number, only the first ComputeStep in the series will receive a call to |
| // `ComputeStep::prepareBuffer`. |
| kMapped, |
| }; |
| |
| struct ResourceDesc final { |
| ResourceType fType; |
| DataFlow fFlow; |
| ResourcePolicy fPolicy; |
| |
| // This field only has meaning (and must have a non-negative value) if `fFlow` is |
| // `DataFlow::kShared`. |
| int fSlot = -1; |
| |
| constexpr ResourceDesc(ResourceType type, |
| DataFlow flow, |
| ResourcePolicy policy, |
| int slot = -1) |
| : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {} |
| }; |
| |
| virtual ~ComputeStep() = default; |
| |
| // Returns a complete SkSL compute program. The returned SkSL must declare all resoure bindings |
| // starting at `nextBindingIndex` in the order in which they are enumerated by |
| // `ComputeStep::resources()`. |
| virtual std::string computeSkSL(const ResourceBindingRequirements&, |
| int nextBindingIndex) const = 0; |
| |
| // This method will be called for entries in the ComputeStep's resource list to determine the |
| // required allocation sizes. The ComputeStep should return the minimum allocation size for the |
| // resource. |
| // |
| // TODO(armansito): The only piece of information that the ComputeStep currently uses to make |
| // this determination is the draw parameters. This approach particularly doesn't address (and |
| // likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where |
| // the effective data sizes may not be known on the CPU. |
| // |
| // For now, we assume that there will be a strict data contract between chained ComputeSteps. |
| // The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow |
| // different schemes (such as dynamic allocations and buffer pools) but may not be easily |
| // validated on the CPU. |
| virtual size_t calculateResourceSize(const DrawParams&, |
| int resourceIndex, |
| const ResourceDesc&) const { |
| return 0u; |
| } |
| |
| // Return the global dispatch size (aka "workgroup count") for this step based on the draw |
| // parameters. The default value is a workgroup count of (1, 1, 1) |
| // |
| // TODO(armansito): The only piece of information that the ComputeStep currently gets to make |
| // this determination is the draw parameters. There might be other inputs to this calculation |
| // for intermediate compute stages that may not be known on the CPU. One way to address this is |
| // to drive the workgroup dimensions via an indirect dispatch. |
| virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const { |
| return WorkgroupSize(); |
| } |
| |
| // Populates a buffer resource which was specified as "mapped". This method will only be called |
| // once for a resource right after its allocation and before pipeline execution. For shared |
| // resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer. |
| // |
| // `resourceIndex` matches the order in which `resource` was enumerated by |
| // `ComputeStep::resources()`. |
| virtual void prepareBuffer(const DrawParams&, |
| int ssboIndex, |
| int resourceIndex, |
| const ResourceDesc& resource, |
| void* buffer, |
| size_t bufferSize) const; |
| |
| SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); } |
| |
| // Identifier that can be used as part of a unique key for a compute pipeline state object |
| // associated with this `ComputeStep`. |
| uint32_t uniqueID() const { return fUniqueID; } |
| |
| // Returns a debug name for the subclass implementation. |
| const char* name() const { return fName.c_str(); } |
| |
| // The size of the workgroup for this ComputeStep's entry point function. This value is hardware |
| // dependent. On Metal, this value should be used when invoking the dispatch API call. On all |
| // other backends, this value will be baked into the pipeline. |
| WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; } |
| |
| // Data flow behavior queries: |
| bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; } |
| bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; } |
| bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; } |
| bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; } |
| |
| protected: |
| ComputeStep(std::string_view name, |
| WorkgroupSize localDispatchSize, |
| SkSpan<const ResourceDesc> resources); |
| |
| private: |
| enum class Flags : uint8_t { |
| kNone = 0b0000, |
| kOutputsVertexBuffer = 0b0001, |
| kOutputsIndexBuffer = 0b0010, |
| kOutputsInstanceBuffer = 0b0100, |
| kOutputsIndirectDrawBuffer = 0b1000, |
| }; |
| SK_DECL_BITMASK_OPS_FRIENDS(Flags); |
| |
| // Disallow copy and move |
| ComputeStep(const ComputeStep&) = delete; |
| ComputeStep(ComputeStep&&) = delete; |
| |
| uint32_t fUniqueID; |
| SkEnumBitMask<Flags> fFlags; |
| std::string fName; |
| std::vector<ResourceDesc> fResources; |
| |
| // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need. |
| // The ComputeStep constructor should check and reduce that number based on the maximum |
| // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the |
| // dispatch API call. On other backends, we'll use this value to generate the right SkSL |
| // workgroup size declaration to avoid any validation failures. |
| WorkgroupSize fLocalDispatchSize; |
| }; |
| SK_MAKE_BITMASK_OPS(ComputeStep::Flags); |
| |
| } // namespace skgpu::graphite |
| |
| #endif // skgpu_graphite_ComputeStep_DEFINED |