blob: eef79c8dd3d8113699f00e5560ab954fbf2ebd50 [file] [log] [blame]
/*
* Copyright 2023 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef skgpu_graphite_ComputeStep_DEFINED
#define skgpu_graphite_ComputeStep_DEFINED
#include "include/core/SkSpan.h"
#include "src/core/SkEnumBitMask.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include <optional>
#include <string>
#include <string_view>
#include <vector>
namespace skgpu::graphite {
// Forward declarations. Both types are only referenced by const reference in this header, so
// their complete definitions are not needed here.
class DrawParams;
struct ResourceBindingRequirements;
/**
* A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep`
* implementation describes an invocation of a compute program and its data binding layout.
*
* A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including
* geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or
* buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via
* "slots"), transient storage buffers/textures that are only used within an individual dispatch,
* geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster
* pipeline stage, as well as texture outputs.
*
* The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by
* operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot
* number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will
* have access to the same backing resource object through that slot:
*
*      _______________              _______________
*     |               |            |               |
*     |             -----[Slot 0]-----             |
*     |               |            |               |
*     |             -----[Slot 1]-----             |
*     | ComputeStep 1 |            | ComputeStep 2 |
*     |             -----[Slot 2]  |               |
*     |               |            |               |
*     |               |  [Slot 3]-----             |
*     |               |            |               |
*      ---------------              ---------------
*
* In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are
* exclusively accessed by ComputeStep 1 and 2 respectively. Alternatively, slots 2 and 3 could be
* declared as "private" resources which are visible to a single ComputeStep.
*
* Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be
* used to assign the draw buffers of a RenderStep.
*
* It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of
* ComputeStep and RenderStep invocations have a compatible resource and data-flow layout.
*/
class ComputeStep {
public:
// Describes how a resource participates in the data flow of the enclosing draw operation.
enum class DataFlow {
// Writable Buffer bindings that the `ComputeStep` fills with data for a raster stage: vertex
// attributes, indices, instance attributes, and indirect draw parameters, respectively. If
// present, these buffers can be used to encode the draw command for a subsequent
// `RenderStep`.
kVertexOutput,
kIndexOutput,
kInstanceOutput,
kIndirectDrawOutput,
// A private binding is a resource that is only visible to a single ComputeStep invocation.
kPrivate,
// Bindings with a slot number that can be used to forward data between a series of
// `ComputeStep`s. This DataFlow type is accompanied with a "slot number" (see
// `ResourceDesc::fSlot`) that can be shared by multiple `ComputeStep`s in a group.
kShared,
};
// The kind of GPU resource that a binding refers to.
enum class ResourceType {
kUniformBuffer,
kStorageBuffer,
// TODO(b/238794438): Support sampled and storage texture types.
};
// Describes how the contents of a resource are initialized before the pipeline executes.
enum class ResourcePolicy {
// No initialization policy is requested for the resource.
kNone,
// The memory of the resource will be initialized to 0
kClear,
// The ComputeStep will be asked to initialize the memory on the CPU via
// `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost
// on platforms that do not allow buffers to be mapped in shared memory.
//
// If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same
// shared slot number, only the first ComputeStep in the series will receive a call to
// `ComputeStep::prepareBuffer`.
kMapped,
};
// Declares a single resource binding of a ComputeStep: what it is, how it flows between steps,
// and how its contents are initialized. The order of the descriptors returned by
// `ComputeStep::resources()` determines the binding order of the SkSL program.
struct ResourceDesc final {
// The kind of resource being bound.
ResourceType fType;
// How the resource is shared with other steps or with the raster stage.
DataFlow fFlow;
// How the resource contents are initialized.
ResourcePolicy fPolicy;
// This field only has meaning (and must have a non-negative value) if `fFlow` is
// `DataFlow::kShared`.
int fSlot = -1;
// `slot` defaults to -1, which is appropriate for every flow other than `DataFlow::kShared`.
// Note that this constructor does not itself validate the slot against `flow`.
constexpr ResourceDesc(ResourceType type,
DataFlow flow,
ResourcePolicy policy,
int slot = -1)
: fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {}
};
virtual ~ComputeStep() = default;
// Returns a complete SkSL compute program. The returned SkSL must declare all resource bindings
// starting at `nextBindingIndex` in the order in which they are enumerated by
// `ComputeStep::resources()`.
virtual std::string computeSkSL(const ResourceBindingRequirements&,
int nextBindingIndex) const = 0;
// This method will be called for entries in the ComputeStep's resource list to determine the
// required allocation sizes. The ComputeStep should return the minimum allocation size for the
// resource. The default implementation returns 0.
//
// TODO(armansito): The only piece of information that the ComputeStep currently uses to make
// this determination is the draw parameters. This approach particularly doesn't address (and
// likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where
// the effective data sizes may not be known on the CPU.
//
// For now, we assume that there will be a strict data contract between chained ComputeSteps.
// The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow
// different schemes (such as dynamic allocations and buffer pools) but may not be easily
// validated on the CPU.
virtual size_t calculateResourceSize(const DrawParams&,
int resourceIndex,
const ResourceDesc&) const {
return 0u;
}
// Return the global dispatch size (aka "workgroup count") for this step based on the draw
// parameters. The default implementation returns a default-constructed `WorkgroupSize`, which
// is intended to be a workgroup count of (1, 1, 1).
//
// TODO(armansito): The only piece of information that the ComputeStep currently gets to make
// this determination is the draw parameters. There might be other inputs to this calculation
// for intermediate compute stages that may not be known on the CPU. One way to address this is
// to drive the workgroup dimensions via an indirect dispatch.
virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const {
return WorkgroupSize();
}
// Populates a buffer resource which was specified as "mapped". This method will only be called
// once for a resource right after its allocation and before pipeline execution. For shared
// resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer.
//
// `resourceIndex` matches the order in which `resource` was enumerated by
// `ComputeStep::resources()`. `buffer` points to the memory to populate and `bufferSize` is its
// size.
//
// NOTE(review): the meaning of `ssboIndex` is not documented in this header — confirm against
// the callers and document it here.
virtual void prepareBuffer(const DrawParams&,
int ssboIndex,
int resourceIndex,
const ResourceDesc& resource,
void* buffer,
size_t bufferSize) const;
// Returns a read-only view of the resource descriptors declared by this step. The view refers
// to storage owned by this object.
SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); }
// Identifier that can be used as part of a unique key for a compute pipeline state object
// associated with this `ComputeStep`.
uint32_t uniqueID() const { return fUniqueID; }
// Returns a debug name for the subclass implementation. The returned pointer refers to storage
// owned by this object.
const char* name() const { return fName.c_str(); }
// The size of the workgroup for this ComputeStep's entry point function. This value is hardware
// dependent. On Metal, this value should be used when invoking the dispatch API call. On all
// other backends, this value will be baked into the pipeline.
WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; }
// Data flow behavior queries. Each one reports whether the corresponding `Flags` bit is set in
// `fFlags`.
bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; }
bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; }
bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; }
bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; }
protected:
// Only subclasses may construct a ComputeStep. The constructor is defined out of line.
// NOTE(review): presumably it stores `name`, `localDispatchSize` and a copy of `resources`, and
// derives `fUniqueID` and `fFlags` — confirm against the implementation file.
ComputeStep(std::string_view name,
WorkgroupSize localDispatchSize,
SkSpan<const ResourceDesc> resources);
private:
// Bit flags backing the data flow behavior queries above; one bit per raster-stage output kind.
enum class Flags : uint8_t {
kNone = 0b0000,
kOutputsVertexBuffer = 0b0001,
kOutputsIndexBuffer = 0b0010,
kOutputsInstanceBuffer = 0b0100,
kOutputsIndirectDrawBuffer = 0b1000,
};
// `Flags` is private, so the bitmask operators are declared as friends here.
SK_DECL_BITMASK_OPS_FRIENDS(Flags);
// Disallow copy and move. Declaring these constructors also leaves the class without usable
// copy or move assignment operators.
ComputeStep(const ComputeStep&) = delete;
ComputeStep(ComputeStep&&) = delete;
// Value returned by `uniqueID()`.
uint32_t fUniqueID;
// Set of `Flags` bits reported by the data flow behavior queries.
SkEnumBitMask<Flags> fFlags;
// Debug name returned by `name()`.
std::string fName;
// Resource descriptors returned by `resources()`.
std::vector<ResourceDesc> fResources;
// TODO(b/240615224): Subclasses should simply specify the workgroup size that they need.
// The ComputeStep constructor should check and reduce that number based on the maximum
// supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the
// dispatch API call. On other backends, we'll use this value to generate the right SkSL
// workgroup size declaration to avoid any validation failures.
WorkgroupSize fLocalDispatchSize;
};
// Provides the bitmask operators for `ComputeStep::Flags` that the class declares as friends.
// NOTE(review): the exact set of operators comes from the macro's definition, which is not in
// this file (presumably src/core/SkEnumBitMask.h) — confirm there.
SK_MAKE_BITMASK_OPS(ComputeStep::Flags);
} // namespace skgpu::graphite
#endif // skgpu_graphite_ComputeStep_DEFINED