src/gpu/graphite/compute/ComputeStep.h - skia - Git at Google

 /*
  * Copyright 2023 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #ifndef skgpu_graphite_ComputeStep_DEFINED
 #define skgpu_graphite_ComputeStep_DEFINED

 #include "include/core/SkSpan.h"
 #include "src/core/SkEnumBitMask.h"
 #include "src/gpu/graphite/ComputeTypes.h"

 #include <optional>
 #include <string>
 #include <string_view>
 #include <vector>

 namespace skgpu::graphite {

 // Forward declarations. Both types are only referenced through const references in this header,
 // so their complete definitions are not needed here.
 class DrawParams;
 struct ResourceBindingRequirements;

 /**
  * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep`
  * implementation describes an invocation of a compute program and its data binding layout.
  *
  * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including
  * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or
  * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via
  * "slots"), transient storage buffers/textures that are only used within an individual dispatch,
  * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster
  * pipeline stage, as well as texture outputs.
  *
  * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by
  * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot
  * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will
  * have access to the same backing resource object through that slot:
  *
  *      _______________                _______________
  *     |               |              |               |
  *     |                ---[Slot 0]---                |
  *     |               |              |               |
  *     |                ---[Slot 1]---                |
  *     | ComputeStep 1 |              | ComputeStep 2 |
  *     |                ---[Slot 2]   |               |
  *     |               |              |               |
  *     |               |   [Slot 3]---                |
  *     |               |              |               |
  *      ---------------                ---------------
  *
  * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are
  * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be
  * declared as "private" resources which are visible to a single ComputeStep.
  *
  * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be
  * used to assign the draw buffers of a RenderStep.
  *
  * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of
  * ComputeStep and RenderStep invocations have a compatible resource and data-flow layout.
  */
 class ComputeStep {
 public:
     // Describes how a declared resource participates in the data flow of a series of
     // `ComputeStep`/`RenderStep` invocations.
     enum class DataFlow {
         // A set of writable Buffer bindings that the `ComputeStep` will write vertex and instance
         // attributes to. If present, these buffers can be used to encode the draw command for a
         // subsequent `RenderStep`.
         kVertexOutput,
         kIndexOutput,
         kInstanceOutput,
         kIndirectDrawOutput,

         // A private binding is a resource that is only visible to a single ComputeStep invocation.
         kPrivate,

         // Bindings with a slot number that can be used to forward data between a series of
         // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be
         // shared by multiple `ComputeStep`s in a group.
         kShared,
     };

     // The kind of GPU resource that backs a binding.
     enum class ResourceType {
         kUniformBuffer,
         kStorageBuffer,

         // TODO(b/238794438): Support sampled and storage texture types.
     };

     // Selects how the memory of a resource is initialized prior to pipeline execution.
     enum class ResourcePolicy {
         kNone,

         // The memory of the resource will be initialized to 0
         kClear,

         // The ComputeStep will be asked to initialize the memory on the CPU via
         // `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost
         // on platforms that do not allow buffers to be mapped in shared memory.
         //
         // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same
         // shared slot number, only the first ComputeStep in the series will receive a call to
         // `ComputeStep::prepareBuffer`.
         kMapped,
     };

     // Describes a single resource declared by a ComputeStep: its type, its role in the data flow,
     // its initialization policy and (for shared resources) its slot number.
     struct ResourceDesc final {
         ResourceType fType;
         DataFlow fFlow;
         ResourcePolicy fPolicy;

         // This field only has meaning (and must have a non-negative value) if `fFlow` is
         // `DataFlow::kShared`.
         int fSlot = -1;

         // `slot` may be omitted for any flow other than `DataFlow::kShared`; it then keeps the
         // "no slot" value of -1.
         constexpr ResourceDesc(ResourceType type,
                                DataFlow flow,
                                ResourcePolicy policy,
                                int slot = -1)
                 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {}
     };

     virtual ~ComputeStep() = default;

     // Returns a complete SkSL compute program. The returned SkSL must declare all resource
     // bindings starting at `nextBindingIndex` in the order in which they are enumerated by
     // `ComputeStep::resources()`.
     virtual std::string computeSkSL(const ResourceBindingRequirements&,
                                     int nextBindingIndex) const = 0;

     // This method will be called for entries in the ComputeStep's resource list to determine the
     // required allocation sizes. The ComputeStep should return the minimum allocation size for the
     // resource. The default implementation returns 0.
     //
     // TODO(armansito): The only piece of information that the ComputeStep currently uses to make
     // this determination is the draw parameters. This approach particularly doesn't address (and
     // likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where
     // the effective data sizes may not be known on the CPU.
     //
     // For now, we assume that there will be a strict data contract between chained ComputeSteps.
     // The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow
     // different schemes (such as dynamic allocations and buffer pools) but may not be easily
     // validated on the CPU.
     virtual size_t calculateResourceSize(const DrawParams&,
                                          int resourceIndex,
                                          const ResourceDesc&) const {
         return 0u;
     }

     // Return the global dispatch size (aka "workgroup count") for this step based on the draw
     // parameters. The default value is a workgroup count of (1, 1, 1)
     //
     // TODO(armansito): The only piece of information that the ComputeStep currently gets to make
     // this determination is the draw parameters. There might be other inputs to this calculation
     // for intermediate compute stages that may not be known on the CPU. One way to address this is
     // to drive the workgroup dimensions via an indirect dispatch.
     virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const {
         return WorkgroupSize();
     }

     // Populates a buffer resource which was specified as "mapped". This method will only be called
     // once for a resource right after its allocation and before pipeline execution. For shared
     // resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer.
     //
     // `resourceIndex` matches the order in which `resource` was enumerated by
     // `ComputeStep::resources()`. `buffer` points to the memory to populate and `bufferSize` is
     // its size.
     //
     // NOTE(review): `ssboIndex` is not described anywhere in this header; presumably it identifies
     // the storage buffer being prepared -- confirm against the out-of-line definition and callers.
     virtual void prepareBuffer(const DrawParams&,
                                int ssboIndex,
                                int resourceIndex,
                                const ResourceDesc& resource,
                                void* buffer,
                                size_t bufferSize) const;

     // The resources declared by this ComputeStep, in binding order.
     SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); }

     // Identifier that can be used as part of a unique key for a compute pipeline state object
     // associated with this `ComputeStep`.
     uint32_t uniqueID() const { return fUniqueID; }

     // Returns a debug name for the subclass implementation. The returned pointer refers to
     // storage owned by this object.
     const char* name() const { return fName.c_str(); }

     // The size of the workgroup for this ComputeStep's entry point function. This value is hardware
     // dependent. On Metal, this value should be used when invoking the dispatch API call. On all
     // other backends, this value will be baked into the pipeline.
     WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; }

     // Data flow behavior queries:
     bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; }
     bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; }
     bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; }
     bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; }

 protected:
     // Only subclasses may construct a ComputeStep. The constructor is defined out of line.
     ComputeStep(std::string_view name,
                 WorkgroupSize localDispatchSize,
                 SkSpan<const ResourceDesc> resources);

 private:
     // Bit flags backing the data flow behavior queries above.
     // NOTE(review): presumably computed from the declared resources by the out-of-line
     // constructor -- confirm.
     enum class Flags : uint8_t {
         kNone                      = 0b0000,
         kOutputsVertexBuffer       = 0b0001,
         kOutputsIndexBuffer        = 0b0010,
         kOutputsInstanceBuffer     = 0b0100,
         kOutputsIndirectDrawBuffer = 0b1000,
     };
     SK_DECL_BITMASK_OPS_FRIENDS(Flags);

     // Disallow copy and move, for both construction and assignment. The assignment operators are
     // deleted explicitly so that the intent does not depend on the implicit deletion rules that
     // are triggered by the user-declared move constructor.
     ComputeStep(const ComputeStep&)            = delete;
     ComputeStep(ComputeStep&&)                 = delete;
     ComputeStep& operator=(const ComputeStep&) = delete;
     ComputeStep& operator=(ComputeStep&&)      = delete;

     uint32_t fUniqueID;
     SkEnumBitMask<Flags> fFlags;
     std::string fName;
     std::vector<ResourceDesc> fResources;

     // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need.
     // The ComputeStep constructor should check and reduce that number based on the maximum
     // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the
     // dispatch API call. On other backends, we'll use this value to generate the right SkSL
     // workgroup size declaration to avoid any validation failures.
     WorkgroupSize fLocalDispatchSize;
 };
 SK_MAKE_BITMASK_OPS(ComputeStep::Flags);

 }  // namespace skgpu::graphite

 #endif  // skgpu_graphite_ComputeStep_DEFINED
	/*
	* Copyright 2023 Google LLC
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#ifndef skgpu_graphite_ComputeStep_DEFINED
	#define skgpu_graphite_ComputeStep_DEFINED

	#include "include/core/SkSpan.h"
	#include "src/core/SkEnumBitMask.h"
	#include "src/gpu/graphite/ComputeTypes.h"

	#include <optional>
	#include <string>
	#include <string_view>
	#include <vector>

	namespace skgpu::graphite {

	// Forward declarations. Both types are only referenced through const references in this header,
	// so their complete definitions are not needed here.
	class DrawParams;
	struct ResourceBindingRequirements;

	/**
	* A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep`
	* implementation describes an invocation of a compute program and its data binding layout.
	*
	* A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including
	* geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or
	* buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via
	* "slots"), transient storage buffers/textures that are only used within an individual dispatch,
	* geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster
	* pipeline stage, as well as texture outputs.
	*
	* The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by
	* operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot
	* number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will
	* have access to the same backing resource object through that slot:
	*
	*      _______________                _______________
	*     |               |              |               |
	*     |                ---[Slot 0]---                |
	*     |               |              |               |
	*     |                ---[Slot 1]---                |
	*     | ComputeStep 1 |              | ComputeStep 2 |
	*     |                ---[Slot 2]   |               |
	*     |               |              |               |
	*     |               |   [Slot 3]---                |
	*     |               |              |               |
	*      ---------------                ---------------
	*
	* In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are
	* exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be
	* declared as "private" resources which are visible to a single ComputeStep.
	*
	* Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be
	* used to assign the draw buffers of a RenderStep.
	*
	* It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of
	* ComputeStep and RenderStep invocations have a compatible resource and data-flow layout.
	*/
	class ComputeStep {
	public:
	    // Describes how a declared resource participates in the data flow of a series of
	    // `ComputeStep`/`RenderStep` invocations.
	    enum class DataFlow {
	        // A set of writable Buffer bindings that the `ComputeStep` will write vertex and instance
	        // attributes to. If present, these buffers can be used to encode the draw command for a
	        // subsequent `RenderStep`.
	        kVertexOutput,
	        kIndexOutput,
	        kInstanceOutput,
	        kIndirectDrawOutput,

	        // A private binding is a resource that is only visible to a single ComputeStep invocation.
	        kPrivate,

	        // Bindings with a slot number that can be used to forward data between a series of
	        // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be
	        // shared by multiple `ComputeStep`s in a group.
	        kShared,
	    };

	    // The kind of GPU resource that backs a binding.
	    enum class ResourceType {
	        kUniformBuffer,
	        kStorageBuffer,

	        // TODO(b/238794438): Support sampled and storage texture types.
	    };

	    // Selects how the memory of a resource is initialized prior to pipeline execution.
	    enum class ResourcePolicy {
	        kNone,

	        // The memory of the resource will be initialized to 0
	        kClear,

	        // The ComputeStep will be asked to initialize the memory on the CPU via
	        // `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost
	        // on platforms that do not allow buffers to be mapped in shared memory.
	        //
	        // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same
	        // shared slot number, only the first ComputeStep in the series will receive a call to
	        // `ComputeStep::prepareBuffer`.
	        kMapped,
	    };

	    // Describes a single resource declared by a ComputeStep: its type, its role in the data flow,
	    // its initialization policy and (for shared resources) its slot number.
	    struct ResourceDesc final {
	        ResourceType fType;
	        DataFlow fFlow;
	        ResourcePolicy fPolicy;

	        // This field only has meaning (and must have a non-negative value) if `fFlow` is
	        // `DataFlow::kShared`.
	        int fSlot = -1;

	        // `slot` may be omitted for any flow other than `DataFlow::kShared`; it then keeps the
	        // "no slot" value of -1.
	        constexpr ResourceDesc(ResourceType type,
	                               DataFlow flow,
	                               ResourcePolicy policy,
	                               int slot = -1)
	                : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {}
	    };

	    virtual ~ComputeStep() = default;

	    // Returns a complete SkSL compute program. The returned SkSL must declare all resource
	    // bindings starting at `nextBindingIndex` in the order in which they are enumerated by
	    // `ComputeStep::resources()`.
	    virtual std::string computeSkSL(const ResourceBindingRequirements&,
	                                    int nextBindingIndex) const = 0;

	    // This method will be called for entries in the ComputeStep's resource list to determine the
	    // required allocation sizes. The ComputeStep should return the minimum allocation size for the
	    // resource. The default implementation returns 0.
	    //
	    // TODO(armansito): The only piece of information that the ComputeStep currently uses to make
	    // this determination is the draw parameters. This approach particularly doesn't address (and
	    // likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where
	    // the effective data sizes may not be known on the CPU.
	    //
	    // For now, we assume that there will be a strict data contract between chained ComputeSteps.
	    // The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow
	    // different schemes (such as dynamic allocations and buffer pools) but may not be easily
	    // validated on the CPU.
	    virtual size_t calculateResourceSize(const DrawParams&,
	                                         int resourceIndex,
	                                         const ResourceDesc&) const {
	        return 0u;
	    }

	    // Return the global dispatch size (aka "workgroup count") for this step based on the draw
	    // parameters. The default value is a workgroup count of (1, 1, 1)
	    //
	    // TODO(armansito): The only piece of information that the ComputeStep currently gets to make
	    // this determination is the draw parameters. There might be other inputs to this calculation
	    // for intermediate compute stages that may not be known on the CPU. One way to address this is
	    // to drive the workgroup dimensions via an indirect dispatch.
	    virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const {
	        return WorkgroupSize();
	    }

	    // Populates a buffer resource which was specified as "mapped". This method will only be called
	    // once for a resource right after its allocation and before pipeline execution. For shared
	    // resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer.
	    //
	    // `resourceIndex` matches the order in which `resource` was enumerated by
	    // `ComputeStep::resources()`. `buffer` points to the memory to populate and `bufferSize` is
	    // its size.
	    //
	    // NOTE(review): `ssboIndex` is not described anywhere in this header; presumably it identifies
	    // the storage buffer being prepared -- confirm against the out-of-line definition and callers.
	    virtual void prepareBuffer(const DrawParams&,
	                               int ssboIndex,
	                               int resourceIndex,
	                               const ResourceDesc& resource,
	                               void* buffer,
	                               size_t bufferSize) const;

	    // The resources declared by this ComputeStep, in binding order.
	    SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); }

	    // Identifier that can be used as part of a unique key for a compute pipeline state object
	    // associated with this `ComputeStep`.
	    uint32_t uniqueID() const { return fUniqueID; }

	    // Returns a debug name for the subclass implementation. The returned pointer refers to
	    // storage owned by this object.
	    const char* name() const { return fName.c_str(); }

	    // The size of the workgroup for this ComputeStep's entry point function. This value is hardware
	    // dependent. On Metal, this value should be used when invoking the dispatch API call. On all
	    // other backends, this value will be baked into the pipeline.
	    WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; }

	    // Data flow behavior queries:
	    bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; }
	    bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; }
	    bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; }
	    bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; }

	protected:
	    // Only subclasses may construct a ComputeStep. The constructor is defined out of line.
	    ComputeStep(std::string_view name,
	                WorkgroupSize localDispatchSize,
	                SkSpan<const ResourceDesc> resources);

	private:
	    // Bit flags backing the data flow behavior queries above.
	    // NOTE(review): presumably computed from the declared resources by the out-of-line
	    // constructor -- confirm.
	    enum class Flags : uint8_t {
	        kNone                      = 0b0000,
	        kOutputsVertexBuffer       = 0b0001,
	        kOutputsIndexBuffer        = 0b0010,
	        kOutputsInstanceBuffer     = 0b0100,
	        kOutputsIndirectDrawBuffer = 0b1000,
	    };
	    SK_DECL_BITMASK_OPS_FRIENDS(Flags);

	    // Disallow copy and move, for both construction and assignment. The assignment operators are
	    // deleted explicitly so that the intent does not depend on the implicit deletion rules that
	    // are triggered by the user-declared move constructor.
	    ComputeStep(const ComputeStep&)            = delete;
	    ComputeStep(ComputeStep&&)                 = delete;
	    ComputeStep& operator=(const ComputeStep&) = delete;
	    ComputeStep& operator=(ComputeStep&&)      = delete;

	    uint32_t fUniqueID;
	    SkEnumBitMask<Flags> fFlags;
	    std::string fName;
	    std::vector<ResourceDesc> fResources;

	    // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need.
	    // The ComputeStep constructor should check and reduce that number based on the maximum
	    // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the
	    // dispatch API call. On other backends, we'll use this value to generate the right SkSL
	    // workgroup size declaration to avoid any validation failures.
	    WorkgroupSize fLocalDispatchSize;
	};
	SK_MAKE_BITMASK_OPS(ComputeStep::Flags);

	} // namespace skgpu::graphite

	#endif // skgpu_graphite_ComputeStep_DEFINED