| /* |
| * Copyright 2022 Google LLC |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "tests/Test.h" |
| |
| #include "include/core/SkBitmap.h" |
| #include "include/gpu/graphite/Context.h" |
| #include "include/gpu/graphite/Recorder.h" |
| #include "include/gpu/graphite/Recording.h" |
| #include "src/gpu/graphite/Buffer.h" |
| #include "src/gpu/graphite/Caps.h" |
| #include "src/gpu/graphite/ComputePipelineDesc.h" |
| #include "src/gpu/graphite/ComputeTypes.h" |
| #include "src/gpu/graphite/ContextPriv.h" |
| #include "src/gpu/graphite/RecorderPriv.h" |
| #include "src/gpu/graphite/ResourceProvider.h" |
| #include "src/gpu/graphite/UniformManager.h" |
| #include "src/gpu/graphite/compute/ComputeStep.h" |
| #include "src/gpu/graphite/compute/DispatchGroup.h" |
| #include "src/gpu/graphite/task/ComputeTask.h" |
| #include "src/gpu/graphite/task/CopyTask.h" |
| #include "src/gpu/graphite/task/SynchronizeToCpuTask.h" |
| #include "src/gpu/graphite/task/UploadTask.h" |
| |
| #include "tools/graphite/GraphiteTestContext.h" |
| |
| using namespace skgpu::graphite; |
| using namespace skiatest::graphite; |
| |
| namespace { |
| |
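| // Maps `buffer` for CPU access and returns a pointer to its contents at `offset`. |
| // On backends where buffer mapping is asynchronous, this spins on the test context |
| // until the map completes. |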
| void* map_buffer(Context* context, |
| skiatest::graphite::GraphiteTestContext* testContext, |
| Buffer* buffer, |
| size_t offset) { |
| SkASSERT(buffer); |
| if (context->priv().caps()->bufferMapsAreAsync()) { |
| buffer->asyncMap(); |
| while (!buffer->isMapped()) { |
| testContext->tick(); |
| } |
| } |
| std::byte* ptr = static_cast<std::byte*>(buffer->map()); |
| SkASSERT(ptr); |
| |
| return ptr + offset; |
| } |
| |
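| // Returns a buffer whose contents will mirror `buffer` on the CPU once the next |
| // submission completes: either `buffer` itself, when the backend supports mapping |
| // it for read-back, or a transfer buffer populated by a copy task. |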
| sk_sp<Buffer> sync_buffer_to_cpu(Recorder* recorder, const Buffer* buffer) { |
| if (recorder->priv().caps()->drawBufferCanBeMappedForReadback()) { |
|         // `buffer` can be mapped directly; however, it may still require a synchronization |
|         // step by the underlying API (e.g. a managed buffer in Metal). SynchronizeToCpuTask |
|         // automatically handles this for us. |
| recorder->priv().add(SynchronizeToCpuTask::Make(sk_ref_sp(buffer))); |
| return sk_ref_sp(buffer); |
| } |
| |
| // The backend requires a transfer buffer for CPU read-back |
| auto xferBuffer = |
| recorder->priv().resourceProvider()->findOrCreateBuffer(buffer->size(), |
| BufferType::kXferGpuToCpu, |
| AccessPattern::kHostVisible, |
| "ComputeTest_TransferToCpu"); |
| SkASSERT(xferBuffer); |
| |
| recorder->priv().add(CopyBufferToBufferTask::Make(buffer, |
| /*srcOffset=*/0, |
| xferBuffer, |
| /*dstOffset=*/0, |
| buffer->size())); |
| return xferBuffer; |
| } |
| |
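| // Snaps a recording from `recorder`, inserts it into `context`, and blocks until the |
| // submitted GPU work completes. Returns null if snapping the recording fails. |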
| std::unique_ptr<Recording> submit_recording(Context* context, |
| GraphiteTestContext* testContext, |
| Recorder* recorder) { |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| return nullptr; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| return recording; |
| } |
| |
| bool is_dawn_or_metal_context_type(skiatest::GpuContextType ctxType) { |
| return skiatest::IsDawnContextType(ctxType) || skiatest::IsMetalContextType(ctxType); |
| } |
| |
| } // namespace |
| |
| #define DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS( \ |
| name, reporter, graphite_context, test_context) \ |
| DEF_GRAPHITE_TEST_FOR_CONTEXTS(name, \ |
| is_dawn_or_metal_context_type, \ |
| reporter, \ |
| graphite_context, \ |
| test_context, \ |
| CtsEnforcement::kNever) |
| |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SingleDispatchTest, |
| reporter, |
| context, |
| testContext) { |
| constexpr uint32_t kProblemSize = 512; |
| constexpr float kFactor = 4.f; |
| |
| // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread |
| // processes 1 vector at a time. |
| constexpr uint32_t kWorkgroupSize = kProblemSize / 4; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers |
| // below all pack their data into vectors to be compatible with SPIR-V/WGSL. |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestArrayMultiply", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| // Input buffer: |
| { |
| // TODO(b/299979165): Declare this binding as read-only. |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*sksl=*/"inputBlock {\n" |
| " float factor;\n" |
| " layout(offset=16) float4 in_data[];\n" |
| "}", |
| }, |
| // Output buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, // shared to allow us to access it from the |
| // Builder |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { float4 out_data[]; }", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| // A kernel that multiplies a large array of floats by a supplied factor. |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor; |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| if (index == 0) { |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| return sizeof(float) * (kProblemSize + 4); |
| } |
| SkASSERT(index == 1); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(float) * kProblemSize; |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| // Only initialize the input buffer. |
| if (resourceIndex != 0) { |
| return; |
| } |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| |
|             SkASSERT(bufferSize == sizeof(float) * (kProblemSize + 4)); |
|             // Note: the span length is an element count, not a byte count. |
|             SkSpan<float> inData(static_cast<float*>(buffer), kProblemSize + 4); |
| inData[0] = kFactor; |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| inData[i + 4] = i + 1; |
| } |
| } |
| |
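|         // Each thread processes one float4, so a single workgroup of kWorkgroupSize |
|         // threads covers the entire array. |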
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| // The output buffer should have been placed in the right output slot. |
| BindBufferInfo outputInfo = builder.getSharedBufferResource(0); |
| if (!outputInfo) { |
| ERRORF(reporter, "Failed to allocate an output buffer at slot 0"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| float* outData = static_cast<float*>( |
| map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| const float expected = (i + 1) * kFactor; |
| const float found = outData[i]; |
| REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found); |
| } |
| } |
| |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_DispatchGroupTest, |
| reporter, |
| context, |
| testContext) { |
| // TODO(b/315834710): This fails on Dawn D3D11 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| constexpr uint32_t kProblemSize = 512; |
| constexpr float kFactor1 = 4.f; |
| constexpr float kFactor2 = 3.f; |
| |
| // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread |
| // processes 1 vector at a time. |
| constexpr uint32_t kWorkgroupSize = kProblemSize / 4; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // Define two steps that perform two multiplication passes over the same input. |
| |
| class TestComputeStep1 : public ComputeStep { |
| public: |
| // TODO(skia:40045541): SkSL doesn't support std430 layout well, so the buffers |
| // below all pack their data into vectors to be compatible with SPIR-V/WGSL. |
| TestComputeStep1() : ComputeStep( |
| /*name=*/"TestArrayMultiplyFirstPass", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| // Input buffer: |
| { |
| // TODO(b/299979165): Declare this binding as read-only. |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*sksl=*/"inputBlock {\n" |
| " float factor;\n" |
| " layout(offset=16) float4 in_data[];\n" |
| "}", |
| }, |
| // Output buffers: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, // GPU-only, read by second step |
| /*slot=*/0, |
| /*sksl=*/"outputBlock1 { float4 forward_data[]; }", |
| }, |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/1, |
| /*sksl=*/"outputBlock2 { float2 extra_data; }", |
| } |
| }) {} |
| ~TestComputeStep1() override = default; |
| |
|         // A kernel that multiplies a large array of floats by a supplied factor and also |
|         // records the factor (and twice the factor) in a second output buffer. |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| uint idx = sk_GlobalInvocationID.x; |
| forward_data[idx] = in_data[idx] * factor; |
| if (idx == 0) { |
| extra_data.x = factor; |
| extra_data.y = 2 * factor; |
| } |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| if (index == 0) { |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| return sizeof(float) * (kProblemSize + 4); |
| } |
| if (index == 1) { |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| SkASSERT(r.fSlot == 0); |
| return sizeof(float) * kProblemSize; |
| } |
| |
| SkASSERT(index == 2); |
| SkASSERT(r.fSlot == 1); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return 2 * sizeof(float); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| if (resourceIndex != 0) { |
| return; |
| } |
| |
|             SkASSERT(bufferSize == sizeof(float) * (kProblemSize + 4)); |
|             SkSpan<float> inData(static_cast<float*>(buffer), kProblemSize + 4); |
| inData[0] = kFactor1; |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| inData[i + 4] = i + 1; |
| } |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step1; |
| |
| class TestComputeStep2 : public ComputeStep { |
| public: |
| TestComputeStep2() : ComputeStep( |
| /*name=*/"TestArrayMultiplySecondPass", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| // Input buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, // GPU-only |
| /*slot=*/0, // this is the output from the first step |
| /*sksl=*/"inputBlock { float4 in_data[]; }", |
| }, |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*sksl=*/"factorBlock { float factor; }" |
| }, |
| // Output buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/2, |
| /*sksl=*/"outputBlock { float4 out_data[]; }", |
| } |
| }) {} |
| ~TestComputeStep2() override = default; |
| |
| // A kernel that multiplies a large array of floats by a supplied factor. |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor; |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index != 0); |
| if (index == 1) { |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
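|                 // Only one float is consumed by the kernel, but the block is sized as a |
|                 // full float4 to match the vector-friendly packing used by these tests. |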
| return sizeof(float) * 4; |
| } |
| SkASSERT(index == 2); |
| SkASSERT(r.fSlot == 2); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(float) * kProblemSize; |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| if (resourceIndex != 1) { |
| return; |
| } |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| *static_cast<float*>(buffer) = kFactor2; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step2; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step1); |
| builder.appendStep(&step2); |
| |
| // Slots 0, 1, and 2 should all contain shared buffers. Slot 1 contains the extra output buffer |
|     // from step 1 while slot 2 contains the result of the second multiplication pass from step 2. |
| // Slot 0 is not mappable. |
| REPORTER_ASSERT(reporter, |
| std::holds_alternative<BufferView>(builder.outputTable().fSharedSlots[0]), |
| "shared resource at slot 0 is missing"); |
| BindBufferInfo outputInfo = builder.getSharedBufferResource(2); |
| if (!outputInfo) { |
|         ERRORF(reporter, "Failed to allocate an output buffer at slot 2"); |
| return; |
| } |
| |
| // Extra output buffer from step 1 (corresponding to 'outputBlock2') |
| BindBufferInfo extraOutputInfo = builder.getSharedBufferResource(1); |
| if (!extraOutputInfo) { |
| ERRORF(reporter, "shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffers get synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer); |
| auto extraOutputBuffer = sync_buffer_to_cpu(recorder.get(), extraOutputInfo.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer from step 2 |
| float* outData = static_cast<float*>( |
| map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| const float expected = (i + 1) * kFactor1 * kFactor2; |
| const float found = outData[i]; |
| REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found); |
| } |
| |
| // Verify the contents of the extra output buffer from step 1 |
| float* extraOutData = static_cast<float*>( |
| map_buffer(context, testContext, extraOutputBuffer.get(), extraOutputInfo.fOffset)); |
| SkASSERT(extraOutputBuffer->isMapped() && extraOutData != nullptr); |
| REPORTER_ASSERT(reporter, |
| kFactor1 == extraOutData[0], |
| "expected '%f', found '%f'", |
| kFactor1, |
| extraOutData[0]); |
| REPORTER_ASSERT(reporter, |
| 2 * kFactor1 == extraOutData[1], |
| "expected '%f', found '%f'", |
|                     2 * kFactor1, |
| extraOutData[1]); |
| } |
| |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_UniformBufferTest, |
| reporter, |
| context, |
| testContext) { |
| // TODO(b/315834710): This fails on Dawn D3D11 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| constexpr uint32_t kProblemSize = 512; |
| constexpr float kFactor = 4.f; |
| |
| // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread |
| // processes 1 vector at a time. |
| constexpr uint32_t kWorkgroupSize = kProblemSize / 4; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestArrayMultiply", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| // Uniform buffer: |
| { |
| /*type=*/ResourceType::kUniformBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*sksl=*/"uniformBlock { float factor; }" |
| }, |
| // Input buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*sksl=*/"inputBlock { float4 in_data[]; }", |
| }, |
| // Output buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, // shared to allow us to access it from the |
| // Builder |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { float4 out_data[]; }", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| // A kernel that multiplies a large array of floats by a supplied factor. |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor; |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| if (index == 0) { |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| return sizeof(float); |
| } |
| if (index == 1) { |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| return sizeof(float) * kProblemSize; |
| } |
| SkASSERT(index == 2); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(float) * kProblemSize; |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| // Only initialize the input storage buffer. |
| if (resourceIndex != 1) { |
| return; |
| } |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
|             SkASSERT(bufferSize == sizeof(float) * kProblemSize); |
|             SkSpan<float> inData(static_cast<float*>(buffer), kProblemSize); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| inData[i] = i + 1; |
| } |
| } |
| |
| void prepareUniformBuffer(int resourceIndex, |
| const ResourceDesc&, |
| UniformManager* mgr) const override { |
| SkASSERT(resourceIndex == 0); |
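|             // In debug builds, declare the expected uniform layout so the write below |
|             // is validated against the SkSL block declaration. |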
| SkDEBUGCODE( |
| const Uniform uniforms[] = {{"factor", SkSLType::kFloat}}; |
| mgr->setExpectedUniforms(uniforms); |
| ) |
| mgr->write(kFactor); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| // The output buffer should have been placed in the right output slot. |
| BindBufferInfo outputInfo = builder.getSharedBufferResource(0); |
| if (!outputInfo) { |
| ERRORF(reporter, "Failed to allocate an output buffer at slot 0"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| float* outData = static_cast<float*>( |
| map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| const float expected = (i + 1) * kFactor; |
| const float found = outData[i]; |
| REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found); |
| } |
| } |
| |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ExternallyAssignedBuffer, |
| reporter, |
| context, |
| testContext) { |
| constexpr uint32_t kProblemSize = 512; |
| constexpr float kFactor = 4.f; |
| |
| // The ComputeStep packs kProblemSize floats into kProblemSize / 4 vectors and each thread |
| // processes 1 vector at a time. |
| constexpr uint32_t kWorkgroupSize = kProblemSize / 4; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"ExternallyAssignedBuffer", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| // Input buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*sksl=*/"inputBlock {\n" |
| " float factor;\n" |
| " layout(offset = 16) float4 in_data[];\n" |
| "}\n", |
| }, |
| // Output buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, // shared to allow us to access it from the |
| // Builder |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { float4 out_data[]; }", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| // A kernel that multiplies a large array of floats by a supplied factor. |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x] * factor; |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int resourceIndex, const ResourceDesc& r) const override { |
| SkASSERT(resourceIndex == 0); |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| return sizeof(float) * (kProblemSize + 4); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| SkASSERT(r.fFlow == DataFlow::kPrivate); |
| |
|             SkASSERT(bufferSize == sizeof(float) * (kProblemSize + 4)); |
|             SkSpan<float> inData(static_cast<float*>(buffer), kProblemSize + 4); |
| inData[0] = kFactor; |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| inData[i + 4] = i + 1; |
| } |
| } |
| } step; |
| |
| // We allocate a buffer and directly assign it to the DispatchGroup::Builder. The ComputeStep |
| // will not participate in the creation of this buffer. |
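|     // getStoragePointer returns both a CPU-visible write pointer and the buffer binding |
|     // info; only the binding info is needed here. |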
| auto [_, outputInfo] = |
| recorder->priv().drawBufferManager()->getStoragePointer(sizeof(float) * kProblemSize); |
| REPORTER_ASSERT(reporter, outputInfo, "Failed to allocate output buffer"); |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.assignSharedBuffer({outputInfo, sizeof(float) * kProblemSize}, 0); |
| |
| // Initialize the step with a pre-determined global size |
| if (!builder.appendStep(&step, {WorkgroupSize(1, 1, 1)})) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| float* outData = static_cast<float*>( |
| map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| const float expected = (i + 1) * kFactor; |
| const float found = outData[i]; |
| REPORTER_ASSERT(reporter, expected == found, "expected '%f', found '%f'", expected, found); |
| } |
| } |
| |
| // Tests the storage texture binding for a compute dispatch that writes the same color to every |
| // pixel of a storage texture. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTexture, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same |
| // size. |
| constexpr uint32_t kDim = 16; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestStorageTexture", |
| /*localDispatchSize=*/{kDim, kDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0)); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| return {{kDim, kDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| sk_sp<TextureProxy> texture = builder.getSharedTextureResource(0); |
| if (!texture) { |
| ERRORF(reporter, "Shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| SkBitmap bitmap; |
| SkImageInfo imgInfo = |
| SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| bitmap.allocPixels(imgInfo); |
| |
| SkPixmap pixels; |
| bool peekPixelsSuccess = bitmap.peekPixels(&pixels); |
| REPORTER_ASSERT(reporter, peekPixelsSuccess); |
| |
| bool readPixelsSuccess = context->priv().readPixels(pixels, texture.get(), imgInfo, 0, 0); |
| REPORTER_ASSERT(reporter, readPixelsSuccess); |
| |
| for (uint32_t x = 0; x < kDim; ++x) { |
| for (uint32_t y = 0; y < kDim; ++y) { |
| SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN); |
| SkColor4f color = pixels.getColor4f(x, y); |
| REPORTER_ASSERT(reporter, expected == color, |
| "At position {%u, %u}, " |
| "expected {%.1f, %.1f, %.1f, %.1f}, " |
| "found {%.1f, %.1f, %.1f, %.1f}", |
| x, y, |
| expected.fR, expected.fG, expected.fB, expected.fA, |
| color.fR, color.fG, color.fB, color.fA); |
| } |
| } |
| } |
| |
| // Tests the readonly texture binding for a compute dispatch that random-access reads from a |
| // CPU-populated texture and copies it to a storage texture. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureReadAndWrite, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same |
| // size. |
| constexpr uint32_t kDim = 16; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestStorageTextureReadAndWrite", |
| /*localDispatchSize=*/{kDim, kDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kReadOnlyTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"src", |
| }, |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| half4 color = textureRead(src, sk_LocalInvocationID.xy); |
| textureWrite(dst, sk_LocalInvocationID.xy, color); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 1); |
| return {{kDim, kDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| // Create and populate an input texture. |
| SkBitmap srcBitmap; |
| SkImageInfo srcInfo = |
| SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| srcBitmap.allocPixels(srcInfo); |
| SkPixmap srcPixels; |
| bool srcPeekPixelsSuccess = srcBitmap.peekPixels(&srcPixels); |
| REPORTER_ASSERT(reporter, srcPeekPixelsSuccess); |
| for (uint32_t x = 0; x < kDim; ++x) { |
| for (uint32_t y = 0; y < kDim; ++y) { |
| *srcPixels.writable_addr32(x, y) = |
| SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0); |
| } |
| } |
| |
| auto texInfo = context->priv().caps()->getDefaultSampledTextureInfo(kRGBA_8888_SkColorType, |
| skgpu::Mipmapped::kNo, |
| skgpu::Protected::kNo, |
| skgpu::Renderable::kNo); |
| sk_sp<TextureProxy> srcProxy = TextureProxy::Make(context->priv().caps(), |
| recorder->priv().resourceProvider(), |
| {kDim, kDim}, |
| texInfo, |
| "ComputeTestSrcProxy", |
| skgpu::Budgeted::kNo); |
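|     // Wrap the CPU pixel data in an upload task that copies it into the source proxy |
|     // before the compute dispatch runs. |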
| MipLevel mipLevel; |
| mipLevel.fPixels = srcPixels.addr(); |
| mipLevel.fRowBytes = srcPixels.rowBytes(); |
| UploadInstance upload = UploadInstance::Make(recorder.get(), |
| srcProxy, |
| srcPixels.info().colorInfo(), |
| srcPixels.info().colorInfo(), |
| {mipLevel}, |
| SkIRect::MakeWH(kDim, kDim), |
| std::make_unique<ImageUploadContext>()); |
| if (!upload.isValid()) { |
| ERRORF(reporter, "Could not create UploadInstance"); |
| return; |
| } |
| recorder->priv().add(UploadTask::Make(std::move(upload))); |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| |
| // Assign the input texture to slot 0. This corresponds to the ComputeStep's "src" texture |
| // binding. |
| builder.assignSharedTexture(std::move(srcProxy), 0); |
| |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1); |
| if (!dst) { |
| ERRORF(reporter, "shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| SkBitmap bitmap; |
| SkImageInfo imgInfo = |
| SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| bitmap.allocPixels(imgInfo); |
| |
| SkPixmap pixels; |
| bool peekPixelsSuccess = bitmap.peekPixels(&pixels); |
| REPORTER_ASSERT(reporter, peekPixelsSuccess); |
| |
| bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0); |
| REPORTER_ASSERT(reporter, readPixelsSuccess); |
| |
| for (uint32_t x = 0; x < kDim; ++x) { |
| for (uint32_t y = 0; y < kDim; ++y) { |
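|             // The source pixels were written as raw 32-bit values into RGBA_8888 memory, |
|             // so the same packed value is reinterpreted here via FromBytes_RGBA. |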
| SkColor4f expected = SkColor4f::FromBytes_RGBA( |
| SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0)); |
| SkColor4f color = pixels.getColor4f(x, y); |
| REPORTER_ASSERT(reporter, expected == color, |
| "At position {%u, %u}, " |
| "expected {%.1f, %.1f, %.1f, %.1f}, " |
| "found {%.1f, %.1f, %.1f, %.1f}", |
| x, y, |
| expected.fR, expected.fG, expected.fB, expected.fA, |
| color.fR, color.fG, color.fB, color.fA); |
| } |
| } |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ReadOnlyStorageBuffer, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same |
| // size. |
| constexpr uint32_t kDim = 16; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestReadOnlyStorageBuffer", |
| /*localDispatchSize=*/{kDim, kDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kReadOnlyStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| /*sksl=*/"src { uint in_data[]; }", |
| }, |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
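|                     // Flatten the 2D invocation ID into a linear index (16 == kDim). |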
| uint ix = sk_LocalInvocationID.y * 16 + sk_LocalInvocationID.x; |
| uint value = in_data[ix]; |
| half4 splat = half4( |
| half(value & 0xFF), |
| half((value >> 8) & 0xFF), |
| half((value >> 16) & 0xFF), |
| half((value >> 24) & 0xFF) |
| ); |
| textureWrite(dst, sk_LocalInvocationID.xy, splat / 255.0); |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| return kDim * kDim * sizeof(uint32_t); |
| } |
| |
| void prepareStorageBuffer(int index, |
| const ResourceDesc&, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(index == 0); |
| SkASSERT(bufferSize == kDim * kDim * sizeof(uint32_t)); |
| |
| uint32_t* inputs = reinterpret_cast<uint32_t*>(buffer); |
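|             // Pack R into the low byte, G into the next byte, and A into the high byte |
|             // (B stays 0), mirroring the unpacking in the kernel above. |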
| for (uint32_t y = 0; y < kDim; ++y) { |
| for (uint32_t x = 0; x < kDim; ++x) { |
| uint32_t value = |
| ((x * 256 / kDim) & 0xFF) | ((y * 256 / kDim) & 0xFF) << 8 | 255 << 24; |
| *(inputs++) = value; |
| } |
| } |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 1); |
| return {{kDim, kDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1); |
| if (!dst) { |
| ERRORF(reporter, "shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| SkBitmap bitmap; |
| SkImageInfo imgInfo = |
| SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| bitmap.allocPixels(imgInfo); |
| |
| SkPixmap pixels; |
| bool peekPixelsSuccess = bitmap.peekPixels(&pixels); |
| REPORTER_ASSERT(reporter, peekPixelsSuccess); |
| |
| bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0); |
| REPORTER_ASSERT(reporter, readPixelsSuccess); |
| |
| for (uint32_t x = 0; x < kDim; ++x) { |
| for (uint32_t y = 0; y < kDim; ++y) { |
| SkColor4f expected = |
| SkColor4f::FromColor(SkColorSetARGB(255, x * 256 / kDim, y * 256 / kDim, 0)); |
| SkColor4f color = pixels.getColor4f(x, y); |
| bool pass = true; |
| for (int i = 0; i < 4; i++) { |
| pass &= color[i] == expected[i]; |
| } |
| REPORTER_ASSERT(reporter, pass, |
| "At position {%u, %u}, " |
| "expected {%.1f, %.1f, %.1f, %.1f}, " |
| "found {%.1f, %.1f, %.1f, %.1f}", |
| x, y, |
| expected.fR, expected.fG, expected.fB, expected.fA, |
| color.fR, color.fG, color.fB, color.fA); |
| } |
| } |
| } |
| |
| // Tests that a texture written by one compute step can be sampled by a subsequent step. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_StorageTextureMultipleComputeSteps, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // For this test we allocate a 16x16 tile which is written to by a single workgroup of the same |
| // size. |
| constexpr uint32_t kDim = 16; |
| |
| // Writes to a texture in slot 0. |
| class TestComputeStep1 : public ComputeStep { |
| public: |
| TestComputeStep1() : ComputeStep( |
| /*name=*/"TestStorageTexturesFirstPass", |
| /*localDispatchSize=*/{kDim, kDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep1() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| textureWrite(dst, sk_LocalInvocationID.xy, half4(0.0, 1.0, 0.0, 1.0)); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| return {{kDim, kDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step1; |
| |
| // Reads from the texture in slot 0 and writes it to another texture in slot 1. |
| class TestComputeStep2 : public ComputeStep { |
| public: |
| TestComputeStep2() : ComputeStep( |
| /*name=*/"TestStorageTexturesSecondPass", |
| /*localDispatchSize=*/{kDim, kDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kReadOnlyTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"src", |
| }, |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep2() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| half4 color = textureRead(src, sk_LocalInvocationID.xy); |
| textureWrite(dst, sk_LocalInvocationID.xy, color); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 1); |
| return {{kDim, kDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step2; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step1); |
| builder.appendStep(&step2); |
| |
| sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1); |
| if (!dst) { |
| ERRORF(reporter, "shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| SkBitmap bitmap; |
| SkImageInfo imgInfo = |
| SkImageInfo::Make(kDim, kDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| bitmap.allocPixels(imgInfo); |
| |
| SkPixmap pixels; |
| bool peekPixelsSuccess = bitmap.peekPixels(&pixels); |
| REPORTER_ASSERT(reporter, peekPixelsSuccess); |
| |
| bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0); |
| REPORTER_ASSERT(reporter, readPixelsSuccess); |
| |
| for (uint32_t x = 0; x < kDim; ++x) { |
| for (uint32_t y = 0; y < kDim; ++y) { |
| SkColor4f expected = SkColor4f::FromColor(SK_ColorGREEN); |
| SkColor4f color = pixels.getColor4f(x, y); |
| REPORTER_ASSERT(reporter, expected == color, |
| "At position {%u, %u}, " |
| "expected {%.1f, %.1f, %.1f, %.1f}, " |
| "found {%.1f, %.1f, %.1f, %.1f}", |
| x, y, |
| expected.fR, expected.fG, expected.fB, expected.fA, |
| color.fR, color.fG, color.fB, color.fA); |
| } |
| } |
| } |
| |
| // Tests that a texture can be sampled by a compute step using a sampler. |
| // TODO(armansito): Once the previous TODO is done, add additional tests that exercise mixed use of |
| // texture, buffer, and sampler bindings. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_SampledTexture, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| // The first ComputeStep initializes a 16x16 texture with a checkerboard pattern of alternating |
| // red and black pixels. The second ComputeStep downsamples this texture into a 4x4 using |
| // bilinear filtering at pixel borders, intentionally averaging the values of each 4x4 tile in |
| // the source texture, and writes the result to the destination texture. |
| constexpr uint32_t kSrcDim = 16; |
| constexpr uint32_t kDstDim = 4; |
| |
| class TestComputeStep1 : public ComputeStep { |
| public: |
| TestComputeStep1() : ComputeStep( |
| /*name=*/"Test_SampledTexture_Init", |
| /*localDispatchSize=*/{kSrcDim, kSrcDim, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"dst", |
| } |
| }) {} |
| ~TestComputeStep1() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| uint2 c = sk_LocalInvocationID.xy; |
| uint checkerBoardColor = (c.x + (c.y % 2)) % 2; |
| textureWrite(dst, c, half4(checkerBoardColor, 0, 0, 1)); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| return {{kSrcDim, kSrcDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step1; |
| |
| class TestComputeStep2 : public ComputeStep { |
| public: |
| TestComputeStep2() : ComputeStep( |
| /*name=*/"Test_SampledTexture_Sample", |
| /*localDispatchSize=*/{kDstDim, kDstDim, 1}, |
| /*resources=*/{ |
| // Declare the storage texture before the sampled texture. This tests that |
| // binding index assignment works consistently across all backends when a |
| // sampler-less texture and a texture+sampler pair are intermixed and sampler |
| // bindings aren't necessarily contiguous when the ranges are distinct. |
| { |
| /*type=*/ResourceType::kWriteOnlyStorageTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"dst", |
| }, |
| { |
| /*type=*/ResourceType::kSampledTexture, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"src", |
| } |
| }) {} |
| ~TestComputeStep2() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
|                     // Normalize the 4x4 invocation indices and use the result to sample |
|                     // the source texture. |
| uint2 dstCoord = sk_LocalInvocationID.xy; |
| const float2 dstSizeInv = float2(0.25, 0.25); |
| float2 unormCoord = float2(dstCoord) * dstSizeInv; |
| |
| // Use explicit LOD, as quad derivatives are not available to a compute shader. |
| half4 color = sampleLod(src, unormCoord, 0); |
| textureWrite(dst, dstCoord, color); |
| } |
| )"; |
| } |
| |
| std::tuple<SkISize, SkColorType> calculateTextureParameters( |
| int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0 || index == 1); |
| return {{kDstDim, kDstDim}, kRGBA_8888_SkColorType}; |
| } |
| |
| SamplerDesc calculateSamplerParameters(int index, const ResourceDesc&) const override { |
| SkASSERT(index == 1); |
| // Use the repeat tile mode to sample an infinite checkerboard. |
| constexpr SkTileMode kTileModes[2] = {SkTileMode::kRepeat, SkTileMode::kRepeat}; |
| return {SkFilterMode::kLinear, kTileModes}; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step2; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step1); |
| builder.appendStep(&step2); |
| |
| sk_sp<TextureProxy> dst = builder.getSharedTextureResource(1); |
| if (!dst) { |
| ERRORF(reporter, "shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| SkBitmap bitmap; |
| SkImageInfo imgInfo = |
| SkImageInfo::Make(kDstDim, kDstDim, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType); |
| bitmap.allocPixels(imgInfo); |
| |
| SkPixmap pixels; |
| bool peekPixelsSuccess = bitmap.peekPixels(&pixels); |
| REPORTER_ASSERT(reporter, peekPixelsSuccess); |
| |
| bool readPixelsSuccess = context->priv().readPixels(pixels, dst.get(), imgInfo, 0, 0); |
| REPORTER_ASSERT(reporter, readPixelsSuccess); |
| |
| for (uint32_t x = 0; x < kDstDim; ++x) { |
| for (uint32_t y = 0; y < kDstDim; ++y) { |
| SkColor4f color = pixels.getColor4f(x, y); |
| REPORTER_ASSERT(reporter, color.fR > 0.49 && color.fR < 0.51, |
| "At position {%u, %u}, " |
| "expected red channel in range [0.49, 0.51], " |
| "found {%.3f}", |
| x, y, color.fR); |
| } |
| } |
| } |
| |
| // TODO(b/260622403): The shader tested here is identical to |
| // `resources/sksl/compute/AtomicsOperations.compute`. It would be nice to be able to exercise SkSL |
| // features like this as part of SkSLTest.cpp instead of as a graphite test. |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsTest, |
| reporter, |
| context, |
| testContext) { |
| // This fails on Dawn D3D11, b/315834710 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
| constexpr uint32_t kWorkgroupSize = 256; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestAtomicOperations", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| /*sksl=*/"ssbo { atomicUint globalCounter; }", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| // A kernel that increments a global (device memory) counter across multiple workgroups. |
| // Each workgroup maintains its own independent tally in a workgroup-shared counter which |
| // is then added to the global count. |
| // |
| // This exercises atomic store/load/add and coherent reads and writes over memory in storage |
| // and workgroup address spaces. |
| std::string computeSkSL() const override { |
| return R"( |
| workgroup atomicUint localCounter; |
| |
| void main() { |
| // Initialize the local counter. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicStore(localCounter, 0); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| workgroupBarrier(); |
| |
| // All threads increment the counter. |
| atomicAdd(localCounter, 1); |
| |
| // Synchronize the threads again to ensure they have all executed the increment |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| workgroupBarrier(); |
| |
| // Add the workgroup-only tally to the global counter. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicAdd(globalCounter, atomicLoad(localCounter)); |
| } |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(uint32_t); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(kWorkgroupCount, 1, 1); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| *static_cast<uint32_t*>(buffer) = 0; |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(0); |
| if (!info) { |
| ERRORF(reporter, "shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize; |
| const uint32_t result = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset))[0]; |
| REPORTER_ASSERT(reporter, |
| result == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| result); |
| } |
| |
| // TODO(b/260622403): The shader tested here is identical to |
| // `resources/sksl/compute/AtomicsOperationsOverArrayAndStruct.compute`. It would be nice to be able |
| // to exercise SkSL features like this as part of SkSLTest.cpp instead of as a graphite test. |
| // TODO(b/262427430, b/262429132): Enable this test on other backends once they all support |
| // compute programs. |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_AtomicOperationsOverArrayAndStructTest, |
| reporter, |
| context, |
| testContext) { |
| // This fails on Dawn D3D11, b/315834710 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
| constexpr uint32_t kWorkgroupSize = 256; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestAtomicOperationsOverArrayAndStruct", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| /*sksl=*/"ssbo {\n" |
| " atomicUint globalCountsFirstHalf;\n" |
| " atomicUint globalCountsSecondHalf;\n" |
| "}\n" |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
|         // A kernel that increments two global (device memory) counters across multiple |
| // workgroups. Each workgroup maintains its own independent tallies in workgroup-shared |
| // counters which are then added to the global counts. |
| // |
| // This exercises atomic store/load/add and coherent reads and writes over memory in storage |
| // and workgroup address spaces. |
| std::string computeSkSL() const override { |
| return R"( |
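|                 // WORKGROUP_SIZE must stay in sync with kWorkgroupSize above. |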
| const uint WORKGROUP_SIZE = 256; |
| |
| workgroup atomicUint localCounts[2]; |
| |
| void main() { |
| // Initialize the local counts. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicStore(localCounts[0], 0); |
| atomicStore(localCounts[1], 0); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| workgroupBarrier(); |
| |
| // Each thread increments one of the local counters based on its invocation |
| // index. |
| uint idx = sk_LocalInvocationID.x < (WORKGROUP_SIZE / 2) ? 0 : 1; |
| atomicAdd(localCounts[idx], 1); |
| |
| // Synchronize the threads again to ensure they have all executed the increments |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| workgroupBarrier(); |
| |
|                     // Add the workgroup-only tallies to the global counters. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicAdd(globalCountsFirstHalf, atomicLoad(localCounts[0])); |
| atomicAdd(globalCountsSecondHalf, atomicLoad(localCounts[1])); |
| } |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return 2 * sizeof(uint32_t); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(kWorkgroupCount, 1, 1); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| uint32_t* data = static_cast<uint32_t*>(buffer); |
| data[0] = 0; |
| data[1] = 0; |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(0); |
| if (!info) { |
| ERRORF(reporter, "shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize / 2; |
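    // Each workgroup increments each local counter (kWorkgroupSize / 2) = 128 times, so across
    // all 32 workgroups each global counter should read 32 * 128 = 4096.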
| |
| const uint32_t* ssboData = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset)); |
| const uint32_t firstHalfCount = ssboData[0]; |
| const uint32_t secondHalfCount = ssboData[1]; |
| REPORTER_ASSERT(reporter, |
| firstHalfCount == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| firstHalfCount); |
| REPORTER_ASSERT(reporter, |
| secondHalfCount == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| secondHalfCount); |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearedBuffer, |
| reporter, |
| context, |
| testContext) { |
| constexpr uint32_t kProblemSize = 512; |
| |
    // The ComputeStep packs kProblemSize uints into kProblemSize / 4 uint4 vectors and each
    // thread processes one vector at a time.
| constexpr uint32_t kWorkgroupSize = kProblemSize / 4; |
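    // With kProblemSize = 512 this is 128 uint4 elements, covered by a single 128-thread
    // workgroup (the global dispatch below is one workgroup).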
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
    // The ComputeStep requests an unmapped buffer that is zero-initialized. It copies the input
    // to a mapped output buffer, which the test then verifies.
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestClearedBuffer", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
                // Zero-initialized input buffer
| { |
| // TODO(b/299979165): Declare this binding as read-only. |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kPrivate, |
| /*policy=*/ResourcePolicy::kClear, |
| /*sksl=*/"inputBlock { uint4 in_data[]; }\n", |
| }, |
| // Output buffer: |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, // shared to allow us to access it from the |
| // Builder |
| /*policy=*/ResourcePolicy::kMapped, // mappable for read-back |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { uint4 out_data[]; }\n", |
| } |
| }) {} |
| ~TestComputeStep() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x]; |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| return sizeof(uint32_t) * kProblemSize; |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| // Should receive this call only for the mapped buffer. |
| SkASSERT(resourceIndex == 1); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| if (!builder.appendStep(&step)) { |
| ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup"); |
| return; |
| } |
| |
| // The output buffer should have been placed in the right output slot. |
| BindBufferInfo outputInfo = builder.getSharedBufferResource(0); |
| if (!outputInfo) { |
| ERRORF(reporter, "Failed to allocate an output buffer at slot 0"); |
| return; |
| } |
| |
| // Record the compute task |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), outputInfo.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| uint32_t* outData = static_cast<uint32_t*>( |
| map_buffer(context, testContext, outputBuffer.get(), outputInfo.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kProblemSize; ++i) { |
| const uint32_t found = outData[i]; |
| REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found); |
| } |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrdering, |
| reporter, |
| context, |
| testContext) { |
    // Record two independent DispatchGroups that operate on the same buffer. The first group
    // writes garbage to the buffer and the second group copies its contents to an output
    // buffer. This test validates that the reads, writes, and clear occur in the expected
    // order.
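    // Expected sequence: the first group fills the buffer with 0xFE bytes, the buffer is then
    // cleared to zero, and the second group's copy therefore reads back all zeros.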
| constexpr uint32_t kWorkgroupSize = 64; |
| |
| // Initialize buffer with non-zero data. |
| class FillWithGarbage : public ComputeStep { |
| public: |
| FillWithGarbage() : ComputeStep( |
| /*name=*/"FillWithGarbage", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { uint4 out_data[]; }\n", |
| } |
| }) {} |
| ~FillWithGarbage() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = uint4(0xFE); |
| } |
| )"; |
| } |
| } garbageStep; |
| |
| // Second stage just copies the data to a destination buffer. This is only to verify that this |
| // stage, issued in a separate DispatchGroup, observes the clear. |
| class CopyBuffer : public ComputeStep { |
| public: |
| CopyBuffer() : ComputeStep( |
| /*name=*/"CopyBuffer", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"inputBlock { uint4 in_data[]; }\n", |
| }, |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"outputBlock { uint4 out_data[]; }\n", |
| } |
| }) {} |
| ~CopyBuffer() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x]; |
| } |
| )"; |
| } |
| } copyStep; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| DispatchGroup::Builder builder(recorder.get()); |
| |
| constexpr size_t kElementCount = 4 * kWorkgroupSize; |
| constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount; |
| auto input = recorder->priv().drawBufferManager()->getStorage(kBufferSize); |
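    // Only the BindBufferInfo half of the returned pair is needed here; the mapped CPU write
    // pointer is intentionally discarded.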
| auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize); |
| |
| ComputeTask::DispatchGroupList groups; |
| |
| // First group. |
| builder.assignSharedBuffer({input, kBufferSize}, 0); |
| builder.appendStep(&garbageStep, {{1, 1, 1}}); |
| groups.push_back(builder.finalize()); |
| |
| // Second group. |
| builder.reset(); |
| builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes); |
| builder.assignSharedBuffer({output, kBufferSize}, 1); |
    builder.appendStep(&copyStep, {{1, 1, 1}});
| groups.push_back(builder.finalize()); |
| |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get()); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| // Verify the contents of the output buffer. |
| uint32_t* outData = static_cast<uint32_t*>( |
| map_buffer(context, testContext, outputBuffer.get(), output.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kElementCount; ++i) { |
| const uint32_t found = outData[i]; |
| REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found); |
| } |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_ClearOrderingScratchBuffers, |
| reporter, |
| context, |
| testContext) { |
    // This test is the same as the ClearOrdering test, except the two stages operate on a
    // recycled ScratchBuffer. This is primarily to test ScratchBuffer reuse.
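    // Both groups suballocate their input from the same pool, so the second group's
    // ClearBuffer::kYes request must zero out the garbage written by the first group before
    // the copy runs.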
| constexpr uint32_t kWorkgroupSize = 64; |
| |
| // Initialize buffer with non-zero data. |
| class FillWithGarbage : public ComputeStep { |
| public: |
| FillWithGarbage() : ComputeStep( |
| /*name=*/"FillWithGarbage", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"outputBlock { uint4 out_data[]; }\n", |
| } |
| }) {} |
| ~FillWithGarbage() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = uint4(0xFE); |
| } |
| )"; |
| } |
| } garbageStep; |
| |
    // Second stage just copies the data to a destination buffer. This is only to verify that
    // this stage (issued in a separate DispatchGroup) observes the clear.
| class CopyBuffer : public ComputeStep { |
| public: |
| CopyBuffer() : ComputeStep( |
| /*name=*/"CopyBuffer", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/0, |
| /*sksl=*/"inputBlock { uint4 in_data[]; }\n", |
| }, |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kNone, |
| /*slot=*/1, |
| /*sksl=*/"outputBlock { uint4 out_data[]; }\n", |
| } |
| }) {} |
| ~CopyBuffer() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| void main() { |
| out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x]; |
| } |
| )"; |
| } |
| } copyStep; |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| DispatchGroup::Builder builder(recorder.get()); |
| |
| constexpr size_t kElementCount = 4 * kWorkgroupSize; |
| constexpr size_t kBufferSize = sizeof(uint32_t) * kElementCount; |
| auto [_, output] = recorder->priv().drawBufferManager()->getStoragePointer(kBufferSize); |
| |
| ComputeTask::DispatchGroupList groups; |
| |
| // First group. |
| { |
| auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize); |
| auto input = scratch.suballocate(kBufferSize); |
| builder.assignSharedBuffer({input, kBufferSize}, 0); |
| |
| // `scratch` returns to the scratch buffer pool when it goes out of scope |
| } |
| builder.appendStep(&garbageStep, {{1, 1, 1}}); |
| groups.push_back(builder.finalize()); |
| |
| // Second group. |
| builder.reset(); |
| { |
| auto scratch = recorder->priv().drawBufferManager()->getScratchStorage(kBufferSize); |
| auto input = scratch.suballocate(kBufferSize); |
| builder.assignSharedBuffer({input, kBufferSize}, 0, ClearBuffer::kYes); |
| } |
| builder.assignSharedBuffer({output, kBufferSize}, 1); |
    builder.appendStep(&copyStep, {{1, 1, 1}});
| groups.push_back(builder.finalize()); |
| |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto outputBuffer = sync_buffer_to_cpu(recorder.get(), output.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = submit_recording(context, testContext, recorder.get()); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| // Verify the contents of the output buffer. |
| uint32_t* outData = static_cast<uint32_t*>( |
| map_buffer(context, testContext, outputBuffer.get(), output.fOffset)); |
| SkASSERT(outputBuffer->isMapped() && outData != nullptr); |
| for (unsigned int i = 0; i < kElementCount; ++i) { |
| const uint32_t found = outData[i]; |
| REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found); |
| } |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_AND_METAL_CONTEXTS(Compute_IndirectDispatch, |
| reporter, |
| context, |
| testContext) { |
| // This fails on Dawn D3D11, b/315834710 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
| constexpr uint32_t kWorkgroupSize = 64; |
| |
| // `IndirectStep` populates a buffer with the global workgroup count for `CountStep`. |
| // `CountStep` is recorded using `DispatchGroup::appendStepIndirect()` and its workgroups get |
| // dispatched according to the values computed by `IndirectStep` on the GPU. |
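    // The backend reads the indirect buffer as three consecutive uint32 values, conceptually:
    //
    //   struct IndirectDispatchArgs { uint32_t x, y, z; };
    //
    // which is what `IndirectStep` writes into indirect[0..2] below.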
| class IndirectStep : public ComputeStep { |
| public: |
| IndirectStep() |
| : ComputeStep( |
| /*name=*/"TestIndirectDispatch_IndirectStep", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/ |
| {{ |
| /*type=*/ResourceType::kIndirectBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kClear, |
| /*slot=*/0, |
                        // TODO(armansito): Ideally the SSBO would have a single member of
                        // struct type `IndirectDispatchArgs`. SkSL modules don't support
                        // struct declarations, so this is currently not possible.
| /*sksl=*/"ssbo { uint indirect[]; }", |
| }}) {} |
| ~IndirectStep() override = default; |
| |
        // Kernel that writes the global workgroup count (`kWorkgroupCount`) to be consumed by
        // the indirect dispatch.
| std::string computeSkSL() const override { |
| return R"( |
| // This needs to match `kWorkgroupCount` declared above. |
| const uint kWorkgroupCount = 32; |
| |
| void main() { |
| if (sk_LocalInvocationID.x == 0) { |
| indirect[0] = kWorkgroupCount; |
| indirect[1] = 1; |
| indirect[2] = 1; |
| } |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return kIndirectDispatchArgumentSize; |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(1, 1, 1); |
| } |
| } indirectStep; |
| |
| class CountStep : public ComputeStep { |
| public: |
| CountStep() |
| : ComputeStep( |
| /*name=*/"TestIndirectDispatch_CountStep", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/ |
| {{ |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/1, |
| /*sksl=*/"ssbo { atomicUint globalCounter; }", |
| }}) {} |
| ~CountStep() override = default; |
| |
| std::string computeSkSL() const override { |
| return R"( |
| workgroup atomicUint localCounter; |
| |
| void main() { |
| // Initialize the local counter. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicStore(localCounter, 0); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| workgroupBarrier(); |
| |
| // All threads increment the counter. |
| atomicAdd(localCounter, 1); |
| |
| // Synchronize the threads again to ensure they have all executed the increment |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| workgroupBarrier(); |
| |
| // Add the workgroup-only tally to the global counter. |
| if (sk_LocalInvocationID.x == 0) { |
| atomicAdd(globalCounter, atomicLoad(localCounter)); |
| } |
| } |
| )"; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 1); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(uint32_t); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| *static_cast<uint32_t*>(buffer) = 0; |
| } |
| } countStep; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&indirectStep); |
| BindBufferInfo indirectBufferInfo = builder.getSharedBufferResource(0); |
| if (!indirectBufferInfo) { |
| ERRORF(reporter, "Shared resource at slot 0 is missing"); |
| return; |
| } |
| builder.appendStepIndirect(&countStep, {indirectBufferInfo, kIndirectDispatchArgumentSize}); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(1); |
| if (!info) { |
| ERRORF(reporter, "Shared resource at slot 1 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize; |
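    // Each of the 32 indirectly dispatched workgroups contributes its full local tally of 64,
    // so the global counter should read 32 * 64 = 2048.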
| const uint32_t result = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset))[0]; |
| REPORTER_ASSERT(reporter, |
| result == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| result); |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_NativeShaderSourceMetal, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
| constexpr uint32_t kWorkgroupSize = 1024; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestAtomicOperationsMetal", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| } |
| }, |
| /*workgroupBuffers=*/{}, |
| /*baseFlags=*/Flags::kSupportsNativeShader) {} |
| ~TestComputeStep() override = default; |
| |
| NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override { |
| SkASSERT(format == NativeShaderFormat::kMSL); |
| static constexpr std::string_view kSource = R"( |
| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
| kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]], |
| device atomic_uint& globalCounter [[buffer(0)]]) { |
| threadgroup atomic_uint localCounter; |
| |
| // Initialize the local counter. |
| if (localId.x == 0u) { |
| atomic_store_explicit(&localCounter, 0u, memory_order_relaxed); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // All threads increment the counter. |
| atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed); |
| |
| // Synchronize the threads again to ensure they have all executed the increment |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Add the workgroup-only tally to the global counter. |
| if (localId.x == 0u) { |
| uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed); |
| atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed); |
| } |
| } |
| )"; |
| return {kSource, "atomicCount"}; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(uint32_t); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(kWorkgroupCount, 1, 1); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| *static_cast<uint32_t*>(buffer) = 0; |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(0); |
| if (!info) { |
| ERRORF(reporter, "shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize; |
| const uint32_t result = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset))[0]; |
| REPORTER_ASSERT(reporter, |
| result == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| result); |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_WorkgroupBufferDescMetal, |
| reporter, |
| context, |
| testContext) { |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
| constexpr uint32_t kWorkgroupSize = 1024; |
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestAtomicOperationsMetal", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| } |
| }, |
| /*workgroupBuffers=*/{ |
| { |
| /*size=*/sizeof(uint32_t), |
| /*index=*/0u, |
| } |
| }, |
| /*baseFlags=*/Flags::kSupportsNativeShader) {} |
| ~TestComputeStep() override = default; |
| |
| // This is the same MSL kernel as in Compute_NativeShaderSourceMetal, except `localCounter` |
| // is an entry-point parameter instead of a local variable. This forces the workgroup |
| // binding to be encoded explicitly in the command encoder. |
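        // The {sizeof(uint32_t), index 0} entry in `workgroupBuffers` above is what backs the
        // [[threadgroup(0)]] argument.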
| NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override { |
| SkASSERT(format == NativeShaderFormat::kMSL); |
| static constexpr std::string_view kSource = R"( |
| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
| kernel void atomicCount(uint3 localId [[thread_position_in_threadgroup]], |
| device atomic_uint& globalCounter [[buffer(0)]], |
| threadgroup atomic_uint& localCounter [[threadgroup(0)]]) { |
| // Initialize the local counter. |
| if (localId.x == 0u) { |
| atomic_store_explicit(&localCounter, 0u, memory_order_relaxed); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // All threads increment the counter. |
| atomic_fetch_add_explicit(&localCounter, 1u, memory_order_relaxed); |
| |
| // Synchronize the threads again to ensure they have all executed the increment |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Add the workgroup-only tally to the global counter. |
| if (localId.x == 0u) { |
| uint tally = atomic_load_explicit(&localCounter, memory_order_relaxed); |
| atomic_fetch_add_explicit(&globalCounter, tally, memory_order_relaxed); |
| } |
| } |
| )"; |
| return {kSource, "atomicCount"}; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(uint32_t); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(kWorkgroupCount, 1, 1); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| *static_cast<uint32_t*>(buffer) = 0; |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(0); |
| if (!info) { |
| ERRORF(reporter, "shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize; |
| const uint32_t result = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset))[0]; |
| REPORTER_ASSERT(reporter, |
| result == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| result); |
| } |
| |
| DEF_GRAPHITE_TEST_FOR_DAWN_CONTEXT(Compute_NativeShaderSourceWGSL, reporter, context, testContext) { |
| // This fails on Dawn D3D11, b/315834710 |
| if (testContext->contextType() == skgpu::ContextType::kDawn_D3D11) { |
| return; |
| } |
| |
| std::unique_ptr<Recorder> recorder = context->makeRecorder(); |
| |
| constexpr uint32_t kWorkgroupCount = 32; |
    constexpr uint32_t kWorkgroupSize = 256;  // WebGPU's default maximum workgroup size
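    // Note: this must stay in sync with the hard-coded `@workgroup_size(256)` in the WGSL
    // kernel below.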
| |
| class TestComputeStep : public ComputeStep { |
| public: |
| TestComputeStep() : ComputeStep( |
| /*name=*/"TestAtomicOperationsWGSL", |
| /*localDispatchSize=*/{kWorkgroupSize, 1, 1}, |
| /*resources=*/{ |
| { |
| /*type=*/ResourceType::kStorageBuffer, |
| /*flow=*/DataFlow::kShared, |
| /*policy=*/ResourcePolicy::kMapped, |
| /*slot=*/0, |
| } |
| }, |
| /*workgroupBuffers=*/{}, |
| /*baseFlags=*/Flags::kSupportsNativeShader) {} |
| ~TestComputeStep() override = default; |
| |
| NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override { |
| SkASSERT(format == NativeShaderFormat::kWGSL); |
| static constexpr std::string_view kSource = R"( |
| @group(0) @binding(0) var<storage, read_write> globalCounter: atomic<u32>; |
| |
| var<workgroup> localCounter: atomic<u32>; |
| |
| @compute @workgroup_size(256) |
| fn atomicCount(@builtin(local_invocation_id) localId: vec3u) { |
| // Initialize the local counter. |
| if localId.x == 0u { |
| atomicStore(&localCounter, 0u); |
| } |
| |
| // Synchronize the threads in the workgroup so they all see the initial value. |
| workgroupBarrier(); |
| |
| // All threads increment the counter. |
| atomicAdd(&localCounter, 1u); |
| |
| // Synchronize the threads again to ensure they have all executed the increment |
| // and the following load reads the same value across all threads in the |
| // workgroup. |
| workgroupBarrier(); |
| |
| // Add the workgroup-only tally to the global counter. |
| if localId.x == 0u { |
| let tally = atomicLoad(&localCounter); |
| atomicAdd(&globalCounter, tally); |
| } |
| } |
| )"; |
| return {kSource, "atomicCount"}; |
| } |
| |
| size_t calculateBufferSize(int index, const ResourceDesc& r) const override { |
| SkASSERT(index == 0); |
| SkASSERT(r.fSlot == 0); |
| SkASSERT(r.fFlow == DataFlow::kShared); |
| return sizeof(uint32_t); |
| } |
| |
| WorkgroupSize calculateGlobalDispatchSize() const override { |
| return WorkgroupSize(kWorkgroupCount, 1, 1); |
| } |
| |
| void prepareStorageBuffer(int resourceIndex, |
| const ResourceDesc& r, |
| void* buffer, |
| size_t bufferSize) const override { |
| SkASSERT(resourceIndex == 0); |
| *static_cast<uint32_t*>(buffer) = 0; |
| } |
| } step; |
| |
| DispatchGroup::Builder builder(recorder.get()); |
| builder.appendStep(&step); |
| |
| BindBufferInfo info = builder.getSharedBufferResource(0); |
| if (!info) { |
| ERRORF(reporter, "shared resource at slot 0 is missing"); |
| return; |
| } |
| |
| // Record the compute pass task. |
| ComputeTask::DispatchGroupList groups; |
| groups.push_back(builder.finalize()); |
| recorder->priv().add(ComputeTask::Make(std::move(groups))); |
| |
| // Ensure the output buffer is synchronized to the CPU once the GPU submission has finished. |
| auto buffer = sync_buffer_to_cpu(recorder.get(), info.fBuffer); |
| |
| // Submit the work and wait for it to complete. |
| std::unique_ptr<Recording> recording = recorder->snap(); |
| if (!recording) { |
| ERRORF(reporter, "Failed to make recording"); |
| return; |
| } |
| |
| InsertRecordingInfo insertInfo; |
| insertInfo.fRecording = recording.get(); |
| context->insertRecording(insertInfo); |
| testContext->syncedSubmit(context); |
| |
| // Verify the contents of the output buffer. |
| constexpr uint32_t kExpectedCount = kWorkgroupCount * kWorkgroupSize; |
| const uint32_t result = static_cast<const uint32_t*>( |
| map_buffer(context, testContext, buffer.get(), info.fOffset))[0]; |
| REPORTER_ASSERT(reporter, |
| result == kExpectedCount, |
| "expected '%u', found '%u'", |
| kExpectedCount, |
| result); |
| } |