blob: 8cc83e9757d9461cbf80802d20bbd3173bf0f90e [file] [log] [blame]
/*
* Copyright 2021 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/graphite/DrawList.h"
#include "include/core/SkTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/DrawPass.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/KeyContext.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/geom/Geometry.h"
namespace skgpu::graphite {
// Appends one logical draw to this list, expanding it into one SortKey per
// RenderStep of 'renderer' and caching each step's pipeline description,
// uniform data, and texture bindings for later consumption by snapDrawPass().
//
// Parameters:
//  - renderer:          supplies the RenderSteps this draw expands into.
//  - localToDevice:     must be valid; deduplicated before being stored.
//  - geometry/clip:     must be non-empty / have non-degenerate draw bounds.
//  - ordering:          draw sort order; if the renderer uses stencil, its
//                       stencil index must already be assigned.
//  - paintID:           identifies the paint; only attached to the pipeline
//                       key of steps that perform shading.
//  - dstUsage:          when kDstReadRequired is set, the draw's bounds are
//                       accumulated into fDstReadBounds.
//  - barrierBeforeDraws: recorded on the Draw; consulted in snapDrawPass().
//  - gatherer:          receives per-step uniform/texture data; rewound after
//                       each step.
//  - stroke:            optional stroke style stored on the Draw.
//  - latestDepthLayer:  NOTE(review): currently unread by this function —
//                       presumably part of an in-progress depth-layer
//                       refactor; confirm against callers.
//
// Returns {nullptr, nullptr} unconditionally — NOTE(review): looks like a
// placeholder return for that same refactor; verify with call sites.
std::pair<DrawParams*, Layer*> DrawList::recordDraw(
const Renderer* renderer,
const Transform& localToDevice,
const Geometry& geometry,
const Clip& clip,
DrawOrder ordering,
UniquePaintParamsID paintID,
SkEnumBitMask<DstUsage> dstUsage,
BarrierType barrierBeforeDraws,
PipelineDataGatherer* gatherer,
const StrokeStyle* stroke,
const Layer* latestDepthLayer) {
SkASSERT(localToDevice.valid());
SkASSERT(!geometry.isEmpty() && !clip.drawBounds().isEmptyNegativeOrNaN());
// Stencil-using renderers require a pre-assigned stencil index in 'ordering'.
SkASSERT(!(renderer->depthStencilFlags() & DepthStencilFlags::kStencil) ||
ordering.stencilIndex() != DrawOrder::kUnassigned);
// TODO: Add validation that the renderer's expected shape type and stroke params match provided
// The Draw owns the deduplicated transform reference; it must outlive the
// SortKeys created below, which hold a pointer to it.
const Draw& draw = fDraws.emplace_back(renderer,
this->deduplicateTransform(localToDevice),
geometry,
clip,
ordering,
barrierBeforeDraws,
stroke);
fRenderStepCount += renderer->numRenderSteps();
// Create a sort key for every render step in this draw
for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) {
const RenderStep* const step = draw.renderer()->steps()[stepIndex];
// Position the gatherer for this step's data before any writes occur.
gatherer->markOffsetAndAlign(step->performsShading(), step->uniformAlignment());
// Pipeline key combines the step ID with the paint only when the step
// actually shades; non-shading steps share a paint-independent pipeline.
GraphicsPipelineCache::Index pipelineIndex = fPipelineCache.insert(
{ step->renderStepID(), step->performsShading() ?
paintID : UniquePaintParamsID::Invalid()});
step->writeUniformsAndTextures(draw.drawParams(), gatherer);
auto [combinedUniforms, combinedTextures] =
gatherer->endCombinedData(step->performsShading());
// Steps with no uniforms/textures record the sentinel invalid index so
// snapDrawPass() can skip binding for them.
UniformDataCache::Index uniformIndex = combinedUniforms ?
fUniformDataCache.insert(combinedUniforms) : UniformDataCache::kInvalidIndex;
TextureDataCache::Index textureBindingIndex = combinedTextures ?
fTextureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex;
fSortKeys.push_back({&draw, stepIndex, pipelineIndex, uniformIndex, textureBindingIndex});
// Reset the gatherer so the next step starts from a clean slate.
gatherer->rewindForRenderStep();
}
// Accumulate pass-wide state contributed by this draw.
fPassBounds.join(clip.drawBounds());
fRequiresMSAA |= renderer->requiresMSAA();
fDepthStencilFlags |= renderer->depthStencilFlags();
if (dstUsage & DstUsage::kDstReadRequired) {
// For paints that read from the dst, update the bounds. It may later be determined that the
// DstReadStrategy does not require them, but they are inexpensive to track.
fDstReadBounds.join(clip.drawBounds());
}
#if defined(SK_DEBUG)
if (geometry.isCoverageMaskShape()) {
fCoverageMaskShapeDrawCount++;
}
#endif
return {nullptr, nullptr};
}
// Converts every draw recorded so far into an immutable DrawPass for 'target'.
// Sorts the accumulated per-step SortKeys, then walks them in order, emitting
// pipeline binds, uniform/texture binds, scissor updates, and vertex data
// through a DrawWriter into the pass's command list. On success this DrawList
// is reset (with LoadOp::kLoad — presumably so a subsequent snap over the same
// target preserves its contents; confirm against callers). Returns nullptr if
// the DrawBufferManager failed to map vertex/instance buffers.
std::unique_ptr<DrawPass> DrawList::snapDrawPass(Recorder* recorder,
sk_sp<TextureProxy> target,
const SkImageInfo& targetInfo,
DstReadStrategy dstReadStrategy) {
// NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
// its size should be done with care and good reason. The performance of sorting the keys is
// heavily tied to the total size.
//
// At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
// 16 bytes. There are several ways this could be done if necessary:
// - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
// the uniform data => 8 bytes of key, 8 bytes of pointer.
// - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
// 4 low bits from the Draw* pointer since it's 16 byte aligned.
// - Compact the Draw* to an index into the original collection, although that has extra
// indirection and does not work as well with SkTBlockList.
// In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
// than an 8 byte key and unmodified pointer.
static_assert(sizeof(SortKey) == SkAlignTo(16 + sizeof(void*), alignof(SortKey)));
// TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
// algorithms that approach O(n) in that condition may be favorable. Alternatively, could
// explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
// std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
// vs. algorithms that require an extra O(n) storage.
// TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
// bugs in the DrawOrder determination code?
std::sort(fSortKeys.begin(), fSortKeys.end());
TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", fDraws.count());
// The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
// is returned from Make(), it is considered immutable.
std::unique_ptr<DrawPass> drawPass(new DrawPass(target, {fLoadOp, StoreOp::kStore}, fClearColor,
recorder->priv().refFloatStorageManager()));
DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
// Trackers detect pipeline/uniform/texture/scissor changes between
// consecutive sort keys so redundant commands are skipped.
GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions());
SkIRect lastScissor = targetBounds;
SkASSERT(drawPass->fTarget->isFullyLazy() ||
SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
// Every pass starts with an explicit full-target scissor.
drawPass->fCommandList.setScissor(lastScissor);
const Caps* caps = recorder->priv().caps();
const bool useStorageBuffers = caps->storageBufferSupport();
UniformTracker uniformTracker(useStorageBuffers);
// TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
// bound separately from the rest of the textures.
const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy;
// Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there
// is no pipeline or state change, then we must compare the current and prior draw's PaintOrders
// to determine if the draws overlap. If they do, we must inject a flush between them such that
// the barrier addition and draw commands are ordered correctly.
CompressedPaintersOrder priorDrawPaintOrder {};
// Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys
drawPass->fPipelineDrawAreas.push_back_n(fPipelineCache.count(), 0.f);
TextureTracker textureBindingTracker(&fTextureDataCache);
for (const DrawList::SortKey& key : fSortKeys) {
const DrawList::Draw& draw = key.draw();
const RenderStep& renderStep = key.renderStep();
const bool pipelineChange = key.pipelineIndex() != lastPipeline;
drawPass->fPipelineDrawAreas[key.pipelineIndex()] +=
draw.drawParams().drawBounds().area();
// NOTE: writeUniforms() has the side effect of uploading this key's
// uniform data; it must be called even if the result is unused.
const bool uniformBindingChange = uniformTracker.writeUniforms(
fUniformDataCache, bufferMgr, key.uniformIndex());
// TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
// dst copy to the texture bind group/descriptor set automatically when processing a
// BindTexturesAndSamplers call because they use a single group to contain all textures.
// However, from the DrawPass POV, we can run into the scenario where two pipelines have the
// same textures+samplers except one requires a dst-copy and the other does not. In this
// case we wouldn't necessarily insert a new command when the pipeline changed and then
// end up with layout validation errors.
const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
key.textureBindingIndex()) ||
(rebindTexturesOnPipelineChange && pipelineChange &&
key.textureBindingIndex() != TextureDataCache::kInvalidIndex);
std::optional<SkIRect> newScissor =
renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds);
const bool stateChange = uniformBindingChange || textureBindingsChange ||
newScissor.has_value();
// Update DrawWriter *before* we actually change any state so that accumulated draws from
// the previous state use the proper state.
if (pipelineChange) {
drawWriter.newPipelineState(renderStep.primitiveType(),
renderStep.staticDataStride(),
renderStep.appendDataStride(),
renderStep.getRenderStateFlags(),
draw.drawParams().barrierBeforeDraws());
} else if (stateChange) {
drawWriter.newDynamicState();
} else if (draw.drawParams().barrierBeforeDraws() != BarrierType::kNone &&
priorDrawPaintOrder != draw.drawParams().order().paintOrder()) {
// Even if there is no pipeline or state change, we must consider whether a
// DrawPassCommand to add barriers must be inserted before any draw commands. If so,
// then determine if the current and prior draws overlap (ie, their PaintOrders are
// unequal). If so, perform a flush() to make sure the draw and add barrier commands are
// appended to the command list in the proper order.
drawWriter.flush();
}
// Make state changes before accumulating new draw data
if (pipelineChange) {
drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
lastPipeline = key.pipelineIndex();
}
if (stateChange) {
if (uniformBindingChange) {
uniformTracker.bindUniforms(UniformSlot::kCombinedUniforms, &drawPass->fCommandList);
}
if (textureBindingsChange) {
textureBindingTracker.bindTextures(&drawPass->fCommandList);
}
if (newScissor.has_value()) {
drawPass->fCommandList.setScissor(*newScissor);
lastScissor = *newScissor;
}
}
// With storage buffers, steps index into the shared SSBO; otherwise the
// index is meaningless and 0 is passed.
uint32_t uniformSsboIndex = useStorageBuffers ? uniformTracker.ssboIndex() : 0;
renderStep.writeVertices(&drawWriter, draw.drawParams(), uniformSsboIndex);
if (bufferMgr->hasMappingFailed()) {
SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
return nullptr;
}
// Update priorDrawPaintOrder value before iterating to analyze the next draw.
priorDrawPaintOrder = draw.drawParams().order().paintOrder();
}
// Finish recording draw calls for any collected data still pending at end of the loop
drawWriter.flush();
// Transfer the accumulated pass state; the caches are detached (emptied)
// into the DrawPass, which now owns the pipeline descs and texture refs.
drawPass->fBounds = fPassBounds.roundOut().asSkIRect();
drawPass->fPipelineDescs = fPipelineCache.detach();
drawPass->fSampledTextures = fTextureDataCache.detachTextures();
TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());
this->reset(LoadOp::kLoad);
return drawPass;
}
// Discards all recorded draws and their per-step sort keys, then restores the
// shared base-class state (load op and clear color) ready for the next batch
// of recorded draws.
void DrawList::reset(LoadOp loadOp, SkColor4f color) {
// SortKeys only reference entries in fDraws, so the clearing order of the
// two containers is immaterial; both are emptied here.
fSortKeys.clear();
fDraws.reset();
DrawListBase::reset(loadOp, color);
}
} // namespace skgpu::graphite