Reland "Reland "[graphite] Extracts early in drawGeometry"" This reverts commit 5aafacb1358d9dbc00f6b30a056e016e0091aa04. Original change's description: > Revert "Reland "[graphite] Extracts early in drawGeometry"" > > This reverts commit 81de4113e3e7cfe8ec91413fbbe51101dcb354e3. > > Reason for revert: Now breaking chromium roll. > > Original change's description: > > Reland "[graphite] Extracts early in drawGeometry" > > > > * Reintroduce notify image in use and flush in snapDrawTask. > > > > * Fixes an issue where multi-draw dependencies were not correctly tracked. > > > > This reverts commit 1b271fd02a65ba97e12bcaa32f67afa50b5d9b52. > > > > > > Original change's description: > > > Revert "[graphite] Extracts early in drawGeometry" > > > > > > This reverts commit 25f00cb247f23b4a8cbe7a1245bdf609fa0be846. > > > > > > Reason for revert: Breaks android roll > > > > > > Original change's description: > > > > [graphite] Extracts early in drawGeometry > > > > > > > > * Moves the creation of UniquePaintIDs from DrawPass::Snap to PaintParams::toKey, which is called in Device::drawGeometry > > > > > > > > * Moves blend mode calculations into PaintParams, and adds an enum DstUsage to DrawTypes. > > > > > > > > * Moves the creation of a draw pass from DrawPass::Make to DrawList::snapDrawPass. > > > > > > > > * Texture and uniform trackers commensurately moved to DrawList. 
> > > > > > > > Change-Id: Ie843db44bfad0cd51773ffa7e42050fdbd7c22e3 > > > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1045336 > > > > Commit-Queue: Thomas Smith <thomsmit@google.com> > > > > Reviewed-by: Michael Ludwig <michaelludwig@google.com> > > > > > > No-Presubmit: true > > > No-Tree-Checks: true > > > No-Try: true > > > Change-Id: I19ad73d77051295e37ac9adaae77f228e4934834 > > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1052396 > > > Commit-Queue: Thomas Smith <thomsmit@google.com> > > > Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com> > > > > Change-Id: Ib8b9aa5b3ed998bdecd3b56a03ca13f189518178 > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1052657 > > Reviewed-by: Michael Ludwig <michaelludwig@google.com> > > Commit-Queue: Thomas Smith <thomsmit@google.com> > > No-Presubmit: true > No-Tree-Checks: true > No-Try: true > Change-Id: I0132ab1e71955f6a8b35b3107afe9ae48f5654aa > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1059636 > Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com> > Commit-Queue: Thomas Smith <thomsmit@google.com> Change-Id: I945aeeb179ca3d8ac1fcab857cdd8e641ba25c7a Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1059899 Commit-Queue: Thomas Smith <thomsmit@google.com> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
diff --git a/fuzz/FuzzPrecompile.cpp b/fuzz/FuzzPrecompile.cpp index 81d3006..db5a758 100644 --- a/fuzz/FuzzPrecompile.cpp +++ b/fuzz/FuzzPrecompile.cpp
@@ -25,6 +25,7 @@ #include "src/core/SkBlenderBase.h" #include "src/gpu/graphite/ContextPriv.h" #include "src/gpu/graphite/ContextUtils.h" +#include "src/gpu/graphite/DrawContext.h" #include "src/gpu/graphite/KeyContext.h" #include "src/gpu/graphite/PaintParams.h" #include "src/gpu/graphite/PaintParamsKey.h" @@ -300,7 +301,34 @@ SkASSERT_RELEASE(before == after); } +sk_sp<DrawContext> get_precompile_draw_context( + const skgpu::graphite::Caps* caps, Context* context) { + std::unique_ptr<Recorder> drawRecorder = context->makeRecorder(); + ResourceProvider* resourceProvider = drawRecorder->priv().resourceProvider(); + constexpr SkISize drawSize = {128, 128}; + const SkColorInfo colorInfo = SkColorInfo(kRGBA_8888_SkColorType, + kPremul_SkAlphaType, + SkColorSpace::MakeSRGB()); + TextureInfo texInfo = caps->getDefaultSampledTextureInfo(colorInfo.colorType(), + skgpu::Mipmapped::kNo, + skgpu::Protected::kNo, + skgpu::Renderable::kYes); + sk_sp<TextureProxy> target = TextureProxy::Make(caps, + resourceProvider, + drawSize, + texInfo, + "PrecompileTarget", + skgpu::Budgeted::kYes); + sk_sp<DrawContext> precompileDrawContext = DrawContext::Make(caps, + std::move(target), + drawSize, + colorInfo, + {}); + return precompileDrawContext; +} + void fuzz_graphite(Fuzz* fuzz, Context* context, int depth = 9) { + const skgpu::graphite::Caps* caps = context->priv().caps(); std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext(); std::unique_ptr<Recorder> recorder = context->makeRecorder(); ShaderCodeDictionary* dict = context->priv().shaderCodeDictionary(); @@ -310,12 +338,15 @@ Layout layout = context->backend() == skgpu::BackendApi::kMetal ? 
Layout::kMetal : Layout::kStd140; + // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise) + sk_sp<DrawContext> drawContext = get_precompile_draw_context(caps, context); + FloatStorageManager floatStorageManager; PaintParamsKeyBuilder builder(dict); PipelineDataGatherer gatherer(layout); sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>(); - KeyContext precompileKeyContext(recorder->priv().caps(), &floatStorageManager, - &builder, &gatherer, dict, rtDict, ci); + KeyContext precompileKeyContext(caps, &floatStorageManager, &builder, &gatherer, dict, rtDict, + ci); DrawTypeFlags kDrawType = DrawTypeFlags::kSimpleShape; SkPath path = make_path(); @@ -328,7 +359,8 @@ fuzz->next(&temp); Coverage coverage = coverageOptions[temp % 3]; - PaintParams paintParams = PaintParams(paint, + PaintParams paintParams = PaintParams(recorder->priv().caps(), + paint, /* primitiveBlender= */ nullptr, /* nonMSAAClip= */ {}, /* clipShader= */ nullptr, @@ -338,6 +370,7 @@ SkDEBUGCODE(builder.checkReset()); SkDEBUGCODE(gatherer.checkReset()); KeyContext keyContext(recorder.get(), + drawContext.get(), &floatStorageManager, &builder, &gatherer,
diff --git a/src/gpu/graphite/Device.cpp b/src/gpu/graphite/Device.cpp index 0046acf..875d4fa 100644 --- a/src/gpu/graphite/Device.cpp +++ b/src/gpu/graphite/Device.cpp
@@ -71,6 +71,7 @@ #include "src/gpu/graphite/DrawTypes.h" #include "src/gpu/graphite/Image_Base_Graphite.h" #include "src/gpu/graphite/Image_Graphite.h" +#include "src/gpu/graphite/KeyContext.h" #include "src/gpu/graphite/Log.h" #include "src/gpu/graphite/PaintParams.h" #include "src/gpu/graphite/PathAtlas.h" @@ -141,49 +142,10 @@ return kFillStyle; } -bool paint_depends_on_dst(const PaintParams& paintParams) { - std::optional<SkBlendMode> bm = paintParams.asFinalBlendMode(); - if (!bm.has_value()) { - return true; // Runtime blenders always depend on the dst - } - - if (bm == SkBlendMode::kClear || bm == SkBlendMode::kSrc) { - // src and clear blending never depend on dst - return false; - } else if (bm != SkBlendMode::kSrcOver && bm != SkBlendMode::kDstOut) { - // any other blend mode besides src-over and dst-out use dst in some way - return true; - } - - // At this point, we depend on the dst if source alpha != 1, so analyze the paint to - // see if it's opaque. - bool srcIsTransparent = !paintParams.color().isOpaque() || - (paintParams.shader() && !paintParams.shader()->isOpaque()) || - (paintParams.colorFilter() && - !paintParams.colorFilter()->isAlphaUnchanged()); - - if (paintParams.primitiveBlender()) { - std::optional<SkBlendMode> primBlend = as_BB(paintParams.primitiveBlender())->asBlendMode(); - // The primitive blender does not blend against the dst color, but it might change whether - // or not the src is transparent. - if (primBlend && !srcIsTransparent) { - // Since dst might be transparent, we can only preserve opacity for cases where the - // src coefficient is one and the dst coefficient is zero (when src alpha = 1). 
- srcIsTransparent = primBlend != SkBlendMode::kSrcOver && primBlend != SkBlendMode::kSrc; - } else { - // Runtime blender or complex blend modifies the final src color so assume it has alpha - srcIsTransparent = true; - } - } - return srcIsTransparent; -} - /** If the paint can be reduced to a solid flood-fill, determine the correct color to fill with. */ std::optional<SkColor4f> extract_paint_color(const PaintParams& paint, const SkColorInfo& dstColorInfo) { - SkASSERT(!paint_depends_on_dst(paint)); - - std::optional<SkBlendMode> bm = paint.asFinalBlendMode(); + std::optional<SkBlendMode> bm = paint.finalBlendMode(); // Since we don't depend on the dst, a dst-out blend mode implies source is // opaque, which causes dst-out to behave like clear. if (bm == SkBlendMode::kClear || bm == SkBlendMode::kDstOut) { @@ -358,6 +320,23 @@ strategy == PathRendererStrategy::kDefault; } +class AutoResetForDraw { +public: + explicit AutoResetForDraw(PipelineDataGatherer* gatherer) : fDataGatherer(gatherer) {} + + ~AutoResetForDraw() { + if (fDataGatherer) { + fDataGatherer->resetForDraw(); + } + } + + AutoResetForDraw(const AutoResetForDraw&) = delete; + AutoResetForDraw& operator=(const AutoResetForDraw&) = delete; + +private: + PipelineDataGatherer* fDataGatherer; +}; + } // anonymous namespace /** @@ -518,6 +497,12 @@ fRecorder->priv().caps()->defaultMSAASamplesCount()); } } + + const bool useStorageBuffers = fRecorder->priv().caps()->storageBufferSupport(); + const auto& bindingReq = fRecorder->priv().caps()->resourceBindingRequirements(); + fDataGatherer = std::make_unique<PipelineDataGatherer>( + useStorageBuffers ? 
bindingReq.fStorageBufferLayout : bindingReq.fUniformBufferLayout); + fKeyBuilder = std::make_unique<PaintParamsKeyBuilder>(fRecorder->priv().shaderCodeDictionary()); } Device::~Device() { @@ -1383,7 +1368,7 @@ sk_sp<SkBlender> primitiveBlender, bool skipColorXform) { ASSERT_SINGLE_OWNER - + AutoResetForDraw autoReset(fDataGatherer.get()); if (!localToDevice.valid()) { // If the transform is not invertible or not finite then drawing isn't well defined. SKGPU_LOG_W("Skipping draw with non-invertible/non-finite transform."); @@ -1488,47 +1473,93 @@ clip.outsetBoundsForAA(); } - // Figure out what dst color requirements we have, if any. - const SkBlenderBase* blender = as_BB(paint.getBlender()); - const std::optional<SkBlendMode> blendMode = blender ? blender->asBlendMode() - : SkBlendMode::kSrcOver; - - // A primitive blender should be ignored if there is no primitive color to blend against. - // Additionally, if a renderer emits a primitive color, then a null primitive blender should - // be interpreted as SrcOver blending mode. if (!renderer || !renderer->emitsPrimitiveColor()) { + // Ignore primitive blender if the renderer doesn't support it primitiveBlender = nullptr; } else if (!SkToBool(primitiveBlender)) { + // A null blender is normally equivalent to SrcOver; coerce it to non-null so that nullity + // can be used by PaintParamsKeyBuilder to know when to add primitive blending blocks. primitiveBlender = SkBlender::Mode(SkBlendMode::kSrcOver); } - Coverage rendererCoverage = renderer ? renderer->coverage() - : Coverage::kSingleChannel; - TextureFormat targetFormat = TextureInfoPriv::ViewFormat(fDC->target()->textureInfo()); - PaintParams shading{paint, + PaintParams shading{fRecorder->priv().caps(), + paint, std::move(primitiveBlender), clip.nonMSAAClip(), sk_ref_sp(clip.shader()), - rendererCoverage, - targetFormat, + renderer ? 
renderer->coverage() : Coverage::kSingleChannel, + TextureInfoPriv::ViewFormat(fDC->target()->textureInfo()), skipColorXform}; - if (clip.needsCoverage() && rendererCoverage == Coverage::kNone) { - // Must upgrade to single channel coverage if the clip requires coverage; - // but preserve LCD coverage if the Renderer uses that. - rendererCoverage = Coverage::kSingleChannel; + + // Some shapes and styles combine multiple draws so the total render step count is split between + // the main renderer and possibly a secondaryRenderer. As we can't be sure whether a secondary + // renderer is required prior to getting the dstUsage from shading.toKey(), we pessimistically + // assume it's required for needsFlushBeforeDraw(). + int numNewRenderSteps = 1; + SkStrokeRec::Style styleType = style.getStyle(); + if (renderer) { + numNewRenderSteps = renderer->numRenderSteps(); + if (styleType == SkStrokeRec::kStrokeAndFill_Style) { + numNewRenderSteps += + fRecorder->priv().rendererProvider()->tessellatedStrokes()->numRenderSteps(); + } else if (style.isFillStyle() && renderer->useNonAAInnerFill()) { + numNewRenderSteps += + fRecorder->priv().rendererProvider()->nonAABounds()->numRenderSteps(); + } } - bool dstReadRequired = !CanUseHardwareBlending(fRecorder->priv().caps(), - targetFormat, - blendMode, - rendererCoverage); - const bool dependsOnDst = paint_depends_on_dst(shading) || - clip.shader() || !clip.nonMSAAClip().isEmpty(); + + // Decide if we have any reason to flush pending work. A flush may be necessary for two reasons: + // 1) A flush is required before updating the clip state or making any permanent changes to + // a path atlas, since otherwise clip operations and/or atlas entries for the current + // draw will be flushed. + // 2) A flush is required before shading.toKey() is called so that child tasks required by + // this draw are associated with the DrawContext after any instead of being added as a + // child of the current draw. 
See "Layer" tests in NotifyInUseTest.cpp. + DstReadStrategy dstReadStrategy = shading.dstReadRequired() ? + fDC->dstReadStrategy() : DstReadStrategy::kNoneRequired; + const bool needsFlush = this->needsFlushBeforeDraw(numNewRenderSteps, dstReadStrategy); + if (needsFlush) { + if (pathAtlas != nullptr) { + // We need to flush work for all devices associated with the current Recorder. + // Otherwise we may end up with outstanding draws that depend on past atlas state. + fRecorder->priv().flushTrackedDevices( + SK_DUMP_TASKS_CODE("Device::drawGeometry Flush Before Draw")); + } else { + this->flushPendingWork(/*drawContext=*/nullptr); + } + } + + // Determine the paint ID and collect the paint uniforms now before anything has been recorded. + // The paint may reference an SkPicture or a Graphite-backed dynamic SkImage that can trigger + // a flush of the Recorder. + KeyContext keyContext{fRecorder, + fDC.get(), + fRecorder->priv().refFloatStorageManager().get(), + fKeyBuilder.get(), + fDataGatherer.get(), + localToDevice.matrix(), + fDC->colorInfo(), + geometry.isShape() || geometry.isEdgeAAQuad() + ? KeyGenFlags::kDefault + : KeyGenFlags::kDisableSamplingOptimization, + paint.getColor4f()}; + SkDEBUGCODE(fDataGatherer->checkReset()); + SkDEBUGCODE(fKeyBuilder->checkReset()); + + auto keyResult = shading.toKey(keyContext); + if (!keyResult) { + // Converting the SkPaint to a pipeline and set of uniform values + sampled textures failed. + SKGPU_LOG_W("Key context creation failed in Device::drawGeometry, draw dropped!"); + return; + } + + auto [paintID, dstUsage] = *keyResult; // If we are unclipped, do not depend on the dst, and cover the target, then we can adjust // load ops of the renderpass to more optimally handle the draw (and avoid redundant clears). // NOTE: We skip this for fully-lazy render targets because the load ops may impact a larger // area than the Device's theoretical bounds. 
- const bool overwritesAllPixels = !dependsOnDst && + const bool overwritesAllPixels = dstUsage == DstUsage::kNone && geometry.isShape() && geometry.shape().isFloodFill() && !fDC->target()->isFullyLazy() && @@ -1546,7 +1577,7 @@ // discarded dst can still be accessed. For non-floating point formats, that is fine, // but float formats can have NaNs after a discard that cause blending to fail. To // avoid that scenario, we clear to a known value instead. - if (shading.asFinalBlendMode() == SkBlendMode::kSrcOver && + if (shading.finalBlendMode() == SkBlendMode::kSrcOver && TextureFormatIsFloatingPoint( TextureInfoPriv::ViewFormat(fDC->target()->textureInfo()))) { fDC->clear(SkColors::kMagenta); // This color doesn't matter @@ -1557,43 +1588,6 @@ } } - // Some shapes and styles combine multiple draws so the total render step count is split between - // the main renderer and possibly a secondaryRenderer. - SkStrokeRec::Style styleType = style.getStyle(); - const Renderer* secondaryRenderer = nullptr; - Rect innerFillBounds = Rect::InfiniteInverted(); - if (renderer) { - if (styleType == SkStrokeRec::kStrokeAndFill_Style) { - // `renderer` covers the fill, `secondaryRenderer` covers the stroke - secondaryRenderer = fRecorder->priv().rendererProvider()->tessellatedStrokes(); - } else if (style.isFillStyle() && renderer->useNonAAInnerFill() && !dependsOnDst) { - // `renderer` opts into drawing a non-AA inner fill - innerFillBounds = get_inner_bounds(geometry, localToDevice); - if (!innerFillBounds.isEmptyNegativeOrNaN()) { - secondaryRenderer = fRecorder->priv().rendererProvider()->nonAABounds(); - } - } - } - const int numNewRenderSteps = (renderer ? renderer->numRenderSteps() : 1) + - (secondaryRenderer ? secondaryRenderer->numRenderSteps() : 0); - - // Decide if we have any reason to flush pending work. 
We want to flush before updating the clip - // state or making any permanent changes to a path atlas, since otherwise clip operations and/or - // atlas entries for the current draw will be flushed. - DstReadStrategy dstReadStrategy = - dstReadRequired ? fDC->dstReadStrategy() : DstReadStrategy::kNoneRequired; - const bool needsFlush = this->needsFlushBeforeDraw(numNewRenderSteps, dstReadStrategy); - if (needsFlush) { - if (pathAtlas != nullptr) { - // We need to flush work for all devices associated with the current Recorder. - // Otherwise we may end up with outstanding draws that depend on past atlas state. - fRecorder->priv().flushTrackedDevices( - SK_DUMP_TASKS_CODE("Device::drawGeometry Flush Before Draw")); - } else { - this->flushPendingWork(/*drawContext=*/nullptr); - } - } - // If an atlas path renderer was chosen we need to insert the shape into the atlas and schedule // it to be drawn. std::optional<PathAtlas::MaskAndOrigin> atlasMask; // only used if `pathAtlas != nullptr` @@ -1665,7 +1659,7 @@ order.dependsOnPaintersOrder(clipOrder); // If a draw is not opaque, it must be drawn after the most recent draw it intersects with in // order to blend correctly. 
- if (rendererCoverage != Coverage::kNone || dependsOnDst) { + if (shading.rendererCoverage() != Coverage::kNone || dstUsage != DstUsage::kNone) { CompressedPaintersOrder prevDraw = fColorDepthBoundsManager->getMostRecentDraw(clip.drawBounds()); order.dependsOnPaintersOrder(prevDraw); @@ -1678,7 +1672,8 @@ DisjointStencilIndex setIndex = fDisjointStencilSet->add(order.paintOrder(), clip.drawBounds()); order.dependsOnStencil(setIndex); - } else if (!dependsOnDst && renderer->coverage() == Coverage::kNone && style.isFillStyle() && + } else if (dstUsage == DstUsage::kNone && renderer->coverage() == Coverage::kNone && + style.isFillStyle() && ((geometry.isEdgeAAQuad() && geometry.edgeAAQuad().isRect()) || (geometry.isShape() && geometry.shape().isRect()))) { // Sort this draw front to back since it will not blend against what came before it. @@ -1688,10 +1683,6 @@ order.reverseDepthAsStencil(); } - // TODO(b/330864257): This is an extra traversal of all paint effects, that can be avoided when - // the paint key itself is determined inside this function. - shading.notifyImagesInUse(fRecorder, fDC.get()); - // If an atlas path renderer was chosen, then record a single CoverageMaskShape draw. // The shape will be scheduled to be rendered or uploaded into the atlas during the // next invocation of flushPendingWork(). @@ -1699,8 +1690,8 @@ // Record the draw as a fill since stroking is handled by the atlas render/upload. SkASSERT(atlasMask.has_value()); auto [mask, origin] = *atlasMask; - fDC->recordDraw(renderer, Transform::Translate(origin.fX, origin.fY), Geometry(mask), - clip, order, &shading, nullptr, dependsOnDst, dstReadRequired); + fDC->recordDraw(renderer, Transform::Translate(origin.fX, origin.fY), Geometry(mask), clip, + order, paintID, dstUsage, fDataGatherer.get(), nullptr); } else { if (styleType == SkStrokeRec::kStroke_Style || styleType == SkStrokeRec::kHairline_Style || @@ -1711,30 +1702,33 @@ fDC->recordDraw(styleType == SkStrokeRec::kStrokeAndFill_Style ? 
fRecorder->priv().rendererProvider()->tessellatedStrokes() : renderer, - localToDevice, geometry, clip, order, &shading, &stroke, dependsOnDst, - dstReadRequired); + localToDevice, geometry, clip, order, paintID, dstUsage, + fDataGatherer.get(), &stroke); } if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) { // Possibly record an additional draw using the non-AA bounds renderer to fill the // interior with a renderer that can disable blending entirely. + Rect innerFillBounds = renderer->useNonAAInnerFill() && dstUsage == DstUsage::kNone && + styleType != SkStrokeRec::kStrokeAndFill_Style + ? get_inner_bounds(geometry, localToDevice) + : Rect::InfiniteInverted(); if (!innerFillBounds.isEmptyNegativeOrNaN()) { - SkASSERT(!dependsOnDst && renderer->useNonAAInnerFill()); + SkASSERT(dstUsage == DstUsage::kNone && renderer->useNonAAInnerFill()); DrawOrder orderWithoutCoverage{order.depth()}; orderWithoutCoverage.dependsOnPaintersOrder(clipOrder); // The regular draw has analytic coverage, so isn't being sorted front to back, but // we do want to sort the inner fill to maximize overdraw reduction orderWithoutCoverage.reverseDepthAsStencil(); - fDC->recordDraw(fRecorder->priv().rendererProvider()->nonAABounds(), localToDevice, Geometry(Shape(innerFillBounds)), clip, orderWithoutCoverage, - &shading, nullptr, dependsOnDst, dstReadRequired); + paintID, dstUsage, fDataGatherer.get(), nullptr); // Force the coverage draw to come after the non-AA draw in order to benefit from // early depth testing. 
order.dependsOnPaintersOrder(orderWithoutCoverage.paintOrder()); } - fDC->recordDraw(renderer, localToDevice, geometry, clip, order, &shading, nullptr, - dependsOnDst, dstReadRequired); + fDC->recordDraw(renderer, localToDevice, geometry, clip, order, paintID, dstUsage, + fDataGatherer.get(), nullptr); } } @@ -1752,6 +1746,8 @@ const Shape& shape, const Clip& clip, DrawOrder order) { + AutoResetForDraw autoReset(fDataGatherer.get()); + // A clip draw's state is almost fully defined by the ClipStack. The only thing we need // to account for is selecting a Renderer and tracking the stencil buffer usage. Geometry geometry{shape}; @@ -1778,16 +1774,17 @@ SkASSERT(renderer->coverage() == Coverage::kNone && renderer->requiresMSAA()); SkASSERT(pathAtlas == nullptr); - // Clips draws are depth-only (null PaintParams), and filled (null StrokeStyle). - // TODO: Remove this CPU-transform once perspective is supported for all path renderers + // Clips draws are depth-only (invalid UniquePaintParamsID), and filled (null StrokeStyle). + // The data gatherer must be reset so that the DrawList can use it for any RenderStep data. 
if (localToDevice.type() == Transform::Type::kPerspective) { SkPath devicePath = geometry.shape().asPath().makeTransform(localToDevice.matrix().asM33()); fDC->recordDraw(renderer, Transform::Identity(), Geometry(Shape(devicePath)), clip, order, - /*paint*/nullptr, /*stroke*/nullptr, /*dependsOnDst*/false, - /*dstReadReq*/false); + UniquePaintParamsID::Invalid(), DstUsage::kNone, fDataGatherer.get(), + /*stroke=*/nullptr); } else { - fDC->recordDraw(renderer, localToDevice, geometry, clip, order, /*paint*/nullptr, - /*stroke*/nullptr, /*dependsOnDst*/false, /*dstReadReq*/false); + fDC->recordDraw(renderer, localToDevice, geometry, clip, order, + UniquePaintParamsID::Invalid(), DstUsage::kNone, fDataGatherer.get(), + /*stroke=*/nullptr); } // This ensures that draws recorded after this clip shape has been popped off the stack will // be unaffected by the Z value the clip shape wrote to the depth attachment. @@ -2023,7 +2020,7 @@ } this->internalFlush(); - sk_sp<Task> drawTask = fDC->snapDrawTask(fRecorder); + sk_sp<Task> drawTask = fDC->snapDrawTask(); if (drawContext) { drawContext->recordDependency(std::move(drawTask)); } else { @@ -2072,7 +2069,7 @@ fCurrentDepth = DrawOrder::kClearDepth; fAtlasedPathCount = 0; - // Any cleanup in the AtlasProvider + // Any cleanup in the AtlasProvider fRecorder->priv().atlasProvider()->compact(); }
diff --git a/src/gpu/graphite/Device.h b/src/gpu/graphite/Device.h index ca7451f..2919523 100644 --- a/src/gpu/graphite/Device.h +++ b/src/gpu/graphite/Device.h
@@ -76,6 +76,8 @@ class DrawContext; class Geometry; class Image; +class PaintParamsKeyBuilder; +class PipelineDataGatherer; class PathAtlas; class Renderer; class Shape; @@ -341,6 +343,9 @@ // some other task chain that makes it to the root list. sk_sp<Task> fLastTask; + std::unique_ptr<PaintParamsKeyBuilder> fKeyBuilder; + std::unique_ptr<PipelineDataGatherer> fDataGatherer; + ClipStack fClip; // Tracks accumulated intersections for ordering dependent use of the color and depth attachment
diff --git a/src/gpu/graphite/DrawContext.cpp b/src/gpu/graphite/DrawContext.cpp index fb7a295..ae4443f 100644 --- a/src/gpu/graphite/DrawContext.cpp +++ b/src/gpu/graphite/DrawContext.cpp
@@ -86,6 +86,9 @@ , fImageInfo(ii) , fSurfaceProps(props) , fDstReadStrategy(caps->getDstReadStrategy()) + , fSupportsHardwareAdvancedBlend(caps->supportsHardwareAdvancedBlending()) + , fAdvancedBlendsRequireBarrier(caps->blendEquationSupport() == + Caps::BlendEquationSupport::kAdvancedNoncoherent) , fCurrentDrawTask(sk_make_sp<DrawTask>(fTarget)) , fPendingDraws(std::make_unique<DrawList>()) , fPendingUploads(std::make_unique<UploadList>()) { @@ -105,37 +108,28 @@ DrawContext::~DrawContext() = default; void DrawContext::clear(const SkColor4f& clearColor) { - this->discard(); - - fPendingLoadOp = LoadOp::kClear; - SkPMColor4f pmColor = clearColor.premul(); - fPendingClearColor = pmColor.array(); + this->resetForClearOrDiscard(); + fPendingDraws->reset(LoadOp::kClear, clearColor); } void DrawContext::discard() { + this->resetForClearOrDiscard(); + fPendingDraws->reset(LoadOp::kDiscard); +} + +void DrawContext::resetForClearOrDiscard() { // Non-loading operations on a fully lazy target can corrupt data beyond the DrawContext's // region so should be avoided. SkASSERT(!fTarget->isFullyLazy()); - // A fullscreen clear or discard will overwrite anything that came before, so clear the DrawList // NOTE: Eventually the current DrawTask should be reset, once there are no longer implicit // dependencies on atlas tasks between DrawContexts. When that's resolved, the only tasks in the // current DrawTask are those that directly impact the target, which becomes irrelevant with the // clear op overwriting it. For now, preserve the previous tasks that might include atlas // uploads that are not explicitly shared between DrawContexts. - if (fPendingDraws->renderStepCount() > 0) { - fPendingDraws = std::make_unique<DrawList>(); - } if (fComputePathAtlas) { fComputePathAtlas->reset(); } - - // NOTE: Historically, we would switch to a clear load op on floating point render targets - // because analytic coverage would turn on blending for kSrc draws that filled the target. 
When - // this happened, the discard could introduce NaNs into the dst color values that would cause - // pixels to drop. Now we should only be calling discard() in situations that won't trigger - // analytic coverage, so we can still benefit from the kDiscard performance. - fPendingLoadOp = LoadOp::kDiscard; } void DrawContext::recordDraw(const Renderer* renderer, @@ -143,17 +137,31 @@ const Geometry& geometry, const Clip& clip, DrawOrder ordering, - const PaintParams* paint, - const StrokeStyle* stroke, - bool dependsOnDst, - bool dstReadReq) { + UniquePaintParamsID paintID, + SkEnumBitMask<DstUsage> dstUsage, + PipelineDataGatherer* gatherer, + const StrokeStyle* stroke) { SkASSERTF(SkIRect::MakeSize(this->imageInfo().dimensions()).contains(clip.scissor()), "Image %dx%d, scissor %d,%d,%d,%d", this->imageInfo().width(), this->imageInfo().height(), clip.scissor().left(), clip.scissor().top(), clip.scissor().right(), clip.scissor().bottom()); - fPendingDraws->recordDraw(renderer, localToDevice, geometry, clip, ordering, paint, stroke, - dependsOnDst, dstReadReq); + + // Determine whether a draw requires a barrier + BarrierType barrierBeforeDraws = BarrierType::kNone; + if (fDstReadStrategy == DstReadStrategy::kReadFromInput && + (dstUsage & DstUsage::kDstReadRequired)) { + barrierBeforeDraws = BarrierType::kReadDstFromInput; + } + if ((dstUsage & DstUsage::kAdvancedBlend) && + fSupportsHardwareAdvancedBlend && fAdvancedBlendsRequireBarrier) { + // A draw should only read from the dst OR use hardware for advanced blend modes. + SkASSERT(!(dstUsage & DstUsage::kDstReadRequired)); + barrierBeforeDraws = BarrierType::kAdvancedNoncoherentBlend; + } + + fPendingDraws->recordDraw(renderer, localToDevice, geometry, clip, ordering, paintID, dstUsage, + barrierBeforeDraws, gatherer, stroke); } bool DrawContext::recordUpload(Recorder* recorder, @@ -217,7 +225,7 @@ fComputePathAtlas->reset(); } // else platform doesn't support compute or atlas was never initialized. 
- if (fPendingDraws->renderStepCount() == 0 && fPendingLoadOp != LoadOp::kClear) { + if (!fPendingDraws->modifiesTarget()) { // Nothing will be rasterized to the target that warrants a RenderPassTask, but we preserve // any added uploads or compute tasks since those could also affect the target w/o // rasterizing anything directly. @@ -239,18 +247,11 @@ // TODO: At this point, there's only ever one DrawPass in a RenderPassTask to a target. When // subpasses are implemented, they will either be collected alongside fPendingDraws or added // to the RenderPassTask separately. - std::unique_ptr<DrawPass> pass = DrawPass::Make(recorder, - std::move(fPendingDraws), - fTarget, - this->imageInfo(), - std::make_pair(fPendingLoadOp, fPendingStoreOp), - fPendingClearColor, - drawPassDstReadStrategy); - fPendingDraws = std::make_unique<DrawList>(); - // Now that there is content drawn to the target, that content must be loaded on any subsequent - // render pass. - fPendingLoadOp = LoadOp::kLoad; - fPendingStoreOp = StoreOp::kStore; + std::unique_ptr<DrawPass> pass = fPendingDraws->snapDrawPass(recorder, + fTarget, + this->imageInfo(), + drawPassDstReadStrategy); + SkASSERT(!fPendingDraws->modifiesTarget()); // Should be drained into `pass`. if (pass) { SkASSERT(fTarget.get() == pass->target()); @@ -308,11 +309,7 @@ // DrawContexts now implicitly depend on. } -sk_sp<Task> DrawContext::snapDrawTask(Recorder* recorder) { - // If flush() was explicitly called earlier and no new work was recorded, this call to flush() - // is a no-op and shouldn't hurt performance. - this->flush(recorder); - +sk_sp<Task> DrawContext::snapDrawTask() { if (!fCurrentDrawTask->hasTasks()) { return nullptr; }
diff --git a/src/gpu/graphite/DrawContext.h b/src/gpu/graphite/DrawContext.h index 9b68fd5..75dd1ed 100644 --- a/src/gpu/graphite/DrawContext.h +++ b/src/gpu/graphite/DrawContext.h
@@ -13,6 +13,7 @@ #include "include/core/SkRefCnt.h" #include "include/core/SkSurfaceProps.h" #include "src/gpu/graphite/DrawList.h" +#include "src/gpu/graphite/PaintParams.h" #include "src/gpu/graphite/ResourceTypes.h" #include "src/gpu/graphite/TextureProxy.h" #include "src/gpu/graphite/TextureProxyView.h" @@ -57,11 +58,11 @@ ~DrawContext() override; - const SkImageInfo& imageInfo() const { return fImageInfo; } - const SkColorInfo& colorInfo() const { return fImageInfo.colorInfo(); } - TextureProxy* target() { return fTarget.get(); } - const TextureProxy* target() const { return fTarget.get(); } - sk_sp<TextureProxy> refTarget() const { return fTarget; } + const SkImageInfo& imageInfo() const { return fImageInfo; } + const SkColorInfo& colorInfo() const { return fImageInfo.colorInfo(); } + TextureProxy* target() { return fTarget.get(); } + const TextureProxy* target() const { return fTarget.get(); } + sk_sp<TextureProxy> refTarget() const { return fTarget; } // May be null if the target is not texturable. const TextureProxyView& readSurfaceView() const { return fReadView; } @@ -78,10 +79,10 @@ const Geometry& geometry, const Clip& clip, DrawOrder ordering, - const PaintParams* paint, - const StrokeStyle* stroke, - bool dependsOnDst, - bool dstReadReq); + UniquePaintParamsID paintID, + SkEnumBitMask<DstUsage> dstUsage, + PipelineDataGatherer* gatherer, + const StrokeStyle* stroke); bool recordUpload(Recorder* recorder, sk_sp<TextureProxy> targetProxy, @@ -104,9 +105,10 @@ // dependent tasks into the DrawTask currently being built. void flush(Recorder*); - // Flushes (if needed) and completes the current DrawTask, returning it to the caller. - // Subsequent recorded operations will be added to a new DrawTask. - sk_sp<Task> snapDrawTask(Recorder*); + // Returns the current DrawTask to the caller, so all pending draws and uploads (if flush() + // was not immediately called prior to this) and subsequently recorded draws and uploads will + // go into a new DrawTask. 
+ sk_sp<Task> snapDrawTask(); // Returns the dst read strategy to use when/if a paint requires a dst read DstReadStrategy dstReadStrategy() const { return fDstReadStrategy; } @@ -114,6 +116,8 @@ private: DrawContext(const Caps*, sk_sp<TextureProxy>, const SkImageInfo&, const SkSurfaceProps&); + void resetForClearOrDiscard(); + sk_sp<TextureProxy> fTarget; TextureProxyView fReadView; SkImageInfo fImageInfo; @@ -122,6 +126,8 @@ // Does *not* reflect whether a dst read is needed by the DrawLists - simply specifies the // strategies to use should any encountered paint require it. const DstReadStrategy fDstReadStrategy; + const bool fSupportsHardwareAdvancedBlend; + const bool fAdvancedBlendsRequireBarrier; // The in-progress DrawTask that will be snapped and returned when some external requirement // must depend on the contents of this DrawContext's target. As higher-level Skia operations @@ -135,10 +141,6 @@ // flushing. std::unique_ptr<DrawList> fPendingDraws; std::unique_ptr<UploadList> fPendingUploads; - // Load and store information for the current pending draws. - LoadOp fPendingLoadOp = LoadOp::kLoad; - StoreOp fPendingStoreOp = StoreOp::kStore; - std::array<float, 4> fPendingClearColor = { 0, 0, 0, 0 }; // Accumulates atlas coverage masks generated by compute dispatches that are required by one or // more entries in `fPendingDraws`. When pending draws are snapped into a new DrawPass, a
diff --git a/src/gpu/graphite/DrawList.cpp b/src/gpu/graphite/DrawList.cpp index 1d24a9b..365e747 100644 --- a/src/gpu/graphite/DrawList.cpp +++ b/src/gpu/graphite/DrawList.cpp
@@ -7,11 +7,143 @@ #include "src/gpu/graphite/DrawList.h" #include "include/core/SkTypes.h" +#include "include/gpu/graphite/Recorder.h" +#include "src/core/SkTraceEvent.h" +#include "src/gpu/graphite/DrawPass.h" +#include "src/gpu/graphite/DrawWriter.h" +#include "src/gpu/graphite/KeyContext.h" +#include "src/gpu/graphite/RecorderPriv.h" #include "src/gpu/graphite/Renderer.h" #include "src/gpu/graphite/geom/Geometry.h" namespace skgpu::graphite { +namespace { + +// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when +// bindings need to change between draws. +class UniformTracker { +public: + UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {} + + bool writeUniforms(UniformDataCache& uniformCache, + DrawBufferManager* bufferMgr, + UniformDataCache::Index index) { + if (index >= UniformDataCache::kInvalidIndex) { + return false; + } + + if (index == fLastIndex) { + return false; + } + fLastIndex = index; + + UniformDataCache::Entry& uniformData = uniformCache.lookup(index); + const size_t uniformDataSize = uniformData.fCpuData.size(); + + // Upload the uniform data if we haven't already. + // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage + // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile. + if (!uniformData.fBufferBinding.fBuffer || + (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) { + UniformWriter writer; + std::tie(writer, uniformData.fBufferBinding) = + fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize) + : bufferMgr->getUniformWriter(1, uniformDataSize); + + // Early out if buffer mapping failed. 
+ if (!writer) { + return {}; + } + + writer.write(uniformData.fCpuData.data(), uniformDataSize); + + if (fUseStorageBuffers) { + // When using storage buffers, store the SSBO index in the binding's offset field + // and always use the entire buffer's size in the size field. + SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0); + uniformData.fBufferBinding.fOffset /= uniformDataSize; + uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size(); + } + } + + const bool needsRebind = + uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer || + (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset); + + fLastBinding = uniformData.fBufferBinding; + + return needsRebind; + } + + void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) { + BindBufferInfo binding = fLastBinding; + if (fUseStorageBuffers) { + // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding. + binding.fOffset = 0; + } + commandList->bindUniformBuffer(binding, slot); + } + + uint32_t ssboIndex() const { + // The SSBO index for the last-bound storage buffer is stored in the binding's offset field. + return fLastBinding.fOffset; + } + +private: + // Internally track the last binding returned, so that we know whether new uploads or rebindings + // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the + // index in the storage buffer of the last-written uniforms, and the offsets used for actual + // bindings are always zero. + BindBufferInfo fLastBinding; + + // This keeps track of the last index used for writing uniforms from a provided uniform cache. + // If a provided index matches the last index, the uniforms are assumed to already be written + // and no additional uploading is performed. This assumes a UniformTracker will always be + // provided with the same uniform cache. 
+ UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex; + + const bool fUseStorageBuffers; +}; + +// Tracks when to issue BindTexturesAndSamplers commands to a command list and converts +// TextureDataBlocks to that representation as needed. +class TextureTracker { +public: + TextureTracker(TextureDataCache* textureCache) + : fTextureCache(textureCache) {} + + bool setCurrentTextureBindings(TextureDataCache::Index bindingIndex) { + if (bindingIndex < TextureDataCache::kInvalidIndex && fLastIndex != bindingIndex) { + fLastIndex = bindingIndex; + return true; + } + // No binding change + return false; + } + + void bindTextures(DrawPassCommands::List* commandList) { + SkASSERT(fLastIndex < TextureDataCache::kInvalidIndex); + TextureDataBlock binding = fTextureCache->lookup(fLastIndex); + + auto [textures, samplers] = + commandList->bindDeferredTexturesAndSamplers(binding.numTextures()); + + for (int i = 0; i < binding.numTextures(); ++i) { + auto [t, s] = binding.texture(i); + textures[i] = t.get(); + samplers[i] = s; + } + } + +private: + TextureDataCache::Index fLastIndex = TextureDataCache::kInvalidIndex; + + TextureDataCache* const fTextureCache; +}; + +} // anonymous namespace + const Transform& DrawList::deduplicateTransform(const Transform& localToDevice) { // TODO: This is a pretty simple deduplication strategy and doesn't take advantage of the stack // knowledge that Device has. 
@@ -26,10 +158,11 @@ const Geometry& geometry, const Clip& clip, DrawOrder ordering, - const PaintParams* paint, - const StrokeStyle* stroke, - bool dependsOnDst, - bool dstReadReq) { + UniquePaintParamsID paintID, + SkEnumBitMask<DstUsage> dstUsage, + BarrierType barrierBeforeDraws, + PipelineDataGatherer* gatherer, + const StrokeStyle* stroke) { SkASSERT(localToDevice.valid()); SkASSERT(!geometry.isEmpty() && !clip.drawBounds().isEmptyNegativeOrNaN()); SkASSERT(!(renderer->depthStencilFlags() & DepthStencilFlags::kStencil) || @@ -37,14 +170,57 @@ // TODO: Add validation that the renderer's expected shape type and stroke params match provided - fDraws.emplace_back(renderer, this->deduplicateTransform(localToDevice), geometry, clip, - ordering, paint, stroke, dependsOnDst, dstReadReq); + // Create a sort key for every render step in this draw, extracting out any + // RenderStep-specific data. + UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex; + if (paintID.isValid()) { + UniformDataBlock paintUniforms = gatherer->endPaintData(); + if (paintUniforms) { + shadingUniformIndex = fShadingUniformDataCache.insert(paintUniforms); + } + } - // Accumulate renderer information for each draw added to this list + const Draw& draw = fDraws.emplace_back(renderer, + this->deduplicateTransform(localToDevice), + geometry, + clip, + ordering, + barrierBeforeDraws, + stroke); + fRenderStepCount += renderer->numRenderSteps(); + + gatherer->setRenderStepManagerActive(); + for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) { + gatherer->rewindForRenderStep(); + + const RenderStep* const step = draw.renderer()->steps()[stepIndex]; + const bool performsShading = step->performsShading(); + + GraphicsPipelineCache::Index pipelineIndex = fPipelineCache.insert( + { step->renderStepID(), + performsShading ? 
paintID : UniquePaintParamsID::Invalid() }); + + step->writeUniformsAndTextures(draw.drawParams(), gatherer); + auto [stepUniforms, combinedTextures] = gatherer->endRenderStepData(performsShading); + + UniformDataCache::Index geomUniformIndex = stepUniforms ? + fGeometryUniformDataCache.insert(stepUniforms) : UniformDataCache::kInvalidIndex; + TextureDataCache::Index textureBindingIndex = combinedTextures ? + fTextureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex; + + fSortKeys.push_back({&draw, + stepIndex, + pipelineIndex, + geomUniformIndex, + performsShading ? shadingUniformIndex : UniformDataCache::kInvalidIndex, + textureBindingIndex}); + } + + fPassBounds.join(clip.drawBounds()); fRequiresMSAA |= renderer->requiresMSAA(); fDepthStencilFlags |= renderer->depthStencilFlags(); - if (paint && dstReadReq) { + if (dstUsage & DstUsage::kDstReadRequired) { // For paints that read from the dst, update the bounds. It may later be determined that the // DstReadStrategy does not require them, but they are inexpensive to track. fDstReadBounds.join(clip.drawBounds()); @@ -56,4 +232,200 @@ } #endif } + +std::unique_ptr<DrawPass> DrawList::snapDrawPass(Recorder* recorder, + sk_sp<TextureProxy> target, + const SkImageInfo& targetInfo, + const DstReadStrategy dstReadStrategy) { + // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to + // its size should be done with care and good reason. The performance of sorting the keys is + // heavily tied to the total size. + // + // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just + // 16 bytes. There are several ways this could be done if necessary: + // - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to + // the uniform data => 8 bytes of key, 8 bytes of pointer. 
+ // - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the + // 4 low bits from the Draw* pointer since it's 16 byte aligned. + // - Compact the Draw* to an index into the original collection, although that has extra + // indirection and does not work as well with SkTBlockList. + // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower + // than an 8 byte key and unmodified pointer. + static_assert(sizeof(SortKey) == SkAlignTo(16 + sizeof(void*), alignof(SortKey))); + + // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so + // algorithms that approach O(n) in that condition may be favorable. Alternatively, could + // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than + // std::stable_sort and SkTQSort on [ml]'s Windows desktop. Also worth considering in-place + // vs. algorithms that require an extra O(n) storage. + // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding + // bugs in the DrawOrder determination code? + std::sort(fSortKeys.begin(), fSortKeys.end()); + + TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", fDraws.count()); + + // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass + // is returned from snapDrawPass(), it is considered immutable. 
+ std::unique_ptr<DrawPass> drawPass(new DrawPass(target, {fLoadOp, StoreOp::kStore}, fClearColor, + recorder->priv().refFloatStorageManager())); + + DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager(); + DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr); + GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex; + const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions()); + SkIRect lastScissor = targetBounds; + + SkASSERT(drawPass->fTarget->isFullyLazy() || + SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor)); + drawPass->fCommandList.setScissor(lastScissor); + + const Caps* caps = recorder->priv().caps(); + const bool useStorageBuffers = caps->storageBufferSupport(); + UniformTracker geometryUniformTracker(useStorageBuffers); + UniformTracker shadingUniformTracker(useStorageBuffers); + + // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always + // bound separately from the rest of the textures. + const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy; + // Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there + // is no pipeline or state change, then we must compare the current and prior draw's PaintOrders + // to determine if the draws overlap. If they do, we must inject a flush between them such that + // the barrier addition and draw commands are ordered correctly. 
+ CompressedPaintersOrder priorDrawPaintOrder {}; + +#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE) + // Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys + drawPass->fPipelineDrawAreas.push_back_n(fPipelineCache.count(), 0.f); +#endif + + TextureTracker textureBindingTracker(&fTextureDataCache); + for (const DrawList::SortKey& key : fSortKeys) { + const DrawList::Draw& draw = key.draw(); + const RenderStep& renderStep = key.renderStep(); + + const bool pipelineChange = key.pipelineIndex() != lastPipeline; +#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE) + drawPass->fPipelineDrawAreas[key.pipelineIndex()] += + draw.drawParams().clip().drawBounds().area(); +#endif + + const bool geomBindingChange = geometryUniformTracker.writeUniforms( + fGeometryUniformDataCache, bufferMgr, key.geometryUniformIndex()); + const bool shadingBindingChange = shadingUniformTracker.writeUniforms( + fShadingUniformDataCache, bufferMgr, key.shadingUniformIndex()); + + // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any + // dst copy to the texture bind group/descriptor set automatically when processing a + // BindTexturesAndSamplers call because they use a single group to contain all textures. + // However, from the DrawPass POV, we can run into the scenario where two pipelines have the + // same textures+samplers except one requires a dst-copy and the other does not. In this + // case we wouldn't necessarily insert a new command when the pipeline changed and then + // end up with layout validation errors. 
+ const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings( + key.textureBindingIndex()) || + (rebindTexturesOnPipelineChange && pipelineChange && + key.textureBindingIndex() != TextureDataCache::kInvalidIndex); + + std::optional<SkIRect> newScissor = + renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds); + + const bool stateChange = geomBindingChange || + shadingBindingChange || + textureBindingsChange || + newScissor.has_value(); + + // Update DrawWriter *before* we actually change any state so that accumulated draws from + // the previous state use the proper state. + if (pipelineChange) { + drawWriter.newPipelineState(renderStep.primitiveType(), + renderStep.staticDataStride(), + renderStep.appendDataStride(), + renderStep.getRenderStateFlags(), + draw.barrierBeforeDraws()); + } else if (stateChange) { + drawWriter.newDynamicState(); + } else if (draw.barrierBeforeDraws() != BarrierType::kNone && + priorDrawPaintOrder != draw.drawParams().order().paintOrder()) { + // Even if there is no pipeline or state change, we must consider whether a + // DrawPassCommand to add barriers must be inserted before any draw commands. If so, + // then determine if the current and prior draws overlap (ie, their PaintOrders are + // unequal). If so, perform a flush() to make sure the draw and add barrier commands are + // appended to the command list in the proper order. 
+ drawWriter.flush(); + } + + // Make state changes before accumulating new draw data + if (pipelineChange) { + drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex()); + lastPipeline = key.pipelineIndex(); + } + if (stateChange) { + if (geomBindingChange) { + geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep, + &drawPass->fCommandList); + } + if (shadingBindingChange) { + shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList); + } + if (textureBindingsChange) { + textureBindingTracker.bindTextures(&drawPass->fCommandList); + } + if (newScissor.has_value()) { + drawPass->fCommandList.setScissor(*newScissor); + lastScissor = *newScissor; + } + } + + uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0; + uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0; + skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex}; + renderStep.writeVertices(&drawWriter, draw.drawParams(), ssboIndices); + + if (bufferMgr->hasMappingFailed()) { + SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!"); + return nullptr; + } + + // Update priorDrawPaintOrder value before iterating to analyze the next draw. 
+ priorDrawPaintOrder = draw.drawParams().order().paintOrder(); + } + // Finish recording draw calls for any collected data still pending at end of the loop + drawWriter.flush(); + + drawPass->fBounds = fPassBounds.roundOut().asSkIRect(); + drawPass->fPipelineDescs = fPipelineCache.detach(); + drawPass->fSampledTextures = fTextureDataCache.detachTextures(); + + TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size()); + TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size()); + TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count()); + + this->reset(LoadOp::kLoad); + + return drawPass; +} + +void DrawList::reset(LoadOp loadOp, SkColor4f color) { + fLoadOp = loadOp; + fClearColor = color.premul().array(); + + fSortKeys.clear(); + fDraws.reset(); + fTransforms.reset(); + + // Accumulate renderer information for each draw added to this list + fRenderStepCount = 0; + fRequiresMSAA = false; + fDepthStencilFlags = DepthStencilFlags::kNone; + SkDEBUGCODE(fCoverageMaskShapeDrawCount = 0); + + fDstReadBounds = Rect::InfiniteInverted(); + fPassBounds = Rect::InfiniteInverted(); + + fGeometryUniformDataCache.reset(); + fShadingUniformDataCache.reset(); + fTextureDataCache.reset(); + fPipelineCache.reset(); +} + } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/DrawList.h b/src/gpu/graphite/DrawList.h index c704bc7..e5247df 100644 --- a/src/gpu/graphite/DrawList.h +++ b/src/gpu/graphite/DrawList.h
@@ -13,9 +13,12 @@ #include "src/base/SkBlockAllocator.h" #include "src/base/SkEnumBitMask.h" #include "src/base/SkTBlockList.h" +#include "src/gpu/graphite/ContextUtils.h" +#include "src/gpu/graphite/DrawCommands.h" #include "src/gpu/graphite/DrawOrder.h" #include "src/gpu/graphite/DrawParams.h" #include "src/gpu/graphite/PaintParams.h" +#include "src/gpu/graphite/PipelineData.h" #include "src/gpu/graphite/geom/Rect.h" #include "src/gpu/graphite/geom/Transform.h" @@ -25,6 +28,7 @@ namespace skgpu::graphite { +class DrawPass; class Geometry; class Renderer; @@ -84,21 +88,35 @@ const Geometry& geometry, const Clip& clip, DrawOrder ordering, - const PaintParams* paint, - const StrokeStyle* stroke, - bool dependsOnDst, - bool dstReadReq); + UniquePaintParamsID paintID, + SkEnumBitMask<DstUsage> dstUsage, + BarrierType barrierBeforeDraws, + PipelineDataGatherer* gatherer, + const StrokeStyle* stroke); + + std::unique_ptr<DrawPass> snapDrawPass(Recorder* recorder, + sk_sp<TextureProxy> target, + const SkImageInfo& targetInfo, + const DstReadStrategy dstReadStrategy); int renderStepCount() const { return fRenderStepCount; } + bool modifiesTarget() const { + return this->renderStepCount() > 0 || fLoadOp == LoadOp::kClear; + } + + // Discard all previously recorded draws and set to the requested load op (with optional clear + // color). + void reset(LoadOp op, SkColor4f clearColor = {0.f, 0.f, 0.f, 0.f}); + // Bounds for a dst read required by this DrawList. These bounds are only valid if drawsReadDst // returns true. 
const Rect& dstReadBounds() const { return fDstReadBounds; } + const Rect& passBounds() const { return fPassBounds; } bool drawsReadDst() const { return !fDstReadBounds.isEmptyNegativeOrNaN(); } bool drawsRequireMSAA() const { return fRequiresMSAA; } SkEnumBitMask<DepthStencilFlags> depthStencilFlags() const { return fDepthStencilFlags; } - SkDEBUGCODE(bool hasCoverageMaskDraws() const { return fCoverageMaskShapeDrawCount > 0; }) private: @@ -107,27 +125,133 @@ struct Draw { public: Draw(const Renderer* renderer, const Transform& transform, const Geometry& geometry, - const Clip& clip, DrawOrder order, const PaintParams* paint, const StrokeStyle* stroke, - bool dependsOnDst, bool dstReadReq) + const Clip& clip, DrawOrder order, BarrierType barrierBeforeDraws, + const StrokeStyle* stroke) : fRenderer(renderer) , fDrawParams(transform, geometry, clip, order, stroke) - , fPaintParams(paint ? std::optional<PaintParams>(*paint) : std::nullopt) - , fDependsOnDst(dependsOnDst) - , fDstReadReq(dstReadReq) {} - const Renderer* renderer() const { return fRenderer; } - const DrawParams& drawParams() const { return fDrawParams; } - const std::optional<PaintParams>& paintParams() const { return fPaintParams; } - bool dependsOnDst() const { return fDependsOnDst; } - bool dstReadReq() const { return fDstReadReq; } + , fBarrierBeforeDraws(barrierBeforeDraws) {} + + const Renderer* renderer() const { return fRenderer; } + const DrawParams& drawParams() const { return fDrawParams; } + const BarrierType& barrierBeforeDraws() const { return fBarrierBeforeDraws; } private: const Renderer* fRenderer; // Owned by SharedContext of Recorder that recorded the draw DrawParams fDrawParams; // The DrawParam's transform is owned by fTransforms of the DrawList - std::optional<PaintParams> fPaintParams; - bool fDependsOnDst; - bool fDstReadReq; + BarrierType fBarrierBeforeDraws; }; + template <uint64_t Bits, uint64_t Offset> + struct Bitfield { + static constexpr uint64_t kMask = ((uint64_t) 1 << 
Bits) - 1; + static constexpr uint64_t kOffset = Offset; + static constexpr uint64_t kBits = Bits; + + static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); } + static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; } + }; + + /** + * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's + * Renderer), which can be sorted independently. Each (step, draw) pair produces its own + * SortKey. + * + * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic + * binds within a pipeline, while still respecting the overall painter's order. This decreases + * the number of low-level draw commands in a command buffer and increases the size of those, + * allowing the GPU to operate more efficiently and have fewer bubbles within its own + * instruction stream. + * + * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant + * bits of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline + * description is encoded in two steps: + * 1. The index of the RenderStep packed in the high bits to ensure each step for a draw is + * ordered correctly. + * 2. An index into a cache of pipeline descriptions is used to encode the identity of the + * pipeline (SortKeys that differ in the bits from #1 necessarily would have different + * descriptions, but then the specific ordering of the RenderSteps isn't enforced). Last, + * the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass. + * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do + * not require rebinding uniform data or other state (e.g. scissor). Since the uniform data + * index and the pipeline description index are packed into indices and not actual pointers, + * a given SortKey is only valid for a specific DrawList->DrawPass conversion. 
+ */ + class SortKey { + public: + SortKey(const DrawList::Draw* draw, + int renderStep, + GraphicsPipelineCache::Index pipelineIndex, + UniformDataCache::Index geomUniformIndex, + UniformDataCache::Index shadingUniformIndex, + TextureDataCache::Index textureBindingIndex) + : fPipelineKey( + ColorDepthOrderField::set(draw->drawParams().order().paintOrder().bits()) + | StencilIndexField::set(draw->drawParams().order().stencilIndex().bits()) + | RenderStepField::set(static_cast<uint32_t>(renderStep)) + | PipelineField::set(pipelineIndex)) + , fUniformKey(GeometryUniformField::set(geomUniformIndex) | + ShadingUniformField::set(shadingUniformIndex) | + TextureBindingsField::set(textureBindingIndex)) + , fDraw(draw) { + SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex); + SkASSERT(renderStep <= draw->renderer()->numRenderSteps()); + } + + bool operator<(const SortKey& k) const { + return fPipelineKey < k.fPipelineKey || + (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey); + } + + const RenderStep& renderStep() const { + return fDraw->renderer()->step(RenderStepField::get(fPipelineKey)); + } + + const DrawList::Draw& draw() const { return *fDraw; } + + GraphicsPipelineCache::Index pipelineIndex() const { + return PipelineField::get(fPipelineKey); + } + UniformDataCache::Index geometryUniformIndex() const { + return GeometryUniformField::get(fUniformKey); + } + UniformDataCache::Index shadingUniformIndex() const { + return ShadingUniformField::get(fUniformKey); + } + TextureDataCache::Index textureBindingIndex() const { + return TextureBindingsField::get(fUniformKey); + } + + private: + // Fields are ordered from most-significant to least when sorting by 128-bit value. + // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we + // need to sort consistently. 
+ using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder) + using StencilIndexField = Bitfield<16, 32>; // sizeof(DisjointStencilIndex) + using RenderStepField = Bitfield<2, 30>; // bits >= log2(Renderer::kMaxRenderSteps) + using PipelineField = Bitfield<30, 0>; // bits >= log2(max total steps in draw list) + uint64_t fPipelineKey; + + // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are + // greater than or equal to 2^(bits-1) represent "no-data", while values between + // [0, 2^(bits-1)-1] can access data arrays without extra logic. + using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps) + using ShadingUniformField = Bitfield<17, 30>; // bits >= 1+log2(max total steps) + using TextureBindingsField = Bitfield<30, 0>; // bits >= 1+log2(max total steps) + uint64_t fUniformKey; + + // Backpointer to the draw that produced the sort key + const DrawList::Draw* fDraw; + + static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder)); + static_assert(StencilIndexField::kBits >= sizeof(DisjointStencilIndex)); + static_assert(RenderStepField::kBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps)); + static_assert(PipelineField::kBits >= SkNextLog2_portable(DrawList::kMaxRenderSteps)); + static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); + static_assert(ShadingUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); + static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); + }; + + // The returned Transform reference remains valid for the lifetime of the DrawList. const Transform& deduplicateTransform(const Transform&); @@ -145,9 +269,20 @@ // Tracked for all paints that read from the dst. If it is later determined that the // DstReadStrategy is not kTextureCopy, this value can simply be ignored. 
Rect fDstReadBounds = Rect::InfiniteInverted(); + Rect fPassBounds = Rect::InfiniteInverted(); // Other properties of draws contained within this DrawList bool fRequiresMSAA = false; SkEnumBitMask<DepthStencilFlags> fDepthStencilFlags = DepthStencilFlags::kNone; + + std::vector<SortKey> fSortKeys; + + UniformDataCache fGeometryUniformDataCache; + UniformDataCache fShadingUniformDataCache; + TextureDataCache fTextureDataCache; + GraphicsPipelineCache fPipelineCache; + + LoadOp fLoadOp = LoadOp::kLoad; + std::array<float, 4> fClearColor = {0.f, 0.f, 0.f, 0.f}; }; } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/DrawPass.cpp b/src/gpu/graphite/DrawPass.cpp index 403a5d4..254cfb6 100644 --- a/src/gpu/graphite/DrawPass.cpp +++ b/src/gpu/graphite/DrawPass.cpp
@@ -6,325 +6,27 @@ */ #include "src/gpu/graphite/DrawPass.h" -#include "include/core/SkBlendMode.h" -#include "include/core/SkImageInfo.h" -#include "include/gpu/graphite/Recorder.h" -#include "include/gpu/graphite/TextureInfo.h" -#include "include/private/base/SkAlign.h" -#include "include/private/base/SkAssert.h" -#include "include/private/base/SkSpan_impl.h" -#include "src/base/SkMathPriv.h" -#include "src/base/SkTBlockList.h" -#include "src/base/SkVx.h" #include "src/core/SkTraceEvent.h" -#include "src/gpu/BufferWriter.h" -#include "src/gpu/graphite/Buffer.h" -#include "src/gpu/graphite/BufferManager.h" -#include "src/gpu/graphite/Caps.h" -#include "src/gpu/graphite/CommandBuffer.h" -#include "src/gpu/graphite/ContextUtils.h" -#include "src/gpu/graphite/DrawList.h" -#include "src/gpu/graphite/DrawOrder.h" -#include "src/gpu/graphite/DrawParams.h" -#include "src/gpu/graphite/DrawTypes.h" -#include "src/gpu/graphite/DrawWriter.h" -#include "src/gpu/graphite/GraphicsPipeline.h" -#include "src/gpu/graphite/GraphicsPipelineDesc.h" -#include "src/gpu/graphite/KeyContext.h" #include "src/gpu/graphite/Log.h" #include "src/gpu/graphite/PaintParams.h" #include "src/gpu/graphite/PaintParamsKey.h" #include "src/gpu/graphite/PipelineCreationTask.h" #include "src/gpu/graphite/PipelineData.h" -#include "src/gpu/graphite/RecorderPriv.h" -#include "src/gpu/graphite/Renderer.h" -#include "src/gpu/graphite/Resource.h" #include "src/gpu/graphite/Resource.h" // IWYU pragma: keep #include "src/gpu/graphite/ResourceProvider.h" #include "src/gpu/graphite/ResourceTypes.h" #include "src/gpu/graphite/RuntimeEffectDictionary.h" #include "src/gpu/graphite/Texture.h" // IWYU pragma: keep #include "src/gpu/graphite/TextureProxy.h" -#include "src/gpu/graphite/UniquePaintParamsID.h" -#include "src/gpu/graphite/geom/Rect.h" -#include "src/gpu/graphite/geom/Transform.h" #if defined(SK_TRACE_GRAPHITE_PIPELINE_USE) #include "src/gpu/graphite/RenderPassDesc.h" #endif -#include <algorithm> -#include 
<cstdint> -#include <optional> -#include <tuple> -#include <vector> - using namespace skia_private; namespace skgpu::graphite { -class ShaderCodeDictionary; - -namespace { - -// Helper to manage packed fields within a uint64_t -template <uint64_t Bits, uint64_t Offset> -struct Bitfield { - static constexpr uint64_t kMask = ((uint64_t) 1 << Bits) - 1; - static constexpr uint64_t kOffset = Offset; - static constexpr uint64_t kBits = Bits; - - static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); } - static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; } -}; - -// NOTE: TextureBinding's use as a key type in DenseBiMap relies on the fact that the underlying -// data has been de-duplicated by a PipelineDataCache earlier, so that the bit identity of the data -// blocks (e.g. address+size) is equivalent to the content equality of the texture lists. - -// Tracks the combination of textures from the paint and from the RenderStep to describe the full -// binding that needs to be in the command list. -struct TextureBinding { - TextureDataBlock fPaintTextures; - TextureDataBlock fStepTextures; - - bool operator==(const TextureBinding& other) const { - return fPaintTextures == other.fPaintTextures && - fStepTextures == other.fStepTextures; - } - bool operator!=(const TextureBinding& other) const { return !(*this == other); } - - int numTextures() const { - return (fPaintTextures ? fPaintTextures.numTextures() : 0) + - (fStepTextures ? fStepTextures.numTextures() : 0); - } -}; - -// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when -// bindings need to change between draws. 
-class UniformTracker { -public: - UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {} - - bool writeUniforms(UniformDataCache& uniformCache, - DrawBufferManager* bufferMgr, - UniformDataCache::Index index) { - if (index >= UniformDataCache::kInvalidIndex) { - return false; - } - - if (index == fLastIndex) { - return false; - } - fLastIndex = index; - - UniformDataCache::Entry& uniformData = uniformCache.lookup(index); - const size_t uniformDataSize = uniformData.fCpuData.size(); - - // Upload the uniform data if we haven't already. - // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage - // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile. - if (!uniformData.fBufferBinding.fBuffer || - (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) { - UniformWriter writer; - std::tie(writer, uniformData.fBufferBinding) = - fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize) - : bufferMgr->getUniformWriter(1, uniformDataSize); - - // Early out if buffer mapping failed. - if (!writer) { - return {}; - } - - writer.write(uniformData.fCpuData.data(), uniformDataSize); - - if (fUseStorageBuffers) { - // When using storage buffers, store the SSBO index in the binding's offset field - // and always use the entire buffer's size in the size field. 
- SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0); - uniformData.fBufferBinding.fOffset /= uniformDataSize; - uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size(); - } - } - - const bool needsRebind = - uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer || - (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset); - - fLastBinding = uniformData.fBufferBinding; - - return needsRebind; - } - - void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) { - BindBufferInfo binding = fLastBinding; - if (fUseStorageBuffers) { - // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding. - binding.fOffset = 0; - } - commandList->bindUniformBuffer(binding, slot); - } - - uint32_t ssboIndex() const { - // The SSBO index for the last-bound storage buffer is stored in the binding's offset field. - return fLastBinding.fOffset; - } - -private: - // Internally track the last binding returned, so that we know whether new uploads or rebindings - // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the - // index in the storage buffer of the last-written uniforms, and the offsets used for actual - // bindings are always zero. - BindBufferInfo fLastBinding; - - // This keeps track of the last index used for writing uniforms from a provided uniform cache. - // If a provided index matches the last index, the uniforms are assumed to already be written - // and no additional uploading is performed. This assumes a UniformTracker will always be - // provided with the same uniform cache. - UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex; - - const bool fUseStorageBuffers; -}; - -// Tracks when to issue BindTexturesAndSamplers commands to a command list and converts -// TextureDataBlocks to that representation as needed. 
-class TextureTracker { -public: - TextureTracker(TextureDataCache* textureCache) - : fTextureCache(textureCache) {} - - bool setCurrentTextureBindings(TextureDataCache::Index bindingIndex) { - if (bindingIndex < TextureDataCache::kInvalidIndex && fLastIndex != bindingIndex) { - fLastIndex = bindingIndex; - return true; - } - // No binding change - return false; - } - - void bindTextures(DrawPassCommands::List* commandList) { - SkASSERT(fLastIndex < TextureDataCache::kInvalidIndex); - TextureDataBlock binding = fTextureCache->lookup(fLastIndex); - - auto [textures, samplers] = - commandList->bindDeferredTexturesAndSamplers(binding.numTextures()); - - for (int i = 0; i < binding.numTextures(); ++i) { - auto [t, s] = binding.texture(i); - textures[i] = t.get(); - samplers[i] = s; - } - } - -private: - TextureDataCache::Index fLastIndex = TextureDataCache::kInvalidIndex; - - TextureDataCache* const fTextureCache; -}; - -} // namespace - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -/** - * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's - * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey. - * - * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds - * within a pipeline, while still respecting the overall painter's order. This decreases the number - * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU - * to operate more efficiently and have fewer bubbles within its own instruction stream. - * - * The Draw's CompresssedPaintersOrder and DisjointStencilIndex represent the most significant bits - * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline - * description is encoded in two steps: - * 1. 
The index of the RenderStep packed in the high bits to ensure each step for a draw is - * ordered correctly. - * 2. An index into a cache of pipeline descriptions is used to encode the identity of the - * pipeline (SortKeys that differ in the bits from #1 necessarily would have different - * descriptions, but then the specific ordering of the RenderSteps isn't enforced). - * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass. - * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not - * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and - * the pipeline description index are packed into indices and not actual pointers, a given SortKey - * is only valid for the a specific DrawList->DrawPass conversion. - */ -class DrawPass::SortKey { -public: - SortKey(const DrawList::Draw* draw, - int renderStep, - GraphicsPipelineCache::Index pipelineIndex, - UniformDataCache::Index geomUniformIndex, - UniformDataCache::Index shadingUniformIndex, - TextureDataCache::Index textureBindingIndex) - : fPipelineKey(ColorDepthOrderField::set(draw->drawParams().order().paintOrder().bits()) | - StencilIndexField::set(draw->drawParams().order().stencilIndex().bits()) | - RenderStepField::set(static_cast<uint32_t>(renderStep)) | - PipelineField::set(pipelineIndex)) - , fUniformKey(GeometryUniformField::set(geomUniformIndex) | - ShadingUniformField::set(shadingUniformIndex) | - TextureBindingsField::set(textureBindingIndex)) - , fDraw(draw) { - SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex); - SkASSERT(renderStep <= draw->renderer()->numRenderSteps()); - } - - bool operator<(const SortKey& k) const { - return fPipelineKey < k.fPipelineKey || - (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey); - } - - const RenderStep& renderStep() const { - return fDraw->renderer()->step(RenderStepField::get(fPipelineKey)); - } - - const DrawList::Draw& 
draw() const { return *fDraw; } - - GraphicsPipelineCache::Index pipelineIndex() const { - return PipelineField::get(fPipelineKey); - } - UniformDataCache::Index geometryUniformIndex() const { - return GeometryUniformField::get(fUniformKey); - } - UniformDataCache::Index shadingUniformIndex() const { - return ShadingUniformField::get(fUniformKey); - } - TextureDataCache::Index textureBindingIndex() const { - return TextureBindingsField::get(fUniformKey); - } - -private: - // Fields are ordered from most-significant to least when sorting by 128-bit value. - // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we - // need to sort consistently. - using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder) - using StencilIndexField = Bitfield<16, 32>; // sizeof(DisjointStencilIndex) - using RenderStepField = Bitfield<2, 30>; // bits >= log2(Renderer::kMaxRenderSteps) - using PipelineField = Bitfield<30, 0>; // bits >= log2(max total steps in draw list) - uint64_t fPipelineKey; - - // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are - // greater than or equal to 2^(bits-1) represent "no-data", while values between - // [0, 2^(bits-1)-1] can access data arrays without extra logic. 
- using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps) - using ShadingUniformField = Bitfield<17, 30>; // bits >= 1+log2(max total steps) - using TextureBindingsField = Bitfield<30, 0>; // bits >= 1+log2(max total steps) - uint64_t fUniformKey; - - // Backpointer to the draw that produced the sort key - const DrawList::Draw* fDraw; - - static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder)); - static_assert(StencilIndexField::kBits >= sizeof(DisjointStencilIndex)); - static_assert(RenderStepField::kBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps)); - static_assert(PipelineField::kBits >= SkNextLog2_portable(DrawList::kMaxRenderSteps)); - static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); - static_assert(ShadingUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); - static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps)); -}; - -/////////////////////////////////////////////////////////////////////////////////////////////////// - DrawPass::DrawPass(sk_sp<TextureProxy> target, std::pair<LoadOp, StoreOp> ops, std::array<float, 4> clearColor, @@ -337,306 +39,6 @@ DrawPass::~DrawPass() = default; -namespace { -bool paint_uses_advanced_blend_equation(std::optional<PaintParams> drawPaintParams) { - if (!drawPaintParams.has_value() || !drawPaintParams.value().asFinalBlendMode().has_value()) { - return false; - } - - return (int)drawPaintParams.value().asFinalBlendMode().value() > - (int)SkBlendMode::kLastCoeffMode; -} -} // anonymous - -std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder, - std::unique_ptr<DrawList> draws, - sk_sp<TextureProxy> target, - const SkImageInfo& targetInfo, - std::pair<LoadOp, StoreOp> ops, - std::array<float, 4> clearColor, - const DstReadStrategy dstReadStrategy) { - // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. 
Any change to - // its size should be done with care and good reason. The performance of sorting the keys is - // heavily tied to the total size. - // - // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just - // 16 bytes. There are several ways this could be done if necessary: - // - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to - // the uniform data => 8 bytes of key, 8 bytes of pointer. - // - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the - // 4 low bits from the Draw* pointer since it's 16 byte aligned. - // - Compact the Draw* to an index into the original collection, although that has extra - // indirection and does not work as well with SkTBlockList. - // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower - // than an 8 byte key and unmodified pointer. - static_assert(sizeof(DrawPass::SortKey) == - SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey))); - - TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count()); - - // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass - // is returned from Make(), it is considered immutable. - std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor, - recorder->priv().refFloatStorageManager())); - - Rect passBounds = Rect::InfiniteInverted(); - - UniformDataCache geometryUniformDataCache; - UniformDataCache shadingUniformDataCache; - TextureDataCache textureDataCache; - DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager(); - if (bufferMgr->hasMappingFailed()) { - SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!"); - return nullptr; - } - - GraphicsPipelineCache pipelineCache; - - // Geometry uniforms are currently always UBO-backed. 
- const Caps* caps = recorder->priv().caps(); - const bool useStorageBuffers = caps->storageBufferSupport(); - const ResourceBindingRequirements& bindingReqs = caps->resourceBindingRequirements(); - Layout uniformLayout = - useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout; - - ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary(); - PaintParamsKeyBuilder builder(dict); - - // The initial layout we pass here is not important as it will be re-assigned when writing - // shading and geometry uniforms below. - PipelineDataGatherer gatherer(uniformLayout); - std::vector<SortKey> keys; - keys.reserve(draws->renderStepCount()); - - for (const DrawList::Draw& draw : draws->fDraws.items()) { - gatherer.resetForDraw(); - - UniquePaintParamsID shaderID = UniquePaintParamsID::Invalid(); - UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex; - - if (draw.paintParams().has_value()) { - SkDEBUGCODE(builder.checkReset()); - SkDEBUGCODE(gatherer.checkReset()); - - auto& geometry = draw.drawParams().geometry(); - KeyContext keyContext(recorder, - drawPass->floatStorageManager(), - &builder, - &gatherer, - draw.drawParams().transform(), - targetInfo.colorInfo(), - geometry.isShape() || geometry.isEdgeAAQuad() - ? 
KeyGenFlags::kDefault - : KeyGenFlags::kDisableSamplingOptimization, - draw.paintParams().value().color()); -#if defined(SK_DEBUG) - auto result = draw.paintParams().value().toKey(keyContext); - auto [dependsOnDst, dstReadReq, usesAdvancedBlend] = *result; -#else - draw.paintParams().value().toKey(keyContext); -#endif - SkASSERT(dependsOnDst == draw.dependsOnDst()); - SkASSERT(dstReadReq == draw.dstReadReq()); - SkASSERT(usesAdvancedBlend == paint_uses_advanced_blend_equation(draw.paintParams())); - - shaderID = recorder->priv().shaderCodeDictionary()->findOrCreate(&builder); - if (shaderID.isValid()) { - UniformDataBlock paintUniforms = gatherer.endPaintData(); - if (paintUniforms) { - shadingUniformIndex = shadingUniformDataCache.insert(paintUniforms); - } - } - } // else depth-only, no paint data - - // Create a sort key for every render step in this draw, extracting out any - // RenderStep-specific data. - gatherer.setRenderStepManagerActive(); - for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) { - gatherer.rewindForRenderStep(); - - const RenderStep* const step = draw.renderer()->steps()[stepIndex]; - const bool performsShading = draw.paintParams().has_value() && step->performsShading(); - - GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert( - { step->renderStepID(), - performsShading ? shaderID : UniquePaintParamsID::Invalid() }); - - step->writeUniformsAndTextures(draw.drawParams(), &gatherer); - auto [stepUniforms, combinedTextures] = gatherer.endRenderStepData(performsShading); - - UniformDataCache::Index geomUniformIndex = stepUniforms ? - geometryUniformDataCache.insert(stepUniforms) : UniformDataCache::kInvalidIndex; - TextureDataCache::Index textureBindingIndex = combinedTextures ? 
- textureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex; - - keys.push_back({&draw, stepIndex, pipelineIndex, - geomUniformIndex, shadingUniformIndex, textureBindingIndex}); - } - - passBounds.join(draw.drawParams().clip().drawBounds()); - } - - // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so - // algorithms that approach O(n) in that condition may be favorable. Alternatively, could - // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than - // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place - // vs. algorithms that require an extra O(n) storage. - // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding - // bugs in the DrawOrder determination code? - std::sort(keys.begin(), keys.end()); - DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr); - GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex; - const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions()); - SkIRect lastScissor = targetBounds; - - SkASSERT(drawPass->fTarget->isFullyLazy() || - SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor)); - drawPass->fCommandList.setScissor(lastScissor); - - UniformTracker geometryUniformTracker(useStorageBuffers); - UniformTracker shadingUniformTracker(useStorageBuffers); - - // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always - // bound separately from the rest of the textures. - const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy; - // Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there - // is no pipeline or state change, then we must compare the current and prior draw's PaintOrders - // to determine if the draws overlap. 
If they do, we must inject a flush between them such that - // the barrier addition and draw commands are ordered correctly. - CompressedPaintersOrder priorDrawPaintOrder {}; - - // If a draw uses an advanced blend mode and the device supports this via noncoherent blending, - // then we must insert the appropriate barrier and ensure that the draws do not overlap. - const bool advancedBlendsRequireBarrier = - caps->blendEquationSupport() == Caps::BlendEquationSupport::kAdvancedNoncoherent; - -#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE) - // Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys - drawPass->fPipelineDrawAreas.push_back_n(pipelineCache.count(), 0.f); -#endif - - TextureTracker textureBindingTracker(&textureDataCache); - for (const SortKey& key : keys) { - const DrawList::Draw& draw = key.draw(); - const RenderStep& renderStep = key.renderStep(); - - const bool pipelineChange = key.pipelineIndex() != lastPipeline; -#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE) - drawPass->fPipelineDrawAreas[key.pipelineIndex()] += - draw.drawParams().clip().drawBounds().area(); -#endif - - const bool geomBindingChange = geometryUniformTracker.writeUniforms( - geometryUniformDataCache, bufferMgr, key.geometryUniformIndex()); - const bool shadingBindingChange = shadingUniformTracker.writeUniforms( - shadingUniformDataCache, bufferMgr, key.shadingUniformIndex()); - - // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any - // dst copy to the texture bind group/descriptor set automatically when processing a - // BindTexturesAndSamplers call because they use a single group to contain all textures. - // However, from the DrawPass POV, we can run into the scenario where two pipelines have the - // same textures+samplers except one requires a dst-copy and the other does not. In this - // case we wouldn't necessarily insert a new command when the pipeline changed and then - // end up with layout validation errors. 
- const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings( - key.textureBindingIndex()) || - (rebindTexturesOnPipelineChange && pipelineChange && - key.textureBindingIndex() != TextureDataCache::kInvalidIndex); - - std::optional<SkIRect> newScissor = - renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds); - - // Determine + analyze draw properties to inform whether we need to issue barriers before - // issuing draw calls. - bool drawsOverlap = priorDrawPaintOrder != draw.drawParams().order().paintOrder(); - bool drawUsesAdvancedBlendMode = paint_uses_advanced_blend_equation(draw.paintParams()); - - std::optional<BarrierType> barrierToAddBeforeDraws = std::nullopt; - if (dstReadStrategy == DstReadStrategy::kReadFromInput && draw.dstReadReq()) { - barrierToAddBeforeDraws = BarrierType::kReadDstFromInput; - } - if (drawUsesAdvancedBlendMode && - caps->supportsHardwareAdvancedBlending() && - advancedBlendsRequireBarrier) { - // A draw should only read from the dst OR use hardware for advanced blend modes. - SkASSERT(!draw.dstReadReq()); - - barrierToAddBeforeDraws = BarrierType::kAdvancedNoncoherentBlend; - } - - const bool stateChange = geomBindingChange || - shadingBindingChange || - textureBindingsChange || - newScissor.has_value(); - - // Update DrawWriter *before* we actually change any state so that accumulated draws from - // the previous state use the proper state. - if (pipelineChange) { - drawWriter.newPipelineState(renderStep.primitiveType(), - renderStep.staticDataStride(), - renderStep.appendDataStride(), - renderStep.getRenderStateFlags(), - barrierToAddBeforeDraws); - } else if (stateChange) { - drawWriter.newDynamicState(); - } else if (barrierToAddBeforeDraws.has_value() && drawsOverlap) { - // Even if there is no pipeline or state change, we must consider whether a - // DrawPassCommand to add barriers must be inserted before any draw commands. 
If so, - // then determine if the current and prior draws overlap (ie, their PaintOrders are - // unequal). If so, perform a flush() to make sure the draw and add barrier commands are - // appended to the command list in the proper order. - drawWriter.flush(); - } - - // Make state changes before accumulating new draw data - if (pipelineChange) { - drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex()); - lastPipeline = key.pipelineIndex(); - } - if (stateChange) { - if (geomBindingChange) { - geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep, - &drawPass->fCommandList); - } - if (shadingBindingChange) { - shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList); - } - if (textureBindingsChange) { - textureBindingTracker.bindTextures(&drawPass->fCommandList); - } - if (newScissor.has_value()) { - drawPass->fCommandList.setScissor(*newScissor); - lastScissor = *newScissor; - } - } - - uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0; - uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0; - skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex}; - renderStep.writeVertices(&drawWriter, draw.drawParams(), ssboIndices); - - if (bufferMgr->hasMappingFailed()) { - SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!"); - return nullptr; - } - - // Update priorDrawPaintOrder value before iterating to analyze the next draw. 
- priorDrawPaintOrder = draw.drawParams().order().paintOrder(); - } - // Finish recording draw calls for any collected data still pending at end of the loop - drawWriter.flush(); - - drawPass->fBounds = passBounds.roundOut().asSkIRect(); - drawPass->fPipelineDescs = pipelineCache.detach(); - drawPass->fSampledTextures = textureDataCache.detachTextures(); - - TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size()); - TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size()); - TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count()); - - return drawPass; -} - bool DrawPass::prepareResources(ResourceProvider* resourceProvider, sk_sp<const RuntimeEffectDictionary> runtimeDict, const RenderPassDesc& renderPassDesc) {
diff --git a/src/gpu/graphite/DrawPass.h b/src/gpu/graphite/DrawPass.h index 8738756..71fa368 100644 --- a/src/gpu/graphite/DrawPass.h +++ b/src/gpu/graphite/DrawPass.h
@@ -15,12 +15,6 @@ #include "src/gpu/graphite/GraphicsPipelineDesc.h" #include "src/gpu/graphite/GraphicsPipelineHandle.h" -#include <array> -#include <cstddef> -#include <cstdint> -#include <memory> -#include <utility> - struct SkImageInfo; namespace skgpu::graphite { @@ -29,7 +23,6 @@ class DrawList; class FloatStorageManager; class GraphicsPipeline; -class Recorder; struct RenderPassDesc; class ResourceProvider; class RuntimeEffectDictionary; @@ -55,16 +48,6 @@ public: ~DrawPass(); - // Create a DrawPass that renders the DrawList into `target` with the given load/store ops and - // clear color. - static std::unique_ptr<DrawPass> Make(Recorder*, - std::unique_ptr<DrawList>, - sk_sp<TextureProxy> target, - const SkImageInfo& targetInfo, - std::pair<LoadOp, StoreOp>, - std::array<float, 4> clearColor, - const DstReadStrategy dstReadStrategy); - // Defined relative to the top-left corner of the surface the DrawPass renders to, and is // contained within its dimensions. const SkIRect& bounds() const { return fBounds; } @@ -101,7 +84,7 @@ [[nodiscard]] bool addResourceRefs(ResourceProvider*, CommandBuffer*); private: - class SortKey; + friend class DrawList; // For the constructor DrawPass(sk_sp<TextureProxy> target, std::pair<LoadOp, StoreOp> ops,
diff --git a/src/gpu/graphite/DrawTypes.h b/src/gpu/graphite/DrawTypes.h index 3da8167..972f6af 100644 --- a/src/gpu/graphite/DrawTypes.h +++ b/src/gpu/graphite/DrawTypes.h
@@ -175,10 +175,19 @@ // These barrier types are not utilized by all backends, but we define them at this level anyhow // since it impacts the logic used to group & sort draws. enum class BarrierType : uint8_t { + kNone, // No barrier requested; DrawWriter only calls addBarrier() for values other than this kAdvancedNoncoherentBlend, kReadDstFromInput, }; +enum class DstUsage : uint8_t { // Bit flags; each enumerator is a distinct bit, combined via the mask ops declared below + kNone = 0, + kDependsOnDst = 0b001, // NOTE(review): names appear to parallel the dependsOnDst / dstReadReq / usesAdvancedBlend results of PaintParams::toKey() - confirm + kDstReadRequired = 0b010, + kAdvancedBlend = 0b100, +}; +SK_MAKE_BITMASK_OPS(DstUsage) + enum class RenderStateFlags : unsigned { kNone = 0b0000, kFixed = 0b0001, // Uses explicit DrawWriter::draw functions
diff --git a/src/gpu/graphite/DrawWriter.cpp b/src/gpu/graphite/DrawWriter.cpp index f35680b..753bd63 100644 --- a/src/gpu/graphite/DrawWriter.cpp +++ b/src/gpu/graphite/DrawWriter.cpp
@@ -92,8 +92,8 @@ // Before any draw commands are added, check if the DrawWriter has an assigned barrier type // to issue prior to draw calls. - if (fBarrierToIssueBeforeDraws.has_value()) { - fCommandList->addBarrier(fBarrierToIssueBeforeDraws.value()); + if (fBarrierToIssueBeforeDraws != BarrierType::kNone) { + fCommandList->addBarrier(fBarrierToIssueBeforeDraws); } // Issue the appropriate draw call (instanced vs. non-instanced) based on the current
diff --git a/src/gpu/graphite/DrawWriter.h b/src/gpu/graphite/DrawWriter.h index 9475782..213eeb2 100644 --- a/src/gpu/graphite/DrawWriter.h +++ b/src/gpu/graphite/DrawWriter.h
@@ -99,7 +99,7 @@ size_t staticStride, size_t appendStride, SkEnumBitMask<RenderStateFlags> newRenderState, - std::optional<BarrierType> barrierType) { + BarrierType barrierType) { this->flush(); // Once flushed, any pending data must have been drawn. @@ -114,7 +114,7 @@ // aligned, regardless of the previous render state. fShouldAlign4 = SkToBool(newRenderState & RenderStateFlags::kAppendVertices); - // Assign the (optional) barrier type. If a valid value, then the DrawWriter will append + // Assign the barrier type. If it is not BarrierType::kNone, the DrawWriter will append // AddBarrier commands of the indicated type prior to appending any draw commands used with // this pipeline. fBarrierToIssueBeforeDraws = barrierType; @@ -271,7 +271,7 @@ // to let the next reserve() call know that we need a 4 count aligned offset. bool fShouldAlign4; - std::optional<BarrierType> fBarrierToIssueBeforeDraws = std::nullopt; + BarrierType fBarrierToIssueBeforeDraws = BarrierType::kNone; void flushInternal();
diff --git a/src/gpu/graphite/KeyContext.cpp b/src/gpu/graphite/KeyContext.cpp index d6cb80f..83fe12c 100644 --- a/src/gpu/graphite/KeyContext.cpp +++ b/src/gpu/graphite/KeyContext.cpp
@@ -9,6 +9,7 @@ #include "include/effects/SkRuntimeEffect.h" #include "src/core/SkRuntimeEffectPriv.h" +#include "src/gpu/graphite/DrawContext.h" #include "src/gpu/graphite/PaintParams.h" #include "src/gpu/graphite/RecorderPriv.h" #include "src/gpu/graphite/RuntimeEffectDictionary.h" @@ -31,6 +32,7 @@ , fCaps(caps) {} KeyContext::KeyContext(skgpu::graphite::Recorder* recorder, + DrawContext* drawContext, FloatStorageManager* floatStorageManager, PaintParamsKeyBuilder* paintParamsKeyBuilder, PipelineDataGatherer* pipelineDataGatherer, @@ -39,6 +41,7 @@ SkEnumBitMask<KeyGenFlags> initialFlags, const SkColor4f& paintColor) : fRecorder(recorder) + , fDC(drawContext) , fFloatStorageManager(floatStorageManager) , fPaintParamsKeyBuilder(paintParamsKeyBuilder) , fPipelineDataGatherer(pipelineDataGatherer) @@ -55,6 +58,7 @@ KeyContext::KeyContext(const KeyContext& other) : fRecorder(other.fRecorder) + , fDC(other.fDC) , fFloatStorageManager(other.fFloatStorageManager) , fPaintParamsKeyBuilder(other.fPaintParamsKeyBuilder) , fPipelineDataGatherer(other.fPipelineDataGatherer)
diff --git a/src/gpu/graphite/KeyContext.h b/src/gpu/graphite/KeyContext.h index 6bb76e9..9b6126e 100644 --- a/src/gpu/graphite/KeyContext.h +++ b/src/gpu/graphite/KeyContext.h
@@ -21,6 +21,7 @@ namespace skgpu::graphite { class Caps; +class DrawContext; enum class DstReadStrategy : uint8_t; class FloatStorageManager; class PaintParamsKeyBuilder; @@ -62,6 +63,7 @@ // Constructor for the ExtractPaintData code path (i.e., with a Recorder) KeyContext(Recorder*, + DrawContext*, FloatStorageManager*, PaintParamsKeyBuilder*, PipelineDataGatherer*, @@ -74,6 +76,7 @@ ~KeyContext(); Recorder* recorder() const { return fRecorder; } + DrawContext* drawContext() const { return fDC; } const Caps* caps() const { return fCaps; } @@ -95,6 +98,7 @@ protected: Recorder* fRecorder = nullptr; + DrawContext* fDC = nullptr; FloatStorageManager* fFloatStorageManager; PaintParamsKeyBuilder* fPaintParamsKeyBuilder; PipelineDataGatherer* fPipelineDataGatherer;
diff --git a/src/gpu/graphite/KeyHelpers.cpp b/src/gpu/graphite/KeyHelpers.cpp index ebc02aa..b4a4e5e 100644 --- a/src/gpu/graphite/KeyHelpers.cpp +++ b/src/gpu/graphite/KeyHelpers.cpp
@@ -1503,26 +1503,6 @@ keyContext.paintParamsKeyBuilder()->endBlock(); } -void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - SkSpan<const SkRuntimeEffect::ChildPtr> children) { - for (const auto& child : children) { - if (child.type().has_value()) { - switch (*child.type()) { - case SkRuntimeEffect::ChildType::kShader: - NotifyImagesInUse(recorder, drawContext, child.shader()); - break; - case SkRuntimeEffect::ChildType::kColorFilter: - NotifyImagesInUse(recorder, drawContext, child.colorFilter()); - break; - case SkRuntimeEffect::ChildType::kBlender: - NotifyImagesInUse(recorder, drawContext, child.blender()); - break; - } - } // else a null child is a no-op, so cannot sample an image - } -} - } // anonymous namespace void AddToKey(const KeyContext& keyContext, const SkBlender* blender) { @@ -1546,16 +1526,6 @@ SkUNREACHABLE; } -void NotifyImagesInUse(Recorder* recorder, DrawContext* drawContext, const SkBlender* blender) { - if (!blender) { - return; - } - if (as_BB(blender)->type() == SkBlenderBase::BlenderType::kRuntime) { - const auto* rbb = static_cast<const SkRuntimeBlender*>(blender); - notify_in_use(recorder, drawContext, rbb->children()); - } // else blend mode doesn't reference images -} - //-------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------- static SkPMColor4f map_color(const SkColor4f& c, @@ -1713,26 +1683,6 @@ SkUNREACHABLE; } -void NotifyImagesInUse(Recorder* recorder, DrawContext* drawContext, const SkColorFilter* filter) { - if (!filter) { - return; - } - if (as_CFB(filter)->type() == SkColorFilterBase::Type::kCompose) { - // Recurse to two children - const auto* cf = static_cast<const SkComposeColorFilter*>(filter); - NotifyImagesInUse(recorder, drawContext, cf->inner().get()); - NotifyImagesInUse(recorder, drawContext, cf->outer().get()); - } else if (as_CFB(filter)->type() == 
SkColorFilterBase::Type::kWorkingFormat) { - // Recurse to one child - const auto* wfcf = static_cast<const SkWorkingFormatColorFilter*>(filter); - NotifyImagesInUse(recorder, drawContext, wfcf->child().get()); - } else if (as_CFB(filter)->type() == SkColorFilterBase::Type::kRuntime) { - // Recurse to all children - const auto* rcf = static_cast<const SkRuntimeColorFilter*>(filter); - notify_in_use(recorder, drawContext, rcf->children()); - } // else other color filters do not rely on SkImages -} - // ================================================================== static void add_to_key(const KeyContext& keyContext, const SkBlendShader* shader) { @@ -1749,13 +1699,6 @@ AddToKey(keyContext, shader->dst().get()); }); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkBlendShader* shader) { - // SkBlendShader uses a fixed blend mode, so there's no blender to recurse through - NotifyImagesInUse(recorder, drawContext, shader->src().get()); - NotifyImagesInUse(recorder, drawContext, shader->dst().get()); -} static SkMatrix matrix_invert_or_identity(const SkMatrix& matrix) { SkMatrix inverseMatrix; @@ -1781,9 +1724,6 @@ keyContext.paintParamsKeyBuilder()->endBlock(); } -static void notify_in_use(Recorder* recorder, DrawContext* drawContext, const SkCTMShader* shader) { - NotifyImagesInUse(recorder, drawContext, shader->proxyShader().get()); -} static void add_to_key(const KeyContext& keyContext, const SkColorShader* shader) { SkASSERT(shader); @@ -1794,9 +1734,6 @@ SolidColorShaderBlock::AddBlock(keyContext, color); } -static void notify_in_use(Recorder*, DrawContext*, const SkColorShader*) { - // No-op -} static void add_to_key(const KeyContext& keyContext, const SkColorFilterShader* shader) { SkASSERT(shader); @@ -1809,12 +1746,6 @@ AddToKey(keyContext, shader->filter().get()); }); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkColorFilterShader* shader) { - NotifyImagesInUse(recorder, 
drawContext, shader->shader().get()); - NotifyImagesInUse(recorder, drawContext, shader->filter().get()); -} static void add_to_key(const KeyContext& keyContext, const SkCoordClampShader* shader) { SkASSERT(shader); @@ -1826,18 +1757,10 @@ AddToKey(childContext, shader->shader().get()); keyContext.paintParamsKeyBuilder()->endBlock(); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkCoordClampShader* shader) { - NotifyImagesInUse(recorder, drawContext, shader->shader().get()); -} static void add_to_key(const KeyContext& keyContext, const SkEmptyShader*) { keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kPriorOutput); } -static void notify_in_use(Recorder*, DrawContext*, const SkEmptyShader*) { - // No-op -} static void add_yuv_image_to_key(const KeyContext& keyContext, const SkImageShader* origShader, @@ -2000,23 +1923,20 @@ keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError); return; } - if (!as_IB(shader->image())->isGraphiteBacked()) { - // GetGraphiteBacked() created a new image (or fetched a cached image) from the client - // image provider. This image was not available when NotifyInUse() visited the shader tree, - // so call notify again. These images shouldn't really be producing new tasks since it's - // unlikely that a client will be fulfilling with a dynamic image that wraps a long-lived - // SkSurface. However, the images can be linked to a surface that rendered the initial - // content and not calling notifyInUse() prevents unlinking the image from the Device. - // If the client image provider then holds on to many of these images, the leaked Device and - // DrawContext memory can be surprisingly high. b/338453542. - // TODO (b/330864257): Once paint keys are extracted at draw time, AddToKey() will be - // fully responsible for notifyInUse() calls and then we can simply always call this on - // `imageToDraw`. 
The DrawContext that samples the image will also be available to AddToKey - // so we won't have to pass in nullptr. - SkASSERT(as_IB(imageToDraw)->isGraphiteBacked()); - static_cast<Image_Base*>(imageToDraw.get())->notifyInUse(keyContext.recorder(), - /*drawContext=*/nullptr); - } + + // We must call notifyInUse() here to link the final, Graphite-backed 'imageToDraw' + // to the DrawContext that will sample it. + // + // This is necessary for two primary cases: + // 1. The original image was not Graphite-backed. + // 2. The original image was already Graphite-backed, but produced through Image::Copy, possibly + // from a different DrawContext. + // + // Failing to call this can lead to leaked Device and DrawContext memory (b/338453542). + SkASSERT(as_IB(imageToDraw)->isGraphiteBacked()); + SkASSERT(keyContext.drawContext()); + static_cast<Image_Base*>(imageToDraw.get())->notifyInUse(keyContext.recorder(), + keyContext.drawContext()); if (as_IB(imageToDraw)->isYUVA()) { return add_yuv_image_to_key(keyContext, shader, @@ -2102,17 +2022,6 @@ ColorSpaceTransformBlock::AddBlock(keyContext, colorXformData); }); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkImageShader* shader) { - auto image = as_IB(shader->image()); - if (!image->isGraphiteBacked()) { - // If it's not graphite-backed, there's no pending graphite work. - return; - } - - static_cast<Image_Base*>(image)->notifyInUse(recorder, drawContext); -} static void add_to_key(const KeyContext& keyContext, const SkLocalMatrixShader* shader) { SkASSERT(shader); @@ -2187,12 +2096,6 @@ keyContext.paintParamsKeyBuilder()->endBlock(); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkLocalMatrixShader* shader) { - NotifyImagesInUse(recorder, drawContext, shader->wrappedShader().get()); -} - // If either of these change then the corresponding change must also be made in the SkSL // perlin_noise_shader function. 
static_assert((int)SkPerlinNoiseShaderType::kFractalNoise == @@ -2233,9 +2136,6 @@ PerlinNoiseShaderBlock::AddBlock(keyContext, perlinData); } -static void notify_in_use(Recorder*, DrawContext*, const SkPerlinNoiseShader*) { - // No-op, perlin noise has no children. -} static void add_to_key(const KeyContext& keyContext, const SkPictureShader* shader) { @@ -2309,10 +2209,6 @@ AddToKey(keyContext, imgShader.get()); } -static void notify_in_use(Recorder*, DrawContext*, const SkPictureShader*) { - // While the SkPicture the shader points to, may have Graphite-backed shaders that need to be - // notified, that will happen when the picture is rendered into an image in add_to_key -} static void add_to_key(const KeyContext& keyContext, const SkRuntimeShader* shader) { @@ -2333,29 +2229,18 @@ keyContext.paintParamsKeyBuilder()->endBlock(); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkRuntimeShader* shader) { - notify_in_use(recorder, drawContext, shader->children()); -} static void add_to_key(const KeyContext& keyContext, const SkTransformShader* shader) { SKGPU_LOG_W("Raster-only SkShader (SkTransformShader) encountered"); keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError); } -static void notify_in_use(Recorder*, DrawContext*, const SkTransformShader*) { - // no-op -} static void add_to_key(const KeyContext& keyContext, const SkTriColorShader* shader) { SKGPU_LOG_W("Raster-only SkShader (SkTriColorShader) encountered"); keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError); } -static void notify_in_use(Recorder*, DrawContext*, const SkTriColorShader*) { - // no-op -} static void add_to_key(const KeyContext& keyContext, const SkWorkingColorSpaceShader* shader) { @@ -2388,11 +2273,6 @@ ColorSpaceTransformBlock::AddBlock(keyContext, data); }); } -static void notify_in_use(Recorder* recorder, - DrawContext* drawContext, - const SkWorkingColorSpaceShader* shader) { - 
NotifyImagesInUse(recorder, drawContext, shader->shader().get()); -} static SkBitmap create_color_and_offset_bitmap(int numStops, const SkPMColor4f* colors, @@ -2622,9 +2502,6 @@ } SkUNREACHABLE; } -static void notify_in_use(Recorder*, DrawContext*, const SkGradientBaseShader*) { - // Gradients do not have children, so no images to notify -} void AddToKey(const KeyContext& keyContext, const SkShader* shader) { if (!shader) { @@ -2647,24 +2524,4 @@ SkUNREACHABLE; } -void NotifyImagesInUse(Recorder* recorder, - DrawContext* drawContext, - const SkShader* shader) { - if (!shader) { - return; - } - switch (as_SB(shader)->type()) { -#define M(type) \ - case SkShaderBase::ShaderType::k##type: \ - notify_in_use(recorder, \ - drawContext, \ - static_cast<const Sk##type##Shader*>(shader)); \ - return; - SK_ALL_SHADERS(M) -#undef M - } - SkUNREACHABLE; -} - - } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/KeyHelpers.h b/src/gpu/graphite/KeyHelpers.h index 154e6d5..1281303 100644 --- a/src/gpu/graphite/KeyHelpers.h +++ b/src/gpu/graphite/KeyHelpers.h
@@ -418,14 +418,6 @@ */ void AddToKey(const KeyContext& keyContext, const SkShader* shader); -// TODO(b/330864257) These visitation functions are redundant with AddToKey, except that they are -// executed in the Device::drawGeometry() stack frame, whereas the keys are currently deferred until -// DrawPass::Make. Image use needs to be detected in the draw frame to split tasks to match client -// actions. Once paint keys are extracted in the draw frame, this can go away entirely. -void NotifyImagesInUse(Recorder*, DrawContext*, const SkBlender*); -void NotifyImagesInUse(Recorder*, DrawContext*, const SkColorFilter*); -void NotifyImagesInUse(Recorder*, DrawContext*, const SkShader*); - template <typename AddBlendToKeyT, typename AddSrcToKeyT, typename AddDstToKeyT> void Blend(const KeyContext& keyContext, AddBlendToKeyT addBlendToKey,
diff --git a/src/gpu/graphite/PaintParams.cpp b/src/gpu/graphite/PaintParams.cpp index 4c73fc4..d137d9b 100644 --- a/src/gpu/graphite/PaintParams.cpp +++ b/src/gpu/graphite/PaintParams.cpp
@@ -18,12 +18,9 @@ #include "src/gpu/graphite/ContextUtils.h" #include "src/gpu/graphite/KeyContext.h" #include "src/gpu/graphite/KeyHelpers.h" -#include "src/gpu/graphite/Log.h" #include "src/gpu/graphite/PaintParamsKey.h" #include "src/gpu/graphite/PipelineData.h" #include "src/gpu/graphite/RecorderPriv.h" -#include "src/gpu/graphite/Uniform.h" -#include "src/shaders/SkShaderBase.h" namespace skgpu::graphite { @@ -64,9 +61,35 @@ return true; } +std::optional<SkBlendMode> get_final_blendmode(SkBlender* blender) { + return blender ? as_BB(blender)->asBlendMode() : SkBlendMode::kSrcOver; +} + +Coverage get_renderer_coverage(Coverage coverage, + SkShader* clipShader, + const NonMSAAClip& nonMSAAClip) { + return (clipShader || !nonMSAAClip.isEmpty()) && coverage == Coverage::kNone ? + Coverage::kSingleChannel : coverage; +} + +SkEnumBitMask<DstUsage> get_dst_usage(const Caps* caps, + TextureFormat targetFormat, + std::optional<SkBlendMode> finalBlendMode, + Coverage rendererCoverage) { + SkEnumBitMask<DstUsage> dstUsage = + CanUseHardwareBlending(caps, targetFormat, finalBlendMode, rendererCoverage) + ? 
DstUsage::kNone + : DstUsage::kDstReadRequired; + if (finalBlendMode.has_value() && finalBlendMode.value() > SkBlendMode::kLastCoeffMode) { + dstUsage |= DstUsage::kAdvancedBlend; + } + return dstUsage; +} + } // anonymous namespace -PaintParams::PaintParams(const SkPaint& paint, +PaintParams::PaintParams(const Caps* caps, + const SkPaint& paint, sk_sp<SkBlender> primitiveBlender, const NonMSAAClip& nonMSAAClip, sk_sp<SkShader> clipShader, @@ -75,15 +98,17 @@ bool skipColorXform) : fColor(paint.getColor4f()) , fFinalBlender(paint.refBlender()) + , fFinalBlendMode(get_final_blendmode(fFinalBlender.get())) , fShader(paint.refShader()) , fColorFilter(paint.refColorFilter()) , fPrimitiveBlender(std::move(primitiveBlender)) , fNonMSAAClip(nonMSAAClip) , fClipShader(std::move(clipShader)) - , fRendererCoverage(coverage) + , fRendererCoverage(get_renderer_coverage(coverage, fClipShader.get(), fNonMSAAClip)) , fTargetFormat(targetFormat) , fSkipColorXform(skipColorXform) - , fDither(paint.isDither()) { + , fDither(paint.isDither()) + , fDstUsage(get_dst_usage(caps, fTargetFormat, fFinalBlendMode, fRendererCoverage)) { if (!fPrimitiveBlender) { SkColor4f constantColor; // if filled in, will be un-premul sRGB // fColor is un-premul sRGB @@ -106,11 +131,6 @@ PaintParams::~PaintParams() = default; PaintParams& PaintParams::operator=(const PaintParams& other) = default; -std::optional<SkBlendMode> PaintParams::asFinalBlendMode() const { - return fFinalBlender ? 
as_BB(fFinalBlender)->asBlendMode() - : SkBlendMode::kSrcOver; -} - sk_sp<SkBlender> PaintParams::refFinalBlender() const { return fFinalBlender; } sk_sp<SkShader> PaintParams::refShader() const { return fShader; } @@ -356,36 +376,22 @@ bool isOpaque = this->handleDithering(keyContext); // Root Node 1 is the final blender - std::optional<SkBlendMode> finalBlendMode = this->asFinalBlendMode(); - bool usesAdvancedBlend = finalBlendMode.has_value() && - (int)finalBlendMode.value() > (int)SkBlendMode::kLastCoeffMode; - - Coverage finalCoverage = fRendererCoverage; - if ((fClipShader || !fNonMSAAClip.isEmpty()) && fRendererCoverage == Coverage::kNone) { - finalCoverage = Coverage::kSingleChannel; - } - - bool dependsOnDst = fClipShader || !fNonMSAAClip.isEmpty(); - bool dstReadReq = !CanUseHardwareBlending(keyContext.recorder()->priv().caps(), - fTargetFormat, - finalBlendMode, - finalCoverage); - - if (finalBlendMode.has_value()) { - if (!dstReadReq) { + bool dependsOnDst = fRendererCoverage != Coverage::kNone; + if (fFinalBlendMode.has_value()) { + if (!(fDstUsage & DstUsage::kDstReadRequired)) { // With no shader blending, be as explicit as possible about the final blend - AddFixedBlendMode(keyContext, finalBlendMode.value()); + AddFixedBlendMode(keyContext, fFinalBlendMode.value()); } else { // With shader blending, use AddBlendMode() to select the more universal blend functions // when possible. Technically we could always use a fixed blend mode but would then // over-generate when encountering certain classes of blends. This is most problematic // on devices that wouldn't support dual-source blending, so help them out by at least // not requiring lots of pipelines. - AddBlendMode(keyContext, finalBlendMode.value()); + AddBlendMode(keyContext, fFinalBlendMode.value()); } // Blend modes can be analyzed to determine if specific src colors still depend on the dst. 
- dependsOnDst |= blendmode_depends_on_dst(finalBlendMode.value(), isOpaque); + dependsOnDst |= blendmode_depends_on_dst(fFinalBlendMode.value(), isOpaque); } else { AddToKey(keyContext, fFinalBlender.get()); // Cannot inspect runtime blenders to pessimistically assume they will always use the dst. @@ -395,26 +401,15 @@ // Optional Root Node 2 is the clip this->handleClipping(keyContext); - return Result{dependsOnDst, dstReadReq, usesAdvancedBlend}; -} + UniquePaintParamsID paintID = + keyContext.recorder()->priv().shaderCodeDictionary()->findOrCreate( + keyContext.paintParamsKeyBuilder()); -// TODO(b/330864257): Can be deleted once keys are determined by the Device draw. -void PaintParams::notifyImagesInUse(Recorder* recorder, - DrawContext* drawContext) const { - if (fShader) { - NotifyImagesInUse(recorder, drawContext, fShader.get()); - } - if (fPrimitiveBlender) { - NotifyImagesInUse(recorder, drawContext, fPrimitiveBlender.get()); - } - if (fColorFilter) { - NotifyImagesInUse(recorder, drawContext, fColorFilter.get()); - } - if (fFinalBlender) { - NotifyImagesInUse(recorder, drawContext, fFinalBlender.get()); - } - if (fClipShader) { - NotifyImagesInUse(recorder, drawContext, fClipShader.get()); + if (!paintID.isValid()) { + return {}; + } else { + return Result{paintID, + fDstUsage | (dependsOnDst ? DstUsage::kDependsOnDst : DstUsage::kNone)}; } }
diff --git a/src/gpu/graphite/PaintParams.h b/src/gpu/graphite/PaintParams.h index 9983d9d..f958428 100644 --- a/src/gpu/graphite/PaintParams.h +++ b/src/gpu/graphite/PaintParams.h
@@ -10,6 +10,7 @@ #include "include/core/SkColor.h" #include "include/core/SkPaint.h" +#include "src/base/SkEnumBitMask.h" #include "src/gpu/graphite/Caps.h" #include "src/gpu/graphite/Renderer.h" #include "src/gpu/graphite/geom/NonMSAAClip.h" @@ -26,6 +27,7 @@ class PipelineDataGatherer; class Recorder; class TextureProxy; +class UniquePaintParamsID; // TBD: If occlusion culling is eliminated as a phase, we can easily move the paint conversion // back to Device when the command is recorded (similar to SkPaint -> GrPaint), and then @@ -35,7 +37,8 @@ // assumed to be anti-aliased. class PaintParams { public: - explicit PaintParams(const SkPaint&, + explicit PaintParams(const Caps* caps, + const SkPaint&, sk_sp<SkBlender> primitiveBlender, const NonMSAAClip& nonMSAAClip, sk_sp<SkShader> clipShader, @@ -50,7 +53,7 @@ SkColor4f color() const { return fColor; } - std::optional<SkBlendMode> asFinalBlendMode() const; + std::optional<SkBlendMode> finalBlendMode() const { return fFinalBlendMode; } SkBlender* finalBlender() const { return fFinalBlender.get(); } sk_sp<SkBlender> refFinalBlender() const; @@ -63,19 +66,18 @@ SkBlender* primitiveBlender() const { return fPrimitiveBlender.get(); } sk_sp<SkBlender> refPrimitiveBlender() const; - TextureFormat targetFormat() const { return fTargetFormat; } - bool skipColorXform() const { return fSkipColorXform; } - bool dither() const { return fDither; } + Coverage rendererCoverage() const { return fRendererCoverage; } + bool skipColorXform() const { return fSkipColorXform; } + bool dither() const { return fDither; } /** Converts an SkColor4f to the destination color space. 
*/ static SkColor4f Color4fPrepForDst(SkColor4f srgb, const SkColorInfo& dstColorInfo); - using Result = std::tuple</*dependsOnDst*/bool, /*dstReadRequired*/bool, - /*usesAdvancedBlend*/bool>; + using Result = std::tuple<UniquePaintParamsID, SkEnumBitMask<DstUsage>>; std::optional<Result> toKey(const KeyContext&) const; - void notifyImagesInUse(Recorder*, DrawContext*) const; - + bool dstReadRequired() const { return (fDstUsage & DstUsage::kDstReadRequired) == + DstUsage::kDstReadRequired; } private: bool addPaintColorToKey(const KeyContext&) const; bool handlePrimitiveColor(const KeyContext&) const; @@ -85,20 +87,22 @@ bool handleDstRead(const KeyContext&) const; void handleClipping(const KeyContext&) const; - SkColor4f fColor; - sk_sp<SkBlender> fFinalBlender; // A nullptr here means SrcOver blending - sk_sp<SkShader> fShader; - sk_sp<SkColorFilter> fColorFilter; + SkColor4f fColor; + sk_sp<SkBlender> fFinalBlender; // A nullptr here means SrcOver blending + std::optional<SkBlendMode> fFinalBlendMode; // std::nullopt here means a runtime blender + sk_sp<SkShader> fShader; + sk_sp<SkColorFilter> fColorFilter; // A nullptr fPrimitiveBlender means there's no primitive color blending and it is skipped. // In the case where there is primitive blending, the primitive color is the source color and // the dest is the paint's color (or the paint's shader's computed color). - sk_sp<SkBlender> fPrimitiveBlender; - NonMSAAClip fNonMSAAClip; - sk_sp<SkShader> fClipShader; - Coverage fRendererCoverage; - TextureFormat fTargetFormat; - bool fSkipColorXform; - bool fDither; + sk_sp<SkBlender> fPrimitiveBlender; + NonMSAAClip fNonMSAAClip; + sk_sp<SkShader> fClipShader; + Coverage fRendererCoverage; + TextureFormat fTargetFormat; + bool fSkipColorXform; + bool fDither; + SkEnumBitMask<DstUsage> fDstUsage; }; // Add a fixed blend mode node for a specific SkBlendMode.
diff --git a/src/gpu/graphite/PipelineData.h b/src/gpu/graphite/PipelineData.h index 55e7dd3..d8083fe 100644 --- a/src/gpu/graphite/PipelineData.h +++ b/src/gpu/graphite/PipelineData.h
@@ -19,8 +19,8 @@ #include "src/core/SkColorData.h" #include "src/core/SkTHash.h" #include "src/gpu/graphite/BufferManager.h" -#include "src/gpu/graphite/DrawList.h" #include "src/gpu/graphite/DrawTypes.h" +#include "src/gpu/graphite/GraphicsPipelineDesc.h" #include "src/gpu/graphite/TextureProxy.h" #include "src/gpu/graphite/UniformManager.h" #include "src/shaders/gradients/SkGradientBaseShader.h"
diff --git a/src/gpu/graphite/compute/VelloRenderer.cpp b/src/gpu/graphite/compute/VelloRenderer.cpp index afbbf07..d8137f7 100644 --- a/src/gpu/graphite/compute/VelloRenderer.cpp +++ b/src/gpu/graphite/compute/VelloRenderer.cpp
@@ -23,6 +23,7 @@ #include "src/gpu/graphite/TextureUtils.h" #include "src/gpu/graphite/UniformManager.h" #include "src/gpu/graphite/compute/DispatchGroup.h" +#include "src/gpu/graphite/geom/Transform.h" #include <algorithm>
diff --git a/tests/graphite/VerticesPaddingTest.cpp b/tests/graphite/VerticesPaddingTest.cpp index 5d239ae..87f3107 100644 --- a/tests/graphite/VerticesPaddingTest.cpp +++ b/tests/graphite/VerticesPaddingTest.cpp
@@ -248,7 +248,7 @@ auto vertsNewPipeline = [&]() { dw->newPipelineState(/*type=*/{}, kStride, kStride, RenderStateFlags::kAppendVertices, - std::nullopt); + BarrierType::kNone); return; };
diff --git a/tests/graphite/precompile/PaintParamsKeyTest.cpp b/tests/graphite/precompile/PaintParamsKeyTest.cpp index 850db73..a356ee2 100644 --- a/tests/graphite/precompile/PaintParamsKeyTest.cpp +++ b/tests/graphite/precompile/PaintParamsKeyTest.cpp
@@ -49,6 +49,7 @@ #include "src/core/SkRuntimeEffectPriv.h" #include "src/gpu/graphite/ContextPriv.h" #include "src/gpu/graphite/ContextUtils.h" +#include "src/gpu/graphite/DrawContext.h" #include "src/gpu/graphite/GraphicsPipelineDesc.h" #include "src/gpu/graphite/KeyContext.h" #include "src/gpu/graphite/KeyHelpers.h" @@ -101,6 +102,34 @@ //-------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------- +sk_sp<DrawContext> get_precompile_draw_context( + const skgpu::graphite::Caps* caps, Context* context) { + std::unique_ptr<Recorder> drawRecorder = context->makeRecorder(); + ResourceProvider* resourceProvider = drawRecorder->priv().resourceProvider(); + constexpr SkISize drawSize = {128, 128}; + const SkColorInfo colorInfo = SkColorInfo(kRGBA_8888_SkColorType, + kPremul_SkAlphaType, + SkColorSpace::MakeSRGB()); + TextureInfo texInfo = caps->getDefaultSampledTextureInfo(colorInfo.colorType(), + skgpu::Mipmapped::kNo, + skgpu::Protected::kNo, + skgpu::Renderable::kYes); + sk_sp<TextureProxy> target = TextureProxy::Make(caps, + resourceProvider, + drawSize, + texInfo, + "PrecompileTarget", + skgpu::Budgeted::kYes); + sk_sp<DrawContext> precompileDrawContext = DrawContext::Make(caps, + std::move(target), + drawSize, + colorInfo, + {}); + return precompileDrawContext; +} + +//-------------------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------------------- // M(Empty) #define SK_ALL_TEST_SHADERS(M) \ M(Blend) \ @@ -1927,6 +1956,7 @@ [[maybe_unused]] void extract_vs_build_subtest(skiatest::Reporter* reporter, Context* context, + DrawContext* drawContext, skiatest::graphite::GraphiteTestContext* /* testContext */, const KeyContext& precompileKeyContext, Recorder* recorder, @@ -1981,7 +2011,8 @@ 
clipData.fAnalyticClip.fRadius = 5; } - PaintParams paintParams = PaintParams(paint, + PaintParams paintParams = PaintParams(recorder->priv().caps(), + paint, primitiveBlender, clipData, std::move(modifiedClipShader), @@ -1990,6 +2021,7 @@ /* skipColorXform= */ false); paramsGatherer.resetForDraw(); KeyContext keyContext(recorder, + drawContext, precompileKeyContext.floatStorageManager(), precompileKeyContext.paintParamsKeyBuilder(), &paramsGatherer, @@ -1997,10 +2029,9 @@ precompileKeyContext.dstColorInfo(), KeyGenFlags::kDisableSamplingOptimization, paintParams.color()); - paintParams.toKey(keyContext); - UniquePaintParamsID paintID = - recorder->priv().shaderCodeDictionary()->findOrCreate( - precompileKeyContext.paintParamsKeyBuilder()); + auto keyResult = paintParams.toKey(keyContext); + UniquePaintParamsID paintID = keyResult.has_value() ? std::get<0>(*keyResult) + : UniquePaintParamsID::Invalid(); RenderPassDesc unusedRenderPassDesc; std::vector<UniquePaintParamsID> precompileIDs; @@ -2137,6 +2168,7 @@ void run_test(skiatest::Reporter* reporter, Context* context, + DrawContext* drawContext, PrecompileContext* precompileContext, skiatest::graphite::GraphiteTestContext* testContext, const KeyContext& precompileKeyContext, @@ -2171,7 +2203,7 @@ // a SkCanvas::clipShader call). 
paintOptions.priv().setClipShaders({clipShaderOption}); - extract_vs_build_subtest(reporter, context, testContext, precompileKeyContext, + extract_vs_build_subtest(reporter, context, drawContext, testContext, precompileKeyContext, recorder.get(), paint, paintOptions, s, bm, cf, mf, imageFilter, clipType, clipShader, dt, seed, &rand, verbose); precompile_vs_real_draws_subtest(reporter, context, precompileContext, @@ -2187,14 +2219,17 @@ testContext, true, CtsEnforcement::kNever) { + const skgpu::graphite::Caps* caps = context->priv().caps(); std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext(); - sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>(); + // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise) + sk_sp<DrawContext> precompileDrawContext = get_precompile_draw_context(caps, context); FloatStorageManager floatStorageManager; ShaderCodeDictionary* dict = context->priv().shaderCodeDictionary(); PaintParamsKeyBuilder builder(dict); PipelineDataGatherer gatherer(Layout::kMetal); - KeyContext keyContext(context->priv().caps(), + sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>(); + KeyContext keyContext(caps, &floatStorageManager, &builder, &gatherer, @@ -2237,6 +2272,7 @@ run_test(reporter, context, + precompileDrawContext.get(), precompileContext.get(), testContext, keyContext, @@ -2264,14 +2300,17 @@ testContext, true, CtsEnforcement::kNever) { + const skgpu::graphite::Caps* caps = context->priv().caps(); std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext(); - sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>(); + // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise) + sk_sp<DrawContext> precompileDrawContext = get_precompile_draw_context(caps, context); FloatStorageManager floatStorageManager; ShaderCodeDictionary* dict = 
context->priv().shaderCodeDictionary(); PaintParamsKeyBuilder builder(dict); PipelineDataGatherer gatherer(Layout::kMetal); - KeyContext precompileKeyContext(context->priv().caps(), + sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>(); + KeyContext precompileKeyContext(caps, &floatStorageManager, &builder, &gatherer, @@ -2399,7 +2438,8 @@ ++current; #endif - run_test(reporter, context, precompileContext.get(), + run_test(reporter, context, precompileDrawContext.get(), + precompileContext.get(), testContext, precompileKeyContext, shader, blender, cf, mf, imageFilter, clip, dt, kDefaultSeed, /* verbose= */ false);