/*
* Copyright 2024 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/graphite/ComputePathAtlas.h"
#include "include/gpu/graphite/Recorder.h"
#include "src/gpu/graphite/AtlasProvider.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/RendererProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/TextureUtils.h"
#include "src/gpu/graphite/geom/Transform_graphite.h"
#ifdef SK_ENABLE_VELLO_SHADERS
#include "src/gpu/graphite/compute/DispatchGroup.h"
#endif

namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today
constexpr uint16_t kComputeAtlasDim = 4096;

} // namespace

ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
: PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
, fRectanizer(this->width(), this->height()) {}
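
// Lazily creates the storage-backed atlas texture through the AtlasProvider the first time it is
// needed. Returns false if the texture could not be instantiated.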
bool ComputePathAtlas::initializeTextureIfNeeded() {
if (!fTexture) {
SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
this->width(),
this->height(),
targetCT,
/*identifier=*/0,
/*requireStorageUsage=*/true);
}
return fTexture != nullptr;
}

bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds) const {
Rect maskBounds = transformedShapeBounds.makeRoundOut();
skvx::float2 maskSize = maskBounds.size();
float width = maskSize.x(), height = maskSize.y();
if (width > this->width() || height > this->height()) {
return false;
}
// For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
// to prevent the atlas texture from filling up too often. There are several approaches we
// should explore to alleviate the cost of atlasing large paths.
//
// 1. Rendering multiple atlas textures requires an extra compute pass for each texture. This
// impairs performance because there is a fixed cost to each dispatch and all dispatches get
// serialized by pipeline barrier synchronization. We should explore ways to render to multiple
// textures by issuing more workgroups in fewer dispatches as well as removing pipeline barriers
// across dispatches that target different atlas pages.
//
// 2. Implement a compressed "sparse" mask rendering scheme to render paths with a large
// bounding box using less texture space.
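    //
    // Note: 1024 * 512 is exactly 1/32 of the 4096 * 4096 atlas area.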
return (width * height) <= (1024 * 512);
}

const TextureProxy* ComputePathAtlas::addRect(skvx::half2 maskSize,
SkIPoint16* outPos) {
if (!this->initializeTextureIfNeeded()) {
SKGPU_LOG_E("Failed to instantiate an atlas texture");
return nullptr;
}
// An empty mask always fits, so just return the texture.
// TODO: This may not be needed if we can handle clipped out bounds with inverse fills
// another way. See PathAtlas::addShape().
if (!all(maskSize)) {
*outPos = {0, 0};
return fTexture.get();
}
if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
return nullptr;
}
return fTexture.get();
}

void ComputePathAtlas::reset() {
fRectanizer.reset();
this->onReset();
}

#ifdef SK_ENABLE_VELLO_SHADERS
/**
* ComputePathAtlas that uses a VelloRenderer.
*/
class VelloComputePathAtlas final : public ComputePathAtlas {
public:
explicit VelloComputePathAtlas(Recorder* recorder) : ComputePathAtlas(recorder) {}
// Record the compute dispatches that will draw the atlas contents.
std::unique_ptr<DispatchGroup> recordDispatches(Recorder* recorder) const override;
private:
const TextureProxy* onAddShape(const Shape&,
const Transform& transform,
const SkStrokeRec&,
skvx::half2 maskSize,
skvx::half2* outPos) override;
void onReset() override {
fScene.reset();
        fOccupiedWidth = fOccupiedHeight = 0;
}
// Contains the encoded scene buffer data that serves as the input to a vello compute pass.
VelloScene fScene;
    // Occupied bounds of the atlas
    uint32_t fOccupiedWidth = 0;
    uint32_t fOccupiedHeight = 0;
};

std::unique_ptr<DispatchGroup> VelloComputePathAtlas::recordDispatches(Recorder* recorder) const {
if (!this->texture()) {
return nullptr;
}
SkASSERT(recorder && recorder == fRecorder);
    // Use the analytic area AA mode unless caps say otherwise.
VelloAaConfig config = VelloAaConfig::kAnalyticArea;
#if defined(GRAPHITE_TEST_UTILS)
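    // Tests can request a specific path renderer strategy; honor a compute-MSAA16 request here.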
PathRendererStrategy strategy = recorder->priv().caps()->requestedPathRendererStrategy();
if (strategy == PathRendererStrategy::kComputeMSAA16) {
config = VelloAaConfig::kMSAA16;
}
#endif
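    // Only the occupied region of the atlas needs to be covered by the compute pass.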
return recorder->priv().rendererProvider()->velloRenderer()->renderScene(
            {fOccupiedWidth, fOccupiedHeight, SkColors::kBlack, config},
fScene,
sk_ref_sp(this->texture()),
recorder);
}
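
// Reserves an atlas slot for the shape's coverage mask and encodes the shape into the Vello
// scene, clipped to its slot and translated into atlas space. The mask itself is rendered later
// by recordDispatches().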
const TextureProxy* VelloComputePathAtlas::onAddShape(
const Shape& shape,
const Transform& transform,
const SkStrokeRec& style,
skvx::half2 maskSize,
skvx::half2* outPos) {
SkIPoint16 iPos;
const TextureProxy* texProxy = this->addRect(maskSize, &iPos);
if (!texProxy) {
return nullptr;
}
*outPos = skvx::half2(iPos.x(), iPos.y());
// If the mask is empty, just return.
// TODO: This may not be needed if we can handle clipped out bounds with inverse fills
// another way. See PathAtlas::addShape().
if (!all(maskSize)) {
return texProxy;
}
// TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
// appropriately transformed in that case.
SkASSERT(transform.type() != Transform::Type::kPerspective);
// Restrict the render to the occupied area of the atlas, including entry padding so that the
// padded row/column is cleared when Vello renders.
Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));
    fOccupiedWidth = std::max(fOccupiedWidth, (uint32_t)atlasBounds.right() + kEntryPadding);
    fOccupiedHeight = std::max(fOccupiedHeight, (uint32_t)atlasBounds.bot() + kEntryPadding);
// TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
// for each shape since the full vello renderer treats clips and their affected draws as a
// single shape hierarchy in the same scene coordinate space. For coverage masks we want each
// mask to be transformed to its atlas allocation coordinates and for the clip to be applied
// with a translation relative to the atlas slot.
//
// Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
// clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
// performance, future work will explore building an atlas-oriented element processing stage
// that applies the atlas-relative translation while evaluating the stack monoid on the GPU.
// Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
// the bounds that the Rectanizer assigned.
SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
fScene.pushClipLayer(clipRect, Transform::Identity());
    // The atlas transform of the shape is the linear components (scale, rotation, skew) of
// `localToDevice` translated by the top-left offset of `atlasBounds`.
Transform atlasTransform = transform.postTranslate(atlasBounds.x(), atlasBounds.y());
SkPath devicePath = shape.asPath();
// For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
// the fill.
SkStrokeRec::Style styleType = style.getStyle();
if (styleType == SkStrokeRec::kStroke_Style ||
styleType == SkStrokeRec::kHairline_Style ||
styleType == SkStrokeRec::kStrokeAndFill_Style) {
// We need to special-case hairline strokes and strokes with sub-pixel width as Vello
// draws these with aliasing and the results are barely visible. Draw the stroke with a
// device-space width of 1 pixel and scale down the alpha by the true width to approximate
// the sampled area.
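        // E.g. a stroke with a device-space width of 0.25px is drawn 1px wide with its coverage
        // scaled by 0.25.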
float width = style.getWidth();
float deviceWidth = width * atlasTransform.maxScaleFactor();
if (style.isHairlineStyle() || deviceWidth <= 1.0) {
            // Both cases are drawn with a 1px device-space width, with coverage scaled by the
            // theoretical area (1 for hairlines, `deviceWidth` otherwise).
SkColor4f color = SkColors::kRed;
color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;
// Transform the stroke's width to its local coordinate space since it'll get drawn with
// `atlasTransform`.
float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
SkStrokeRec adjustedStyle(style);
adjustedStyle.setStrokeStyle(transformedWidth);
fScene.solidStroke(devicePath, color, adjustedStyle, atlasTransform);
} else {
fScene.solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
}
}
if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
fScene.solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
}
fScene.popClipLayer();
return texProxy;
}
#endif // SK_ENABLE_VELLO_SHADERS
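
// Returns the Vello-backed atlas implementation when SK_ENABLE_VELLO_SHADERS is defined;
// otherwise there is no compute-based path atlas and this returns nullptr.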
std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
#ifdef SK_ENABLE_VELLO_SHADERS
return std::make_unique<VelloComputePathAtlas>(recorder);
#else
return nullptr;
#endif
}
} // namespace skgpu::graphite