/*
* Copyright 2024 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/gpu/graphite/ComputePathAtlas.h"
#include "include/gpu/graphite/Recorder.h"
#include "src/gpu/graphite/AtlasProvider.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/RendererProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/TextureUtils.h"
#include "src/gpu/graphite/geom/Transform_graphite.h"
#ifdef SK_ENABLE_VELLO_SHADERS
#include "src/gpu/graphite/compute/DispatchGroup.h"
#endif

namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today
constexpr uint16_t kComputeAtlasDim = 4096;

} // namespace

ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
: PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
, fRectanizer(this->width(), this->height()) {}
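
// Lazily creates the storage-backed atlas texture through the AtlasProvider the first time it is
// needed. Returns false if the texture could not be instantiated.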
bool ComputePathAtlas::initializeTextureIfNeeded() {
if (!fTexture) {
SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
this->width(),
this->height(),
targetCT,
/*identifier=*/0,
/*requireStorageUsage=*/true);
}
return fTexture != nullptr;
}

bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds) const {
Rect maskBounds = transformedShapeBounds.makeRoundOut();
skvx::float2 maskSize = maskBounds.size();
float width = maskSize.x(), height = maskSize.y();
if (width > this->width() || height > this->height()) {
return false;
}
// For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
// to prevent the atlas texture from filling up too often. There are several approaches we
// should explore to alleviate the cost of atlasing large paths.
//
// 1. Rendering multiple atlas textures requires an extra compute pass for each texture. This
// impairs performance because there is a fixed cost to each dispatch and all dispatches get
// serialized by pipeline barrier synchronization. We should explore ways to render to multiple
// textures by issuing more workgroups in fewer dispatches as well as removing pipeline barriers
// across dispatches that target different atlas pages.
//
// 2. Implement a compressed "sparse" mask rendering scheme to render paths with a large
// bounding box using less texture space.
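    //
    // Note: 1024 * 512 is exactly 1/32 of the 4096 * 4096 atlas area.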
return (width * height) <= (1024 * 512);
}

const TextureProxy* ComputePathAtlas::addRect(skvx::half2 maskSize,
SkIPoint16* outPos) {
if (!this->initializeTextureIfNeeded()) {
SKGPU_LOG_E("Failed to instantiate an atlas texture");
return nullptr;
}
// An empty mask always fits, so just return the texture.
// TODO: This may not be needed if we can handle clipped out bounds with inverse fills
// another way. See PathAtlas::addShape().
if (!all(maskSize)) {
*outPos = {0, 0};
return fTexture.get();
}
if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
return nullptr;
}
return fTexture.get();
}

void ComputePathAtlas::reset() {
fRectanizer.reset();
this->onReset();
}

#ifdef SK_ENABLE_VELLO_SHADERS
/**
* ComputePathAtlas that uses a VelloRenderer.
*/
class VelloComputePathAtlas final : public ComputePathAtlas {
public:
explicit VelloComputePathAtlas(Recorder* recorder) : ComputePathAtlas(recorder) {}
// Record the compute dispatches that will draw the atlas contents.
std::unique_ptr<DispatchGroup> recordDispatches(Recorder* recorder) const override;
private:
const TextureProxy* onAddShape(const Shape&,
const Transform& transform,
const SkStrokeRec&,
skvx::half2 maskSize,
skvx::half2* outPos) override;
void onReset() override {
fScene.reset();
        fOccupiedWidth = fOccupiedHeight = 0;
}
// Contains the encoded scene buffer data that serves as the input to a vello compute pass.
VelloScene fScene;
    // Occupied bounds of the atlas
    uint32_t fOccupiedWidth = 0;
    uint32_t fOccupiedHeight = 0;
};

std::unique_ptr<DispatchGroup> VelloComputePathAtlas::recordDispatches(Recorder* recorder) const {
if (!this->texture()) {
return nullptr;
}
SkASSERT(recorder && recorder == fRecorder);
    // Use the analytic area AA mode unless caps say otherwise.
VelloAaConfig config = VelloAaConfig::kAnalyticArea;
#if defined(GRAPHITE_TEST_UTILS)
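    // Tests can request a specific path renderer strategy; honor a compute-MSAA16 request here.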
PathRendererStrategy strategy = recorder->priv().caps()->requestedPathRendererStrategy();
if (strategy == PathRendererStrategy::kComputeMSAA16) {
config = VelloAaConfig::kMSAA16;
}
#endif
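    // Only the occupied region of the atlas needs to be covered by the compute pass.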
return recorder->priv().rendererProvider()->velloRenderer()->renderScene(
            {fOccupiedWidth, fOccupiedHeight, SkColors::kBlack, config},
fScene,
sk_ref_sp(this->texture()),
recorder);
}
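
// Reserves an atlas slot for the shape's coverage mask and encodes the shape into the Vello
// scene, clipped to its slot and translated into atlas space. The mask itself is rendered later
// by recordDispatches().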
const TextureProxy* VelloComputePathAtlas::onAddShape(
const Shape& shape,
const Transform& transform,
const SkStrokeRec& style,
skvx::half2 maskSize,
skvx::half2* outPos) {
SkIPoint16 iPos;
const TextureProxy* texProxy = this->addRect(maskSize, &iPos);
if (!texProxy) {
return nullptr;
}
*outPos = skvx::half2(iPos.x(), iPos.y());
// If the mask is empty, just return.
// TODO: This may not be needed if we can handle clipped out bounds with inverse fills
// another way. See PathAtlas::addShape().
if (!all(maskSize)) {
return texProxy;
}
// TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
// appropriately transformed in that case.
SkASSERT(transform.type() != Transform::Type::kPerspective);
// Restrict the render to the occupied area of the atlas, including entry padding so that the
// padded row/column is cleared when Vello renders.
Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));
    fOccupiedWidth = std::max(fOccupiedWidth, (uint32_t)atlasBounds.right() + kEntryPadding);
    fOccupiedHeight = std::max(fOccupiedHeight, (uint32_t)atlasBounds.bot() + kEntryPadding);
// TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
// for each shape since the full vello renderer treats clips and their affected draws as a
// single shape hierarchy in the same scene coordinate space. For coverage masks we want each
// mask to be transformed to its atlas allocation coordinates and for the clip to be applied
// with a translation relative to the atlas slot.
//
// Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
// clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
// performance, future work will explore building an atlas-oriented element processing stage
// that applies the atlas-relative translation while evaluating the stack monoid on the GPU.
// Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
// the bounds that the Rectanizer assigned.
SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
fScene.pushClipLayer(clipRect, Transform::Identity());
    // The atlas transform of the shape is the linear components (scale, rotation, skew) of
// `localToDevice` translated by the top-left offset of `atlasBounds`.
Transform atlasTransform = transform.postTranslate(atlasBounds.x(), atlasBounds.y());
SkPath devicePath = shape.asPath();
// For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
// the fill.
SkStrokeRec::Style styleType = style.getStyle();
if (styleType == SkStrokeRec::kStroke_Style ||
styleType == SkStrokeRec::kHairline_Style ||
styleType == SkStrokeRec::kStrokeAndFill_Style) {
// We need to special-case hairline strokes and strokes with sub-pixel width as Vello
// draws these with aliasing and the results are barely visible. Draw the stroke with a
// device-space width of 1 pixel and scale down the alpha by the true width to approximate
// the sampled area.
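        // E.g. a stroke with a device-space width of 0.25px is drawn 1px wide with its coverage
        // scaled by 0.25.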
float width = style.getWidth();
float deviceWidth = width * atlasTransform.maxScaleFactor();
if (style.isHairlineStyle() || deviceWidth <= 1.0) {
            // Both cases are drawn with a 1px device-space width, with coverage scaled by the
            // theoretical area (1 for hairlines, `deviceWidth` otherwise).
SkColor4f color = SkColors::kRed;
color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;
// Transform the stroke's width to its local coordinate space since it'll get drawn with
// `atlasTransform`.
float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
SkStrokeRec adjustedStyle(style);
adjustedStyle.setStrokeStyle(transformedWidth);
fScene.solidStroke(devicePath, color, adjustedStyle, atlasTransform);
} else {
fScene.solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
}
}
if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
fScene.solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
}
fScene.popClipLayer();
return texProxy;
}
#endif // SK_ENABLE_VELLO_SHADERS
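
// Returns the Vello-backed atlas implementation when SK_ENABLE_VELLO_SHADERS is defined;
// otherwise there is no compute-based path atlas and this returns nullptr.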
std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
#ifdef SK_ENABLE_VELLO_SHADERS
return std::make_unique<VelloComputePathAtlas>(recorder);
#else
return nullptr;
#endif
}
} // namespace skgpu::graphite