/*
 * Copyright 2024 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include "src/gpu/graphite/ComputePathAtlas.h"

#include "include/gpu/graphite/Recorder.h"
#include "src/core/SkIPoint16.h"
#include "src/gpu/graphite/AtlasProvider.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/TextureUtils.h"

#ifdef SK_ENABLE_VELLO_SHADERS
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/ContextOptionsPriv.h"
#include "src/gpu/graphite/RendererProvider.h"
#include "src/gpu/graphite/compute/DispatchGroup.h"
#include "src/gpu/graphite/geom/Transform.h"
#endif

#include <cmath>
#include <cstddef>
#include <cstdint>

enum SkColorType : int;

namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today.
// Used as both the width and the height passed to the PathAtlas base class.
constexpr uint16_t kComputeAtlasDim = 4096;

// TODO: Currently we reject shapes whose clipped mask area is larger than this threshold (1/32nd
// of a kComputeAtlasDim x kComputeAtlasDim page; see isSuitableForAtlasing()) to avoid creating
// too many flushes in a Recording containing many large path draws. These shapes often don't make
// efficient use of the available atlas texture space and the cost of sequential dispatches to
// render multiple atlas pages can be prohibitive.
constexpr size_t kBboxAreaThreshold = 1024 * 512;

// Coordinate size that is too large for vello to handle efficiently. See the discussion on
// https://github.com/linebender/vello/pull/542.
// Compared against the width and height of the unclipped, rounded-out shape bounds.
constexpr float kCoordinateThreshold = 1e10;

}  // namespace

// Passes kComputeAtlasDim as both atlas dimensions to the PathAtlas base class and sizes the
// rectanizer from the resulting width()/height(). The backing texture is not created here; it is
// created on demand by initializeTextureIfNeeded().
ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
    : PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
    , fRectanizer(this->width(), this->height()) {}

bool ComputePathAtlas::initializeTextureIfNeeded() {
    if (!fTexture) {
        SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
        fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
                                                                      this->width(),
                                                                      this->height(),
                                                                      targetCT,
                                                                      /*identifier=*/0,
                                                                      /*requireStorageUsage=*/true);
    }
    return fTexture != nullptr;
}

bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds,
                                             const Rect& clipBounds) const {
    Rect shapeBounds = transformedShapeBounds.makeRoundOut();
    Rect maskBounds = shapeBounds.makeIntersect(clipBounds);
    skvx::float2 maskSize = maskBounds.size();
    float width = maskSize.x(), height = maskSize.y();

    if (width > this->width() || height > this->height()) {
        return false;
    }

    // For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
    // to prevent the atlas texture from filling up too often. There are several approaches we
    // should explore to alleviate the cost of atlasing large paths.
    if (width * height > kBboxAreaThreshold) {
        return false;
    }

    // Reject pathological shapes that vello can't handle efficiently yet.
    skvx::float2 unclippedSize = shapeBounds.size();
    if (std::fabs(unclippedSize.x()) > kCoordinateThreshold ||
        std::fabs(unclippedSize.y()) > kCoordinateThreshold) {
        return false;
    }

    return true;
}

// Reserves space for a mask of `maskSize` in the rectanizer-managed atlas texture.
//
// maskSize - width/height of the mask; a mask with a zero component is treated as empty.
// outPos   - receives the allocated position ({0, 0} for an empty mask).
//
// Returns the atlas texture on success. Returns nullptr if the texture could not be created or
// the rectanizer could not place the padded rectangle.
sk_sp<TextureProxy> ComputePathAtlas::addRect(skvx::half2 maskSize,
                                              SkIPoint16* outPos) {
    if (!this->initializeTextureIfNeeded()) {
        SKGPU_LOG_E("Failed to instantiate an atlas texture");
        return nullptr;
    }

    if (all(maskSize)) {
        // Both dimensions are non-zero, so a real (padded) allocation is required.
        if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
            return nullptr;
        }
    } else {
        // An empty mask always fits, so no allocation is made.
        // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
        // another way. See PathAtlas::addShape().
        *outPos = {0, 0};
    }

    return fTexture;
}

// Resets the rectanizer and then invokes the subclass hook onReset() so that the concrete atlas
// can discard its own per-flush state.
void ComputePathAtlas::reset() {
    fRectanizer.reset();

    this->onReset();
}

#ifdef SK_ENABLE_VELLO_SHADERS

/**
 * ComputePathAtlas that uses a VelloRenderer.
 *
 * Shapes are routed either to a DrawAtlas-backed manager (fCachedAtlasMgr) or, failing that, to
 * the rectanizer-managed texture of the base class, whose pending draws are accumulated in
 * fUncachedScene. See onAddShape() for the routing.
 */
class VelloComputePathAtlas final : public ComputePathAtlas {
public:
    // The cached atlas manager is created with the same dimensions as this atlas.
    explicit VelloComputePathAtlas(Recorder* recorder)
        : ComputePathAtlas(recorder)
        , fCachedAtlasMgr(fWidth, fHeight, recorder->priv().caps()) {}
    // Record the compute dispatches that will draw the atlas contents.
    // Returns true if at least one dispatch group was appended to the list.
    bool recordDispatches(Recorder*, ComputeTask::DispatchGroupList*) const override;

private:
    // Adds a shape to either the cached or the uncached atlas and writes its position to `outPos`.
    // Returns the texture holding the mask, or nullptr if no space could be allocated.
    sk_sp<TextureProxy> onAddShape(const Shape&,
                                   const Transform& localToDevice,
                                   const SkStrokeRec&,
                                   skvx::half2 maskOrigin,
                                   skvx::half2 maskSize,
                                   SkIVector transformedMaskOffset,
                                   skvx::half2* outPos) override;
    // Discards all encoded scene data and occupied-area tracking for both the cached and the
    // uncached atlas.
    void onReset() override {
        fCachedAtlasMgr.onReset();

        fUncachedScene.reset();
        fUncachedOccupiedArea = { 0, 0 };
    }

    // DrawAtlasMgr specialization that keeps one VelloScene and one occupied area per atlas page.
    class VelloAtlasMgr : public PathAtlas::DrawAtlasMgr {
    public:
        // The plot dimensions passed to the base class equal the full atlas dimensions.
        VelloAtlasMgr(size_t width, size_t height, const Caps* caps)
            : PathAtlas::DrawAtlasMgr(width, height, width, height,
                                      DrawAtlas::UseStorageTextures::kYes,
                                      /*label=*/"VelloPathAtlas", caps) {}

        // Appends one dispatch group for every page with a non-empty occupied area.
        // Returns true if at least one group was appended.
        bool recordDispatches(Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const;

        // Marks the used plots of the DrawAtlas as full and clears the per-page scenes and
        // occupied areas.
        void onReset() {
            fDrawAtlas->markUsedPlotsAsFull();
            for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
                fScenes[i].reset();
                fOccupiedAreas[i] = {0, 0};
            }
        }

    protected:
        // Encodes the shape into the scene of the page identified by the locator.
        bool onAddToAtlas(const Shape&,
                          const Transform& localToDevice,
                          const SkStrokeRec&,
                          SkIRect shapeBounds,
                          SkIVector transformedMaskOffset,
                          const AtlasLocator&) override;

    private:
        // Encoded scene data per atlas page, indexed by page index.
        VelloScene fScenes[PlotLocator::kMaxMultitexturePages];
        // Occupied bounds per atlas page, indexed by page index.
        // NOTE(review): the initializer spells out four entries; presumably
        // PlotLocator::kMaxMultitexturePages == 4 -- confirm if that constant ever changes.
        SkISize fOccupiedAreas[PlotLocator::kMaxMultitexturePages] = {
            {0, 0}, {0, 0}, {0, 0}, {0, 0}
        };
    };

    VelloAtlasMgr fCachedAtlasMgr;

    // Contains the encoded scene buffer data that serves as the input to a vello compute pass.
    // For the uncached atlas.
    VelloScene fUncachedScene;

    // Occupied bounds of the uncached atlas
    SkISize fUncachedOccupiedArea = { 0, 0 };
};

// Maps the recorder's path renderer strategy to the matching Vello anti-aliasing configuration.
// Any strategy other than the two MSAA variants is asserted to be kComputeAnalyticAA and yields
// VelloAaConfig::kAnalyticArea.
static VelloAaConfig get_vello_aa_config(Recorder* recorder) {
    const PathRendererStrategy strategy =
            recorder->priv().rendererProvider()->pathRendererStrategy();

    if (strategy == PathRendererStrategy::kComputeMSAA16) {
        return VelloAaConfig::kMSAA16;
    }
    if (strategy == PathRendererStrategy::kComputeMSAA8) {
        return VelloAaConfig::kMSAA8;
    }

    SkASSERT(strategy == PathRendererStrategy::kComputeAnalyticAA);
    return VelloAaConfig::kAnalyticArea;
}

// Asks the recorder's VelloRenderer to render `scene` into `texture`.
//
// occupiedArea - width/height of the region to render, passed on as unsigned values.
// config       - anti-aliasing configuration to use.
//
// Returns whatever renderScene() returns; callers treat a null result as failure.
static std::unique_ptr<DispatchGroup> render_vello_scene(Recorder* recorder,
                                                         sk_sp<TextureProxy> texture,
                                                         const VelloScene& scene,
                                                         SkISize occupiedArea,
                                                         VelloAaConfig config) {
    const auto* velloRenderer = recorder->priv().rendererProvider()->velloRenderer();

    const uint32_t renderWidth = static_cast<uint32_t>(occupiedArea.width());
    const uint32_t renderHeight = static_cast<uint32_t>(occupiedArea.height());

    return velloRenderer->renderScene({renderWidth, renderHeight, SkColors::kBlack, config},
                                      scene,
                                      std::move(texture),
                                      recorder);
}

static void add_shape_to_scene(const Shape& shape,
                               const Transform& localToDevice,
                               const SkStrokeRec& style,
                               Rect atlasBounds,
                               SkIVector transformedMaskOffset,
                               VelloScene* scene,
                               SkISize* occupiedArea) {
    occupiedArea->fWidth = std::max(occupiedArea->fWidth,
                                    (int)atlasBounds.right() + PathAtlas::kEntryPadding);
    occupiedArea->fHeight = std::max(occupiedArea->fHeight,
                                     (int)atlasBounds.bot() + PathAtlas::kEntryPadding);

    // TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
    // for each shape since the full vello renderer treats clips and their affected draws as a
    // single shape hierarchy in the same scene coordinate space. For coverage masks we want each
    // mask to be transformed to its atlas allocation coordinates and for the clip to be applied
    // with a translation relative to the atlas slot.
    //
    // Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
    // clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
    // performance, future work will explore building an atlas-oriented element processing stage
    // that applies the atlas-relative translation while evaluating the stack monoid on the GPU.

    // Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
    // the bounds that the Rectanizer assigned.
    SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
    scene->pushClipLayer(clipRect, Transform::Identity());

    // The atlas transform of the shape is `localToDevice` translated by the top-left offset of the
    // 'atlasBounds' and the inverse of the base mask transform offset.
    Transform atlasTransform = localToDevice.postTranslate(
            atlasBounds.x()-transformedMaskOffset.x(), atlasBounds.y()-transformedMaskOffset.y());
    SkPath devicePath = shape.asPath();

    // For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
    // the fill.
    SkStrokeRec::Style styleType = style.getStyle();
    if (styleType == SkStrokeRec::kStroke_Style ||
        styleType == SkStrokeRec::kHairline_Style ||
        styleType == SkStrokeRec::kStrokeAndFill_Style) {
        // We need to special-case hairline strokes and strokes with sub-pixel width as Vello
        // draws these with aliasing and the results are barely visible. Draw the stroke with a
        // device-space width of 1 pixel and scale down the alpha by the true width to approximate
        // the sampled area.
        float width = style.getWidth();
        float deviceWidth = width * atlasTransform.maxScaleFactor();
        if (style.isHairlineStyle() || deviceWidth <= 1.0) {
            // Both strokes get 1/2 weight scaled by the theoretical area (1 for hairlines,
            // `deviceWidth` otherwise).
            SkColor4f color = SkColors::kRed;
            color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;

            // Transform the stroke's width to its local coordinate space since it'll get drawn with
            // `atlasTransform`.
            float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
            SkStrokeRec adjustedStyle(style);
            adjustedStyle.setStrokeStyle(transformedWidth);
            scene->solidStroke(devicePath, color, adjustedStyle, atlasTransform);
        } else {
            scene->solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
        }
    }
    if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
        scene->solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
    }

    scene->popClipLayer();
}

// Appends the dispatch groups that render the atlas contents: first those of the cached atlas
// manager, then (if the uncached atlas has a texture and a non-empty occupied area) one group for
// the uncached scene.
//
// Returns true if at least one dispatch group was appended. A failure to build the uncached
// group is logged and does not discard groups already appended for the cached atlas.
bool VelloComputePathAtlas::recordDispatches(Recorder* recorder,
                                             ComputeTask::DispatchGroupList* dispatches) const {
    const bool cachedAdded = fCachedAtlasMgr.recordDispatches(recorder, dispatches);

    // Nothing was drawn into the uncached atlas, so the cached result is the final answer.
    if (!this->texture() || fUncachedOccupiedArea.isEmpty()) {
        return cachedAdded;
    }

    SkASSERT(recorder && recorder == fRecorder);

    VelloAaConfig aaConfig = get_vello_aa_config(recorder);
    std::unique_ptr<DispatchGroup> uncachedGroup = render_vello_scene(recorder,
                                                                      sk_ref_sp(this->texture()),
                                                                      fUncachedScene,
                                                                      fUncachedOccupiedArea,
                                                                      aaConfig);
    if (!uncachedGroup) {
        SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
        return cachedAdded;
    }

    TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                         "# dispatches", uncachedGroup->dispatches().size());
    dispatches->emplace_back(std::move(uncachedGroup));
    return true;
}

// Adds a shape's coverage mask to the atlas and reports where it was placed.
//
// Non-volatile shapes are first offered to the cached DrawAtlas manager. Volatile shapes, and
// shapes the cached manager could not place, fall back to the uncached rectanizer-managed
// texture, where the shape is encoded into fUncachedScene.
//
// shape, localToDevice, style - what to draw; perspective transforms are not supported.
// maskOrigin                  - forwarded to the cached atlas manager.
// maskSize                    - size of the mask; a zero component means an empty mask.
// transformedMaskOffset       - offset used when building the atlas-space transform.
// outPos                      - receives the position of the mask within the returned texture.
//
// Returns the texture that holds (or will hold) the mask, or nullptr if no space was available.
sk_sp<TextureProxy> VelloComputePathAtlas::onAddShape(
        const Shape& shape,
        const Transform& localToDevice,
        const SkStrokeRec& style,
        skvx::half2 maskOrigin,
        skvx::half2 maskSize,
        SkIVector transformedMaskOffset,
        skvx::half2* outPos) {
    if (!shape.isVolatilePath()) {
        // Try to locate or add to cached DrawAtlas
        sk_sp<TextureProxy> proxy = fCachedAtlasMgr.findOrCreateEntry(fRecorder,
                                                                      shape,
                                                                      localToDevice,
                                                                      style,
                                                                      maskOrigin,
                                                                      maskSize,
                                                                      transformedMaskOffset,
                                                                      outPos);
        if (proxy) {
            return proxy;
        }
    }

    // Try to add to uncached texture
    SkIPoint16 iPos;
    sk_sp<TextureProxy> texProxy = this->addRect(maskSize, &iPos);
    if (!texProxy) {
        return nullptr;
    }
    *outPos = skvx::half2(iPos.x(), iPos.y());
    // If the mask is empty, just return.
    // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
    // another way. See PathAtlas::addShape().
    if (!all(maskSize)) {
        return texProxy;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(localToDevice.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));

    add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
                       &fUncachedScene, &fUncachedOccupiedArea);

    return texProxy;
}

/////////////////////////////////////////////////////////////////////////////////////////

bool VelloComputePathAtlas::VelloAtlasMgr::onAddToAtlas(const Shape& shape,
                                                        const Transform& localToDevice,
                                                        const SkStrokeRec& style,
                                                        SkIRect shapeBounds,
                                                        SkIVector transformedMaskOffset,
                                                        const AtlasLocator& locator) {
    uint32_t index = locator.pageIndex();
    const TextureProxy* texProxy = fDrawAtlas->getProxies()[index].get();
    if (!texProxy) {
        return false;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(localToDevice.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    SkIPoint iPos = locator.topLeft();
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x() + kEntryPadding, iPos.y() + kEntryPadding),
                                  skvx::float2(shapeBounds.width(), shapeBounds.height()));

    add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
                       &fScenes[index], &fOccupiedAreas[index]);

    return true;
}

// Appends one dispatch group for every atlas page whose occupied area is non-empty.
//
// recorder   - must be non-null; supplies the renderer and the anti-aliasing configuration.
// dispatches - list that receives the created dispatch groups.
//
// Returns true if at least one dispatch group was appended. A page whose group could not be
// created is logged and skipped; the remaining pages are still processed.
bool VelloComputePathAtlas::VelloAtlasMgr::recordDispatches(
        Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const {
    SkASSERT(recorder);
    VelloAaConfig config = get_vello_aa_config(recorder);

    bool addedDispatches = false;
    // Iterate over every page slot. The bound is the same constant that sizes fScenes and
    // fOccupiedAreas (and that onReset() loops over), so the loop cannot drift out of sync with
    // the arrays it indexes.
    for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
        if (!fOccupiedAreas[i].isEmpty()) {
            std::unique_ptr<DispatchGroup> dispatchGroup =
                    render_vello_scene(recorder,
                                       fDrawAtlas->getProxies()[i],
                                       fScenes[i],
                                       fOccupiedAreas[i],
                                       config);
            if (dispatchGroup) {
                TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                                     "# dispatches", dispatchGroup->dispatches().size());
                dispatches->emplace_back(std::move(dispatchGroup));
                addedDispatches = true;
            } else {
                SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
            }
        }
    }
    return addedDispatches;
}


#endif  // SK_ENABLE_VELLO_SHADERS

// Factory for the default compute path atlas. Returns a VelloComputePathAtlas when
// SK_ENABLE_VELLO_SHADERS is defined; otherwise returns nullptr, so callers must handle a null
// result.
std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
#ifdef SK_ENABLE_VELLO_SHADERS
    return std::make_unique<VelloComputePathAtlas>(recorder);
#else
    return nullptr;
#endif
}

}  // namespace skgpu::graphite
