build custom blitter for drawAtlas
- add uniform_color_dst stage
- add stageupdate option for shaders
More cases we could try to handle in the future:
- perspective (not hard)
- mipmaps (plumbing is there, need to re-call bitmapstate helper)
Before
10025.60 drawAtlas_3 8888
6636.06 drawAtlas_2 8888
After
3566.18 drawAtlas_3 8888
2585.83 ? drawAtlas_2 8888
Change-Id: I656231324c0390029f6d08941c4f9d11ccdb8e87
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/233061
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkDraw_atlas.cpp b/src/core/SkDraw_atlas.cpp
index 05681b2..fdaa9a4 100644
--- a/src/core/SkDraw_atlas.cpp
+++ b/src/core/SkDraw_atlas.cpp
@@ -7,35 +7,110 @@
#include "include/core/SkColorFilter.h"
#include "include/core/SkRSXform.h"
+#include "src/core/SkBlendModePriv.h"
+#include "src/core/SkColorSpacePriv.h"
+#include "src/core/SkColorSpaceXformSteps.h"
+#include "src/core/SkCoreBlitters.h"
#include "src/core/SkDraw.h"
+#include "src/core/SkRasterPipeline.h"
#include "src/core/SkScan.h"
#include "src/shaders/SkShaderBase.h"
+#include "include/core/SkMatrix.h"
+#include "src/core/SkScan.h"
+
+static void fill_rect(const SkMatrix& ctm, const SkRasterClip& rc,
+ const SkRect& r, SkBlitter* blitter) {
+ if (ctm.rectStaysRect()) {
+ SkRect dr;
+ ctm.mapRect(&dr, r);
+ SkScan::FillRect(dr, rc, blitter);
+ } else {
+ SkPath path;
+ path.addRect(r);
+ path.transform(ctm);
+ SkScan::FillPath(path, rc, blitter);
+ }
+}
+
+static void load_color(SkRasterPipeline_UniformColorCtx* ctx, const float rgba[]) {
+ // only need one of these. can I query the pipeline to know if it's lowp or highp?
+ ctx->rgba[0] = SkScalarRoundToInt(rgba[0]*255); ctx->r = rgba[0];
+ ctx->rgba[1] = SkScalarRoundToInt(rgba[1]*255); ctx->g = rgba[1];
+ ctx->rgba[2] = SkScalarRoundToInt(rgba[2]*255); ctx->b = rgba[2];
+ ctx->rgba[3] = SkScalarRoundToInt(rgba[3]*255); ctx->a = rgba[3];
+}
+
void SkDraw::drawAtlas(const SkImage* atlas, const SkRSXform xform[], const SkRect textures[],
const SkColor colors[], int count, SkBlendMode bmode, const SkPaint& paint) {
- SkDraw draw(*this);
- SkPaint p(paint);
+ sk_sp<SkShader> atlasShader = atlas->makeShader();
+ if (!atlasShader) {
+ return;
+ }
+ SkPaint p(paint);
p.setAntiAlias(false); // we never respect this for drawAtlas(or drawVertices)
p.setStyle(SkPaint::kFill_Style);
p.setShader(nullptr);
p.setMaskFilter(nullptr);
- sk_sp<SkShader> atlasShader = atlas->makeShader();
- if (!atlasShader) {
+ SkSTArenaAlloc<256> alloc;
+ SkRasterPipeline pipeline(&alloc);
+ SkStageRec rec = {
+ &pipeline, &alloc, fDst.colorType(), fDst.colorSpace(), p, nullptr, *fMatrix
+ };
+
+ SkStageUpdater* updator = as_SB(atlasShader.get())->appendUpdatableStages(rec);
+ if (!updator) {
+ SkDraw draw(*this);
+
+ p.setShader(atlasShader);
+ for (int i = 0; i < count; ++i) {
+ if (colors) {
+ p.setShader(SkShaders::Blend(bmode, SkShaders::Color(colors[i]), atlasShader));
+ }
+ SkMatrix mx;
+ mx.setRSXform(xform[i]);
+ mx.preTranslate(-textures[i].fLeft, -textures[i].fTop);
+ mx.postConcat(*fMatrix);
+ draw.fMatrix = &mx;
+ draw.drawRect(textures[i], p);
+ }
return;
}
- p.setShader(atlasShader);
- SkMatrix xf;
+ SkRasterPipeline_UniformColorCtx* uniformCtx = nullptr;
+ SkColorSpaceXformSteps steps(sk_srgb_singleton(), kUnpremul_SkAlphaType,
+ rec.fDstCS, kUnpremul_SkAlphaType);
+
+ if (colors) {
+ // we will late-bind the values in ctx, once for each color in the loop
+ uniformCtx = alloc.make<SkRasterPipeline_UniformColorCtx>();
+ rec.fPipeline->append(SkRasterPipeline::uniform_color_dst, uniformCtx);
+ SkBlendMode_AppendStages(bmode, rec.fPipeline);
+ }
+
+ bool isOpaque = !colors && atlasShader->isOpaque();
+ if (p.getAlphaf() != 1) {
+ rec.fPipeline->append(SkRasterPipeline::scale_1_float, alloc.make<float>(p.getAlphaf()));
+ isOpaque = false;
+ }
+
+ auto blitter = SkCreateRasterPipelineBlitter(fDst, p, pipeline, isOpaque, &alloc);
+
for (int i = 0; i < count; ++i) {
if (colors) {
- p.setShader(SkShaders::Blend(bmode, SkShaders::Color(colors[i]), atlasShader));
+ SkColor4f c4 = SkColor4f::FromColor(colors[i]);
+ steps.apply(c4.vec());
+ load_color(uniformCtx, c4.premul().vec());
}
- xf.setRSXform(xform[i]);
- xf.preTranslate(-textures[i].fLeft, -textures[i].fTop);
- xf.postConcat(*fMatrix);
- draw.fMatrix = &xf;
- draw.drawRect(textures[i], p);
+
+ SkMatrix mx;
+ mx.setRSXform(xform[i]);
+ mx.preTranslate(-textures[i].fLeft, -textures[i].fTop);
+ mx.postConcat(*fMatrix);
+ // update() returns false when it could not refresh the stage matrix; skip rather than draw stale
+ if (!updator->update(mx, nullptr)) { continue; }
+ fill_rect(mx, *fRC, textures[i], blitter);
}
}
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 8552c02..8a3e0cc 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -41,7 +41,8 @@
M(force_opaque) M(force_opaque_dst) \
M(set_rgb) M(unbounded_set_rgb) M(swap_rb) M(swap_rb_dst) \
M(from_srgb) M(to_srgb) \
- M(black_color) M(white_color) M(uniform_color) M(unbounded_uniform_color) \
+ M(black_color) M(white_color) \
+ M(uniform_color) M(unbounded_uniform_color) M(uniform_color_dst) \
M(seed_shader) M(dither) \
M(load_a8) M(load_a8_dst) M(store_a8) M(gather_a8) \
M(load_565) M(load_565_dst) M(store_565) M(gather_565) \
@@ -265,7 +266,6 @@
bool empty() const { return fStages == nullptr; }
-
private:
struct StageList {
StageList* prev;
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 702a41a..5dc6da9 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1317,6 +1317,13 @@
b = c->b;
a = c->a;
}
+// load 4 floats from memory, and splat them into dr,dg,db,da
+STAGE(uniform_color_dst, const SkRasterPipeline_UniformColorCtx* c) {
+ dr = c->r;
+ dg = c->g;
+ db = c->b;
+ da = c->a;
+}
// splats opaque-black into r,g,b,a
STAGE(black_color, Ctx::None) {
@@ -3145,6 +3152,12 @@
b = c->rgba[2];
a = c->rgba[3];
}
+STAGE_PP(uniform_color_dst, const SkRasterPipeline_UniformColorCtx* c) {
+ dr = c->rgba[0];
+ dg = c->rgba[1];
+ db = c->rgba[2];
+ da = c->rgba[3];
+}
STAGE_PP(black_color, Ctx::None) { r = g = b = 0; a = 255; }
STAGE_PP(white_color, Ctx::None) { r = g = b = 255; a = 255; }
diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp
index 28ff579..57523c7 100644
--- a/src/shaders/SkImageShader.cpp
+++ b/src/shaders/SkImageShader.cpp
@@ -291,15 +291,45 @@
void SkShaderBase::RegisterFlattenables() { SK_REGISTER_FLATTENABLE(SkImageShader); }
-bool SkImageShader::onAppendStages(const SkStageRec& rec) const {
+class SkImageStageUpdater : public SkStageUpdater {
+public:
+ const SkImageShader* fShader;
+
+ float fMatrixStorage[6];
+
+#if 0 // TODO: when we support mipmaps
+ SkRasterPipeline_GatherCtx* fGather;
+ SkRasterPipeline_TileCtx* fLimitX;
+ SkRasterPipeline_TileCtx* fLimitY;
+ SkRasterPipeline_DecalTileCtx* fDecal;
+#endif
+
+ bool update(const SkMatrix& ctm, const SkMatrix* localM) override {
+ SkMatrix matrix;
+ return fShader->computeTotalInverse(ctm, localM, &matrix) &&
+ matrix.asAffine(fMatrixStorage);
+ }
+};
+
+bool SkImageShader::doStages(const SkStageRec& rec, SkImageStageUpdater* updater) const {
+ if (updater &&
+ (rec.fPaint.getFilterQuality() == kMedium_SkFilterQuality ||
+ rec.fCTM.hasPerspective()))
+ {
+ // TODO: handle these cases
+ // medium: re-call RequestBitmap and update width/height accordingly
+ // persp: store 9 floats and use persp stage
+ return false;
+ }
+
SkRasterPipeline* p = rec.fPipeline;
SkArenaAlloc* alloc = rec.fAlloc;
+ auto quality = rec.fPaint.getFilterQuality();
SkMatrix matrix;
if (!this->computeTotalInverse(rec.fCTM, rec.fLocalM, &matrix)) {
return false;
}
- auto quality = rec.fPaint.getFilterQuality();
const auto* state = SkBitmapController::RequestBitmap(as_IB(fImage.get()),
matrix, quality, alloc);
@@ -312,28 +342,32 @@
quality = state->quality();
auto info = pm.info();
- // When the matrix is just an integer translate, bilerp == nearest neighbor.
- if (quality == kLow_SkFilterQuality &&
- matrix.getType() <= SkMatrix::kTranslate_Mask &&
- matrix.getTranslateX() == (int)matrix.getTranslateX() &&
- matrix.getTranslateY() == (int)matrix.getTranslateY()) {
- quality = kNone_SkFilterQuality;
- }
-
- // See skia:4649 and the GM image_scale_aligned.
- if (quality == kNone_SkFilterQuality) {
- if (matrix.getScaleX() >= 0) {
- matrix.setTranslateX(nextafterf(matrix.getTranslateX(),
- floorf(matrix.getTranslateX())));
- }
- if (matrix.getScaleY() >= 0) {
- matrix.setTranslateY(nextafterf(matrix.getTranslateY(),
- floorf(matrix.getTranslateY())));
- }
- }
-
p->append(SkRasterPipeline::seed_shader);
- p->append_matrix(alloc, matrix);
+
+ if (updater) {
+ p->append(SkRasterPipeline::matrix_2x3, updater->fMatrixStorage);
+ } else {
+ // When the matrix is just an integer translate, bilerp == nearest neighbor.
+ if (quality == kLow_SkFilterQuality &&
+ matrix.getType() <= SkMatrix::kTranslate_Mask &&
+ matrix.getTranslateX() == (int)matrix.getTranslateX() &&
+ matrix.getTranslateY() == (int)matrix.getTranslateY()) {
+ quality = kNone_SkFilterQuality;
+ }
+
+ // See skia:4649 and the GM image_scale_aligned.
+ if (quality == kNone_SkFilterQuality) {
+ if (matrix.getScaleX() >= 0) {
+ matrix.setTranslateX(nextafterf(matrix.getTranslateX(),
+ floorf(matrix.getTranslateX())));
+ }
+ if (matrix.getScaleY() >= 0) {
+ matrix.setTranslateY(nextafterf(matrix.getTranslateY(),
+ floorf(matrix.getTranslateY())));
+ }
+ }
+ p->append_matrix(alloc, matrix);
+ }
auto gather = alloc->make<SkRasterPipeline_GatherCtx>();
gather->pixels = pm.addr();
@@ -356,6 +390,16 @@
decal_ctx->limit_y = limit_y->scale;
}
+#if 0 // TODO: when we support kMedium
+ if (updator && (quality == kMedium_SkFilterQuality)) {
+ // if we change levels in mipmap, we need to update the scales (and invScales)
+ updator->fGather = gather;
+ updator->fLimitX = limit_x;
+ updator->fLimitY = limit_y;
+ updator->fDecal = decal_ctx;
+ }
+#endif
+
auto append_tiling_and_gather = [&] {
if (decal_x_and_y) {
p->append(SkRasterPipeline::decal_x_and_y, decal_ctx);
@@ -439,7 +483,7 @@
return true;
};
- // We've got a fast path for 8888 bilinear clamp/clamp sampling.
+ // Check for fast-path stages.
auto ct = info.colorType();
if (true
&& (ct == kRGBA_8888_SkColorType || ct == kBGRA_8888_SkColorType)
@@ -511,7 +555,6 @@
if (quality == kNone_SkFilterQuality) {
append_tiling_and_gather();
-
} else if (quality == kLow_SkFilterQuality) {
p->append(SkRasterPipeline::save_xy, sampler);
@@ -550,3 +593,14 @@
return append_misc();
}
+
+bool SkImageShader::onAppendStages(const SkStageRec& rec) const {
+ return this->doStages(rec, nullptr);
+}
+
+SkStageUpdater* SkImageShader::onAppendUpdatableStages(const SkStageRec& rec) const {
+ auto updater = rec.fAlloc->make<SkImageStageUpdater>();
+ updater->fShader = this;
+ return this->doStages(rec, updater) ? updater : nullptr;
+}
+
diff --git a/src/shaders/SkImageShader.h b/src/shaders/SkImageShader.h
index 6990a4a..5a01f0a 100644
--- a/src/shaders/SkImageShader.h
+++ b/src/shaders/SkImageShader.h
@@ -12,6 +12,9 @@
#include "src/shaders/SkBitmapProcShader.h"
#include "src/shaders/SkShaderBase.h"
+// private subclass of SkStageUpdater
+class SkImageStageUpdater;
+
class SkImageShader : public SkShaderBase {
public:
static sk_sp<SkShader> Make(sk_sp<SkImage>,
@@ -42,6 +45,9 @@
SkImage* onIsAImage(SkMatrix*, SkTileMode*) const override;
bool onAppendStages(const SkStageRec&) const override;
+ SkStageUpdater* onAppendUpdatableStages(const SkStageRec&) const override;
+
+ bool doStages(const SkStageRec&, SkImageStageUpdater* = nullptr) const;
sk_sp<SkImage> fImage;
const SkTileMode fTileModeX;
diff --git a/src/shaders/SkShaderBase.h b/src/shaders/SkShaderBase.h
index 58634f3..e071e22 100644
--- a/src/shaders/SkShaderBase.h
+++ b/src/shaders/SkShaderBase.h
@@ -29,6 +29,23 @@
class SkPaint;
class SkRasterPipeline;
+/**
+ * Shaders can optionally return a subclass of this when appending their stages.
+ * Doing so tells the caller that the stages can be reused with different CTMs (but nothing
+ * else can change), by calling the updater's update() method before each use.
+ *
+ * This can be a perf-win for bulk draws like drawAtlas and drawVertices, where most of the setup
+ * (e.g. uniforms) is constant, and only something small is changing (e.g. matrices). This
+ * reuse skips the cost of computing the stages (and/or avoids having to allocate a separate
+ * shader for each small draw).
+ */
+class SkStageUpdater {
+public:
+ virtual ~SkStageUpdater() {}
+
+ virtual bool update(const SkMatrix& ctm, const SkMatrix* localM) = 0;
+};
+
class SkShaderBase : public SkShader {
public:
~SkShaderBase() override;
@@ -185,6 +202,10 @@
*/
virtual sk_sp<SkShader> makeAsALocalMatrixShader(SkMatrix* localMatrix) const;
+ SkStageUpdater* appendUpdatableStages(const SkStageRec& rec) const {
+ return this->onAppendUpdatableStages(rec);
+ }
+
protected:
SkShaderBase(const SkMatrix* localMatrix = nullptr);
@@ -207,6 +228,8 @@
// Default impl creates shadercontext and calls that (not very efficient)
virtual bool onAppendStages(const SkStageRec&) const;
+ virtual SkStageUpdater* onAppendUpdatableStages(const SkStageRec&) const { return nullptr; }
+
private:
// This is essentially const, but not officially so it can be modified in constructors.
SkMatrix fLocalMatrix;