blob: 2ce147acdc9231a82b6a61ff9d25c6215642bcb7 [file] [log] [blame]
#include "../eskia.h"
#include "array_size.h"
#include "compiler.h"
#include "implicit_cast.h"
#include <algorithm>
#include <cmath>
namespace eskia {
// Convert from 5- or 6-bit unorm to 8-bit unorm.
static constexpr uint8_t unpack_5(int x) {
return implicit_cast<uint8_t>((x << 3) | (x >> 2));
}
static constexpr uint8_t unpack_6(int x) {
return implicit_cast<uint8_t>((x << 2) | (x >> 4));
}
static constexpr bool test_unpack() {
for (int i = 0; i <= 31; i++) {
int tolerance = (i == 0 || i == 31) ? 0 : 1;
int error = implicit_cast<int>(i * 255/31.0 + 0.5) - unpack_5(i);
if (error > +tolerance || error < -tolerance) {
return false;
}
}
for (int i = 0; i <= 63; i++) {
int tolerance = (i == 0 || i == 63) ? 0 : 1;
int error = implicit_cast<int>(i * 255/63.0 + 0.5) - unpack_6(i);
if (error > +tolerance || error < -tolerance) {
return false;
}
}
return true;
}
static_assert(test_unpack(), "");
// Convert from 8-bit unorm to 5- or 6-bit unorm.
static constexpr uint8_t pack_5(int x) { return implicit_cast<uint8_t>(x >> 3); }
static constexpr uint8_t pack_6(int x) { return implicit_cast<uint8_t>(x >> 2); }
static constexpr bool test_pack() {
for (int i = 0; i <= 255; i++) {
int tolerance = (i == 0 || i == 255) ? 0 : 1;
int error_5 = implicit_cast<int>(i * 31/255.0 + 0.5) - pack_5(i);
int error_6 = implicit_cast<int>(i * 63/255.0 + 0.5) - pack_6(i);
if (error_5 > +tolerance || error_5 < -tolerance ||
error_6 > +tolerance || error_6 < -tolerance) {
return false;
}
}
return true;
}
static_assert(test_pack(), "");
// Rounding divide by 255, to convert the product of two 8-bit unorms back to an 8-bit unorm.
static constexpr uint8_t approx_div255(int x) {
return implicit_cast<uint8_t>((x + 255) / 256);
}
static constexpr bool test_approx_div255() {
for (int i = 0; i < 255; i++) {
for (int j = 0; j < 255; j++) {
int tolerance = (i == 0 || i == 255 ||
j == 0 || j == 255) ? 0 : 1;
int error = approx_div255(i*j) - (i*j+127)/255;
if (error > +tolerance || error < -tolerance) {
return false;
}
}
}
return true;
}
static_assert(test_approx_div255(), "");
// Linearly interpolate from d to s by c, s and d 8-bit unorms, c 7-bit [0,128] coverage.
static constexpr uint8_t lerp(int s, int d, int c) {
return implicit_cast<uint8_t>( (s*c + (128-c)*d)/128 );
}
static_assert(lerp(42, 47, 0) == 47, "");
static_assert(lerp(42, 47, 128) == 42, "");
static_assert(lerp(42, 47, 64) == 44, "");
static constexpr ALWAYS_INLINE
RGBA_8888 lerp_SWAR(RGBA_8888 src, RGBA_8888 dst, float coverage) {
// See notes in srcover_SWAR() below for the general approach.
// Here we're just doing lerp()'s math instead.
auto s_rb = implicit_cast<uint32_t>(src.r) << 0 | implicit_cast<uint32_t>(src.b) << 16,
s_ga = implicit_cast<uint32_t>(src.g) << 8 | implicit_cast<uint32_t>(src.a) << 24,
d_rb = implicit_cast<uint32_t>(dst.r) << 0 | implicit_cast<uint32_t>(dst.b) << 16,
d_ga = implicit_cast<uint32_t>(dst.g) << 8 | implicit_cast<uint32_t>(dst.a) << 24;
auto c = implicit_cast<uint8_t>(coverage * 128);
uint32_t rgba = (( ( (s_rb>>0)*c + (d_rb>>0)*(128-c) ) & 0x7f807f80) >> 7)
| (( ( (s_ga>>8)*c + (d_ga>>8)*(128-c) ) & 0x7f807f80) << 1);
return {
implicit_cast<uint8_t>((rgba >> 0) & 0xff),
implicit_cast<uint8_t>((rgba >> 8) & 0xff),
implicit_cast<uint8_t>((rgba >> 16) & 0xff),
implicit_cast<uint8_t>((rgba >> 24) & 0xff),
};
}
// The srcover blend mode, s + d*(1-sa), for 8-bit unorms.
static constexpr uint8_t srcover(int s, int sa, int d) {
return implicit_cast<uint8_t>(s + approx_div255(d * (255 - sa)));
}
static constexpr bool test_srcover() {
for (int d = 0; d <= 255; d++) {
// Transparent src -> dst.
if (srcover(0, 0, d) != d) {
return false;
}
// Opaque src -> src.
for (int s = 0; s <= 255; s++) {
if (srcover(s, 255, d) != s) {
return false;
}
}
}
return true;
}
static_assert(test_srcover(), "");
// srcover() applied to all four channels at once,
// using a SIMD-within-a-register approach to cut the number of multiplies in half.
static constexpr ALWAYS_INLINE
RGBA_8888 srcover_SWAR(RGBA_8888 src, RGBA_8888 dst, float coverage) {
// We'll multiply each 8-bit channel from dst by one_minus_sa, also 8-bit. That fits
// in 16 bits, so we can perform two logical multiplies with each uint32_t multiply.
//
// These particular pairings and shifts let the compiler convert this to simple masking,
// with 0x00ff00ff for red and blue, and 0xff00ff00 for green and alpha.
auto s_rb = implicit_cast<uint32_t>(src.r) << 0 | implicit_cast<uint32_t>(src.b) << 16,
s_ga = implicit_cast<uint32_t>(src.g) << 8 | implicit_cast<uint32_t>(src.a) << 24,
d_rb = implicit_cast<uint32_t>(dst.r) << 0 | implicit_cast<uint32_t>(dst.b) << 16,
d_ga = implicit_cast<uint32_t>(dst.g) << 8 | implicit_cast<uint32_t>(dst.a) << 24;
// Fold 7-bit [0,128] coverage into src, s = (s * coverage) / 128.
auto c = implicit_cast<uint8_t>(coverage * 128);
s_rb = (((s_rb >> 0) * c) & 0x7f807f80) >> 7;
s_ga = (((s_ga >> 8) * c) & 0x7f807f80) << 1;
uint8_t one_minus_sa = 255 - (s_ga >> 24);
// Now that we're set up, here's the body of srcover(),
// rgba = s + approx_div255(d * (255 - sa))
uint32_t rgba = s_rb + ((( (d_rb >> 0) * one_minus_sa + 0x00ff00ff ) & 0xff00ff00) >> 8)
| s_ga + ((( (d_ga >> 8) * one_minus_sa + 0x00ff00ff ) & 0xff00ff00) >> 0);
// This should optimize to a no-op, as if written `return bit_cast<RGBA_8888>(rgba)`.
return {
implicit_cast<uint8_t>((rgba >> 0) & 0xff),
implicit_cast<uint8_t>((rgba >> 8) & 0xff),
implicit_cast<uint8_t>((rgba >> 16) & 0xff),
implicit_cast<uint8_t>((rgba >> 24) & 0xff),
};
}
static constexpr bool operator==(RGBA_8888 x, RGBA_8888 y) {
return x.r == y.r
&& x.g == y.g
&& x.b == y.b
&& x.a == y.a;
}
static_assert(srcover_SWAR({127,0,0,127}, {255,255,255,255}, 1.0f)
== RGBA_8888{255,128,128,255}, "");
static constexpr RGBA_8888 to_RGBA_8888(RGBA_8888 c) { return c; }
static constexpr RGBA_8888 to_RGBA_8888(BGRA_8888 c) { return {c.r,c.g,c.b,c.a}; }
static constexpr RGBA_8888 to_RGBA_8888(RGB_888 c) { return {c.r,c.g,c.b,255}; }
static constexpr RGBA_8888 to_RGBA_8888(RGB_565 c) { return {unpack_5(c.r),
unpack_6(c.g),
unpack_5(c.b), 255};}
static constexpr RGBA_8888 to_RGBA_8888(BGR_565 c) { return {unpack_5(c.r),
unpack_6(c.g),
unpack_5(c.b), 255};}
static constexpr void from_RGBA_8888(RGBA_8888 c, RGBA_8888* d) { *d = c; }
static constexpr void from_RGBA_8888(RGBA_8888 c, BGRA_8888* d) {
*d = BGRA_8888{c.b, c.g, c.r, c.a};
}
static constexpr void from_RGBA_8888(RGBA_8888 c, RGB_888* d) {
*d = RGB_888{c.r, c.g, c.b};
}
static constexpr void from_RGBA_8888(RGBA_8888 c, RGB_565* d) {
*d = RGB_565{pack_5(c.r), pack_6(c.g), pack_5(c.b)};
}
static constexpr void from_RGBA_8888(RGBA_8888 c, BGR_565* d) {
*d = BGR_565{pack_5(c.b), pack_6(c.g), pack_5(c.r)};
}
template <typename Fn>
static void* blend(void* vdst, const RGBA_8888* src, int n, Fn&& fn) {
// Cast our dst pointer to the type fn() returns.
auto dst = static_cast<decltype(fn({},{}))*>(vdst);
for (const auto end = dst+n; dst != end; dst++, src++) {
*dst = fn(*src, *dst);
}
return dst;
}
void* src_RGB_565(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGB_565 d) {
from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* src_BGR_565(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, BGR_565 d) {
from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* src_RGB_888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGB_888 d) {
from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* src_RGBA_8888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGBA_8888 d) {
from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* src_BGRA_8888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, BGRA_8888 d) {
from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* srcover_RGB_565(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGB_565 d) {
from_RGBA_8888(srcover_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* srcover_BGR_565(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, BGR_565 d) {
from_RGBA_8888(srcover_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* srcover_RGB_888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGB_888 d) {
from_RGBA_8888(srcover_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* srcover_RGBA_8888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, RGBA_8888 d) {
from_RGBA_8888(srcover_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
void* srcover_BGRA_8888(void* dst, const RGBA_8888* src, float coverage, int n) {
return blend(dst,src,n, [=](RGBA_8888 s, BGRA_8888 d) {
from_RGBA_8888(srcover_SWAR(s, to_RGBA_8888(d), coverage), &d);
return d;
});
}
static constexpr RGBA_8888 premultiply(RGBA_8888 px) {
return {
approx_div255(px.r * px.a),
approx_div255(px.g * px.a),
approx_div255(px.b * px.a),
px.a,
};
}
static_assert(premultiply(RGBA_8888{ 0, 0, 0, 0}) == RGBA_8888{ 0, 0, 0, 0}, "");
static_assert(premultiply(RGBA_8888{127, 63,192,255}) == RGBA_8888{127, 63,192,255}, "");
static_assert(premultiply(RGBA_8888{168, 83,255,192}) == RGBA_8888{126, 63,192,192}, "");
void single_color(const Paint& paint,
int x, int y, int n,
float coverage, void* dst) {
(void)x;
(void)y;
const RGBA_8888 pm = premultiply(paint.color);
const RGBA_8888 splat[] = {pm,pm,pm,pm, pm,pm,pm,pm, pm,pm,pm,pm, pm,pm,pm,pm};
for (const int N = array_size(splat); n >= N; n -= N) {
dst = paint.blend(dst, splat, coverage, N);
}
paint.blend(dst, splat, coverage, n);
}
static constexpr int clamp(int v, int lo, int hi) {
if (v < lo) { return lo; }
if (v > hi) { return hi; }
return v;
}
static_assert(clamp(1, 2,3) == 2, "");
static_assert(clamp(4, 2,3) == 3, "");
static_assert(clamp(2, 1,3) == 2, "");
static_assert(clamp(3, 1,3) == 3, "");
struct Point { float x,y; };
static constexpr Point transform(const Affine& affine, Point p) {
return {
affine.sx * p.x + affine.kx * p.y + affine.tx,
affine.ky * p.x + affine.sy * p.y + affine.ty,
};
}
// TODO(mtklein): unit test transform()
[[clang::no_sanitize("float-divide-by-zero")]]
static float div(float x, float y) {
return x / y;
}
// Return true if x is in [lo,hi], taking care to handle edge cases like NaNs and ±Inf.
static constexpr bool in_range(float x, float lo, float hi) {
return (x >= lo) && (x <= hi);
}
static_assert( in_range( 0 , 0,1), "");
static_assert( in_range( 0.5, 0,1), "");
static_assert( in_range( 1 , 0,1), "");
static_assert(!in_range( -1 , 0,1), "");
static_assert(!in_range( 2 , 0,1), "");
static_assert(!in_range(+INFINITY, 0,1), "");
static_assert(!in_range(-INFINITY, 0,1), "");
static_assert(!in_range( NAN, 0,1), "");
struct Edge {
// An edge is a directed line segment, lerping between two points for t in [0,1]:
// x = (b.x - a.x) t + a.x
// y = (b.y - a.y) t + a.y
Point a,b;
// We often want to find the x where an edge intersects a given horizontal scanline.
// This precalculated term makes x_from_y() quick and easy.
float dxdy;
bool quick_reject(float min_y, float max_y) const {
return min_y > std::max(a.y, b.y)
|| max_y < std::min(a.y, b.y);
}
// If the edge intersects the horizontal at y exactly once, set x where and return true.
bool x_from_y(float y, float* x) const {
*x = (y - a.y) * dxdy + a.x;
return in_range(*x, std::min(a.x,b.x), std::max(a.x,b.x));
}
int winding() const { return a.y < b.y ? +1 : -1; }
Edge(Point _a, Point _b) : a(_a), b(_b) {
// x = (b.x - a.x)t + a.x
// y = (b.y - a.y)t + b.y
//
// Solve for t given y:
//
// ( y - a.y)
// t = -----------
// (b.y - a.y)
//
// Substitute that back in to solve for x.
// (b.x - a.x) (y - a.y)
// x = --------------------- + a.x
// (b.y - a.y)
//
// We can redistribute that to the form of x = y * M + A.
// If x is finite and within the edge's range of x, that's where they intersect!
dxdy = div(b.x - a.x,
b.y - a.y);
}
};
struct EdgeIterator {
const Edge* edge;
const Edge* last;
float min,
max;
EdgeIterator(const Edge* e, int n, float min_y, float max_y)
: edge(e)
, last(e+n)
, min(min_y)
, max(max_y)
{
edge--;
++(*this);
}
EdgeIterator begin() const { return *this; }
EdgeIterator end() const { return EdgeIterator{last,0,min,max}; }
bool operator!=(const EdgeIterator& that) const { return edge != that.edge; }
const Edge& operator*() const { return *edge; }
EdgeIterator& operator++() {
do {
edge++;
} while (edge != last &&
(!std::isfinite(edge->dxdy) || edge->quick_reject(min, max)));
return *this;
}
};
static Rect bounds(const Edge edges[], int nedges) {
float l = edges->a.x,
t = edges->a.y,
r = edges->a.x,
b = edges->a.y;
for (; nedges --> 0; edges++) {
l = std::min(l, edges->a.x);
t = std::min(t, edges->a.y);
r = std::max(r, edges->a.x);
b = std::max(b, edges->a.y);
l = std::min(l, edges->b.x);
t = std::min(t, edges->b.y);
r = std::max(r, edges->b.x);
b = std::max(b, edges->b.y);
}
return {l,t,r,b};
}
static Rect round_out(Rect rect) {
return {
floorf(rect.l),
floorf(rect.t),
ceilf (rect.r),
ceilf (rect.b),
};
}
#if 0
static constexpr Point kSampleOffsets[] = { {0.5f, 0.5f} };
#elif 0
static constexpr Point kSampleOffsets[] = { {0.25f, 0.25f}, {0.75f, 0.75f} };
#else
static constexpr Point kSampleOffsets[] = {
{ 4/9.0f, 2/9.0f },
{ 8/9.0f, 4/9.0f },
{ 2/9.0f, 6/9.0f },
{ 6/9.0f, 8/9.0f },
};
#endif
struct Delta {
int16_t ix; // which x-coordinate gets a delta?
int8_t sample; // which sample within that x-coordinate?
int8_t winding; // which direction (+1 or -1)?
};
enum class FillRule : int { EvenOdd = +1, NonZero = -1 };
struct Accum {
int winding[array_size(kSampleOffsets)];
Accum() {
for (auto& w : winding) { w = 0; }
}
Accum& operator+=(Delta d) {
winding[d.sample] += d.winding;
return *this;
}
float coverage(FillRule fill_rule) const {
float c = 0;
for (auto w : winding) {
if ((w & static_cast<int>(fill_rule)) != 0) {
c += 1.0f / array_size(winding);
}
}
return c;
}
};
static void fill_edges(Frame* frame,
const Edge edges[],
int nedges,
const Paint& paint,
FillRule fill_rule = FillRule::NonZero) {
const Rect ibounds = round_out(bounds(edges, nedges));
// A quick guide to indices:
// - ix and iy are integer x and y pixel coordinates
// - j loops over per-sample data within Delta, offsets[] and winding[].
for (float y = ibounds.t; y < ibounds.b; y += 1) {
Delta deltas[4096];
Delta* delta = deltas;
// TODO(mtklein): handle running off the end of deltas.
for (const Edge& edge : EdgeIterator{edges, nedges, y, y+1}) {
for (int j = 0; j < array_size(kSampleOffsets); j++) {
float sample_y = y + kSampleOffsets[j].y;
float x;
if (edge.x_from_y(sample_y, &x)) {
// Which is the first integer pixel whose ix + offset ≥ x?
// ix + offset >= x
// ~~> ix >= x - offset
// ~~> ix = ceil(x - offset)
// (Another option here that's nearly the same is floor(x + offset).)
int ix = implicit_cast<int>(std::ceil(x - kSampleOffsets[j].x));
// TODO(mtklein): calculate area,
// distribute over active samples,
// update winding values,
// re-accumulate areas back over still-active samples
*delta++ = Delta{
implicit_cast<int16_t>(ix),
implicit_cast<int8_t >(j),
implicit_cast<int8_t >(edge.winding()),
};
}
}
}
int iy = implicit_cast<int>(y);
auto row = static_cast<char*>(frame->dst)
+ frame->bpp * frame->stride * iy;
std::sort(deltas, delta, [](Delta a, Delta b) { return a.ix < b.ix; });
Accum accum;
int shaded_until = deltas[0].ix;
for (const Delta* d = deltas; d != delta; ) {
for (float c = accum.coverage(fill_rule); c > 0;) {
paint.shade(paint, shaded_until,iy, d->ix - shaded_until,
c, row + shaded_until*frame->bpp);
break;
}
shaded_until = d->ix;
// Accumulate all the deltas at the next ix.
// TODO(mtklein): add tests to make sure this actually works right.
do {
accum += *d++;
} while (d != delta && d->ix == (d-1)->ix);
}
}
}
void Frame::fill(const Rect& rect, const Paint& paint) {
auto l = implicit_cast<int>(floorf(rect.l)),
t = implicit_cast<int>(floorf(rect.t)),
r = implicit_cast<int>(floorf(rect.r)),
b = implicit_cast<int>(floorf(rect.b));
l = clamp(l, 0,this->width - 1);
t = clamp(t, 0,this->height - 1);
r = clamp(r, 0,this->width );
b = clamp(b, 0,this->height );
int n = r-l;
if (n < 0) {
return;
}
const int row_bytes = this->bpp * this->stride;
auto row = static_cast<char*>(this->dst) + t*row_bytes
+ l*this->bpp;
for (int y = t; y < b; y++) {
// TODO(mtklein): this coverage is wrong when rect is not an integer rectangle.
float coverage = 1.0f;
paint.shade(paint, l,y,n, coverage, row);
row += row_bytes;
}
}
void Frame::fill(const Rect& rect, const Paint& paint, const Affine& affine) {
Point lt = transform(affine, {rect.l, rect.t}),
lb = transform(affine, {rect.l, rect.b}),
rb = transform(affine, {rect.r, rect.b}),
rt = transform(affine, {rect.r, rect.t});
Edge edges[] = {
{lt,lb},
{lb,rb},
{rb,rt},
{rt,lt},
};
fill_edges(this, edges, array_size(edges), paint);
}
} // namespace eskia