| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // Coarse rasterization of path segments. |
| |
| // Allocation and initialization of tiles for paths. |
| |
| #version 450 |
| #extension GL_GOOGLE_include_directive : enable |
| |
| #include "mem.h" |
| #include "setup.h" |
| |
| #define LG_COARSE_WG 5 |
| #define COARSE_WG (1 << LG_COARSE_WG) |
| |
| layout(local_size_x = COARSE_WG, local_size_y = 1) in; |
| |
| layout(set = 0, binding = 1) readonly buffer ConfigBuf { |
| Config conf; |
| }; |
| |
| #include "pathseg.h" |
| #include "tile.h" |
| |
| // scale factors useful for converting coordinates to tiles |
| #define SX (1.0 / float(TILE_WIDTH_PX)) |
| #define SY (1.0 / float(TILE_HEIGHT_PX)) |
| |
| #define ACCURACY 0.25 |
| #define Q_ACCURACY (ACCURACY * 0.1) |
| #define REM_ACCURACY (ACCURACY - Q_ACCURACY) |
| #define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY) |
| #define MAX_QUADS 16 |
| |
| vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) { |
| float mt = 1.0 - t; |
| return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t; |
| } |
| |
| vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) { |
| float mt = 1.0 - t; |
| return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t; |
| } |
| |
| struct SubdivResult { |
| float val; |
| float a0; |
| float a2; |
| }; |
| |
| /// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$ |
| /// |
| /// This is used for flattening curves. |
| #define D 0.67 |
| float approx_parabola_integral(float x) { |
| return x * inversesqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x))); |
| } |
| |
| /// An approximation to the inverse parabola integral. |
| #define B 0.39 |
| float approx_parabola_inv_integral(float x) { |
| return x * sqrt(1.0 - B + (B * B + 0.25 * x * x)); |
| } |
| |
| SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) { |
| vec2 d01 = p1 - p0; |
| vec2 d12 = p2 - p1; |
| vec2 dd = d01 - d12; |
| float cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x; |
| float x0 = (d01.x * dd.x + d01.y * dd.y) / cross; |
| float x2 = (d12.x * dd.x + d12.y * dd.y) / cross; |
| float scale = abs(cross / (length(dd) * (x2 - x0))); |
| |
| float a0 = approx_parabola_integral(x0); |
| float a2 = approx_parabola_integral(x2); |
| float val = 0.0; |
| if (scale < 1e9) { |
| float da = abs(a2 - a0); |
| float sqrt_scale = sqrt(scale); |
| if (sign(x0) == sign(x2)) { |
| val = da * sqrt_scale; |
| } else { |
| float xmin = sqrt_tol / sqrt_scale; |
| val = sqrt_tol * da / approx_parabola_integral(xmin); |
| } |
| } |
| return SubdivResult(val, a0, a2); |
| } |
| |
| void main() { |
| uint element_ix = gl_GlobalInvocationID.x; |
| PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size); |
| |
| PathSegTag tag = PathSegTag(PathSeg_Nop, 0); |
| if (element_ix < conf.n_pathseg) { |
| tag = PathSeg_tag(conf.pathseg_alloc, ref); |
| } |
| bool mem_ok = mem_error == NO_ERROR; |
| switch (tag.tag) { |
| case PathSeg_Cubic: |
| PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref); |
| |
| // Affine transform is now applied in pathseg |
| /* |
| uint trans_ix = cubic.trans_ix; |
| if (trans_ix > 0) { |
| TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size); |
| TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref); |
| cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate; |
| cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate; |
| cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate; |
| cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate; |
| } |
| */ |
| |
| vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; |
| float err = err_v.x * err_v.x + err_v.y * err_v.y; |
| // The number of quadratics. |
| uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1); |
| n_quads = min(n_quads, MAX_QUADS); |
| SubdivResult keep_params[MAX_QUADS]; |
| // Iterate over quadratics and tote up the estimated number of segments. |
| float val = 0.0; |
| vec2 qp0 = cubic.p0; |
| float step = 1.0 / float(n_quads); |
| for (uint i = 0; i < n_quads; i++) { |
| float t = float(i + 1) * step; |
| vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t); |
| vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); |
| qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); |
| SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); |
| keep_params[i] = params; |
| val += params.val; |
| |
| qp0 = qp2; |
| } |
| uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1); |
| |
| bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE; |
| uint path_ix = cubic.path_ix; |
| Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); |
| Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); |
| ivec4 bbox = ivec4(path.bbox); |
| vec2 p0 = cubic.p0; |
| qp0 = cubic.p0; |
| float v_step = val / float(n); |
| int n_out = 1; |
| float val_sum = 0.0; |
| for (uint i = 0; i < n_quads; i++) { |
| float t = float(i + 1) * step; |
| vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t); |
| vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); |
| qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); |
| SubdivResult params = keep_params[i]; |
| float u0 = approx_parabola_inv_integral(params.a0); |
| float u2 = approx_parabola_inv_integral(params.a2); |
| float uscale = 1.0 / (u2 - u0); |
| float target = float(n_out) * v_step; |
| while (n_out == n || target < val_sum + params.val) { |
| vec2 p1; |
| if (n_out == n) { |
| p1 = cubic.p3; |
| } else { |
| float u = (target - val_sum) / params.val; |
| float a = mix(params.a0, params.a2, u); |
| float au = approx_parabola_inv_integral(a); |
| float t = (au - u0) * uscale; |
| p1 = eval_quad(qp0, qp1, qp2, t); |
| } |
| |
| // Output line segment |
| |
| // Bounding box of element in pixel coordinates. |
| float xmin = min(p0.x, p1.x) - cubic.stroke.x; |
| float xmax = max(p0.x, p1.x) + cubic.stroke.x; |
| float ymin = min(p0.y, p1.y) - cubic.stroke.y; |
| float ymax = max(p0.y, p1.y) + cubic.stroke.y; |
| float dx = p1.x - p0.x; |
| float dy = p1.y - p0.y; |
| // Set up for per-scanline coverage formula, below. |
| float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy; |
| float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX; |
| float b = invslope; // Note: assumes square tiles, otherwise scale. |
| float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX; |
| |
| int x0 = int(floor(xmin * SX)); |
| int x1 = int(floor(xmax * SX) + 1); |
| int y0 = int(floor(ymin * SY)); |
| int y1 = int(floor(ymax * SY) + 1); |
| |
| x0 = clamp(x0, bbox.x, bbox.z); |
| y0 = clamp(y0, bbox.y, bbox.w); |
| x1 = clamp(x1, bbox.x, bbox.z); |
| y1 = clamp(y1, bbox.y, bbox.w); |
| float xc = a + b * float(y0); |
| int stride = bbox.z - bbox.x; |
| int base = (y0 - bbox.y) * stride - bbox.x; |
| // TODO: can be tighter, use c to bound width |
| uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); |
| // Consider using subgroups to aggregate atomic add. |
| MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size); |
| if (tile_alloc.failed || !mem_ok) { |
| return; |
| } |
| uint tile_offset = tile_alloc.alloc.offset; |
| |
| TileSeg tile_seg; |
| |
| int xray = int(floor(p0.x*SX)); |
| int last_xray = int(floor(p1.x*SX)); |
| if (p0.y > p1.y) { |
| int tmp = xray; |
| xray = last_xray; |
| last_xray = tmp; |
| } |
| for (int y = y0; y < y1; y++) { |
| float tile_y0 = float(y * TILE_HEIGHT_PX); |
| int xbackdrop = max(xray + 1, bbox.x); |
| if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) { |
| int backdrop = p1.y < p0.y ? 1 : -1; |
| TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop)); |
| uint tile_el = tile_ref.offset >> 2; |
| if (touch_mem(path_alloc, tile_el + 1)) { |
| atomicAdd(memory[tile_el + 1], backdrop); |
| } |
| } |
| |
| // next_xray is the xray for the next scanline; the line segment intersects |
| // all tiles between xray and next_xray. |
| int next_xray = last_xray; |
| if (y < y1 - 1) { |
| float tile_y1 = float((y + 1) * TILE_HEIGHT_PX); |
| float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy); |
| next_xray = int(floor(x_edge*SX)); |
| } |
| |
| int min_xray = min(xray, next_xray); |
| int max_xray = max(xray, next_xray); |
| int xx0 = min(int(floor(xc - c)), min_xray); |
| int xx1 = max(int(ceil(xc + c)), max_xray + 1); |
| xx0 = clamp(xx0, x0, x1); |
| xx1 = clamp(xx1, x0, x1); |
| |
| for (int x = xx0; x < xx1; x++) { |
| float tile_x0 = float(x * TILE_WIDTH_PX); |
| TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x)); |
| uint tile_el = tile_ref.offset >> 2; |
| uint old = 0; |
| if (touch_mem(path_alloc, tile_el)) { |
| old = atomicExchange(memory[tile_el], tile_offset); |
| } |
| tile_seg.origin = p0; |
| tile_seg.vector = p1 - p0; |
| float y_edge = 0.0; |
| if (!is_stroke) { |
| y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx); |
| if (min(p0.x, p1.x) < tile_x0) { |
| vec2 p = vec2(tile_x0, y_edge); |
| if (p0.x > p1.x) { |
| tile_seg.vector = p - p0; |
| } else { |
| tile_seg.origin = p; |
| tile_seg.vector = p1 - p; |
| } |
| // kernel4 uses sign(vector.x) for the sign of the intersection backdrop. |
| // Nudge zeroes towards the intended sign. |
| if (tile_seg.vector.x == 0) { |
| tile_seg.vector.x = sign(p1.x - p0.x)*1e-9; |
| } |
| } |
| if (x <= min_xray || max_xray < x) { |
| // Reject inconsistent intersections. |
| y_edge = 1e9; |
| } |
| } |
| tile_seg.y_edge = y_edge; |
| tile_seg.next.offset = old; |
| TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg); |
| tile_offset += TileSeg_size; |
| } |
| xc += b; |
| base += stride; |
| xray = next_xray; |
| } |
| |
| n_out += 1; |
| target += v_step; |
| p0 = p1; |
| } |
| val_sum += params.val; |
| |
| qp0 = qp2; |
| } |
| |
| break; |
| } |
| } |