Checkpoint of work
Getting farther into actually rendering. Allocation of strips is next.
diff --git a/shader/merge.wgsl b/shader/merge.wgsl
index 28b8350..d20c5c5 100644
--- a/shader/merge.wgsl
+++ b/shader/merge.wgsl
@@ -8,8 +8,14 @@
path_ix: u32,
x: u32,
y: u32,
- delta: i32,
- // TODO: slope etc
+ p0: u32, // packed endpoint: x in the low 16 bits, y in the high 16 bits, each in 1/8192 units (layout as decoded by unpack_point)
+ p1: u32, // packed endpoint, same layout as p0
+}
+
+fn unpack_point(p: u32) -> vec2f { // Decodes a packed point into (x, y); each 16-bit half is fixed-point in 1/8192 units, so results lie in [0, 8).
+ let x = f32(p & 0xffffu) * (1.0 / 8192.0); // x: low 16 bits
+ let y = f32(p >> 16u) * (1.0 / 8192.0); // y: high 16 bits
+ return vec2(x, y);
}
@group(0) @binding(0)
@@ -34,10 +40,15 @@
return c;
}
-fn mm_histogram(t: Minitile) -> u32 {
- // TODO: get these from tile
- let xmin = 0u;
- let xmax = 4u;
+fn mt_delta(t: Minitile) -> i32 { // Returns -1, 0 or +1 from the packed endpoints; main uses the result as the tile's winding.
+ return i32((t.p1 >> 16u) == 0u) - i32((t.p0 >> 16u) == 0u); // (p1 high half is zero) minus (p0 high half is zero); the high half is the packed y
+}
+
+fn mt_histogram(t: Minitile) -> u32 { // Returns four byte-wide counters in one u32: a 1 in each byte from xmin up to (not including) xmax, when the extent fits in 4 columns.
+ let x0 = f32(t.p0 & 0xffffu) * (1.0 / 8192.0); // x of p0: low 16 bits, in 1/8192 units
+ let x1 = f32(t.p1 & 0xffffu) * (1.0 / 8192.0); // x of p1, decoded the same way
+ let xmin = u32(floor(min(x0, x1))); // left bound of the x extent, rounded down
+ let xmax = u32(ceil(max(x0, x1))); // right bound, rounded up. NOTE(review): the shifts below look like they assume 1 <= xmax - xmin <= 4 (otherwise rshift is 32 or wraps) - confirm the inputs guarantee this
let rshift = (4u - (xmax - xmin)) * 8u;
let lshift = xmin * 8u;
return (0x01010101u >> rshift) << lshift;
@@ -54,10 +65,11 @@
var<workgroup> sh_histo: array<u32, WG_SIZE>;
var<workgroup> sh_seg_end: array<u32, WG_SIZE>;
var<workgroup> sh_inclusive_cols: array<u32, WG_SIZE>;
+var<workgroup> sh_area: array<atomic<i32>, WG_SIZE>; // area accumulators; main zeroes, atomically adds into, and reads the slot at local_id.x - tile_within_col
+var<workgroup> sh_carryover: array<i32, 4>; // one saved area sum per row y (0..3); written in main when seg_end is the last slot, read by invocation 0 when block_ix != 0
@compute @workgroup_size(256)
fn main(
- @builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
@@ -65,19 +77,20 @@
var first = false;
var first_x = false;
// predicate? or pad?
- let tile = input[global_id.x];
- if global_id.x != 0u {
- let prev = input[global_id.x - 1u];
+ let global_ix = wg_id.x * WG_SIZE + local_id.x; // flat index into input: WG_SIZE consecutive elements per workgroup, one per invocation
+ let tile = input[global_ix]; // this invocation's element (no bounds check here; see the "predicate? or pad?" note above)
+ if global_ix != 0u { // element 0 has no predecessor, so first and first_x keep their initial false for it
+ let prev = input[global_ix - 1u]; // predecessor in input; for local_id.x == 0 this index falls in the previous workgroup's range
first = tile.path_ix != prev.path_ix || tile.y != prev.y;
first_x = first || tile.x != prev.x;
}
- let winding = tile.delta;
+ let winding = mt_delta(tile); // winding is now derived from the packed endpoints rather than read from a stored field
let n_strips = u32(first);
let start = select(0u, local_id.x, first);
let start_x = select(0u, local_id.x, first_x);
var agg = MergeMonoid(winding, n_strips, start, start_x);
- let local_histo = mm_histogram(tile);
+ let local_histo = mt_histogram(tile); // per-column byte counters computed from the tile's endpoints
var histo = local_histo;
sh_mm[local_id.x] = agg;
sh_histo[local_id.x] = histo;
@@ -159,5 +172,34 @@
}
}
// at this point, lo should index our tile
+ // TODO: predicate on ix < total_cols?
+ let render_tile = input[wg_id.x * WG_SIZE + lo]; // element at offset lo within this workgroup's input range; not read by the code below yet
+ var alphas = 0u; // four 8-bit alpha values packed into one u32, built up one row (y) per loop iteration
+ for (var y = 0u; y < 4u; y++) { // one pass per row y = 0..3
+ if tile_within_col == 0u {
+ atomicStore(&sh_area[local_id.x], 0); // invocations with tile_within_col == 0 clear their own accumulator slot
+ }
+ workgroupBarrier(); // all clears complete before any invocation adds
+ var area_init = 0;
+ if local_id.x == 0u && block_ix != 0u {
+ area_init = sh_carryover[y]; // invocation 0 seeds its contribution with the sum saved for this row (skipped when block_ix is 0)
+ }
+ let area = area_init; // TODO: compute from tile
+ atomicAdd(&sh_area[local_id.x - tile_within_col], area); // accumulate into the slot tile_within_col places before this invocation's own
+ workgroupBarrier(); // all adds complete before the sums are read
+ if tile_within_col == 0u {
+ let summed_area = atomicLoad(&sh_area[local_id.x]); // total accumulated into this slot for row y
+ if seg_end == WG_SIZE - 1u {
+ // TODO: only if last column
+ sh_carryover[y] = summed_area; // save the row sum when seg_end is the last slot of the workgroup
+ }
+ let winding_area = sh_mm[seg_end].winding * 256; // winding at seg_end scaled by 256, the same scale that is clamped to 255 below
+ let alpha_u8 = u32(min(abs(summed_area + winding_area), 255)); // absolute value, clamped to at most 255
+ alphas = (alphas >> 8u) + (alpha_u8 << 24u); // shift earlier rows down a byte and insert this row at the top; after 4 rows, y = 0 is the low byte
+ }
+ }
+ if tile_within_col == 0u {
+ // TODO: store alphas
+ }
}
}