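Further improvements: forward image data from the vertex to the fragment shader

The vertex shader now packs the encoded image's offset, size, atlas index, extend modes and tint mode into a flat `image_data` varying and forwards `packed_tint` through `payload`, so the IMAGE fragment path no longer calls `unpack_encoded_image`.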
diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 1d10faa..547f26e 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl
@@ -17,10 +17,14 @@ // Paint texture id locates the encoded image data `EncodedImage` in `encoded_paints_texture` // More details in the `StripInstance` documentation below. // -// `StripInstance::payload` field can either encode a color, [x, y] for image sampling or a slot index +// `StripInstance::payload` field can either encode a color, [x, y] scene coordinates or a slot index. // - If color source is payload and the paint type is solid, the fragment shader uses the color directly. -// - If color source is payload and the paint type is image, the fragment shader samples the image. -// - Otherwise, the fragment shader samples the source clip texture using the given slot index. +// - If color source is payload and the paint type is image, the vertex shader consumes the scene +// coordinates to compute sample_xy, then overwrites VertexOutput::payload with packed_tint for +// the fragment shader. +// - If color source is payload and the paint type is a gradient, the vertex shader consumes the +// scene coordinates to compute sample_xy. +// - Otherwise, the fragment shader uses the payload as a slot index. // More details in the `StripInstance` documentation below. @@ -170,7 +174,8 @@ // │ └── payload = [r, g, b, a] RGBA (packed as u8s) // │ // ├── paint_type = 1 (PAINT_TYPE_IMAGE) - Image rendering -// │ └── payload = packed image parameters +// │ └── payload = [x, y] scene coordinates (packed as u16s, consumed by vertex shader) +// │ (vertex overwrites VertexOutput::payload with packed_tint for fragment shader) // │ // ├── paint_type = 2 (PAINT_TYPE_LINEAR_GRADIENT) - Linear gradient rendering // ├── paint_type = 3 (PAINT_TYPE_RADIAL_GRADIENT) - Radial gradient (with kind discriminator) @@ -224,7 +229,7 @@ // For normal strips: ending x-position of the dense (alpha) region. // For rect strips: packed dimensions (width | height << 16). 
@location(3) @interpolate(flat) dense_end_or_rect_size: u32, - // Color value or slot index when alpha is 0 + // Color value, packed_tint (for IMAGE), or slot index. @location(4) @interpolate(flat) payload: u32, // Packed fractional edge offsets for rectangles. // Bits 0-7: x0, 8-15: y0, 16-23: x1, 24-31: y1. @@ -234,6 +239,12 @@ // xy = uv_min, zw = uv_max (prevents atlas bleeding at image boundaries). // Only meaningful for PAINT_TYPE_IMAGE with IMAGE_QUALITY_MEDIUM. @location(6) @interpolate(flat) uv_bounds: vec4<f32>, + // Forwarded image data (avoids redundant textureLoad in the fragment shader). + // x = (offset.x << 16) | offset.y, + // y = (size.x << 16) | size.y, + // z = atlas_index[0:7] | extend_x[8:9] | extend_y[10:11] | tint_mode[12]. + // For IMAGE, packed_tint is forwarded via `payload` (unused by IMAGE fragments). + @location(7) @interpolate(flat) image_data: vec3<u32>, // Normalized device coordinates (NDC) for the current vertex @builtin(position) position: vec4<f32>, }; @@ -289,6 +300,9 @@ let ndc_x = pix_x * 2.0 / f32(config.width) - 1.0; let ndc_y = 1.0 - pix_y * 2.0 / f32(config.height); + out.payload = instance.payload; + out.paint_and_rect_flag = instance.paint_and_rect_flag; + let color_source = (instance.paint_and_rect_flag >> 29u) & 0x3u; if color_source == COLOR_SOURCE_PAYLOAD { let paint_type = (instance.paint_and_rect_flag >> 26u) & 0x7u; @@ -306,6 +320,16 @@ let uv_min = (encoded_image.image_offset + 0.5) * inv_atlas_dim; let uv_max = (encoded_image.image_offset + encoded_image.image_size - 0.5) * inv_atlas_dim; out.uv_bounds = vec4(uv_min, uv_max); + out.image_data = vec3<u32>( + (u32(encoded_image.image_offset.x) << 16u) | u32(encoded_image.image_offset.y), + (u32(encoded_image.image_size.x) << 16u) | u32(encoded_image.image_size.y), + encoded_image.atlas_index + | (encoded_image.extend_modes.x << 8u) + | (encoded_image.extend_modes.y << 10u) + | (encoded_image.tint_mode << 12u) + ); + // Reuse payload (not read by IMAGE fragments) 
to forward packed_tint. + out.payload = encoded_image.packed_tint; } else if paint_type == PAINT_TYPE_LINEAR_GRADIENT || paint_type == PAINT_TYPE_RADIAL_GRADIENT || paint_type == PAINT_TYPE_SWEEP_GRADIENT { // Use view coordinates for gradient transform (always in global view space) out.sample_xy = vec2<f32>( @@ -319,8 +343,6 @@ out.tex_coord = vec2<f32>(col_offset + x * f32(width), y * f32(height)); out.position = vec4<f32>(ndc_x, ndc_y, 0.0, 1.0); - out.payload = instance.payload; - out.paint_and_rect_flag = instance.paint_and_rect_flag; return out; } @@ -384,22 +406,25 @@ if color_source == COLOR_SOURCE_PAYLOAD { let paint_type = (in.paint_and_rect_flag >> 26u) & 0x7u; - // in.payload encodes a color for PAINT_TYPE_SOLID or sample_xy for PAINT_TYPE_IMAGE + // in.payload encodes a color for PAINT_TYPE_SOLID, or packed_tint for PAINT_TYPE_IMAGE. if paint_type == PAINT_TYPE_SOLID { return alpha * unpack4x8unorm(in.payload); } else if paint_type == PAINT_TYPE_IMAGE { - let paint_tex_idx = in.paint_and_rect_flag & PAINT_TEXTURE_INDEX_MASK; - let encoded_image = unpack_encoded_image(paint_tex_idx); - let image_offset = encoded_image.image_offset; - let image_size = encoded_image.image_size; + let image_offset = vec2<f32>(f32(in.image_data.x >> 16u), f32(in.image_data.x & 0xFFFFu)); + let image_size = vec2<f32>(f32(in.image_data.y >> 16u), f32(in.image_data.y & 0xFFFFu)); + let atlas_index = in.image_data.z & 0xFFu; + let extend_x = (in.image_data.z >> 8u) & 0x3u; + let extend_y = (in.image_data.z >> 10u) & 0x3u; + let tint_mode = (in.image_data.z >> 12u) & 0x1u; + let local_xy = in.sample_xy - image_offset; // This offset doesn't exist in vello_cpu, and we use it because 45 degree skewing seems to cause // artifacts on the GPU. We have something similar in place for gradients. It might be worth revisiting // this to see whether a better approach is possible. 
let offset = 0.00001; let extended_xy = vec2<f32>( - extend_mode(local_xy.x + offset, encoded_image.extend_modes.x, image_size.x), - extend_mode(local_xy.y + offset, encoded_image.extend_modes.y, image_size.y) + extend_mode(local_xy.x + offset, extend_x, image_size.x), + extend_mode(local_xy.y + offset, extend_y, image_size.y) ); // TODO: add a fast path for images where we are using bilinear sampling and want transparent pixels, @@ -411,31 +436,32 @@ // sample_color = bicubic_sample( // atlas_texture_array, // final_xy, - // i32(encoded_image.atlas_index), + // i32(atlas_index), // image_offset, // image_size, - // encoded_image.extend_modes, - // encoded_image.image_padding, + // vec2<u32>(extend_x, extend_y), + // image_padding, // ); //} else if encoded_image.quality == IMAGE_QUALITY_MEDIUM { - let final_xy = image_offset + extended_xy; - let inv_atlas_dim = 1.0 / f32(1u << config.atlas_dim_bits); - let uv = clamp(final_xy * inv_atlas_dim, in.uv_bounds.xy, in.uv_bounds.zw); - let sample_color = textureSample(atlas_texture_array, atlas_sampler, uv, i32(encoded_image.atlas_index)); + let final_xy = image_offset + extended_xy; + let inv_atlas_dim = 1.0 / f32(1u << config.atlas_dim_bits); + let uv = clamp(final_xy * inv_atlas_dim, in.uv_bounds.xy, in.uv_bounds.zw); + let sample_color = textureSample(atlas_texture_array, atlas_sampler, uv, i32(atlas_index)); //} else { // let final_xy = image_offset + extended_xy; // sample_color = textureLoad( // atlas_texture_array, // vec2<u32>(final_xy), - // i32(encoded_image.atlas_index), + // i32(atlas_index), // 0, // ); //} - let is_multiply = bool(encoded_image.tint_mode); + let tint = select(vec4<f32>(1.0), unpack4x8unorm(in.payload), in.payload != 0u); + let is_multiply = bool(tint_mode); return alpha * select( - encoded_image.tint * sample_color.a, - sample_color * encoded_image.tint, + tint * sample_color.a, + sample_color * tint, is_multiply ); } else if paint_type == PAINT_TYPE_LINEAR_GRADIENT { @@ -840,8 +866,8 @@ 
transform: mat2x2<f32>, /// Translation part of the affine transform [tx, ty]. translate: vec2<f32>, - /// Premultiplied tint color. Identity (vec4(1.0)) when no tint is set. - tint: vec4<f32>, + /// Raw packed tint color (4×8 unorm). Zero means no tint (identity). + packed_tint: u32, /// Tint mode: TINT_MODE_ALPHA_MASK (`0`) or TINT_MODE_MULTIPLY (`1`). tint_mode: u32, /// Number of transparent padding pixels around the image in the atlas. @@ -875,10 +901,9 @@ vec2<f32>(bitcast<f32>(texel1.y), bitcast<f32>(texel1.z)) ); let translate = vec2<f32>(bitcast<f32>(texel1.w), bitcast<f32>(texel2.x)); - // When packed_tint is zero (no tint), use identity color vec4(1.0) with - // Multiply mode so the math reduces to sample_color * 1.0 = sample_color. let packed_tint = texel2.y; - let tint = select(vec4<f32>(1.0), unpack4x8unorm(packed_tint), packed_tint != 0u); + // When packed_tint is zero (no tint), force Multiply mode so the + // identity tint vec4(1.0) reduces to sample_color * 1.0 = sample_color. let tint_mode = select(TINT_MODE_MULTIPLY, texel2.z, packed_tint != 0u); let image_padding = f32(texel2.w); @@ -890,7 +915,7 @@ atlas_index, transform, translate, - tint, + packed_tint, tint_mode, image_padding );