.
diff --git a/sparse_strips/vello_hybrid/src/gradient_cache.rs b/sparse_strips/vello_hybrid/src/gradient_cache.rs index e30b6e2..0f91b38 100644 --- a/sparse_strips/vello_hybrid/src/gradient_cache.rs +++ b/sparse_strips/vello_hybrid/src/gradient_cache.rs
@@ -66,11 +66,19 @@ self.has_changed = true; self.cache .insert(gradient.cache_key.clone(), (cached_ramp, self.epoch)); - self.maybe_evict_lru_entry(); (lut_start, width) } + /// Maintain the gradient cache by evicting old entries. + pub(crate) fn maintain(&mut self) { + let entries_to_remove_count = self + .cache + .len() + .saturating_sub(self.retained_count as usize); + self.remove_lru_entries(entries_to_remove_count); + } + /// Get the size of the packed luts. pub(crate) fn luts_size(&self) -> usize { self.luts.len() @@ -101,50 +109,102 @@ self.luts = luts; } - /// Evict the least recently used cache entry if the cache size exceeds the retained count. - fn maybe_evict_lru_entry(&mut self) { - if self.cache.len() <= self.retained_count as usize { + /// Remove multiple least recently used cache entries and compact the luts vector efficiently. + fn remove_lru_entries(&mut self, count: usize) { + if count == 0 { return; } - self.remove_lru_entry(); - } - - /// Remove the least recently used cache entry and compact the luts vector. - fn remove_lru_entry(&mut self) { - let (key, (cached_ramp, _)) = self.get_lru_entry().unwrap(); - let key = key.clone(); - let cached_ramp = cached_ramp.clone(); - - self.cache.remove(&key).unwrap(); - self.compact_luts(cached_ramp); - + let entries_to_remove = self.get_lru_entries(count); + if entries_to_remove.is_empty() { + return; + } + for (key, _) in &entries_to_remove { + self.cache.remove(key); + } + self.compact_luts(entries_to_remove); self.has_changed = true; } - /// Compact the luts vector by removing the least recently used cache entry lut data. - fn compact_luts(&mut self, cached_ramp: CachedRamp) { - // Calculate the byte range to remove from the luts vector - let start = (cached_ramp.lut_start * BYTES_PER_TEXEL) as usize; - let end = start + (cached_ramp.width * BYTES_PER_TEXEL) as usize; + /// Compact the luts vector by removing multiple cached ramps + fn compact_luts(&mut self, mut ramps_to_remove: Vec<(CacheKey<GradientCacheKey>, CachedRamp)>) { + if ramps_to_remove.is_empty() { + return; + } - // Drain the LUT data from the vector - self.luts.drain(start..end); + // Sort by lut_start position (ascending) for efficient processing + ramps_to_remove.sort_by_key(|(_, ramp)| ramp.lut_start); - // Update all lut_start values for entries that were added after the removed entry - let removed_width = cached_ramp.width; - for (_, (ramp, _)) in self.cache.iter_mut() { - if ramp.lut_start > cached_ramp.lut_start { - ramp.lut_start -= removed_width; + // Convert to byte ranges for easier processing + let ranges_to_remove: Vec<(usize, usize)> = ramps_to_remove + .iter() + .map(|(_, ramp)| { + let start = (ramp.lut_start * BYTES_PER_TEXEL) as usize; + let end = start + (ramp.width * BYTES_PER_TEXEL) as usize; + (start, end) + }) + .collect(); + + // Total bytes removed so far + let mut write_offset = 0; + // Current read position + let mut read_pos = 0; + // Index into ranges_to_remove + let mut range_idx = 0; + + while read_pos < self.luts.len() { + // Check if we're at the start of a range to remove + if range_idx < ranges_to_remove.len() && read_pos == ranges_to_remove[range_idx].0 { + let (start, end) = ranges_to_remove[range_idx]; + // Skip over the range to remove + write_offset += end - start; + read_pos = end; + range_idx += 1; + } else { + // Copy byte from read position to write position (read_pos - write_offset) + if write_offset > 0 { + self.luts[read_pos - write_offset] = self.luts[read_pos]; + } + read_pos += 1; } } + + // Truncate the vector to remove the unused tail + self.luts.truncate(self.luts.len() - write_offset); + + // Update lut_start values for remaining entries + // Calculate how much data was removed before each ramp's original position + for (_, (ramp, _)) in self.cache.iter_mut() { + let mut removed_before = 0; + for (_, removed_ramp) in &ramps_to_remove { + if removed_ramp.lut_start < ramp.lut_start { + removed_before += removed_ramp.width; + } + } + ramp.lut_start -= removed_before; + } } - /// Get the least recently used cache entry. - fn get_lru_entry(&mut self) -> Option<(&CacheKey<GradientCacheKey>, &(CachedRamp, u64))> { - self.cache + /// Get multiple least recently used cache entries. + fn get_lru_entries(&self, count: usize) -> Vec<(CacheKey<GradientCacheKey>, CachedRamp)> { + if count == 0 || self.cache.is_empty() { + return Vec::new(); + } + + let mut entries: Vec<_> = self + .cache .iter() - .min_by_key(|(_, (_, last_used))| *last_used) + .map(|(key, (ramp, last_used))| (key.clone(), ramp.clone(), *last_used)) + .collect(); + + // Sort by last_used (ascending) to get LRU entries first + entries.sort_by_key(|(_, _, last_used)| *last_used); + + entries + .into_iter() + .take(count) + .map(|(key, ramp, _)| (key, ramp)) + .collect() } } @@ -243,7 +303,8 @@ #[test] fn test_cache_empty() { - let cache = GradientRampCache::new(5, Level::fallback()); + let mut cache = GradientRampCache::new(5, Level::fallback()); + cache.maintain(); assert_eq!(cache.cache.len(), 0); assert_eq!(cache.epoch, 0); @@ -255,6 +316,7 @@ fn test_unique_entry_creation() { let mut cache = GradientRampCache::new(5, Level::fallback()); insert_entries(&mut cache, 4); + cache.maintain(); assert_eq!(cache.cache.len(), 4); assert!(!cache.is_empty()); @@ -265,6 +327,7 @@ fn test_no_eviction_under_limit() { let mut cache = GradientRampCache::new(5, Level::fallback()); insert_entries(&mut cache, 4); + cache.maintain(); assert_eq!(cache.cache.len(), 4); } @@ -273,6 +336,7 @@ fn test_no_eviction_at_limit() { let mut cache = GradientRampCache::new(5, Level::fallback()); insert_entries(&mut cache, 5); + cache.maintain(); assert_eq!(cache.cache.len(), 5); } @@ -281,6 +345,7 @@ fn test_eviction_over_limit() { let mut cache = GradientRampCache::new(5, Level::fallback()); insert_entries(&mut cache, 10); + cache.maintain(); // Should trigger eviction since we're over the limit assert_eq!(cache.cache.len(), 5); @@ -303,6 +368,7 @@ let gradient = create_gradient(i as f32 / 10.0); insert_entry(&mut cache, gradient); } + cache.maintain(); assert_eq!(cache.cache.len(), 2); // Verify that luts vector was compacted properly @@ -351,6 +417,7 @@ // Now insert a new gradient that should evict gradient2 let gradient4 = create_gradient(0.4); insert_entry(&mut cache, gradient4.clone()); + cache.maintain(); // Cache should still have 3 entries assert_eq!(cache.cache.len(), 3); @@ -394,6 +461,7 @@ insert_entry(&mut cache, gradient1.clone()); insert_entry(&mut cache, gradient2.clone()); insert_entry(&mut cache, gradient3.clone()); + cache.maintain(); let original_size = cache.luts_size(); let original_cache_size = cache.cache.len(); @@ -415,4 +483,33 @@ assert!(cache.cache.contains_key(&encoded_gradient2.cache_key)); assert!(cache.cache.contains_key(&encoded_gradient3.cache_key)); } + + #[test] + fn test_lut_start_invalidation() { + let mut cache = GradientRampCache::new(2, Level::fallback()); + + let gradient_1 = create_encoded_gradient(create_gradient(0.1)); + let gradient_2 = create_encoded_gradient(create_gradient(0.2)); + let gradient_3 = create_encoded_gradient(create_gradient(0.3)); + + // Frame 1 start + let _lut_start_1 = cache.get_or_create_ramp(&gradient_1).0; + let _lut_start_2 = cache.get_or_create_ramp(&gradient_2).0; + // Frame 1 end + + // Frame 2 start + let lut_start_2 = cache.get_or_create_ramp(&gradient_2).0; + let lut_start_3 = cache.get_or_create_ramp(&gradient_3).0; + // Frame 2 end + + // Gradient lut shouldn't mutate within a frame + assert_eq!( + lut_start_2, + cache.cache.get(&gradient_2.cache_key).unwrap().0.lut_start + ); + assert_eq!( + lut_start_3, + cache.cache.get(&gradient_3.cache_key).unwrap().0.lut_start + ); + } }
diff --git a/sparse_strips/vello_hybrid/src/render/webgl.rs b/sparse_strips/vello_hybrid/src/render/webgl.rs index b0c0100..a3e8fef 100644 --- a/sparse_strips/vello_hybrid/src/render/webgl.rs +++ b/sparse_strips/vello_hybrid/src/render/webgl.rs
@@ -44,7 +44,7 @@ use core::{fmt::Debug, mem}; use vello_common::{ coarse::WideTile, - encode::{EncodedKind, EncodedPaint, MAX_GRADIENT_LUT_SIZE, RadialKind}, + encode::{EncodedGradient, EncodedKind, EncodedPaint, MAX_GRADIENT_LUT_SIZE, RadialKind}, kurbo::Affine, paint::ImageSource, tile::Tile, @@ -330,6 +330,8 @@ .resize_with(encoded_paints.len(), || GPU_PAINT_PLACEHOLDER); self.paint_idxs.resize(encoded_paints.len() + 1, 0); + self.gradient_cache.maintain(); + let mut current_idx = 0; for (encoded_paint_idx, paint) in encoded_paints.iter().enumerate() { self.paint_idxs[encoded_paint_idx] = current_idx; @@ -395,7 +397,7 @@ fn encode_gradient_paint( &self, - gradient: &vello_common::encode::EncodedGradient, + gradient: &EncodedGradient, gradient_width: u32, gradient_start: u32, ) -> GpuEncodedPaint {
diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs index 7265364..2ab2ddc 100644 --- a/sparse_strips/vello_hybrid/src/render/wgpu.rs +++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs
@@ -42,7 +42,7 @@ use bytemuck::{Pod, Zeroable}; use vello_common::{ coarse::WideTile, - encode::{EncodedKind, EncodedPaint, MAX_GRADIENT_LUT_SIZE, RadialKind}, + encode::{EncodedGradient, EncodedKind, EncodedPaint, MAX_GRADIENT_LUT_SIZE, RadialKind}, kurbo::Affine, paint::ImageSource, pixmap::Pixmap, @@ -264,6 +264,8 @@ .resize_with(encoded_paints.len(), || GPU_PAINT_PLACEHOLDER); self.paint_idxs.resize(encoded_paints.len() + 1, 0); + self.gradient_cache.maintain(); + let mut current_idx = 0; for (encoded_paint_idx, paint) in encoded_paints.iter().enumerate() { self.paint_idxs[encoded_paint_idx] = current_idx; @@ -329,7 +331,7 @@ fn encode_gradient_paint( &self, - gradient: &vello_common::encode::EncodedGradient, + gradient: &EncodedGradient, gradient_width: u32, gradient_start: u32, ) -> GpuEncodedPaint {