Address review feedback

Clean up some nits, and distinguish between reporting a path_count failure and a coarse failure.
diff --git a/shader/binning.wgsl b/shader/binning.wgsl
index 42cdf6f..55d80c3 100644
--- a/shader/binning.wgsl
+++ b/shader/binning.wgsl
@@ -53,7 +53,7 @@
 // store count values packed two u16's to a u32
 var<workgroup> sh_count: array<array<u32, N_TILE>, N_SUBSLICE>;
 var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
-var<workgroup> sh_atomic_failed: u32;
+var<workgroup> sh_previous_failed: u32;
 
 @compute @workgroup_size(256)
 fn main(
@@ -66,10 +66,10 @@
     }
     if local_id.x == 0u {
         let failed = bump.lines > config.lines_size;
-        sh_atomic_failed = u32(failed);
+        sh_previous_failed = u32(failed);
     }
     // also functions as barrier to protect zeroing of bitmaps
-    let failed = workgroupUniformLoad(&sh_atomic_failed);
+    let failed = workgroupUniformLoad(&sh_previous_failed);
     if failed != 0u {
         if global_id.x == 0u {
             bump.failed |= STAGE_FLATTEN;
diff --git a/shader/coarse.wgsl b/shader/coarse.wgsl
index d90052e..c28f8d2 100644
--- a/shader/coarse.wgsl
+++ b/shader/coarse.wgsl
@@ -155,15 +155,18 @@
     // We need to check only prior stages, as if this stage has failed in another workgroup, 
     // we still want to know this workgroup's memory requirement.   
     if local_id.x == 0u {
-        let failed = (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_FLATTEN)) != 0u
-            || atomicLoad(&bump.seg_counts) > config.seg_counts_size;
+        var failed = atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_FLATTEN);
+        if atomicLoad(&bump.seg_counts) > config.seg_counts_size {
+            failed |= STAGE_PATH_COUNT;
+        }
         // Reuse sh_part_count to hold failed flag, shmem is tight
         sh_part_count[0] = u32(failed);
     }
     let failed = workgroupUniformLoad(&sh_part_count[0]);
     if failed != 0u {
         if wg_id.x == 0u && local_id.x == 0u {
-            atomicOr(&bump.failed, STAGE_COARSE);
+            // propagate PATH_COUNT failure to path_tiling_setup so it doesn't need to bind config
+            atomicOr(&bump.failed, failed);
         }
         return;
     }
diff --git a/shader/shared/bump.wgsl b/shader/shared/bump.wgsl
index 9f0ea37..9270fc2 100644
--- a/shader/shared/bump.wgsl
+++ b/shader/shared/bump.wgsl
@@ -5,7 +5,8 @@
 let STAGE_BINNING: u32 = 0x1u;
 let STAGE_TILE_ALLOC: u32 = 0x2u;
 let STAGE_FLATTEN: u32 = 0x4u;
-let STAGE_COARSE: u32 = 0x8u;
+let STAGE_PATH_COUNT: u32 = 0x8u;
+let STAGE_COARSE: u32 = 0x10u;
 
 // This must be kept in sync with the struct in config.rs in the encoding crate.
 struct BumpAllocators {
diff --git a/shader/tile_alloc.wgsl b/shader/tile_alloc.wgsl
index 60e95f6..c6073d1 100644
--- a/shader/tile_alloc.wgsl
+++ b/shader/tile_alloc.wgsl
@@ -30,7 +30,7 @@
 
 var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
 var<workgroup> sh_tile_offset: u32;
-var<workgroup> sh_atomic_failed: u32;
+var<workgroup> sh_previous_failed: u32;
 
 @compute @workgroup_size(256)
 fn main(
@@ -42,9 +42,9 @@
     // we still want to know this workgroup's memory requirement.
     if local_id.x == 0u {
         let failed = (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_FLATTEN)) != 0u;
-        sh_atomic_failed = u32(failed);
+        sh_previous_failed = u32(failed);
     }
-    let failed = workgroupUniformLoad(&sh_atomic_failed);
+    let failed = workgroupUniformLoad(&sh_previous_failed);
     if failed != 0u {
         return;
     }