printf: Remove stage-specific info (#5495)

Remove stage-specific debug info from the printf output record; it is only
needed by GPU-AV. This allows debug printfs to be used in multi-stage shader
modules.

Fixes #4892
diff --git a/include/spirv-tools/instrument.hpp b/include/spirv-tools/instrument.hpp
index ae9278b..0a6e630 100644
--- a/include/spirv-tools/instrument.hpp
+++ b/include/spirv-tools/instrument.hpp
@@ -73,81 +73,11 @@
 // which generated the validation error.
 static const int kInstCommonOutInstructionIdx = 2;
 
-// This is the stage which generated the validation error. This word is used
-// to determine the contents of the next two words in the record.
-// 0:Vert, 1:TessCtrl, 2:TessEval, 3:Geom, 4:Frag, 5:Compute
-static const int kInstCommonOutStageIdx = 3;
-static const int kInstCommonOutCnt = 4;
-
-// Stage-specific Stream Record Offsets
-//
-// Each stage will contain different values in the next set of words of the
-// record used to identify which instantiation of the shader generated the
-// validation error.
-//
-// Vertex Shader Output Record Offsets
-static const int kInstVertOutVertexIndex = kInstCommonOutCnt;
-static const int kInstVertOutInstanceIndex = kInstCommonOutCnt + 1;
-static const int kInstVertOutUnused = kInstCommonOutCnt + 2;
-
-// Frag Shader Output Record Offsets
-static const int kInstFragOutFragCoordX = kInstCommonOutCnt;
-static const int kInstFragOutFragCoordY = kInstCommonOutCnt + 1;
-static const int kInstFragOutUnused = kInstCommonOutCnt + 2;
-
-// Compute Shader Output Record Offsets
-static const int kInstCompOutGlobalInvocationIdX = kInstCommonOutCnt;
-static const int kInstCompOutGlobalInvocationIdY = kInstCommonOutCnt + 1;
-static const int kInstCompOutGlobalInvocationIdZ = kInstCommonOutCnt + 2;
-
-// Tessellation Control Shader Output Record Offsets
-static const int kInstTessCtlOutInvocationId = kInstCommonOutCnt;
-static const int kInstTessCtlOutPrimitiveId = kInstCommonOutCnt + 1;
-static const int kInstTessCtlOutUnused = kInstCommonOutCnt + 2;
-
-// Tessellation Eval Shader Output Record Offsets
-static const int kInstTessEvalOutPrimitiveId = kInstCommonOutCnt;
-static const int kInstTessEvalOutTessCoordU = kInstCommonOutCnt + 1;
-static const int kInstTessEvalOutTessCoordV = kInstCommonOutCnt + 2;
-
-// Geometry Shader Output Record Offsets
-static const int kInstGeomOutPrimitiveId = kInstCommonOutCnt;
-static const int kInstGeomOutInvocationId = kInstCommonOutCnt + 1;
-static const int kInstGeomOutUnused = kInstCommonOutCnt + 2;
-
-// Ray Tracing Shader Output Record Offsets
-static const int kInstRayTracingOutLaunchIdX = kInstCommonOutCnt;
-static const int kInstRayTracingOutLaunchIdY = kInstCommonOutCnt + 1;
-static const int kInstRayTracingOutLaunchIdZ = kInstCommonOutCnt + 2;
-
-// Mesh Shader Output Record Offsets
-static const int kInstMeshOutGlobalInvocationIdX = kInstCommonOutCnt;
-static const int kInstMeshOutGlobalInvocationIdY = kInstCommonOutCnt + 1;
-static const int kInstMeshOutGlobalInvocationIdZ = kInstCommonOutCnt + 2;
-
-// Task Shader Output Record Offsets
-static const int kInstTaskOutGlobalInvocationIdX = kInstCommonOutCnt;
-static const int kInstTaskOutGlobalInvocationIdY = kInstCommonOutCnt + 1;
-static const int kInstTaskOutGlobalInvocationIdZ = kInstCommonOutCnt + 2;
-
-// Size of Common and Stage-specific Members
-static const int kInstStageOutCnt = kInstCommonOutCnt + 3;
-
 // Debug Buffer Bindings
 //
 // These are the bindings for the different buffers which are
 // read or written by the instrumentation passes.
 //
-// This is the output buffer written by InstBindlessCheckPass,
-// InstBuffAddrCheckPass, and possibly other future validations.
-static const int kDebugOutputBindingStream = 0;
-
-// The binding for the input buffer read by InstBindlessCheckPass.
-static const int kDebugInputBindingBindless = 1;
-
-// The binding for the input buffer read by InstBuffAddrCheckPass.
-static const int kDebugInputBindingBuffAddr = 2;
-
 // This is the output buffer written by InstDebugPrintfPass.
 static const int kDebugOutputPrintfStream = 3;
 
diff --git a/source/opt/inst_bindless_check_pass.h b/source/opt/inst_bindless_check_pass.h
index f99b59d..243cba7 100644
--- a/source/opt/inst_bindless_check_pass.h
+++ b/source/opt/inst_bindless_check_pass.h
@@ -29,7 +29,7 @@
 class InstBindlessCheckPass : public InstrumentPass {
  public:
   InstBindlessCheckPass(uint32_t shader_id)
-      : InstrumentPass(0, shader_id, true) {}
+      : InstrumentPass(0, shader_id, true, true) {}
 
   ~InstBindlessCheckPass() override = default;
 
diff --git a/source/opt/inst_buff_addr_check_pass.h b/source/opt/inst_buff_addr_check_pass.h
index 70076a3..f07f98a 100644
--- a/source/opt/inst_buff_addr_check_pass.h
+++ b/source/opt/inst_buff_addr_check_pass.h
@@ -29,9 +29,10 @@
 class InstBuffAddrCheckPass : public InstrumentPass {
  public:
   // For test harness only
-  InstBuffAddrCheckPass() : InstrumentPass(0, 23) {}
+  InstBuffAddrCheckPass() : InstrumentPass(0, 23, false, true) {}
   // For all other interfaces
-  InstBuffAddrCheckPass(uint32_t shader_id) : InstrumentPass(0, shader_id) {}
+  InstBuffAddrCheckPass(uint32_t shader_id)
+      : InstrumentPass(0, shader_id, false, true) {}
 
   ~InstBuffAddrCheckPass() override = default;
 
diff --git a/source/opt/inst_debug_printf_pass.cpp b/source/opt/inst_debug_printf_pass.cpp
index a48a28f..abd25e9 100644
--- a/source/opt/inst_debug_printf_pass.cpp
+++ b/source/opt/inst_debug_printf_pass.cpp
@@ -138,7 +138,7 @@
 }
 
 void InstDebugPrintfPass::GenOutputCode(
-    Instruction* printf_inst, uint32_t stage_idx,
+    Instruction* printf_inst,
     std::vector<std::unique_ptr<BasicBlock>>* new_blocks) {
   BasicBlock* back_blk_ptr = &*new_blocks->back();
   InstructionBuilder builder(
@@ -168,14 +168,14 @@
       });
   GenDebugStreamWrite(
       builder.GetUintConstantId(shader_id_),
-      builder.GetUintConstantId(uid2offset_[printf_inst->unique_id()]),
-      GenStageInfo(stage_idx, &builder), val_ids, &builder);
+      builder.GetUintConstantId(uid2offset_[printf_inst->unique_id()]), val_ids,
+      &builder);
   context()->KillInst(printf_inst);
 }
 
 void InstDebugPrintfPass::GenDebugPrintfCode(
     BasicBlock::iterator ref_inst_itr,
-    UptrVectorIterator<BasicBlock> ref_block_itr, uint32_t stage_idx,
+    UptrVectorIterator<BasicBlock> ref_block_itr,
     std::vector<std::unique_ptr<BasicBlock>>* new_blocks) {
   // If not DebugPrintf OpExtInst, return.
   Instruction* printf_inst = &*ref_inst_itr;
@@ -191,7 +191,7 @@
   MovePreludeCode(ref_inst_itr, ref_block_itr, &new_blk_ptr);
   new_blocks->push_back(std::move(new_blk_ptr));
   // Generate instructions to output printf args to printf buffer
-  GenOutputCode(printf_inst, stage_idx, new_blocks);
+  GenOutputCode(printf_inst, new_blocks);
   // Caller expects at least two blocks with last block containing remaining
   // code, so end block after instrumentation, create remainder block, and
   // branch to it
@@ -301,8 +301,7 @@
   enum {
     kShaderId = 0,
     kInstructionIndex = 1,
-    kStageInfo = 2,
-    kFirstParam = 3,
+    kFirstParam = 2,
   };
   // Total param count is common params plus validation-specific
   // params
@@ -312,12 +311,9 @@
     analysis::TypeManager* type_mgr = context()->get_type_mgr();
 
     const analysis::Type* uint_type = GetInteger(32, false);
-    const analysis::Vector v4uint(uint_type, 4);
-    const analysis::Type* v4uint_type = type_mgr->GetRegisteredType(&v4uint);
 
     std::vector<const analysis::Type*> param_types(kFirstParam + param_cnt,
                                                    uint_type);
-    param_types[kStageInfo] = v4uint_type;
     std::unique_ptr<Function> output_func = StartFunction(
         param2output_func_id_[param_cnt], type_mgr->GetVoidType(), param_types);
 
@@ -330,8 +326,8 @@
         context(), &*new_blk_ptr,
         IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
     // Gen test if debug output buffer size will not be exceeded.
-    const uint32_t val_spec_offset = kInstStageOutCnt;
-    const uint32_t obuf_record_sz = val_spec_offset + param_cnt;
+    const uint32_t first_param_offset = kInstCommonOutInstructionIdx + 1;
+    const uint32_t obuf_record_sz = first_param_offset + param_cnt;
     const uint32_t buf_id = GetOutputBufferId();
     const uint32_t buf_uint_ptr_id = GetOutputBufferPtrId();
     Instruction* obuf_curr_sz_ac_inst = builder.AddAccessChain(
@@ -382,16 +378,9 @@
     // Store Instruction Idx
     GenDebugOutputFieldCode(obuf_curr_sz_id, kInstCommonOutInstructionIdx,
                             param_ids[kInstructionIndex], &builder);
-    // Store stage info. Stage Idx + 3 words of stage-specific data.
-    for (uint32_t i = 0; i < 4; ++i) {
-      Instruction* field =
-          builder.AddCompositeExtract(GetUintId(), param_ids[kStageInfo], {i});
-      GenDebugOutputFieldCode(obuf_curr_sz_id, kInstCommonOutStageIdx + i,
-                              field->result_id(), &builder);
-    }
     // Gen writes of validation specific data
     for (uint32_t i = 0; i < param_cnt; ++i) {
-      GenDebugOutputFieldCode(obuf_curr_sz_id, val_spec_offset + i,
+      GenDebugOutputFieldCode(obuf_curr_sz_id, first_param_offset + i,
                               param_ids[kFirstParam + i], &builder);
     }
     // Close write block and gen merge block
@@ -416,12 +405,12 @@
 }
 
 void InstDebugPrintfPass::GenDebugStreamWrite(
-    uint32_t shader_id, uint32_t instruction_idx_id, uint32_t stage_info_id,
+    uint32_t shader_id, uint32_t instruction_idx_id,
     const std::vector<uint32_t>& validation_ids, InstructionBuilder* builder) {
   // Call debug output function. Pass func_idx, instruction_idx and
   // validation ids as args.
   uint32_t val_id_cnt = static_cast<uint32_t>(validation_ids.size());
-  std::vector<uint32_t> args = {shader_id, instruction_idx_id, stage_info_id};
+  std::vector<uint32_t> args = {shader_id, instruction_idx_id};
   (void)args.insert(args.end(), validation_ids.begin(), validation_ids.end());
   (void)builder->AddFunctionCall(GetVoidId(),
                                  GetStreamWriteFunctionId(val_id_cnt), args);
@@ -455,10 +444,10 @@
   // Perform printf instrumentation on each entry point function in module
   InstProcessFunction pfn =
       [this](BasicBlock::iterator ref_inst_itr,
-             UptrVectorIterator<BasicBlock> ref_block_itr, uint32_t stage_idx,
+             UptrVectorIterator<BasicBlock> ref_block_itr,
+             [[maybe_unused]] uint32_t stage_idx,
              std::vector<std::unique_ptr<BasicBlock>>* new_blocks) {
-        return GenDebugPrintfCode(ref_inst_itr, ref_block_itr, stage_idx,
-                                  new_blocks);
+        return GenDebugPrintfCode(ref_inst_itr, ref_block_itr, new_blocks);
       };
   (void)InstProcessEntryPointCallTree(pfn);
   // Remove DebugPrintf OpExtInstImport instruction
diff --git a/source/opt/inst_debug_printf_pass.h b/source/opt/inst_debug_printf_pass.h
index 3a2078a..5688d38 100644
--- a/source/opt/inst_debug_printf_pass.h
+++ b/source/opt/inst_debug_printf_pass.h
@@ -28,10 +28,10 @@
 class InstDebugPrintfPass : public InstrumentPass {
  public:
   // For test harness only
-  InstDebugPrintfPass() : InstrumentPass(7, 23) {}
+  InstDebugPrintfPass() : InstrumentPass(7, 23, false, false) {}
   // For all other interfaces
   InstDebugPrintfPass(uint32_t desc_set, uint32_t shader_id)
-      : InstrumentPass(desc_set, shader_id) {}
+      : InstrumentPass(desc_set, shader_id, false, false) {}
 
   ~InstDebugPrintfPass() override = default;
 
@@ -52,9 +52,7 @@
   // validation and write a record to the end of the stream, if enough space
   // in the buffer remains. The record will contain the index of the function
   // and instruction within that function |func_idx, instruction_idx| which
-  // generated the record. It will also contain additional information to
-  // identify the instance of the shader, depending on the stage |stage_idx|
-  // of the shader. Finally, the record will contain validation-specific
+  // generated the record. Finally, the record will contain validation-specific
   // data contained in |validation_ids| which will identify the validation
   // error as well as the values involved in the error.
   //
@@ -83,9 +81,6 @@
   //     Record Size
   //     Shader ID
   //     Instruction Index
-  //     Stage
-  //     Stage-specific Word 0
-  //     Stage-specific Word 1
   //     ...
   //     Validation Error Code
   //     Validation-specific Word 0
@@ -93,8 +88,8 @@
   //     Validation-specific Word 2
   //     ...
   //
-  // Each record consists of three subsections: members common across all
-  // validation, members specific to the stage, and members specific to a
+  // Each record consists of two subsections: members common across all
+  // validation and members specific to a
   // validation.
   //
   // The Record Size is the number of 32-bit words in the record, including
@@ -106,18 +101,6 @@
   // The Instruction Index is the position of the instruction within the
   // SPIR-V file which is in error.
   //
-  // The Stage is the pipeline stage which has generated the error as defined
-  // by the SpvExecutionModel_ enumeration. This is used to interpret the
-  // following Stage-specific words.
-  //
-  // The Stage-specific Words identify which invocation of the shader generated
-  // the error. Every stage will write a fixed number of words. Vertex shaders
-  // will write the Vertex and Instance ID. Fragment shaders will write
-  // FragCoord.xy. Compute shaders will write the GlobalInvocation ID.
-  // The tessellation eval shader will write the Primitive ID and TessCoords.uv.
-  // The tessellation control shader and geometry shader will write the
-  // Primitive ID and Invocation ID.
-  //
   // The Validation Error Code specifies the exact error which has occurred.
   // These are enumerated with the kInstError* static consts. This allows
   // multiple validation layers to use the same, single output buffer.
@@ -131,7 +114,6 @@
   // before writing, the size of the debug out buffer can be used by the
   // validation layer to control the number of error records that are written.
   void GenDebugStreamWrite(uint32_t shader_id, uint32_t instruction_idx_id,
-                           uint32_t stage_info_id,
                            const std::vector<uint32_t>& validation_ids,
                            InstructionBuilder* builder);
 
@@ -144,7 +126,7 @@
   // If |ref_inst_itr| is an OpDebugPrintf, return in |new_blocks| the result
   // of replacing it with buffer write instructions within its block at
   // |ref_block_itr|.  The instructions write a record to the printf
-  // output buffer stream including |function_idx, instruction_idx, stage_idx|
+  // output buffer stream including |function_idx, instruction_idx|
   // and removes the OpDebugPrintf. The block at |ref_block_itr| can just be
   // replaced with the block in |new_blocks|. Besides the buffer writes, this
   // block will comprise all instructions preceding and following
@@ -162,7 +144,6 @@
   // DebugPrintf.
   void GenDebugPrintfCode(BasicBlock::iterator ref_inst_itr,
                           UptrVectorIterator<BasicBlock> ref_block_itr,
-                          uint32_t stage_idx,
                           std::vector<std::unique_ptr<BasicBlock>>* new_blocks);
 
   // Generate a sequence of uint32 instructions in |builder| (if necessary)
@@ -175,7 +156,7 @@
   // Generate instructions to write a record containing the operands of
   // |printf_inst| arguments to printf buffer, adding new code to the end of
   // the last block in |new_blocks|. Kill OpDebugPrintf instruction.
-  void GenOutputCode(Instruction* printf_inst, uint32_t stage_idx,
+  void GenOutputCode(Instruction* printf_inst,
                      std::vector<std::unique_ptr<BasicBlock>>* new_blocks);
 
   // Set the name for a function or global variable, names will be
diff --git a/source/opt/instrument_pass.cpp b/source/opt/instrument_pass.cpp
index 829de49..dc33e14 100644
--- a/source/opt/instrument_pass.cpp
+++ b/source/opt/instrument_pass.cpp
@@ -653,44 +653,50 @@
 }
 
 bool InstrumentPass::InstProcessEntryPointCallTree(InstProcessFunction& pfn) {
-  // Make sure all entry points have the same execution model. Do not
-  // instrument if they do not.
-  // TODO(greg-lunarg): Handle mixed stages. Technically, a shader module
-  // can contain entry points with different execution models, although
-  // such modules will likely be rare as GLSL and HLSL are geared toward
-  // one model per module. In such cases we will need
-  // to clone any functions which are in the call trees of entrypoints
-  // with differing execution models.
-  spv::ExecutionModel stage = context()->GetStage();
-  // Check for supported stages
-  if (stage != spv::ExecutionModel::Vertex &&
-      stage != spv::ExecutionModel::Fragment &&
-      stage != spv::ExecutionModel::Geometry &&
-      stage != spv::ExecutionModel::GLCompute &&
-      stage != spv::ExecutionModel::TessellationControl &&
-      stage != spv::ExecutionModel::TessellationEvaluation &&
-      stage != spv::ExecutionModel::TaskNV &&
-      stage != spv::ExecutionModel::MeshNV &&
-      stage != spv::ExecutionModel::RayGenerationNV &&
-      stage != spv::ExecutionModel::IntersectionNV &&
-      stage != spv::ExecutionModel::AnyHitNV &&
-      stage != spv::ExecutionModel::ClosestHitNV &&
-      stage != spv::ExecutionModel::MissNV &&
-      stage != spv::ExecutionModel::CallableNV &&
-      stage != spv::ExecutionModel::TaskEXT &&
-      stage != spv::ExecutionModel::MeshEXT) {
-    if (consumer()) {
-      std::string message = "Stage not supported by instrumentation";
-      consumer()(SPV_MSG_ERROR, 0, {0, 0, 0}, message.c_str());
+  uint32_t stage_id;
+  if (use_stage_info_) {
+    // Make sure all entry points have the same execution model. Do not
+    // instrument if they do not.
+    // TODO(greg-lunarg): Handle mixed stages. Technically, a shader module
+    // can contain entry points with different execution models, although
+    // such modules will likely be rare as GLSL and HLSL are geared toward
+    // one model per module. In such cases we will need
+    // to clone any functions which are in the call trees of entrypoints
+    // with differing execution models.
+    spv::ExecutionModel stage = context()->GetStage();
+    // Check for supported stages
+    if (stage != spv::ExecutionModel::Vertex &&
+        stage != spv::ExecutionModel::Fragment &&
+        stage != spv::ExecutionModel::Geometry &&
+        stage != spv::ExecutionModel::GLCompute &&
+        stage != spv::ExecutionModel::TessellationControl &&
+        stage != spv::ExecutionModel::TessellationEvaluation &&
+        stage != spv::ExecutionModel::TaskNV &&
+        stage != spv::ExecutionModel::MeshNV &&
+        stage != spv::ExecutionModel::RayGenerationNV &&
+        stage != spv::ExecutionModel::IntersectionNV &&
+        stage != spv::ExecutionModel::AnyHitNV &&
+        stage != spv::ExecutionModel::ClosestHitNV &&
+        stage != spv::ExecutionModel::MissNV &&
+        stage != spv::ExecutionModel::CallableNV &&
+        stage != spv::ExecutionModel::TaskEXT &&
+        stage != spv::ExecutionModel::MeshEXT) {
+      if (consumer()) {
+        std::string message = "Stage not supported by instrumentation";
+        consumer()(SPV_MSG_ERROR, 0, {0, 0, 0}, message.c_str());
+      }
+      return false;
     }
-    return false;
+    stage_id = static_cast<uint32_t>(stage);
+  } else {
+    stage_id = 0;
   }
   // Add together the roots of all entry points
   std::queue<uint32_t> roots;
   for (auto& e : get_module()->entry_points()) {
     roots.push(e.GetSingleWordInOperand(kEntryPointFunctionIdInIdx));
   }
-  bool modified = InstProcessCallTreeFromRoots(pfn, &roots, uint32_t(stage));
+  bool modified = InstProcessCallTreeFromRoots(pfn, &roots, stage_id);
   return modified;
 }
 
diff --git a/source/opt/instrument_pass.h b/source/opt/instrument_pass.h
index 8b64374..e4408c9 100644
--- a/source/opt/instrument_pass.h
+++ b/source/opt/instrument_pass.h
@@ -77,12 +77,13 @@
   // set |desc_set| for debug input and output buffers and writes |shader_id|
   // into debug output records. |opt_direct_reads| indicates that the pass
   // will see direct input buffer reads and should prepare to optimize them.
-  InstrumentPass(uint32_t desc_set, uint32_t shader_id,
-                 bool opt_direct_reads = false)
+  InstrumentPass(uint32_t desc_set, uint32_t shader_id, bool opt_direct_reads,
+                 bool use_stage_info)
       : Pass(),
         desc_set_(desc_set),
         shader_id_(shader_id),
-        opt_direct_reads_(opt_direct_reads) {}
+        opt_direct_reads_(opt_direct_reads),
+        use_stage_info_(use_stage_info) {}
 
   // Initialize state for instrumentation of module.
   void InitializeInstrument();
@@ -312,7 +313,11 @@
 
   // Optimize direct debug input buffer reads. Specifically, move all such
   // reads with constant args to first block and reuse them.
-  bool opt_direct_reads_{false};
+  const bool opt_direct_reads_;
+
+  // Set true if the instrumentation needs to know the current stage.
+  // Note that this does not work with multi-stage modules.
+  const bool use_stage_info_;
 };
 
 }  // namespace opt
diff --git a/test/opt/inst_debug_printf_test.cpp b/test/opt/inst_debug_printf_test.cpp
index e9774de..24c0bc6 100644
--- a/test/opt/inst_debug_printf_test.cpp
+++ b/test/opt/inst_debug_printf_test.cpp
@@ -74,7 +74,7 @@
 ; CHECK: OpExtension "SPV_KHR_storage_buffer_storage_class"
 OpMemoryModel Logical GLSL450
 OpEntryPoint Fragment %2 "MainPs" %3 %4
-; CHECK: OpEntryPoint Fragment %2 "MainPs" %3 %4 %gl_FragCoord
+; CHECK: OpEntryPoint Fragment %2 "MainPs" %3 %4
 OpExecutionMode %2 OriginUpperLeft
 %5 = OpString "Color is %vn"
 )";
@@ -87,8 +87,6 @@
 OpDecorate %7 Binding 0
 OpDecorate %3 Location 0
 OpDecorate %4 Location 0
-; CHECK: OpDecorate %gl_FragCoord BuiltIn FragCoord
-; CHECK: OpDecorate %_runtimearr_uint ArrayStride 4
 )" + kOutputDecorations;
 
   const std::string globals =
@@ -109,10 +107,7 @@
 %_ptr_Output_v4float = OpTypePointer Output %v4float
 %4 = OpVariable %_ptr_Output_v4float Output
 ; CHECK: %uint = OpTypeInt 32 0
-; CHECK: %_ptr_Input_v4float = OpTypePointer Input %v4float
-; CHECK: %gl_FragCoord = OpVariable %_ptr_Input_v4float Input
-; CHECK: %v4uint = OpTypeVector %uint 4
-; CHECK: [[func_type:%\w+]] = OpTypeFunction %void %uint %uint %v4uint %uint %uint %uint %uint %uint
+; CHECK: [[func_type:%\w+]] = OpTypeFunction %void %uint %uint %uint %uint %uint %uint %uint
 ; CHECK: %_runtimearr_uint = OpTypeRuntimeArray %uint
 )" + kOutputGlobals + R"(
 ; CHECK: %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
@@ -138,12 +133,7 @@
 ; CHECK: {{%\w+}} = OpBitcast %uint {{%\w+}}
 ; CHECK: {{%\w+}} = OpCompositeExtract %float %25 3
 ; CHECK: {{%\w+}} = OpBitcast %uint {{%\w+}}
-; CHECK: {{%\w+}} = OpLoad %v4float %gl_FragCoord
-; CHECK: {{%\w+}} = OpBitcast %v4uint {{%\w+}}
-; CHECK: {{%\w+}} = OpCompositeExtract %uint {{%\w+}} 0
-; CHECK: {{%\w+}} = OpCompositeExtract %uint {{%\w+}} 1
-; CHECK: {{%\w+}} = OpCompositeConstruct %v4uint %uint_4 {{%\w+}} {{%\w+}} %uint_0
-; CHECK: {{%\w+}} = OpFunctionCall %void %inst_printf_stream_write_5 %uint_23 %uint_36 {{%\w+}} %uint_5 {{%\w+}} {{%\w+}} {{%\w+}} {{%\w+}}
+; CHECK: {{%\w+}} = OpFunctionCall %void %inst_printf_stream_write_5 %uint_23 %uint_36 %uint_5 {{%\w+}} {{%\w+}} {{%\w+}} {{%\w+}}
 ; CHECK: OpBranch {{%\w+}}
 ; CHECK: {{%\w+}} = OpLabel
 OpStore %4 %25
@@ -155,7 +145,6 @@
 ; CHECK: %inst_printf_stream_write_5 = OpFunction %void None {{%\w+}}
 ; CHECK: [[sw_shader_id:%\w+]] = OpFunctionParameter %uint
 ; CHECK: [[sw_inst_idx:%\w+]] = OpFunctionParameter %uint
-; CHECK: [[sw_stage_info:%\w+]] = OpFunctionParameter %v4uint
 ; CHECK: [[sw_param_1:%\w+]] = OpFunctionParameter %uint
 ; CHECK: [[sw_param_2:%\w+]] = OpFunctionParameter %uint
 ; CHECK: [[sw_param_3:%\w+]] = OpFunctionParameter %uint
@@ -163,8 +152,8 @@
 ; CHECK: [[sw_param_5:%\w+]] = OpFunctionParameter %uint
 ; CHECK: {{%\w+}} = OpLabel
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_1
-; CHECK: {{%\w+}} = OpAtomicIAdd %uint {{%\w+}} %uint_4 %uint_0 %uint_12
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_12
+; CHECK: {{%\w+}} = OpAtomicIAdd %uint {{%\w+}} %uint_4 %uint_0 %uint_8
+; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_8
 ; CHECK: {{%\w+}} = OpArrayLength %uint [[output_buffer_var]] 2
 ; CHECK: {{%\w+}} = OpULessThanEqual %bool {{%\w+}} {{%\w+}}
 ; CHECK: OpSelectionMerge {{%\w+}} None
@@ -172,42 +161,26 @@
 ; CHECK: {{%\w+}} = OpLabel
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_0
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} %uint_12
+; CHECK: OpStore {{%\w+}} %uint_8
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_1
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
 ; CHECK: OpStore {{%\w+}} [[sw_shader_id]]
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_2
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
 ; CHECK: OpStore {{%\w+}} [[sw_inst_idx]]
-; CHECK: {{%\w+}} = OpCompositeExtract %uint [[sw_stage_info]] 0
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_3
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} {{%\w+}}
-; CHECK: {{%\w+}} = OpCompositeExtract %uint [[sw_stage_info]] 1
+; CHECK: OpStore {{%\w+}} [[sw_param_1]]
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_4
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} {{%\w+}}
-; CHECK: {{%\w+}} = OpCompositeExtract %uint [[sw_stage_info]] 2
+; CHECK: OpStore {{%\w+}} [[sw_param_2]]
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_5
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} {{%\w+}}
-; CHECK: {{%\w+}} = OpCompositeExtract %uint [[sw_stage_info]] 3
+; CHECK: OpStore {{%\w+}} [[sw_param_3]]
 ; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_6
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} {{%\w+}}
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_7
-; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} [[sw_param_1]]
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_8
-; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} [[sw_param_2]]
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_9
-; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
-; CHECK: OpStore {{%\w+}} [[sw_param_3]]
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_10
-; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
 ; CHECK: OpStore {{%\w+}} [[sw_param_4]]
-; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_11
+; CHECK: {{%\w+}} = OpIAdd %uint {{%\w+}} %uint_7
 ; CHECK: {{%\w+}} = OpAccessChain %_ptr_StorageBuffer_uint [[output_buffer_var]] %uint_2 {{%\w+}}
 ; CHECK: OpStore {{%\w+}} [[sw_param_5]]
 ; CHECK: OpBranch {{%\w+}}