Instrument: Fix version 2 output record write for tess eval shaders. (#2782)

Fix output record write for tess eval shaders.

Also change command line for bindless instrumentation to use use
output record version 2.

diff --git a/source/opt/instrument_pass.cpp b/source/opt/instrument_pass.cpp
index 6645a2e..00bb918 100644
--- a/source/opt/instrument_pass.cpp
+++ b/source/opt/instrument_pass.cpp
@@ -243,10 +243,13 @@
             kInstTessEvalOutPrimitiveId, base_offset_id, builder);
         uint32_t load_id = GenVarLoad(
             context()->GetBuiltinInputVarId(SpvBuiltInTessCoord), builder);
+        Instruction* uvec3_cast_inst =
+            builder->AddUnaryOp(GetVec3UintId(), SpvOpBitcast, load_id);
+        uint32_t uvec3_cast_id = uvec3_cast_inst->result_id();
         Instruction* u_inst = builder->AddIdLiteralOp(
-            GetUintId(), SpvOpCompositeExtract, load_id, 0);
+            GetUintId(), SpvOpCompositeExtract, uvec3_cast_id, 0);
         Instruction* v_inst = builder->AddIdLiteralOp(
-            GetUintId(), SpvOpCompositeExtract, load_id, 1);
+            GetUintId(), SpvOpCompositeExtract, uvec3_cast_id, 1);
         GenDebugOutputFieldCode(base_offset_id, kInstTessEvalOutTessCoordU,
                                 u_inst->result_id(), builder);
         GenDebugOutputFieldCode(base_offset_id, kInstTessEvalOutTessCoordV,
@@ -552,18 +555,26 @@
   return uint_id_;
 }
 
+uint32_t InstrumentPass::GetVecUintId(uint32_t len) {
+  analysis::TypeManager* type_mgr = context()->get_type_mgr();
+  analysis::Integer uint_ty(32, false);
+  analysis::Type* reg_uint_ty = type_mgr->GetRegisteredType(&uint_ty);
+  analysis::Vector v_uint_ty(reg_uint_ty, len);
+  analysis::Type* reg_v_uint_ty = type_mgr->GetRegisteredType(&v_uint_ty);
+  uint32_t v_uint_id = type_mgr->GetTypeInstruction(reg_v_uint_ty);
+  return v_uint_id;
+}
+
 uint32_t InstrumentPass::GetVec4UintId() {
-  if (v4uint_id_ == 0) {
-    analysis::TypeManager* type_mgr = context()->get_type_mgr();
-    analysis::Integer uint_ty(32, false);
-    analysis::Type* reg_uint_ty = type_mgr->GetRegisteredType(&uint_ty);
-    analysis::Vector v4uint_ty(reg_uint_ty, 4);
-    analysis::Type* reg_v4uint_ty = type_mgr->GetRegisteredType(&v4uint_ty);
-    v4uint_id_ = type_mgr->GetTypeInstruction(reg_v4uint_ty);
-  }
+  if (v4uint_id_ == 0) v4uint_id_ = GetVecUintId(4u);
   return v4uint_id_;
 }
 
+uint32_t InstrumentPass::GetVec3UintId() {
+  if (v3uint_id_ == 0) v3uint_id_ = GetVecUintId(3u);
+  return v3uint_id_;
+}
+
 uint32_t InstrumentPass::GetBoolId() {
   if (bool_id_ == 0) {
     analysis::TypeManager* type_mgr = context()->get_type_mgr();
@@ -890,6 +901,7 @@
   v4float_id_ = 0;
   uint_id_ = 0;
   v4uint_id_ = 0;
+  v3uint_id_ = 0;
   bool_id_ = 0;
   void_id_ = 0;
   storage_buffer_ext_defined_ = false;
diff --git a/source/opt/instrument_pass.h b/source/opt/instrument_pass.h
index d255698..1b3f83e 100644
--- a/source/opt/instrument_pass.h
+++ b/source/opt/instrument_pass.h
@@ -248,9 +248,15 @@
   // Return id for v4float type
   uint32_t GetVec4FloatId();
 
+  // Return id for uint vector type of |length|
+  uint32_t GetVecUintId(uint32_t length);
+
   // Return id for v4uint type
   uint32_t GetVec4UintId();
 
+  // Return id for v3uint type
+  uint32_t GetVec3UintId();
+
   // Return id for output function. Define if it doesn't exist with
   // |val_spec_param_cnt| validation-specific uint32 parameters.
   uint32_t GetStreamWriteFunctionId(uint32_t stage_idx,
@@ -366,9 +372,12 @@
   // id for v4float type
   uint32_t v4float_id_;
 
-  // id for v4float type
+  // id for v4uint type
   uint32_t v4uint_id_;
 
+  // id for v3uint type
+  uint32_t v3uint_id_;
+
   // id for 32-bit unsigned type
   uint32_t uint_id_;
 
diff --git a/source/opt/ir_context.cpp b/source/opt/ir_context.cpp
index 823c2b7..3e32980 100644
--- a/source/opt/ir_context.cpp
+++ b/source/opt/ir_context.cpp
@@ -700,6 +700,13 @@
         reg_type = type_mgr->GetRegisteredType(&v3uint_ty);
         break;
       }
+      case SpvBuiltInTessCoord: {
+        analysis::Float float_ty(32);
+        analysis::Type* reg_float_ty = type_mgr->GetRegisteredType(&float_ty);
+        analysis::Vector v3float_ty(reg_float_ty, 3);
+        reg_type = type_mgr->GetRegisteredType(&v3float_ty);
+        break;
+      }
       default: {
         assert(false && "unhandled builtin");
         return 0;
diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp
index 4cc5e97..f3b56c3 100644
--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@@ -396,7 +396,7 @@
   } else if (pass_name == "replace-invalid-opcode") {
     RegisterPass(CreateReplaceInvalidOpcodePass());
   } else if (pass_name == "inst-bindless-check") {
-    RegisterPass(CreateInstBindlessCheckPass(7, 23, true, true, 1));
+    RegisterPass(CreateInstBindlessCheckPass(7, 23, true, true, 2));
     RegisterPass(CreateSimplificationPass());
     RegisterPass(CreateDeadBranchElimPass());
     RegisterPass(CreateBlockMergePass());
diff --git a/test/opt/inst_bindless_check_test.cpp b/test/opt/inst_bindless_check_test.cpp
index 6fe27c6..6e1adaa 100644
--- a/test/opt/inst_bindless_check_test.cpp
+++ b/test/opt/inst_bindless_check_test.cpp
@@ -5706,6 +5706,282 @@
       true, 7u, 23u, false, false, 2u);
 }
 
+TEST_F(InstBindlessTest, InstrumentTeseSimpleV2) {
+  // This test verifies that the pass will correctly instrument tessellation
+  // evaluation shader doing bindless buffer load.
+  //
+  // clang-format off
+  //
+  // #version 450
+  // #extension GL_EXT_nonuniform_qualifier : enable
+  //
+  // layout(std140, set = 0, binding = 0) uniform ufoo { uint index; } uniform_index_buffer;
+  //
+  // layout(set = 0, binding = 1) buffer bfoo { vec4 val; } adds[11];
+  //
+  // layout(triangles, equal_spacing, cw) in;
+  //
+  // void main() {
+  //   gl_Position = adds[uniform_index_buffer.index].val;
+  // }
+  //
+  // clang-format on
+
+  const std::string defs_before =
+      R"(OpCapability Tessellation
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint TessellationEvaluation %main "main" %_
+OpExecutionMode %main Triangles
+OpExecutionMode %main SpacingEqual
+OpExecutionMode %main VertexOrderCw
+OpSource GLSL 450
+OpSourceExtension "GL_EXT_nonuniform_qualifier"
+OpName %main "main"
+OpName %gl_PerVertex "gl_PerVertex"
+OpMemberName %gl_PerVertex 0 "gl_Position"
+OpMemberName %gl_PerVertex 1 "gl_PointSize"
+OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+OpName %_ ""
+OpName %bfoo "bfoo"
+OpMemberName %bfoo 0 "val"
+OpName %adds "adds"
+OpName %ufoo "ufoo"
+OpMemberName %ufoo 0 "index"
+OpName %uniform_index_buffer "uniform_index_buffer"
+OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+OpDecorate %gl_PerVertex Block
+OpMemberDecorate %bfoo 0 Offset 0
+OpDecorate %bfoo Block
+OpDecorate %adds DescriptorSet 0
+OpDecorate %adds Binding 1
+OpMemberDecorate %ufoo 0 Offset 0
+OpDecorate %ufoo Block
+OpDecorate %uniform_index_buffer DescriptorSet 0
+OpDecorate %uniform_index_buffer Binding 0
+%void = OpTypeVoid
+%3 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%uint = OpTypeInt 32 0
+%uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
+%_ = OpVariable %_ptr_Output_gl_PerVertex Output
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%bfoo = OpTypeStruct %v4float
+%uint_11 = OpConstant %uint 11
+%_arr_bfoo_uint_11 = OpTypeArray %bfoo %uint_11
+%_ptr_StorageBuffer__arr_bfoo_uint_11 = OpTypePointer StorageBuffer %_arr_bfoo_uint_11
+%adds = OpVariable %_ptr_StorageBuffer__arr_bfoo_uint_11 StorageBuffer
+%ufoo = OpTypeStruct %uint
+%_ptr_Uniform_ufoo = OpTypePointer Uniform %ufoo
+%uniform_index_buffer = OpVariable %_ptr_Uniform_ufoo Uniform
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+)";
+
+  const std::string defs_after =
+      R"(OpCapability Tessellation
+OpExtension "SPV_KHR_storage_buffer_storage_class"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint TessellationEvaluation %main "main" %_ %gl_PrimitiveID %gl_TessCoord
+OpExecutionMode %main Triangles
+OpExecutionMode %main SpacingEqual
+OpExecutionMode %main VertexOrderCw
+OpSource GLSL 450
+OpSourceExtension "GL_EXT_nonuniform_qualifier"
+OpName %main "main"
+OpName %gl_PerVertex "gl_PerVertex"
+OpMemberName %gl_PerVertex 0 "gl_Position"
+OpMemberName %gl_PerVertex 1 "gl_PointSize"
+OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+OpName %_ ""
+OpName %bfoo "bfoo"
+OpMemberName %bfoo 0 "val"
+OpName %adds "adds"
+OpName %ufoo "ufoo"
+OpMemberName %ufoo 0 "index"
+OpName %uniform_index_buffer "uniform_index_buffer"
+OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+OpDecorate %gl_PerVertex Block
+OpMemberDecorate %bfoo 0 Offset 0
+OpDecorate %bfoo Block
+OpDecorate %adds DescriptorSet 0
+OpDecorate %adds Binding 1
+OpMemberDecorate %ufoo 0 Offset 0
+OpDecorate %ufoo Block
+OpDecorate %uniform_index_buffer DescriptorSet 0
+OpDecorate %uniform_index_buffer Binding 0
+OpDecorate %_runtimearr_uint ArrayStride 4
+OpDecorate %_struct_47 Block
+OpMemberDecorate %_struct_47 0 Offset 0
+OpMemberDecorate %_struct_47 1 Offset 4
+OpDecorate %49 DescriptorSet 7
+OpDecorate %49 Binding 0
+OpDecorate %gl_PrimitiveID BuiltIn PrimitiveId
+OpDecorate %gl_TessCoord BuiltIn TessCoord
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%uint = OpTypeInt 32 0
+%uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
+%_ = OpVariable %_ptr_Output_gl_PerVertex Output
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%bfoo = OpTypeStruct %v4float
+%uint_11 = OpConstant %uint 11
+%_arr_bfoo_uint_11 = OpTypeArray %bfoo %uint_11
+%_ptr_StorageBuffer__arr_bfoo_uint_11 = OpTypePointer StorageBuffer %_arr_bfoo_uint_11
+%adds = OpVariable %_ptr_StorageBuffer__arr_bfoo_uint_11 StorageBuffer
+%ufoo = OpTypeStruct %uint
+%_ptr_Uniform_ufoo = OpTypePointer Uniform %ufoo
+%uniform_index_buffer = OpVariable %_ptr_Uniform_ufoo Uniform
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%uint_0 = OpConstant %uint 0
+%bool = OpTypeBool
+%40 = OpTypeFunction %void %uint %uint %uint %uint
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+%_struct_47 = OpTypeStruct %uint %_runtimearr_uint
+%_ptr_StorageBuffer__struct_47 = OpTypePointer StorageBuffer %_struct_47
+%49 = OpVariable %_ptr_StorageBuffer__struct_47 StorageBuffer
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+%uint_10 = OpConstant %uint 10
+%uint_4 = OpConstant %uint 4
+%uint_23 = OpConstant %uint 23
+%uint_2 = OpConstant %uint 2
+%uint_3 = OpConstant %uint 3
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_PrimitiveID = OpVariable %_ptr_Input_uint Input
+%v3float = OpTypeVector %float 3
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%gl_TessCoord = OpVariable %_ptr_Input_v3float Input
+%v3uint = OpTypeVector %uint 3
+%uint_5 = OpConstant %uint 5
+%uint_6 = OpConstant %uint 6
+%uint_7 = OpConstant %uint 7
+%uint_8 = OpConstant %uint 8
+%uint_9 = OpConstant %uint 9
+%uint_63 = OpConstant %uint 63
+%101 = OpConstantNull %v4float
+)";
+
+  const std::string func_before =
+      R"(%main = OpFunction %void None %3
+%5 = OpLabel
+%25 = OpAccessChain %_ptr_Uniform_uint %uniform_index_buffer %int_0
+%26 = OpLoad %uint %25
+%28 = OpAccessChain %_ptr_StorageBuffer_v4float %adds %26 %int_0
+%29 = OpLoad %v4float %28
+%31 = OpAccessChain %_ptr_Output_v4float %_ %int_0
+OpStore %31 %29
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string func_after =
+      R"(%main = OpFunction %void None %10
+%26 = OpLabel
+%27 = OpAccessChain %_ptr_Uniform_uint %uniform_index_buffer %int_0
+%28 = OpLoad %uint %27
+%29 = OpAccessChain %_ptr_StorageBuffer_v4float %adds %28 %int_0
+%34 = OpULessThan %bool %28 %uint_11
+OpSelectionMerge %35 None
+OpBranchConditional %34 %36 %37
+%36 = OpLabel
+%38 = OpLoad %v4float %29
+OpBranch %35
+%37 = OpLabel
+%100 = OpFunctionCall %void %39 %uint_63 %uint_0 %28 %uint_11
+OpBranch %35
+%35 = OpLabel
+%102 = OpPhi %v4float %38 %36 %101 %37
+%31 = OpAccessChain %_ptr_Output_v4float %_ %int_0
+OpStore %31 %102
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string output_func =
+      R"(%39 = OpFunction %void None %40
+%41 = OpFunctionParameter %uint
+%42 = OpFunctionParameter %uint
+%43 = OpFunctionParameter %uint
+%44 = OpFunctionParameter %uint
+%45 = OpLabel
+%51 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_0
+%54 = OpAtomicIAdd %uint %51 %uint_4 %uint_0 %uint_10
+%55 = OpIAdd %uint %54 %uint_10
+%56 = OpArrayLength %uint %49 1
+%57 = OpULessThanEqual %bool %55 %56
+OpSelectionMerge %58 None
+OpBranchConditional %57 %59 %58
+%59 = OpLabel
+%60 = OpIAdd %uint %54 %uint_0
+%61 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %60
+OpStore %61 %uint_10
+%63 = OpIAdd %uint %54 %uint_1
+%64 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %63
+OpStore %64 %uint_23
+%66 = OpIAdd %uint %54 %uint_2
+%67 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %66
+OpStore %67 %41
+%69 = OpIAdd %uint %54 %uint_3
+%70 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %69
+OpStore %70 %uint_2
+%73 = OpLoad %uint %gl_PrimitiveID
+%74 = OpIAdd %uint %54 %uint_4
+%75 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %74
+OpStore %75 %73
+%79 = OpLoad %v3float %gl_TessCoord
+%81 = OpBitcast %v3uint %79
+%82 = OpCompositeExtract %uint %81 0
+%83 = OpCompositeExtract %uint %81 1
+%85 = OpIAdd %uint %54 %uint_5
+%86 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %85
+OpStore %86 %82
+%88 = OpIAdd %uint %54 %uint_6
+%89 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %88
+OpStore %89 %83
+%91 = OpIAdd %uint %54 %uint_7
+%92 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %91
+OpStore %92 %42
+%94 = OpIAdd %uint %54 %uint_8
+%95 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %94
+OpStore %95 %43
+%97 = OpIAdd %uint %54 %uint_9
+%98 = OpAccessChain %_ptr_StorageBuffer_uint %49 %uint_1 %97
+OpStore %98 %44
+OpBranch %58
+%58 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+  // SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SinglePassRunAndCheck<InstBindlessCheckPass>(
+      defs_before + func_before, defs_after + func_after + output_func, true,
+      true, 7u, 23u, false, false, 2u);
+}
+
 TEST_F(InstBindlessTest, MultipleDebugFunctionsV2) {
   // Same source as Simple, but compiled -g and not optimized, especially not
   // inlined. The OpSource has had the source extracted for the sake of brevity.