// source/opt/amd_ext_to_khr.cpp - external/github.com/KhronosGroup/SPIRV-Tools - Git at Google

 // Copyright (c) 2019 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "source/opt/amd_ext_to_khr.h"

 #include <set>
 #include <string>

 #include "ir_builder.h"
 #include "source/opt/ir_context.h"
 #include "spv-amd-shader-ballot.insts.inc"
 #include "type_manager.h"

 namespace spvtools {
 namespace opt {

 namespace {

 // Instruction numbers of the extended instructions in the
 // SPV_AMD_shader_ballot extension.  The numeric values are what appears in the
 // module, so each one is spelled out explicitly.
 enum AmdShaderBallotExtOpcodes {
   // See ReplaceSwizzleInvocations.
   AmdShaderBallotSwizzleInvocationsAMD = 1,
   // See ReplaceSwizzleInvocationsMasked.
   AmdShaderBallotSwizzleInvocationsMaskedAMD = 2,
   // See ReplaceWriteInvocation.
   AmdShaderBallotWriteInvocationAMD = 3,
   // See ReplaceMbcnt.
   AmdShaderBallotMbcntAMD = 4,
 };

 // Instruction numbers of the AMD trinary min/max/mid extended instructions.
 // Within each group the order is float (F), unsigned (U), signed (S).
 enum AmdShaderTrinaryMinMaxExtOpCodes {
   // Minimum of three values.
   FMin3AMD = 1,
   UMin3AMD = 2,
   SMin3AMD = 3,

   // Maximum of three values.
   FMax3AMD = 4,
   UMax3AMD = 5,
   SMax3AMD = 6,

   // Middle value of three values.
   FMid3AMD = 7,
   UMid3AMD = 8,
   SMid3AMD = 9,
 };

 // Instruction numbers of the AMD GCN shader extended instructions, listed in
 // increasing numeric order.
 enum AmdGcnShader {
   CubeFaceIndexAMD = 1,
   CubeFaceCoordAMD = 2,
   TimeAMD = 3,
 };

 // Returns the instance registered with |ctx|'s type manager for the 32-bit
 // integer type whose signedness flag is false.
 analysis::Type* GetUIntType(IRContext* ctx) {
   analysis::Integer uint32_type(32, false);
   analysis::TypeManager* type_mgr = ctx->get_type_mgr();
   return type_mgr->GetRegisteredType(&uint32_type);
 }

 // Placeholder with the same signature as the replacement rules in this file.
 // It ignores all of its arguments, trips an assertion when assertions are
 // enabled, and otherwise reports that nothing was changed by returning false.
 bool NotImplementedYet(IRContext*, Instruction*,
                        const std::vector<const analysis::Constant*>&) {
   assert(false && "Not implemented.");
   return false;
 }

 // A folding rule that rewrites the trinary |op(a,b,c)| as the nested binary
 // form |op(op(a,b),c)|, where |op| is either min or max.  |opcode| is the
 // binary opcode in the GLSLstd450 extended instruction set that corresponds to
 // the trinary instruction being replaced.  |inst| is rewritten in place and the
 // rule always returns true.
 template <GLSLstd450 opcode>
 bool ReplaceTrinaryMinMax(IRContext* ctx, Instruction* inst,
                           const std::vector<const analysis::Constant*>&) {
   // Look up the GLSL.std.450 import, adding it to the module if it is missing.
   uint32_t glsl450_import_id =
       ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (glsl450_import_id == 0) {
     ctx->AddExtInstImport("GLSL.std.450");
     glsl450_import_id =
         ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   }

   InstructionBuilder builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // In-operands 0 and 1 hold the instruction set id and the instruction
   // number; the three values follow.
   const uint32_t first_id = inst->GetSingleWordInOperand(2);
   const uint32_t second_id = inst->GetSingleWordInOperand(3);
   const uint32_t third_id = inst->GetSingleWordInOperand(4);

   // inner = op(a, b)
   Instruction* inner = builder.AddNaryExtendedInstruction(
       inst->type_id(), glsl450_import_id, static_cast<uint32_t>(opcode),
       {first_id, second_id});

   // inst = op(inner, c)
   inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {glsl450_import_id}},
                        {SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER,
                         {static_cast<uint32_t>(opcode)}},
                        {SPV_OPERAND_TYPE_ID, {inner->result_id()}},
                        {SPV_OPERAND_TYPE_ID, {third_id}}});
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that rewrites |mid(a,b,c)| as |clamp(a, min(b,c), max(b,c))|.
 // The three template parameters are the GLSLstd450 opcodes that correspond to
 // the min, max, and clamp operations for the type of the instruction being
 // replaced.  |inst| is rewritten in place and the rule always returns true.
 template <GLSLstd450 min_opcode, GLSLstd450 max_opcode, GLSLstd450 clamp_opcode>
 bool ReplaceTrinaryMid(IRContext* ctx, Instruction* inst,
                        const std::vector<const analysis::Constant*>&) {
   // Look up the GLSL.std.450 import, adding it to the module if it is missing.
   uint32_t glsl450_import_id =
       ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (glsl450_import_id == 0) {
     ctx->AddExtInstImport("GLSL.std.450");
     glsl450_import_id =
         ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   }

   InstructionBuilder builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // In-operands 0 and 1 hold the instruction set id and the instruction
   // number; the three values follow.
   const uint32_t value_id = inst->GetSingleWordInOperand(2);
   const uint32_t second_id = inst->GetSingleWordInOperand(3);
   const uint32_t third_id = inst->GetSingleWordInOperand(4);

   // The clamp bounds are the min and the max of the last two values.
   Instruction* lower_bound = builder.AddNaryExtendedInstruction(
       inst->type_id(), glsl450_import_id, static_cast<uint32_t>(min_opcode),
       {second_id, third_id});
   Instruction* upper_bound = builder.AddNaryExtendedInstruction(
       inst->type_id(), glsl450_import_id, static_cast<uint32_t>(max_opcode),
       {second_id, third_id});

   // inst = clamp(a, lower_bound, upper_bound)
   inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {glsl450_import_id}},
                        {SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER,
                         {static_cast<uint32_t>(clamp_opcode)}},
                        {SPV_OPERAND_TYPE_ID, {value_id}},
                        {SPV_OPERAND_TYPE_ID, {lower_bound->result_id()}},
                        {SPV_OPERAND_TYPE_ID, {upper_bound->result_id()}}});
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that changes the opcode of |inst| to |new_opcode| and adds
 // the GroupNonUniformArithmetic capability.  The rule assumes it is folding an
 // OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot extension,
 // and that |new_opcode| is one of the OpGroupNonUniform* arithmetic opcodes;
 // both assumptions are only checked by assertions.  Always returns true.
 template <SpvOp new_opcode>
 bool ReplaceGroupNonuniformOperationOpCode(
     IRContext* ctx, Instruction* inst,
     const std::vector<const analysis::Constant*>&) {
   // Check the opcode we are changing to.
   switch (new_opcode) {
     case SpvOpGroupNonUniformIAdd:
     case SpvOpGroupNonUniformFAdd:
     case SpvOpGroupNonUniformSMin:
     case SpvOpGroupNonUniformUMin:
     case SpvOpGroupNonUniformFMin:
     case SpvOpGroupNonUniformSMax:
     case SpvOpGroupNonUniformUMax:
     case SpvOpGroupNonUniformFMax:
       break;
     default:
       assert(
           false &&
           "Should be replacing with a group non uniform arithmetic operation.");
       break;
   }

   // Check the opcode we are changing from.
   switch (inst->opcode()) {
     case SpvOpGroupIAddNonUniformAMD:
     case SpvOpGroupFAddNonUniformAMD:
     case SpvOpGroupSMinNonUniformAMD:
     case SpvOpGroupUMinNonUniformAMD:
     case SpvOpGroupFMinNonUniformAMD:
     case SpvOpGroupSMaxNonUniformAMD:
     case SpvOpGroupUMaxNonUniformAMD:
     case SpvOpGroupFMaxNonUniformAMD:
       break;
     default:
       assert(false &&
              "Should be replacing a group non uniform arithmetic operation.");
       break;
   }

   // The operands are left untouched; only the opcode changes.
   ctx->AddCapability(SpvCapabilityGroupNonUniformArithmetic);
   inst->SetOpcode(new_opcode);
   return true;
 }

 // A folding rule that will replace the SwizzleInvocationsAMD extended
 // instruction in the SPV_AMD_shader_ballot extension.  |inst| is rewritten in
 // place into an OpSelect, and the rule always returns true.
 //
 // The instruction
 //
 //  %offset = OpConstantComposite %v4uint %x %y %z %w
 //  %result = OpExtInst %type %1 SwizzleInvocationsAMD %data %offset
 //
 // is replaced with
 //
 // potentially new constants and types
 //
 // clang-format off
 //         %uint_max = OpConstant %uint 0xFFFFFFFF
 //           %v4uint = OpTypeVector %uint 4
 //     %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
 //             %null = OpConstantNull %type
 // clang-format on
 //
 // and the following code in the function body
 //
 // clang-format off
 //         %id = OpLoad %uint %SubgroupLocalInvocationId
 //   %quad_idx = OpBitwiseAnd %uint %id %uint_3
 //   %quad_ldr = OpBitwiseXor %uint %id %quad_idx
 //  %my_offset = OpVectorExtractDynamic %uint %offset %quad_idx
 // %target_inv = OpIAdd %uint %quad_ldr %my_offset
 //  %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
 //    %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
 //     %result = OpSelect %type %is_active %shuffle %null
 // clang-format on
 //
 // In the group operations %uint_3 is the constant holding the Subgroup scope.
 //
 // Also adding the capabilities and builtins that are needed.
 bool ReplaceSwizzleInvocations(IRContext* ctx, Instruction* inst,
                                const std::vector<const analysis::Constant*>&) {
   analysis::TypeManager* type_mgr = ctx->get_type_mgr();
   analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();

   ctx->AddExtension("SPV_KHR_shader_ballot");
   ctx->AddCapability(SpvCapabilityGroupNonUniformBallot);
   ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle);

   InstructionBuilder ir_builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // In-operands 0 and 1 are the instruction set and the instruction number;
   // %data and %offset follow.
   uint32_t data_id = inst->GetSingleWordInOperand(2);
   uint32_t offset_id = inst->GetSingleWordInOperand(3);

   // Get the subgroup invocation id.
   uint32_t var_id =
       ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
   // NOTE(review): a zero id is only caught by this assertion; with assertions
   // disabled the code below proceeds with it.
   assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable.");
   Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id);
   Instruction* var_ptr_type =
       ctx->get_def_use_mgr()->GetDef(var_inst->type_id());
   // The type to load is read from in-operand 1 of the variable's type
   // instruction (the pointee type of the pointer).
   uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1);

   Instruction* id = ir_builder.AddLoad(uint_type_id, var_id);

   uint32_t quad_mask = ir_builder.GetUintConstantId(3);

   // This gives the offset in the group of 4 of this invocation.
   Instruction* quad_idx = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseAnd,
                                                  id->result_id(), quad_mask);

   // Get the invocation id of the first invocation in the group of 4.  Xor-ing
   // |id| with its own low two bits clears those bits.
   Instruction* quad_ldr = ir_builder.AddBinaryOp(
       uint_type_id, SpvOpBitwiseXor, id->result_id(), quad_idx->result_id());

   // Get the offset of the target invocation from the offset vector.
   Instruction* my_offset =
       ir_builder.AddBinaryOp(uint_type_id, SpvOpVectorExtractDynamic, offset_id,
                              quad_idx->result_id());

   // Determine the index of the invocation to read from.
   Instruction* target_inv = ir_builder.AddBinaryOp(
       uint_type_id, SpvOpIAdd, quad_ldr->result_id(), my_offset->result_id());

   // Do the group operations.  The ballot value is a 4-component uint vector
   // with every bit set; the bit at |target_inv| is extracted from it.
   uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF);
   uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup);
   const auto* ballot_value_const = const_mgr->GetConstant(
       type_mgr->GetUIntVectorType(4),
       {uint_max_id, uint_max_id, uint_max_id, uint_max_id});
   Instruction* ballot_value =
       const_mgr->GetDefiningInstruction(ballot_value_const);
   Instruction* is_active = ir_builder.AddNaryOp(
       type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract,
       {subgroup_scope, ballot_value->result_id(), target_inv->result_id()});
   Instruction* shuffle =
       ir_builder.AddNaryOp(inst->type_id(), SpvOpGroupNonUniformShuffle,
                            {subgroup_scope, data_id, target_inv->result_id()});

   // Create the null constant to use in the select.
   const auto* null = const_mgr->GetConstant(type_mgr->GetType(inst->type_id()),
                                             std::vector<uint32_t>());
   Instruction* null_inst = const_mgr->GetDefiningInstruction(null);

   // Build the select: the shuffled value when |is_active| is true, otherwise
   // the null constant.
   inst->SetOpcode(SpvOpSelect);
   Instruction::OperandList new_operands;
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}});
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}});
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}});

   inst->SetInOperands(std::move(new_operands));
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that will replace the SwizzleInvocationsMaskedAMD
 // extended instruction in the SPV_AMD_shader_ballot extension.  |inst| is
 // rewritten in place into an OpSelect, and the rule always returns true.
 //
 // The instruction
 //
 //    %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z
 //  %result = OpExtInst %type %1 SwizzleInvocationsMaskedAMD %data %mask
 //
 // is replaced with
 //
 // potentially new constants and types
 //
 // clang-format off
 // %uint_mask_extend = OpConstant %uint 0xFFFFFFE0
 //         %uint_max = OpConstant %uint 0xFFFFFFFF
 //           %v4uint = OpTypeVector %uint 4
 //     %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
 //             %null = OpConstantNull %type
 // clang-format on
 //
 // and the following code in the function body
 //
 // clang-format off
 //         %id = OpLoad %uint %SubgroupLocalInvocationId
 //   %and_mask = OpBitwiseOr %uint %uint_x %uint_mask_extend
 //        %and = OpBitwiseAnd %uint %id %and_mask
 //         %or = OpBitwiseOr %uint %and %uint_y
 // %target_inv = OpBitwiseXor %uint %or %uint_z
 //  %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
 //    %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
 //     %result = OpSelect %type %is_active %shuffle %null
 // clang-format on
 //
 // In the group operations %uint_3 is the constant holding the Subgroup scope.
 //
 // Also adding the capabilities and builtins that are needed.
 bool ReplaceSwizzleInvocationsMasked(
     IRContext* ctx, Instruction* inst,
     const std::vector<const analysis::Constant*>&) {
   analysis::TypeManager* type_mgr = ctx->get_type_mgr();
   analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
   analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();

   ctx->AddCapability(SpvCapabilityGroupNonUniformBallot);
   ctx->AddCapability(SpvCapabilityGroupNonUniformShuffle);

   InstructionBuilder ir_builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // Get the operands to inst, and the components of the mask
   uint32_t data_id = inst->GetSingleWordInOperand(2);

   // The mask is only checked by assertions: it has to be an
   // OpConstantComposite with exactly three components.
   Instruction* mask_inst = def_use_mgr->GetDef(inst->GetSingleWordInOperand(3));
   assert(mask_inst->opcode() == SpvOpConstantComposite &&
          "The mask is suppose to be a vector constant.");
   assert(mask_inst->NumInOperands() == 3 &&
          "The mask is suppose to have 3 components.");

   // These are the ids of the three component constants, not their values.
   uint32_t uint_x = mask_inst->GetSingleWordInOperand(0);
   uint32_t uint_y = mask_inst->GetSingleWordInOperand(1);
   uint32_t uint_z = mask_inst->GetSingleWordInOperand(2);

   // Get the subgroup invocation id.
   uint32_t var_id =
       ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
   ctx->AddExtension("SPV_KHR_shader_ballot");
   // NOTE(review): a zero id is only caught by this assertion; with assertions
   // disabled the code below proceeds with it.
   assert(var_id != 0 && "Could not get SubgroupLocalInvocationId variable.");
   Instruction* var_inst = ctx->get_def_use_mgr()->GetDef(var_id);
   Instruction* var_ptr_type =
       ctx->get_def_use_mgr()->GetDef(var_inst->type_id());
   // The type to load is read from in-operand 1 of the variable's type
   // instruction (the pointee type of the pointer).
   uint32_t uint_type_id = var_ptr_type->GetSingleWordInOperand(1);

   Instruction* id = ir_builder.AddLoad(uint_type_id, var_id);

   // Do the bitwise operations.  Or-ing the and-mask with 0xFFFFFFE0 sets every
   // bit above the low five, so the following and can only clear bits among the
   // low five bits of |id|.
   uint32_t mask_extended = ir_builder.GetUintConstantId(0xFFFFFFE0);
   Instruction* and_mask = ir_builder.AddBinaryOp(uint_type_id, SpvOpBitwiseOr,
                                                  uint_x, mask_extended);
   Instruction* and_result = ir_builder.AddBinaryOp(
       uint_type_id, SpvOpBitwiseAnd, id->result_id(), and_mask->result_id());
   Instruction* or_result = ir_builder.AddBinaryOp(
       uint_type_id, SpvOpBitwiseOr, and_result->result_id(), uint_y);
   Instruction* target_inv = ir_builder.AddBinaryOp(
       uint_type_id, SpvOpBitwiseXor, or_result->result_id(), uint_z);

   // Do the group operations.  The ballot value is a 4-component uint vector
   // with every bit set; the bit at |target_inv| is extracted from it.
   uint32_t uint_max_id = ir_builder.GetUintConstantId(0xFFFFFFFF);
   uint32_t subgroup_scope = ir_builder.GetUintConstantId(SpvScopeSubgroup);
   const auto* ballot_value_const = const_mgr->GetConstant(
       type_mgr->GetUIntVectorType(4),
       {uint_max_id, uint_max_id, uint_max_id, uint_max_id});
   Instruction* ballot_value =
       const_mgr->GetDefiningInstruction(ballot_value_const);
   Instruction* is_active = ir_builder.AddNaryOp(
       type_mgr->GetBoolTypeId(), SpvOpGroupNonUniformBallotBitExtract,
       {subgroup_scope, ballot_value->result_id(), target_inv->result_id()});
   Instruction* shuffle =
       ir_builder.AddNaryOp(inst->type_id(), SpvOpGroupNonUniformShuffle,
                            {subgroup_scope, data_id, target_inv->result_id()});

   // Create the null constant to use in the select.
   const auto* null = const_mgr->GetConstant(type_mgr->GetType(inst->type_id()),
                                             std::vector<uint32_t>());
   Instruction* null_inst = const_mgr->GetDefiningInstruction(null);

   // Build the select: the shuffled value when |is_active| is true, otherwise
   // the null constant.
   inst->SetOpcode(SpvOpSelect);
   Instruction::OperandList new_operands;
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {is_active->result_id()}});
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {shuffle->result_id()}});
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {null_inst->result_id()}});

   inst->SetInOperands(std::move(new_operands));
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that lowers the WriteInvocationAMD extended instruction in
 // the SPV_AMD_shader_ballot extension.  |inst| is rewritten in place into an
 // OpSelect, and the rule always returns true.
 //
 // The instruction
 //
 // clang-format off
 //    %result = OpExtInst %type %1 WriteInvocationAMD %input_value %write_value %invocation_index
 // clang-format on
 //
 // is replaced with
 //
 //     %id = OpLoad %uint %SubgroupLocalInvocationId
 //    %cmp = OpIEqual %bool %id %invocation_index
 // %result = OpSelect %type %cmp %write_value %input_value
 //
 // The SubgroupBallotKHR capability and the SPV_KHR_shader_ballot extension are
 // added to the module, and the SubgroupLocalInvocationId builtin variable is
 // requested from the context.
 bool ReplaceWriteInvocation(IRContext* ctx, Instruction* inst,
                             const std::vector<const analysis::Constant*>&) {
   const uint32_t invocation_var_id =
       ctx->GetBuiltinInputVarId(SpvBuiltInSubgroupLocalInvocationId);
   ctx->AddCapability(SpvCapabilitySubgroupBallotKHR);
   ctx->AddExtension("SPV_KHR_shader_ballot");
   assert(invocation_var_id != 0 &&
          "Could not get SubgroupLocalInvocationId variable.");

   // The type to load is in-operand 1 of the variable's pointer type.
   analysis::DefUseManager* def_use_mgr = ctx->get_def_use_mgr();
   Instruction* var_def = def_use_mgr->GetDef(invocation_var_id);
   Instruction* ptr_type_def = def_use_mgr->GetDef(var_def->type_id());

   InstructionBuilder builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   Instruction* invocation_id = builder.AddLoad(
       ptr_type_def->GetSingleWordInOperand(1), invocation_var_id);

   // Compare the current invocation id against %invocation_index.
   analysis::Bool bool_type;
   const uint32_t bool_type_id =
       ctx->get_type_mgr()->GetTypeInstruction(&bool_type);
   Instruction* is_target = builder.AddBinaryOp(
       bool_type_id, SpvOpIEqual, invocation_id->result_id(),
       inst->GetSingleWordInOperand(4));

   // Turn |inst| into the select.  The operands are copied out of |inst|
   // before its operand list is replaced.
   inst->SetOpcode(SpvOpSelect);
   Instruction::OperandList select_operands;
   select_operands.push_back({SPV_OPERAND_TYPE_ID, {is_target->result_id()}});
   select_operands.push_back(inst->GetInOperand(3));  // %write_value
   select_operands.push_back(inst->GetInOperand(2));  // %input_value

   inst->SetInOperands(std::move(select_operands));
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that will replace the MbcntAMD extended instruction in
 // the SPV_AMD_shader_ballot extension.  |inst| is rewritten in place into an
 // OpBitCount, and the rule always returns true.
 //
 // The instruction
 //
 //  %result = OpExtInst %uint %1 MbcntAMD %mask
 //
 // is replaced with
 //
 // Get SubgroupLtMask and convert the first 64-bits into a uint64_t because
 // AMD's shader compiler expects a 64-bit integer mask.
 //
 //     %var = OpLoad %v4uint %SubgroupLtMaskKHR
 // %shuffle = OpVectorShuffle %v2uint %var %var 0 1
 //    %cast = OpBitcast %ulong %shuffle
 //
 // Perform the mask and count the bits.
 //
 //     %and = OpBitwiseAnd %ulong %cast %mask
 //  %result = OpBitCount %uint %and
 //
 // Also adding the capabilities and builtins that are needed.
 bool ReplaceMbcnt(IRContext* context, Instruction* inst,
                   const std::vector<const analysis::Constant*>&) {
   analysis::TypeManager* type_mgr = context->get_type_mgr();
   analysis::DefUseManager* def_use_mgr = context->get_def_use_mgr();

   uint32_t var_id = context->GetBuiltinInputVarId(SpvBuiltInSubgroupLtMask);
   // NOTE(review): a zero id is only caught by this assertion; with assertions
   // disabled the code below proceeds with it.
   assert(var_id != 0 && "Could not get SubgroupLtMask variable.");
   context->AddCapability(SpvCapabilityGroupNonUniformBallot);
   // Walk from the variable to its pointer type, and from there to the type it
   // points to (in-operand 1 of the pointer type).
   Instruction* var_inst = def_use_mgr->GetDef(var_id);
   Instruction* var_ptr_type = def_use_mgr->GetDef(var_inst->type_id());
   Instruction* var_type =
       def_use_mgr->GetDef(var_ptr_type->GetSingleWordInOperand(1));
   assert(var_type->opcode() == SpvOpTypeVector &&
          "Variable is suppose to be a vector of 4 ints");

   // Get the type for the shuffle: a vector of two 32-bit unsigned integers.
   analysis::Vector temp_type(GetUIntType(context), 2);
   const analysis::Type* shuffle_type =
       context->get_type_mgr()->GetRegisteredType(&temp_type);
   uint32_t shuffle_type_id = type_mgr->GetTypeInstruction(shuffle_type);

   // In-operands 0 and 1 are the instruction set and the instruction number;
   // %mask follows.
   uint32_t mask_id = inst->GetSingleWordInOperand(2);
   Instruction* mask_inst = def_use_mgr->GetDef(mask_id);

   // Testing with amd's shader compiler shows that a 64-bit mask is expected.
   // This is only checked by assertions.
   assert(type_mgr->GetType(mask_inst->type_id())->AsInteger() != nullptr);
   assert(type_mgr->GetType(mask_inst->type_id())->AsInteger()->width() == 64);

   InstructionBuilder ir_builder(
       context, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   Instruction* load = ir_builder.AddLoad(var_type->result_id(), var_id);
   // Keep components 0 and 1 of the loaded mask, then reinterpret them as a
   // value of the mask's own type so the two can be and-ed together.
   Instruction* shuffle = ir_builder.AddVectorShuffle(
       shuffle_type_id, load->result_id(), load->result_id(), {0, 1});
   Instruction* bitcast = ir_builder.AddUnaryOp(
       mask_inst->type_id(), SpvOpBitcast, shuffle->result_id());
   Instruction* t = ir_builder.AddBinaryOp(mask_inst->type_id(), SpvOpBitwiseAnd,
                                           bitcast->result_id(), mask_id);

   // |inst| keeps its result type and id, and now counts the bits of the
   // masked value.
   inst->SetOpcode(SpvOpBitCount);
   inst->SetInOperands({{SPV_OPERAND_TYPE_ID, {t->result_id()}}});
   context->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that will replace the CubeFaceCoordAMD extended
 // instruction in the SPV_AMD_gcn_shader extension.  |inst| is rewritten in
 // place into an OpFAdd, and the rule always returns true.
 //
 // The instruction
 //
 //  %result = OpExtInst %v2float %1 CubeFaceCoordAMD %input
 //
 // is replaced with the following, listed in the order the instructions are
 // emitted.  %n_1 is the id of the GLSL.std.450 import, and %const is the
 // v2float constant (0.5, 0.5).
 //
 //             %x = OpCompositeExtract %float %input 0
 //             %y = OpCompositeExtract %float %input 1
 //             %z = OpCompositeExtract %float %input 2
 //            %nx = OpFNegate %float %x
 //            %ny = OpFNegate %float %y
 //            %nz = OpFNegate %float %z
 //            %ax = OpExtInst %float %n_1 FAbs %x
 //            %ay = OpExtInst %float %n_1 FAbs %y
 //            %az = OpExtInst %float %n_1 FAbs %z
 //      %is_z_neg = OpFOrdLessThan %bool %z %float_0
 //      %is_y_neg = OpFOrdLessThan %bool %y %float_0
 //      %is_x_neg = OpFOrdLessThan %bool %x %float_0
 //      %amax_x_y = OpExtInst %float %n_1 FMax %ax %ay
 //          %amax = OpExtInst %float %n_1 FMax %az %amax_x_y
 //        %cubema = OpFMul %float %float_2 %amax
 //      %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y
 //  %not_is_z_max = OpLogicalNot %bool %is_z_max
 //        %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax
 //      %is_y_max = OpLogicalAnd %bool %not_is_z_max %y_gt_x
 // %cubesc_case_1 = OpSelect %float %is_z_neg %nx %x
 // %cubesc_case_2 = OpSelect %float %is_x_neg %z %nz
 //           %sel = OpSelect %float %is_y_max %x %cubesc_case_2
 //        %cubesc = OpSelect %float %is_z_max %cubesc_case_1 %sel
 // %cubetc_case_1 = OpSelect %float %is_y_neg %nz %z
 //        %cubetc = OpSelect %float %is_y_max %cubetc_case_1 %ny
 //          %cube = OpCompositeConstruct %v2float %cubesc %cubetc
 //         %denom = OpCompositeConstruct %v2float %cubema %cubema
 //           %div = OpFDiv %v2float %cube %denom
 //        %result = OpFAdd %v2float %div %const
 //
 // Also adding the capabilities and builtins that are needed.
 bool ReplaceCubeFaceCoord(IRContext* ctx, Instruction* inst,
                           const std::vector<const analysis::Constant*>&) {
   analysis::TypeManager* type_mgr = ctx->get_type_mgr();
   analysis::ConstantManager* const_mgr = ctx->get_constant_mgr();

   uint32_t float_type_id = type_mgr->GetFloatTypeId();
   const analysis::Type* v2_float_type = type_mgr->GetFloatVectorType(2);
   uint32_t v2_float_type_id = type_mgr->GetId(v2_float_type);
   uint32_t bool_id = type_mgr->GetBoolTypeId();

   InstructionBuilder ir_builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // In-operands 0 and 1 are the instruction set and the instruction number;
   // %input follows.
   uint32_t input_id = inst->GetSingleWordInOperand(2);
   // Look up the GLSL.std.450 import, adding it to the module if it is missing.
   uint32_t glsl405_ext_inst_id =
       ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (glsl405_ext_inst_id == 0) {
     ctx->AddExtInstImport("GLSL.std.450");
     glsl405_ext_inst_id =
         ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   }

   // Get the constants that will be used.  |vec_const| is the v2float constant
   // (0.5, 0.5) that is added at the very end.
   uint32_t f0_const_id = const_mgr->GetFloatConst(0.0);
   uint32_t f2_const_id = const_mgr->GetFloatConst(2.0);
   uint32_t f0_5_const_id = const_mgr->GetFloatConst(0.5);
   const analysis::Constant* vec_const =
       const_mgr->GetConstant(v2_float_type, {f0_5_const_id, f0_5_const_id});
   uint32_t vec_const_id =
       const_mgr->GetDefiningInstruction(vec_const)->result_id();

   // Extract the input values.
   Instruction* x = ir_builder.AddCompositeExtract(float_type_id, input_id, {0});
   Instruction* y = ir_builder.AddCompositeExtract(float_type_id, input_id, {1});
   Instruction* z = ir_builder.AddCompositeExtract(float_type_id, input_id, {2});

   // Negate the input values.
   Instruction* nx =
       ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, x->result_id());
   Instruction* ny =
       ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, y->result_id());
   Instruction* nz =
       ir_builder.AddUnaryOp(float_type_id, SpvOpFNegate, z->result_id());

   // Get the absolute values of the inputs.
   Instruction* ax = ir_builder.AddNaryExtendedInstruction(
       float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {x->result_id()});
   Instruction* ay = ir_builder.AddNaryExtendedInstruction(
       float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {y->result_id()});
   Instruction* az = ir_builder.AddNaryExtendedInstruction(
       float_type_id, glsl405_ext_inst_id, GLSLstd450FAbs, {z->result_id()});

   // Find which values are negative.  Used in later computations.
   Instruction* is_z_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan,
                                                  z->result_id(), f0_const_id);
   Instruction* is_y_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan,
                                                  y->result_id(), f0_const_id);
   Instruction* is_x_neg = ir_builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan,
                                                  x->result_id(), f0_const_id);

   // Compute cubema: twice the largest absolute component.
   Instruction* amax_x_y = ir_builder.AddNaryExtendedInstruction(
       float_type_id, glsl405_ext_inst_id, GLSLstd450FMax,
       {ax->result_id(), ay->result_id()});
   Instruction* amax = ir_builder.AddNaryExtendedInstruction(
       float_type_id, glsl405_ext_inst_id, GLSLstd450FMax,
       {az->result_id(), amax_x_y->result_id()});
   Instruction* cubema = ir_builder.AddBinaryOp(float_type_id, SpvOpFMul,
                                                f2_const_id, amax->result_id());

   // Do the comparisons needed for computing cubesc and cubetc.  The z case
   // takes precedence: |is_y_max| is only true when |is_z_max| is false.
   Instruction* is_z_max =
       ir_builder.AddBinaryOp(bool_id, SpvOpFOrdGreaterThanEqual,
                              az->result_id(), amax_x_y->result_id());
   Instruction* not_is_z_max =
       ir_builder.AddUnaryOp(bool_id, SpvOpLogicalNot, is_z_max->result_id());
   Instruction* y_gr_x = ir_builder.AddBinaryOp(
       bool_id, SpvOpFOrdGreaterThanEqual, ay->result_id(), ax->result_id());
   Instruction* is_y_max = ir_builder.AddBinaryOp(
       bool_id, SpvOpLogicalAnd, not_is_z_max->result_id(), y_gr_x->result_id());

   // Select the correct value for cubesc.  |cubesc_case_1| is used when z is
   // the major axis, |x| when y is, and |cubesc_case_2| otherwise.
   Instruction* cubesc_case_1 = ir_builder.AddSelect(
       float_type_id, is_z_neg->result_id(), nx->result_id(), x->result_id());
   Instruction* cubesc_case_2 = ir_builder.AddSelect(
       float_type_id, is_x_neg->result_id(), z->result_id(), nz->result_id());
   Instruction* sel =
       ir_builder.AddSelect(float_type_id, is_y_max->result_id(), x->result_id(),
                            cubesc_case_2->result_id());
   Instruction* cubesc =
       ir_builder.AddSelect(float_type_id, is_z_max->result_id(),
                            cubesc_case_1->result_id(), sel->result_id());

   // Select the correct value for cubetc.  |cubetc_case_1| is used when y is
   // the major axis, and |ny| otherwise.
   Instruction* cubetc_case_1 = ir_builder.AddSelect(
       float_type_id, is_y_neg->result_id(), nz->result_id(), z->result_id());
   Instruction* cubetc =
       ir_builder.AddSelect(float_type_id, is_y_max->result_id(),
                            cubetc_case_1->result_id(), ny->result_id());

   // Do the division
   Instruction* cube = ir_builder.AddCompositeConstruct(
       v2_float_type_id, {cubesc->result_id(), cubetc->result_id()});
   Instruction* denom = ir_builder.AddCompositeConstruct(
       v2_float_type_id, {cubema->result_id(), cubema->result_id()});
   Instruction* div = ir_builder.AddBinaryOp(
       v2_float_type_id, SpvOpFDiv, cube->result_id(), denom->result_id());

   // Get the final result by adding 0.5 to |div|.
   inst->SetOpcode(SpvOpFAdd);
   Instruction::OperandList new_operands;
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {div->result_id()}});
   new_operands.push_back({SPV_OPERAND_TYPE_ID, {vec_const_id}});

   inst->SetInOperands(std::move(new_operands));
   ctx->UpdateDefUse(inst);
   return true;
 }

 // A folding rule that will replace the CubeFaceIndexAMD extended instruction
 // in the SPV_AMD_gcn_shader extension.  Returns true if the folding is
 // successful.
 //
 // The instruction
 //
 //  %result = OpExtInst %float %1 CubeFaceIndexAMD %input
 //
 // with
 //
 //             %x = OpCompositeExtract %float %input 0
 //             %y = OpCompositeExtract %float %input 1
 //             %z = OpCompositeExtract %float %input 2
 //            %ax = OpExtInst %float %n_1 FAbs %x
 //            %ay = OpExtInst %float %n_1 FAbs %y
 //            %az = OpExtInst %float %n_1 FAbs %z
 //      %is_z_neg = OpFOrdLessThan %bool %z %float_0
 //      %is_y_neg = OpFOrdLessThan %bool %y %float_0
 //      %is_x_neg = OpFOrdLessThan %bool %x %float_0
 //      %amax_x_y = OpExtInst %float %n_1 FMax %ax %ay
 //      %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y
 //        %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax
 //        %case_z = OpSelect %float %is_z_neg %float_5 %float_4
 //        %case_y = OpSelect %float %is_y_neg %float_3 %float_2
 //        %case_x = OpSelect %float %is_x_neg %float_1 %float_0
 //           %sel = OpSelect %float %y_gt_x %case_y %case_x
 //        %result = OpSelect %float %is_z_max %case_z %sel
 //
 // Also adding the capabilities and builtins that are needed.
 bool ReplaceCubeFaceIndex(IRContext* ctx, Instruction* inst,
                           const std::vector<const analysis::Constant*>&) {
   analysis::TypeManager* types = ctx->get_type_mgr();
   analysis::ConstantManager* constants = ctx->get_constant_mgr();

   const uint32_t float_id = types->GetFloatTypeId();
   const uint32_t bool_id = types->GetBoolTypeId();

   // The builder is anchored at |inst|.
   InstructionBuilder builder(
       ctx, inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);

   // In-operand 2 is the id of the composite whose components 0, 1 and 2 are
   // read below.
   const uint32_t input_id = inst->GetSingleWordInOperand(2);

   // Look up the GLSL.std.450 import, adding it to the module when absent.
   uint32_t glsl450_id = ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (glsl450_id == 0) {
     ctx->AddExtInstImport("GLSL.std.450");
     glsl450_id = ctx->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   }

   // Float constants 0.0 through 5.0.  0.0 doubles as the comparison value
   // for the sign tests; all six are the possible results.
   const uint32_t float_0_id = constants->GetFloatConst(0.0);
   const uint32_t float_1_id = constants->GetFloatConst(1.0);
   const uint32_t float_2_id = constants->GetFloatConst(2.0);
   const uint32_t float_3_id = constants->GetFloatConst(3.0);
   const uint32_t float_4_id = constants->GetFloatConst(4.0);
   const uint32_t float_5_id = constants->GetFloatConst(5.0);

   // Small emitters for the instruction shapes that are repeated below.
   auto component = [&](uint32_t index) {
     return builder.AddCompositeExtract(float_id, input_id, {index});
   };
   auto magnitude = [&](Instruction* value) {
     return builder.AddNaryExtendedInstruction(
         float_id, glsl450_id, GLSLstd450FAbs, {value->result_id()});
   };
   auto is_negative = [&](Instruction* value) {
     return builder.AddBinaryOp(bool_id, SpvOpFOrdLessThan, value->result_id(),
                                float_0_id);
   };
   auto greater_or_equal = [&](Instruction* lhs, Instruction* rhs) {
     return builder.AddBinaryOp(bool_id, SpvOpFOrdGreaterThanEqual,
                                lhs->result_id(), rhs->result_id());
   };
   auto choose = [&](Instruction* cond, uint32_t if_true, uint32_t if_false) {
     return builder.AddSelect(float_id, cond->result_id(), if_true, if_false);
   };

   // Split the input into its three components.
   Instruction* x = component(0);
   Instruction* y = component(1);
   Instruction* z = component(2);

   // |x|, |y| and |z|.
   Instruction* abs_x = magnitude(x);
   Instruction* abs_y = magnitude(y);
   Instruction* abs_z = magnitude(z);

   // Sign of each component; this picks between the two values of an axis.
   Instruction* z_negative = is_negative(z);
   Instruction* y_negative = is_negative(y);
   Instruction* x_negative = is_negative(x);

   // Work out which axis has the largest magnitude.
   Instruction* abs_max_xy = builder.AddNaryExtendedInstruction(
       float_id, glsl450_id, GLSLstd450FMax,
       {abs_x->result_id(), abs_y->result_id()});
   Instruction* z_is_major = greater_or_equal(abs_z, abs_max_xy);
   Instruction* y_ge_x = greater_or_equal(abs_y, abs_x);

   // Candidate result per axis: the odd value when the component is negative,
   // the even one otherwise.
   Instruction* z_case = choose(z_negative, float_5_id, float_4_id);
   Instruction* y_case = choose(y_negative, float_3_id, float_2_id);
   Instruction* x_case = choose(x_negative, float_1_id, float_0_id);

   // Candidate used when z is not the major axis.
   Instruction* xy_case =
       choose(y_ge_x, y_case->result_id(), x_case->result_id());

   // Rewrite |inst| in place as the outermost select:
   //   z_is_major ? z_case : xy_case
   inst->SetOpcode(SpvOpSelect);
   Instruction::OperandList select_operands;
   for (uint32_t id : {z_is_major->result_id(), z_case->result_id(),
                       xy_case->result_id()}) {
     select_operands.push_back({SPV_OPERAND_TYPE_ID, {id}});
   }

   inst->SetInOperands(std::move(select_operands));
   ctx->UpdateDefUse(inst);
   return true;
 }

 class AmdExtFoldingRules : public FoldingRules {
  public:
   explicit AmdExtFoldingRules(IRContext* ctx) : FoldingRules(ctx) {}

  protected:
   virtual void AddFoldingRules() override {
     rules_[SpvOpGroupIAddNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformIAdd>);
     rules_[SpvOpGroupFAddNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformFAdd>);
     rules_[SpvOpGroupUMinNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformUMin>);
     rules_[SpvOpGroupSMinNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformSMin>);
     rules_[SpvOpGroupFMinNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformFMin>);
     rules_[SpvOpGroupUMaxNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformUMax>);
     rules_[SpvOpGroupSMaxNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformSMax>);
     rules_[SpvOpGroupFMaxNonUniformAMD].push_back(
         ReplaceGroupNonuniformOperationOpCode<SpvOpGroupNonUniformFMax>);

     uint32_t extension_id =
         context()->module()->GetExtInstImportId("SPV_AMD_shader_ballot");

     if (extension_id != 0) {
       ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsAMD}]
           .push_back(ReplaceSwizzleInvocations);
       ext_rules_[{extension_id, AmdShaderBallotSwizzleInvocationsMaskedAMD}]
           .push_back(ReplaceSwizzleInvocationsMasked);
       ext_rules_[{extension_id, AmdShaderBallotWriteInvocationAMD}].push_back(
           ReplaceWriteInvocation);
       ext_rules_[{extension_id, AmdShaderBallotMbcntAMD}].push_back(
           ReplaceMbcnt);
     }

     extension_id = context()->module()->GetExtInstImportId(
         "SPV_AMD_shader_trinary_minmax");

     if (extension_id != 0) {
       ext_rules_[{extension_id, FMin3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450FMin>);
       ext_rules_[{extension_id, UMin3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450UMin>);
       ext_rules_[{extension_id, SMin3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450SMin>);
       ext_rules_[{extension_id, FMax3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450FMax>);
       ext_rules_[{extension_id, UMax3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450UMax>);
       ext_rules_[{extension_id, SMax3AMD}].push_back(
           ReplaceTrinaryMinMax<GLSLstd450SMax>);
       ext_rules_[{extension_id, FMid3AMD}].push_back(
           ReplaceTrinaryMid<GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp>);
       ext_rules_[{extension_id, UMid3AMD}].push_back(
           ReplaceTrinaryMid<GLSLstd450UMin, GLSLstd450UMax, GLSLstd450UClamp>);
       ext_rules_[{extension_id, SMid3AMD}].push_back(
           ReplaceTrinaryMid<GLSLstd450SMin, GLSLstd450SMax, GLSLstd450SClamp>);
     }

     extension_id =
         context()->module()->GetExtInstImportId("SPV_AMD_gcn_shader");

     if (extension_id != 0) {
       ext_rules_[{extension_id, CubeFaceCoordAMD}].push_back(
           ReplaceCubeFaceCoord);
       ext_rules_[{extension_id, CubeFaceIndexAMD}].push_back(
           ReplaceCubeFaceIndex);
       ext_rules_[{extension_id, TimeAMD}].push_back(NotImplementedYet);
     }
   }
 };

 class AmdExtConstFoldingRules : public ConstantFoldingRules {
  public:
   AmdExtConstFoldingRules(IRContext* ctx) : ConstantFoldingRules(ctx) {}

  protected:
   virtual void AddFoldingRules() override {}
 };

 }  // namespace

 // Folds every instruction in the module with the AMD replacement rules, then
 // deletes the OpExtension and OpExtInstImport instructions that name the AMD
 // extensions.  When anything changed, the module version is raised to at
 // least SPIR-V 1.3.  Returns whether the module was modified.
 Pass::Status AmdExtensionToKhrPass::Process() {
   bool modified = false;

   // Run a folder over every instruction of every function, using the AMD
   // rule sets in place of the default ones.
   InstructionFolder folder(context(),
                            MakeUnique<AmdExtFoldingRules>(context()),
                            MakeUnique<AmdExtConstFoldingRules>(context()));
   for (Function& func : *get_module()) {
     func.ForEachInst([&folder, &modified](Instruction* inst) {
       modified |= folder.FoldInstruction(inst);
     });
   }

   // With the instructions that need them replaced, the declarations of the
   // AMD extensions themselves can go.
   const std::set<std::string> amd_extensions = {
       "SPV_AMD_shader_ballot", "SPV_AMD_shader_trinary_minmax",
       "SPV_AMD_gcn_shader"};

   // True when the string stored in the first in-operand of |inst| is one of
   // the names above.
   auto names_amd_extension = [&amd_extensions](Instruction& inst) {
     const char* name =
         reinterpret_cast<const char*>(&(inst.GetInOperand(0).words[0]));
     return amd_extensions.count(name) != 0;
   };

   // Collect first and kill afterwards, so that nothing is removed while the
   // lists are being iterated.
   std::vector<Instruction*> doomed;
   for (Instruction& ext : context()->module()->extensions()) {
     if (ext.opcode() == SpvOpExtension && names_amd_extension(ext)) {
       doomed.push_back(&ext);
     }
   }
   for (Instruction& ext_import : context()->ext_inst_imports()) {
     if (ext_import.opcode() == SpvOpExtInstImport &&
         names_amd_extension(ext_import)) {
       doomed.push_back(&ext_import);
     }
   }

   for (Instruction* inst : doomed) {
     context()->KillInst(inst);
   }
   if (!doomed.empty()) {
     modified = true;
   }

   // The replacements use instructions that do not exist before SPIR-V 1.3,
   // so a modified module must declare at least that version.
   constexpr uint32_t kSpirv13 = 0x00010300;
   if (modified && get_module()->version() < kSpirv13) {
     get_module()->set_version(kSpirv13);
   }

   return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
 }

 }  // namespace opt
 }  // namespace spvtools