// source/opt/convert_to_half_pass.cpp - external/github.com/KhronosGroup/SPIRV-Tools - Git at Google

 // Copyright (c) 2019 The Khronos Group Inc.
 // Copyright (c) 2019 Valve Corporation
 // Copyright (c) 2019 LunarG Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "convert_to_half_pass.h"

 #include "source/opt/ir_builder.h"

 namespace spvtools {
 namespace opt {
 namespace {
 // Indices of operands in SPIR-V instructions.
 //
 // In-operand index that ProcessImageRef reads and rewrites as the Dref
 // argument of the depth-reference image opcodes listed in dref_image_ops_.
 constexpr int kImageSampleDrefIdInIdx = 2;
 }  // namespace

 // Returns true if |inst| is one of the opcodes this pass may rewrite to half
 // precision: a core opcode listed in target_ops_core_, or an OpExtInst from
 // the GLSL.std.450 import whose instruction number is in target_ops_450_.
 bool ConvertToHalfPass::IsArithmetic(Instruction* inst) {
   if (target_ops_core_.count(inst->opcode()) != 0) return true;
   if (inst->opcode() != spv::Op::OpExtInst) return false;
   // In-operand 0 is compared against the GLSL.std.450 import id; in-operand 1
   // is looked up as the extended instruction number.
   const uint32_t glsl450_id =
       context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (inst->GetSingleWordInOperand(0) != glsl450_id) return false;
   return target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0;
 }

 // Returns true if |inst| has a result type and Pass::IsFloat accepts that type
 // for the given |width|. Instructions without a result type yield false.
 bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
   const uint32_t type_id = inst->type_id();
   return type_id != 0 && Pass::IsFloat(type_id, width);
 }

 // Returns true if the result id of |inst| carries an OpDecorate with the
 // RelaxedPrecision decoration.
 bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
   const uint32_t result_id = inst->result_id();
   for (auto dec_inst :
        get_decoration_mgr()->GetDecorationsFor(result_id, false)) {
     if (dec_inst->opcode() != spv::Op::OpDecorate) continue;
     // In-operand 1 of OpDecorate is read as the decoration kind.
     const auto decoration =
         spv::Decoration(dec_inst->GetSingleWordInOperand(1));
     if (decoration == spv::Decoration::RelaxedPrecision) return true;
   }
   return false;
 }

 // Returns true if |id| has been recorded in relaxed_ids_set_.
 bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
   return relaxed_ids_set_.find(id) != relaxed_ids_set_.end();
 }

 // Records |id| in relaxed_ids_set_ so that IsRelaxed(id) returns true.
 void ConvertToHalfPass::AddRelaxed(uint32_t id) {
   relaxed_ids_set_.insert(id);
 }

 // Returns the type manager's registered scalar float type of |width| bits.
 analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
   analysis::Float scalar_ty(width);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&scalar_ty);
 }

 // Returns the registered vector type with |v_len| components, each a float of
 // |width| bits.
 analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len,
                                                    uint32_t width) {
   analysis::Vector vector_ty(FloatScalarType(width), v_len);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&vector_ty);
 }

 // Returns the registered matrix type with |v_cnt| columns of float vectors of
 // |width| bits. |vty_id| is the id of an existing vector type; only its
 // component count (in-operand 1) is taken from it.
 analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt,
                                                    uint32_t vty_id,
                                                    uint32_t width) {
   const uint32_t column_len =
       get_def_use_mgr()->GetDef(vty_id)->GetSingleWordInOperand(1);
   analysis::Matrix matrix_ty(FloatVectorType(column_len, width), v_cnt);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&matrix_ty);
 }

 uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
   analysis::Type* reg_equiv_ty;
   Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
   if (ty_inst->opcode() == spv::Op::OpTypeMatrix)
     reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1),
                                    ty_inst->GetSingleWordInOperand(0), width);
   else if (ty_inst->opcode() == spv::Op::OpTypeVector)
     reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width);
   else  // spv::Op::OpTypeFloat
     reg_equiv_ty = FloatScalarType(width);
   return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty);
 }

 void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
                                    Instruction* inst) {
   Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
   uint32_t ty_id = val_inst->type_id();
   uint32_t nty_id = EquivFloatTypeId(ty_id, width);
   if (nty_id == ty_id) return;
   Instruction* cvt_inst;
   InstructionBuilder builder(
       context(), inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   if (val_inst->opcode() == spv::Op::OpUndef)
     cvt_inst = builder.AddNullaryOp(nty_id, spv::Op::OpUndef);
   else
     cvt_inst = builder.AddUnaryOp(nty_id, spv::Op::OpFConvert, *val_idp);
   *val_idp = cvt_inst->result_id();
 }

 // Replaces an OpFConvert that produces a matrix with per-column work: each
 // column is extracted from the source matrix, converted on its own, and the
 // results are combined with OpCompositeConstruct. All uses of the original
 // result are redirected to the new matrix. Returns true if |inst| was such a
 // convert and was rewritten, false otherwise.
 bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) {
   if (inst->opcode() != spv::Op::OpFConvert) return false;
   const uint32_t mat_ty_id = inst->type_id();
   Instruction* mat_ty_inst = get_def_use_mgr()->GetDef(mat_ty_id);
   if (mat_ty_inst->opcode() != spv::Op::OpTypeMatrix) return false;
   // Walk matrix type -> column type -> component type.
   const uint32_t col_ty_id = mat_ty_inst->GetSingleWordInOperand(0);
   const uint32_t col_count = mat_ty_inst->GetSingleWordInOperand(1);
   Instruction* col_ty_inst = get_def_use_mgr()->GetDef(col_ty_id);
   const uint32_t comp_ty_id = col_ty_inst->GetSingleWordInOperand(0);
   Instruction* comp_ty_inst = get_def_use_mgr()->GetDef(comp_ty_id);
   InstructionBuilder builder(
       context(), inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   // The source matrix has the other width of the 16/32 pair.
   const uint32_t src_width =
       (comp_ty_inst->GetSingleWordInOperand(0) == 16) ? 32 : 16;
   const uint32_t src_mat_id = inst->GetSingleWordInOperand(0);
   const uint32_t src_col_ty_id = EquivFloatTypeId(col_ty_id, src_width);
   std::vector<Operand> columns;
   for (uint32_t col = 0; col != col_count; ++col) {
     Instruction* extract = builder.AddIdLiteralOp(
         src_col_ty_id, spv::Op::OpCompositeExtract, src_mat_id, col);
     Instruction* convert =
         builder.AddUnaryOp(col_ty_id, spv::Op::OpFConvert, extract->result_id());
     columns.push_back({SPV_OPERAND_TYPE_ID, {convert->result_id()}});
   }
   const uint32_t new_mat_id = TakeNextId();
   std::unique_ptr<Instruction> construct(new Instruction(
       context(), spv::Op::OpCompositeConstruct, mat_ty_id, new_mat_id,
       columns));
   (void)builder.AddInstruction(std::move(construct));
   context()->ReplaceAllUsesWith(inst->result_id(), new_mat_id);
   // The original convert is kept, but as a copy of its operand (result type
   // set to the source-width matrix) so that it stays valid.
   inst->SetOpcode(spv::Op::OpCopyObject);
   inst->SetResultType(EquivFloatTypeId(mat_ty_id, src_width));
   get_def_use_mgr()->AnalyzeInstUse(inst);
   return true;
 }

 // Asks the decoration manager to remove every OpDecorate RelaxedPrecision
 // attached to |id| and returns the manager's result.
 bool ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) {
   const auto is_relaxed_decoration = [](const Instruction& dec) {
     return dec.opcode() == spv::Op::OpDecorate &&
            spv::Decoration(dec.GetSingleWordInOperand(1u)) ==
                spv::Decoration::RelaxedPrecision;
   };
   return context()->get_decoration_mgr()->RemoveDecorationsFrom(
       id, is_relaxed_decoration);
 }

 // Rewrites |inst| to operate at half precision: every float32-based operand
 // is routed through a conversion to float16, and a float32-based result type
 // is replaced by its float16 equivalent, with the result id remembered in
 // converted_ids_. Returns true if anything was changed.
 bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
   bool changed = false;
   inst->ForEachInId([this, inst, &changed](uint32_t* opnd_idp) {
     Instruction* opnd_def = get_def_use_mgr()->GetDef(*opnd_idp);
     if (IsFloat(opnd_def, 32)) {
       GenConvert(opnd_idp, 16, inst);
       changed = true;
     }
   });
   if (IsFloat(inst, 32)) {
     const uint32_t half_ty_id = EquivFloatTypeId(inst->type_id(), 16);
     inst->SetResultType(half_ty_id);
     converted_ids_.insert(inst->result_id());
     changed = true;
   }
   // Operands and/or type changed, so the recorded uses must be refreshed.
   if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
   return changed;
 }

 // Adds converts of any float operands of phi |inst| to |to_width| if they are
 // of |from_width|. If converting to 16, also changes the type of the phi to
 // its float16 equivalent and remembers the result id in converted_ids_.
 // Converts are added to the preceding blocks, not next to the phi itself.
 // Returns true if |inst| or any preceding block was modified.
 bool ConvertToHalfPass::ProcessPhi(Instruction* inst, uint32_t from_width,
                                    uint32_t to_width) {
   uint32_t ocnt = 0;
   // In-operands are visited in (value id, block id) pairs; this holds the
   // value slot of the pair being visited. It starts as nullptr so the pointer
   // is never indeterminate before the first even-numbered visit assigns it.
   uint32_t* prev_idp = nullptr;
   bool modified = false;
   inst->ForEachInId([&ocnt, &prev_idp, &from_width, &to_width, &modified,
                      this](uint32_t* idp) {
     if (ocnt % 2 == 0) {
       prev_idp = idp;
     } else {
       Instruction* val_inst = get_def_use_mgr()->GetDef(*prev_idp);
       if (IsFloat(val_inst, from_width)) {
         // Insert in the block named by this operand, before its last
         // instruction -- or before the OpSelectionMerge/OpLoopMerge when one
         // immediately precedes that last instruction.
         BasicBlock* bp = context()->get_instr_block(*idp);
         auto insert_before = bp->tail();
         if (insert_before != bp->begin()) {
           --insert_before;
           if (insert_before->opcode() != spv::Op::OpSelectionMerge &&
               insert_before->opcode() != spv::Op::OpLoopMerge)
             ++insert_before;
         }
         GenConvert(prev_idp, to_width, &*insert_before);
         modified = true;
       }
     }
     ++ocnt;
   });
   if (to_width == 16u) {
     inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16u));
     converted_ids_.insert(inst->result_id());
     modified = true;
   }
   if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
   return modified;
 }

 // Handles an OpFConvert |inst|. Returns true only if |inst| was actually
 // changed, so an untouched convert does not make the pass report a
 // modification (which would also add the Float16 capability).
 bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
   bool modified = false;
   // If float32 and relaxed, change to float16 convert
   if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
     inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
     get_def_use_mgr()->AnalyzeInstUse(inst);
     converted_ids_.insert(inst->result_id());
     modified = true;
   }
   // If operand and result types are the same, change FConvert to CopyObject to
   // keep validator happy; simplification and DCE will clean it up
   // One way this can happen is if an FConvert generated during this pass
   // (likely by ProcessPhi) is later encountered here and its operand has been
   // changed to half.
   uint32_t val_id = inst->GetSingleWordInOperand(0);
   Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
   if (inst->type_id() == val_inst->type_id()) {
     inst->SetOpcode(spv::Op::OpCopyObject);
     modified = true;
   }
   return modified;
 }

 // Handles an image instruction |inst|. Only the Dref argument of the opcodes
 // in dref_image_ops_ is touched: if its id was converted by this pass, it is
 // converted back to float32. Returns true if |inst| was changed.
 bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
   if (dref_image_ops_.count(inst->opcode()) == 0) return false;
   uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
   if (converted_ids_.count(dref_id) == 0) return false;
   GenConvert(&dref_id, 32, inst);
   inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
   get_def_use_mgr()->AnalyzeInstUse(inst);
   return true;
 }

 // Handles an instruction that is not itself being converted: any operand
 // whose id is in converted_ids_ is converted back to float32 in front of
 // |inst|. Phis are delegated to ProcessPhi (16 -> 32). Returns true if an
 // operand of |inst| was replaced.
 bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
   if (inst->opcode() == spv::Op::OpPhi) return ProcessPhi(inst, 16u, 32u);
   bool changed = false;
   inst->ForEachInId([this, inst, &changed](uint32_t* opnd_idp) {
     const uint32_t before_id = *opnd_idp;
     if (converted_ids_.count(before_id) == 0) return;
     GenConvert(opnd_idp, 32, inst);
     // GenConvert leaves the id untouched when no conversion was needed.
     if (*opnd_idp != before_id) changed = true;
   });
   if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
   return changed;
 }

 // Dispatches |inst| to the matching handler and returns that handler's
 // "modified" result. Relaxed arithmetic and relaxed phis are converted to
 // half; OpFConvert and image opcodes have dedicated handlers; everything
 // else goes through ProcessDefault.
 bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
   const bool relaxed = IsRelaxed(inst->result_id());
   const spv::Op opcode = inst->opcode();
   if (relaxed && IsArithmetic(inst)) return GenHalfArith(inst);
   if (relaxed && opcode == spv::Op::OpPhi) return ProcessPhi(inst, 32u, 16u);
   if (opcode == spv::Op::OpFConvert) return ProcessConvert(inst);
   if (image_ops_.count(opcode) != 0) return ProcessImageRef(inst);
   return ProcessDefault(inst);
 }

 // One step of the relaxed-precision closure. Marks the result of |inst| as
 // relaxed and returns true when |inst| is a not-yet-relaxed float32-based
 // value that either (a) is decorated RelaxedPrecision, or (b) has an opcode in
 // closure_ops_ and all of its float32 operands are relaxed, or all of its
 // users are relaxed float32 values. Returns false when nothing new was marked.
 bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
   const uint32_t result_id = inst->result_id();
   if (result_id == 0) return false;
   if (IsRelaxed(result_id)) return false;
   if (!IsFloat(inst, 32)) return false;
   if (IsDecoratedRelaxed(inst)) {
     AddRelaxed(result_id);
     return true;
   }
   if (closure_ops_.count(inst->opcode()) == 0) return false;

   // Operands that are not float32-based do not take part in the decision.
   bool operands_relaxed = true;
   inst->ForEachInId([&operands_relaxed, this](uint32_t* opnd_idp) {
     Instruction* opnd_def = get_def_use_mgr()->GetDef(*opnd_idp);
     if (IsFloat(opnd_def, 32) && !IsRelaxed(*opnd_idp))
       operands_relaxed = false;
   });
   if (operands_relaxed) {
     AddRelaxed(result_id);
     return true;
   }

   // Otherwise every user must be a float32-based value that is relaxed,
   // whether by decoration or by an earlier closure step.
   bool users_relaxed = true;
   get_def_use_mgr()->ForEachUser(
       inst, [&users_relaxed, this](Instruction* user) {
         const bool relaxed_float_user =
             user->result_id() != 0 && IsFloat(user, 32) &&
             (IsDecoratedRelaxed(user) || IsRelaxed(user->result_id()));
         if (!relaxed_float_user) users_relaxed = false;
       });
   if (!users_relaxed) return false;
   AddRelaxed(result_id);
   return true;
 }

 // Processes one function in three sweeps over its blocks in reverse
 // post-order: (1) repeat CloseRelaxInst until no new id becomes relaxed,
 // (2) run GenHalfInst on every instruction, (3) run MatConvertCleanup on
 // every instruction. Returns true if sweep 2 or 3 modified the function.
 bool ConvertToHalfPass::ProcessFunction(Function* func) {
   // Sweep 1: closure of Relaxed on composite and phi instructions.
   bool grew;
   do {
     grew = false;
     cfg()->ForEachBlockInReversePostOrder(
         func->entry().get(), [&grew, this](BasicBlock* bb) {
           for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
             if (CloseRelaxInst(&*ii)) grew = true;
           }
         });
   } while (grew);

   // Sweep 2: convert relaxed instructions to half precision.
   bool modified = false;
   cfg()->ForEachBlockInReversePostOrder(
       func->entry().get(), [&modified, this](BasicBlock* bb) {
         for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
           if (GenHalfInst(&*ii)) modified = true;
         }
       });

   // Sweep 3: replace invalid converts of matrix with equivalent vector
   // extracts, converts and finally a composite construct.
   cfg()->ForEachBlockInReversePostOrder(
       func->entry().get(), [&modified, this](BasicBlock* bb) {
         for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
           if (MatConvertCleanup(&*ii)) modified = true;
         }
       });
   return modified;
 }

 // Runs ProcessFunction over the reachable call tree, adds the Float16
 // capability if any function was modified, then strips RelaxedPrecision
 // decorations from every relaxed id and from every id in the module's
 // types/values section. Reports whether anything changed.
 Pass::Status ConvertToHalfPass::ProcessImpl() {
   Pass::ProcessFunction process_fn = [this](Function* func) {
     return ProcessFunction(func);
   };
   bool changed = context()->ProcessReachableCallTree(process_fn);
   if (changed) context()->AddCapability(spv::Capability::Float16);
   // Decorations on instructions recorded as relaxed.
   for (const uint32_t relaxed_id : relaxed_ids_set_) {
     if (RemoveRelaxedDecoration(relaxed_id)) changed = true;
   }
   // Decorations on globals.
   for (auto& type_or_value : get_module()->types_values()) {
     const uint32_t global_id = type_or_value.result_id();
     if (global_id == 0) continue;
     if (RemoveRelaxedDecoration(global_id)) changed = true;
   }
   return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
 }

 // Pass entry point: resets the opcode tables and per-run state, then runs the
 // conversion and returns its status.
 Pass::Status ConvertToHalfPass::Process() {
   Initialize();
   const Pass::Status status = ProcessImpl();
   return status;
 }

 // Fills the opcode tables consulted by the rest of the pass and clears the
 // per-run id sets. Called by Process() before any instruction is examined.
 void ConvertToHalfPass::Initialize() {
   // Core opcodes that IsArithmetic accepts for conversion to half precision.
   target_ops_core_ = {
       spv::Op::OpVectorExtractDynamic,
       spv::Op::OpVectorInsertDynamic,
       spv::Op::OpVectorShuffle,
       spv::Op::OpCompositeConstruct,
       spv::Op::OpCompositeInsert,
       spv::Op::OpCompositeExtract,
       spv::Op::OpCopyObject,
       spv::Op::OpTranspose,
       spv::Op::OpConvertSToF,
       spv::Op::OpConvertUToF,
       // spv::Op::OpFConvert,
       // spv::Op::OpQuantizeToF16,
       spv::Op::OpFNegate,
       spv::Op::OpFAdd,
       spv::Op::OpFSub,
       spv::Op::OpFMul,
       spv::Op::OpFDiv,
       spv::Op::OpFMod,
       spv::Op::OpVectorTimesScalar,
       spv::Op::OpMatrixTimesScalar,
       spv::Op::OpVectorTimesMatrix,
       spv::Op::OpMatrixTimesVector,
       spv::Op::OpMatrixTimesMatrix,
       spv::Op::OpOuterProduct,
       spv::Op::OpDot,
       spv::Op::OpSelect,
       spv::Op::OpFOrdEqual,
       spv::Op::OpFUnordEqual,
       spv::Op::OpFOrdNotEqual,
       spv::Op::OpFUnordNotEqual,
       spv::Op::OpFOrdLessThan,
       spv::Op::OpFUnordLessThan,
       spv::Op::OpFOrdGreaterThan,
       spv::Op::OpFUnordGreaterThan,
       spv::Op::OpFOrdLessThanEqual,
       spv::Op::OpFUnordLessThanEqual,
       spv::Op::OpFOrdGreaterThanEqual,
       spv::Op::OpFUnordGreaterThanEqual,
   };
   // GLSL.std.450 extended instruction numbers that IsArithmetic accepts when
   // they appear as in-operand 1 of an OpExtInst from that import.
   target_ops_450_ = {
       GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
       GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
       GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
       GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
       GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
       GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
       GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
       GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
       GLSLstd450MatrixInverse,
       // TODO(greg-lunarg): GLSLstd450ModfStruct,
       GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
       GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
       // TODO(greg-lunarg): GLSLstd450FrexpStruct,
       GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
       GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
       GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
   // Image opcodes that GenHalfInst routes to ProcessImageRef instead of
   // ProcessDefault.
   image_ops_ = {spv::Op::OpImageSampleImplicitLod,
                 spv::Op::OpImageSampleExplicitLod,
                 spv::Op::OpImageSampleDrefImplicitLod,
                 spv::Op::OpImageSampleDrefExplicitLod,
                 spv::Op::OpImageSampleProjImplicitLod,
                 spv::Op::OpImageSampleProjExplicitLod,
                 spv::Op::OpImageSampleProjDrefImplicitLod,
                 spv::Op::OpImageSampleProjDrefExplicitLod,
                 spv::Op::OpImageFetch,
                 spv::Op::OpImageGather,
                 spv::Op::OpImageDrefGather,
                 spv::Op::OpImageRead,
                 spv::Op::OpImageSparseSampleImplicitLod,
                 spv::Op::OpImageSparseSampleExplicitLod,
                 spv::Op::OpImageSparseSampleDrefImplicitLod,
                 spv::Op::OpImageSparseSampleDrefExplicitLod,
                 spv::Op::OpImageSparseSampleProjImplicitLod,
                 spv::Op::OpImageSparseSampleProjExplicitLod,
                 spv::Op::OpImageSparseSampleProjDrefImplicitLod,
                 spv::Op::OpImageSparseSampleProjDrefExplicitLod,
                 spv::Op::OpImageSparseFetch,
                 spv::Op::OpImageSparseGather,
                 spv::Op::OpImageSparseDrefGather,
                 spv::Op::OpImageSparseTexelsResident,
                 spv::Op::OpImageSparseRead};
   // Image opcodes whose Dref in-operand ProcessImageRef converts back to
   // float32 when this pass converted it.
   dref_image_ops_ = {
       spv::Op::OpImageSampleDrefImplicitLod,
       spv::Op::OpImageSampleDrefExplicitLod,
       spv::Op::OpImageSampleProjDrefImplicitLod,
       spv::Op::OpImageSampleProjDrefExplicitLod,
       spv::Op::OpImageDrefGather,
       spv::Op::OpImageSparseSampleDrefImplicitLod,
       spv::Op::OpImageSparseSampleDrefExplicitLod,
       spv::Op::OpImageSparseSampleProjDrefImplicitLod,
       spv::Op::OpImageSparseSampleProjDrefExplicitLod,
       spv::Op::OpImageSparseDrefGather,
   };
   // Opcodes through which CloseRelaxInst propagates relaxed precision from
   // operands or users.
   closure_ops_ = {
       spv::Op::OpVectorExtractDynamic,
       spv::Op::OpVectorInsertDynamic,
       spv::Op::OpVectorShuffle,
       spv::Op::OpCompositeConstruct,
       spv::Op::OpCompositeInsert,
       spv::Op::OpCompositeExtract,
       spv::Op::OpCopyObject,
       spv::Op::OpTranspose,
       spv::Op::OpPhi,
   };
   // Reset per-run state.
   relaxed_ids_set_.clear();
   converted_ids_.clear();
 }

 }  // namespace opt
 }  // namespace spvtools
	// Copyright (c) 2019 The Khronos Group Inc.
	// Copyright (c) 2019 Valve Corporation
	// Copyright (c) 2019 LunarG Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "convert_to_half_pass.h"

	#include "source/opt/ir_builder.h"

	namespace spvtools {
	namespace opt {
	namespace {
	// Indices of operands in SPIR-V instructions.
	//
	// In-operand index that ProcessImageRef reads and rewrites as the Dref
	// argument of the depth-reference image opcodes listed in dref_image_ops_.
	constexpr int kImageSampleDrefIdInIdx = 2;
	} // namespace

	// Returns true if |inst| is one of the opcodes this pass may rewrite to half
	// precision: a core opcode listed in target_ops_core_, or an OpExtInst from
	// the GLSL.std.450 import whose instruction number is in target_ops_450_.
	bool ConvertToHalfPass::IsArithmetic(Instruction* inst) {
	  if (target_ops_core_.count(inst->opcode()) != 0) return true;
	  if (inst->opcode() != spv::Op::OpExtInst) return false;
	  // In-operand 0 is compared against the GLSL.std.450 import id; in-operand
	  // 1 is looked up as the extended instruction number.
	  const uint32_t glsl450_id =
	      context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
	  if (inst->GetSingleWordInOperand(0) != glsl450_id) return false;
	  return target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0;
	}

	// Returns true if |inst| has a result type and Pass::IsFloat accepts that
	// type for the given |width|. Instructions without a result type yield false.
	bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
	  const uint32_t type_id = inst->type_id();
	  return type_id != 0 && Pass::IsFloat(type_id, width);
	}

	// Returns true if the result id of |inst| carries an OpDecorate with the
	// RelaxedPrecision decoration.
	bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
	  const uint32_t result_id = inst->result_id();
	  for (auto dec_inst :
	       get_decoration_mgr()->GetDecorationsFor(result_id, false)) {
	    if (dec_inst->opcode() != spv::Op::OpDecorate) continue;
	    // In-operand 1 of OpDecorate is read as the decoration kind.
	    const auto decoration =
	        spv::Decoration(dec_inst->GetSingleWordInOperand(1));
	    if (decoration == spv::Decoration::RelaxedPrecision) return true;
	  }
	  return false;
	}

	// Returns true if |id| has been recorded in relaxed_ids_set_.
	bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
	  return relaxed_ids_set_.find(id) != relaxed_ids_set_.end();
	}

	// Records |id| in relaxed_ids_set_ so that IsRelaxed(id) returns true.
	void ConvertToHalfPass::AddRelaxed(uint32_t id) {
	  relaxed_ids_set_.insert(id);
	}

	// Returns the type manager's registered scalar float type of |width| bits.
	analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
	  analysis::Float scalar_ty(width);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&scalar_ty);
	}

	// Returns the registered vector type with |v_len| components, each a float
	// of |width| bits.
	analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len,
	                                                   uint32_t width) {
	  analysis::Vector vector_ty(FloatScalarType(width), v_len);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&vector_ty);
	}

	// Returns the registered matrix type with |v_cnt| columns of float vectors
	// of |width| bits. |vty_id| is the id of an existing vector type; only its
	// component count (in-operand 1) is taken from it.
	analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt,
	                                                   uint32_t vty_id,
	                                                   uint32_t width) {
	  const uint32_t column_len =
	      get_def_use_mgr()->GetDef(vty_id)->GetSingleWordInOperand(1);
	  analysis::Matrix matrix_ty(FloatVectorType(column_len, width), v_cnt);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&matrix_ty);
	}

	uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
	analysis::Type* reg_equiv_ty;
	Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
	if (ty_inst->opcode() == spv::Op::OpTypeMatrix)
	reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1),
	ty_inst->GetSingleWordInOperand(0), width);
	else if (ty_inst->opcode() == spv::Op::OpTypeVector)
	reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width);
	else // spv::Op::OpTypeFloat
	reg_equiv_ty = FloatScalarType(width);
	return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty);
	}

	void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
	Instruction* inst) {
	Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
	uint32_t ty_id = val_inst->type_id();
	uint32_t nty_id = EquivFloatTypeId(ty_id, width);
	if (nty_id == ty_id) return;
	Instruction* cvt_inst;
	InstructionBuilder builder(
	context(), inst,
	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
	if (val_inst->opcode() == spv::Op::OpUndef)
	cvt_inst = builder.AddNullaryOp(nty_id, spv::Op::OpUndef);
	else
	cvt_inst = builder.AddUnaryOp(nty_id, spv::Op::OpFConvert, *val_idp);
	*val_idp = cvt_inst->result_id();
	}

	// Replaces an OpFConvert that produces a matrix with per-column work: each
	// column is extracted from the source matrix, converted on its own, and the
	// results are combined with OpCompositeConstruct. All uses of the original
	// result are redirected to the new matrix. Returns true if |inst| was such a
	// convert and was rewritten, false otherwise.
	bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) {
	  if (inst->opcode() != spv::Op::OpFConvert) return false;
	  const uint32_t mat_ty_id = inst->type_id();
	  Instruction* mat_ty_inst = get_def_use_mgr()->GetDef(mat_ty_id);
	  if (mat_ty_inst->opcode() != spv::Op::OpTypeMatrix) return false;
	  // Walk matrix type -> column type -> component type.
	  const uint32_t col_ty_id = mat_ty_inst->GetSingleWordInOperand(0);
	  const uint32_t col_count = mat_ty_inst->GetSingleWordInOperand(1);
	  Instruction* col_ty_inst = get_def_use_mgr()->GetDef(col_ty_id);
	  const uint32_t comp_ty_id = col_ty_inst->GetSingleWordInOperand(0);
	  Instruction* comp_ty_inst = get_def_use_mgr()->GetDef(comp_ty_id);
	  InstructionBuilder builder(
	      context(), inst,
	      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
	  // The source matrix has the other width of the 16/32 pair.
	  const uint32_t src_width =
	      (comp_ty_inst->GetSingleWordInOperand(0) == 16) ? 32 : 16;
	  const uint32_t src_mat_id = inst->GetSingleWordInOperand(0);
	  const uint32_t src_col_ty_id = EquivFloatTypeId(col_ty_id, src_width);
	  std::vector<Operand> columns;
	  for (uint32_t col = 0; col != col_count; ++col) {
	    Instruction* extract = builder.AddIdLiteralOp(
	        src_col_ty_id, spv::Op::OpCompositeExtract, src_mat_id, col);
	    Instruction* convert = builder.AddUnaryOp(col_ty_id, spv::Op::OpFConvert,
	                                              extract->result_id());
	    columns.push_back({SPV_OPERAND_TYPE_ID, {convert->result_id()}});
	  }
	  const uint32_t new_mat_id = TakeNextId();
	  std::unique_ptr<Instruction> construct(new Instruction(
	      context(), spv::Op::OpCompositeConstruct, mat_ty_id, new_mat_id,
	      columns));
	  (void)builder.AddInstruction(std::move(construct));
	  context()->ReplaceAllUsesWith(inst->result_id(), new_mat_id);
	  // The original convert is kept, but as a copy of its operand (result type
	  // set to the source-width matrix) so that it stays valid.
	  inst->SetOpcode(spv::Op::OpCopyObject);
	  inst->SetResultType(EquivFloatTypeId(mat_ty_id, src_width));
	  get_def_use_mgr()->AnalyzeInstUse(inst);
	  return true;
	}

	// Asks the decoration manager to remove every OpDecorate RelaxedPrecision
	// attached to |id| and returns the manager's result.
	bool ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) {
	  const auto is_relaxed_decoration = [](const Instruction& dec) {
	    return dec.opcode() == spv::Op::OpDecorate &&
	           spv::Decoration(dec.GetSingleWordInOperand(1u)) ==
	               spv::Decoration::RelaxedPrecision;
	  };
	  return context()->get_decoration_mgr()->RemoveDecorationsFrom(
	      id, is_relaxed_decoration);
	}

	// Rewrites |inst| to operate at half precision: every float32-based operand
	// is routed through a conversion to float16, and a float32-based result type
	// is replaced by its float16 equivalent, with the result id remembered in
	// converted_ids_. Returns true if anything was changed.
	bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
	  bool changed = false;
	  inst->ForEachInId([this, inst, &changed](uint32_t* opnd_idp) {
	    Instruction* opnd_def = get_def_use_mgr()->GetDef(*opnd_idp);
	    if (IsFloat(opnd_def, 32)) {
	      GenConvert(opnd_idp, 16, inst);
	      changed = true;
	    }
	  });
	  if (IsFloat(inst, 32)) {
	    const uint32_t half_ty_id = EquivFloatTypeId(inst->type_id(), 16);
	    inst->SetResultType(half_ty_id);
	    converted_ids_.insert(inst->result_id());
	    changed = true;
	  }
	  // Operands and/or type changed, so the recorded uses must be refreshed.
	  if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return changed;
	}

	// Adds converts of any float operands of phi |inst| to |to_width| if they
	// are of |from_width|. If converting to 16, also changes the type of the phi
	// to its float16 equivalent and remembers the result id in converted_ids_.
	// Converts are added to the preceding blocks, not next to the phi itself.
	// Returns true if |inst| or any preceding block was modified.
	bool ConvertToHalfPass::ProcessPhi(Instruction* inst, uint32_t from_width,
	                                   uint32_t to_width) {
	  uint32_t ocnt = 0;
	  // In-operands are visited in (value id, block id) pairs; this holds the
	  // value slot of the pair being visited. It starts as nullptr so the
	  // pointer is never indeterminate before the first even-numbered visit
	  // assigns it.
	  uint32_t* prev_idp = nullptr;
	  bool modified = false;
	  inst->ForEachInId([&ocnt, &prev_idp, &from_width, &to_width, &modified,
	                     this](uint32_t* idp) {
	    if (ocnt % 2 == 0) {
	      prev_idp = idp;
	    } else {
	      Instruction* val_inst = get_def_use_mgr()->GetDef(*prev_idp);
	      if (IsFloat(val_inst, from_width)) {
	        // Insert in the block named by this operand, before its last
	        // instruction -- or before the OpSelectionMerge/OpLoopMerge when one
	        // immediately precedes that last instruction.
	        BasicBlock* bp = context()->get_instr_block(*idp);
	        auto insert_before = bp->tail();
	        if (insert_before != bp->begin()) {
	          --insert_before;
	          if (insert_before->opcode() != spv::Op::OpSelectionMerge &&
	              insert_before->opcode() != spv::Op::OpLoopMerge)
	            ++insert_before;
	        }
	        GenConvert(prev_idp, to_width, &*insert_before);
	        modified = true;
	      }
	    }
	    ++ocnt;
	  });
	  if (to_width == 16u) {
	    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16u));
	    converted_ids_.insert(inst->result_id());
	    modified = true;
	  }
	  if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return modified;
	}

	// Handles an OpFConvert |inst|. Returns true only if |inst| was actually
	// changed, so an untouched convert does not make the pass report a
	// modification (which would also add the Float16 capability).
	bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
	  bool modified = false;
	  // If float32 and relaxed, change to float16 convert
	  if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
	    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
	    get_def_use_mgr()->AnalyzeInstUse(inst);
	    converted_ids_.insert(inst->result_id());
	    modified = true;
	  }
	  // If operand and result types are the same, change FConvert to CopyObject
	  // to keep validator happy; simplification and DCE will clean it up
	  // One way this can happen is if an FConvert generated during this pass
	  // (likely by ProcessPhi) is later encountered here and its operand has been
	  // changed to half.
	  uint32_t val_id = inst->GetSingleWordInOperand(0);
	  Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
	  if (inst->type_id() == val_inst->type_id()) {
	    inst->SetOpcode(spv::Op::OpCopyObject);
	    modified = true;
	  }
	  return modified;
	}

	// Handles an image instruction |inst|. Only the Dref argument of the opcodes
	// in dref_image_ops_ is touched: if its id was converted by this pass, it is
	// converted back to float32. Returns true if |inst| was changed.
	bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
	  if (dref_image_ops_.count(inst->opcode()) == 0) return false;
	  uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
	  if (converted_ids_.count(dref_id) == 0) return false;
	  GenConvert(&dref_id, 32, inst);
	  inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
	  get_def_use_mgr()->AnalyzeInstUse(inst);
	  return true;
	}

	// Handles an instruction that is not itself being converted: any operand
	// whose id is in converted_ids_ is converted back to float32 in front of
	// |inst|. Phis are delegated to ProcessPhi (16 -> 32). Returns true if an
	// operand of |inst| was replaced.
	bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
	  if (inst->opcode() == spv::Op::OpPhi) return ProcessPhi(inst, 16u, 32u);
	  bool changed = false;
	  inst->ForEachInId([this, inst, &changed](uint32_t* opnd_idp) {
	    const uint32_t before_id = *opnd_idp;
	    if (converted_ids_.count(before_id) == 0) return;
	    GenConvert(opnd_idp, 32, inst);
	    // GenConvert leaves the id untouched when no conversion was needed.
	    if (*opnd_idp != before_id) changed = true;
	  });
	  if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return changed;
	}

	// Dispatches |inst| to the matching handler and returns that handler's
	// "modified" result. Relaxed arithmetic and relaxed phis are converted to
	// half; OpFConvert and image opcodes have dedicated handlers; everything
	// else goes through ProcessDefault.
	bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
	  const bool relaxed = IsRelaxed(inst->result_id());
	  const spv::Op opcode = inst->opcode();
	  if (relaxed && IsArithmetic(inst)) return GenHalfArith(inst);
	  if (relaxed && opcode == spv::Op::OpPhi) return ProcessPhi(inst, 32u, 16u);
	  if (opcode == spv::Op::OpFConvert) return ProcessConvert(inst);
	  if (image_ops_.count(opcode) != 0) return ProcessImageRef(inst);
	  return ProcessDefault(inst);
	}

	// One step of the relaxed-precision closure. Marks the result of |inst| as
	// relaxed and returns true when |inst| is a not-yet-relaxed float32-based
	// value that either (a) is decorated RelaxedPrecision, or (b) has an opcode
	// in closure_ops_ and all of its float32 operands are relaxed, or all of its
	// users are relaxed float32 values. Returns false when nothing new was
	// marked.
	bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
	  const uint32_t result_id = inst->result_id();
	  if (result_id == 0) return false;
	  if (IsRelaxed(result_id)) return false;
	  if (!IsFloat(inst, 32)) return false;
	  if (IsDecoratedRelaxed(inst)) {
	    AddRelaxed(result_id);
	    return true;
	  }
	  if (closure_ops_.count(inst->opcode()) == 0) return false;

	  // Operands that are not float32-based do not take part in the decision.
	  bool operands_relaxed = true;
	  inst->ForEachInId([&operands_relaxed, this](uint32_t* opnd_idp) {
	    Instruction* opnd_def = get_def_use_mgr()->GetDef(*opnd_idp);
	    if (IsFloat(opnd_def, 32) && !IsRelaxed(*opnd_idp))
	      operands_relaxed = false;
	  });
	  if (operands_relaxed) {
	    AddRelaxed(result_id);
	    return true;
	  }

	  // Otherwise every user must be a float32-based value that is relaxed,
	  // whether by decoration or by an earlier closure step.
	  bool users_relaxed = true;
	  get_def_use_mgr()->ForEachUser(
	      inst, [&users_relaxed, this](Instruction* user) {
	        const bool relaxed_float_user =
	            user->result_id() != 0 && IsFloat(user, 32) &&
	            (IsDecoratedRelaxed(user) || IsRelaxed(user->result_id()));
	        if (!relaxed_float_user) users_relaxed = false;
	      });
	  if (!users_relaxed) return false;
	  AddRelaxed(result_id);
	  return true;
	}

	// Processes one function in three sweeps over its blocks in reverse
	// post-order: (1) repeat CloseRelaxInst until no new id becomes relaxed,
	// (2) run GenHalfInst on every instruction, (3) run MatConvertCleanup on
	// every instruction. Returns true if sweep 2 or 3 modified the function.
	bool ConvertToHalfPass::ProcessFunction(Function* func) {
	  // Sweep 1: closure of Relaxed on composite and phi instructions.
	  bool grew;
	  do {
	    grew = false;
	    cfg()->ForEachBlockInReversePostOrder(
	        func->entry().get(), [&grew, this](BasicBlock* bb) {
	          for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	            if (CloseRelaxInst(&*ii)) grew = true;
	          }
	        });
	  } while (grew);

	  // Sweep 2: convert relaxed instructions to half precision.
	  bool modified = false;
	  cfg()->ForEachBlockInReversePostOrder(
	      func->entry().get(), [&modified, this](BasicBlock* bb) {
	        for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	          if (GenHalfInst(&*ii)) modified = true;
	        }
	      });

	  // Sweep 3: replace invalid converts of matrix with equivalent vector
	  // extracts, converts and finally a composite construct.
	  cfg()->ForEachBlockInReversePostOrder(
	      func->entry().get(), [&modified, this](BasicBlock* bb) {
	        for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	          if (MatConvertCleanup(&*ii)) modified = true;
	        }
	      });
	  return modified;
	}

	Pass::Status ConvertToHalfPass::ProcessImpl() {
	Pass::ProcessFunction pfn = [this](Function* fp) {
	return ProcessFunction(fp);
	};
	bool modified = context()->ProcessReachableCallTree(pfn);
	// If modified, make sure module has Float16 capability
	if (modified) context()->AddCapability(spv::Capability::Float16);
	// Remove all RelaxedPrecision decorations from instructions and globals
	for (auto c_id : relaxed_ids_set_) {
	modified \|= RemoveRelaxedDecoration(c_id);
	}
	for (auto& val : get_module()->types_values()) {
	uint32_t v_id = val.result_id();
	if (v_id != 0) {
	modified \|= RemoveRelaxedDecoration(v_id);
	}
	}
	return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
	}

	// Pass entry point: (re)builds the opcode tables and clears per-run state
	// through Initialize(), then hands off to ProcessImpl() and forwards its
	// status.
	Pass::Status ConvertToHalfPass::Process() {
	  Initialize();
	  const Pass::Status status = ProcessImpl();
	  return status;
	}

	void ConvertToHalfPass::Initialize() {
	target_ops_core_ = {
	spv::Op::OpVectorExtractDynamic,
	spv::Op::OpVectorInsertDynamic,
	spv::Op::OpVectorShuffle,
	spv::Op::OpCompositeConstruct,
	spv::Op::OpCompositeInsert,
	spv::Op::OpCompositeExtract,
	spv::Op::OpCopyObject,
	spv::Op::OpTranspose,
	spv::Op::OpConvertSToF,
	spv::Op::OpConvertUToF,
	// spv::Op::OpFConvert,
	// spv::Op::OpQuantizeToF16,
	spv::Op::OpFNegate,
	spv::Op::OpFAdd,
	spv::Op::OpFSub,
	spv::Op::OpFMul,
	spv::Op::OpFDiv,
	spv::Op::OpFMod,
	spv::Op::OpVectorTimesScalar,
	spv::Op::OpMatrixTimesScalar,
	spv::Op::OpVectorTimesMatrix,
	spv::Op::OpMatrixTimesVector,
	spv::Op::OpMatrixTimesMatrix,
	spv::Op::OpOuterProduct,
	spv::Op::OpDot,
	spv::Op::OpSelect,
	spv::Op::OpFOrdEqual,
	spv::Op::OpFUnordEqual,
	spv::Op::OpFOrdNotEqual,
	spv::Op::OpFUnordNotEqual,
	spv::Op::OpFOrdLessThan,
	spv::Op::OpFUnordLessThan,
	spv::Op::OpFOrdGreaterThan,
	spv::Op::OpFUnordGreaterThan,
	spv::Op::OpFOrdLessThanEqual,
	spv::Op::OpFUnordLessThanEqual,
	spv::Op::OpFOrdGreaterThanEqual,
	spv::Op::OpFUnordGreaterThanEqual,
	};
	target_ops_450_ = {
	GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
	GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
	GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
	GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
	GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
	GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
	GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
	GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
	GLSLstd450MatrixInverse,
	// TODO(greg-lunarg): GLSLstd450ModfStruct,
	GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
	GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
	// TODO(greg-lunarg): GLSLstd450FrexpStruct,
	GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
	GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
	GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
	image_ops_ = {spv::Op::OpImageSampleImplicitLod,
	spv::Op::OpImageSampleExplicitLod,
	spv::Op::OpImageSampleDrefImplicitLod,
	spv::Op::OpImageSampleDrefExplicitLod,
	spv::Op::OpImageSampleProjImplicitLod,
	spv::Op::OpImageSampleProjExplicitLod,
	spv::Op::OpImageSampleProjDrefImplicitLod,
	spv::Op::OpImageSampleProjDrefExplicitLod,
	spv::Op::OpImageFetch,
	spv::Op::OpImageGather,
	spv::Op::OpImageDrefGather,
	spv::Op::OpImageRead,
	spv::Op::OpImageSparseSampleImplicitLod,
	spv::Op::OpImageSparseSampleExplicitLod,
	spv::Op::OpImageSparseSampleDrefImplicitLod,
	spv::Op::OpImageSparseSampleDrefExplicitLod,
	spv::Op::OpImageSparseSampleProjImplicitLod,
	spv::Op::OpImageSparseSampleProjExplicitLod,
	spv::Op::OpImageSparseSampleProjDrefImplicitLod,
	spv::Op::OpImageSparseSampleProjDrefExplicitLod,
	spv::Op::OpImageSparseFetch,
	spv::Op::OpImageSparseGather,
	spv::Op::OpImageSparseDrefGather,
	spv::Op::OpImageSparseTexelsResident,
	spv::Op::OpImageSparseRead};
	dref_image_ops_ = {
	spv::Op::OpImageSampleDrefImplicitLod,
	spv::Op::OpImageSampleDrefExplicitLod,
	spv::Op::OpImageSampleProjDrefImplicitLod,
	spv::Op::OpImageSampleProjDrefExplicitLod,
	spv::Op::OpImageDrefGather,
	spv::Op::OpImageSparseSampleDrefImplicitLod,
	spv::Op::OpImageSparseSampleDrefExplicitLod,
	spv::Op::OpImageSparseSampleProjDrefImplicitLod,
	spv::Op::OpImageSparseSampleProjDrefExplicitLod,
	spv::Op::OpImageSparseDrefGather,
	};
	closure_ops_ = {
	spv::Op::OpVectorExtractDynamic,
	spv::Op::OpVectorInsertDynamic,
	spv::Op::OpVectorShuffle,
	spv::Op::OpCompositeConstruct,
	spv::Op::OpCompositeInsert,
	spv::Op::OpCompositeExtract,
	spv::Op::OpCopyObject,
	spv::Op::OpTranspose,
	spv::Op::OpPhi,
	};
	relaxed_ids_set_.clear();
	converted_ids_.clear();
	}

	} // namespace opt
	} // namespace spvtools