|  | // Copyright (c) 2018 Google LLC. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | #include "source/opt/loop_fission.h" | 
|  |  | 
|  | #include <set> | 
|  |  | 
|  | #include "source/opt/register_pressure.h" | 
|  |  | 
|  | // Implement loop fission with an optional parameter to split only | 
|  | // if the register pressure in a given loop meets a certain criteria. This is | 
|  | // controlled via the constructors of LoopFissionPass. | 
|  | // | 
|  | // 1 - Build a list of loops to be split, these are top level loops (loops | 
|  | // without child loops themselves) which meet the register pressure criteria, as | 
|  | // determined by the ShouldSplitLoop method of LoopFissionPass. | 
|  | // | 
|  | // 2 - For each loop in the list, group each instruction into a set of related | 
|  | // instructions by traversing each instructions users and operands recursively. | 
|  | // We stop if we encounter an instruction we have seen before or an instruction | 
|  | // which we don't consider relevant (i.e OpLoopMerge). We then group these | 
|  | // groups into two different sets, one for the first loop and one for the | 
|  | // second. | 
|  | // | 
|  | // 3 - We then run CanPerformSplit to check that it would be legal to split a | 
|  | // loop using those two sets. We check that we haven't altered the relative | 
|  | // order load/stores appear in the binary and that we aren't breaking any | 
|  | // dependency between load/stores by splitting them into two loops. We also | 
|  | // check that none of the OpBranch instructions are dependent on a load as we | 
|  | // leave control flow structure intact and move only instructions in the body so | 
|  | // we want to avoid any loads with side affects or aliasing. | 
|  | // | 
|  | // 4 - We then split the loop by calling SplitLoop. This function clones the | 
|  | // loop and attaches it to the preheader and connects the new loops merge block | 
|  | // to the current loop header block. We then use the two sets built in step 2 to | 
|  | // remove instructions from each loop. If an instruction appears in the first | 
|  | // set it is removed from the second loop and vice versa. | 
|  | // | 
|  | // 5 - If the multiple split passes flag is set we check if each of the loops | 
|  | // still meet the register pressure criteria. If they do then we add them to the | 
|  | // list of loops to be split (created in step one) to allow for loops to be | 
|  | // split multiple times. | 
|  | // | 
|  |  | 
|  | namespace spvtools { | 
|  | namespace opt { | 
|  |  | 
|  | class LoopFissionImpl { | 
|  | public: | 
|  | LoopFissionImpl(IRContext* context, Loop* loop) | 
|  | : context_(context), loop_(loop), load_used_in_condition_(false) {} | 
|  |  | 
|  | // Group each instruction in the loop into sets of instructions related by | 
|  | // their usedef chains. An instruction which uses another will appear in the | 
|  | // same set. Then merge those sets into just two sets. Returns false if there | 
|  | // was one or less sets created. | 
|  | bool GroupInstructionsByUseDef(); | 
|  |  | 
|  | // Check if the sets built by GroupInstructionsByUseDef violate any data | 
|  | // dependence rules. | 
|  | bool CanPerformSplit(); | 
|  |  | 
|  | // Split the loop and return a pointer to the new loop. | 
|  | Loop* SplitLoop(); | 
|  |  | 
|  | // Checks if |inst| is safe to move. We can only move instructions which don't | 
|  | // have any side effects and OpLoads and OpStores. | 
|  | bool MovableInstruction(const Instruction& inst) const; | 
|  |  | 
|  | private: | 
|  | // Traverse the def use chain of |inst| and add the users and uses of |inst| | 
|  | // which are in the same loop to the |returned_set|. | 
|  | void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set, | 
|  | bool ignore_phi_users = false, bool report_loads = false); | 
|  |  | 
|  | // We group the instructions in the block into two different groups, the | 
|  | // instructions to be kept in the original loop and the ones to be cloned into | 
|  | // the new loop. As the cloned loop is attached to the preheader it will be | 
|  | // the first loop and the second loop will be the original. | 
|  | std::set<Instruction*> cloned_loop_instructions_; | 
|  | std::set<Instruction*> original_loop_instructions_; | 
|  |  | 
|  | // We need a set of all the instructions to be seen so we can break any | 
|  | // recursion and also so we can ignore certain instructions by preemptively | 
|  | // adding them to this set. | 
|  | std::set<Instruction*> seen_instructions_; | 
|  |  | 
|  | // A map of instructions to their relative position in the function. | 
|  | std::map<Instruction*, size_t> instruction_order_; | 
|  |  | 
|  | IRContext* context_; | 
|  |  | 
|  | Loop* loop_; | 
|  |  | 
|  | // This is set to true by TraverseUseDef when traversing the instructions | 
|  | // related to the loop condition and any if conditions should any of those | 
|  | // instructions be a load. | 
|  | bool load_used_in_condition_; | 
|  | }; | 
|  |  | 
|  | bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const { | 
|  | return inst.opcode() == SpvOp::SpvOpLoad || | 
|  | inst.opcode() == SpvOp::SpvOpStore || | 
|  | inst.opcode() == SpvOp::SpvOpSelectionMerge || | 
|  | inst.opcode() == SpvOp::SpvOpPhi || inst.IsOpcodeCodeMotionSafe(); | 
|  | } | 
|  |  | 
|  | void LoopFissionImpl::TraverseUseDef(Instruction* inst, | 
|  | std::set<Instruction*>* returned_set, | 
|  | bool ignore_phi_users, bool report_loads) { | 
|  | assert(returned_set && "Set to be returned cannot be null."); | 
|  |  | 
|  | analysis::DefUseManager* def_use = context_->get_def_use_mgr(); | 
|  | std::set<Instruction*>& inst_set = *returned_set; | 
|  |  | 
|  | // We create this functor to traverse the use def chain to build the | 
|  | // grouping of related instructions. The lambda captures the std::function | 
|  | // to allow it to recurse. | 
|  | std::function<void(Instruction*)> traverser_functor; | 
|  | traverser_functor = [this, def_use, &inst_set, &traverser_functor, | 
|  | ignore_phi_users, report_loads](Instruction* user) { | 
|  | // If we've seen the instruction before or it is not inside the loop end the | 
|  | // traversal. | 
|  | if (!user || seen_instructions_.count(user) != 0 || | 
|  | !context_->get_instr_block(user) || | 
|  | !loop_->IsInsideLoop(context_->get_instr_block(user))) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Don't include labels or loop merge instructions in the instruction sets. | 
|  | // Including them would mean we group instructions related only by using the | 
|  | // same labels (i.e phis). We already preempt the inclusion of | 
|  | // OpSelectionMerge by adding related instructions to the seen_instructions_ | 
|  | // set. | 
|  | if (user->opcode() == SpvOp::SpvOpLoopMerge || | 
|  | user->opcode() == SpvOp::SpvOpLabel) | 
|  | return; | 
|  |  | 
|  | // If the |report_loads| flag is set, set the class field | 
|  | // load_used_in_condition_ to false. This is used to check that none of the | 
|  | // condition checks in the loop rely on loads. | 
|  | if (user->opcode() == SpvOp::SpvOpLoad && report_loads) { | 
|  | load_used_in_condition_ = true; | 
|  | } | 
|  |  | 
|  | // Add the instruction to the set of instructions already seen, this breaks | 
|  | // recursion and allows us to ignore certain instructions. | 
|  | seen_instructions_.insert(user); | 
|  |  | 
|  | inst_set.insert(user); | 
|  |  | 
|  | // Wrapper functor to traverse the operands of each instruction. | 
|  | auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) { | 
|  | traverser_functor(def_use->GetDef(*id)); | 
|  | }; | 
|  | user->ForEachInOperand(traverse_operand); | 
|  |  | 
|  | // For the first traversal we want to ignore the users of the phi. | 
|  | if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return; | 
|  |  | 
|  | // Traverse each user with this lambda. | 
|  | def_use->ForEachUser(user, traverser_functor); | 
|  |  | 
|  | // Wrapper functor for the use traversal. | 
|  | auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) { | 
|  | traverser_functor(use); | 
|  | }; | 
|  | def_use->ForEachUse(user, traverse_use); | 
|  |  | 
|  | }; | 
|  |  | 
|  | // We start the traversal of the use def graph by invoking the above | 
|  | // lambda with the |inst| parameter. | 
|  | traverser_functor(inst); | 
|  | } | 
|  |  | 
|  | bool LoopFissionImpl::GroupInstructionsByUseDef() { | 
|  | std::vector<std::set<Instruction*>> sets{}; | 
|  |  | 
|  | // We want to ignore all the instructions stemming from the loop condition | 
|  | // instruction. | 
|  | BasicBlock* condition_block = loop_->FindConditionBlock(); | 
|  |  | 
|  | if (!condition_block) return false; | 
|  | Instruction* condition = &*condition_block->tail(); | 
|  |  | 
|  | // We iterate over the blocks via iterating over all the blocks in the | 
|  | // function, we do this so we are iterating in the same order which the blocks | 
|  | // appear in the binary. | 
|  | Function& function = *loop_->GetHeaderBlock()->GetParent(); | 
|  |  | 
|  | // Create a temporary set to ignore certain groups of instructions within the | 
|  | // loop. We don't want any instructions related to control flow to be removed | 
|  | // from either loop only instructions within the control flow bodies. | 
|  | std::set<Instruction*> instructions_to_ignore{}; | 
|  | TraverseUseDef(condition, &instructions_to_ignore, true, true); | 
|  |  | 
|  | // Traverse control flow instructions to ensure they are added to the | 
|  | // seen_instructions_ set and will be ignored when it it called with actual | 
|  | // sets. | 
|  | for (BasicBlock& block : function) { | 
|  | if (!loop_->IsInsideLoop(block.id())) continue; | 
|  |  | 
|  | for (Instruction& inst : block) { | 
|  | // Ignore all instructions related to control flow. | 
|  | if (inst.opcode() == SpvOp::SpvOpSelectionMerge || inst.IsBranch()) { | 
|  | TraverseUseDef(&inst, &instructions_to_ignore, true, true); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Traverse the instructions and generate the sets, automatically ignoring any | 
|  | // instructions in instructions_to_ignore. | 
|  | for (BasicBlock& block : function) { | 
|  | if (!loop_->IsInsideLoop(block.id()) || | 
|  | loop_->GetHeaderBlock()->id() == block.id()) | 
|  | continue; | 
|  |  | 
|  | for (Instruction& inst : block) { | 
|  | // Record the order that each load/store is seen. | 
|  | if (inst.opcode() == SpvOp::SpvOpLoad || | 
|  | inst.opcode() == SpvOp::SpvOpStore) { | 
|  | instruction_order_[&inst] = instruction_order_.size(); | 
|  | } | 
|  |  | 
|  | // Ignore instructions already seen in a traversal. | 
|  | if (seen_instructions_.count(&inst) != 0) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // Build the set. | 
|  | std::set<Instruction*> inst_set{}; | 
|  | TraverseUseDef(&inst, &inst_set); | 
|  | if (!inst_set.empty()) sets.push_back(std::move(inst_set)); | 
|  | } | 
|  | } | 
|  |  | 
|  | // If we have one or zero sets return false to indicate that due to | 
|  | // insufficient instructions we couldn't split the loop into two groups and | 
|  | // thus the loop can't be split any further. | 
|  | if (sets.size() < 2) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Merge the loop sets into two different sets. In CanPerformSplit we will | 
|  | // validate that we don't break the relative ordering of loads/stores by doing | 
|  | // this. | 
|  | for (size_t index = 0; index < sets.size() / 2; ++index) { | 
|  | cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end()); | 
|  | } | 
|  | for (size_t index = sets.size() / 2; index < sets.size(); ++index) { | 
|  | original_loop_instructions_.insert(sets[index].begin(), sets[index].end()); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool LoopFissionImpl::CanPerformSplit() { | 
|  | // Return false if any of the condition instructions in the loop depend on a | 
|  | // load. | 
|  | if (load_used_in_condition_) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Build a list of all parent loops of this loop. Loop dependence analysis | 
|  | // needs this structure. | 
|  | std::vector<const Loop*> loops; | 
|  | Loop* parent_loop = loop_; | 
|  | while (parent_loop) { | 
|  | loops.push_back(parent_loop); | 
|  | parent_loop = parent_loop->GetParent(); | 
|  | } | 
|  |  | 
|  | LoopDependenceAnalysis analysis{context_, loops}; | 
|  |  | 
|  | // A list of all the stores in the cloned loop. | 
|  | std::vector<Instruction*> set_one_stores{}; | 
|  |  | 
|  | // A list of all the loads in the cloned loop. | 
|  | std::vector<Instruction*> set_one_loads{}; | 
|  |  | 
|  | // Populate the above lists. | 
|  | for (Instruction* inst : cloned_loop_instructions_) { | 
|  | if (inst->opcode() == SpvOp::SpvOpStore) { | 
|  | set_one_stores.push_back(inst); | 
|  | } else if (inst->opcode() == SpvOp::SpvOpLoad) { | 
|  | set_one_loads.push_back(inst); | 
|  | } | 
|  |  | 
|  | // If we find any instruction which we can't move (such as a barrier), | 
|  | // return false. | 
|  | if (!MovableInstruction(*inst)) return false; | 
|  | } | 
|  |  | 
|  | // We need to calculate the depth of the loop to create the loop dependency | 
|  | // distance vectors. | 
|  | const size_t loop_depth = loop_->GetDepth(); | 
|  |  | 
|  | // Check the dependencies between loads in the cloned loop and stores in the | 
|  | // original and vice versa. | 
|  | for (Instruction* inst : original_loop_instructions_) { | 
|  | // If we find any instruction which we can't move (such as a barrier), | 
|  | // return false. | 
|  | if (!MovableInstruction(*inst)) return false; | 
|  |  | 
|  | // Look at the dependency between the loads in the original and stores in | 
|  | // the cloned loops. | 
|  | if (inst->opcode() == SpvOp::SpvOpLoad) { | 
|  | for (Instruction* store : set_one_stores) { | 
|  | DistanceVector vec{loop_depth}; | 
|  |  | 
|  | // If the store actually should appear after the load, return false. | 
|  | // This means the store has been placed in the wrong grouping. | 
|  | if (instruction_order_[store] > instruction_order_[inst]) { | 
|  | return false; | 
|  | } | 
|  | // If not independent check the distance vector. | 
|  | if (!analysis.GetDependence(store, inst, &vec)) { | 
|  | for (DistanceEntry& entry : vec.GetEntries()) { | 
|  | // A distance greater than zero means that the store in the cloned | 
|  | // loop has a dependency on the load in the original loop. | 
|  | if (entry.distance > 0) return false; | 
|  | } | 
|  | } | 
|  | } | 
|  | } else if (inst->opcode() == SpvOp::SpvOpStore) { | 
|  | for (Instruction* load : set_one_loads) { | 
|  | DistanceVector vec{loop_depth}; | 
|  |  | 
|  | // If the load actually should appear after the store, return false. | 
|  | if (instruction_order_[load] > instruction_order_[inst]) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // If not independent check the distance vector. | 
|  | if (!analysis.GetDependence(inst, load, &vec)) { | 
|  | for (DistanceEntry& entry : vec.GetEntries()) { | 
|  | // A distance less than zero means the load in the cloned loop is | 
|  | // dependent on the store instruction in the original loop. | 
|  | if (entry.distance < 0) return false; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | Loop* LoopFissionImpl::SplitLoop() { | 
|  | // Clone the loop. | 
|  | LoopUtils util{context_, loop_}; | 
|  | LoopUtils::LoopCloningResult clone_results; | 
|  | Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results); | 
|  |  | 
|  | // Update the OpLoopMerge in the cloned loop. | 
|  | cloned_loop->UpdateLoopMergeInst(); | 
|  |  | 
|  | // Add the loop_ to the module. | 
|  | // TODO(1841): Handle failure to create pre-header. | 
|  | Function::iterator it = | 
|  | util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id()); | 
|  | util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(), | 
|  | clone_results.cloned_bb_.end(), ++it); | 
|  | loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock()); | 
|  |  | 
|  | std::vector<Instruction*> instructions_to_kill{}; | 
|  |  | 
|  | // Kill all the instructions which should appear in the cloned loop but not in | 
|  | // the original loop. | 
|  | for (uint32_t id : loop_->GetBlocks()) { | 
|  | BasicBlock* block = context_->cfg()->block(id); | 
|  |  | 
|  | for (Instruction& inst : *block) { | 
|  | // If the instruction appears in the cloned loop instruction group, kill | 
|  | // it. | 
|  | if (cloned_loop_instructions_.count(&inst) == 1 && | 
|  | original_loop_instructions_.count(&inst) == 0) { | 
|  | instructions_to_kill.push_back(&inst); | 
|  | if (inst.opcode() == SpvOp::SpvOpPhi) { | 
|  | context_->ReplaceAllUsesWith( | 
|  | inst.result_id(), clone_results.value_map_[inst.result_id()]); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Kill all instructions which should appear in the original loop and not in | 
|  | // the cloned loop. | 
|  | for (uint32_t id : cloned_loop->GetBlocks()) { | 
|  | BasicBlock* block = context_->cfg()->block(id); | 
|  | for (Instruction& inst : *block) { | 
|  | Instruction* old_inst = clone_results.ptr_map_[&inst]; | 
|  | // If the instruction belongs to the original loop instruction group, kill | 
|  | // it. | 
|  | if (cloned_loop_instructions_.count(old_inst) == 0 && | 
|  | original_loop_instructions_.count(old_inst) == 1) { | 
|  | instructions_to_kill.push_back(&inst); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | for (Instruction* i : instructions_to_kill) { | 
|  | context_->KillInst(i); | 
|  | } | 
|  |  | 
|  | return cloned_loop; | 
|  | } | 
|  |  | 
|  | LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split, | 
|  | bool split_multiple_times) | 
|  | : split_multiple_times_(split_multiple_times) { | 
|  | // Split if the number of registers in the loop exceeds | 
|  | // |register_threshold_to_split|. | 
|  | split_criteria_ = | 
|  | [register_threshold_to_split]( | 
|  | const RegisterLiveness::RegionRegisterLiveness& liveness) { | 
|  | return liveness.used_registers_ > register_threshold_to_split; | 
|  | }; | 
|  | } | 
|  |  | 
|  | LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) { | 
|  | // Split by default. | 
|  | split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) { | 
|  | return true; | 
|  | }; | 
|  | } | 
|  |  | 
|  | bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) { | 
|  | LivenessAnalysis* analysis = c->GetLivenessAnalysis(); | 
|  |  | 
|  | RegisterLiveness::RegionRegisterLiveness liveness{}; | 
|  |  | 
|  | Function* function = loop.GetHeaderBlock()->GetParent(); | 
|  | analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness); | 
|  |  | 
|  | return split_criteria_(liveness); | 
|  | } | 
|  |  | 
|  | Pass::Status LoopFissionPass::Process() { | 
|  | bool changed = false; | 
|  |  | 
|  | for (Function& f : *context()->module()) { | 
|  | // We collect all the inner most loops in the function and run the loop | 
|  | // splitting util on each. The reason we do this is to allow us to iterate | 
|  | // over each, as creating new loops will invalidate the loop iterator. | 
|  | std::vector<Loop*> inner_most_loops{}; | 
|  | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f); | 
|  | for (Loop& loop : loop_descriptor) { | 
|  | if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) { | 
|  | inner_most_loops.push_back(&loop); | 
|  | } | 
|  | } | 
|  |  | 
|  | // List of new loops which meet the criteria to be split again. | 
|  | std::vector<Loop*> new_loops_to_split{}; | 
|  |  | 
|  | while (!inner_most_loops.empty()) { | 
|  | for (Loop* loop : inner_most_loops) { | 
|  | LoopFissionImpl impl{context(), loop}; | 
|  |  | 
|  | // Group the instructions in the loop into two different sets of related | 
|  | // instructions. If we can't group the instructions into the two sets | 
|  | // then we can't split the loop any further. | 
|  | if (!impl.GroupInstructionsByUseDef()) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (impl.CanPerformSplit()) { | 
|  | Loop* second_loop = impl.SplitLoop(); | 
|  | changed = true; | 
|  | context()->InvalidateAnalysesExceptFor( | 
|  | IRContext::kAnalysisLoopAnalysis); | 
|  |  | 
|  | // If the newly created loop meets the criteria to be split, split it | 
|  | // again. | 
|  | if (ShouldSplitLoop(*second_loop, context())) | 
|  | new_loops_to_split.push_back(second_loop); | 
|  |  | 
|  | // If the original loop (now split) still meets the criteria to be | 
|  | // split, split it again. | 
|  | if (ShouldSplitLoop(*loop, context())) | 
|  | new_loops_to_split.push_back(loop); | 
|  | } | 
|  | } | 
|  |  | 
|  | // If the split multiple times flag has been set add the new loops which | 
|  | // meet the splitting criteria into the list of loops to be split on the | 
|  | // next iteration. | 
|  | if (split_multiple_times_) { | 
|  | inner_most_loops = std::move(new_loops_to_split); | 
|  | } else { | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return changed ? Pass::Status::SuccessWithChange | 
|  | : Pass::Status::SuccessWithoutChange; | 
|  | } | 
|  |  | 
|  | }  // namespace opt | 
|  | }  // namespace spvtools |