|  | // Copyright (c) 2018 Google LLC. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
|  |  | 
|  | #include "source/opt/ir_builder.h" | 
|  | #include "source/opt/ir_context.h" | 
|  | #include "source/opt/loop_descriptor.h" | 
|  | #include "source/opt/loop_peeling.h" | 
|  | #include "source/opt/loop_utils.h" | 
|  | #include "source/opt/scalar_analysis.h" | 
|  | #include "source/opt/scalar_analysis_nodes.h" | 
|  |  | 
|  | namespace spvtools { | 
|  | namespace opt { | 
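// Upper bound on the accepted code growth: a loop is only peeled if
// |peel factor| * |loop size in instructions| stays below this threshold.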
|  | size_t LoopPeelingPass::code_grow_threshold_ = 1000; | 
|  |  | 
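// Duplicates |loop_| and connects the clone so that it runs before the
// original loop: the pre-header now branches to the cloned header, and the
// cloned loop's unique exit branches to the original loop's header.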
|  | void LoopPeeling::DuplicateAndConnectLoop( | 
|  | LoopUtils::LoopCloningResult* clone_results) { | 
|  | CFG& cfg = *context_->cfg(); | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  |  | 
|  | assert(CanPeelLoop() && "Cannot peel loop!"); | 
|  |  | 
|  | std::vector<BasicBlock*> ordered_loop_blocks; | 
|  | // TODO(1841): Handle failure to create pre-header. | 
|  | BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock(); | 
|  |  | 
|  | loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks); | 
|  |  | 
|  | cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks); | 
|  |  | 
// Add the cloned basic blocks to the function.
|  | Function::iterator it = | 
|  | loop_utils_.GetFunction()->FindBlock(pre_header->id()); | 
|  | assert(it != loop_utils_.GetFunction()->end() && | 
|  | "Pre-header not found in the function."); | 
|  | loop_utils_.GetFunction()->AddBasicBlocks( | 
|  | clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it); | 
|  |  | 
// Make |loop_|'s pre-header the pre-header of |cloned_loop_|.
|  | BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock(); | 
|  | pre_header->ForEachSuccessorLabel( | 
|  | [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); }); | 
|  |  | 
|  | // Update cfg. | 
|  | cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id()); | 
|  | cloned_loop_->SetPreHeaderBlock(pre_header); | 
|  | loop_->SetPreHeaderBlock(nullptr); | 
|  |  | 
// When cloning the loop, we did not clone the merge block, so currently
// |cloned_loop_| shares its merge block with |loop_|.
// We rewrite every branch from a |cloned_loop_| block to |loop_|'s merge
// block into a branch to |loop_|'s header (so |loop_|'s header also acts as
// the merge block of |cloned_loop_|).
|  | uint32_t cloned_loop_exit = 0; | 
|  | for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) { | 
|  | if (loop_->IsInsideLoop(pred_id)) continue; | 
|  | BasicBlock* bb = cfg.block(pred_id); | 
|  | assert(cloned_loop_exit == 0 && "The loop has multiple exits."); | 
|  | cloned_loop_exit = bb->id(); | 
|  | bb->ForEachSuccessorLabel([this](uint32_t* succ) { | 
|  | if (*succ == loop_->GetMergeBlock()->id()) | 
|  | *succ = loop_->GetHeaderBlock()->id(); | 
|  | }); | 
|  | } | 
|  |  | 
|  | // Update cfg. | 
|  | cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id()); | 
|  | cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id()); | 
|  |  | 
|  | // Patch the phi of the original loop header: | 
|  | //  - Set the loop entry branch to come from the cloned loop exit block; | 
|  | //  - Set the initial value of the phi using the corresponding cloned loop | 
|  | //    exit values. | 
|  | // | 
// We patch the iterating value initializers of the original loop with the
// corresponding cloned loop exit values. This connects the cloned loop's
// iterating values to the original loop and makes sure that the second loop
// starts with the last value computed by the first loop.
|  | // | 
|  | // For example, loops like: | 
|  | // | 
|  | // int z = 0; | 
|  | // for (int i = 0; i++ < M; i += cst1) { | 
|  | //   if (cond) | 
|  | //     z += cst2; | 
|  | // } | 
|  | // | 
|  | // Will become: | 
|  | // | 
|  | // int z = 0; | 
|  | // int i = 0; | 
|  | // for (; i++ < M; i += cst1) { | 
|  | //   if (cond) | 
|  | //     z += cst2; | 
|  | // } | 
|  | // for (; i++ < M; i += cst1) { | 
|  | //   if (cond) | 
|  | //     z += cst2; | 
|  | // } | 
|  | loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr, | 
|  | clone_results, | 
|  | this](Instruction* phi) { | 
|  | for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { | 
|  | if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) { | 
|  | phi->SetInOperand(i, | 
|  | {clone_results->value_map_.at( | 
|  | exit_value_.at(phi->result_id())->result_id())}); | 
|  | phi->SetInOperand(i + 1, {cloned_loop_exit}); | 
|  | def_use_mgr->AnalyzeInstUse(phi); | 
|  | return; | 
|  | } | 
|  | } | 
|  | }); | 
|  |  | 
|  | // Force the creation of a new preheader for the original loop and set it as | 
|  | // the merge block for the cloned loop. | 
|  | // TODO(1841): Handle failure to create pre-header. | 
|  | cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock()); | 
|  | } | 
|  |  | 
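// Materializes a canonical induction variable in the cloned loop, i.e. a
// counter equivalent to:
//
// for (uint32_t canonical_iv = 0; ...; canonical_iv++) { ... }
//
// If the original loop already had one, its cloned counterpart is reused
// instead.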
|  | void LoopPeeling::InsertCanonicalInductionVariable( | 
|  | LoopUtils::LoopCloningResult* clone_results) { | 
|  | if (original_loop_canonical_induction_variable_) { | 
|  | canonical_induction_variable_ = | 
|  | context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at( | 
|  | original_loop_canonical_induction_variable_->result_id())); | 
|  | return; | 
|  | } | 
|  |  | 
|  | BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail(); | 
|  | if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) { | 
|  | --insert_point; | 
|  | } | 
|  | InstructionBuilder builder( | 
|  | context_, &*insert_point, | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|  | Instruction* uint_1_cst = | 
|  | builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned()); | 
|  | // Create the increment. | 
// Note that we build "1 + 1" here; one of the operands should be the phi
// value, but we do not have it yet. The operand will be set later.
|  | Instruction* iv_inc = builder.AddIAdd( | 
|  | uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id()); | 
|  |  | 
|  | builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin()); | 
|  |  | 
|  | canonical_induction_variable_ = builder.AddPhi( | 
|  | uint_1_cst->type_id(), | 
|  | {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(), | 
|  | GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(), | 
|  | GetClonedLoop()->GetLatchBlock()->id()}); | 
|  | // Connect everything. | 
|  | iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()}); | 
|  |  | 
|  | // Update def/use manager. | 
|  | context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc); | 
|  |  | 
|  | // If do-while form, use the incremented value. | 
|  | if (do_while_form_) { | 
|  | canonical_induction_variable_ = iv_inc; | 
|  | } | 
|  | } | 
|  |  | 
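// Collects in |operations| the instruction |iterator| and all the in-loop
// instructions it transitively depends on.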
|  | void LoopPeeling::GetIteratorUpdateOperations( | 
|  | const Loop* loop, Instruction* iterator, | 
|  | std::unordered_set<Instruction*>* operations) { | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  | operations->insert(iterator); | 
|  | iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) { | 
|  | Instruction* insn = def_use_mgr->GetDef(*id); | 
|  | if (insn->opcode() == SpvOpLabel) { | 
|  | return; | 
|  | } | 
|  | if (operations->count(insn)) { | 
|  | return; | 
|  | } | 
|  | if (!loop->IsInsideLoop(insn)) { | 
|  | return; | 
|  | } | 
|  | GetIteratorUpdateOperations(loop, insn, operations); | 
|  | }); | 
|  | } | 
|  |  | 
// Gather the set of blocks on all the paths from |entry| to |block|.
|  | static void GetBlocksInPath(uint32_t block, uint32_t entry, | 
|  | std::unordered_set<uint32_t>* blocks_in_path, | 
|  | const CFG& cfg) { | 
|  | for (uint32_t pid : cfg.preds(block)) { | 
|  | if (blocks_in_path->insert(pid).second) { | 
|  | if (pid != entry) { | 
|  | GetBlocksInPath(pid, entry, blocks_in_path, cfg); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
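// Returns true if all the instructions on the paths from the loop header to
// the condition check are free of side effects (only branches, merge
// instructions and combinator instructions), so evaluating the condition for
// one extra iteration is safe.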
|  | bool LoopPeeling::IsConditionCheckSideEffectFree() const { | 
|  | CFG& cfg = *context_->cfg(); | 
|  |  | 
|  | // The "do-while" form does not cause issues, the algorithm takes into account | 
|  | // the first iteration. | 
|  | if (!do_while_form_) { | 
|  | uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; | 
|  |  | 
|  | std::unordered_set<uint32_t> blocks_in_path; | 
|  |  | 
|  | blocks_in_path.insert(condition_block_id); | 
|  | GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(), | 
|  | &blocks_in_path, cfg); | 
|  |  | 
|  | for (uint32_t bb_id : blocks_in_path) { | 
|  | BasicBlock* bb = cfg.block(bb_id); | 
|  | if (!bb->WhileEachInst([this](Instruction* insn) { | 
|  | if (insn->IsBranch()) return true; | 
|  | switch (insn->opcode()) { | 
|  | case SpvOpLabel: | 
|  | case SpvOpSelectionMerge: | 
|  | case SpvOpLoopMerge: | 
|  | return true; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | return context_->IsCombinatorInstruction(insn); | 
|  | })) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
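// For each phi instruction of the loop header, records in |exit_value_| the
// value the phi holds when the exit condition is evaluated, or nullptr when
// that value cannot be determined.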
|  | void LoopPeeling::GetIteratingExitValues() { | 
|  | CFG& cfg = *context_->cfg(); | 
|  |  | 
|  | loop_->GetHeaderBlock()->ForEachPhiInst( | 
|  | [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; }); | 
|  |  | 
|  | if (!loop_->GetMergeBlock()) { | 
|  | return; | 
|  | } | 
|  | if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { | 
|  | return; | 
|  | } | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  |  | 
|  | uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; | 
|  |  | 
|  | auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id()); | 
|  | do_while_form_ = std::find(header_pred.begin(), header_pred.end(), | 
|  | condition_block_id) != header_pred.end(); | 
|  | if (do_while_form_) { | 
|  | loop_->GetHeaderBlock()->ForEachPhiInst( | 
|  | [condition_block_id, def_use_mgr, this](Instruction* phi) { | 
|  | std::unordered_set<Instruction*> operations; | 
|  |  | 
|  | for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { | 
|  | if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) { | 
|  | exit_value_[phi->result_id()] = | 
|  | def_use_mgr->GetDef(phi->GetSingleWordInOperand(i)); | 
|  | } | 
|  | } | 
|  | }); | 
|  | } else { | 
|  | DominatorTree* dom_tree = | 
|  | &context_->GetDominatorAnalysis(loop_utils_.GetFunction()) | 
|  | ->GetDomTree(); | 
|  | BasicBlock* condition_block = cfg.block(condition_block_id); | 
|  |  | 
|  | loop_->GetHeaderBlock()->ForEachPhiInst( | 
|  | [dom_tree, condition_block, this](Instruction* phi) { | 
|  | std::unordered_set<Instruction*> operations; | 
|  |  | 
// This is not the back-edge value; check whether the phi instruction is
// the only possible candidate.
|  | GetIteratorUpdateOperations(loop_, phi, &operations); | 
|  |  | 
|  | for (Instruction* insn : operations) { | 
|  | if (insn == phi) { | 
|  | continue; | 
|  | } | 
|  | if (dom_tree->Dominates(context_->get_instr_block(insn), | 
|  | condition_block)) { | 
|  | return; | 
|  | } | 
|  | } | 
|  | exit_value_[phi->result_id()] = phi; | 
|  | }); | 
|  | } | 
|  | } | 
|  |  | 
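// Rewires the exit branch of the cloned loop: the branch condition is
// replaced by the one built by |condition_builder|, the "true" target is
// forced to be the in-loop successor and the "false" target the merge block,
// so the loop exits as soon as the new condition becomes false.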
|  | void LoopPeeling::FixExitCondition( | 
|  | const std::function<uint32_t(Instruction*)>& condition_builder) { | 
|  | CFG& cfg = *context_->cfg(); | 
|  |  | 
|  | uint32_t condition_block_id = 0; | 
|  | for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) { | 
|  | if (GetClonedLoop()->IsInsideLoop(id)) { | 
|  | condition_block_id = id; | 
|  | break; | 
|  | } | 
|  | } | 
assert(condition_block_id != 0 && "2nd loop is improperly connected");
|  |  | 
|  | BasicBlock* condition_block = cfg.block(condition_block_id); | 
|  | Instruction* exit_condition = condition_block->terminator(); | 
|  | assert(exit_condition->opcode() == SpvOpBranchConditional); | 
|  | BasicBlock::iterator insert_point = condition_block->tail(); | 
|  | if (condition_block->GetMergeInst()) { | 
|  | --insert_point; | 
|  | } | 
|  |  | 
|  | exit_condition->SetInOperand(0, {condition_builder(&*insert_point)}); | 
|  |  | 
|  | uint32_t to_continue_block_idx = | 
|  | GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1)) | 
|  | ? 1 | 
|  | : 2; | 
|  | exit_condition->SetInOperand( | 
|  | 1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)}); | 
|  | exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()}); | 
|  |  | 
|  | // Update def/use manager. | 
|  | context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition); | 
|  | } | 
|  |  | 
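// Creates and returns a new block inserted between |bb| and its single
// predecessor. The new block ends with an unconditional branch to |bb|.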
|  | BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) { | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  | CFG& cfg = *context_->cfg(); | 
|  | assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor"); | 
|  |  | 
|  | // TODO(1841): Handle id overflow. | 
|  | std::unique_ptr<BasicBlock> new_bb = | 
|  | MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction( | 
|  | context_, SpvOpLabel, 0, context_->TakeNextId(), {}))); | 
|  | new_bb->SetParent(loop_utils_.GetFunction()); | 
|  | // Update the loop descriptor. | 
|  | Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb]; | 
|  | if (in_loop) { | 
|  | in_loop->AddBasicBlock(new_bb.get()); | 
|  | loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop); | 
|  | } | 
|  |  | 
|  | context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get()); | 
|  | def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst()); | 
|  |  | 
|  | BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]); | 
|  | bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) { | 
|  | if (*id == bb->id()) { | 
|  | *id = new_bb->id(); | 
|  | } | 
|  | }); | 
|  | cfg.RemoveEdge(bb_pred->id(), bb->id()); | 
|  | cfg.AddEdge(bb_pred->id(), new_bb->id()); | 
|  | def_use_mgr->AnalyzeInstUse(&*bb_pred->tail()); | 
|  |  | 
// Update the incoming block of |bb|'s phi instructions.
|  | bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) { | 
|  | phi->SetInOperand(1, {new_bb->id()}); | 
|  | def_use_mgr->AnalyzeInstUse(phi); | 
|  | }); | 
|  | InstructionBuilder( | 
|  | context_, new_bb.get(), | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) | 
|  | .AddBranch(bb->id()); | 
|  | cfg.RegisterBlock(new_bb.get()); | 
|  |  | 
|  | // Add the basic block to the function. | 
|  | Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id()); | 
|  | assert(it != loop_utils_.GetFunction()->end() && | 
|  | "Basic block not found in the function."); | 
|  | BasicBlock* ret = new_bb.get(); | 
|  | loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it); | 
|  | return ret; | 
|  | } | 
|  |  | 
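// Wraps |loop| into an if. The pre-header's unconditional branch to the
// header is replaced so that the region becomes, in pseudo-code:
//
// if (condition)
//   loop(...);
// // |if_merge|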
|  | BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition, | 
|  | BasicBlock* if_merge) { | 
|  | // TODO(1841): Handle failure to create pre-header. | 
|  | BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock(); | 
|  | // Will no longer be a pre-header because of the if. | 
|  | loop->SetPreHeaderBlock(nullptr); | 
|  | // Kill the branch to the header. | 
|  | context_->KillInst(&*if_block->tail()); | 
|  |  | 
|  | InstructionBuilder builder( | 
|  | context_, if_block, | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|  | builder.AddConditionalBranch(condition->result_id(), | 
|  | loop->GetHeaderBlock()->id(), if_merge->id(), | 
|  | if_merge->id()); | 
|  |  | 
|  | return if_block; | 
|  | } | 
|  |  | 
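// Peels the first |peel_factor| iterations. In pseudo-code, with N the
// number of iterations, a loop such as:
//
// for (i = 0; i < N; i++) { ... }
//
// becomes:
//
// for (i = 0; i < min(peel_factor, N); i++) { ... }
// if (peel_factor < N) {
//   for (; i < N; i++) { ... }
// }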
|  | void LoopPeeling::PeelBefore(uint32_t peel_factor) { | 
|  | assert(CanPeelLoop() && "Cannot peel loop"); | 
|  | LoopUtils::LoopCloningResult clone_results; | 
|  |  | 
|  | // Clone the loop and insert the cloned one before the loop. | 
|  | DuplicateAndConnectLoop(&clone_results); | 
|  |  | 
|  | // Add a canonical induction variable "canonical_induction_variable_". | 
|  | InsertCanonicalInductionVariable(&clone_results); | 
|  |  | 
|  | InstructionBuilder builder( | 
|  | context_, &*cloned_loop_->GetPreHeaderBlock()->tail(), | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|  | Instruction* factor = | 
|  | builder.GetIntConstant(peel_factor, int_type_->IsSigned()); | 
|  |  | 
|  | Instruction* has_remaining_iteration = builder.AddLessThan( | 
|  | factor->result_id(), loop_iteration_count_->result_id()); | 
|  | Instruction* max_iteration = builder.AddSelect( | 
|  | factor->type_id(), has_remaining_iteration->result_id(), | 
|  | factor->result_id(), loop_iteration_count_->result_id()); | 
|  |  | 
// Change the exit condition of the cloned loop (the loop exits when the
// condition becomes false) to:
//  "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
|  | FixExitCondition([max_iteration, this](Instruction* insert_before_point) { | 
|  | return InstructionBuilder(context_, insert_before_point, | 
|  | IRContext::kAnalysisDefUse | | 
|  | IRContext::kAnalysisInstrToBlockMapping) | 
|  | .AddLessThan(canonical_induction_variable_->result_id(), | 
|  | max_iteration->result_id()) | 
|  | ->result_id(); | 
|  | }); | 
|  |  | 
|  | // "Protect" the second loop: the second loop can only be executed if | 
|  | // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_). | 
|  | BasicBlock* if_merge_block = loop_->GetMergeBlock(); | 
|  | loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock())); | 
|  | // Prevent the second loop from being executed if we already executed all the | 
|  | // required iterations. | 
|  | BasicBlock* if_block = | 
|  | ProtectLoop(loop_, has_remaining_iteration, if_merge_block); | 
|  | // Patch the phi of the merge block. | 
|  | if_merge_block->ForEachPhiInst( | 
|  | [&clone_results, if_block, this](Instruction* phi) { | 
// |if_merge_block| previously had only one predecessor.
|  | uint32_t incoming_value = phi->GetSingleWordInOperand(0); | 
|  | auto def_in_loop = clone_results.value_map_.find(incoming_value); | 
|  | if (def_in_loop != clone_results.value_map_.end()) | 
|  | incoming_value = def_in_loop->second; | 
|  | phi->AddOperand( | 
|  | {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}}); | 
|  | phi->AddOperand( | 
|  | {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}}); | 
|  | context_->get_def_use_mgr()->AnalyzeInstUse(phi); | 
|  | }); | 
|  |  | 
|  | context_->InvalidateAnalysesExceptFor( | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping | | 
|  | IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG); | 
|  | } | 
|  |  | 
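// Peels the last |peel_factor| iterations. In pseudo-code, with N the
// number of iterations, a loop such as:
//
// for (i = 0; i < N; i++) { ... }
//
// becomes:
//
// if (peel_factor < N) {
//   for (i = 0; i + peel_factor < N; i++) { ... }
// }
// for (; i < N; i++) { ... }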
|  | void LoopPeeling::PeelAfter(uint32_t peel_factor) { | 
|  | assert(CanPeelLoop() && "Cannot peel loop"); | 
|  | LoopUtils::LoopCloningResult clone_results; | 
|  |  | 
|  | // Clone the loop and insert the cloned one before the loop. | 
|  | DuplicateAndConnectLoop(&clone_results); | 
|  |  | 
|  | // Add a canonical induction variable "canonical_induction_variable_". | 
|  | InsertCanonicalInductionVariable(&clone_results); | 
|  |  | 
|  | InstructionBuilder builder( | 
|  | context_, &*cloned_loop_->GetPreHeaderBlock()->tail(), | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|  | Instruction* factor = | 
|  | builder.GetIntConstant(peel_factor, int_type_->IsSigned()); | 
|  |  | 
|  | Instruction* has_remaining_iteration = builder.AddLessThan( | 
|  | factor->result_id(), loop_iteration_count_->result_id()); | 
|  |  | 
// Change the exit condition of the cloned loop (the loop exits when the
// condition becomes false) to:
//  "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
|  | FixExitCondition([factor, this](Instruction* insert_before_point) { | 
|  | InstructionBuilder cond_builder( | 
|  | context_, insert_before_point, | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|  | // Build the following check: canonical_induction_variable_ + factor < | 
|  | // iteration_count | 
|  | return cond_builder | 
|  | .AddLessThan(cond_builder | 
|  | .AddIAdd(canonical_induction_variable_->type_id(), | 
|  | canonical_induction_variable_->result_id(), | 
|  | factor->result_id()) | 
|  | ->result_id(), | 
|  | loop_iteration_count_->result_id()) | 
|  | ->result_id(); | 
|  | }); | 
|  |  | 
|  | // "Protect" the first loop: the first loop can only be executed if | 
|  | // factor < loop_iteration_count_. | 
|  |  | 
|  | // The original loop's pre-header was the cloned loop merge block. | 
|  | GetClonedLoop()->SetMergeBlock( | 
|  | CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock())); | 
// The second loop's pre-header is used as the if-merge block.

// Guard the first loop: skip it entirely when |has_remaining_iteration| is
// false.
|  | BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration, | 
|  | GetOriginalLoop()->GetPreHeaderBlock()); | 
|  |  | 
// Patch the phis of the header block.
// We added an if to enclose the first loop, and because the phi nodes are
// connected to the exit values of the first loop, those definitions no
// longer dominate the pre-header.
// We add the required phi instructions to the pre-header (our if-merge
// block) and patch the header phis.
|  | GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst( | 
|  | [&clone_results, if_block, this](Instruction* phi) { | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  |  | 
|  | auto find_value_idx = [](Instruction* phi_inst, Loop* loop) { | 
|  | uint32_t preheader_value_idx = | 
|  | !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2; | 
|  | return preheader_value_idx; | 
|  | }; | 
|  |  | 
|  | Instruction* cloned_phi = | 
|  | def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id())); | 
|  | uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand( | 
|  | find_value_idx(cloned_phi, GetClonedLoop())); | 
|  |  | 
|  | Instruction* new_phi = | 
|  | InstructionBuilder(context_, | 
|  | &*GetOriginalLoop()->GetPreHeaderBlock()->tail(), | 
|  | IRContext::kAnalysisDefUse | | 
|  | IRContext::kAnalysisInstrToBlockMapping) | 
|  | .AddPhi(phi->type_id(), | 
|  | {phi->GetSingleWordInOperand( | 
|  | find_value_idx(phi, GetOriginalLoop())), | 
|  | GetClonedLoop()->GetMergeBlock()->id(), | 
|  | cloned_preheader_value, if_block->id()}); | 
|  |  | 
|  | phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()), | 
|  | {new_phi->result_id()}); | 
|  | def_use_mgr->AnalyzeInstUse(phi); | 
|  | }); | 
|  |  | 
|  | context_->InvalidateAnalysesExceptFor( | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping | | 
|  | IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG); | 
|  | } | 
|  |  | 
|  | Pass::Status LoopPeelingPass::Process() { | 
|  | bool modified = false; | 
|  | Module* module = context()->module(); | 
|  |  | 
// Process each function in the module.
|  | for (Function& f : *module) { | 
|  | modified |= ProcessFunction(&f); | 
|  | } | 
|  |  | 
|  | return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; | 
|  | } | 
|  |  | 
|  | bool LoopPeelingPass::ProcessFunction(Function* f) { | 
|  | bool modified = false; | 
|  | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); | 
|  |  | 
|  | std::vector<Loop*> to_process_loop; | 
|  | to_process_loop.reserve(loop_descriptor.NumLoops()); | 
|  | for (Loop& l : loop_descriptor) { | 
|  | to_process_loop.push_back(&l); | 
|  | } | 
|  |  | 
|  | ScalarEvolutionAnalysis scev_analysis(context()); | 
|  |  | 
|  | for (Loop* loop : to_process_loop) { | 
|  | CodeMetrics loop_size; | 
|  | loop_size.Analyze(*loop); | 
|  |  | 
|  | auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* { | 
|  | if (!loop_to_peel->IsLCSSA()) { | 
|  | LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA(); | 
|  | } | 
|  |  | 
|  | bool peeled_loop; | 
|  | Loop* still_peelable_loop; | 
|  | std::tie(peeled_loop, still_peelable_loop) = | 
|  | ProcessLoop(loop_to_peel, &loop_size); | 
|  |  | 
|  | if (peeled_loop) { | 
|  | modified = true; | 
|  | } | 
|  |  | 
|  | return still_peelable_loop; | 
|  | }; | 
|  |  | 
|  | Loop* still_peelable_loop = try_peel(loop); | 
// The pass is working out the maximum factor by which a loop can be peeled.
// If the loop can potentially be peeled again, then there is only one
// possible direction, so only one more call is needed.
if (still_peelable_loop) {
try_peel(still_peelable_loop);
|  | } | 
|  | } | 
|  |  | 
|  | return modified; | 
|  | } | 
|  |  | 
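// Processes |loop|. Returns whether the loop was peeled and, when the loop
// could also have been peeled in the other direction, the loop to give
// another try.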
|  | std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop, | 
|  | CodeMetrics* loop_size) { | 
|  | ScalarEvolutionAnalysis* scev_analysis = | 
|  | context()->GetScalarEvolutionAnalysis(); | 
|  | // Default values for bailing out. | 
|  | std::pair<bool, Loop*> bail_out{false, nullptr}; | 
|  |  | 
|  | BasicBlock* exit_block = loop->FindConditionBlock(); | 
|  | if (!exit_block) { | 
|  | return bail_out; | 
|  | } | 
|  |  | 
|  | Instruction* exiting_iv = loop->FindConditionVariable(exit_block); | 
|  | if (!exiting_iv) { | 
|  | return bail_out; | 
|  | } | 
|  | size_t iterations = 0; | 
|  | if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(), | 
|  | &iterations)) { | 
|  | return bail_out; | 
|  | } | 
|  | if (!iterations) { | 
|  | return bail_out; | 
|  | } | 
|  |  | 
|  | Instruction* canonical_induction_variable = nullptr; | 
|  |  | 
|  | loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable, | 
|  | scev_analysis, | 
|  | this](Instruction* insn) { | 
|  | if (const SERecurrentNode* iv = | 
|  | scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) { | 
|  | const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode(); | 
|  | const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode(); | 
|  | if (offset && coeff && offset->FoldToSingleValue() == 0 && | 
|  | coeff->FoldToSingleValue() == 1) { | 
|  | if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) { | 
|  | canonical_induction_variable = insn; | 
|  | return false; | 
|  | } | 
|  | } | 
|  | } | 
|  | return true; | 
|  | }); | 
|  |  | 
|  | bool is_signed = canonical_induction_variable | 
|  | ? context() | 
|  | ->get_type_mgr() | 
|  | ->GetType(canonical_induction_variable->type_id()) | 
|  | ->AsInteger() | 
|  | ->IsSigned() | 
|  | : false; | 
|  |  | 
|  | LoopPeeling peeler( | 
|  | loop, | 
|  | InstructionBuilder( | 
|  | context(), loop->GetHeaderBlock(), | 
|  | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) | 
|  | .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations), | 
|  | is_signed), | 
|  | canonical_induction_variable); | 
|  |  | 
|  | if (!peeler.CanPeelLoop()) { | 
|  | return bail_out; | 
|  | } | 
|  |  | 
// For each basic block in the loop, check if it can be peeled. If it can,
// get the direction (before/after) and the peel factor.
|  | LoopPeelingInfo peel_info(loop, iterations, scev_analysis); | 
|  |  | 
|  | uint32_t peel_before_factor = 0; | 
|  | uint32_t peel_after_factor = 0; | 
|  |  | 
|  | for (uint32_t block : loop->GetBlocks()) { | 
|  | if (block == exit_block->id()) { | 
|  | continue; | 
|  | } | 
|  | BasicBlock* bb = cfg()->block(block); | 
|  | PeelDirection direction; | 
|  | uint32_t factor; | 
|  | std::tie(direction, factor) = peel_info.GetPeelingInfo(bb); | 
|  |  | 
|  | if (direction == PeelDirection::kNone) { | 
|  | continue; | 
|  | } | 
|  | if (direction == PeelDirection::kBefore) { | 
|  | peel_before_factor = std::max(peel_before_factor, factor); | 
|  | } else { | 
|  | assert(direction == PeelDirection::kAfter); | 
|  | peel_after_factor = std::max(peel_after_factor, factor); | 
|  | } | 
|  | } | 
|  | PeelDirection direction = PeelDirection::kNone; | 
|  | uint32_t factor = 0; | 
|  |  | 
|  | // Find which direction we should peel. | 
|  | if (peel_before_factor) { | 
|  | factor = peel_before_factor; | 
|  | direction = PeelDirection::kBefore; | 
|  | } | 
|  | if (peel_after_factor) { | 
|  | if (peel_before_factor < peel_after_factor) { | 
|  | // Favor a peel after here and give the peel before another shot later. | 
|  | factor = peel_after_factor; | 
|  | direction = PeelDirection::kAfter; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Do the peel if we can. | 
|  | if (direction == PeelDirection::kNone) return bail_out; | 
|  |  | 
// This does not take branch-elimination opportunities or unrolling into
// account. It assumes the peeled loop will be unrolled as well.
|  | if (factor * loop_size->roi_size_ > code_grow_threshold_) { | 
|  | return bail_out; | 
|  | } | 
|  | loop_size->roi_size_ *= factor; | 
|  |  | 
// Determine whether the loop should be peeled again.
|  | Loop* extra_opportunity = nullptr; | 
|  |  | 
|  | if (direction == PeelDirection::kBefore) { | 
|  | peeler.PeelBefore(factor); | 
|  | if (stats_) { | 
|  | stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor); | 
|  | } | 
|  | if (peel_after_factor) { | 
|  | // We could have peeled after, give it another try. | 
|  | extra_opportunity = peeler.GetOriginalLoop(); | 
|  | } | 
|  | } else { | 
|  | peeler.PeelAfter(factor); | 
|  | if (stats_) { | 
|  | stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor); | 
|  | } | 
|  | if (peel_before_factor) { | 
|  | // We could have peeled before, give it another try. | 
|  | extra_opportunity = peeler.GetClonedLoop(); | 
|  | } | 
|  | } | 
|  |  | 
|  | return {true, extra_opportunity}; | 
|  | } | 
|  |  | 
|  | uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand( | 
|  | Instruction* condition) const { | 
|  | for (uint32_t i = 0; i < condition->NumInOperands(); i++) { | 
|  | BasicBlock* bb = | 
|  | context_->get_instr_block(condition->GetSingleWordInOperand(i)); | 
|  | if (bb && loop_->IsInsideLoop(bb)) { | 
|  | return condition->GetSingleWordInOperand(i); | 
|  | } | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand( | 
|  | Instruction* condition) const { | 
|  | for (uint32_t i = 0; i < condition->NumInOperands(); i++) { | 
|  | BasicBlock* bb = | 
|  | context_->get_instr_block(condition->GetSingleWordInOperand(i)); | 
|  | if (!bb || !loop_->IsInsideLoop(bb)) { | 
|  | return condition->GetSingleWordInOperand(i); | 
|  | } | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static bool IsHandledCondition(SpvOp opcode) { | 
|  | switch (opcode) { | 
|  | case SpvOpIEqual: | 
|  | case SpvOpINotEqual: | 
|  | case SpvOpUGreaterThan: | 
|  | case SpvOpSGreaterThan: | 
|  | case SpvOpUGreaterThanEqual: | 
|  | case SpvOpSGreaterThanEqual: | 
|  | case SpvOpULessThan: | 
|  | case SpvOpSLessThan: | 
|  | case SpvOpULessThanEqual: | 
|  | case SpvOpSLessThanEqual: | 
|  | return true; | 
|  | default: | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | LoopPeelingPass::LoopPeelingInfo::Direction | 
|  | LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const { | 
|  | if (bb->terminator()->opcode() != SpvOpBranchConditional) { | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
|  | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|  |  | 
|  | Instruction* condition = | 
|  | def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0)); | 
|  |  | 
|  | if (!IsHandledCondition(condition->opcode())) { | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
|  | if (!GetFirstLoopInvariantOperand(condition)) { | 
// No loop-invariant operand; it cannot be peeled by this pass.
|  | return GetNoneDirection(); | 
|  | } | 
|  | if (!GetFirstNonLoopInvariantOperand(condition)) { | 
|  | // Seems to be a job for the unswitch pass. | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
// Left-hand side.
|  | SExpression lhs = scev_analysis_->AnalyzeInstruction( | 
|  | def_use_mgr->GetDef(condition->GetSingleWordInOperand(0))); | 
|  | if (lhs->GetType() == SENode::CanNotCompute) { | 
|  | // Can't make any conclusion. | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
// Right-hand side.
|  | SExpression rhs = scev_analysis_->AnalyzeInstruction( | 
|  | def_use_mgr->GetDef(condition->GetSingleWordInOperand(1))); | 
|  | if (rhs->GetType() == SENode::CanNotCompute) { | 
|  | // Can't make any conclusion. | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
// Only take into account recurrent expressions over the current loop.
|  | bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs); | 
|  | bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs); | 
|  |  | 
|  | if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) { | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
|  | if (is_lhs_rec) { | 
|  | if (!lhs->AsSERecurrentNode() || | 
|  | lhs->AsSERecurrentNode()->GetLoop() != loop_) { | 
|  | return GetNoneDirection(); | 
|  | } | 
|  | } | 
|  | if (is_rhs_rec) { | 
|  | if (!rhs->AsSERecurrentNode() || | 
|  | rhs->AsSERecurrentNode()->GetLoop() != loop_) { | 
|  | return GetNoneDirection(); | 
|  | } | 
|  | } | 
|  |  | 
// If the opcode is == or !=, then we try to peel before or after.
// If the opcode is not <, >, <= or >=, we bail out.
//
// For the remaining cases, we canonicalize the expression so that the
// constant expression is on the left-hand side and the recurrent expression
// is on the right-hand side. If we swap the operands, then < becomes >, <=
// becomes >=, etc.
// If the opcode is <=, then we add 1 to the right-hand side and do the peel
// check on <.
// If the opcode is >=, then we add 1 to the left-hand side and do the peel
// check on >.
|  |  | 
|  | CmpOperator cmp_operator; | 
|  | switch (condition->opcode()) { | 
|  | default: | 
|  | return GetNoneDirection(); | 
|  | case SpvOpIEqual: | 
|  | case SpvOpINotEqual: | 
|  | return HandleEquality(lhs, rhs); | 
|  | case SpvOpUGreaterThan: | 
|  | case SpvOpSGreaterThan: { | 
|  | cmp_operator = CmpOperator::kGT; | 
|  | break; | 
|  | } | 
|  | case SpvOpULessThan: | 
|  | case SpvOpSLessThan: { | 
|  | cmp_operator = CmpOperator::kLT; | 
|  | break; | 
|  | } | 
|  | // We add one to transform >= into > and <= into <. | 
|  | case SpvOpUGreaterThanEqual: | 
|  | case SpvOpSGreaterThanEqual: { | 
|  | cmp_operator = CmpOperator::kGE; | 
|  | break; | 
|  | } | 
|  | case SpvOpULessThanEqual: | 
|  | case SpvOpSLessThanEqual: { | 
|  | cmp_operator = CmpOperator::kLE; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
// Force the left-hand side to be the non-recurrent expression.
|  | if (is_lhs_rec) { | 
|  | std::swap(lhs, rhs); | 
|  | switch (cmp_operator) { | 
|  | case CmpOperator::kLT: { | 
|  | cmp_operator = CmpOperator::kGT; | 
|  | break; | 
|  | } | 
|  | case CmpOperator::kGT: { | 
|  | cmp_operator = CmpOperator::kLT; | 
|  | break; | 
|  | } | 
|  | case CmpOperator::kLE: { | 
|  | cmp_operator = CmpOperator::kGE; | 
|  | break; | 
|  | } | 
|  | case CmpOperator::kGE: { | 
|  | cmp_operator = CmpOperator::kLE; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode()); | 
|  | } | 
|  |  | 
|  | SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration( | 
|  | SERecurrentNode* rec) const { | 
|  | return rec->GetOffset(); | 
|  | } | 
|  |  | 
|  | SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration( | 
|  | SERecurrentNode* rec, int64_t iteration) const { | 
|  | SExpression coeff = rec->GetCoefficient(); | 
|  | SExpression offset = rec->GetOffset(); | 
|  |  | 
|  | return (coeff * iteration) + offset; | 
|  | } | 
|  |  | 
|  | SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration( | 
|  | SERecurrentNode* rec) const { | 
|  | return GetValueAtIteration(rec, loop_max_iterations_ - 1); | 
|  | } | 
|  |  | 
|  | bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op, | 
|  | SExpression lhs, | 
|  | SExpression rhs, | 
|  | bool* result) const { | 
|  | assert(scev_analysis_->IsLoopInvariant(loop_, lhs)); | 
|  | assert(scev_analysis_->IsLoopInvariant(loop_, rhs)); | 
// We rewrite the test as: 0 cmp_op (rhs - lhs).
// What is left is to determine the sign of that expression.
|  | switch (cmp_op) { | 
|  | case CmpOperator::kLT: { | 
|  | return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result); | 
|  | } | 
|  | case CmpOperator::kGT: { | 
|  | return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result); | 
|  | } | 
|  | case CmpOperator::kLE: { | 
|  | return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result); | 
|  | } | 
|  | case CmpOperator::kGE: { | 
|  | return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result); | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | LoopPeelingPass::LoopPeelingInfo::Direction | 
|  | LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs, | 
|  | SExpression rhs) const { | 
|  | { | 
|  | // Try peel before opportunity. | 
|  | SExpression lhs_cst = lhs; | 
|  | if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) { | 
|  | lhs_cst = rec_node->GetOffset(); | 
|  | } | 
|  | SExpression rhs_cst = rhs; | 
|  | if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) { | 
|  | rhs_cst = rec_node->GetOffset(); | 
|  | } | 
|  |  | 
|  | if (lhs_cst == rhs_cst) { | 
|  | return Direction{LoopPeelingPass::PeelDirection::kBefore, 1}; | 
|  | } | 
|  | } | 
|  |  | 
|  | { | 
|  | // Try peel after opportunity. | 
|  | SExpression lhs_cst = lhs; | 
|  | if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) { | 
|  | // rec_node(x) = a * x + b | 
|  | // assign to lhs: a * (loop_max_iterations_ - 1) + b | 
|  | lhs_cst = GetValueAtLastIteration(rec_node); | 
|  | } | 
|  | SExpression rhs_cst = rhs; | 
|  | if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) { | 
// rec_node(x) = a * x + b
// assign to rhs: a * (loop_max_iterations_ - 1) + b
|  | rhs_cst = GetValueAtLastIteration(rec_node); | 
|  | } | 
|  |  | 
|  | if (lhs_cst == rhs_cst) { | 
|  | return Direction{LoopPeelingPass::PeelDirection::kAfter, 1}; | 
|  | } | 
|  | } | 
|  |  | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
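// Handles a condition of the form |lhs| cmp_op |rhs|, where |rhs| is the
// recurrence A * i + B over the loop. The iteration at which the condition
// flips is (|lhs| - B) / A. For example, in a loop of 10 iterations with the
// condition "i < 3" (A = 1, B = 0), the condition flips at iteration 3;
// since 3 is closer to the start than to the end, we peel 3 iterations
// before the loop.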
|  | LoopPeelingPass::LoopPeelingInfo::Direction | 
|  | LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op, | 
|  | SExpression lhs, | 
|  | SERecurrentNode* rhs) const { | 
|  | SExpression offset = rhs->GetOffset(); | 
|  | SExpression coefficient = rhs->GetCoefficient(); | 
|  | // Compute (cst - B) / A. | 
|  | std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient; | 
|  | if (!flip_iteration.first->AsSEConstantNode()) { | 
|  | return GetNoneDirection(); | 
|  | } | 
// Note: !!flip_iteration.second normalizes the remainder to 0 or 1.
|  | int64_t iteration = | 
|  | flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() + | 
|  | !!flip_iteration.second; | 
|  | if (iteration <= 0 || | 
|  | loop_max_iterations_ <= static_cast<uint64_t>(iteration)) { | 
|  | // Always true or false within the loop bounds. | 
|  | return GetNoneDirection(); | 
|  | } | 
// If this is a <= or >= operator, make sure |iteration| is the iteration at
// which the condition flips.
// If (cst - B) is not divisible by A, this is equivalent to a < or > check,
// so we skip this test.
|  | if (!flip_iteration.second && | 
|  | (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) { | 
|  | bool first_iteration; | 
|  | bool current_iteration; | 
|  | if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) || | 
|  | !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration), | 
&current_iteration)) {
|  | return GetNoneDirection(); | 
|  | } | 
// If the condition has not flipped at this iteration, the next one will.
|  | if (first_iteration == current_iteration) { | 
|  | iteration++; | 
|  | } | 
|  | } | 
|  |  | 
|  | uint32_t cast_iteration = 0; | 
// Sanity check: can we fit |iteration| in a uint32_t?
|  | if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) { | 
|  | cast_iteration = static_cast<uint32_t>(iteration); | 
|  | } | 
|  |  | 
|  | if (cast_iteration) { | 
|  | // Peel before if we are closer to the start, after if closer to the end. | 
|  | if (loop_max_iterations_ / 2 > cast_iteration) { | 
|  | return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration}; | 
|  | } else { | 
|  | return Direction{ | 
|  | LoopPeelingPass::PeelDirection::kAfter, | 
|  | static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)}; | 
|  | } | 
|  | } | 
|  |  | 
|  | return GetNoneDirection(); | 
|  | } | 
|  |  | 
|  | }  // namespace opt | 
|  | }  // namespace spvtools |