Add loop peeling utility
The loop peeler utility takes a loop as input and creates a new, duplicated
loop before it.
The iterator of the duplicated loop is then set to accommodate the number
of iterations required for the peeling.
The loop peeling pass, which decides whether to peel and performs the
profitability analysis, is left for a follow-up PR.
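
A rough usage sketch (hypothetical driver code; only the LoopPeeling API shown
below is part of this change):

  // |loop_count| must be a 32-bit integer instruction that is invariant for
  // |loop| (e.g. the loop's constant upper bound).
  opt::LoopPeeling peeler(context, loop, loop_count);
  if (peeler.CanPeelLoop()) {
    peeler.PeelBefore(2);   // or peeler.PeelAfter(2) for the last iterations
  }
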
diff --git a/Android.mk b/Android.mk
index e2afc82..04a84a1 100644
--- a/Android.mk
+++ b/Android.mk
@@ -100,6 +100,7 @@
source/opt/local_single_store_elim_pass.cpp \
source/opt/local_ssa_elim_pass.cpp \
source/opt/loop_descriptor.cpp \
+ source/opt/loop_peeling.cpp \
source/opt/loop_unroller.cpp \
source/opt/loop_unswitch_pass.cpp \
source/opt/loop_utils.cpp \
diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt
index 4121f7c..bc442e6 100644
--- a/source/opt/CMakeLists.txt
+++ b/source/opt/CMakeLists.txt
@@ -58,6 +58,7 @@
local_ssa_elim_pass.h
log.h
loop_descriptor.h
+ loop_peeling.h
loop_unroller.h
loop_utils.h
loop_unswitch_pass.h
@@ -132,6 +133,7 @@
local_single_store_elim_pass.cpp
local_ssa_elim_pass.cpp
loop_descriptor.cpp
+ loop_peeling.cpp
loop_utils.cpp
loop_unroller.cpp
loop_unswitch_pass.cpp
diff --git a/source/opt/function.h b/source/opt/function.h
index f9fc655..be7b70b 100644
--- a/source/opt/function.h
+++ b/source/opt/function.h
@@ -15,6 +15,7 @@
#ifndef LIBSPIRV_OPT_CONSTRUCTS_H_
#define LIBSPIRV_OPT_CONSTRUCTS_H_
+#include <algorithm>
#include <functional>
#include <memory>
#include <utility>
@@ -92,6 +93,13 @@
return const_iterator(&blocks_, blocks_.cend());
}
+ // Returns an iterator to the basic block with id |bb_id|.
+ iterator FindBlock(uint32_t bb_id) {
+ return std::find_if(begin(), end(), [bb_id](const ir::BasicBlock& it_bb) {
+ return bb_id == it_bb.id();
+ });
+ }
+
// Runs the given function |f| on each instruction in this function, and
// optionally on debug line instructions that might precede them.
void ForEachInst(const std::function<void(Instruction*)>& f,
diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h
index aa722cb..6c4f5f5 100644
--- a/source/opt/ir_builder.h
+++ b/source/opt/ir_builder.h
@@ -159,6 +159,61 @@
return AddInstruction(std::move(phi_inst));
}
+ // Creates an addition instruction.
+ // The id |type| must be the id of the instruction's result type, which must
+ // be the same as the types of |op1| and |op2|.
+ // The id |op1| is the left hand side of the operation.
+ // The id |op2| is the right hand side of the operation.
+ ir::Instruction* AddIAdd(uint32_t type, uint32_t op1, uint32_t op2) {
+ std::unique_ptr<ir::Instruction> inst(new ir::Instruction(
+ GetContext(), SpvOpIAdd, type, GetContext()->TakeNextId(),
+ {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}}));
+ return AddInstruction(std::move(inst));
+ }
+
+ // Creates a less-than instruction for unsigned integers.
+ // The id |op1| is the left hand side of the operation.
+ // The id |op2| is the right hand side of the operation.
+ // It is assumed that |op1| and |op2| have the same underlying type.
+ ir::Instruction* AddULessThan(uint32_t op1, uint32_t op2) {
+ analysis::Bool bool_type;
+ uint32_t type = GetContext()->get_type_mgr()->GetId(&bool_type);
+ std::unique_ptr<ir::Instruction> inst(new ir::Instruction(
+ GetContext(), SpvOpULessThan, type, GetContext()->TakeNextId(),
+ {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}}));
+ return AddInstruction(std::move(inst));
+ }
+
+ // Creates a less-than instruction for signed integers.
+ // The id |op1| is the left hand side of the operation.
+ // The id |op2| is the right hand side of the operation.
+ // It is assumed that |op1| and |op2| have the same underlying type.
+ ir::Instruction* AddSLessThan(uint32_t op1, uint32_t op2) {
+ analysis::Bool bool_type;
+ uint32_t type = GetContext()->get_type_mgr()->GetId(&bool_type);
+ std::unique_ptr<ir::Instruction> inst(new ir::Instruction(
+ GetContext(), SpvOpSLessThan, type, GetContext()->TakeNextId(),
+ {{SPV_OPERAND_TYPE_ID, {op1}}, {SPV_OPERAND_TYPE_ID, {op2}}}));
+ return AddInstruction(std::move(inst));
+ }
+
+ // Creates an OpSLessThan or OpULessThan instruction depending on the signedness of
+ // |op1|. The id |op1| is the left hand side of the operation. The id |op2| is
+ // the right hand side of the operation. It is assumed that |op1| and |op2|
+ // have the same underlying type.
+ ir::Instruction* AddLessThan(uint32_t op1, uint32_t op2) {
+ ir::Instruction* op1_insn = context_->get_def_use_mgr()->GetDef(op1);
+ analysis::Type* type =
+ GetContext()->get_type_mgr()->GetType(op1_insn->type_id());
+ analysis::Integer* int_type = type->AsInteger();
+ assert(int_type && "Operand is not of int type");
+
+ if (int_type->IsSigned())
+ return AddSLessThan(op1, op2);
+ else
+ return AddULessThan(op1, op2);
+ }
+
// Creates a select instruction.
// |type| must match the types of |true_value| and |false_value|. It is up to
// the caller to ensure that |cond| is a correct type (bool or vector of
diff --git a/source/opt/loop_descriptor.cpp b/source/opt/loop_descriptor.cpp
index 27f53e2..0a4ed92 100644
--- a/source/opt/loop_descriptor.cpp
+++ b/source/opt/loop_descriptor.cpp
@@ -289,13 +289,16 @@
}
void Loop::SetPreHeaderBlock(BasicBlock* preheader) {
- assert(!IsInsideLoop(preheader) && "The preheader block is in the loop");
- assert(preheader->tail()->opcode() == SpvOpBranch &&
- "The preheader block does not unconditionally branch to the header "
- "block");
- assert(preheader->tail()->GetSingleWordOperand(0) == GetHeaderBlock()->id() &&
- "The preheader block does not unconditionally branch to the header "
- "block");
+ if (preheader) {
+ assert(!IsInsideLoop(preheader) && "The preheader block is in the loop");
+ assert(preheader->tail()->opcode() == SpvOpBranch &&
+ "The preheader block does not unconditionally branch to the header "
+ "block");
+ assert(preheader->tail()->GetSingleWordOperand(0) ==
+ GetHeaderBlock()->id() &&
+ "The preheader block does not unconditionally branch to the header "
+ "block");
+ }
loop_preheader_ = preheader;
}
diff --git a/source/opt/loop_peeling.cpp b/source/opt/loop_peeling.cpp
new file mode 100644
index 0000000..b2d1b08
--- /dev/null
+++ b/source/opt/loop_peeling.cpp
@@ -0,0 +1,563 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "ir_builder.h"
+#include "ir_context.h"
+#include "loop_descriptor.h"
+#include "loop_peeling.h"
+#include "loop_utils.h"
+
+namespace spvtools {
+namespace opt {
+
+void LoopPeeling::DuplicateAndConnectLoop(
+ LoopUtils::LoopCloningResult* clone_results) {
+ ir::CFG& cfg = *context_->cfg();
+ analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+
+ assert(CanPeelLoop() && "Cannot peel loop!");
+
+ std::vector<ir::BasicBlock*> ordered_loop_blocks;
+ ir::BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
+
+ loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);
+
+ cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);
+
+ // Add the cloned basic blocks to the function.
+ ir::Function::iterator it =
+ loop_utils_.GetFunction()->FindBlock(pre_header->id());
+ assert(it != loop_utils_.GetFunction()->end() &&
+ "Pre-header not found in the function.");
+ loop_utils_.GetFunction()->AddBasicBlocks(
+ clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);
+
+ // Make |loop_|'s preheader the preheader of |cloned_loop_|.
+ ir::BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
+ pre_header->ForEachSuccessorLabel(
+ [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });
+
+ // Update cfg.
+ cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
+ cloned_loop_->SetPreHeaderBlock(pre_header);
+ loop_->SetPreHeaderBlock(nullptr);
+
+ // When cloning the loop, we did not clone the merge block, so currently
+ // |cloned_loop_| shares that block with |loop_|.
+ // We mutate all branches from |cloned_loop_| blocks that target |loop_|'s
+ // merge block into branches to |loop_|'s header (so the header will also be
+ // the merge of |cloned_loop_|).
+ uint32_t cloned_loop_exit = 0;
+ for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
+ if (loop_->IsInsideLoop(pred_id)) continue;
+ ir::BasicBlock* bb = cfg.block(pred_id);
+ assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
+ cloned_loop_exit = bb->id();
+ bb->ForEachSuccessorLabel([this](uint32_t* succ) {
+ if (*succ == loop_->GetMergeBlock()->id())
+ *succ = loop_->GetHeaderBlock()->id();
+ });
+ }
+
+ // Update cfg.
+ cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
+ cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());
+
+ // Patch the phi of the original loop header:
+ // - Set the loop entry branch to come from the cloned loop exit block;
+ // - Set the initial value of the phi using the corresponding cloned loop
+ // exit values.
+ //
+ // We patch the iterating value initializers of the original loop using the
+ // corresponding cloned loop exit values. This connects the cloned loop
+ // iterating values to the original loop, making sure that the second loop
+ // starts with the last value of the first loop.
+ //
+ // For example, loops like:
+ //
+ // int z = 0;
+ // for (int i = 0; i++ < M; i += cst1) {
+ // if (cond)
+ // z += cst2;
+ // }
+ //
+ // Will become:
+ //
+ // int z = 0;
+ // int i = 0;
+ // for (; i++ < M; i += cst1) {
+ // if (cond)
+ // z += cst2;
+ // }
+ // for (; i++ < M; i += cst1) {
+ // if (cond)
+ // z += cst2;
+ // }
+ loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
+ clone_results,
+ this](ir::Instruction* phi) {
+ for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
+ if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
+ phi->SetInOperand(i,
+ {clone_results->value_map_.at(
+ exit_value_.at(phi->result_id())->result_id())});
+ phi->SetInOperand(i + 1, {cloned_loop_exit});
+ def_use_mgr->AnalyzeInstUse(phi);
+ return;
+ }
+ }
+ });
+
+ // Force the creation of a new preheader for the original loop and set it as
+ // the merge block for the cloned loop.
+ cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
+}
+
+void LoopPeeling::InsertCanonicalInductionVariable() {
+ ir::BasicBlock::iterator insert_point =
+ GetClonedLoop()->GetLatchBlock()->tail();
+ if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
+ --insert_point;
+ }
+ InstructionBuilder builder(context_, &*insert_point,
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping);
+ ir::Instruction* uint_1_cst =
+ builder.Add32BitConstantInteger<uint32_t>(1, int_type_->IsSigned());
+ // Create the increment.
+ // Note that we do "1 + 1" here: one of the operands should be the phi
+ // value, but we don't have it yet. The operand will be set later.
+ ir::Instruction* iv_inc = builder.AddIAdd(
+ uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());
+
+ builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());
+
+ canonical_induction_variable_ = builder.AddPhi(
+ uint_1_cst->type_id(),
+ {builder.Add32BitConstantInteger<uint32_t>(0, int_type_->IsSigned())
+ ->result_id(),
+ GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
+ GetClonedLoop()->GetLatchBlock()->id()});
+ // Connect everything.
+ iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});
+
+ // Update def/use manager.
+ context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);
+
+ // If do-while form, use the incremented value.
+ if (do_while_form_) {
+ canonical_induction_variable_ = iv_inc;
+ }
+}
+
+void LoopPeeling::GetIteratorUpdateOperations(
+ const ir::Loop* loop, ir::Instruction* iterator,
+ std::unordered_set<ir::Instruction*>* operations) {
+ opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+ operations->insert(iterator);
+ iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
+ ir::Instruction* insn = def_use_mgr->GetDef(*id);
+ if (insn->opcode() == SpvOpLabel) {
+ return;
+ }
+ if (operations->count(insn)) {
+ return;
+ }
+ if (!loop->IsInsideLoop(insn)) {
+ return;
+ }
+ GetIteratorUpdateOperations(loop, insn, operations);
+ });
+}
+
+// Gathers the set of blocks on all paths from |entry| to |block|.
+static void GetBlocksInPath(uint32_t block, uint32_t entry,
+ std::unordered_set<uint32_t>* blocks_in_path,
+ const ir::CFG& cfg) {
+ for (uint32_t pid : cfg.preds(block)) {
+ if (blocks_in_path->insert(pid).second) {
+ if (pid != entry) {
+ GetBlocksInPath(pid, entry, blocks_in_path, cfg);
+ }
+ }
+ }
+}
+
+bool LoopPeeling::IsConditionCheckSideEffectFree() const {
+ ir::CFG& cfg = *context_->cfg();
+
+ // The "do-while" form does not cause issues, the algorithm takes into account
+ // the first iteration.
+ if (!do_while_form_) {
+ uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
+
+ std::unordered_set<uint32_t> blocks_in_path;
+
+ blocks_in_path.insert(condition_block_id);
+ GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
+ &blocks_in_path, cfg);
+
+ for (uint32_t bb_id : blocks_in_path) {
+ ir::BasicBlock* bb = cfg.block(bb_id);
+ if (!bb->WhileEachInst([this](ir::Instruction* insn) {
+ if (insn->IsBranch()) return true;
+ switch (insn->opcode()) {
+ case SpvOpLabel:
+ case SpvOpSelectionMerge:
+ case SpvOpLoopMerge:
+ return true;
+ default:
+ break;
+ }
+ return context_->IsCombinatorInstruction(insn);
+ })) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void LoopPeeling::GetIteratingExitValues() {
+ ir::CFG& cfg = *context_->cfg();
+
+ loop_->GetHeaderBlock()->ForEachPhiInst([this](ir::Instruction* phi) {
+ exit_value_[phi->result_id()] = nullptr;
+ });
+
+ if (!loop_->GetMergeBlock()) {
+ return;
+ }
+ if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
+ return;
+ }
+ opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+
+ uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
+
+ auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
+ do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
+ condition_block_id) != header_pred.end();
+ if (do_while_form_) {
+ loop_->GetHeaderBlock()->ForEachPhiInst(
+ [condition_block_id, def_use_mgr, this](ir::Instruction* phi) {
+ std::unordered_set<ir::Instruction*> operations;
+
+ for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
+ if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
+ exit_value_[phi->result_id()] =
+ def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
+ }
+ }
+ });
+ } else {
+ DominatorTree* dom_tree =
+ &context_->GetDominatorAnalysis(loop_utils_.GetFunction(), cfg)
+ ->GetDomTree();
+ ir::BasicBlock* condition_block = cfg.block(condition_block_id);
+
+ loop_->GetHeaderBlock()->ForEachPhiInst(
+ [dom_tree, condition_block, this](ir::Instruction* phi) {
+ std::unordered_set<ir::Instruction*> operations;
+
+ // Not the back-edge value, check if the phi instruction is the only
+ // possible candidate.
+ GetIteratorUpdateOperations(loop_, phi, &operations);
+
+ for (ir::Instruction* insn : operations) {
+ if (insn == phi) {
+ continue;
+ }
+ if (dom_tree->Dominates(context_->get_instr_block(insn),
+ condition_block)) {
+ return;
+ }
+ }
+ exit_value_[phi->result_id()] = phi;
+ });
+ }
+}
+
+void LoopPeeling::FixExitCondition(
+ const std::function<uint32_t(ir::Instruction*)>& condition_builder) {
+ ir::CFG& cfg = *context_->cfg();
+
+ uint32_t condition_block_id = 0;
+ for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
+ if (GetClonedLoop()->IsInsideLoop(id)) {
+ condition_block_id = id;
+ break;
+ }
+ }
+ assert(condition_block_id != 0 && "2nd loop in improperly connected");
+
+ ir::BasicBlock* condition_block = cfg.block(condition_block_id);
+ ir::Instruction* exit_condition = condition_block->terminator();
+ assert(exit_condition->opcode() == SpvOpBranchConditional);
+ ir::BasicBlock::iterator insert_point = condition_block->tail();
+ if (condition_block->GetMergeInst()) {
+ --insert_point;
+ }
+
+ exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});
+
+ uint32_t to_continue_block_idx =
+ GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
+ ? 1
+ : 2;
+ exit_condition->SetInOperand(
+ 1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
+ exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});
+
+ // Update def/use manager.
+ context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
+}
+
+ir::BasicBlock* LoopPeeling::CreateBlockBefore(ir::BasicBlock* bb) {
+ analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+ ir::CFG& cfg = *context_->cfg();
+ assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");
+
+ std::unique_ptr<ir::BasicBlock> new_bb = MakeUnique<ir::BasicBlock>(
+ std::unique_ptr<ir::Instruction>(new ir::Instruction(
+ context_, SpvOpLabel, 0, context_->TakeNextId(), {})));
+ new_bb->SetParent(loop_utils_.GetFunction());
+ // Update the loop descriptor.
+ ir::Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
+ if (in_loop) {
+ in_loop->AddBasicBlock(new_bb.get());
+ loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
+ }
+
+ context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
+ def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());
+
+ ir::BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
+ bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
+ if (*id == bb->id()) {
+ *id = new_bb->id();
+ }
+ });
+ cfg.RemoveEdge(bb_pred->id(), bb->id());
+ cfg.AddEdge(bb_pred->id(), new_bb->id());
+ def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
+
+ // Update the incoming branch.
+ bb->ForEachPhiInst([&new_bb, def_use_mgr](ir::Instruction* phi) {
+ phi->SetInOperand(1, {new_bb->id()});
+ def_use_mgr->AnalyzeInstUse(phi);
+ });
+ InstructionBuilder(context_, new_bb.get(),
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping)
+ .AddBranch(bb->id());
+ cfg.AddEdge(new_bb->id(), bb->id());
+
+ // Add the basic block to the function.
+ ir::Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
+ assert(it != loop_utils_.GetFunction()->end() &&
+ "Basic block not found in the function.");
+ ir::BasicBlock* ret = new_bb.get();
+ loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
+ return ret;
+}
+
+ir::BasicBlock* LoopPeeling::ProtectLoop(ir::Loop* loop,
+ ir::Instruction* condition,
+ ir::BasicBlock* if_merge) {
+ ir::BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
+ // Will no longer be a pre-header because of the if.
+ loop->SetPreHeaderBlock(nullptr);
+ // Kill the branch to the header.
+ context_->KillInst(&*if_block->tail());
+
+ InstructionBuilder builder(context_, if_block,
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping);
+ builder.AddConditionalBranch(condition->result_id(),
+ loop->GetHeaderBlock()->id(), if_merge->id(),
+ if_merge->id());
+
+ return if_block;
+}
+
+void LoopPeeling::PeelBefore(uint32_t peel_factor) {
+ assert(CanPeelLoop() && "Cannot peel loop");
+ LoopUtils::LoopCloningResult clone_results;
+
+ // Clone the loop and insert the cloned one before the loop.
+ DuplicateAndConnectLoop(&clone_results);
+
+ // Add a canonical induction variable "canonical_induction_variable_".
+ InsertCanonicalInductionVariable();
+
+ InstructionBuilder builder(context_,
+ &*cloned_loop_->GetPreHeaderBlock()->tail(),
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping);
+ ir::Instruction* factor =
+ builder.Add32BitConstantInteger(peel_factor, int_type_->IsSigned());
+
+ ir::Instruction* has_remaining_iteration = builder.AddLessThan(
+ factor->result_id(), loop_iteration_count_->result_id());
+ ir::Instruction* max_iteration = builder.AddSelect(
+ factor->type_id(), has_remaining_iteration->result_id(),
+ factor->result_id(), loop_iteration_count_->result_id());
+
+ // Change the exit condition of the cloned loop so that the loop exits when
+ // the following becomes false:
+ // "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
+ FixExitCondition([max_iteration, this](ir::Instruction* insert_before_point) {
+ return InstructionBuilder(context_, insert_before_point,
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping)
+ .AddLessThan(canonical_induction_variable_->result_id(),
+ max_iteration->result_id())
+ ->result_id();
+ });
+
+ // "Protect" the second loop: the second loop can only be executed if
+ // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
+ ir::BasicBlock* if_merge_block = loop_->GetMergeBlock();
+ loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
+ // Prevent the second loop from being executed if we already executed all the
+ // required iterations.
+ ir::BasicBlock* if_block =
+ ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
+ // Patch the phi of the merge block.
+ if_merge_block->ForEachPhiInst(
+ [&clone_results, if_block, this](ir::Instruction* phi) {
+ // if_merge_block previously had only one predecessor.
+ uint32_t incoming_value = phi->GetSingleWordInOperand(0);
+ auto def_in_loop = clone_results.value_map_.find(incoming_value);
+ if (def_in_loop != clone_results.value_map_.end())
+ incoming_value = def_in_loop->second;
+ phi->AddOperand(
+ {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
+ phi->AddOperand(
+ {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
+ context_->get_def_use_mgr()->AnalyzeInstUse(phi);
+ });
+
+ context_->InvalidateAnalysesExceptFor(
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping |
+ ir::IRContext::kAnalysisLoopAnalysis | ir::IRContext::kAnalysisCFG);
+}
+
+void LoopPeeling::PeelAfter(uint32_t peel_factor) {
+ assert(CanPeelLoop() && "Cannot peel loop");
+ LoopUtils::LoopCloningResult clone_results;
+
+ // Clone the loop and insert the cloned one before the loop.
+ DuplicateAndConnectLoop(&clone_results);
+
+ // Add a canonical induction variable "canonical_induction_variable_".
+ InsertCanonicalInductionVariable();
+
+ InstructionBuilder builder(context_,
+ &*cloned_loop_->GetPreHeaderBlock()->tail(),
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping);
+ ir::Instruction* factor =
+ builder.Add32BitConstantInteger(peel_factor, int_type_->IsSigned());
+
+ ir::Instruction* has_remaining_iteration = builder.AddLessThan(
+ factor->result_id(), loop_iteration_count_->result_id());
+
+ // Change the exit condition of the cloned loop so that the loop exits when
+ // the following becomes false:
+ // "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
+ FixExitCondition([factor, this](ir::Instruction* insert_before_point) {
+ InstructionBuilder cond_builder(
+ context_, insert_before_point,
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping);
+ // Build the following check: canonical_induction_variable_ + factor <
+ // iteration_count
+ return cond_builder
+ .AddLessThan(cond_builder
+ .AddIAdd(canonical_induction_variable_->type_id(),
+ canonical_induction_variable_->result_id(),
+ factor->result_id())
+ ->result_id(),
+ loop_iteration_count_->result_id())
+ ->result_id();
+ });
+
+ // "Protect" the first loop: the first loop can only be executed if
+ // factor < loop_iteration_count_.
+
+ // The original loop's pre-header was the cloned loop merge block.
+ GetClonedLoop()->SetMergeBlock(
+ CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
+ // Use the second loop's preheader as the if's merge block.
+
+ // Prevent the first loop from being executed if the peeled loop alone covers
+ // all the iterations.
+ ir::BasicBlock* if_block =
+ ProtectLoop(cloned_loop_, has_remaining_iteration,
+ GetOriginalLoop()->GetPreHeaderBlock());
+
+ // Patch the phi of the header block.
+ // We added an if to enclose the first loop, and because the phi nodes are
+ // connected to the exit values of the first loop, those definitions no
+ // longer dominate the preheader.
+ // We add the required phi instructions to the preheader (our if merge block)
+ // and patch the header phi.
+ GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
+ [&clone_results, if_block, this](ir::Instruction* phi) {
+ opt::analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
+
+ auto find_value_idx = [](ir::Instruction* phi_inst, ir::Loop* loop) {
+ uint32_t preheader_value_idx =
+ !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
+ return preheader_value_idx;
+ };
+
+ ir::Instruction* cloned_phi =
+ def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
+ uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
+ find_value_idx(cloned_phi, GetClonedLoop()));
+
+ ir::Instruction* new_phi =
+ InstructionBuilder(context_,
+ &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping)
+ .AddPhi(phi->type_id(),
+ {phi->GetSingleWordInOperand(
+ find_value_idx(phi, GetOriginalLoop())),
+ GetClonedLoop()->GetMergeBlock()->id(),
+ cloned_preheader_value, if_block->id()});
+
+ phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
+ {new_phi->result_id()});
+ def_use_mgr->AnalyzeInstUse(phi);
+ });
+
+ context_->InvalidateAnalysesExceptFor(
+ ir::IRContext::kAnalysisDefUse |
+ ir::IRContext::kAnalysisInstrToBlockMapping |
+ ir::IRContext::kAnalysisLoopAnalysis | ir::IRContext::kAnalysisCFG);
+}
+
+} // namespace opt
+} // namespace spvtools
diff --git a/source/opt/loop_peeling.h b/source/opt/loop_peeling.h
new file mode 100644
index 0000000..9b912e5
--- /dev/null
+++ b/source/opt/loop_peeling.h
@@ -0,0 +1,223 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SOURCE_OPT_LOOP_PEELING_H_
+#define SOURCE_OPT_LOOP_PEELING_H_
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "opt/ir_context.h"
+#include "opt/loop_descriptor.h"
+#include "opt/loop_utils.h"
+
+namespace spvtools {
+namespace opt {
+
+// Utility class to perform the peeling of a given loop.
+// The loop peeling transformation makes a given number of the loop's
+// iterations execute either before (peel before) or after (peel after) the
+// transformed loop.
+//
+// For both cases, the transformation performs the following steps:
+// - It clones the loop and inserts the cloned loop before the original loop;
+// - It connects all iterating values of the cloned loop with the
+// corresponding original loop values so that the second loop starts with
+// the appropriate values.
+// - It inserts a new induction variable "i" into the cloned loop that starts
+// at 0 and increments in steps of one.
+//
+// The last step is specific to each case:
+// - Peel before: the transformation peels the first "N" iterations.
+// The exit condition of the cloned loop is changed so that the loop
+// exits when "i < N" becomes false. The original loop is then protected so
+// that it only executes if there are any iterations left to do.
+// - Peel after: the transformation peels the last "N" iterations,
+// so the exit condition of the cloned loop is changed so that the loop
+// exits when "i + N < max_iteration" becomes false, where "max_iteration"
+// is the upper bound of the loop. The cloned loop is then protected so that
+// it only executes if there are any iterations left to do that are not
+// covered by the second loop (see the illustration below).
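+//
+// As a rough illustration (pseudo-code only, not actual generated output;
+// "i", "N" and "count" stand for the canonical induction variable, the peel
+// factor and the loop upper bound):
+//
+// Peel before:
+//   i = 0;
+//   for (; i < min(N, count); ++i) { body; }   // cloned loop
+//   if (N < count)
+//     for (; i < count; ++i) { body; }         // original loop
+//
+// Peel after:
+//   i = 0;
+//   if (N < count)
+//     for (; i + N < count; ++i) { body; }     // cloned loop
+//   for (; i < count; ++i) { body; }           // original loop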
+//
+// To be peelable:
+// - The loop must be in LCSSA form;
+// - The loop must not contain any breaks;
+// - The loop must not have any ambiguous iterator updates (see
+// "CanPeelLoop").
+// The method "CanPeelLoop" checks that these constraints are met.
+//
+// FIXME(Victor): Allow the utility to accept a canonical induction variable
+// rather than automatically creating one.
+// FIXME(Victor): When possible, evaluate the initial values of the second
+// loop's iterating values rather than using the exit values of the first loop.
+// FIXME(Victor): Make the utility work out the upper bound without it having
+// to be provided. This should become easy once scalar evolution is in.
+class LoopPeeling {
+ public:
+ // LoopPeeling constructor.
+ // |loop| is the loop to peel.
+ // |loop_iteration_count| is the instruction holding the |loop| iteration
+ // count; it must be invariant for |loop| and of a 32-bit int type (signed
+ // or unsigned).
+ LoopPeeling(ir::IRContext* context, ir::Loop* loop,
+ ir::Instruction* loop_iteration_count)
+ : context_(context),
+ loop_utils_(context, loop),
+ loop_(loop),
+ loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
+ ? loop_iteration_count
+ : nullptr),
+ int_type_(nullptr),
+ canonical_induction_variable_(nullptr) {
+ if (loop_iteration_count_) {
+ int_type_ = context_->get_type_mgr()
+ ->GetType(loop_iteration_count_->type_id())
+ ->AsInteger();
+ }
+ GetIteratingExitValues();
+ }
+
+ // Returns true if the loop can be peeled.
+ // To be peelable, no operation involved in the update of the loop iterators
+ // may dominate the exit condition. This restriction is a workaround to avoid
+ // miscompiling code like:
+ //
+ // for (int i = 0; i + 1 < N; i++) {}
+ // for (int i = 0; ++i < N; i++) {}
+ //
+ // In such loops the increment happens before the exit condition is tested,
+ // which produces code that looks very similar but behaves differently.
+ //
+ // This restriction does not apply if a loop rotation is applied beforehand
+ // (i.e. the loop becomes a do-while loop).
+ bool CanPeelLoop() const {
+ ir::CFG& cfg = *context_->cfg();
+
+ if (!loop_iteration_count_) {
+ return false;
+ }
+ if (!int_type_) {
+ return false;
+ }
+ if (int_type_->width() != 32) {
+ return false;
+ }
+ if (!loop_->IsLCSSA()) {
+ return false;
+ }
+ if (!loop_->GetMergeBlock()) {
+ return false;
+ }
+ if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
+ return false;
+ }
+ if (!IsConditionCheckSideEffectFree()) {
+ return false;
+ }
+
+ return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
+ [](std::pair<uint32_t, ir::Instruction*> it) {
+ return it.second == nullptr;
+ });
+ }
+
+ // Moves the execution of the first |factor| iterations of the loop into a
+ // dedicated loop.
+ void PeelBefore(uint32_t factor);
+
+ // Moves the execution of the last |factor| iterations of the loop into a
+ // dedicated loop.
+ void PeelAfter(uint32_t factor);
+
+ // Returns the cloned loop.
+ ir::Loop* GetClonedLoop() { return cloned_loop_; }
+ // Returns the original loop.
+ ir::Loop* GetOriginalLoop() { return loop_; }
+
+ private:
+ ir::IRContext* context_;
+ LoopUtils loop_utils_;
+ // The original loop.
+ ir::Loop* loop_;
+ // The initial |loop_| upper bound.
+ ir::Instruction* loop_iteration_count_;
+ // The int type to use for the canonical_induction_variable_.
+ analysis::Integer* int_type_;
+ // The cloned loop.
+ ir::Loop* cloned_loop_;
+ // This is set to true when the exit branch and the back-edge branch are the
+ // same instruction.
+ bool do_while_form_;
+
+ // The canonical induction variable of the cloned loop. The induction
+ // variable is initialized to 0 and incremented in steps of 1.
+ ir::Instruction* canonical_induction_variable_;
+
+ // Map between loop iterators (the phi instructions in the loop header) and
+ // their exit values; iterators with no known exit value map to nullptr.
+ std::unordered_map<uint32_t, ir::Instruction*> exit_value_;
+
+ // Duplicates |loop_| and places the cloned loop before |loop_|. Iterating
+ // values from the cloned loop are then connected to the original loop as
+ // initializers.
+ void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
+
+ // Insert the canonical induction variable into the first loop as a simplified
+ // counter.
+ void InsertCanonicalInductionVariable();
+
+ // Fixes the exit condition of the cloned (before) loop. The function calls
+ // |condition_builder| to get the condition to use in the conditional branch
+ // of the loop exit. The loop is exited if the condition evaluates to
+ // true. |condition_builder| takes an ir::Instruction* that represents the
+ // insertion point.
+ void FixExitCondition(
+ const std::function<uint32_t(ir::Instruction*)>& condition_builder);
+
+ // Gathers all operations involved in the update of |iterator| into
+ // |operations|.
+ void GetIteratorUpdateOperations(
+ const ir::Loop* loop, ir::Instruction* iterator,
+ std::unordered_set<ir::Instruction*>* operations);
+
+ // Gathers exiting iterator values. The function builds a map between each
+ // iterating value in the loop (a phi instruction in the loop header) and its
+ // SSA value when it exits the loop. If no exit value can be accurately found,
+ // it is mapped to nullptr (see comment on CanPeelLoop).
+ void GetIteratingExitValues();
+
+ // Returns true if a for-loop has no instructions with side effects before
+ // the condition check.
+ bool IsConditionCheckSideEffectFree() const;
+
+ // Creates a new basic block and inserts it between |bb| and the predecessor
+ // of |bb|.
+ ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb);
+
+ // Inserts code to execute |loop| only if the given |condition| is true.
+ // |if_merge| is a suitable basic block to be used by the if condition as
+ // merge block.
+ // The function returns the if block protecting the loop.
+ ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition,
+ ir::BasicBlock* if_merge);
+};
+
+} // namespace opt
+} // namespace spvtools
+
+#endif // SOURCE_OPT_LOOP_PEELING_H_
diff --git a/source/opt/loop_unswitch_pass.cpp b/source/opt/loop_unswitch_pass.cpp
index 53f6299..964e765 100644
--- a/source/opt/loop_unswitch_pass.cpp
+++ b/source/opt/loop_unswitch_pass.cpp
@@ -88,9 +88,7 @@
// Return the iterator to the basic block |bb|.
ir::Function::iterator FindBasicBlockPosition(ir::BasicBlock* bb_to_find) {
- ir::Function::iterator it = std::find_if(
- function_->begin(), function_->end(),
- [bb_to_find](const ir::BasicBlock& bb) { return bb_to_find == &bb; });
+ ir::Function::iterator it = function_->FindBlock(bb_to_find->id());
assert(it != function_->end() && "Basic Block not found");
return it;
}
diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp
index 8532679..3c99b6a 100644
--- a/source/opt/loop_utils.cpp
+++ b/source/opt/loop_utils.cpp
@@ -582,13 +582,20 @@
new_loop->SetLatchBlock(
cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
if (old_loop->GetMergeBlock()) {
- ir::BasicBlock* bb =
- cloning_result.old_to_new_bb_.at(old_loop->GetMergeBlock()->id());
+ auto it =
+ cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
+ ir::BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
+ ? it->second
+ : old_loop->GetMergeBlock();
new_loop->SetMergeBlock(bb);
}
- if (old_loop->GetPreHeaderBlock())
- new_loop->SetPreHeaderBlock(
- cloning_result.old_to_new_bb_.at(old_loop->GetPreHeaderBlock()->id()));
+ if (old_loop->GetPreHeaderBlock()) {
+ auto it =
+ cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
+ if (it != cloning_result.old_to_new_bb_.end()) {
+ new_loop->SetPreHeaderBlock(it->second);
+ }
+ }
}
} // namespace opt
diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h
index 0e77bb6..a50be33 100644
--- a/source/opt/loop_utils.h
+++ b/source/opt/loop_utils.h
@@ -87,11 +87,14 @@
// Clone |loop_| and remap its instructions. Newly created blocks
// will be added to the |cloning_result.cloned_bb_| list, correctly ordered to
- // be inserted into a function. If the loop is structured, the merge construct
- // will also be cloned. The function preserves the def/use, cfg and instr to
- // block analyses.
+ // be inserted into a function.
+ // It is assumed that |ordered_loop_blocks| is compatible with the result of
+ // |Loop::ComputeLoopStructuredOrder|. If the preheader and merge block are in
+ // the list they will also be cloned. If not, the resulting loop will share
+ // them with the original loop.
+ // The function preserves the def/use, cfg and instr to block analyses.
// The cloned loop nest will be added to the loop descriptor and will have
- // owner ship.
+ // ownership.
ir::Loop* CloneLoop(
LoopCloningResult* cloning_result,
const std::vector<ir::BasicBlock*>& ordered_loop_blocks) const;
@@ -125,6 +128,15 @@
// called, otherwise the analysis should be invalidated.
void Finalize();
+ // Returns the context associated with |loop_|.
+ ir::IRContext* GetContext() { return context_; }
+ // Returns the loop descriptor owning |loop_|.
+ ir::LoopDescriptor* GetLoopDescriptor() { return loop_desc_; }
+ // Returns the loop on which the object operates.
+ ir::Loop* GetLoop() const { return loop_; }
+ // Returns the function that |loop_| belongs to.
+ ir::Function* GetFunction() const { return &function_; }
+
private:
ir::IRContext* context_;
ir::LoopDescriptor* loop_desc_;
diff --git a/test/opt/loop_optimizations/CMakeLists.txt b/test/opt/loop_optimizations/CMakeLists.txt
index 3a1144d..5305481 100644
--- a/test/opt/loop_optimizations/CMakeLists.txt
+++ b/test/opt/loop_optimizations/CMakeLists.txt
@@ -84,3 +84,9 @@
unswitch.cpp
LIBS SPIRV-Tools-opt
)
+
+add_spvtools_unittest(TARGET peeling_test
+ SRCS ../function_utils.h
+ peeling.cpp
+ LIBS SPIRV-Tools-opt
+)
diff --git a/test/opt/loop_optimizations/peeling.cpp b/test/opt/loop_optimizations/peeling.cpp
new file mode 100644
index 0000000..a88e8dd
--- /dev/null
+++ b/test/opt/loop_optimizations/peeling.cpp
@@ -0,0 +1,1075 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gmock/gmock.h>
+
+#ifdef SPIRV_EFFCEE
+#include "effcee/effcee.h"
+#endif
+
+#include "../pass_fixture.h"
+#include "opt/ir_builder.h"
+#include "opt/loop_descriptor.h"
+#include "opt/loop_peeling.h"
+
+namespace {
+
+using namespace spvtools;
+
+using PeelingTest = PassTest<::testing::Test>;
+
+bool Validate(const std::vector<uint32_t>& bin) {
+ spv_target_env target_env = SPV_ENV_UNIVERSAL_1_2;
+ spv_context spvContext = spvContextCreate(target_env);
+ spv_diagnostic diagnostic = nullptr;
+ spv_const_binary_t binary = {bin.data(), bin.size()};
+ spv_result_t error = spvValidate(spvContext, &binary, &diagnostic);
+ if (error != 0) spvDiagnosticPrint(diagnostic);
+ spvDiagnosticDestroy(diagnostic);
+ spvContextDestroy(spvContext);
+ return error == 0;
+}
+
+void Match(const std::string& checks, ir::IRContext* context) {
+ std::vector<uint32_t> bin;
+ context->module()->ToBinary(&bin, true);
+ EXPECT_TRUE(Validate(bin));
+#ifdef SPIRV_EFFCEE
+ std::string assembly;
+ SpirvTools tools(SPV_ENV_UNIVERSAL_1_2);
+ EXPECT_TRUE(
+ tools.Disassemble(bin, &assembly, SPV_BINARY_TO_TEXT_OPTION_NO_HEADER))
+ << "Disassembling failed for shader:\n"
+ << assembly << std::endl;
+ auto match_result = effcee::Match(assembly, checks);
+ EXPECT_EQ(effcee::Result::Status::Ok, match_result.status())
+ << match_result.message() << "\nChecking result:\n"
+ << assembly;
+#endif // ! SPIRV_EFFCEE
+}
+
+/*
+Generated from the following GLSL + --eliminate-local-multi-store
+
+First test:
+#version 330 core
+void main() {
+ for(int i = 0; i < 10; ++i) {
+ if (i < 4)
+ break;
+ }
+}
+
+Second test (with a common sub-expression elimination):
+#version 330 core
+void main() {
+ for(int i = 0; i + 1 < 10; ++i) {
+ }
+}
+
+Third test:
+#version 330 core
+void main() {
+ int a[10];
+ for (int i = 0; a[i] != 0; i++) {}
+}
+
+Fourth test:
+#version 330 core
+void main() {
+ for (long i = 0; i < 10; i++) {}
+}
+
+Fifth test:
+#version 330 core
+void main() {
+ for (float i = 0; i < 10; i++) {}
+}
+
+Sixth test:
+#version 450
+layout(location = 0)out float o;
+void main() {
+ o = 0.0;
+ for( int i = 0; true; i++ ) {
+ o += 1.0;
+ if (i > 10) break;
+ }
+}
+*/
+TEST_F(PeelingTest, CannotPeel) {
+ // Build the given SPIR-V program in |text|, take the first loop in the first
+ // function and test that it is not peelable. |loop_count_id| is the id
+ // representing the loop count; if it equals 0, the function builds a
+ // constant 10 as the loop count.
+ auto test_cannot_peel = [](const std::string& text, uint32_t loop_count_id) {
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ ir::Instruction* loop_count = nullptr;
+ if (loop_count_id) {
+ loop_count = context->get_def_use_mgr()->GetDef(loop_count_id);
+ } else {
+ opt::InstructionBuilder builder(context.get(), &*f.begin());
+ // Exit condition.
+ loop_count = builder.Add32BitSignedIntegerConstant(10);
+ }
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count);
+ EXPECT_FALSE(peel.CanPeelLoop());
+ };
+ {
+ SCOPED_TRACE("loop with break");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %int_10 = OpConstant %int 10
+ %bool = OpTypeBool
+ %int_4 = OpConstant %int 4
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ OpBranch %10
+ %10 = OpLabel
+ %28 = OpPhi %int %int_0 %5 %27 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %18 = OpSLessThan %bool %28 %int_10
+ OpBranchConditional %18 %11 %12
+ %11 = OpLabel
+ %21 = OpSLessThan %bool %28 %int_4
+ OpSelectionMerge %23 None
+ OpBranchConditional %21 %22 %23
+ %22 = OpLabel
+ OpBranch %12
+ %23 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ %27 = OpIAdd %int %28 %int_1
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+ test_cannot_peel(text, 0);
+ }
+
+ {
+ SCOPED_TRACE("Ambiguous iterator update");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %int_1 = OpConstant %int 1
+ %int_10 = OpConstant %int 10
+ %bool = OpTypeBool
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ OpBranch %10
+ %10 = OpLabel
+ %23 = OpPhi %int %int_0 %5 %17 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %17 = OpIAdd %int %23 %int_1
+ %20 = OpSLessThan %bool %17 %int_10
+ OpBranchConditional %20 %11 %12
+ %11 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ test_cannot_peel(text, 0);
+ }
+
+ {
+ SCOPED_TRACE("No loop static bounds");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ OpName %i "i"
+ OpName %a "a"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %uint = OpTypeInt 32 0
+ %uint_10 = OpConstant %uint 10
+%_arr_int_uint_10 = OpTypeArray %int %uint_10
+%_ptr_Function__arr_int_uint_10 = OpTypePointer Function %_arr_int_uint_10
+ %bool = OpTypeBool
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ %a = OpVariable %_ptr_Function__arr_int_uint_10 Function
+ OpStore %i %int_0
+ OpBranch %10
+ %10 = OpLabel
+ %28 = OpPhi %int %int_0 %5 %27 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %21 = OpAccessChain %_ptr_Function_int %a %28
+ %22 = OpLoad %int %21
+ %24 = OpINotEqual %bool %22 %int_0
+ OpBranchConditional %24 %11 %12
+ %11 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ %27 = OpIAdd %int %28 %int_1
+ OpStore %i %27
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ test_cannot_peel(text, 22);
+ }
+ {
+ SCOPED_TRACE("Int 64 type for conditions");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginLowerLeft
+ OpSource GLSL 330
+ OpName %2 "main"
+ OpName %4 "i"
+ %6 = OpTypeVoid
+ %3 = OpTypeFunction %6
+ %7 = OpTypeInt 64 1
+ %8 = OpTypePointer Function %7
+ %9 = OpConstant %7 0
+ %15 = OpConstant %7 10
+ %16 = OpTypeBool
+ %17 = OpConstant %7 1
+ %2 = OpFunction %6 None %3
+ %5 = OpLabel
+ %4 = OpVariable %8 Function
+ OpStore %4 %9
+ OpBranch %10
+ %10 = OpLabel
+ %22 = OpPhi %7 %9 %5 %21 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %18 = OpSLessThan %16 %22 %15
+ OpBranchConditional %18 %11 %12
+ %11 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ %21 = OpIAdd %7 %22 %17
+ OpStore %4 %21
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+ // %15 is a constant for a 64-bit int. Currently rejected.
+ test_cannot_peel(text, 15);
+ }
+ {
+ SCOPED_TRACE("Float type for conditions");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginLowerLeft
+ OpSource GLSL 330
+ OpName %2 "main"
+ OpName %4 "i"
+ %6 = OpTypeVoid
+ %3 = OpTypeFunction %6
+ %7 = OpTypeFloat 32
+ %8 = OpTypePointer Function %7
+ %9 = OpConstant %7 0
+ %15 = OpConstant %7 10
+ %16 = OpTypeBool
+ %17 = OpConstant %7 1
+ %2 = OpFunction %6 None %3
+ %5 = OpLabel
+ %4 = OpVariable %8 Function
+ OpStore %4 %9
+ OpBranch %10
+ %10 = OpLabel
+ %22 = OpPhi %7 %9 %5 %21 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %18 = OpFOrdLessThan %16 %22 %15
+ OpBranchConditional %18 %11 %12
+ %11 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ %21 = OpFAdd %7 %22 %17
+ OpStore %4 %21
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+ // %15 is a constant for a float. Currently rejected.
+ test_cannot_peel(text, 15);
+ }
+ {
+ SCOPED_TRACE("Side effect before exit");
+
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %o
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %o "o"
+ OpName %i "i"
+ OpDecorate %o Location 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+%_ptr_Output_float = OpTypePointer Output %float
+ %o = OpVariable %_ptr_Output_float Output
+ %float_0 = OpConstant %float 0
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %bool = OpTypeBool
+ %true = OpConstantTrue %bool
+ %float_1 = OpConstant %float 1
+ %int_10 = OpConstant %int 10
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ OpStore %o %float_0
+ OpStore %i %int_0
+ OpBranch %14
+ %14 = OpLabel
+ %33 = OpPhi %int %int_0 %5 %32 %17
+ OpLoopMerge %16 %17 None
+ OpBranch %15
+ %15 = OpLabel
+ %22 = OpLoad %float %o
+ %23 = OpFAdd %float %22 %float_1
+ OpStore %o %23
+ %26 = OpSGreaterThan %bool %33 %int_10
+ OpSelectionMerge %28 None
+ OpBranchConditional %26 %27 %28
+ %27 = OpLabel
+ OpBranch %16
+ %28 = OpLabel
+ OpBranch %17
+ %17 = OpLabel
+ %32 = OpIAdd %int %33 %int_1
+ OpStore %i %32
+ OpBranch %14
+ %16 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+ test_cannot_peel(text, 0);
+ }
+}
+
+/*
+Generated from the following GLSL + --eliminate-local-multi-store
+
+#version 330 core
+void main() {
+ int i = 0;
+ for (; i < 10; i++) {}
+}
+*/
+TEST_F(PeelingTest, SimplePeeling) {
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %int_10 = OpConstant %int 10
+ %bool = OpTypeBool
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ OpBranch %10
+ %10 = OpLabel
+ %22 = OpPhi %int %int_0 %5 %21 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %14
+ %14 = OpLabel
+ %18 = OpSLessThan %bool %22 %int_10
+ OpBranchConditional %18 %11 %12
+ %11 = OpLabel
+ OpBranch %13
+ %13 = OpLabel
+ %21 = OpIAdd %int %22 %int_1
+ OpBranch %10
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ // Peel before.
+ {
+ SCOPED_TRACE("Peel before");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ opt::InstructionBuilder builder(context.get(), &*f.begin());
+ // Exit condition.
+ ir::Instruction* ten_cst = builder.Add32BitSignedIntegerConstant(10);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst);
+ EXPECT_TRUE(peel.CanPeelLoop());
+ peel.PeelBefore(2);
+
+ const std::string check = R"(
+CHECK: [[CST_TEN:%\w+]] = OpConstant {{%\w+}} 10
+CHECK: [[CST_TWO:%\w+]] = OpConstant {{%\w+}} 2
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} [[CST_TWO]] [[CST_TEN]]
+CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] [[CST_TWO]] [[CST_TEN]]
+CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]]
+CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]]
+CHECK-NEXT: OpLoopMerge
+)";
+
+ Match(check, context.get());
+ }
+
+ // Peel after.
+ {
+ SCOPED_TRACE("Peel after");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ opt::InstructionBuilder builder(context.get(), &*f.begin());
+ // Exit condition.
+ ir::Instruction* ten_cst = builder.Add32BitSignedIntegerConstant(10);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst);
+ EXPECT_TRUE(peel.CanPeelLoop());
+ peel.PeelAfter(2);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}}
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]]
+CHECK: [[BEFORE_LOOP]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}}
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]]
+CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[IF_MERGE]] = OpLabel
+CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]]
+CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]]
+CHECK-NEXT: OpLoopMerge
+
+)";
+
+ Match(check, context.get());
+ }
+}
+
+/*
+Generated from the following GLSL + --eliminate-local-multi-store
+
+#version 330 core
+void main() {
+ int a[10];
+ int n = a[0];
+ for(int i = 0; i < n; ++i) {}
+}
+*/
+TEST_F(PeelingTest, PeelingUncountable) {
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ OpName %a "a"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %uint = OpTypeInt 32 0
+ %uint_10 = OpConstant %uint 10
+%_arr_int_uint_10 = OpTypeArray %int %uint_10
+%_ptr_Function__arr_int_uint_10 = OpTypePointer Function %_arr_int_uint_10
+ %int_0 = OpConstant %int 0
+ %bool = OpTypeBool
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %a = OpVariable %_ptr_Function__arr_int_uint_10 Function
+ %15 = OpAccessChain %_ptr_Function_int %a %int_0
+ %16 = OpLoad %int %15
+ OpBranch %18
+ %18 = OpLabel
+ %30 = OpPhi %int %int_0 %5 %29 %21
+ OpLoopMerge %20 %21 None
+ OpBranch %22
+ %22 = OpLabel
+ %26 = OpSLessThan %bool %30 %16
+ OpBranchConditional %26 %19 %20
+ %19 = OpLabel
+ OpBranch %21
+ %21 = OpLabel
+ %29 = OpIAdd %int %30 %int_1
+ OpBranch %18
+ %20 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ // Peel before.
+ {
+ SCOPED_TRACE("Peel before");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(16);
+ EXPECT_EQ(loop_count->opcode(), SpvOpLoad);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count);
+ EXPECT_TRUE(peel.CanPeelLoop());
+ peel.PeelBefore(1);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[LOOP_COUNT:%\w+]] = OpLoad
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} {{%\w+}} [[LOOP_COUNT]]
+CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] {{%\w+}} [[LOOP_COUNT]]
+CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]]
+CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]]
+CHECK-NEXT: OpLoopMerge
+)";
+
+ Match(check, context.get());
+ }
+
+ // Peel after.
+ {
+ SCOPED_TRACE("Peel after");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(16);
+ EXPECT_EQ(loop_count->opcode(), SpvOpLoad);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count);
+ EXPECT_TRUE(peel.CanPeelLoop());
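+    // Peel one iteration after the loop: the duplicated loop is expected to
+    // stop one iteration early (and is guarded in case it would not run at
+    // all), with the original loop executing what is left.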
+ peel.PeelAfter(1);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}}
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]]
+CHECK: [[BEFORE_LOOP]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}}
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]]
+CHECK-NEXT: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[IF_MERGE]] = OpLabel
+CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]]
+CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]]
+CHECK-NEXT: OpLoopMerge
+
+)";
+
+ Match(check, context.get());
+ }
+}
+
+/*
+Generated from the following GLSL + --eliminate-local-multi-store
+
+#version 330 core
+void main() {
+ int i = 0;
+ do {
+ i++;
+ } while (i < 10);
+}
+*/
+TEST_F(PeelingTest, DoWhilePeeling) {
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 330
+ OpName %main "main"
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+ %int_0 = OpConstant %int 0
+ %int_1 = OpConstant %int 1
+ %int_10 = OpConstant %int 10
+ %bool = OpTypeBool
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ OpBranch %10
+ %10 = OpLabel
+ %21 = OpPhi %int %int_0 %5 %16 %13
+ OpLoopMerge %12 %13 None
+ OpBranch %11
+ %11 = OpLabel
+ %16 = OpIAdd %int %21 %int_1
+ OpBranch %13
+ %13 = OpLabel
+ %20 = OpSLessThan %bool %16 %int_10
+ OpBranchConditional %20 %10 %12
+ %12 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ // Peel before.
+ {
+ SCOPED_TRACE("Peel before");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+ opt::InstructionBuilder builder(context.get(), &*f.begin());
+    // The do-while loop runs exactly 10 iterations; build the constant 10 to
+    // hand to the peeler as the iteration count.
+ ir::Instruction* ten_cst = builder.Add32BitUnsignedIntegerConstant(10);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst);
+ EXPECT_TRUE(peel.CanPeelLoop());
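+    // Peel two iterations before the do-while loop. Because the condition is
+    // evaluated at the end of the body, the peeled copy's exit test lives in
+    // the back-edge block.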
+ peel.PeelBefore(2);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpULessThan {{%\w+}}
+CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]]
+CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]]
+CHECK: [[BE]] = OpLabel
+CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpULessThan {{%\w+}} [[DUMMY_IT_1]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] [[BEFORE_LOOP]] [[AFTER_LOOP_PREHEADER]]
+
+CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[I_1]] [[AFTER_LOOP_PREHEADER]]
+CHECK-NEXT: OpLoopMerge
+)";
+
+ Match(check, context.get());
+ }
+
+ // Peel after.
+ {
+ SCOPED_TRACE("Peel after");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ opt::InstructionBuilder builder(context.get(), &*f.begin());
+    // The do-while loop runs exactly 10 iterations; build the constant 10 to
+    // hand to the peeler as the iteration count.
+ ir::Instruction* ten_cst = builder.Add32BitUnsignedIntegerConstant(10);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), ten_cst);
+ EXPECT_TRUE(peel.CanPeelLoop());
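+    // Peel two iterations after the do-while loop: the duplicated loop stops
+    // two iterations early and the original loop finishes the rest.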
+ peel.PeelAfter(2);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpULessThan {{%\w+}}
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]]
+CHECK: [[BEFORE_LOOP]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]]
+CHECK: [[BE]] = OpLabel
+CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: [[EXIT_VAL:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT_1]]
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpULessThan {{%\w+}} [[EXIT_VAL]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] [[BEFORE_LOOP]] [[BEFORE_LOOP_MERGE]]
+
+CHECK: [[IF_MERGE]] = OpLabel
+CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I_1]] [[BEFORE_LOOP_MERGE]]
+CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]]
+CHECK-NEXT: OpLoopMerge
+)";
+
+ Match(check, context.get());
+ }
+}
+
+/*
+Generated from the following GLSL + --eliminate-local-multi-store
+
+#version 450
+layout(location = 0) out float o;
+layout(location = 0) flat in int n;
+void main() {
+  o = 0.0;
+  int end = n;
+  for (int i = 0; i < end; i++) {
+    o += 1.0;
+  }
+}
+*/
+TEST_F(PeelingTest, PeelingLoopWithStore) {
+ const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %o %n
+ OpExecutionMode %main OriginLowerLeft
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %o "o"
+ OpName %end "end"
+ OpName %n "n"
+ OpName %i "i"
+ OpDecorate %o Location 0
+ OpDecorate %n Flat
+ OpDecorate %n Location 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+%_ptr_Output_float = OpTypePointer Output %float
+ %o = OpVariable %_ptr_Output_float Output
+ %float_0 = OpConstant %float 0
+ %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%_ptr_Input_int = OpTypePointer Input %int
+ %n = OpVariable %_ptr_Input_int Input
+ %int_0 = OpConstant %int 0
+ %bool = OpTypeBool
+ %float_1 = OpConstant %float 1
+ %int_1 = OpConstant %int 1
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %end = OpVariable %_ptr_Function_int Function
+ %i = OpVariable %_ptr_Function_int Function
+ OpStore %o %float_0
+ %15 = OpLoad %int %n
+ OpStore %end %15
+ OpStore %i %int_0
+ OpBranch %18
+ %18 = OpLabel
+ %33 = OpPhi %int %int_0 %5 %32 %21
+ OpLoopMerge %20 %21 None
+ OpBranch %22
+ %22 = OpLabel
+ %26 = OpSLessThan %bool %33 %15
+ OpBranchConditional %26 %19 %20
+ %19 = OpLabel
+ %28 = OpLoad %float %o
+ %29 = OpFAdd %float %28 %float_1
+ OpStore %o %29
+ OpBranch %21
+ %21 = OpLabel
+ %32 = OpIAdd %int %33 %int_1
+ OpStore %i %32
+ OpBranch %18
+ %20 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ // Peel before.
+ {
+ SCOPED_TRACE("Peel before");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(15);
+ EXPECT_EQ(loop_count->opcode(), SpvOpLoad);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count);
+ EXPECT_TRUE(peel.CanPeelLoop());
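+    // Same peeling as in PeelingUncountable, but the loop body is not empty:
+    // it stores to |o|, so the peeled copy carries that store as well.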
+ peel.PeelBefore(1);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[LOOP_COUNT:%\w+]] = OpLoad
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}} {{%\w+}} [[LOOP_COUNT]]
+CHECK-NEXT: [[LOOP_COUNT:%\w+]] = OpSelect {{%\w+}} [[MIN_LOOP_COUNT]] {{%\w+}} [[LOOP_COUNT]]
+CHECK: [[BEFORE_LOOP:%\w+]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[i:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[AFTER_LOOP_PREHEADER:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[AFTER_LOOP_PREHEADER]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[i]]
+CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[AFTER_LOOP_PREHEADER]] = OpLabel
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[AFTER_LOOP:%\w+]] [[IF_MERGE]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[i]] [[AFTER_LOOP_PREHEADER]]
+CHECK-NEXT: OpLoopMerge
+)";
+
+ Match(check, context.get());
+ }
+
+ // Peel after.
+ {
+ SCOPED_TRACE("Peel after");
+
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, text,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << text << std::endl;
+ ir::Function& f = *module->begin();
+ ir::LoopDescriptor& ld = *context->GetLoopDescriptor(&f);
+
+ EXPECT_EQ(ld.NumLoops(), 1u);
+
+ ir::Instruction* loop_count = context->get_def_use_mgr()->GetDef(15);
+ EXPECT_EQ(loop_count->opcode(), SpvOpLoad);
+
+ opt::LoopPeeling peel(context.get(), &*ld.begin(), loop_count);
+ EXPECT_TRUE(peel.CanPeelLoop());
+ peel.PeelAfter(1);
+
+ const std::string check = R"(
+CHECK: OpFunction
+CHECK-NEXT: [[ENTRY:%\w+]] = OpLabel
+CHECK: [[MIN_LOOP_COUNT:%\w+]] = OpSLessThan {{%\w+}}
+CHECK-NEXT: OpSelectionMerge [[IF_MERGE:%\w+]]
+CHECK-NEXT: OpBranchConditional [[MIN_LOOP_COUNT]] [[BEFORE_LOOP:%\w+]] [[IF_MERGE]]
+CHECK: [[BEFORE_LOOP]] = OpLabel
+CHECK-NEXT: [[DUMMY_IT:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[DUMMY_IT_1:%\w+]] [[BE:%\w+]]
+CHECK-NEXT: [[I:%\w+]] = OpPhi {{%\w+}} {{%\w+}} [[ENTRY]] [[I_1:%\w+]] [[BE]]
+CHECK-NEXT: OpLoopMerge [[BEFORE_LOOP_MERGE:%\w+]] [[BE]] None
+CHECK: [[COND_BLOCK:%\w+]] = OpLabel
+CHECK-NEXT: OpSLessThan
+CHECK-NEXT: [[TMP:%\w+]] = OpIAdd {{%\w+}} [[DUMMY_IT]] {{%\w+}}
+CHECK-NEXT: [[EXIT_COND:%\w+]] = OpSLessThan {{%\w+}} [[TMP]]
+CHECK-NEXT: OpBranchConditional [[EXIT_COND]] {{%\w+}} [[BEFORE_LOOP_MERGE]]
+CHECK: [[I_1]] = OpIAdd {{%\w+}} [[I]]
+CHECK: [[DUMMY_IT_1]] = OpIAdd {{%\w+}} [[DUMMY_IT]]
+CHECK-NEXT: OpBranch [[BEFORE_LOOP]]
+
+CHECK: [[IF_MERGE]] = OpLabel
+CHECK-NEXT: [[TMP:%\w+]] = OpPhi {{%\w+}} [[I]] [[BEFORE_LOOP_MERGE]]
+CHECK-NEXT: OpBranch [[AFTER_LOOP:%\w+]]
+
+CHECK: [[AFTER_LOOP]] = OpLabel
+CHECK-NEXT: OpPhi {{%\w+}} {{%\w+}} {{%\w+}} [[TMP]] [[IF_MERGE]]
+CHECK-NEXT: OpLoopMerge
+
+)";
+
+ Match(check, context.get());
+ }
+}
+
+} // namespace