[spirv-opt] refactor inlining pass (#3328)


- Functions with early returns are no longer inlined. Running the merge-return pass first rewrites them into a form that can be inlined.
- Otherwise no functional change, should be just refactoring.
diff --git a/source/opt/inline_pass.cpp b/source/opt/inline_pass.cpp
index 3c874a7..bc07ff0 100644
--- a/source/opt/inline_pass.cpp
+++ b/source/opt/inline_pass.cpp
@@ -20,6 +20,7 @@
 #include <utility>
 
 #include "source/cfa.h"
+#include "source/opt/reflect.h"
 #include "source/util/make_unique.h"
 
 // Indices of operands in SPIR-V instructions
@@ -232,6 +233,220 @@
   });
 }
 
+void InlinePass::MoveInstsBeforeEntryBlock(
+    std::unordered_map<uint32_t, Instruction*>* preCallSB,
+    BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr,
+    UptrVectorIterator<BasicBlock> call_block_itr) {
+  // Keep detaching the head of the caller block until the call instruction
+  // itself is the head; every detached instruction goes to |new_blk_ptr|.
+  auto head = call_block_itr->begin();
+  while (head != call_inst_itr) {
+    std::unique_ptr<Instruction> moved(&*head);
+    moved->RemoveFromList();
+    // Record same-block ops so that they can be regenerated later if needed.
+    if (IsSameBlockOp(moved.get()))
+      (*preCallSB)[moved->result_id()] = moved.get();
+    new_blk_ptr->AddInstruction(std::move(moved));
+    head = call_block_itr->begin();
+  }
+}
+
+std::unique_ptr<BasicBlock> InlinePass::AddGuardBlock(
+    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+    std::unordered_map<uint32_t, uint32_t>* callee2caller,
+    std::unique_ptr<BasicBlock> new_blk_ptr, uint32_t entry_blk_label_id) {
+  // TakeNextId() returning 0 is treated as failure.
+  const uint32_t guard_id = context()->TakeNextId();
+  if (guard_id == 0) return nullptr;
+
+  // Terminate the current block with a branch to the guard block and
+  // hand the finished block over to |new_blocks|.
+  AddBranch(guard_id, &new_blk_ptr);
+  new_blocks->push_back(std::move(new_blk_ptr));
+
+  // Redirect the callee's entry label to the guard block so that phis can
+  // be fixed up later on to satisfy dominance.
+  (*callee2caller)[entry_blk_label_id] = guard_id;
+  return MakeUnique<BasicBlock>(NewLabel(guard_id));
+}
+
+InstructionList::iterator InlinePass::AddStoresForVariableInitializers(
+    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+    std::unique_ptr<BasicBlock>* new_blk_ptr,
+    UptrVectorIterator<BasicBlock> callee_first_block_itr) {
+  // Walk the run of OpVariable instructions at the top of the callee's
+  // first block and return the position of the first non-variable.
+  auto var_itr = callee_first_block_itr->begin();
+  for (; var_itr->opcode() == SpvOp::SpvOpVariable; ++var_itr) {
+    // Only variables that carry an initializer need a store.
+    if (var_itr->NumInOperands() != 2) continue;
+
+    assert(callee2caller.count(var_itr->result_id()) &&
+           "Expected the variable to have already been mapped.");
+    // The initializer must be a constant or global value, so its id is
+    // used directly, without remapping.
+    AddStore(callee2caller.at(var_itr->result_id()),
+             var_itr->GetSingleWordInOperand(1), new_blk_ptr);
+  }
+  return var_itr;
+}
+
+bool InlinePass::InlineInstructionInBB(
+    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+    BasicBlock* new_blk_ptr, const Instruction* inst) {
+  // Returns are not cloned here: a return must be at the end of the callee
+  // and is handled once the rest of the callee has been inlined.
+  const SpvOp opcode = inst->opcode();
+  if (opcode == SpvOpReturn || opcode == SpvOpReturnValue) return true;
+
+  // Clone the callee instruction and rewrite every mapped input id.
+  std::unique_ptr<Instruction> clone(inst->Clone(context()));
+  clone->ForEachInId([&callee2caller](uint32_t* in_id) {
+    const auto mapped = callee2caller.find(*in_id);
+    if (mapped != callee2caller.end()) *in_id = mapped->second;
+  });
+
+  // A non-zero result id has to be remapped as well.  Inlining fails if
+  // |callee2caller| holds no entry for it.
+  const uint32_t old_id = clone->result_id();
+  if (old_id != 0) {
+    const auto mapped = callee2caller.find(old_id);
+    if (mapped == callee2caller.end()) return false;
+    clone->SetResultId(mapped->second);
+    get_decoration_mgr()->CloneDecorations(old_id, mapped->second);
+  }
+
+  new_blk_ptr->AddInstruction(std::move(clone));
+  return true;
+}
+
+std::unique_ptr<BasicBlock> InlinePass::InlineReturn(
+    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+    std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn,
+    const Instruction* inst, uint32_t returnVarId) {
+  const bool returns_value = inst->opcode() == SpvOpReturnValue;
+  const bool is_return = returns_value || inst->opcode() == SpvOpReturn;
+
+  // For OpReturnValue, store the (possibly remapped) value to the return var.
+  if (returns_value) {
+    assert(returnVarId != 0);
+    const uint32_t callee_val = inst->GetInOperand(kSpvReturnValueId).words[0];
+    const auto mapped = callee2caller.find(callee_val);
+    AddStore(returnVarId,
+             mapped == callee2caller.end() ? callee_val : mapped->second,
+             &new_blk_ptr);
+  }
+
+  // A dedicated return block is only generated when some callee block ends
+  // with OpUnreachable or OpKill (and a fresh label id could be taken).
+  uint32_t return_label = 0;
+  for (auto& blk : *calleeFn) {
+    const SpvOp terminator = blk.tail()->opcode();
+    if (terminator != SpvOpUnreachable && terminator != SpvOpKill) continue;
+    return_label = context()->TakeNextId();
+    break;
+  }
+  if (return_label == 0) return new_blk_ptr;
+  if (is_return) AddBranch(return_label, &new_blk_ptr);
+  new_blocks->push_back(std::move(new_blk_ptr));
+  return MakeUnique<BasicBlock>(NewLabel(return_label));
+}
+
+bool InlinePass::InlineEntryBlock(
+    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+    std::unique_ptr<BasicBlock>* new_blk_ptr,
+    UptrVectorIterator<BasicBlock> callee_first_block) {
+  // Emit stores for initialized variables first; the returned iterator marks
+  // where the remaining instructions of the entry block start.
+  auto itr = AddStoresForVariableInitializers(callee2caller, new_blk_ptr,
+                                              callee_first_block);
+
+  for (; itr != callee_first_block->end(); ++itr) {
+    // Give up as soon as one instruction cannot be inlined.
+    if (!InlineInstructionInBB(callee2caller, new_blk_ptr->get(), &*itr))
+      return false;
+  }
+  return true;
+}
+
+std::unique_ptr<BasicBlock> InlinePass::InlineBasicBlocks(
+    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+    std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn) {
+  // Start at the second callee block; the entry block is inlined separately.
+  auto blk_itr = calleeFn->begin();
+  for (++blk_itr; blk_itr != calleeFn->end(); ++blk_itr) {
+    // The block under construction is complete: hand it to |new_blocks|.
+    new_blocks->push_back(std::move(new_blk_ptr));
+
+    // Every callee label must already have a caller-side id.
+    const uint32_t callee_label = blk_itr->GetLabelInst()->result_id();
+    const auto mapped = callee2caller.find(callee_label);
+    if (mapped == callee2caller.end()) return nullptr;
+    new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(mapped->second));
+
+    // Clone the instructions of this callee block into the new block.
+    for (auto inst_itr = blk_itr->begin(); inst_itr != blk_itr->end();
+         ++inst_itr) {
+      const Instruction* callee_inst = &*inst_itr;
+      if (!InlineInstructionInBB(callee2caller, new_blk_ptr.get(),
+                                 callee_inst))
+        return nullptr;
+    }
+  }
+  return new_blk_ptr;
+}
+
+bool InlinePass::MoveCallerInstsAfterFunctionCall(
+    std::unordered_map<uint32_t, Instruction*>* preCallSB,
+    std::unordered_map<uint32_t, uint32_t>* postCallSB,
+    std::unique_ptr<BasicBlock>* new_blk_ptr,
+    BasicBlock::iterator call_inst_itr, bool multiBlocks) {
+  // Drain the caller instructions that follow the call, one at a time, and
+  // append each of them to |new_blk_ptr|.
+  while (Instruction* next = call_inst_itr->NextNode()) {
+    next->RemoveFromList();
+    std::unique_ptr<Instruction> moved(next);
+    if (multiBlocks) {
+      // With multiple generated blocks, regenerate any same-block
+      // instruction that has not been seen in this last block.
+      const bool regenerated =
+          CloneSameBlockOps(&moved, postCallSB, preCallSB, new_blk_ptr);
+      if (!regenerated) return false;
+
+      // Remember same-block ops in this block.
+      if (IsSameBlockOp(moved.get())) {
+        const uint32_t sb_id = moved->result_id();
+        (*postCallSB)[sb_id] = sb_id;
+      }
+    }
+    (*new_blk_ptr)->AddInstruction(std::move(moved));
+  }
+
+  return true;
+}
+
+void InlinePass::MoveLoopMergeInstToFirstBlock(
+    std::vector<std::unique_ptr<BasicBlock>>* new_blocks) {
+  // Relocate the OpLoopMerge from the last block of |new_blocks| back to
+  // the first block, where it belongs.
+  auto& first = new_blocks->front();
+  auto& last = new_blocks->back();
+  assert(first != last);
+
+  // Insert a copy of the loop merge before the first block's last instruction.
+  auto loop_merge_itr = last->tail();
+  --loop_merge_itr;  // The merge is expected just before the last instruction.
+  assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
+  std::unique_ptr<Instruction> cp_inst(loop_merge_itr->Clone(context()));
+  first->tail().InsertBefore(std::move(cp_inst));
+
+  // Unlink the original loop merge from the last block and free it.
+  loop_merge_itr->RemoveFromList();
+  delete &*loop_merge_itr;
+}
+
 bool InlinePass::GenInlineCode(
     std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
     std::vector<std::unique_ptr<Instruction>>* new_vars,
@@ -250,13 +465,19 @@
   // valid.  These operations can fail.
   context()->InvalidateAnalyses(IRContext::kAnalysisDefUse);
 
+  // If the caller is a loop header and the callee has multiple blocks, then the
+  // normal inlining logic will place the OpLoopMerge in the last of several
+  // blocks in the loop.  Instead, it should be placed at the end of the first
+  // block.  We'll wait to move the OpLoopMerge until the end of the regular
+  // inlining logic, and only if necessary.
+  bool caller_is_loop_header = call_block_itr->GetLoopMergeInst() != nullptr;
+
+  // Single-trip loop continue block
+  std::unique_ptr<BasicBlock> single_trip_loop_cont_blk;
+
   Function* calleeFn = id2function_[call_inst_itr->GetSingleWordOperand(
       kSpvFunctionCallFunctionId)];
 
-  // Check for multiple returns in the callee.
-  auto fi = early_return_funcs_.find(calleeFn->result_id());
-  const bool earlyReturn = fi != early_return_funcs_.end();
-
   // Map parameters to actual arguments.
   MapParams(calleeFn, call_inst_itr, &callee2caller);
 
@@ -266,6 +487,31 @@
     return false;
   }
 
+  // First block needs to use label of original block
+  // but map callee label in case of phi reference.
+  uint32_t entry_blk_label_id = calleeFn->begin()->GetLabelInst()->result_id();
+  callee2caller[entry_blk_label_id] = call_block_itr->id();
+  std::unique_ptr<BasicBlock> new_blk_ptr =
+      MakeUnique<BasicBlock>(NewLabel(call_block_itr->id()));
+
+  // Move instructions of original caller block up to call instruction.
+  MoveInstsBeforeEntryBlock(&preCallSB, new_blk_ptr.get(), call_inst_itr,
+                            call_block_itr);
+
+  if (caller_is_loop_header &&
+      (*(calleeFn->begin())).GetMergeInst() != nullptr) {
+    // We can't place both the caller's merge instruction and
+    // another merge instruction in the same block.  So split the
+    // calling block. Insert an unconditional branch to a new guard
+    // block.  Later, once we know the ID of the last block,  we
+    // will move the caller's OpLoopMerge from the last generated
+    // block into the first block. We also wait to avoid
+    // invalidating various iterators.
+    new_blk_ptr = AddGuardBlock(new_blocks, &callee2caller,
+                                std::move(new_blk_ptr), entry_blk_label_id);
+    if (new_blk_ptr == nullptr) return false;
+  }
+
   // Create return var if needed.
   const uint32_t calleeTypeId = calleeFn->type_id();
   uint32_t returnVarId = 0;
@@ -277,341 +523,50 @@
     }
   }
 
-  // Create set of callee result ids. Used to detect forward references
-  std::unordered_set<uint32_t> callee_result_ids;
-  calleeFn->ForEachInst([&callee_result_ids](const Instruction* cpi) {
+  calleeFn->WhileEachInst([&callee2caller, this](const Instruction* cpi) {
+    // Map each callee result id that is not mapped yet to a fresh caller id.
     const uint32_t rid = cpi->result_id();
-    if (rid != 0) callee_result_ids.insert(rid);
+    if (rid != 0 && callee2caller.find(rid) == callee2caller.end()) {
+      const uint32_t nid = context()->TakeNextId();
+      if (nid == 0) return false;
+      callee2caller[rid] = nid;
+    }
+    return true;
   });
 
-  // If the caller is a loop header and the callee has multiple blocks, then the
-  // normal inlining logic will place the OpLoopMerge in the last of several
-  // blocks in the loop.  Instead, it should be placed at the end of the first
-  // block.  We'll wait to move the OpLoopMerge until the end of the regular
-  // inlining logic, and only if necessary.
-  bool caller_is_loop_header = false;
-  if (call_block_itr->GetLoopMergeInst()) {
-    caller_is_loop_header = true;
-  }
-
-  bool callee_begins_with_structured_header =
-      (*(calleeFn->begin())).GetMergeInst() != nullptr;
-
-  // Clone and map callee code. Copy caller block code to beginning of
-  // first block and end of last block.
-  bool prevInstWasReturn = false;
-  uint32_t singleTripLoopHeaderId = 0;
-  uint32_t singleTripLoopContinueId = 0;
-  uint32_t returnLabelId = 0;
-  bool multiBlocks = false;
-  // new_blk_ptr is a new basic block in the caller.  New instructions are
-  // written to it.  It is created when we encounter the OpLabel
-  // of the first callee block.  It is appended to new_blocks only when
-  // it is complete.
-  std::unique_ptr<BasicBlock> new_blk_ptr;
-  bool successful = calleeFn->WhileEachInst(
-      [&new_blocks, &callee2caller, &call_block_itr, &call_inst_itr,
-       &new_blk_ptr, &prevInstWasReturn, &returnLabelId, &returnVarId,
-       caller_is_loop_header, callee_begins_with_structured_header,
-       &calleeTypeId, &multiBlocks, &postCallSB, &preCallSB, earlyReturn,
-       &singleTripLoopHeaderId, &singleTripLoopContinueId, &callee_result_ids,
-       this](const Instruction* cpi) {
-        switch (cpi->opcode()) {
-          case SpvOpFunction:
-          case SpvOpFunctionParameter:
-            // Already processed
-            break;
-          case SpvOpVariable:
-            if (cpi->NumInOperands() == 2) {
-              assert(callee2caller.count(cpi->result_id()) &&
-                     "Expected the variable to have already been mapped.");
-              uint32_t new_var_id = callee2caller.at(cpi->result_id());
-
-              // The initializer must be a constant or global value.  No mapped
-              // should be used.
-              uint32_t val_id = cpi->GetSingleWordInOperand(1);
-              AddStore(new_var_id, val_id, &new_blk_ptr);
-            }
-            break;
-          case SpvOpUnreachable:
-          case SpvOpKill: {
-            // Generate a return label so that we split the block with the
-            // function call. Copy the terminator into the new block.
-            if (returnLabelId == 0) {
-              returnLabelId = context()->TakeNextId();
-              if (returnLabelId == 0) {
-                return false;
-              }
-            }
-            std::unique_ptr<Instruction> terminator(
-                new Instruction(context(), cpi->opcode(), 0, 0, {}));
-            new_blk_ptr->AddInstruction(std::move(terminator));
-            break;
-          }
-          case SpvOpLabel: {
-            // If previous instruction was early return, insert branch
-            // instruction to return block.
-            if (prevInstWasReturn) {
-              if (returnLabelId == 0) {
-                returnLabelId = context()->TakeNextId();
-                if (returnLabelId == 0) {
-                  return false;
-                }
-              }
-              AddBranch(returnLabelId, &new_blk_ptr);
-              prevInstWasReturn = false;
-            }
-            // Finish current block (if it exists) and get label for next block.
-            uint32_t labelId;
-            bool firstBlock = false;
-            if (new_blk_ptr != nullptr) {
-              new_blocks->push_back(std::move(new_blk_ptr));
-              // If result id is already mapped, use it, otherwise get a new
-              // one.
-              const uint32_t rid = cpi->result_id();
-              const auto mapItr = callee2caller.find(rid);
-              labelId = (mapItr != callee2caller.end())
-                            ? mapItr->second
-                            : context()->TakeNextId();
-              if (labelId == 0) {
-                return false;
-              }
-            } else {
-              // First block needs to use label of original block
-              // but map callee label in case of phi reference.
-              labelId = call_block_itr->id();
-              callee2caller[cpi->result_id()] = labelId;
-              firstBlock = true;
-            }
-            // Create first/next block.
-            new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(labelId));
-            if (firstBlock) {
-              // Copy contents of original caller block up to call instruction.
-              for (auto cii = call_block_itr->begin(); cii != call_inst_itr;
-                   cii = call_block_itr->begin()) {
-                Instruction* inst = &*cii;
-                inst->RemoveFromList();
-                std::unique_ptr<Instruction> cp_inst(inst);
-                // Remember same-block ops for possible regeneration.
-                if (IsSameBlockOp(&*cp_inst)) {
-                  auto* sb_inst_ptr = cp_inst.get();
-                  preCallSB[cp_inst->result_id()] = sb_inst_ptr;
-                }
-                new_blk_ptr->AddInstruction(std::move(cp_inst));
-              }
-              if (caller_is_loop_header &&
-                  callee_begins_with_structured_header) {
-                // We can't place both the caller's merge instruction and
-                // another merge instruction in the same block.  So split the
-                // calling block. Insert an unconditional branch to a new guard
-                // block.  Later, once we know the ID of the last block,  we
-                // will move the caller's OpLoopMerge from the last generated
-                // block into the first block. We also wait to avoid
-                // invalidating various iterators.
-                const auto guard_block_id = context()->TakeNextId();
-                if (guard_block_id == 0) {
-                  return false;
-                }
-                AddBranch(guard_block_id, &new_blk_ptr);
-                new_blocks->push_back(std::move(new_blk_ptr));
-                // Start the next block.
-                new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(guard_block_id));
-                // Reset the mapping of the callee's entry block to point to
-                // the guard block.  Do this so we can fix up phis later on to
-                // satisfy dominance.
-                callee2caller[cpi->result_id()] = guard_block_id;
-              }
-              // If callee has early return, insert a header block for
-              // single-trip loop that will encompass callee code.  Start
-              // postheader block.
-              //
-              // Note: Consider the following combination:
-              //  - the caller is a single block loop
-              //  - the callee does not begin with a structure header
-              //  - the callee has multiple returns.
-              // We still need to split the caller block and insert a guard
-              // block. But we only need to do it once. We haven't done it yet,
-              // but the single-trip loop header will serve the same purpose.
-              if (earlyReturn) {
-                singleTripLoopHeaderId = context()->TakeNextId();
-                if (singleTripLoopHeaderId == 0) {
-                  return false;
-                }
-                AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
-                new_blocks->push_back(std::move(new_blk_ptr));
-                new_blk_ptr =
-                    MakeUnique<BasicBlock>(NewLabel(singleTripLoopHeaderId));
-                returnLabelId = context()->TakeNextId();
-                singleTripLoopContinueId = context()->TakeNextId();
-                if (returnLabelId == 0 || singleTripLoopContinueId == 0) {
-                  return false;
-                }
-                AddLoopMerge(returnLabelId, singleTripLoopContinueId,
-                             &new_blk_ptr);
-                uint32_t postHeaderId = context()->TakeNextId();
-                if (postHeaderId == 0) {
-                  return false;
-                }
-                AddBranch(postHeaderId, &new_blk_ptr);
-                new_blocks->push_back(std::move(new_blk_ptr));
-                new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(postHeaderId));
-                multiBlocks = true;
-                // Reset the mapping of the callee's entry block to point to
-                // the post-header block.  Do this so we can fix up phis later
-                // on to satisfy dominance.
-                callee2caller[cpi->result_id()] = postHeaderId;
-              }
-            } else {
-              multiBlocks = true;
-            }
-          } break;
-          case SpvOpReturnValue: {
-            // Store return value to return variable.
-            assert(returnVarId != 0);
-            uint32_t valId = cpi->GetInOperand(kSpvReturnValueId).words[0];
-            const auto mapItr = callee2caller.find(valId);
-            if (mapItr != callee2caller.end()) {
-              valId = mapItr->second;
-            }
-            AddStore(returnVarId, valId, &new_blk_ptr);
-
-            // Remember we saw a return; if followed by a label, will need to
-            // insert branch.
-            prevInstWasReturn = true;
-          } break;
-          case SpvOpReturn: {
-            // Remember we saw a return; if followed by a label, will need to
-            // insert branch.
-            prevInstWasReturn = true;
-          } break;
-          case SpvOpFunctionEnd: {
-            // If there was an early return, we generated a return label id
-            // for it.  Now we have to generate the return block with that Id.
-            if (returnLabelId != 0) {
-              // If previous instruction was return, insert branch instruction
-              // to return block.
-              if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr);
-              if (earlyReturn) {
-                // If we generated a loop header for the single-trip loop
-                // to accommodate early returns, insert the continue
-                // target block now, with a false branch back to the loop
-                // header.
-                new_blocks->push_back(std::move(new_blk_ptr));
-                new_blk_ptr =
-                    MakeUnique<BasicBlock>(NewLabel(singleTripLoopContinueId));
-                uint32_t false_id = GetFalseId();
-                if (false_id == 0) {
-                  return false;
-                }
-                AddBranchCond(false_id, singleTripLoopHeaderId, returnLabelId,
-                              &new_blk_ptr);
-              }
-              // Generate the return block.
-              new_blocks->push_back(std::move(new_blk_ptr));
-              new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(returnLabelId));
-              multiBlocks = true;
-            }
-            // Load return value into result id of call, if it exists.
-            if (returnVarId != 0) {
-              const uint32_t resId = call_inst_itr->result_id();
-              assert(resId != 0);
-              AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr);
-            }
-            // Copy remaining instructions from caller block.
-            for (Instruction* inst = call_inst_itr->NextNode(); inst;
-                 inst = call_inst_itr->NextNode()) {
-              inst->RemoveFromList();
-              std::unique_ptr<Instruction> cp_inst(inst);
-              // If multiple blocks generated, regenerate any same-block
-              // instruction that has not been seen in this last block.
-              if (multiBlocks) {
-                if (!CloneSameBlockOps(&cp_inst, &postCallSB, &preCallSB,
-                                       &new_blk_ptr)) {
-                  return false;
-                }
-
-                // Remember same-block ops in this block.
-                if (IsSameBlockOp(&*cp_inst)) {
-                  const uint32_t rid = cp_inst->result_id();
-                  postCallSB[rid] = rid;
-                }
-              }
-              new_blk_ptr->AddInstruction(std::move(cp_inst));
-            }
-            // Finalize inline code.
-            new_blocks->push_back(std::move(new_blk_ptr));
-          } break;
-          default: {
-            // Copy callee instruction and remap all input Ids.
-            std::unique_ptr<Instruction> cp_inst(cpi->Clone(context()));
-            bool succeeded = cp_inst->WhileEachInId(
-                [&callee2caller, &callee_result_ids, this](uint32_t* iid) {
-                  const auto mapItr = callee2caller.find(*iid);
-                  if (mapItr != callee2caller.end()) {
-                    *iid = mapItr->second;
-                  } else if (callee_result_ids.find(*iid) !=
-                             callee_result_ids.end()) {
-                    // Forward reference. Allocate a new id, map it,
-                    // use it and check for it when remapping result ids
-                    const uint32_t nid = context()->TakeNextId();
-                    if (nid == 0) {
-                      return false;
-                    }
-                    callee2caller[*iid] = nid;
-                    *iid = nid;
-                  }
-                  return true;
-                });
-            if (!succeeded) {
-              return false;
-            }
-            // If result id is non-zero, remap it. If already mapped, use mapped
-            // value, else use next id.
-            const uint32_t rid = cp_inst->result_id();
-            if (rid != 0) {
-              const auto mapItr = callee2caller.find(rid);
-              uint32_t nid;
-              if (mapItr != callee2caller.end()) {
-                nid = mapItr->second;
-              } else {
-                nid = context()->TakeNextId();
-                if (nid == 0) {
-                  return false;
-                }
-                callee2caller[rid] = nid;
-              }
-              cp_inst->SetResultId(nid);
-              get_decoration_mgr()->CloneDecorations(rid, nid);
-            }
-            new_blk_ptr->AddInstruction(std::move(cp_inst));
-          } break;
-        }
-        return true;
-      });
-
-  if (!successful) {
+  // Inline the entry block of the callee function.
+  if (!InlineEntryBlock(callee2caller, &new_blk_ptr, calleeFn->begin())) {
     return false;
   }
 
-  if (caller_is_loop_header && (new_blocks->size() > 1)) {
-    // Move the OpLoopMerge from the last block back to the first, where
-    // it belongs.
-    auto& first = new_blocks->front();
-    auto& last = new_blocks->back();
-    assert(first != last);
+  // Inline blocks of the callee function other than the entry block.
+  new_blk_ptr = InlineBasicBlocks(new_blocks, callee2caller,
+                                  std::move(new_blk_ptr), calleeFn);
+  if (new_blk_ptr == nullptr) return false;
 
-    // Insert a modified copy of the loop merge into the first block.
-    auto loop_merge_itr = last->tail();
-    --loop_merge_itr;
-    assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
-    std::unique_ptr<Instruction> cp_inst(loop_merge_itr->Clone(context()));
-    first->tail().InsertBefore(std::move(cp_inst));
+  new_blk_ptr =
+      InlineReturn(callee2caller, new_blocks, std::move(new_blk_ptr), calleeFn,
+                   &*(calleeFn->tail()->tail()), returnVarId);
 
-    // Remove the loop merge from the last block.
-    loop_merge_itr->RemoveFromList();
-    delete &*loop_merge_itr;
+  // Load return value into result id of call, if it exists.
+  if (returnVarId != 0) {
+    const uint32_t resId = call_inst_itr->result_id();
+    assert(resId != 0);
+    AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr);
   }
 
+  // Move instructions of original caller block after call instruction.
+  if (!MoveCallerInstsAfterFunctionCall(&preCallSB, &postCallSB, &new_blk_ptr,
+                                        call_inst_itr,
+                                        calleeFn->begin() != calleeFn->end()))
+    return false;
+
+  // Finalize inline code.
+  new_blocks->push_back(std::move(new_blk_ptr));
+
+  if (caller_is_loop_header && (new_blocks->size() > 1))
+    MoveLoopMergeInstToFirstBlock(new_blocks);
+
   // Update block map given replacement blocks.
   for (auto& blk : *new_blocks) {
     id2block_[blk->id()] = &*blk;
@@ -624,7 +579,21 @@
   const uint32_t calleeFnId =
       inst->GetSingleWordOperand(kSpvFunctionCallFunctionId);
   const auto ci = inlinable_.find(calleeFnId);
-  return ci != inlinable_.cend();
+  if (ci == inlinable_.cend()) return false;
+
+  if (early_return_funcs_.find(calleeFnId) != early_return_funcs_.end()) {
+    // We rely on the merge-return pass to handle the early return case
+    // in advance.
+    std::string message =
+        "The function '" + id2function_[calleeFnId]->DefInst().PrettyPrint() +
+        "' could not be inlined because the return instruction "
+        "is not at the end of the function. This could be fixed by "
+        "running merge-return before inlining.";
+    consumer()(SPV_MSG_WARNING, "", {0, 0, 0}, message.c_str());
+    return false;
+  }
+
+  return true;
 }
 
 void InlinePass::UpdateSucceedingPhis(
@@ -645,26 +614,6 @@
       });
 }
 
-bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) {
-  // If control not structured, do not do loop/return analysis
-  // TODO: Analyze returns in non-structured control flow
-  if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
-    return false;
-  const auto structured_analysis = context()->GetStructuredCFGAnalysis();
-  // Search for returns in structured construct.
-  bool return_in_construct = false;
-  for (auto& blk : *func) {
-    auto terminal_ii = blk.cend();
-    --terminal_ii;
-    if (spvOpcodeIsReturn(terminal_ii->opcode()) &&
-        structured_analysis->ContainingConstruct(blk.id()) != 0) {
-      return_in_construct = true;
-      break;
-    }
-  }
-  return !return_in_construct;
-}
-
 bool InlinePass::HasNoReturnInLoop(Function* func) {
   // If control not structured, do not do loop/return analysis
   // TODO: Analyze returns in non-structured control flow
@@ -686,10 +635,18 @@
 }
 
 void InlinePass::AnalyzeReturns(Function* func) {
+  // Record functions that have no return inside a loop.
   if (HasNoReturnInLoop(func)) {
     no_return_in_loop_.insert(func->result_id());
-    if (!HasNoReturnInStructuredConstruct(func))
+  }
+  // Record functions with a return in any block other than the tail block.
+  for (auto& blk : *func) {
+    auto terminal_ii = blk.cend();
+    --terminal_ii;
+    if (spvOpcodeIsReturn(terminal_ii->opcode()) && &blk != func->tail()) {
       early_return_funcs_.insert(func->result_id());
+      break;
+    }
   }
 }
 
diff --git a/source/opt/inline_pass.h b/source/opt/inline_pass.h
index bc5f781..19fb26e 100644
--- a/source/opt/inline_pass.h
+++ b/source/opt/inline_pass.h
@@ -124,10 +124,6 @@
   // Return true if |inst| is a function call that can be inlined.
   bool IsInlinableFunctionCall(const Instruction* inst);
 
-  // Return true if |func| does not have a return that is
-  // nested in a structured if, switch or loop.
-  bool HasNoReturnInStructuredConstruct(Function* func);
-
   // Return true if |func| has no return in a loop. The current analysis
   // requires structured control flow, so return false if control flow not
   // structured ie. module is not a shader.
@@ -171,6 +167,64 @@
   // Set of functions that are originally called directly or indirectly from a
   // continue construct.
   std::unordered_set<uint32_t> funcs_called_from_continue_;
+
+ private:
+  // Moves the instructions of |call_block_itr| that precede |call_inst_itr|
+  // to |new_blk_ptr|, recording same-block ops in |preCallSB| by result id.
+  void MoveInstsBeforeEntryBlock(
+      std::unordered_map<uint32_t, Instruction*>* preCallSB,
+      BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr,
+      UptrVectorIterator<BasicBlock> call_block_itr);
+
+  // Terminates |new_blk_ptr| with a branch to a new guard block and appends
+  // it to |new_blocks|; returns the guard block, or nullptr if TakeNextId() fails.
+  std::unique_ptr<BasicBlock> AddGuardBlock(
+      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+      std::unordered_map<uint32_t, uint32_t>* callee2caller,
+      std::unique_ptr<BasicBlock> new_blk_ptr, uint32_t entry_blk_label_id);
+
+  // Adds store instructions for initializers of variables.
+  InstructionList::iterator AddStoresForVariableInitializers(
+      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+      std::unique_ptr<BasicBlock>* new_blk_ptr,
+      UptrVectorIterator<BasicBlock> callee_block_itr);
+
+  // Inlines a single instruction of the callee function.
+  bool InlineInstructionInBB(
+      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+      BasicBlock* new_blk_ptr, const Instruction* inst);
+
+  // Inlines the return instruction of the callee function.
+  std::unique_ptr<BasicBlock> InlineReturn(
+      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+      std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn,
+      const Instruction* inst, uint32_t returnVarId);
+
+  // Inlines the entry block of the callee function.
+  bool InlineEntryBlock(
+      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+      std::unique_ptr<BasicBlock>* new_blk_ptr,
+      UptrVectorIterator<BasicBlock> callee_first_block);
+
+  // Inlines basic blocks of the callee function other than the entry basic
+  // block.
+  std::unique_ptr<BasicBlock> InlineBasicBlocks(
+      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
+      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
+      std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn);
+
+  // Moves the caller function's instructions that follow the call
+  // instruction to |new_blk_ptr|.
+  bool MoveCallerInstsAfterFunctionCall(
+      std::unordered_map<uint32_t, Instruction*>* preCallSB,
+      std::unordered_map<uint32_t, uint32_t>* postCallSB,
+      std::unique_ptr<BasicBlock>* new_blk_ptr,
+      BasicBlock::iterator call_inst_itr, bool multiBlocks);
+
+  // Moves the OpLoopMerge from the last block back to the first.
+  void MoveLoopMergeInstToFirstBlock(
+      std::vector<std::unique_ptr<BasicBlock>>* new_blocks);
 };
 
 }  // namespace opt
diff --git a/test/opt/inline_opaque_test.cpp b/test/opt/inline_opaque_test.cpp
index d10913a..b8d2dfa 100644
--- a/test/opt/inline_opaque_test.cpp
+++ b/test/opt/inline_opaque_test.cpp
@@ -102,12 +102,12 @@
 OpStore %32 %31
 %33 = OpLoad %S_t %s0
 OpStore %param %33
-%41 = OpAccessChain %_ptr_Function_18 %param %int_2
-%42 = OpLoad %18 %41
-%43 = OpAccessChain %_ptr_Function_v2float %param %int_0
-%44 = OpLoad %v2float %43
-%45 = OpImageSampleImplicitLod %v4float %42 %44
-OpStore %outColor %45
+%42 = OpAccessChain %_ptr_Function_18 %param %int_2
+%43 = OpLoad %18 %42
+%44 = OpAccessChain %_ptr_Function_v2float %param %int_0
+%45 = OpLoad %v2float %44
+%46 = OpImageSampleImplicitLod %v4float %43 %45
+OpStore %outColor %46
 OpReturn
 OpFunctionEnd
 )";
@@ -191,10 +191,10 @@
 %34 = OpVariable %_ptr_Function_20 Function
 %35 = OpVariable %_ptr_Function_20 Function
 %25 = OpVariable %_ptr_Function_20 Function
-%36 = OpLoad %20 %sampler16
-OpStore %34 %36
-%37 = OpLoad %20 %34
-OpStore %35 %37
+%37 = OpLoad %20 %sampler16
+OpStore %34 %37
+%38 = OpLoad %20 %34
+OpStore %35 %38
 %26 = OpLoad %20 %35
 OpStore %25 %26
 %27 = OpLoad %20 %25
@@ -301,12 +301,12 @@
 OpStore %33 %32
 %34 = OpLoad %S_t %s0
 OpStore %param %34
-%44 = OpAccessChain %_ptr_Function_19 %param %int_2
-%45 = OpLoad %19 %44
-%46 = OpAccessChain %_ptr_Function_v2float %param %int_0
-%47 = OpLoad %v2float %46
-%48 = OpImageSampleImplicitLod %v4float %45 %47
-OpStore %outColor %48
+%45 = OpAccessChain %_ptr_Function_19 %param %int_2
+%46 = OpLoad %19 %45
+%47 = OpAccessChain %_ptr_Function_v2float %param %int_0
+%48 = OpLoad %v2float %47
+%49 = OpImageSampleImplicitLod %v4float %46 %48
+OpStore %outColor %49
 OpReturn
 OpFunctionEnd
 )";
diff --git a/test/opt/inline_test.cpp b/test/opt/inline_test.cpp
index f44c04a..76573a6 100644
--- a/test/opt/inline_test.cpp
+++ b/test/opt/inline_test.cpp
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <iostream>
 #include <memory>
 #include <string>
 #include <vector>
@@ -115,12 +116,12 @@
       "%param = OpVariable %_ptr_Function_v4float Function",
          "%22 = OpLoad %v4float %BaseColor",
                "OpStore %param %22",
-         "%33 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%34 = OpLoad %float %33",
-         "%35 = OpAccessChain %_ptr_Function_float %param %uint_1",
-         "%36 = OpLoad %float %35",
-         "%37 = OpFAdd %float %34 %36",
-               "OpStore %32 %37",
+         "%34 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%35 = OpLoad %float %34",
+         "%36 = OpAccessChain %_ptr_Function_float %param %uint_1",
+         "%37 = OpLoad %float %36",
+         "%38 = OpFAdd %float %35 %37",
+               "OpStore %32 %38",
          "%23 = OpLoad %float %32",
          "%24 = OpCompositeConstruct %v4float %23 %23 %23 %23",
                "OpStore %color %24",
@@ -248,7 +249,7 @@
       // clang-format off
        "%main = OpFunction %void None %15",
          "%28 = OpLabel",
-         "%57 = OpVariable %_ptr_Function_float Function",
+         "%58 = OpVariable %_ptr_Function_float Function",
          "%46 = OpVariable %_ptr_Function_float Function",
          "%47 = OpVariable %_ptr_Function_float Function",
          "%48 = OpVariable %_ptr_Function_float Function",
@@ -256,21 +257,21 @@
     "%param_1 = OpVariable %_ptr_Function_v4float Function",
          "%29 = OpLoad %v4float %BaseColor",
                "OpStore %param_1 %29",
-         "%49 = OpAccessChain %_ptr_Function_float %param_1 %uint_0",
-         "%50 = OpLoad %float %49",
-         "%51 = OpAccessChain %_ptr_Function_float %param_1 %uint_1",
-         "%52 = OpLoad %float %51",
-         "%53 = OpFAdd %float %50 %52",
-               "OpStore %46 %53",
-         "%54 = OpAccessChain %_ptr_Function_float %param_1 %uint_2",
-         "%55 = OpLoad %float %54",
-               "OpStore %47 %55",
-         "%58 = OpLoad %float %46",
-         "%59 = OpLoad %float %47",
-         "%60 = OpFMul %float %58 %59",
-               "OpStore %57 %60",
-         "%56 = OpLoad %float %57",
-               "OpStore %48 %56",
+         "%50 = OpAccessChain %_ptr_Function_float %param_1 %uint_0",
+         "%51 = OpLoad %float %50",
+         "%52 = OpAccessChain %_ptr_Function_float %param_1 %uint_1",
+         "%53 = OpLoad %float %52",
+         "%54 = OpFAdd %float %51 %53",
+               "OpStore %46 %54",
+         "%55 = OpAccessChain %_ptr_Function_float %param_1 %uint_2",
+         "%56 = OpLoad %float %55",
+               "OpStore %47 %56",
+         "%60 = OpLoad %float %46",
+         "%61 = OpLoad %float %47",
+         "%62 = OpFMul %float %60 %61",
+               "OpStore %58 %62",
+         "%57 = OpLoad %float %58",
+               "OpStore %48 %57",
          "%30 = OpLoad %float %48",
          "%31 = OpCompositeConstruct %v4float %30 %30 %30 %30",
                "OpStore %color %31",
@@ -390,13 +391,13 @@
                "OpStore %b %24",
          "%25 = OpLoad %v4float %b",
                "OpStore %param %25",
-         "%39 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%40 = OpLoad %float %39",
-         "%41 = OpAccessChain %_ptr_Function_float %param %uint_1",
-         "%42 = OpLoad %float %41",
-         "%43 = OpFAdd %float %40 %42",
-         "%44 = OpAccessChain %_ptr_Function_float %param %uint_2",
-               "OpStore %44 %43",
+         "%40 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%41 = OpLoad %float %40",
+         "%42 = OpAccessChain %_ptr_Function_float %param %uint_1",
+         "%43 = OpLoad %float %42",
+         "%44 = OpFAdd %float %41 %43",
+         "%45 = OpAccessChain %_ptr_Function_float %param %uint_2",
+               "OpStore %45 %44",
          "%27 = OpLoad %v4float %param",
                "OpStore %b %27",
          "%28 = OpAccessChain %_ptr_Function_float %b %uint_2",
@@ -521,21 +522,21 @@
       "%param = OpVariable %_ptr_Function_v4float Function",
          "%24 = OpLoad %v4float %BaseColor",
                "OpStore %param %24",
-         "%40 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%41 = OpLoad %float %40",
-               "OpStore %38 %41",
-         "%42 = OpLoad %float %38",
-         "%43 = OpFOrdLessThan %bool %42 %float_0",
-               "OpSelectionMerge %44 None",
-               "OpBranchConditional %43 %45 %44",
+         "%41 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%42 = OpLoad %float %41",
+               "OpStore %38 %42",
+         "%43 = OpLoad %float %38",
+         "%44 = OpFOrdLessThan %bool %43 %float_0",
+               "OpSelectionMerge %48 None",
+               "OpBranchConditional %44 %45 %48",
          "%45 = OpLabel",
          "%46 = OpLoad %float %38",
          "%47 = OpFNegate %float %46",
                "OpStore %38 %47",
-               "OpBranch %44",
-         "%44 = OpLabel",
-         "%48 = OpLoad %float %38",
-               "OpStore %39 %48",
+               "OpBranch %48",
+         "%48 = OpLabel",
+         "%49 = OpLoad %float %38",
+               "OpStore %39 %49",
          "%25 = OpLoad %float %39",
          "%26 = OpCompositeConstruct %v4float %25 %25 %25 %25",
                "OpStore %color %26",
@@ -675,8 +676,8 @@
       // clang-format off
        "%main = OpFunction %void None %12",
          "%27 = OpLabel",
-         "%62 = OpVariable %_ptr_Function_float Function",
          "%63 = OpVariable %_ptr_Function_float Function",
+         "%64 = OpVariable %_ptr_Function_float Function",
          "%52 = OpVariable %_ptr_Function_float Function",
          "%53 = OpVariable %_ptr_Function_float Function",
       "%color = OpVariable %_ptr_Function_v4float Function",
@@ -687,20 +688,20 @@
          "%29 = OpAccessChain %_ptr_Function_float %color %uint_0",
          "%30 = OpLoad %float %29",
                "OpStore %param %30",
-         "%54 = OpLoad %float %param",
-               "OpStore %52 %54",
-         "%55 = OpLoad %float %52",
-         "%56 = OpFOrdLessThan %bool %55 %float_0",
-               "OpSelectionMerge %57 None",
-               "OpBranchConditional %56 %58 %57",
+         "%55 = OpLoad %float %param",
+               "OpStore %52 %55",
+         "%56 = OpLoad %float %52",
+         "%57 = OpFOrdLessThan %bool %56 %float_0",
+               "OpSelectionMerge %61 None",
+               "OpBranchConditional %57 %58 %61",
          "%58 = OpLabel",
          "%59 = OpLoad %float %52",
          "%60 = OpFNegate %float %59",
                "OpStore %52 %60",
-               "OpBranch %57",
-         "%57 = OpLabel",
-         "%61 = OpLoad %float %52",
-               "OpStore %53 %61",
+               "OpBranch %61",
+         "%61 = OpLabel",
+         "%62 = OpLoad %float %52",
+               "OpStore %53 %62",
          "%31 = OpLoad %float %53",
          "%32 = OpFOrdGreaterThan %bool %31 %float_2",
                "OpSelectionMerge %33 None",
@@ -709,25 +710,25 @@
          "%35 = OpAccessChain %_ptr_Function_float %color %uint_1",
          "%36 = OpLoad %float %35",
                "OpStore %param_0 %36",
-         "%64 = OpLoad %float %param_0",
-               "OpStore %62 %64",
-         "%65 = OpLoad %float %62",
-         "%66 = OpFOrdLessThan %bool %65 %float_0",
-               "OpSelectionMerge %67 None",
-               "OpBranchConditional %66 %68 %67",
-         "%68 = OpLabel",
-         "%69 = OpLoad %float %62",
-         "%70 = OpFNegate %float %69",
-               "OpStore %62 %70",
-               "OpBranch %67",
-         "%67 = OpLabel",
-         "%71 = OpLoad %float %62",
+         "%66 = OpLoad %float %param_0",
+               "OpStore %63 %66",
+         "%67 = OpLoad %float %63",
+         "%68 = OpFOrdLessThan %bool %67 %float_0",
+               "OpSelectionMerge %72 None",
+               "OpBranchConditional %68 %69 %72",
+         "%69 = OpLabel",
+         "%70 = OpLoad %float %63",
+         "%71 = OpFNegate %float %70",
                "OpStore %63 %71",
-         "%37 = OpLoad %float %63",
+               "OpBranch %72",
+         "%72 = OpLabel",
+         "%73 = OpLoad %float %63",
+               "OpStore %64 %73",
+         "%37 = OpLoad %float %64",
          "%38 = OpFOrdGreaterThan %bool %37 %float_2",
                "OpBranch %33",
          "%33 = OpLabel",
-         "%39 = OpPhi %bool %32 %57 %38 %67",
+         "%39 = OpPhi %bool %32 %61 %38 %72",
                "OpSelectionMerge %40 None",
                "OpBranchConditional %39 %41 %40",
          "%41 = OpLabel",
@@ -902,28 +903,28 @@
                "OpStore %color1 %42",
          "%43 = OpLoad %v4float %BaseColor",
                "OpStore %param %43",
-         "%68 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%69 = OpLoad %float %68",
-               "OpStore %66 %69",
-         "%70 = OpLoad %float %66",
-         "%71 = OpFOrdLessThan %bool %70 %float_0",
-               "OpSelectionMerge %72 None",
-               "OpBranchConditional %71 %73 %72",
+         "%69 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%70 = OpLoad %float %69",
+               "OpStore %66 %70",
+         "%71 = OpLoad %float %66",
+         "%72 = OpFOrdLessThan %bool %71 %float_0",
+               "OpSelectionMerge %76 None",
+               "OpBranchConditional %72 %73 %76",
          "%73 = OpLabel",
          "%74 = OpLoad %float %66",
          "%75 = OpFNegate %float %74",
                "OpStore %66 %75",
-               "OpBranch %72",
-         "%72 = OpLabel",
-         "%76 = OpLoad %float %66",
-               "OpStore %67 %76",
+               "OpBranch %76",
+         "%76 = OpLabel",
+         "%77 = OpLoad %float %66",
+               "OpStore %67 %77",
          "%44 = OpLoad %float %67",
          "%45 = OpCompositeConstruct %v4float %44 %44 %44 %44",
                "OpStore %color2 %45",
          "%46 = OpLoad %25 %t2D",
          "%47 = OpLoad %27 %samp",
-         "%77 = OpSampledImage %29 %39 %40",
-         "%48 = OpImageSampleImplicitLod %v4float %77 %35",
+         "%78 = OpSampledImage %29 %39 %40",
+         "%48 = OpImageSampleImplicitLod %v4float %78 %35",
                "OpStore %color3 %48",
          "%49 = OpLoad %v4float %color1",
          "%50 = OpLoad %v4float %color2",
@@ -1108,27 +1109,27 @@
                "OpStore %color1 %43",
          "%46 = OpLoad %v4float %BaseColor",
                "OpStore %param %46",
-         "%70 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%71 = OpLoad %float %70",
-               "OpStore %68 %71",
-         "%72 = OpLoad %float %68",
-         "%73 = OpFOrdLessThan %bool %72 %float_0",
-               "OpSelectionMerge %74 None",
-               "OpBranchConditional %73 %75 %74",
+         "%71 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%72 = OpLoad %float %71",
+               "OpStore %68 %72",
+         "%73 = OpLoad %float %68",
+         "%74 = OpFOrdLessThan %bool %73 %float_0",
+               "OpSelectionMerge %78 None",
+               "OpBranchConditional %74 %75 %78",
          "%75 = OpLabel",
          "%76 = OpLoad %float %68",
          "%77 = OpFNegate %float %76",
                "OpStore %68 %77",
-               "OpBranch %74",
-         "%74 = OpLabel",
-         "%78 = OpLoad %float %68",
-               "OpStore %69 %78",
+               "OpBranch %78",
+         "%78 = OpLabel",
+         "%79 = OpLoad %float %68",
+               "OpStore %69 %79",
          "%47 = OpLoad %float %69",
          "%48 = OpCompositeConstruct %v4float %47 %47 %47 %47",
                "OpStore %color2 %48",
-         "%79 = OpSampledImage %30 %40 %41",
-         "%80 = OpImage %26 %79",
-         "%49 = OpSampledImage %30 %80 %45",
+         "%80 = OpSampledImage %30 %40 %41",
+         "%81 = OpImage %26 %80",
+         "%49 = OpSampledImage %30 %81 %45",
          "%50 = OpImageSampleImplicitLod %v4float %49 %36",
                "OpStore %color3 %50",
          "%51 = OpLoad %v4float %color1",
@@ -1314,28 +1315,28 @@
                "OpStore %color1 %43",
          "%47 = OpLoad %v4float %BaseColor",
                "OpStore %param %47",
-         "%70 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%71 = OpLoad %float %70",
-               "OpStore %68 %71",
-         "%72 = OpLoad %float %68",
-         "%73 = OpFOrdLessThan %bool %72 %float_0",
-               "OpSelectionMerge %74 None",
-               "OpBranchConditional %73 %75 %74",
+         "%71 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%72 = OpLoad %float %71",
+               "OpStore %68 %72",
+         "%73 = OpLoad %float %68",
+         "%74 = OpFOrdLessThan %bool %73 %float_0",
+               "OpSelectionMerge %78 None",
+               "OpBranchConditional %74 %75 %78",
          "%75 = OpLabel",
          "%76 = OpLoad %float %68",
          "%77 = OpFNegate %float %76",
                "OpStore %68 %77",
-               "OpBranch %74",
-         "%74 = OpLabel",
-         "%78 = OpLoad %float %68",
-               "OpStore %69 %78",
+               "OpBranch %78",
+         "%78 = OpLabel",
+         "%79 = OpLoad %float %68",
+               "OpStore %69 %79",
          "%48 = OpLoad %float %69",
          "%49 = OpCompositeConstruct %v4float %48 %48 %48 %48",
                "OpStore %color2 %49",
-         "%79 = OpSampledImage %30 %40 %41",
-         "%80 = OpImage %26 %79",
-         "%81 = OpSampledImage %30 %80 %45",
-         "%50 = OpImageSampleImplicitLod %v4float %81 %36",
+         "%80 = OpSampledImage %30 %40 %41",
+         "%81 = OpImage %26 %80",
+         "%82 = OpSampledImage %30 %81 %45",
+         "%50 = OpImageSampleImplicitLod %v4float %82 %36",
                "OpStore %color3 %50",
          "%51 = OpLoad %v4float %color1",
          "%52 = OpLoad %v4float %color2",
@@ -1355,292 +1356,6 @@
       /* skip_nop = */ false, /* do_validate = */ true);
 }
 
-TEST_F(InlineTest, EarlyReturnFunctionInlined) {
-  // #version 140
-  //
-  // in vec4 BaseColor;
-  //
-  // float foo(vec4 bar)
-  // {
-  //     if (bar.x < 0.0)
-  //         return 0.0;
-  //     return bar.x;
-  // }
-  //
-  // void main()
-  // {
-  //     vec4 color = vec4(foo(BaseColor));
-  //     gl_FragColor = color;
-  // }
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor
-OpExecutionMode %main OriginUpperLeft
-OpSource GLSL 140
-OpName %main "main"
-OpName %foo_vf4_ "foo(vf4;"
-OpName %bar "bar"
-OpName %color "color"
-OpName %BaseColor "BaseColor"
-OpName %param "param"
-OpName %gl_FragColor "gl_FragColor"
-%void = OpTypeVoid
-%10 = OpTypeFunction %void
-%float = OpTypeFloat 32
-%v4float = OpTypeVector %float 4
-%_ptr_Function_v4float = OpTypePointer Function %v4float
-%14 = OpTypeFunction %float %_ptr_Function_v4float
-%uint = OpTypeInt 32 0
-%uint_0 = OpConstant %uint 0
-%_ptr_Function_float = OpTypePointer Function %float
-%float_0 = OpConstant %float 0
-%bool = OpTypeBool
-%_ptr_Input_v4float = OpTypePointer Input %v4float
-%BaseColor = OpVariable %_ptr_Input_v4float Input
-%_ptr_Output_v4float = OpTypePointer Output %v4float
-%gl_FragColor = OpVariable %_ptr_Output_v4float Output
-)";
-
-  const std::string nonEntryFuncs =
-      R"(%foo_vf4_ = OpFunction %float None %14
-%bar = OpFunctionParameter %_ptr_Function_v4float
-%27 = OpLabel
-%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%29 = OpLoad %float %28
-%30 = OpFOrdLessThan %bool %29 %float_0
-OpSelectionMerge %31 None
-OpBranchConditional %30 %32 %31
-%32 = OpLabel
-OpReturnValue %float_0
-%31 = OpLabel
-%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%34 = OpLoad %float %33
-OpReturnValue %34
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %10
-%22 = OpLabel
-%color = OpVariable %_ptr_Function_v4float Function
-%param = OpVariable %_ptr_Function_v4float Function
-%23 = OpLoad %v4float %BaseColor
-OpStore %param %23
-%24 = OpFunctionCall %float %foo_vf4_ %param
-%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
-OpStore %color %25
-%26 = OpLoad %v4float %color
-OpStore %gl_FragColor %26
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%false = OpConstantFalse %bool
-%main = OpFunction %void None %10
-%22 = OpLabel
-%35 = OpVariable %_ptr_Function_float Function
-%color = OpVariable %_ptr_Function_v4float Function
-%param = OpVariable %_ptr_Function_v4float Function
-%23 = OpLoad %v4float %BaseColor
-OpStore %param %23
-OpBranch %36
-%36 = OpLabel
-OpLoopMerge %37 %38 None
-OpBranch %39
-%39 = OpLabel
-%40 = OpAccessChain %_ptr_Function_float %param %uint_0
-%41 = OpLoad %float %40
-%42 = OpFOrdLessThan %bool %41 %float_0
-OpSelectionMerge %43 None
-OpBranchConditional %42 %44 %43
-%44 = OpLabel
-OpStore %35 %float_0
-OpBranch %37
-%43 = OpLabel
-%45 = OpAccessChain %_ptr_Function_float %param %uint_0
-%46 = OpLoad %float %45
-OpStore %35 %46
-OpBranch %37
-%38 = OpLabel
-OpBranchConditional %false %36 %37
-%37 = OpLabel
-%24 = OpLoad %float %35
-%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
-OpStore %color %25
-%26 = OpLoad %v4float %color
-OpStore %gl_FragColor %26
-OpReturn
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + before + nonEntryFuncs,
-                                              predefs + after + nonEntryFuncs,
-                                              false, true);
-}
-
-TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) {
-  // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755
-  //
-  // Original example is derived from:
-  //
-  // #version 450
-  //
-  // float foo() {
-  //     if (true) {
-  //     }
-  // }
-  //
-  // void main() { foo(); }
-  //
-  // But the order of basic blocks in foo is changed so that the return
-  // block is listed second-last.  There is only one return in the callee
-  // but it does not appear last.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-OpMemoryModel Logical GLSL450
-OpEntryPoint Vertex %main "main"
-OpSource GLSL 450
-OpName %main "main"
-OpName %foo_ "foo("
-%void = OpTypeVoid
-%4 = OpTypeFunction %void
-%bool = OpTypeBool
-%true = OpConstantTrue %bool
-)";
-
-  const std::string nonEntryFuncs =
-      R"(%foo_ = OpFunction %void None %4
-%7 = OpLabel
-OpSelectionMerge %8 None
-OpBranchConditional %true %9 %8
-%8 = OpLabel
-OpReturn
-%9 = OpLabel
-OpBranch %8
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %4
-%10 = OpLabel
-%11 = OpFunctionCall %void %foo_
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%main = OpFunction %void None %4
-%10 = OpLabel
-OpSelectionMerge %12 None
-OpBranchConditional %true %13 %12
-%12 = OpLabel
-OpBranch %14
-%13 = OpLabel
-OpBranch %12
-%14 = OpLabel
-OpReturn
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
-                                              predefs + nonEntryFuncs + after,
-                                              false, true);
-}
-
-TEST_F(InlineTest, ForwardReferencesInPhiInlined) {
-  // The basic structure of the test case is like this:
-  //
-  // int foo() {
-  //   int result = 1;
-  //   if (true) {
-  //      result = 1;
-  //   }
-  //   return result;
-  // }
-  //
-  // void main() {
-  //  int x = foo();
-  // }
-  //
-  // but with modifications: Using Phi instead of load/store, and the
-  // return block in foo appears before the "then" block.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Vertex %main "main"
-OpSource GLSL 450
-OpName %main "main"
-OpName %foo_ "foo("
-OpName %x "x"
-%void = OpTypeVoid
-%6 = OpTypeFunction %void
-%int = OpTypeInt 32 1
-%8 = OpTypeFunction %int
-%bool = OpTypeBool
-%true = OpConstantTrue %bool
-%int_0 = OpConstant %int 0
-%_ptr_Function_int = OpTypePointer Function %int
-)";
-
-  const std::string nonEntryFuncs =
-      R"(%foo_ = OpFunction %int None %8
-%13 = OpLabel
-%14 = OpCopyObject %int %int_0
-OpSelectionMerge %15 None
-OpBranchConditional %true %16 %15
-%15 = OpLabel
-%17 = OpPhi %int %14 %13 %18 %16
-OpReturnValue %17
-%16 = OpLabel
-%18 = OpCopyObject %int %int_0
-OpBranch %15
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %6
-%19 = OpLabel
-%x = OpVariable %_ptr_Function_int Function
-%20 = OpFunctionCall %int %foo_
-OpStore %x %20
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%main = OpFunction %void None %6
-%19 = OpLabel
-%21 = OpVariable %_ptr_Function_int Function
-%x = OpVariable %_ptr_Function_int Function
-%22 = OpCopyObject %int %int_0
-OpSelectionMerge %23 None
-OpBranchConditional %true %24 %23
-%23 = OpLabel
-%26 = OpPhi %int %22 %19 %25 %24
-OpStore %21 %26
-OpBranch %27
-%24 = OpLabel
-%25 = OpCopyObject %int %int_0
-OpBranch %23
-%27 = OpLabel
-%20 = OpLoad %int %21
-OpStore %x %20
-OpReturn
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
-                                              predefs + nonEntryFuncs + after,
-                                              false, true);
-}
-
 TEST_F(InlineTest, EarlyReturnInLoopIsNotInlined) {
   // #version 140
   //
@@ -1820,8 +1535,8 @@
 OpBranch %10
 %10 = OpLabel
 OpLoopMerge %12 %10 None
-OpBranch %13
-%13 = OpLabel
+OpBranch %14
+%14 = OpLabel
 OpBranchConditional %true %10 %12
 %12 = OpLabel
 OpReturn
@@ -1890,11 +1605,11 @@
 OpBranch %18
 %18 = OpLabel
 %19 = OpCopyObject %int %int_3
-%25 = OpCopyObject %int %int_1
+%26 = OpCopyObject %int %int_1
 OpLoopMerge %22 %23 None
-OpBranch %26
-%26 = OpLabel
-%27 = OpCopyObject %int %int_2
+OpBranch %27
+%27 = OpLabel
+%28 = OpCopyObject %int %int_2
 %21 = OpCopyObject %int %int_4
 OpBranchConditional %true %23 %22
 %23 = OpLabel
@@ -1983,11 +1698,11 @@
 OpLoopMerge %16 %13 None
 OpBranch %17
 %17 = OpLabel
-%18 = OpCopyObject %bool %true
-OpSelectionMerge %19 None
-OpBranchConditional %true %19 %19
-%19 = OpLabel
-%20 = OpPhi %bool %18 %17
+%19 = OpCopyObject %bool %true
+OpSelectionMerge %20 None
+OpBranchConditional %true %20 %20
+%20 = OpLabel
+%21 = OpPhi %bool %19 %17
 OpBranchConditional %true %13 %16
 %16 = OpLabel
 OpReturn
@@ -2060,11 +1775,11 @@
 OpLoopMerge %22 %23 None
 OpBranch %25
 %25 = OpLabel
-%26 = OpCopyObject %int %int_1
-OpSelectionMerge %27 None
-OpBranchConditional %true %27 %27
-%27 = OpLabel
-%28 = OpCopyObject %int %int_2
+%27 = OpCopyObject %int %int_1
+OpSelectionMerge %28 None
+OpBranchConditional %true %28 %28
+%28 = OpLabel
+%29 = OpCopyObject %int %int_2
 %21 = OpCopyObject %int %int_4
 OpBranchConditional %true %23 %22
 %23 = OpLabel
@@ -2080,165 +1795,6 @@
                                               false, true);
 }
 
-TEST_F(
-    InlineTest,
-    SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMergeAndMultiReturns) {
-  // This is similar to SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMerge
-  // except that in addition to starting with a selection header, the
-  // callee also has multi returns.
-  //
-  // So now we have to accommodate:
-  // - The caller's OpLoopMerge (which must move to the first block)
-  // - The single-trip loop to wrap the multi returns, and
-  // - The callee's selection merge in its first block.
-  // Each of these must go into their own blocks.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %1 "main"
-OpSource OpenCL_C 120
-%bool = OpTypeBool
-%int = OpTypeInt 32 1
-%true = OpConstantTrue %bool
-%false = OpConstantFalse %bool
-%int_0 = OpConstant %int 0
-%int_1 = OpConstant %int 1
-%int_2 = OpConstant %int 2
-%int_3 = OpConstant %int 3
-%int_4 = OpConstant %int 4
-%void = OpTypeVoid
-%12 = OpTypeFunction %void
-)";
-
-  const std::string nonEntryFuncs =
-      R"(%13 = OpFunction %void None %12
-%14 = OpLabel
-%15 = OpCopyObject %int %int_0
-OpReturn
-%16 = OpLabel
-%17 = OpCopyObject %int %int_1
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%1 = OpFunction %void None %12
-%18 = OpLabel
-OpBranch %19
-%19 = OpLabel
-%20 = OpCopyObject %int %int_2
-%21 = OpFunctionCall %void %13
-%22 = OpCopyObject %int %int_3
-OpLoopMerge %23 %19 None
-OpBranchConditional %true %19 %23
-%23 = OpLabel
-%24 = OpCopyObject %int %int_4
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%1 = OpFunction %void None %12
-%18 = OpLabel
-OpBranch %19
-%19 = OpLabel
-%20 = OpCopyObject %int %int_2
-%25 = OpCopyObject %int %int_0
-OpLoopMerge %23 %19 None
-OpBranch %26
-%27 = OpLabel
-%28 = OpCopyObject %int %int_1
-OpBranch %26
-%26 = OpLabel
-%22 = OpCopyObject %int %int_3
-OpBranchConditional %true %19 %23
-%23 = OpLabel
-%24 = OpCopyObject %int %int_4
-OpReturn
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
-                                              predefs + nonEntryFuncs + after,
-                                              false, true);
-}
-
-TEST_F(InlineTest, CalleeWithMultiReturnAndPhiRequiresEntryBlockRemapping) {
-  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/790
-  //
-  // The callee has multiple returns, and so must be wrapped with a single-trip
-  // loop.  That code must remap the callee entry block ID to the introduced
-  // loop body's ID.  Otherwise you can get a dominance error in a cloned OpPhi.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %1 "main"
-OpSource OpenCL_C 120
-%int = OpTypeInt 32 1
-%int_0 = OpConstant %int 0
-%int_1 = OpConstant %int 1
-%int_2 = OpConstant %int 2
-%int_3 = OpConstant %int 3
-%int_4 = OpConstant %int 4
-%void = OpTypeVoid
-%9 = OpTypeFunction %void
-%bool = OpTypeBool
-%false = OpConstantFalse %bool
-)";
-
-  // This callee has multiple returns, and a Phi in the second block referencing
-  // a value generated in the entry block.
-  const std::string nonEntryFuncs =
-      R"(%12 = OpFunction %void None %9
-%13 = OpLabel
-%14 = OpCopyObject %int %int_0
-OpBranch %15
-%15 = OpLabel
-%16 = OpPhi %int %14 %13
-%17 = OpCopyObject %int %int_1
-OpReturn
-%18 = OpLabel
-%19 = OpCopyObject %int %int_2
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%1 = OpFunction %void None %9
-%20 = OpLabel
-%21 = OpCopyObject %int %int_3
-%22 = OpFunctionCall %void %12
-%23 = OpCopyObject %int %int_4
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%1 = OpFunction %void None %9
-%20 = OpLabel
-%21 = OpCopyObject %int %int_3
-%24 = OpCopyObject %int %int_0
-OpBranch %25
-%25 = OpLabel
-%26 = OpPhi %int %24 %20
-%27 = OpCopyObject %int %int_1
-OpBranch %28
-%29 = OpLabel
-%30 = OpCopyObject %int %int_2
-OpBranch %28
-%28 = OpLabel
-%23 = OpCopyObject %int %int_4
-OpReturn
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
-                                              predefs + nonEntryFuncs + after,
-                                              false, true);
-}
-
 TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) {
   // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
   //
@@ -2324,138 +1880,6 @@
       predefs + caller + callee, predefs + caller + callee, false, true);
 }
 
-TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
-  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
-  //
-  // The callee has a single return, but needs single-trip loop wrapper
-  // to be inlined because the return is in a selection structure.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %main "main" %_GLF_color
-OpExecutionMode %main OriginUpperLeft
-OpSource ESSL 310
-OpName %main "main"
-OpName %f_ "f("
-OpName %i "i"
-OpName %_GLF_color "_GLF_color"
-OpDecorate %_GLF_color Location 0
-%void = OpTypeVoid
-%7 = OpTypeFunction %void
-%float = OpTypeFloat 32
-%9 = OpTypeFunction %float
-%float_1 = OpConstant %float 1
-%bool = OpTypeBool
-%false = OpConstantFalse %bool
-%true = OpConstantTrue %bool
-%int = OpTypeInt 32 1
-%_ptr_Function_int = OpTypePointer Function %int
-%int_0 = OpConstant %int 0
-%int_1 = OpConstant %int 1
-%v4float = OpTypeVector %float 4
-%_ptr_Output_v4float = OpTypePointer Output %v4float
-%_GLF_color = OpVariable %_ptr_Output_v4float Output
-%float_0 = OpConstant %float 0
-%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
-%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
-)";
-
-  const std::string new_predefs =
-      R"(%_ptr_Function_float = OpTypePointer Function %float
-)";
-
-  const std::string main_before =
-      R"(%main = OpFunction %void None %7
-%23 = OpLabel
-%i = OpVariable %_ptr_Function_int Function
-OpStore %i %int_0
-OpBranch %24
-%24 = OpLabel
-OpLoopMerge %25 %26 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpLoad %int %i
-%29 = OpSLessThan %bool %28 %int_1
-OpBranchConditional %29 %30 %25
-%30 = OpLabel
-OpStore %_GLF_color %21
-%31 = OpFunctionCall %float %f_
-OpBranch %26
-%26 = OpLabel
-%32 = OpLoad %int %i
-%33 = OpIAdd %int %32 %int_1
-OpStore %i %33
-OpBranch %24
-%25 = OpLabel
-OpStore %_GLF_color %22
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string main_after =
-      R"(%main = OpFunction %void None %7
-%23 = OpLabel
-%38 = OpVariable %_ptr_Function_float Function
-%i = OpVariable %_ptr_Function_int Function
-OpStore %i %int_0
-OpBranch %24
-%24 = OpLabel
-OpLoopMerge %25 %26 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpLoad %int %i
-%29 = OpSLessThan %bool %28 %int_1
-OpBranchConditional %29 %30 %25
-%30 = OpLabel
-OpStore %_GLF_color %21
-OpBranch %39
-%39 = OpLabel
-OpLoopMerge %40 %41 None
-OpBranch %42
-%42 = OpLabel
-OpSelectionMerge %43 None
-OpBranchConditional %true %44 %43
-%44 = OpLabel
-OpStore %38 %float_1
-OpBranch %40
-%43 = OpLabel
-OpStore %38 %float_1
-OpBranch %40
-%41 = OpLabel
-OpBranchConditional %false %39 %40
-%40 = OpLabel
-%31 = OpLoad %float %38
-OpBranch %26
-%26 = OpLabel
-%32 = OpLoad %int %i
-%33 = OpIAdd %int %32 %int_1
-OpStore %i %33
-OpBranch %24
-%25 = OpLabel
-OpStore %_GLF_color %22
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string callee =
-      R"(%f_ = OpFunction %float None %9
-%34 = OpLabel
-OpSelectionMerge %35 None
-OpBranchConditional %true %36 %35
-%36 = OpLabel
-OpReturnValue %float_1
-%35 = OpLabel
-OpReturnValue %float_1
-OpFunctionEnd
-)";
-
-  SinglePassRunAndCheck<InlineExhaustivePass>(
-      predefs + main_before + callee,
-      predefs + new_predefs + main_after + callee, false, true);
-}
-
 TEST_F(InlineTest, Decorated1) {
   // Same test as Simple with the difference
   // that OpFAdd in the outlined function is
@@ -2526,7 +1950,7 @@
 )";
 
   const std::string after =
-      R"(OpDecorate %37 RelaxedPrecision
+      R"(OpDecorate %38 RelaxedPrecision
 %void = OpTypeVoid
 %11 = OpTypeFunction %void
 %float = OpTypeFloat 32
@@ -2548,12 +1972,12 @@
 %param = OpVariable %_ptr_Function_v4float Function
 %23 = OpLoad %v4float %BaseColor
 OpStore %param %23
-%33 = OpAccessChain %_ptr_Function_float %param %uint_0
-%34 = OpLoad %float %33
-%35 = OpAccessChain %_ptr_Function_float %param %uint_1
-%36 = OpLoad %float %35
-%37 = OpFAdd %float %34 %36
-OpStore %32 %37
+%34 = OpAccessChain %_ptr_Function_float %param %uint_0
+%35 = OpLoad %float %34
+%36 = OpAccessChain %_ptr_Function_float %param %uint_1
+%37 = OpLoad %float %36
+%38 = OpFAdd %float %35 %37
+OpStore %32 %38
 %24 = OpLoad %float %32
 %25 = OpCompositeConstruct %v4float %24 %24 %24 %24
 OpStore %color %25
@@ -2672,12 +2096,12 @@
 %param = OpVariable %_ptr_Function_v4float Function
 %22 = OpLoad %v4float %BaseColor
 OpStore %param %22
-%33 = OpAccessChain %_ptr_Function_float %param %uint_0
-%34 = OpLoad %float %33
-%35 = OpAccessChain %_ptr_Function_float %param %uint_1
-%36 = OpLoad %float %35
-%37 = OpFAdd %float %34 %36
-OpStore %32 %37
+%34 = OpAccessChain %_ptr_Function_float %param %uint_0
+%35 = OpLoad %float %34
+%36 = OpAccessChain %_ptr_Function_float %param %uint_1
+%37 = OpLoad %float %36
+%38 = OpFAdd %float %35 %37
+OpStore %32 %38
 %23 = OpLoad %float %32
 %24 = OpCompositeConstruct %v4float %23 %23 %23 %23
 OpStore %color %24
@@ -3017,7 +2441,7 @@
 %main = OpFunction %void None %3
 %5 = OpLabel
 OpKill
-%17 = OpLabel
+%18 = OpLabel
 OpReturn
 OpFunctionEnd
 %kill_ = OpFunction %void None %3
@@ -3030,6 +2454,560 @@
   SinglePassRunAndCheck<InlineExhaustivePass>(before, after, false, true);
 }
 
+TEST_F(InlineTest, EarlyReturnFunctionInlined) {
+  // foo() has an early return, so merge-return must run before inlining.
+  // GLSL source (#version 140):
+  // in vec4 BaseColor;
+  //
+  // float foo(vec4 bar)
+  // {
+  //     if (bar.x < 0.0)
+  //         return 0.0;
+  //     return bar.x;
+  // }
+  //
+  // void main()
+  // {
+  //     vec4 color = vec4(foo(BaseColor));
+  //     gl_FragColor = color;
+  // }
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 140
+OpName %main "main"
+OpName %foo_vf4_ "foo(vf4;"
+OpName %bar "bar"
+OpName %color "color"
+OpName %BaseColor "BaseColor"
+OpName %param "param"
+OpName %gl_FragColor "gl_FragColor"
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%14 = OpTypeFunction %float %_ptr_Function_v4float
+%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%bool = OpTypeBool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%BaseColor = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%gl_FragColor = OpVariable %_ptr_Output_v4float Output
+)";
+
+  const std::string foo =
+      R"(%foo_vf4_ = OpFunction %float None %14
+%bar = OpFunctionParameter %_ptr_Function_v4float
+%27 = OpLabel
+%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%29 = OpLoad %float %28
+%30 = OpFOrdLessThan %bool %29 %float_0
+OpSelectionMerge %31 None
+OpBranchConditional %30 %32 %31
+%32 = OpLabel
+OpReturnValue %float_0
+%31 = OpLabel
+%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%34 = OpLoad %float %33
+OpReturnValue %34
+OpFunctionEnd
+)";
+
+  const std::string fooMergeReturn =
+      R"(%foo_vf4_ = OpFunction %float None %14
+%bar = OpFunctionParameter %_ptr_Function_v4float
+%27 = OpLabel
+%41 = OpVariable %_ptr_Function_bool Function %false
+%36 = OpVariable %_ptr_Function_float Function
+OpSelectionMerge %35 None
+OpSwitch %uint_0 %38
+%38 = OpLabel
+%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%29 = OpLoad %float %28
+%30 = OpFOrdLessThan %bool %29 %float_0
+OpSelectionMerge %31 None
+OpBranchConditional %30 %32 %31
+%32 = OpLabel
+OpStore %41 %true
+OpStore %36 %float_0
+OpBranch %35
+%31 = OpLabel
+%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%34 = OpLoad %float %33
+OpStore %41 %true
+OpStore %36 %34
+OpBranch %35
+%35 = OpLabel
+%37 = OpLoad %float %36
+OpReturnValue %37
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %10
+%22 = OpLabel
+%color = OpVariable %_ptr_Function_v4float Function
+%param = OpVariable %_ptr_Function_v4float Function
+%23 = OpLoad %v4float %BaseColor
+OpStore %param %23
+%24 = OpFunctionCall %float %foo_vf4_ %param
+%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
+OpStore %color %25
+%26 = OpLoad %v4float %color
+OpStore %gl_FragColor %26
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%false = OpConstantFalse %bool
+%_ptr_Function_bool = OpTypePointer Function %bool
+%true = OpConstantTrue %bool
+%main = OpFunction %void None %10
+%22 = OpLabel
+%43 = OpVariable %_ptr_Function_bool Function %false
+%44 = OpVariable %_ptr_Function_float Function
+%45 = OpVariable %_ptr_Function_float Function
+%color = OpVariable %_ptr_Function_v4float Function
+%param = OpVariable %_ptr_Function_v4float Function
+%23 = OpLoad %v4float %BaseColor
+OpStore %param %23
+OpStore %43 %false
+OpSelectionMerge %55 None
+OpSwitch %uint_0 %47
+%47 = OpLabel
+%48 = OpAccessChain %_ptr_Function_float %param %uint_0
+%49 = OpLoad %float %48
+%50 = OpFOrdLessThan %bool %49 %float_0
+OpSelectionMerge %52 None
+OpBranchConditional %50 %51 %52
+%51 = OpLabel
+OpStore %43 %true
+OpStore %44 %float_0
+OpBranch %55
+%52 = OpLabel
+%53 = OpAccessChain %_ptr_Function_float %param %uint_0
+%54 = OpLoad %float %53
+OpStore %43 %true
+OpStore %44 %54
+OpBranch %55
+%55 = OpLabel
+%56 = OpLoad %float %44
+OpStore %45 %56
+%24 = OpLoad %float %45
+%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
+OpStore %color %25
+%26 = OpLoad %v4float %color
+OpStore %gl_FragColor %26
+OpReturn
+OpFunctionEnd
+)";
+
+  // The inliner skips callees with early returns, so run merge-return first.
+  AddPass<MergeReturnPass>();
+  AddPass<InlineExhaustivePass>();
+  RunAndCheck(predefs + before + foo, predefs + after + fooMergeReturn);
+}
+
+TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) {
+  // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755
+  //
+  // Original example is derived from:
+  //
+  // #version 450
+  // (NOTE: the SPIR-V below declares foo as returning void, not float.)
+  // float foo() {
+  //     if (true) {
+  //     }
+  // }
+  //
+  // void main() { foo(); }
+  //
+  // But the order of basic blocks in foo is changed so that the return
+  // block is listed second-last.  There is only one return in the callee
+  // but it does not appear last.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Vertex %main "main"
+OpSource GLSL 450
+OpName %main "main"
+OpName %foo_ "foo("
+%void = OpTypeVoid
+%4 = OpTypeFunction %void
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+)";
+
+  const std::string foo =
+      R"(%foo_ = OpFunction %void None %4
+%7 = OpLabel
+OpSelectionMerge %8 None
+OpBranchConditional %true %9 %8
+%8 = OpLabel
+OpReturn
+%9 = OpLabel
+OpBranch %8
+OpFunctionEnd
+)";
+
+  const std::string fooMergeReturn =
+      R"(%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%false = OpConstantFalse %bool
+%_ptr_Function_bool = OpTypePointer Function %bool
+%foo_ = OpFunction %void None %4
+%7 = OpLabel
+%18 = OpVariable %_ptr_Function_bool Function %false
+OpSelectionMerge %12 None
+OpSwitch %uint_0 %13
+%13 = OpLabel
+OpSelectionMerge %8 None
+OpBranchConditional %true %9 %8
+%8 = OpLabel
+OpStore %18 %true
+OpBranch %12
+%9 = OpLabel
+OpBranch %8
+%12 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %4
+%10 = OpLabel
+%11 = OpFunctionCall %void %foo_
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %4
+%10 = OpLabel
+%19 = OpVariable %_ptr_Function_bool Function %false
+OpStore %19 %false
+OpSelectionMerge %24 None
+OpSwitch %uint_0 %21
+%21 = OpLabel
+OpSelectionMerge %22 None
+OpBranchConditional %true %23 %22
+%22 = OpLabel
+OpStore %19 %true
+OpBranch %24
+%23 = OpLabel
+OpBranch %22
+%24 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+  // The early return case must be handled by merge-return first.
+  AddPass<MergeReturnPass>();
+  AddPass<InlineExhaustivePass>();
+  RunAndCheck(predefs + foo + before, predefs + fooMergeReturn + after);
+}
+
+TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
+  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
+  //
+  // The callee returns from inside a selection construct.  The inliner used
+  // to wrap such callees in a single-trip loop; now merge-return runs first.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %_GLF_color
+OpExecutionMode %main OriginUpperLeft
+OpSource ESSL 310
+OpName %main "main"
+OpName %f_ "f("
+OpName %i "i"
+OpName %_GLF_color "_GLF_color"
+OpDecorate %_GLF_color Location 0
+%void = OpTypeVoid
+%7 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%9 = OpTypeFunction %float
+%float_1 = OpConstant %float 1
+%bool = OpTypeBool
+%false = OpConstantFalse %bool
+%true = OpConstantTrue %bool
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_GLF_color = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
+)";
+
+  const std::string new_predefs =
+      R"(%_ptr_Function_float = OpTypePointer Function %float
+%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%_ptr_Function_bool = OpTypePointer Function %bool
+)";
+
+  const std::string main_before =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+%31 = OpFunctionCall %float %f_
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string main_after =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%46 = OpVariable %_ptr_Function_bool Function %false
+%47 = OpVariable %_ptr_Function_float Function
+%48 = OpVariable %_ptr_Function_float Function
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+OpStore %46 %false
+OpSelectionMerge %53 None
+OpSwitch %uint_0 %50
+%50 = OpLabel
+OpSelectionMerge %52 None
+OpBranchConditional %true %51 %52
+%51 = OpLabel
+OpStore %46 %true
+OpStore %47 %float_1
+OpBranch %53
+%52 = OpLabel
+OpStore %46 %true
+OpStore %47 %float_1
+OpBranch %53
+%53 = OpLabel
+%54 = OpLoad %float %47
+OpStore %48 %54
+%31 = OpLoad %float %48
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string callee =
+      R"(%f_ = OpFunction %float None %9
+%34 = OpLabel
+OpSelectionMerge %35 None
+OpBranchConditional %true %36 %35
+%36 = OpLabel
+OpReturnValue %float_1
+%35 = OpLabel
+OpReturnValue %float_1
+OpFunctionEnd
+)";
+
+  const std::string calleeMergeReturn =
+      R"(%f_ = OpFunction %float None %9
+%34 = OpLabel
+%45 = OpVariable %_ptr_Function_bool Function %false
+%39 = OpVariable %_ptr_Function_float Function
+OpSelectionMerge %37 None
+OpSwitch %uint_0 %41
+%41 = OpLabel
+OpSelectionMerge %35 None
+OpBranchConditional %true %36 %35
+%36 = OpLabel
+OpStore %45 %true
+OpStore %39 %float_1
+OpBranch %37
+%35 = OpLabel
+OpStore %45 %true
+OpStore %39 %float_1
+OpBranch %37
+%37 = OpLabel
+%40 = OpLoad %float %39
+OpReturnValue %40
+OpFunctionEnd
+)";
+
+  // The inliner skips callees with early returns, so run merge-return first.
+  AddPass<MergeReturnPass>();
+  AddPass<InlineExhaustivePass>();
+  RunAndCheck(predefs + main_before + callee,
+              predefs + new_predefs + main_after + calleeMergeReturn);
+}
+
+TEST_F(InlineTest, ForwardReferencesInPhiInlined) {
+  // The basic structure of the test case is like this:
+  //
+  // int foo() {
+  //   int result = 1;
+  //   if (true) {
+  //      result = 1;
+  //   }
+  //   return result;
+  // }
+  //
+  // void main() {
+  //  int x = foo();
+  // }
+  // Modifications: Phi is used instead of load/store, the return block in
+  // foo precedes the "then" block (so the Phi %17 forward-references %18),
+  // and merge-return runs before the inliner.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Vertex %main "main"
+OpSource GLSL 450
+OpName %main "main"
+OpName %foo_ "foo("
+OpName %x "x"
+%void = OpTypeVoid
+%6 = OpTypeFunction %void
+%int = OpTypeInt 32 1
+%8 = OpTypeFunction %int
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+%int_0 = OpConstant %int 0
+%_ptr_Function_int = OpTypePointer Function %int
+)";
+
+  const std::string callee =
+      R"(%foo_ = OpFunction %int None %8
+%13 = OpLabel
+%14 = OpCopyObject %int %int_0
+OpSelectionMerge %15 None
+OpBranchConditional %true %16 %15
+%15 = OpLabel
+%17 = OpPhi %int %14 %13 %18 %16
+OpReturnValue %17
+%16 = OpLabel
+%18 = OpCopyObject %int %int_0
+OpBranch %15
+OpFunctionEnd
+)";
+
+  const std::string calleeMergeReturn =
+      R"(%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%false = OpConstantFalse %bool
+%_ptr_Function_bool = OpTypePointer Function %bool
+%foo_ = OpFunction %int None %8
+%13 = OpLabel
+%29 = OpVariable %_ptr_Function_bool Function %false
+%22 = OpVariable %_ptr_Function_int Function
+OpSelectionMerge %21 None
+OpSwitch %uint_0 %24
+%24 = OpLabel
+%14 = OpCopyObject %int %int_0
+OpSelectionMerge %15 None
+OpBranchConditional %true %16 %15
+%15 = OpLabel
+%17 = OpPhi %int %14 %24 %18 %16
+OpStore %29 %true
+OpStore %22 %17
+OpBranch %21
+%16 = OpLabel
+%18 = OpCopyObject %int %int_0
+OpBranch %15
+%21 = OpLabel
+%23 = OpLoad %int %22
+OpReturnValue %23
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %6
+%19 = OpLabel
+%x = OpVariable %_ptr_Function_int Function
+%20 = OpFunctionCall %int %foo_
+OpStore %x %20
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %6
+%19 = OpLabel
+%30 = OpVariable %_ptr_Function_bool Function %false
+%31 = OpVariable %_ptr_Function_int Function
+%32 = OpVariable %_ptr_Function_int Function
+%x = OpVariable %_ptr_Function_int Function
+OpStore %30 %false
+OpSelectionMerge %40 None
+OpSwitch %uint_0 %34
+%34 = OpLabel
+%35 = OpCopyObject %int %int_0
+OpSelectionMerge %36 None
+OpBranchConditional %true %38 %36
+%36 = OpLabel
+%37 = OpPhi %int %35 %34 %39 %38
+OpStore %30 %true
+OpStore %31 %37
+OpBranch %40
+%38 = OpLabel
+%39 = OpCopyObject %int %int_0
+OpBranch %36
+%40 = OpLabel
+%41 = OpLoad %int %31
+OpStore %32 %41
+%20 = OpLoad %int %32
+OpStore %x %20
+OpReturn
+OpFunctionEnd
+)";
+
+  AddPass<MergeReturnPass>();
+  AddPass<InlineExhaustivePass>();
+  RunAndCheck(predefs + callee + before, predefs + calleeMergeReturn + after);
+}
+
 // TODO(greg-lunarg): Add tests to verify handling of these cases:
 //
 //    Empty modules