diff --git a/source/opt/inline_pass.cpp b/source/opt/inline_pass.cpp
index bc07ff0..3c874a7 100644
--- a/source/opt/inline_pass.cpp
+++ b/source/opt/inline_pass.cpp
@@ -20,7 +20,6 @@
 #include <utility>
 
 #include "source/cfa.h"
-#include "source/opt/reflect.h"
 #include "source/util/make_unique.h"
 
 // Indices of operands in SPIR-V instructions
@@ -233,220 +232,6 @@
   });
 }
 
-void InlinePass::MoveInstsBeforeEntryBlock(
-    std::unordered_map<uint32_t, Instruction*>* preCallSB,
-    BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr,
-    UptrVectorIterator<BasicBlock> call_block_itr) {
-  for (auto cii = call_block_itr->begin(); cii != call_inst_itr;
-       cii = call_block_itr->begin()) {
-    Instruction* inst = &*cii;
-    inst->RemoveFromList();
-    std::unique_ptr<Instruction> cp_inst(inst);
-    // Remember same-block ops for possible regeneration.
-    if (IsSameBlockOp(&*cp_inst)) {
-      auto* sb_inst_ptr = cp_inst.get();
-      (*preCallSB)[cp_inst->result_id()] = sb_inst_ptr;
-    }
-    new_blk_ptr->AddInstruction(std::move(cp_inst));
-  }
-}
-
-std::unique_ptr<BasicBlock> InlinePass::AddGuardBlock(
-    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-    std::unordered_map<uint32_t, uint32_t>* callee2caller,
-    std::unique_ptr<BasicBlock> new_blk_ptr, uint32_t entry_blk_label_id) {
-  const auto guard_block_id = context()->TakeNextId();
-  if (guard_block_id == 0) {
-    return nullptr;
-  }
-  AddBranch(guard_block_id, &new_blk_ptr);
-  new_blocks->push_back(std::move(new_blk_ptr));
-  // Start the next block.
-  new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(guard_block_id));
-  // Reset the mapping of the callee's entry block to point to
-  // the guard block.  Do this so we can fix up phis later on to
-  // satisfy dominance.
-  (*callee2caller)[entry_blk_label_id] = guard_block_id;
-  return new_blk_ptr;
-}
-
-InstructionList::iterator InlinePass::AddStoresForVariableInitializers(
-    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-    std::unique_ptr<BasicBlock>* new_blk_ptr,
-    UptrVectorIterator<BasicBlock> callee_first_block_itr) {
-  auto callee_var_itr = callee_first_block_itr->begin();
-  while (callee_var_itr->opcode() == SpvOp::SpvOpVariable) {
-    if (callee_var_itr->NumInOperands() == 2) {
-      assert(callee2caller.count(callee_var_itr->result_id()) &&
-             "Expected the variable to have already been mapped.");
-      uint32_t new_var_id = callee2caller.at(callee_var_itr->result_id());
-
-      // The initializer must be a constant or global value.  No mapped
-      // should be used.
-      uint32_t val_id = callee_var_itr->GetSingleWordInOperand(1);
-      AddStore(new_var_id, val_id, new_blk_ptr);
-    }
-    ++callee_var_itr;
-  }
-  return callee_var_itr;
-}
-
-bool InlinePass::InlineInstructionInBB(
-    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-    BasicBlock* new_blk_ptr, const Instruction* inst) {
-  // If we have return, it must be at the end of the callee. We will handle
-  // it at the end.
-  if (inst->opcode() == SpvOpReturnValue || inst->opcode() == SpvOpReturn)
-    return true;
-
-  // Copy callee instruction and remap all input Ids.
-  std::unique_ptr<Instruction> cp_inst(inst->Clone(context()));
-  cp_inst->ForEachInId([&callee2caller](uint32_t* iid) {
-    const auto mapItr = callee2caller.find(*iid);
-    if (mapItr != callee2caller.end()) {
-      *iid = mapItr->second;
-    }
-  });
-  // If result id is non-zero, remap it.
-  const uint32_t rid = cp_inst->result_id();
-  if (rid != 0) {
-    const auto mapItr = callee2caller.find(rid);
-    if (mapItr == callee2caller.end()) return false;
-    uint32_t nid = mapItr->second;
-    cp_inst->SetResultId(nid);
-    get_decoration_mgr()->CloneDecorations(rid, nid);
-  }
-  new_blk_ptr->AddInstruction(std::move(cp_inst));
-  return true;
-}
-
-std::unique_ptr<BasicBlock> InlinePass::InlineReturn(
-    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-    std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn,
-    const Instruction* inst, uint32_t returnVarId) {
-  // Store return value to return variable.
-  if (inst->opcode() == SpvOpReturnValue) {
-    assert(returnVarId != 0);
-    uint32_t valId = inst->GetInOperand(kSpvReturnValueId).words[0];
-    const auto mapItr = callee2caller.find(valId);
-    if (mapItr != callee2caller.end()) {
-      valId = mapItr->second;
-    }
-    AddStore(returnVarId, valId, &new_blk_ptr);
-  }
-
-  uint32_t returnLabelId = 0;
-  for (auto callee_block_itr = calleeFn->begin();
-       callee_block_itr != calleeFn->end(); ++callee_block_itr) {
-    if (callee_block_itr->tail()->opcode() == SpvOpUnreachable ||
-        callee_block_itr->tail()->opcode() == SpvOpKill) {
-      returnLabelId = context()->TakeNextId();
-      break;
-    }
-  }
-  if (returnLabelId == 0) return new_blk_ptr;
-
-  if (inst->opcode() == SpvOpReturn || inst->opcode() == SpvOpReturnValue)
-    AddBranch(returnLabelId, &new_blk_ptr);
-  new_blocks->push_back(std::move(new_blk_ptr));
-  return MakeUnique<BasicBlock>(NewLabel(returnLabelId));
-}
-
-bool InlinePass::InlineEntryBlock(
-    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-    std::unique_ptr<BasicBlock>* new_blk_ptr,
-    UptrVectorIterator<BasicBlock> callee_first_block) {
-  auto callee_inst_itr = AddStoresForVariableInitializers(
-      callee2caller, new_blk_ptr, callee_first_block);
-
-  while (callee_inst_itr != callee_first_block->end()) {
-    if (!InlineInstructionInBB(callee2caller, new_blk_ptr->get(),
-                               &*callee_inst_itr)) {
-      return false;
-    }
-    ++callee_inst_itr;
-  }
-  return true;
-}
-
-std::unique_ptr<BasicBlock> InlinePass::InlineBasicBlocks(
-    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-    const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-    std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn) {
-  auto callee_block_itr = calleeFn->begin();
-  ++callee_block_itr;
-
-  while (callee_block_itr != calleeFn->end()) {
-    new_blocks->push_back(std::move(new_blk_ptr));
-    const auto mapItr =
-        callee2caller.find(callee_block_itr->GetLabelInst()->result_id());
-    if (mapItr == callee2caller.end()) return nullptr;
-    new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(mapItr->second));
-
-    auto tail_inst_itr = callee_block_itr->end();
-    for (auto inst_itr = callee_block_itr->begin(); inst_itr != tail_inst_itr;
-         ++inst_itr) {
-      if (!InlineInstructionInBB(callee2caller, new_blk_ptr.get(),
-                                 &*inst_itr)) {
-        return nullptr;
-      }
-    }
-
-    ++callee_block_itr;
-  }
-  return new_blk_ptr;
-}
-
-bool InlinePass::MoveCallerInstsAfterFunctionCall(
-    std::unordered_map<uint32_t, Instruction*>* preCallSB,
-    std::unordered_map<uint32_t, uint32_t>* postCallSB,
-    std::unique_ptr<BasicBlock>* new_blk_ptr,
-    BasicBlock::iterator call_inst_itr, bool multiBlocks) {
-  // Copy remaining instructions from caller block.
-  for (Instruction* inst = call_inst_itr->NextNode(); inst;
-       inst = call_inst_itr->NextNode()) {
-    inst->RemoveFromList();
-    std::unique_ptr<Instruction> cp_inst(inst);
-    // If multiple blocks generated, regenerate any same-block
-    // instruction that has not been seen in this last block.
-    if (multiBlocks) {
-      if (!CloneSameBlockOps(&cp_inst, postCallSB, preCallSB, new_blk_ptr)) {
-        return false;
-      }
-
-      // Remember same-block ops in this block.
-      if (IsSameBlockOp(&*cp_inst)) {
-        const uint32_t rid = cp_inst->result_id();
-        (*postCallSB)[rid] = rid;
-      }
-    }
-    new_blk_ptr->get()->AddInstruction(std::move(cp_inst));
-  }
-
-  return true;
-}
-
-void InlinePass::MoveLoopMergeInstToFirstBlock(
-    std::vector<std::unique_ptr<BasicBlock>>* new_blocks) {
-  // Move the OpLoopMerge from the last block back to the first, where
-  // it belongs.
-  auto& first = new_blocks->front();
-  auto& last = new_blocks->back();
-  assert(first != last);
-
-  // Insert a modified copy of the loop merge into the first block.
-  auto loop_merge_itr = last->tail();
-  --loop_merge_itr;
-  assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
-  std::unique_ptr<Instruction> cp_inst(loop_merge_itr->Clone(context()));
-  first->tail().InsertBefore(std::move(cp_inst));
-
-  // Remove the loop merge from the last block.
-  loop_merge_itr->RemoveFromList();
-  delete &*loop_merge_itr;
-}
-
 bool InlinePass::GenInlineCode(
     std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
     std::vector<std::unique_ptr<Instruction>>* new_vars,
@@ -465,19 +250,13 @@
   // valid.  These operations can fail.
   context()->InvalidateAnalyses(IRContext::kAnalysisDefUse);
 
-  // If the caller is a loop header and the callee has multiple blocks, then the
-  // normal inlining logic will place the OpLoopMerge in the last of several
-  // blocks in the loop.  Instead, it should be placed at the end of the first
-  // block.  We'll wait to move the OpLoopMerge until the end of the regular
-  // inlining logic, and only if necessary.
-  bool caller_is_loop_header = call_block_itr->GetLoopMergeInst() != nullptr;
-
-  // Single-trip loop continue block
-  std::unique_ptr<BasicBlock> single_trip_loop_cont_blk;
-
   Function* calleeFn = id2function_[call_inst_itr->GetSingleWordOperand(
       kSpvFunctionCallFunctionId)];
 
+  // Check for multiple returns in the callee.
+  auto fi = early_return_funcs_.find(calleeFn->result_id());
+  const bool earlyReturn = fi != early_return_funcs_.end();
+
   // Map parameters to actual arguments.
   MapParams(calleeFn, call_inst_itr, &callee2caller);
 
@@ -487,31 +266,6 @@
     return false;
   }
 
-  // First block needs to use label of original block
-  // but map callee label in case of phi reference.
-  uint32_t entry_blk_label_id = calleeFn->begin()->GetLabelInst()->result_id();
-  callee2caller[entry_blk_label_id] = call_block_itr->id();
-  std::unique_ptr<BasicBlock> new_blk_ptr =
-      MakeUnique<BasicBlock>(NewLabel(call_block_itr->id()));
-
-  // Move instructions of original caller block up to call instruction.
-  MoveInstsBeforeEntryBlock(&preCallSB, new_blk_ptr.get(), call_inst_itr,
-                            call_block_itr);
-
-  if (caller_is_loop_header &&
-      (*(calleeFn->begin())).GetMergeInst() != nullptr) {
-    // We can't place both the caller's merge instruction and
-    // another merge instruction in the same block.  So split the
-    // calling block. Insert an unconditional branch to a new guard
-    // block.  Later, once we know the ID of the last block,  we
-    // will move the caller's OpLoopMerge from the last generated
-    // block into the first block. We also wait to avoid
-    // invalidating various iterators.
-    new_blk_ptr = AddGuardBlock(new_blocks, &callee2caller,
-                                std::move(new_blk_ptr), entry_blk_label_id);
-    if (new_blk_ptr == nullptr) return false;
-  }
-
   // Create return var if needed.
   const uint32_t calleeTypeId = calleeFn->type_id();
   uint32_t returnVarId = 0;
@@ -523,50 +277,341 @@
     }
   }
 
-  calleeFn->WhileEachInst([&callee2caller, this](const Instruction* cpi) {
-    // Create set of callee result ids. Used to detect forward references
+  // Create set of callee result ids. Used to detect forward references
+  std::unordered_set<uint32_t> callee_result_ids;
+  calleeFn->ForEachInst([&callee_result_ids](const Instruction* cpi) {
     const uint32_t rid = cpi->result_id();
-    if (rid != 0 && callee2caller.find(rid) == callee2caller.end()) {
-      const uint32_t nid = context()->TakeNextId();
-      if (nid == 0) return false;
-      callee2caller[rid] = nid;
-    }
-    return true;
+    if (rid != 0) callee_result_ids.insert(rid);
   });
 
-  // Inline the entry block of the callee function.
-  if (!InlineEntryBlock(callee2caller, &new_blk_ptr, calleeFn->begin())) {
+  // If the caller is a loop header and the callee has multiple blocks, then the
+  // normal inlining logic will place the OpLoopMerge in the last of several
+  // blocks in the loop.  Instead, it should be placed at the end of the first
+  // block.  We'll wait to move the OpLoopMerge until the end of the regular
+  // inlining logic, and only if necessary.
+  bool caller_is_loop_header = false;
+  if (call_block_itr->GetLoopMergeInst()) {
+    caller_is_loop_header = true;
+  }
+
+  bool callee_begins_with_structured_header =
+      (*(calleeFn->begin())).GetMergeInst() != nullptr;
+
+  // Clone and map callee code. Copy caller block code to beginning of
+  // first block and end of last block.
+  bool prevInstWasReturn = false;
+  uint32_t singleTripLoopHeaderId = 0;
+  uint32_t singleTripLoopContinueId = 0;
+  uint32_t returnLabelId = 0;
+  bool multiBlocks = false;
+  // new_blk_ptr is a new basic block in the caller.  New instructions are
+  // written to it.  It is created when we encounter the OpLabel
+  // of the first callee block.  It is appended to new_blocks only when
+  // it is complete.
+  std::unique_ptr<BasicBlock> new_blk_ptr;
+  bool successful = calleeFn->WhileEachInst(
+      [&new_blocks, &callee2caller, &call_block_itr, &call_inst_itr,
+       &new_blk_ptr, &prevInstWasReturn, &returnLabelId, &returnVarId,
+       caller_is_loop_header, callee_begins_with_structured_header,
+       &calleeTypeId, &multiBlocks, &postCallSB, &preCallSB, earlyReturn,
+       &singleTripLoopHeaderId, &singleTripLoopContinueId, &callee_result_ids,
+       this](const Instruction* cpi) {
+        switch (cpi->opcode()) {
+          case SpvOpFunction:
+          case SpvOpFunctionParameter:
+            // Already processed
+            break;
+          case SpvOpVariable:
+            if (cpi->NumInOperands() == 2) {
+              assert(callee2caller.count(cpi->result_id()) &&
+                     "Expected the variable to have already been mapped.");
+              uint32_t new_var_id = callee2caller.at(cpi->result_id());
+
+              // The initializer must be a constant or global value, so its
+              // id is used directly without callee-to-caller remapping.
+              uint32_t val_id = cpi->GetSingleWordInOperand(1);
+              AddStore(new_var_id, val_id, &new_blk_ptr);
+            }
+            break;
+          case SpvOpUnreachable:
+          case SpvOpKill: {
+            // Generate a return label so that we split the block with the
+            // function call. Copy the terminator into the new block.
+            if (returnLabelId == 0) {
+              returnLabelId = context()->TakeNextId();
+              if (returnLabelId == 0) {
+                return false;
+              }
+            }
+            std::unique_ptr<Instruction> terminator(
+                new Instruction(context(), cpi->opcode(), 0, 0, {}));
+            new_blk_ptr->AddInstruction(std::move(terminator));
+            break;
+          }
+          case SpvOpLabel: {
+            // If previous instruction was early return, insert branch
+            // instruction to return block.
+            if (prevInstWasReturn) {
+              if (returnLabelId == 0) {
+                returnLabelId = context()->TakeNextId();
+                if (returnLabelId == 0) {
+                  return false;
+                }
+              }
+              AddBranch(returnLabelId, &new_blk_ptr);
+              prevInstWasReturn = false;
+            }
+            // Finish current block (if it exists) and get label for next block.
+            uint32_t labelId;
+            bool firstBlock = false;
+            if (new_blk_ptr != nullptr) {
+              new_blocks->push_back(std::move(new_blk_ptr));
+              // If result id is already mapped, use it, otherwise get a new
+              // one.
+              const uint32_t rid = cpi->result_id();
+              const auto mapItr = callee2caller.find(rid);
+              labelId = (mapItr != callee2caller.end())
+                            ? mapItr->second
+                            : context()->TakeNextId();
+              if (labelId == 0) {
+                return false;
+              }
+            } else {
+              // First block needs to use label of original block
+              // but map callee label in case of phi reference.
+              labelId = call_block_itr->id();
+              callee2caller[cpi->result_id()] = labelId;
+              firstBlock = true;
+            }
+            // Create first/next block.
+            new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(labelId));
+            if (firstBlock) {
+              // Copy contents of original caller block up to call instruction.
+              for (auto cii = call_block_itr->begin(); cii != call_inst_itr;
+                   cii = call_block_itr->begin()) {
+                Instruction* inst = &*cii;
+                inst->RemoveFromList();
+                std::unique_ptr<Instruction> cp_inst(inst);
+                // Remember same-block ops for possible regeneration.
+                if (IsSameBlockOp(&*cp_inst)) {
+                  auto* sb_inst_ptr = cp_inst.get();
+                  preCallSB[cp_inst->result_id()] = sb_inst_ptr;
+                }
+                new_blk_ptr->AddInstruction(std::move(cp_inst));
+              }
+              if (caller_is_loop_header &&
+                  callee_begins_with_structured_header) {
+                // We can't place both the caller's merge instruction and
+                // another merge instruction in the same block.  So split the
+                // calling block. Insert an unconditional branch to a new guard
+                // block.  Later, once we know the ID of the last block,  we
+                // will move the caller's OpLoopMerge from the last generated
+                // block into the first block. We also wait to avoid
+                // invalidating various iterators.
+                const auto guard_block_id = context()->TakeNextId();
+                if (guard_block_id == 0) {
+                  return false;
+                }
+                AddBranch(guard_block_id, &new_blk_ptr);
+                new_blocks->push_back(std::move(new_blk_ptr));
+                // Start the next block.
+                new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(guard_block_id));
+                // Reset the mapping of the callee's entry block to point to
+                // the guard block.  Do this so we can fix up phis later on to
+                // satisfy dominance.
+                callee2caller[cpi->result_id()] = guard_block_id;
+              }
+              // If callee has early return, insert a header block for
+              // single-trip loop that will encompass callee code.  Start
+              // postheader block.
+              //
+              // Note: Consider the following combination:
+              //  - the caller is a single block loop
+              //  - the callee does not begin with a structured header
+              //  - the callee has multiple returns.
+              // We still need to split the caller block and insert a guard
+              // block. But we only need to do it once. We haven't done it yet,
+              // but the single-trip loop header will serve the same purpose.
+              if (earlyReturn) {
+                singleTripLoopHeaderId = context()->TakeNextId();
+                if (singleTripLoopHeaderId == 0) {
+                  return false;
+                }
+                AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
+                new_blocks->push_back(std::move(new_blk_ptr));
+                new_blk_ptr =
+                    MakeUnique<BasicBlock>(NewLabel(singleTripLoopHeaderId));
+                returnLabelId = context()->TakeNextId();
+                singleTripLoopContinueId = context()->TakeNextId();
+                if (returnLabelId == 0 || singleTripLoopContinueId == 0) {
+                  return false;
+                }
+                AddLoopMerge(returnLabelId, singleTripLoopContinueId,
+                             &new_blk_ptr);
+                uint32_t postHeaderId = context()->TakeNextId();
+                if (postHeaderId == 0) {
+                  return false;
+                }
+                AddBranch(postHeaderId, &new_blk_ptr);
+                new_blocks->push_back(std::move(new_blk_ptr));
+                new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(postHeaderId));
+                multiBlocks = true;
+                // Reset the mapping of the callee's entry block to point to
+                // the post-header block.  Do this so we can fix up phis later
+                // on to satisfy dominance.
+                callee2caller[cpi->result_id()] = postHeaderId;
+              }
+            } else {
+              multiBlocks = true;
+            }
+          } break;
+          case SpvOpReturnValue: {
+            // Store return value to return variable.
+            assert(returnVarId != 0);
+            uint32_t valId = cpi->GetInOperand(kSpvReturnValueId).words[0];
+            const auto mapItr = callee2caller.find(valId);
+            if (mapItr != callee2caller.end()) {
+              valId = mapItr->second;
+            }
+            AddStore(returnVarId, valId, &new_blk_ptr);
+
+            // Remember we saw a return; if followed by a label, will need to
+            // insert branch.
+            prevInstWasReturn = true;
+          } break;
+          case SpvOpReturn: {
+            // Remember we saw a return; if followed by a label, will need to
+            // insert branch.
+            prevInstWasReturn = true;
+          } break;
+          case SpvOpFunctionEnd: {
+            // If there was an early return, we generated a return label id
+            // for it.  Now we have to generate the return block with that Id.
+            if (returnLabelId != 0) {
+              // If previous instruction was return, insert branch instruction
+              // to return block.
+              if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr);
+              if (earlyReturn) {
+                // If we generated a loop header for the single-trip loop
+                // to accommodate early returns, insert the continue
+                // target block now, with a false branch back to the loop
+                // header.
+                new_blocks->push_back(std::move(new_blk_ptr));
+                new_blk_ptr =
+                    MakeUnique<BasicBlock>(NewLabel(singleTripLoopContinueId));
+                uint32_t false_id = GetFalseId();
+                if (false_id == 0) {
+                  return false;
+                }
+                AddBranchCond(false_id, singleTripLoopHeaderId, returnLabelId,
+                              &new_blk_ptr);
+              }
+              // Generate the return block.
+              new_blocks->push_back(std::move(new_blk_ptr));
+              new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(returnLabelId));
+              multiBlocks = true;
+            }
+            // Load return value into result id of call, if it exists.
+            if (returnVarId != 0) {
+              const uint32_t resId = call_inst_itr->result_id();
+              assert(resId != 0);
+              AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr);
+            }
+            // Copy remaining instructions from caller block.
+            for (Instruction* inst = call_inst_itr->NextNode(); inst;
+                 inst = call_inst_itr->NextNode()) {
+              inst->RemoveFromList();
+              std::unique_ptr<Instruction> cp_inst(inst);
+              // If multiple blocks generated, regenerate any same-block
+              // instruction that has not been seen in this last block.
+              if (multiBlocks) {
+                if (!CloneSameBlockOps(&cp_inst, &postCallSB, &preCallSB,
+                                       &new_blk_ptr)) {
+                  return false;
+                }
+
+                // Remember same-block ops in this block.
+                if (IsSameBlockOp(&*cp_inst)) {
+                  const uint32_t rid = cp_inst->result_id();
+                  postCallSB[rid] = rid;
+                }
+              }
+              new_blk_ptr->AddInstruction(std::move(cp_inst));
+            }
+            // Finalize inline code.
+            new_blocks->push_back(std::move(new_blk_ptr));
+          } break;
+          default: {
+            // Copy callee instruction and remap all input Ids.
+            std::unique_ptr<Instruction> cp_inst(cpi->Clone(context()));
+            bool succeeded = cp_inst->WhileEachInId(
+                [&callee2caller, &callee_result_ids, this](uint32_t* iid) {
+                  const auto mapItr = callee2caller.find(*iid);
+                  if (mapItr != callee2caller.end()) {
+                    *iid = mapItr->second;
+                  } else if (callee_result_ids.find(*iid) !=
+                             callee_result_ids.end()) {
+                    // Forward reference. Allocate a new id, map it,
+                    // use it and check for it when remapping result ids
+                    const uint32_t nid = context()->TakeNextId();
+                    if (nid == 0) {
+                      return false;
+                    }
+                    callee2caller[*iid] = nid;
+                    *iid = nid;
+                  }
+                  return true;
+                });
+            if (!succeeded) {
+              return false;
+            }
+            // If result id is non-zero, remap it. If already mapped, use mapped
+            // value, else use next id.
+            const uint32_t rid = cp_inst->result_id();
+            if (rid != 0) {
+              const auto mapItr = callee2caller.find(rid);
+              uint32_t nid;
+              if (mapItr != callee2caller.end()) {
+                nid = mapItr->second;
+              } else {
+                nid = context()->TakeNextId();
+                if (nid == 0) {
+                  return false;
+                }
+                callee2caller[rid] = nid;
+              }
+              cp_inst->SetResultId(nid);
+              get_decoration_mgr()->CloneDecorations(rid, nid);
+            }
+            new_blk_ptr->AddInstruction(std::move(cp_inst));
+          } break;
+        }
+        return true;
+      });
+
+  if (!successful) {
     return false;
   }
 
-  // Inline blocks of the callee function other than the entry block.
-  new_blk_ptr = InlineBasicBlocks(new_blocks, callee2caller,
-                                  std::move(new_blk_ptr), calleeFn);
-  if (new_blk_ptr == nullptr) return false;
+  if (caller_is_loop_header && (new_blocks->size() > 1)) {
+    // Move the OpLoopMerge from the last block back to the first, where
+    // it belongs.
+    auto& first = new_blocks->front();
+    auto& last = new_blocks->back();
+    assert(first != last);
 
-  new_blk_ptr =
-      InlineReturn(callee2caller, new_blocks, std::move(new_blk_ptr), calleeFn,
-                   &*(calleeFn->tail()->tail()), returnVarId);
+    // Insert a modified copy of the loop merge into the first block.
+    auto loop_merge_itr = last->tail();
+    --loop_merge_itr;
+    assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
+    std::unique_ptr<Instruction> cp_inst(loop_merge_itr->Clone(context()));
+    first->tail().InsertBefore(std::move(cp_inst));
 
-  // Load return value into result id of call, if it exists.
-  if (returnVarId != 0) {
-    const uint32_t resId = call_inst_itr->result_id();
-    assert(resId != 0);
-    AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr);
+    // Remove the loop merge from the last block.
+    loop_merge_itr->RemoveFromList();
+    delete &*loop_merge_itr;
   }
 
-  // Move instructions of original caller block after call instruction.
-  if (!MoveCallerInstsAfterFunctionCall(&preCallSB, &postCallSB, &new_blk_ptr,
-                                        call_inst_itr,
-                                        calleeFn->begin() != calleeFn->end()))
-    return false;
-
-  // Finalize inline code.
-  new_blocks->push_back(std::move(new_blk_ptr));
-
-  if (caller_is_loop_header && (new_blocks->size() > 1))
-    MoveLoopMergeInstToFirstBlock(new_blocks);
-
   // Update block map given replacement blocks.
   for (auto& blk : *new_blocks) {
     id2block_[blk->id()] = &*blk;
@@ -579,21 +624,7 @@
   const uint32_t calleeFnId =
       inst->GetSingleWordOperand(kSpvFunctionCallFunctionId);
   const auto ci = inlinable_.find(calleeFnId);
-  if (ci == inlinable_.cend()) return false;
-
-  if (early_return_funcs_.find(calleeFnId) != early_return_funcs_.end()) {
-    // We rely on the merge-return pass to handle the early return case
-    // in advance.
-    std::string message =
-        "The function '" + id2function_[calleeFnId]->DefInst().PrettyPrint() +
-        "' could not be inlined because the return instruction "
-        "is not at the end of the function. This could be fixed by "
-        "running merge-return before inlining.";
-    consumer()(SPV_MSG_WARNING, "", {0, 0, 0}, message.c_str());
-    return false;
-  }
-
-  return true;
+  return ci != inlinable_.cend();
 }
 
 void InlinePass::UpdateSucceedingPhis(
@@ -614,6 +645,26 @@
       });
 }
 
+bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) {
+  // The analysis below needs structured control flow, so give up (and
+  // report false) for any module that lacks the Shader capability.
+  // TODO: Analyze returns in non-structured control flow
+  if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader)) {
+    return false;
+  }
+  const auto cfg_analysis = context()->GetStructuredCFGAnalysis();
+  // Walk the blocks; the first one that ends in a return while sitting
+  // inside a construct (non-zero containing-construct id) decides it.
+  for (auto& block : *func) {
+    const bool ends_in_return = spvOpcodeIsReturn(block.tail()->opcode());
+    if (!ends_in_return) continue;
+    if (cfg_analysis->ContainingConstruct(block.id()) != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool InlinePass::HasNoReturnInLoop(Function* func) {
   // If control not structured, do not do loop/return analysis
   // TODO: Analyze returns in non-structured control flow
@@ -635,18 +686,10 @@
 }
 
 void InlinePass::AnalyzeReturns(Function* func) {
-  // Analyze functions without a return in loop.
   if (HasNoReturnInLoop(func)) {
     no_return_in_loop_.insert(func->result_id());
-  }
-  // Analyze functions with a return before its tail basic block.
-  for (auto& blk : *func) {
-    auto terminal_ii = blk.cend();
-    --terminal_ii;
-    if (spvOpcodeIsReturn(terminal_ii->opcode()) && &blk != func->tail()) {
+    if (!HasNoReturnInStructuredConstruct(func))
       early_return_funcs_.insert(func->result_id());
-      break;
-    }
   }
 }
 
diff --git a/source/opt/inline_pass.h b/source/opt/inline_pass.h
index 19fb26e..bc5f781 100644
--- a/source/opt/inline_pass.h
+++ b/source/opt/inline_pass.h
@@ -124,6 +124,10 @@
   // Return true if |inst| is a function call that can be inlined.
   bool IsInlinableFunctionCall(const Instruction* inst);
 
+  // Return true if |func| has no return nested in a structured construct
+  // (if, switch or loop). Returns false if the Shader capability is absent.
+  bool HasNoReturnInStructuredConstruct(Function* func);
+
   // Return true if |func| has no return in a loop. The current analysis
   // requires structured control flow, so return false if control flow not
   // structured ie. module is not a shader.
@@ -167,64 +171,6 @@
   // Set of functions that are originally called directly or indirectly from a
   // continue construct.
   std::unordered_set<uint32_t> funcs_called_from_continue_;
-
- private:
-  // Moves instructions of the caller function up to the call instruction
-  // to |new_blk_ptr|.
-  void MoveInstsBeforeEntryBlock(
-      std::unordered_map<uint32_t, Instruction*>* preCallSB,
-      BasicBlock* new_blk_ptr, BasicBlock::iterator call_inst_itr,
-      UptrVectorIterator<BasicBlock> call_block_itr);
-
-  // Returns a new guard block after adding a branch to the end of
-  // |new_blocks|.
-  std::unique_ptr<BasicBlock> AddGuardBlock(
-      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-      std::unordered_map<uint32_t, uint32_t>* callee2caller,
-      std::unique_ptr<BasicBlock> new_blk_ptr, uint32_t entry_blk_label_id);
-
-  // Add store instructions for initializers of variables.
-  InstructionList::iterator AddStoresForVariableInitializers(
-      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-      std::unique_ptr<BasicBlock>* new_blk_ptr,
-      UptrVectorIterator<BasicBlock> callee_block_itr);
-
-  // Inlines a single instruction of the callee function.
-  bool InlineInstructionInBB(
-      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-      BasicBlock* new_blk_ptr, const Instruction* inst);
-
-  // Inlines the return instruction of the callee function.
-  std::unique_ptr<BasicBlock> InlineReturn(
-      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-      std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn,
-      const Instruction* inst, uint32_t returnVarId);
-
-  // Inlines the entry block of the callee function.
-  bool InlineEntryBlock(
-      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-      std::unique_ptr<BasicBlock>* new_blk_ptr,
-      UptrVectorIterator<BasicBlock> callee_first_block);
-
-  // Inlines basic blocks of the callee function other than the entry basic
-  // block.
-  std::unique_ptr<BasicBlock> InlineBasicBlocks(
-      std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
-      const std::unordered_map<uint32_t, uint32_t>& callee2caller,
-      std::unique_ptr<BasicBlock> new_blk_ptr, Function* calleeFn);
-
-  // Moves instructions of the caller function after the call instruction
-  // to |new_blk_ptr|.
-  bool MoveCallerInstsAfterFunctionCall(
-      std::unordered_map<uint32_t, Instruction*>* preCallSB,
-      std::unordered_map<uint32_t, uint32_t>* postCallSB,
-      std::unique_ptr<BasicBlock>* new_blk_ptr,
-      BasicBlock::iterator call_inst_itr, bool multiBlocks);
-
-  // Move the OpLoopMerge from the last block back to the first.
-  void MoveLoopMergeInstToFirstBlock(
-      std::vector<std::unique_ptr<BasicBlock>>* new_blocks);
 };
 
 }  // namespace opt
diff --git a/test/opt/inline_opaque_test.cpp b/test/opt/inline_opaque_test.cpp
index b8d2dfa..d10913a 100644
--- a/test/opt/inline_opaque_test.cpp
+++ b/test/opt/inline_opaque_test.cpp
@@ -102,12 +102,12 @@
 OpStore %32 %31
 %33 = OpLoad %S_t %s0
 OpStore %param %33
-%42 = OpAccessChain %_ptr_Function_18 %param %int_2
-%43 = OpLoad %18 %42
-%44 = OpAccessChain %_ptr_Function_v2float %param %int_0
-%45 = OpLoad %v2float %44
-%46 = OpImageSampleImplicitLod %v4float %43 %45
-OpStore %outColor %46
+%41 = OpAccessChain %_ptr_Function_18 %param %int_2
+%42 = OpLoad %18 %41
+%43 = OpAccessChain %_ptr_Function_v2float %param %int_0
+%44 = OpLoad %v2float %43
+%45 = OpImageSampleImplicitLod %v4float %42 %44
+OpStore %outColor %45
 OpReturn
 OpFunctionEnd
 )";
@@ -191,10 +191,10 @@
 %34 = OpVariable %_ptr_Function_20 Function
 %35 = OpVariable %_ptr_Function_20 Function
 %25 = OpVariable %_ptr_Function_20 Function
-%37 = OpLoad %20 %sampler16
-OpStore %34 %37
-%38 = OpLoad %20 %34
-OpStore %35 %38
+%36 = OpLoad %20 %sampler16
+OpStore %34 %36
+%37 = OpLoad %20 %34
+OpStore %35 %37
 %26 = OpLoad %20 %35
 OpStore %25 %26
 %27 = OpLoad %20 %25
@@ -301,12 +301,12 @@
 OpStore %33 %32
 %34 = OpLoad %S_t %s0
 OpStore %param %34
-%45 = OpAccessChain %_ptr_Function_19 %param %int_2
-%46 = OpLoad %19 %45
-%47 = OpAccessChain %_ptr_Function_v2float %param %int_0
-%48 = OpLoad %v2float %47
-%49 = OpImageSampleImplicitLod %v4float %46 %48
-OpStore %outColor %49
+%44 = OpAccessChain %_ptr_Function_19 %param %int_2
+%45 = OpLoad %19 %44
+%46 = OpAccessChain %_ptr_Function_v2float %param %int_0
+%47 = OpLoad %v2float %46
+%48 = OpImageSampleImplicitLod %v4float %45 %47
+OpStore %outColor %48
 OpReturn
 OpFunctionEnd
 )";
diff --git a/test/opt/inline_test.cpp b/test/opt/inline_test.cpp
index 76573a6..f44c04a 100644
--- a/test/opt/inline_test.cpp
+++ b/test/opt/inline_test.cpp
@@ -13,7 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <iostream>
 #include <memory>
 #include <string>
 #include <vector>
@@ -116,12 +115,12 @@
       "%param = OpVariable %_ptr_Function_v4float Function",
          "%22 = OpLoad %v4float %BaseColor",
                "OpStore %param %22",
-         "%34 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%35 = OpLoad %float %34",
-         "%36 = OpAccessChain %_ptr_Function_float %param %uint_1",
-         "%37 = OpLoad %float %36",
-         "%38 = OpFAdd %float %35 %37",
-               "OpStore %32 %38",
+         "%33 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%34 = OpLoad %float %33",
+         "%35 = OpAccessChain %_ptr_Function_float %param %uint_1",
+         "%36 = OpLoad %float %35",
+         "%37 = OpFAdd %float %34 %36",
+               "OpStore %32 %37",
          "%23 = OpLoad %float %32",
          "%24 = OpCompositeConstruct %v4float %23 %23 %23 %23",
                "OpStore %color %24",
@@ -249,7 +248,7 @@
       // clang-format off
        "%main = OpFunction %void None %15",
          "%28 = OpLabel",
-         "%58 = OpVariable %_ptr_Function_float Function",
+         "%57 = OpVariable %_ptr_Function_float Function",
          "%46 = OpVariable %_ptr_Function_float Function",
          "%47 = OpVariable %_ptr_Function_float Function",
          "%48 = OpVariable %_ptr_Function_float Function",
@@ -257,21 +256,21 @@
     "%param_1 = OpVariable %_ptr_Function_v4float Function",
          "%29 = OpLoad %v4float %BaseColor",
                "OpStore %param_1 %29",
-         "%50 = OpAccessChain %_ptr_Function_float %param_1 %uint_0",
-         "%51 = OpLoad %float %50",
-         "%52 = OpAccessChain %_ptr_Function_float %param_1 %uint_1",
-         "%53 = OpLoad %float %52",
-         "%54 = OpFAdd %float %51 %53",
-               "OpStore %46 %54",
-         "%55 = OpAccessChain %_ptr_Function_float %param_1 %uint_2",
-         "%56 = OpLoad %float %55",
-               "OpStore %47 %56",
-         "%60 = OpLoad %float %46",
-         "%61 = OpLoad %float %47",
-         "%62 = OpFMul %float %60 %61",
-               "OpStore %58 %62",
-         "%57 = OpLoad %float %58",
-               "OpStore %48 %57",
+         "%49 = OpAccessChain %_ptr_Function_float %param_1 %uint_0",
+         "%50 = OpLoad %float %49",
+         "%51 = OpAccessChain %_ptr_Function_float %param_1 %uint_1",
+         "%52 = OpLoad %float %51",
+         "%53 = OpFAdd %float %50 %52",
+               "OpStore %46 %53",
+         "%54 = OpAccessChain %_ptr_Function_float %param_1 %uint_2",
+         "%55 = OpLoad %float %54",
+               "OpStore %47 %55",
+         "%58 = OpLoad %float %46",
+         "%59 = OpLoad %float %47",
+         "%60 = OpFMul %float %58 %59",
+               "OpStore %57 %60",
+         "%56 = OpLoad %float %57",
+               "OpStore %48 %56",
          "%30 = OpLoad %float %48",
          "%31 = OpCompositeConstruct %v4float %30 %30 %30 %30",
                "OpStore %color %31",
@@ -391,13 +390,13 @@
                "OpStore %b %24",
          "%25 = OpLoad %v4float %b",
                "OpStore %param %25",
-         "%40 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%41 = OpLoad %float %40",
-         "%42 = OpAccessChain %_ptr_Function_float %param %uint_1",
-         "%43 = OpLoad %float %42",
-         "%44 = OpFAdd %float %41 %43",
-         "%45 = OpAccessChain %_ptr_Function_float %param %uint_2",
-               "OpStore %45 %44",
+         "%39 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%40 = OpLoad %float %39",
+         "%41 = OpAccessChain %_ptr_Function_float %param %uint_1",
+         "%42 = OpLoad %float %41",
+         "%43 = OpFAdd %float %40 %42",
+         "%44 = OpAccessChain %_ptr_Function_float %param %uint_2",
+               "OpStore %44 %43",
          "%27 = OpLoad %v4float %param",
                "OpStore %b %27",
          "%28 = OpAccessChain %_ptr_Function_float %b %uint_2",
@@ -522,21 +521,21 @@
       "%param = OpVariable %_ptr_Function_v4float Function",
          "%24 = OpLoad %v4float %BaseColor",
                "OpStore %param %24",
-         "%41 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%42 = OpLoad %float %41",
-               "OpStore %38 %42",
-         "%43 = OpLoad %float %38",
-         "%44 = OpFOrdLessThan %bool %43 %float_0",
-               "OpSelectionMerge %48 None",
-               "OpBranchConditional %44 %45 %48",
+         "%40 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%41 = OpLoad %float %40",
+               "OpStore %38 %41",
+         "%42 = OpLoad %float %38",
+         "%43 = OpFOrdLessThan %bool %42 %float_0",
+               "OpSelectionMerge %44 None",
+               "OpBranchConditional %43 %45 %44",
          "%45 = OpLabel",
          "%46 = OpLoad %float %38",
          "%47 = OpFNegate %float %46",
                "OpStore %38 %47",
-               "OpBranch %48",
-         "%48 = OpLabel",
-         "%49 = OpLoad %float %38",
-               "OpStore %39 %49",
+               "OpBranch %44",
+         "%44 = OpLabel",
+         "%48 = OpLoad %float %38",
+               "OpStore %39 %48",
          "%25 = OpLoad %float %39",
          "%26 = OpCompositeConstruct %v4float %25 %25 %25 %25",
                "OpStore %color %26",
@@ -676,8 +675,8 @@
       // clang-format off
        "%main = OpFunction %void None %12",
          "%27 = OpLabel",
+         "%62 = OpVariable %_ptr_Function_float Function",
          "%63 = OpVariable %_ptr_Function_float Function",
-         "%64 = OpVariable %_ptr_Function_float Function",
          "%52 = OpVariable %_ptr_Function_float Function",
          "%53 = OpVariable %_ptr_Function_float Function",
       "%color = OpVariable %_ptr_Function_v4float Function",
@@ -688,20 +687,20 @@
          "%29 = OpAccessChain %_ptr_Function_float %color %uint_0",
          "%30 = OpLoad %float %29",
                "OpStore %param %30",
-         "%55 = OpLoad %float %param",
-               "OpStore %52 %55",
-         "%56 = OpLoad %float %52",
-         "%57 = OpFOrdLessThan %bool %56 %float_0",
-               "OpSelectionMerge %61 None",
-               "OpBranchConditional %57 %58 %61",
+         "%54 = OpLoad %float %param",
+               "OpStore %52 %54",
+         "%55 = OpLoad %float %52",
+         "%56 = OpFOrdLessThan %bool %55 %float_0",
+               "OpSelectionMerge %57 None",
+               "OpBranchConditional %56 %58 %57",
          "%58 = OpLabel",
          "%59 = OpLoad %float %52",
          "%60 = OpFNegate %float %59",
                "OpStore %52 %60",
-               "OpBranch %61",
-         "%61 = OpLabel",
-         "%62 = OpLoad %float %52",
-               "OpStore %53 %62",
+               "OpBranch %57",
+         "%57 = OpLabel",
+         "%61 = OpLoad %float %52",
+               "OpStore %53 %61",
          "%31 = OpLoad %float %53",
          "%32 = OpFOrdGreaterThan %bool %31 %float_2",
                "OpSelectionMerge %33 None",
@@ -710,25 +709,25 @@
          "%35 = OpAccessChain %_ptr_Function_float %color %uint_1",
          "%36 = OpLoad %float %35",
                "OpStore %param_0 %36",
-         "%66 = OpLoad %float %param_0",
-               "OpStore %63 %66",
-         "%67 = OpLoad %float %63",
-         "%68 = OpFOrdLessThan %bool %67 %float_0",
-               "OpSelectionMerge %72 None",
-               "OpBranchConditional %68 %69 %72",
-         "%69 = OpLabel",
-         "%70 = OpLoad %float %63",
-         "%71 = OpFNegate %float %70",
+         "%64 = OpLoad %float %param_0",
+               "OpStore %62 %64",
+         "%65 = OpLoad %float %62",
+         "%66 = OpFOrdLessThan %bool %65 %float_0",
+               "OpSelectionMerge %67 None",
+               "OpBranchConditional %66 %68 %67",
+         "%68 = OpLabel",
+         "%69 = OpLoad %float %62",
+         "%70 = OpFNegate %float %69",
+               "OpStore %62 %70",
+               "OpBranch %67",
+         "%67 = OpLabel",
+         "%71 = OpLoad %float %62",
                "OpStore %63 %71",
-               "OpBranch %72",
-         "%72 = OpLabel",
-         "%73 = OpLoad %float %63",
-               "OpStore %64 %73",
-         "%37 = OpLoad %float %64",
+         "%37 = OpLoad %float %63",
          "%38 = OpFOrdGreaterThan %bool %37 %float_2",
                "OpBranch %33",
          "%33 = OpLabel",
-         "%39 = OpPhi %bool %32 %61 %38 %72",
+         "%39 = OpPhi %bool %32 %57 %38 %67",
                "OpSelectionMerge %40 None",
                "OpBranchConditional %39 %41 %40",
          "%41 = OpLabel",
@@ -903,28 +902,28 @@
                "OpStore %color1 %42",
          "%43 = OpLoad %v4float %BaseColor",
                "OpStore %param %43",
-         "%69 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%70 = OpLoad %float %69",
-               "OpStore %66 %70",
-         "%71 = OpLoad %float %66",
-         "%72 = OpFOrdLessThan %bool %71 %float_0",
-               "OpSelectionMerge %76 None",
-               "OpBranchConditional %72 %73 %76",
+         "%68 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%69 = OpLoad %float %68",
+               "OpStore %66 %69",
+         "%70 = OpLoad %float %66",
+         "%71 = OpFOrdLessThan %bool %70 %float_0",
+               "OpSelectionMerge %72 None",
+               "OpBranchConditional %71 %73 %72",
          "%73 = OpLabel",
          "%74 = OpLoad %float %66",
          "%75 = OpFNegate %float %74",
                "OpStore %66 %75",
-               "OpBranch %76",
-         "%76 = OpLabel",
-         "%77 = OpLoad %float %66",
-               "OpStore %67 %77",
+               "OpBranch %72",
+         "%72 = OpLabel",
+         "%76 = OpLoad %float %66",
+               "OpStore %67 %76",
          "%44 = OpLoad %float %67",
          "%45 = OpCompositeConstruct %v4float %44 %44 %44 %44",
                "OpStore %color2 %45",
          "%46 = OpLoad %25 %t2D",
          "%47 = OpLoad %27 %samp",
-         "%78 = OpSampledImage %29 %39 %40",
-         "%48 = OpImageSampleImplicitLod %v4float %78 %35",
+         "%77 = OpSampledImage %29 %39 %40",
+         "%48 = OpImageSampleImplicitLod %v4float %77 %35",
                "OpStore %color3 %48",
          "%49 = OpLoad %v4float %color1",
          "%50 = OpLoad %v4float %color2",
@@ -1109,27 +1108,27 @@
                "OpStore %color1 %43",
          "%46 = OpLoad %v4float %BaseColor",
                "OpStore %param %46",
-         "%71 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%72 = OpLoad %float %71",
-               "OpStore %68 %72",
-         "%73 = OpLoad %float %68",
-         "%74 = OpFOrdLessThan %bool %73 %float_0",
-               "OpSelectionMerge %78 None",
-               "OpBranchConditional %74 %75 %78",
+         "%70 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%71 = OpLoad %float %70",
+               "OpStore %68 %71",
+         "%72 = OpLoad %float %68",
+         "%73 = OpFOrdLessThan %bool %72 %float_0",
+               "OpSelectionMerge %74 None",
+               "OpBranchConditional %73 %75 %74",
          "%75 = OpLabel",
          "%76 = OpLoad %float %68",
          "%77 = OpFNegate %float %76",
                "OpStore %68 %77",
-               "OpBranch %78",
-         "%78 = OpLabel",
-         "%79 = OpLoad %float %68",
-               "OpStore %69 %79",
+               "OpBranch %74",
+         "%74 = OpLabel",
+         "%78 = OpLoad %float %68",
+               "OpStore %69 %78",
          "%47 = OpLoad %float %69",
          "%48 = OpCompositeConstruct %v4float %47 %47 %47 %47",
                "OpStore %color2 %48",
-         "%80 = OpSampledImage %30 %40 %41",
-         "%81 = OpImage %26 %80",
-         "%49 = OpSampledImage %30 %81 %45",
+         "%79 = OpSampledImage %30 %40 %41",
+         "%80 = OpImage %26 %79",
+         "%49 = OpSampledImage %30 %80 %45",
          "%50 = OpImageSampleImplicitLod %v4float %49 %36",
                "OpStore %color3 %50",
          "%51 = OpLoad %v4float %color1",
@@ -1315,28 +1314,28 @@
                "OpStore %color1 %43",
          "%47 = OpLoad %v4float %BaseColor",
                "OpStore %param %47",
-         "%71 = OpAccessChain %_ptr_Function_float %param %uint_0",
-         "%72 = OpLoad %float %71",
-               "OpStore %68 %72",
-         "%73 = OpLoad %float %68",
-         "%74 = OpFOrdLessThan %bool %73 %float_0",
-               "OpSelectionMerge %78 None",
-               "OpBranchConditional %74 %75 %78",
+         "%70 = OpAccessChain %_ptr_Function_float %param %uint_0",
+         "%71 = OpLoad %float %70",
+               "OpStore %68 %71",
+         "%72 = OpLoad %float %68",
+         "%73 = OpFOrdLessThan %bool %72 %float_0",
+               "OpSelectionMerge %74 None",
+               "OpBranchConditional %73 %75 %74",
          "%75 = OpLabel",
          "%76 = OpLoad %float %68",
          "%77 = OpFNegate %float %76",
                "OpStore %68 %77",
-               "OpBranch %78",
-         "%78 = OpLabel",
-         "%79 = OpLoad %float %68",
-               "OpStore %69 %79",
+               "OpBranch %74",
+         "%74 = OpLabel",
+         "%78 = OpLoad %float %68",
+               "OpStore %69 %78",
          "%48 = OpLoad %float %69",
          "%49 = OpCompositeConstruct %v4float %48 %48 %48 %48",
                "OpStore %color2 %49",
-         "%80 = OpSampledImage %30 %40 %41",
-         "%81 = OpImage %26 %80",
-         "%82 = OpSampledImage %30 %81 %45",
-         "%50 = OpImageSampleImplicitLod %v4float %82 %36",
+         "%79 = OpSampledImage %30 %40 %41",
+         "%80 = OpImage %26 %79",
+         "%81 = OpSampledImage %30 %80 %45",
+         "%50 = OpImageSampleImplicitLod %v4float %81 %36",
                "OpStore %color3 %50",
          "%51 = OpLoad %v4float %color1",
          "%52 = OpLoad %v4float %color2",
@@ -1356,6 +1355,292 @@
       /* skip_nop = */ false, /* do_validate = */ true);
 }
 
+TEST_F(InlineTest, EarlyReturnFunctionInlined) {
+  // #version 140
+  //
+  // in vec4 BaseColor;
+  //
+  // float foo(vec4 bar)
+  // {
+  //     if (bar.x < 0.0)
+  //         return 0.0;
+  //     return bar.x;
+  // }
+  //
+  // void main()
+  // {
+  //     vec4 color = vec4(foo(BaseColor));
+  //     gl_FragColor = color;
+  // }
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 140
+OpName %main "main"
+OpName %foo_vf4_ "foo(vf4;"
+OpName %bar "bar"
+OpName %color "color"
+OpName %BaseColor "BaseColor"
+OpName %param "param"
+OpName %gl_FragColor "gl_FragColor"
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%14 = OpTypeFunction %float %_ptr_Function_v4float
+%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%bool = OpTypeBool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%BaseColor = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%gl_FragColor = OpVariable %_ptr_Output_v4float Output
+)";
+
+  const std::string nonEntryFuncs =
+      R"(%foo_vf4_ = OpFunction %float None %14
+%bar = OpFunctionParameter %_ptr_Function_v4float
+%27 = OpLabel
+%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%29 = OpLoad %float %28
+%30 = OpFOrdLessThan %bool %29 %float_0
+OpSelectionMerge %31 None
+OpBranchConditional %30 %32 %31
+%32 = OpLabel
+OpReturnValue %float_0
+%31 = OpLabel
+%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
+%34 = OpLoad %float %33
+OpReturnValue %34
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %10
+%22 = OpLabel
+%color = OpVariable %_ptr_Function_v4float Function
+%param = OpVariable %_ptr_Function_v4float Function
+%23 = OpLoad %v4float %BaseColor
+OpStore %param %23
+%24 = OpFunctionCall %float %foo_vf4_ %param
+%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
+OpStore %color %25
+%26 = OpLoad %v4float %color
+OpStore %gl_FragColor %26
+OpReturn
+OpFunctionEnd
+)";
+  // Expected: foo is wrapped in a single-trip loop; returns become stores.
+  const std::string after =
+      R"(%false = OpConstantFalse %bool
+%main = OpFunction %void None %10
+%22 = OpLabel
+%35 = OpVariable %_ptr_Function_float Function
+%color = OpVariable %_ptr_Function_v4float Function
+%param = OpVariable %_ptr_Function_v4float Function
+%23 = OpLoad %v4float %BaseColor
+OpStore %param %23
+OpBranch %36
+%36 = OpLabel
+OpLoopMerge %37 %38 None
+OpBranch %39
+%39 = OpLabel
+%40 = OpAccessChain %_ptr_Function_float %param %uint_0
+%41 = OpLoad %float %40
+%42 = OpFOrdLessThan %bool %41 %float_0
+OpSelectionMerge %43 None
+OpBranchConditional %42 %44 %43
+%44 = OpLabel
+OpStore %35 %float_0
+OpBranch %37
+%43 = OpLabel
+%45 = OpAccessChain %_ptr_Function_float %param %uint_0
+%46 = OpLoad %float %45
+OpStore %35 %46
+OpBranch %37
+%38 = OpLabel
+OpBranchConditional %false %36 %37
+%37 = OpLabel
+%24 = OpLoad %float %35
+%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
+OpStore %color %25
+%26 = OpLoad %v4float %color
+OpStore %gl_FragColor %26
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + before + nonEntryFuncs,
+                                              predefs + after + nonEntryFuncs,
+                                              false, true);
+}
+
+TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) {
+  // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755
+  //
+  // Original example is derived from:
+  //
+  // #version 450
+  //
+  // float foo() {
+  //     if (true) {
+  //     }
+  // }
+  //
+  // void main() { foo(); }
+  //
+  // But the order of basic blocks in foo is changed so that the return
+  // block is listed second-last.  There is only one return in the callee
+  // but it does not appear last.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Vertex %main "main"
+OpSource GLSL 450
+OpName %main "main"
+OpName %foo_ "foo("
+%void = OpTypeVoid
+%4 = OpTypeFunction %void
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+)";
+
+  const std::string nonEntryFuncs =
+      R"(%foo_ = OpFunction %void None %4
+%7 = OpLabel
+OpSelectionMerge %8 None
+OpBranchConditional %true %9 %8
+%8 = OpLabel
+OpReturn
+%9 = OpLabel
+OpBranch %8
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %4
+%10 = OpLabel
+%11 = OpFunctionCall %void %foo_
+OpReturn
+OpFunctionEnd
+)";
+  // Expected: the callee return becomes a branch to a new trailing block.
+  const std::string after =
+      R"(%main = OpFunction %void None %4
+%10 = OpLabel
+OpSelectionMerge %12 None
+OpBranchConditional %true %13 %12
+%12 = OpLabel
+OpBranch %14
+%13 = OpLabel
+OpBranch %12
+%14 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
+                                              predefs + nonEntryFuncs + after,
+                                              false, true);
+}
+
+TEST_F(InlineTest, ForwardReferencesInPhiInlined) {
+  // The basic structure of the test case is like this:
+  //
+  // int foo() {
+  //   int result = 1;
+  //   if (true) {
+  //      result = 1;
+  //   }
+  //   return result;
+  // }
+  //
+  // void main() {
+  //  int x = foo();
+  // }
+  //
+  // but with modifications: Using Phi instead of load/store, and the
+  // return block in foo appears before the "then" block.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Vertex %main "main"
+OpSource GLSL 450
+OpName %main "main"
+OpName %foo_ "foo("
+OpName %x "x"
+%void = OpTypeVoid
+%6 = OpTypeFunction %void
+%int = OpTypeInt 32 1
+%8 = OpTypeFunction %int
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+%int_0 = OpConstant %int 0
+%_ptr_Function_int = OpTypePointer Function %int
+)";
+
+  const std::string nonEntryFuncs =
+      R"(%foo_ = OpFunction %int None %8
+%13 = OpLabel
+%14 = OpCopyObject %int %int_0
+OpSelectionMerge %15 None
+OpBranchConditional %true %16 %15
+%15 = OpLabel
+%17 = OpPhi %int %14 %13 %18 %16
+OpReturnValue %17
+%16 = OpLabel
+%18 = OpCopyObject %int %int_0
+OpBranch %15
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %6
+%19 = OpLabel
+%x = OpVariable %_ptr_Function_int Function
+%20 = OpFunctionCall %int %foo_
+OpStore %x %20
+OpReturn
+OpFunctionEnd
+)";
+  // Expected: the cloned OpPhi names caller block %19 in place of entry %13.
+  const std::string after =
+      R"(%main = OpFunction %void None %6
+%19 = OpLabel
+%21 = OpVariable %_ptr_Function_int Function
+%x = OpVariable %_ptr_Function_int Function
+%22 = OpCopyObject %int %int_0
+OpSelectionMerge %23 None
+OpBranchConditional %true %24 %23
+%23 = OpLabel
+%26 = OpPhi %int %22 %19 %25 %24
+OpStore %21 %26
+OpBranch %27
+%24 = OpLabel
+%25 = OpCopyObject %int %int_0
+OpBranch %23
+%27 = OpLabel
+%20 = OpLoad %int %21
+OpStore %x %20
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
+                                              predefs + nonEntryFuncs + after,
+                                              false, true);
+}
+
 TEST_F(InlineTest, EarlyReturnInLoopIsNotInlined) {
   // #version 140
   //
@@ -1535,8 +1820,8 @@
 OpBranch %10
 %10 = OpLabel
 OpLoopMerge %12 %10 None
-OpBranch %14
-%14 = OpLabel
+OpBranch %13
+%13 = OpLabel
 OpBranchConditional %true %10 %12
 %12 = OpLabel
 OpReturn
@@ -1605,11 +1890,11 @@
 OpBranch %18
 %18 = OpLabel
 %19 = OpCopyObject %int %int_3
-%26 = OpCopyObject %int %int_1
+%25 = OpCopyObject %int %int_1
 OpLoopMerge %22 %23 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpCopyObject %int %int_2
+OpBranch %26
+%26 = OpLabel
+%27 = OpCopyObject %int %int_2
 %21 = OpCopyObject %int %int_4
 OpBranchConditional %true %23 %22
 %23 = OpLabel
@@ -1698,11 +1983,11 @@
 OpLoopMerge %16 %13 None
 OpBranch %17
 %17 = OpLabel
-%19 = OpCopyObject %bool %true
-OpSelectionMerge %20 None
-OpBranchConditional %true %20 %20
-%20 = OpLabel
-%21 = OpPhi %bool %19 %17
+%18 = OpCopyObject %bool %true
+OpSelectionMerge %19 None
+OpBranchConditional %true %19 %19
+%19 = OpLabel
+%20 = OpPhi %bool %18 %17
 OpBranchConditional %true %13 %16
 %16 = OpLabel
 OpReturn
@@ -1775,11 +2060,11 @@
 OpLoopMerge %22 %23 None
 OpBranch %25
 %25 = OpLabel
-%27 = OpCopyObject %int %int_1
-OpSelectionMerge %28 None
-OpBranchConditional %true %28 %28
-%28 = OpLabel
-%29 = OpCopyObject %int %int_2
+%26 = OpCopyObject %int %int_1
+OpSelectionMerge %27 None
+OpBranchConditional %true %27 %27
+%27 = OpLabel
+%28 = OpCopyObject %int %int_2
 %21 = OpCopyObject %int %int_4
 OpBranchConditional %true %23 %22
 %23 = OpLabel
@@ -1795,6 +2080,165 @@
                                               false, true);
 }
 
+TEST_F(
+    InlineTest,
+    SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMergeAndMultiReturns) {
+  // This is similar to SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMerge
+  // except that in addition to starting with a selection header, the
+  // callee also has multiple returns.
+  //
+  // So now we have to accommodate:
+  // - The caller's OpLoopMerge (which must move to the first block)
+  // - The single-trip loop to wrap the multiple returns, and
+  // - The callee's selection merge in its first block.
+  // Each of these must go into their own blocks.
+  // NOTE(review): the callee below has no OpSelectionMerge; confirm wording.
+  const std::string predefs =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %1 "main"
+OpSource OpenCL_C 120
+%bool = OpTypeBool
+%int = OpTypeInt 32 1
+%true = OpConstantTrue %bool
+%false = OpConstantFalse %bool
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%int_2 = OpConstant %int 2
+%int_3 = OpConstant %int 3
+%int_4 = OpConstant %int 4
+%void = OpTypeVoid
+%12 = OpTypeFunction %void
+)";
+
+  const std::string nonEntryFuncs =
+      R"(%13 = OpFunction %void None %12
+%14 = OpLabel
+%15 = OpCopyObject %int %int_0
+OpReturn
+%16 = OpLabel
+%17 = OpCopyObject %int %int_1
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%1 = OpFunction %void None %12
+%18 = OpLabel
+OpBranch %19
+%19 = OpLabel
+%20 = OpCopyObject %int %int_2
+%21 = OpFunctionCall %void %13
+%22 = OpCopyObject %int %int_3
+OpLoopMerge %23 %19 None
+OpBranchConditional %true %19 %23
+%23 = OpLabel
+%24 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+  // Expected: OpLoopMerge stays in %19; both returns branch to new block %26.
+  const std::string after =
+      R"(%1 = OpFunction %void None %12
+%18 = OpLabel
+OpBranch %19
+%19 = OpLabel
+%20 = OpCopyObject %int %int_2
+%25 = OpCopyObject %int %int_0
+OpLoopMerge %23 %19 None
+OpBranch %26
+%27 = OpLabel
+%28 = OpCopyObject %int %int_1
+OpBranch %26
+%26 = OpLabel
+%22 = OpCopyObject %int %int_3
+OpBranchConditional %true %19 %23
+%23 = OpLabel
+%24 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
+                                              predefs + nonEntryFuncs + after,
+                                              false, true);
+}
+
+TEST_F(InlineTest, CalleeWithMultiReturnAndPhiRequiresEntryBlockRemapping) {
+  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/790
+  //
+  // The callee has multiple returns, and so must be wrapped with a single-trip
+  // loop.  That code must remap the callee entry block ID to the introduced
+  // loop body's ID.  Otherwise you can get a dominance error in a cloned OpPhi.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %1 "main"
+OpSource OpenCL_C 120
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%int_2 = OpConstant %int 2
+%int_3 = OpConstant %int 3
+%int_4 = OpConstant %int 4
+%void = OpTypeVoid
+%9 = OpTypeFunction %void
+%bool = OpTypeBool
+%false = OpConstantFalse %bool
+)";
+
+  // This callee has multiple returns, and a Phi in the second block referencing
+  // a value generated in the entry block.
+  const std::string nonEntryFuncs =
+      R"(%12 = OpFunction %void None %9
+%13 = OpLabel
+%14 = OpCopyObject %int %int_0
+OpBranch %15
+%15 = OpLabel
+%16 = OpPhi %int %14 %13
+%17 = OpCopyObject %int %int_1
+OpReturn
+%18 = OpLabel
+%19 = OpCopyObject %int %int_2
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string before =
+      R"(%1 = OpFunction %void None %9
+%20 = OpLabel
+%21 = OpCopyObject %int %int_3
+%22 = OpFunctionCall %void %12
+%23 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+  // Expected: the cloned OpPhi names caller block %20 in place of entry %13.
+  const std::string after =
+      R"(%1 = OpFunction %void None %9
+%20 = OpLabel
+%21 = OpCopyObject %int %int_3
+%24 = OpCopyObject %int %int_0
+OpBranch %25
+%25 = OpLabel
+%26 = OpPhi %int %24 %20
+%27 = OpCopyObject %int %int_1
+OpBranch %28
+%29 = OpLabel
+%30 = OpCopyObject %int %int_2
+OpBranch %28
+%28 = OpLabel
+%23 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(predefs + nonEntryFuncs + before,
+                                              predefs + nonEntryFuncs + after,
+                                              false, true);
+}
+
 TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) {
   // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
   //
@@ -1880,6 +2324,138 @@
       predefs + caller + callee, predefs + caller + callee, false, true);
 }
 
+TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
+  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
+  //
+  // The callee returns from inside a selection construct, so it needs a
+  // single-trip loop wrapper to be inlined.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %_GLF_color
+OpExecutionMode %main OriginUpperLeft
+OpSource ESSL 310
+OpName %main "main"
+OpName %f_ "f("
+OpName %i "i"
+OpName %_GLF_color "_GLF_color"
+OpDecorate %_GLF_color Location 0
+%void = OpTypeVoid
+%7 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%9 = OpTypeFunction %float
+%float_1 = OpConstant %float 1
+%bool = OpTypeBool
+%false = OpConstantFalse %bool
+%true = OpConstantTrue %bool
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_GLF_color = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
+)";
+
+  const std::string new_predefs =
+      R"(%_ptr_Function_float = OpTypePointer Function %float
+)";
+
+  const std::string main_before =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+%31 = OpFunctionCall %float %f_
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string main_after =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%38 = OpVariable %_ptr_Function_float Function
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+OpBranch %39
+%39 = OpLabel
+OpLoopMerge %40 %41 None
+OpBranch %42
+%42 = OpLabel
+OpSelectionMerge %43 None
+OpBranchConditional %true %44 %43
+%44 = OpLabel
+OpStore %38 %float_1
+OpBranch %40
+%43 = OpLabel
+OpStore %38 %float_1
+OpBranch %40
+%41 = OpLabel
+OpBranchConditional %false %39 %40
+%40 = OpLabel
+%31 = OpLoad %float %38
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string callee =
+      R"(%f_ = OpFunction %float None %9
+%34 = OpLabel
+OpSelectionMerge %35 None
+OpBranchConditional %true %36 %35
+%36 = OpLabel
+OpReturnValue %float_1
+%35 = OpLabel
+OpReturnValue %float_1
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(
+      predefs + main_before + callee,
+      predefs + new_predefs + main_after + callee, false, true);
+}
+
 TEST_F(InlineTest, Decorated1) {
   // Same test as Simple with the difference
   // that OpFAdd in the outlined function is
@@ -1950,7 +2526,7 @@
 )";
 
   const std::string after =
-      R"(OpDecorate %38 RelaxedPrecision
+      R"(OpDecorate %37 RelaxedPrecision
 %void = OpTypeVoid
 %11 = OpTypeFunction %void
 %float = OpTypeFloat 32
@@ -1972,12 +2548,12 @@
 %param = OpVariable %_ptr_Function_v4float Function
 %23 = OpLoad %v4float %BaseColor
 OpStore %param %23
-%34 = OpAccessChain %_ptr_Function_float %param %uint_0
-%35 = OpLoad %float %34
-%36 = OpAccessChain %_ptr_Function_float %param %uint_1
-%37 = OpLoad %float %36
-%38 = OpFAdd %float %35 %37
-OpStore %32 %38
+%33 = OpAccessChain %_ptr_Function_float %param %uint_0
+%34 = OpLoad %float %33
+%35 = OpAccessChain %_ptr_Function_float %param %uint_1
+%36 = OpLoad %float %35
+%37 = OpFAdd %float %34 %36
+OpStore %32 %37
 %24 = OpLoad %float %32
 %25 = OpCompositeConstruct %v4float %24 %24 %24 %24
 OpStore %color %25
@@ -2096,12 +2672,12 @@
 %param = OpVariable %_ptr_Function_v4float Function
 %22 = OpLoad %v4float %BaseColor
 OpStore %param %22
-%34 = OpAccessChain %_ptr_Function_float %param %uint_0
-%35 = OpLoad %float %34
-%36 = OpAccessChain %_ptr_Function_float %param %uint_1
-%37 = OpLoad %float %36
-%38 = OpFAdd %float %35 %37
-OpStore %32 %38
+%33 = OpAccessChain %_ptr_Function_float %param %uint_0
+%34 = OpLoad %float %33
+%35 = OpAccessChain %_ptr_Function_float %param %uint_1
+%36 = OpLoad %float %35
+%37 = OpFAdd %float %34 %36
+OpStore %32 %37
 %23 = OpLoad %float %32
 %24 = OpCompositeConstruct %v4float %23 %23 %23 %23
 OpStore %color %24
@@ -2441,7 +3017,7 @@
 %main = OpFunction %void None %3
 %5 = OpLabel
 OpKill
-%18 = OpLabel
+%17 = OpLabel
 OpReturn
 OpFunctionEnd
 %kill_ = OpFunction %void None %3
@@ -2454,560 +3030,6 @@
   SinglePassRunAndCheck<InlineExhaustivePass>(before, after, false, true);
 }
 
-TEST_F(InlineTest, EarlyReturnFunctionInlined) {
-  // #version 140
-  //
-  // in vec4 BaseColor;
-  //
-  // float foo(vec4 bar)
-  // {
-  //     if (bar.x < 0.0)
-  //         return 0.0;
-  //     return bar.x;
-  // }
-  //
-  // void main()
-  // {
-  //     vec4 color = vec4(foo(BaseColor));
-  //     gl_FragColor = color;
-  // }
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor
-OpExecutionMode %main OriginUpperLeft
-OpSource GLSL 140
-OpName %main "main"
-OpName %foo_vf4_ "foo(vf4;"
-OpName %bar "bar"
-OpName %color "color"
-OpName %BaseColor "BaseColor"
-OpName %param "param"
-OpName %gl_FragColor "gl_FragColor"
-%void = OpTypeVoid
-%10 = OpTypeFunction %void
-%float = OpTypeFloat 32
-%v4float = OpTypeVector %float 4
-%_ptr_Function_v4float = OpTypePointer Function %v4float
-%14 = OpTypeFunction %float %_ptr_Function_v4float
-%uint = OpTypeInt 32 0
-%uint_0 = OpConstant %uint 0
-%_ptr_Function_float = OpTypePointer Function %float
-%float_0 = OpConstant %float 0
-%bool = OpTypeBool
-%_ptr_Input_v4float = OpTypePointer Input %v4float
-%BaseColor = OpVariable %_ptr_Input_v4float Input
-%_ptr_Output_v4float = OpTypePointer Output %v4float
-%gl_FragColor = OpVariable %_ptr_Output_v4float Output
-)";
-
-  const std::string foo =
-      R"(%foo_vf4_ = OpFunction %float None %14
-%bar = OpFunctionParameter %_ptr_Function_v4float
-%27 = OpLabel
-%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%29 = OpLoad %float %28
-%30 = OpFOrdLessThan %bool %29 %float_0
-OpSelectionMerge %31 None
-OpBranchConditional %30 %32 %31
-%32 = OpLabel
-OpReturnValue %float_0
-%31 = OpLabel
-%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%34 = OpLoad %float %33
-OpReturnValue %34
-OpFunctionEnd
-)";
-
-  const std::string fooMergeReturn =
-      R"(%foo_vf4_ = OpFunction %float None %14
-%bar = OpFunctionParameter %_ptr_Function_v4float
-%27 = OpLabel
-%41 = OpVariable %_ptr_Function_bool Function %false
-%36 = OpVariable %_ptr_Function_float Function
-OpSelectionMerge %35 None
-OpSwitch %uint_0 %38
-%38 = OpLabel
-%28 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%29 = OpLoad %float %28
-%30 = OpFOrdLessThan %bool %29 %float_0
-OpSelectionMerge %31 None
-OpBranchConditional %30 %32 %31
-%32 = OpLabel
-OpStore %41 %true
-OpStore %36 %float_0
-OpBranch %35
-%31 = OpLabel
-%33 = OpAccessChain %_ptr_Function_float %bar %uint_0
-%34 = OpLoad %float %33
-OpStore %41 %true
-OpStore %36 %34
-OpBranch %35
-%35 = OpLabel
-%37 = OpLoad %float %36
-OpReturnValue %37
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %10
-%22 = OpLabel
-%color = OpVariable %_ptr_Function_v4float Function
-%param = OpVariable %_ptr_Function_v4float Function
-%23 = OpLoad %v4float %BaseColor
-OpStore %param %23
-%24 = OpFunctionCall %float %foo_vf4_ %param
-%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
-OpStore %color %25
-%26 = OpLoad %v4float %color
-OpStore %gl_FragColor %26
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%false = OpConstantFalse %bool
-%_ptr_Function_bool = OpTypePointer Function %bool
-%true = OpConstantTrue %bool
-%main = OpFunction %void None %10
-%22 = OpLabel
-%43 = OpVariable %_ptr_Function_bool Function %false
-%44 = OpVariable %_ptr_Function_float Function
-%45 = OpVariable %_ptr_Function_float Function
-%color = OpVariable %_ptr_Function_v4float Function
-%param = OpVariable %_ptr_Function_v4float Function
-%23 = OpLoad %v4float %BaseColor
-OpStore %param %23
-OpStore %43 %false
-OpSelectionMerge %55 None
-OpSwitch %uint_0 %47
-%47 = OpLabel
-%48 = OpAccessChain %_ptr_Function_float %param %uint_0
-%49 = OpLoad %float %48
-%50 = OpFOrdLessThan %bool %49 %float_0
-OpSelectionMerge %52 None
-OpBranchConditional %50 %51 %52
-%51 = OpLabel
-OpStore %43 %true
-OpStore %44 %float_0
-OpBranch %55
-%52 = OpLabel
-%53 = OpAccessChain %_ptr_Function_float %param %uint_0
-%54 = OpLoad %float %53
-OpStore %43 %true
-OpStore %44 %54
-OpBranch %55
-%55 = OpLabel
-%56 = OpLoad %float %44
-OpStore %45 %56
-%24 = OpLoad %float %45
-%25 = OpCompositeConstruct %v4float %24 %24 %24 %24
-OpStore %color %25
-%26 = OpLoad %v4float %color
-OpStore %gl_FragColor %26
-OpReturn
-OpFunctionEnd
-)";
-
-  // The early return case must be handled by merge-return first.
-  AddPass<MergeReturnPass>();
-  AddPass<InlineExhaustivePass>();
-  RunAndCheck(predefs + before + foo, predefs + after + fooMergeReturn);
-}
-
-TEST_F(InlineTest, EarlyReturnNotAppearingLastInFunctionInlined) {
-  // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/755
-  //
-  // Original example is derived from:
-  //
-  // #version 450
-  //
-  // float foo() {
-  //     if (true) {
-  //     }
-  // }
-  //
-  // void main() { foo(); }
-  //
-  // But the order of basic blocks in foo is changed so that the return
-  // block is listed second-last.  There is only one return in the callee
-  // but it does not appear last.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-OpMemoryModel Logical GLSL450
-OpEntryPoint Vertex %main "main"
-OpSource GLSL 450
-OpName %main "main"
-OpName %foo_ "foo("
-%void = OpTypeVoid
-%4 = OpTypeFunction %void
-%bool = OpTypeBool
-%true = OpConstantTrue %bool
-)";
-
-  const std::string foo =
-      R"(%foo_ = OpFunction %void None %4
-%7 = OpLabel
-OpSelectionMerge %8 None
-OpBranchConditional %true %9 %8
-%8 = OpLabel
-OpReturn
-%9 = OpLabel
-OpBranch %8
-OpFunctionEnd
-)";
-
-  const std::string fooMergeReturn =
-      R"(%uint = OpTypeInt 32 0
-%uint_0 = OpConstant %uint 0
-%false = OpConstantFalse %bool
-%_ptr_Function_bool = OpTypePointer Function %bool
-%foo_ = OpFunction %void None %4
-%7 = OpLabel
-%18 = OpVariable %_ptr_Function_bool Function %false
-OpSelectionMerge %12 None
-OpSwitch %uint_0 %13
-%13 = OpLabel
-OpSelectionMerge %8 None
-OpBranchConditional %true %9 %8
-%8 = OpLabel
-OpStore %18 %true
-OpBranch %12
-%9 = OpLabel
-OpBranch %8
-%12 = OpLabel
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %4
-%10 = OpLabel
-%11 = OpFunctionCall %void %foo_
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%main = OpFunction %void None %4
-%10 = OpLabel
-%19 = OpVariable %_ptr_Function_bool Function %false
-OpStore %19 %false
-OpSelectionMerge %24 None
-OpSwitch %uint_0 %21
-%21 = OpLabel
-OpSelectionMerge %22 None
-OpBranchConditional %true %23 %22
-%22 = OpLabel
-OpStore %19 %true
-OpBranch %24
-%23 = OpLabel
-OpBranch %22
-%24 = OpLabel
-OpReturn
-OpFunctionEnd
-)";
-
-  // The early return case must be handled by merge-return first.
-  AddPass<MergeReturnPass>();
-  AddPass<InlineExhaustivePass>();
-  RunAndCheck(predefs + foo + before, predefs + fooMergeReturn + after);
-}
-
-TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
-  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
-  //
-  // The callee has a single return, but needs single-trip loop wrapper
-  // to be inlined because the return is in a selection structure.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %main "main" %_GLF_color
-OpExecutionMode %main OriginUpperLeft
-OpSource ESSL 310
-OpName %main "main"
-OpName %f_ "f("
-OpName %i "i"
-OpName %_GLF_color "_GLF_color"
-OpDecorate %_GLF_color Location 0
-%void = OpTypeVoid
-%7 = OpTypeFunction %void
-%float = OpTypeFloat 32
-%9 = OpTypeFunction %float
-%float_1 = OpConstant %float 1
-%bool = OpTypeBool
-%false = OpConstantFalse %bool
-%true = OpConstantTrue %bool
-%int = OpTypeInt 32 1
-%_ptr_Function_int = OpTypePointer Function %int
-%int_0 = OpConstant %int 0
-%int_1 = OpConstant %int 1
-%v4float = OpTypeVector %float 4
-%_ptr_Output_v4float = OpTypePointer Output %v4float
-%_GLF_color = OpVariable %_ptr_Output_v4float Output
-%float_0 = OpConstant %float 0
-%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
-%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
-)";
-
-  const std::string new_predefs =
-      R"(%_ptr_Function_float = OpTypePointer Function %float
-%uint = OpTypeInt 32 0
-%uint_0 = OpConstant %uint 0
-%_ptr_Function_bool = OpTypePointer Function %bool
-)";
-
-  const std::string main_before =
-      R"(%main = OpFunction %void None %7
-%23 = OpLabel
-%i = OpVariable %_ptr_Function_int Function
-OpStore %i %int_0
-OpBranch %24
-%24 = OpLabel
-OpLoopMerge %25 %26 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpLoad %int %i
-%29 = OpSLessThan %bool %28 %int_1
-OpBranchConditional %29 %30 %25
-%30 = OpLabel
-OpStore %_GLF_color %21
-%31 = OpFunctionCall %float %f_
-OpBranch %26
-%26 = OpLabel
-%32 = OpLoad %int %i
-%33 = OpIAdd %int %32 %int_1
-OpStore %i %33
-OpBranch %24
-%25 = OpLabel
-OpStore %_GLF_color %22
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string main_after =
-      R"(%main = OpFunction %void None %7
-%23 = OpLabel
-%46 = OpVariable %_ptr_Function_bool Function %false
-%47 = OpVariable %_ptr_Function_float Function
-%48 = OpVariable %_ptr_Function_float Function
-%i = OpVariable %_ptr_Function_int Function
-OpStore %i %int_0
-OpBranch %24
-%24 = OpLabel
-OpLoopMerge %25 %26 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpLoad %int %i
-%29 = OpSLessThan %bool %28 %int_1
-OpBranchConditional %29 %30 %25
-%30 = OpLabel
-OpStore %_GLF_color %21
-OpStore %46 %false
-OpSelectionMerge %53 None
-OpSwitch %uint_0 %50
-%50 = OpLabel
-OpSelectionMerge %52 None
-OpBranchConditional %true %51 %52
-%51 = OpLabel
-OpStore %46 %true
-OpStore %47 %float_1
-OpBranch %53
-%52 = OpLabel
-OpStore %46 %true
-OpStore %47 %float_1
-OpBranch %53
-%53 = OpLabel
-%54 = OpLoad %float %47
-OpStore %48 %54
-%31 = OpLoad %float %48
-OpBranch %26
-%26 = OpLabel
-%32 = OpLoad %int %i
-%33 = OpIAdd %int %32 %int_1
-OpStore %i %33
-OpBranch %24
-%25 = OpLabel
-OpStore %_GLF_color %22
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string callee =
-      R"(%f_ = OpFunction %float None %9
-%34 = OpLabel
-OpSelectionMerge %35 None
-OpBranchConditional %true %36 %35
-%36 = OpLabel
-OpReturnValue %float_1
-%35 = OpLabel
-OpReturnValue %float_1
-OpFunctionEnd
-)";
-
-  const std::string calleeMergeReturn =
-      R"(%f_ = OpFunction %float None %9
-%34 = OpLabel
-%45 = OpVariable %_ptr_Function_bool Function %false
-%39 = OpVariable %_ptr_Function_float Function
-OpSelectionMerge %37 None
-OpSwitch %uint_0 %41
-%41 = OpLabel
-OpSelectionMerge %35 None
-OpBranchConditional %true %36 %35
-%36 = OpLabel
-OpStore %45 %true
-OpStore %39 %float_1
-OpBranch %37
-%35 = OpLabel
-OpStore %45 %true
-OpStore %39 %float_1
-OpBranch %37
-%37 = OpLabel
-%40 = OpLoad %float %39
-OpReturnValue %40
-OpFunctionEnd
-)";
-
-  // The early return case must be handled by merge-return first.
-  AddPass<MergeReturnPass>();
-  AddPass<InlineExhaustivePass>();
-  RunAndCheck(predefs + main_before + callee,
-              predefs + new_predefs + main_after + calleeMergeReturn);
-}
-
-TEST_F(InlineTest, ForwardReferencesInPhiInlined) {
-  // The basic structure of the test case is like this:
-  //
-  // int foo() {
-  //   int result = 1;
-  //   if (true) {
-  //      result = 1;
-  //   }
-  //   return result;
-  // }
-  //
-  // void main() {
-  //  int x = foo();
-  // }
-  //
-  // but with modifications: Using Phi instead of load/store, and the
-  // return block in foo appears before the "then" block.
-
-  const std::string predefs =
-      R"(OpCapability Shader
-%1 = OpExtInstImport "GLSL.std.450"
-OpMemoryModel Logical GLSL450
-OpEntryPoint Vertex %main "main"
-OpSource GLSL 450
-OpName %main "main"
-OpName %foo_ "foo("
-OpName %x "x"
-%void = OpTypeVoid
-%6 = OpTypeFunction %void
-%int = OpTypeInt 32 1
-%8 = OpTypeFunction %int
-%bool = OpTypeBool
-%true = OpConstantTrue %bool
-%int_0 = OpConstant %int 0
-%_ptr_Function_int = OpTypePointer Function %int
-)";
-
-  const std::string callee =
-      R"(%foo_ = OpFunction %int None %8
-%13 = OpLabel
-%14 = OpCopyObject %int %int_0
-OpSelectionMerge %15 None
-OpBranchConditional %true %16 %15
-%15 = OpLabel
-%17 = OpPhi %int %14 %13 %18 %16
-OpReturnValue %17
-%16 = OpLabel
-%18 = OpCopyObject %int %int_0
-OpBranch %15
-OpFunctionEnd
-)";
-
-  const std::string calleeMergeReturn =
-      R"(%uint = OpTypeInt 32 0
-%uint_0 = OpConstant %uint 0
-%false = OpConstantFalse %bool
-%_ptr_Function_bool = OpTypePointer Function %bool
-%foo_ = OpFunction %int None %8
-%13 = OpLabel
-%29 = OpVariable %_ptr_Function_bool Function %false
-%22 = OpVariable %_ptr_Function_int Function
-OpSelectionMerge %21 None
-OpSwitch %uint_0 %24
-%24 = OpLabel
-%14 = OpCopyObject %int %int_0
-OpSelectionMerge %15 None
-OpBranchConditional %true %16 %15
-%15 = OpLabel
-%17 = OpPhi %int %14 %24 %18 %16
-OpStore %29 %true
-OpStore %22 %17
-OpBranch %21
-%16 = OpLabel
-%18 = OpCopyObject %int %int_0
-OpBranch %15
-%21 = OpLabel
-%23 = OpLoad %int %22
-OpReturnValue %23
-OpFunctionEnd
-)";
-
-  const std::string before =
-      R"(%main = OpFunction %void None %6
-%19 = OpLabel
-%x = OpVariable %_ptr_Function_int Function
-%20 = OpFunctionCall %int %foo_
-OpStore %x %20
-OpReturn
-OpFunctionEnd
-)";
-
-  const std::string after =
-      R"(%main = OpFunction %void None %6
-%19 = OpLabel
-%30 = OpVariable %_ptr_Function_bool Function %false
-%31 = OpVariable %_ptr_Function_int Function
-%32 = OpVariable %_ptr_Function_int Function
-%x = OpVariable %_ptr_Function_int Function
-OpStore %30 %false
-OpSelectionMerge %40 None
-OpSwitch %uint_0 %34
-%34 = OpLabel
-%35 = OpCopyObject %int %int_0
-OpSelectionMerge %36 None
-OpBranchConditional %true %38 %36
-%36 = OpLabel
-%37 = OpPhi %int %35 %34 %39 %38
-OpStore %30 %true
-OpStore %31 %37
-OpBranch %40
-%38 = OpLabel
-%39 = OpCopyObject %int %int_0
-OpBranch %36
-%40 = OpLabel
-%41 = OpLoad %int %31
-OpStore %32 %41
-%20 = OpLoad %int %32
-OpStore %x %20
-OpReturn
-OpFunctionEnd
-)";
-
-  AddPass<MergeReturnPass>();
-  AddPass<InlineExhaustivePass>();
-  RunAndCheck(predefs + callee + before, predefs + calleeMergeReturn + after);
-}
-
 // TODO(greg-lunarg): Add tests to verify handling of these cases:
 //
 //    Empty modules
