Don't assume one return means function can be inlined. (#2018) (#2025)

If there is only 1 return and it is in a loop, then the function cannot be inlined.

Fix condition when inlined code needs one-trip loop wrapper.  The dummy loop is needed when there is a return inside a selection construct.  Even if there is only 1 return.
diff --git a/source/opt/dead_branch_elim_pass.cpp b/source/opt/dead_branch_elim_pass.cpp
index 7f3e737..c65e551 100644
--- a/source/opt/dead_branch_elim_pass.cpp
+++ b/source/opt/dead_branch_elim_pass.cpp
@@ -93,7 +93,7 @@
 
 bool DeadBranchElimPass::MarkLiveBlocks(
     Function* func, std::unordered_set<BasicBlock*>* live_blocks) {
-  StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuedCFGAnalaysis();
+  StructuredCFGAnalysis* cfgAnalysis = context()->GetStructuredCFGAnalysis();
 
   std::unordered_set<BasicBlock*> continues;
   std::vector<BasicBlock*> stack;
diff --git a/source/opt/inline_pass.cpp b/source/opt/inline_pass.cpp
index 5a88ef5..0543f44 100644
--- a/source/opt/inline_pass.cpp
+++ b/source/opt/inline_pass.cpp
@@ -27,7 +27,6 @@
 static const int kSpvFunctionCallFunctionId = 2;
 static const int kSpvFunctionCallArgumentId = 3;
 static const int kSpvReturnValueId = 0;
-static const int kSpvLoopMergeMergeBlockId = 0;
 static const int kSpvLoopMergeContinueTargetIdInIdx = 1;
 
 namespace spvtools {
@@ -225,8 +224,8 @@
       kSpvFunctionCallFunctionId)];
 
   // Check for multiple returns in the callee.
-  auto fi = multi_return_funcs_.find(calleeFn->result_id());
-  const bool multiReturn = fi != multi_return_funcs_.end();
+  auto fi = early_return_funcs_.find(calleeFn->result_id());
+  const bool earlyReturn = fi != early_return_funcs_.end();
 
   // Map parameters to actual arguments.
   MapParams(calleeFn, call_inst_itr, &callee2caller);
@@ -280,7 +279,7 @@
                          &call_inst_itr, &new_blk_ptr, &prevInstWasReturn,
                          &returnLabelId, &returnVarId, caller_is_loop_header,
                          callee_begins_with_structured_header, &calleeTypeId,
-                         &multiBlocks, &postCallSB, &preCallSB, multiReturn,
+                         &multiBlocks, &postCallSB, &preCallSB, earlyReturn,
                          &singleTripLoopHeaderId, &singleTripLoopContinueId,
                          &callee_result_ids, this](const Instruction* cpi) {
     switch (cpi->opcode()) {
@@ -369,7 +368,7 @@
             // satisfy dominance.
             callee2caller[cpi->result_id()] = guard_block_id;
           }
-          // If callee has multiple returns, insert a header block for
+          // If callee has early return, insert a header block for
           // single-trip loop that will encompass callee code.  Start postheader
           // block.
           //
@@ -380,7 +379,7 @@
           // We still need to split the caller block and insert a guard block.
           // But we only need to do it once. We haven't done it yet, but the
           // single-trip loop header will serve the same purpose.
-          if (multiReturn) {
+          if (earlyReturn) {
             singleTripLoopHeaderId = this->TakeNextId();
             AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
             new_blocks->push_back(std::move(new_blk_ptr));
@@ -429,9 +428,9 @@
           // If previous instruction was return, insert branch instruction
           // to return block.
           if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr);
-          if (multiReturn) {
-            // If we generated a loop header to for the single-trip loop
-            // to accommodate multiple returns, insert the continue
+          if (earlyReturn) {
+            // If we generated a loop header for the single-trip loop
+            // to accommodate early returns, insert the continue
             // target block now, with a false branch back to the loop header.
             new_blocks->push_back(std::move(new_blk_ptr));
             new_blk_ptr =
@@ -561,44 +560,24 @@
       });
 }
 
-bool InlinePass::HasMultipleReturns(Function* func) {
-  bool seenReturn = false;
-  bool multipleReturns = false;
+bool InlinePass::HasNoReturnInStructuredConstruct(Function* func) {
+  // If control not structured, do not do loop/return analysis
+  // TODO: Analyze returns in non-structured control flow
+  if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
+    return false;
+  const auto structured_analysis = context()->GetStructuredCFGAnalysis();
+  // Search for returns in structured construct.
+  bool return_in_construct = false;
   for (auto& blk : *func) {
     auto terminal_ii = blk.cend();
     --terminal_ii;
-    if (terminal_ii->opcode() == SpvOpReturn ||
-        terminal_ii->opcode() == SpvOpReturnValue) {
-      if (seenReturn) {
-        multipleReturns = true;
-        break;
-      }
-      seenReturn = true;
+    if (spvOpcodeIsReturn(terminal_ii->opcode()) &&
+        structured_analysis->ContainingConstruct(blk.id()) != 0) {
+      return_in_construct = true;
+      break;
     }
   }
-  return multipleReturns;
-}
-
-void InlinePass::ComputeStructuredSuccessors(Function* func) {
-  // If header, make merge block first successor.
-  for (auto& blk : *func) {
-    uint32_t mbid = blk.MergeBlockIdIfAny();
-    if (mbid != 0) {
-      block2structured_succs_[&blk].push_back(id2block_[mbid]);
-    }
-
-    // Add true successors.
-    const auto& const_blk = blk;
-    const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) {
-      block2structured_succs_[&blk].push_back(id2block_[sbid]);
-    });
-  }
-}
-
-InlinePass::GetBlocksFunction InlinePass::StructuredSuccessorsFunction() {
-  return [this](const BasicBlock* block) {
-    return &(block2structured_succs_[block]);
-  };
+  return !return_in_construct;
 }
 
 bool InlinePass::HasNoReturnInLoop(Function* func) {
@@ -606,52 +585,27 @@
   // TODO: Analyze returns in non-structured control flow
   if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
     return false;
-  // Compute structured block order. This order has the property
-  // that dominators are before all blocks they dominate and merge blocks
-  // are after all blocks that are in the control constructs of their header.
-  ComputeStructuredSuccessors(func);
-  auto ignore_block = [](cbb_ptr) {};
-  auto ignore_edge = [](cbb_ptr, cbb_ptr) {};
-  std::list<const BasicBlock*> structuredOrder;
-  CFA<BasicBlock>::DepthFirstTraversal(
-      &*func->begin(), StructuredSuccessorsFunction(), ignore_block,
-      [&](cbb_ptr b) { structuredOrder.push_front(b); }, ignore_edge);
-  // Search for returns in loops. Only need to track outermost loop
+  const auto structured_analysis = context()->GetStructuredCFGAnalysis();
+  // Search for returns in structured construct.
   bool return_in_loop = false;
-  uint32_t outerLoopMergeId = 0;
-  for (auto& blk : structuredOrder) {
-    // Exiting current outer loop
-    if (blk->id() == outerLoopMergeId) outerLoopMergeId = 0;
-    // Return block
-    auto terminal_ii = blk->cend();
+  for (auto& blk : *func) {
+    auto terminal_ii = blk.cend();
     --terminal_ii;
-    if (terminal_ii->opcode() == SpvOpReturn ||
-        terminal_ii->opcode() == SpvOpReturnValue) {
-      if (outerLoopMergeId != 0) {
-        return_in_loop = true;
-        break;
-      }
-    } else if (terminal_ii != blk->cbegin()) {
-      auto merge_ii = terminal_ii;
-      --merge_ii;
-      // Entering outermost loop
-      if (merge_ii->opcode() == SpvOpLoopMerge && outerLoopMergeId == 0)
-        outerLoopMergeId =
-            merge_ii->GetSingleWordOperand(kSpvLoopMergeMergeBlockId);
+    if (spvOpcodeIsReturn(terminal_ii->opcode()) &&
+        structured_analysis->ContainingLoop(blk.id()) != 0) {
+      return_in_loop = true;
+      break;
     }
   }
   return !return_in_loop;
 }
 
 void InlinePass::AnalyzeReturns(Function* func) {
-  // Look for multiple returns
-  if (!HasMultipleReturns(func)) {
+  if (HasNoReturnInLoop(func)) {
     no_return_in_loop_.insert(func->result_id());
-    return;
+    if (!HasNoReturnInStructuredConstruct(func))
+      early_return_funcs_.insert(func->result_id());
   }
-  multi_return_funcs_.insert(func->result_id());
-  // If multiple returns, see if any are in a loop
-  if (HasNoReturnInLoop(func)) no_return_in_loop_.insert(func->result_id());
 }
 
 bool InlinePass::IsInlinableFunction(Function* func) {
@@ -673,10 +627,9 @@
   // clear collections
   id2function_.clear();
   id2block_.clear();
-  block2structured_succs_.clear();
   inlinable_.clear();
   no_return_in_loop_.clear();
-  multi_return_funcs_.clear();
+  early_return_funcs_.clear();
 
   for (auto& fn : *get_module()) {
     // Initialize function and block maps.
diff --git a/source/opt/inline_pass.h b/source/opt/inline_pass.h
index 55369c9..e23e7f0 100644
--- a/source/opt/inline_pass.h
+++ b/source/opt/inline_pass.h
@@ -36,9 +36,6 @@
   using cbb_ptr = const BasicBlock*;
 
  public:
-  using GetBlocksFunction =
-      std::function<std::vector<BasicBlock*>*(const BasicBlock*)>;
-
   virtual ~InlinePass() = default;
 
  protected:
@@ -122,21 +119,9 @@
   // Return true if |inst| is a function call that can be inlined.
   bool IsInlinableFunctionCall(const Instruction* inst);
 
-  // Compute structured successors for function |func|.
-  // A block's structured successors are the blocks it branches to
-  // together with its declared merge block if it has one.
-  // When order matters, the merge block always appears first.
-  // This assures correct depth first search in the presence of early
-  // returns and kills. If the successor vector contain duplicates
-  // if the merge block, they are safely ignored by DFS.
-  void ComputeStructuredSuccessors(Function* func);
-
-  // Return function to return ordered structure successors for a given block
-  // Assumes ComputeStructuredSuccessors() has been called.
-  GetBlocksFunction StructuredSuccessorsFunction();
-
-  // Return true if |func| has multiple returns
-  bool HasMultipleReturns(Function* func);
+  // Return true if |func| does not have a return that is
+  // nested in a structured if, switch or loop.
+  bool HasNoReturnInStructuredConstruct(Function* func);
 
   // Return true if |func| has no return in a loop. The current analysis
   // requires structured control flow, so return false if control flow not
@@ -163,8 +148,8 @@
   // CFG. It has functionality not present in CFG. Consolidate.
   std::unordered_map<uint32_t, BasicBlock*> id2block_;
 
-  // Set of ids of functions with multiple returns.
-  std::set<uint32_t> multi_return_funcs_;
+  // Set of ids of functions with early return.
+  std::set<uint32_t> early_return_funcs_;
 
   // Set of ids of functions with no returns in loop
   std::set<uint32_t> no_return_in_loop_;
@@ -174,13 +159,6 @@
 
   // result id for OpConstantFalse
   uint32_t false_id_;
-
-  // Map from block to its structured successor blocks. See
-  // ComputeStructuredSuccessors() for definition. TODO(dnovillo): This is
-  // superfluous wrt CFG, but it seems to be computed in a slightly
-  // different way in the inliner. Can these be consolidated?
-  std::unordered_map<const BasicBlock*, std::vector<BasicBlock*>>
-      block2structured_succs_;
 };
 
 }  // namespace opt
diff --git a/source/opt/ir_context.h b/source/opt/ir_context.h
index 3d323e6..742c868 100644
--- a/source/opt/ir_context.h
+++ b/source/opt/ir_context.h
@@ -231,7 +231,7 @@
 
   // Returns a pointer to a StructuredCFGAnalysis.  If the analysis is invalid,
   // it is rebuilt first.
-  StructuredCFGAnalysis* GetStructuedCFGAnalaysis() {
+  StructuredCFGAnalysis* GetStructuredCFGAnalysis() {
     if (!AreAnalysesValid(kAnalysisStructuredCFG)) {
       BuildStructuredCFGAnalysis();
     }
diff --git a/source/opt/merge_return_pass.cpp b/source/opt/merge_return_pass.cpp
index b5df490..10c7d2f 100644
--- a/source/opt/merge_return_pass.cpp
+++ b/source/opt/merge_return_pass.cpp
@@ -743,7 +743,7 @@
     }
 
     StructuredCFGAnalysis* struct_cfg_analysis =
-        context()->GetStructuedCFGAnalaysis();
+        context()->GetStructuredCFGAnalysis();
     if (struct_cfg_analysis->IsMergeBlock(bb.id())) {
       // |bb| must be an empty block ending with OpUnreachable.
       if (bb.begin()->opcode() != SpvOpUnreachable) {
diff --git a/test/opt/inline_test.cpp b/test/opt/inline_test.cpp
index f0a376e..2ece9f4 100644
--- a/test/opt/inline_test.cpp
+++ b/test/opt/inline_test.cpp
@@ -2144,19 +2144,12 @@
 OpBranch %19
 %19 = OpLabel
 %20 = OpCopyObject %int %int_2
+%25 = OpCopyObject %int %int_0
 OpLoopMerge %23 %26 None
-OpBranch %25
-%25 = OpLabel
-OpLoopMerge %26 %27 None
-OpBranch %28
-%28 = OpLabel
-%29 = OpCopyObject %int %int_0
-OpBranch %26
-%30 = OpLabel
-%31 = OpCopyObject %int %int_1
 OpBranch %26
 %27 = OpLabel
-OpBranchConditional %false %25 %26
+%28 = OpCopyObject %int %int_1
+OpBranch %26
 %26 = OpLabel
 %22 = OpCopyObject %int %int_3
 OpBranchConditional %true %19 %23
@@ -2226,23 +2219,16 @@
       R"(%1 = OpFunction %void None %9
 %20 = OpLabel
 %21 = OpCopyObject %int %int_3
-OpBranch %24
-%24 = OpLabel
-OpLoopMerge %25 %26 None
-OpBranch %27
-%27 = OpLabel
-%28 = OpCopyObject %int %int_0
-OpBranch %29
-%29 = OpLabel
-%30 = OpPhi %int %28 %27
-%31 = OpCopyObject %int %int_1
+%24 = OpCopyObject %int %int_0
 OpBranch %25
-%32 = OpLabel
-%33 = OpCopyObject %int %int_2
-OpBranch %25
-%26 = OpLabel
-OpBranchConditional %false %24 %25
 %25 = OpLabel
+%26 = OpPhi %int %24 %20
+%27 = OpCopyObject %int %int_1
+OpBranch %28
+%29 = OpLabel
+%30 = OpCopyObject %int %int_2
+OpBranch %28
+%28 = OpLabel
 %23 = OpCopyObject %int %int_4
 OpReturn
 OpFunctionEnd
@@ -2253,6 +2239,222 @@
                                               false, true);
 }
 
+TEST_F(InlineTest, NonInlinableCalleeWithSingleReturn) {
+  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
+  //
+  // The callee has a single return, but cannot be inlined because the
+  // return is inside a loop.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %_GLF_color
+OpExecutionMode %main OriginUpperLeft
+OpSource ESSL 310
+OpName %main "main"
+OpName %f_ "f("
+OpName %i "i"
+OpName %_GLF_color "_GLF_color"
+OpDecorate %_GLF_color Location 0
+%void = OpTypeVoid
+%7 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%9 = OpTypeFunction %float
+%float_1 = OpConstant %float 1
+%bool = OpTypeBool
+%false = OpConstantFalse %bool
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_GLF_color = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+%20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+%21 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
+)";
+
+  const std::string caller =
+      R"(%main = OpFunction %void None %7
+%22 = OpLabel
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %23
+%23 = OpLabel
+OpLoopMerge %24 %25 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpLoad %int %i
+%28 = OpSLessThan %bool %27 %int_1
+OpBranchConditional %28 %29 %24
+%29 = OpLabel
+OpStore %_GLF_color %20
+%30 = OpFunctionCall %float %f_
+OpBranch %25
+%25 = OpLabel
+%31 = OpLoad %int %i
+%32 = OpIAdd %int %31 %int_1
+OpStore %i %32
+OpBranch %23
+%24 = OpLabel
+OpStore %_GLF_color %21
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string callee =
+      R"(%f_ = OpFunction %float None %9
+%33 = OpLabel
+OpBranch %34
+%34 = OpLabel
+OpLoopMerge %35 %36 None
+OpBranch %37
+%37 = OpLabel
+OpReturnValue %float_1
+%36 = OpLabel
+OpBranch %34
+%35 = OpLabel
+OpUnreachable
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(
+      predefs + caller + callee, predefs + caller + callee, false, true);
+}
+
+TEST_F(InlineTest, CalleeWithSingleReturnNeedsSingleTripLoopWrapper) {
+  // The case from https://github.com/KhronosGroup/SPIRV-Tools/issues/2018
+  //
+  // The callee has a single return, but needs single-trip loop wrapper
+  // to be inlined because the return is in a selection structure.
+
+  const std::string predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %_GLF_color
+OpExecutionMode %main OriginUpperLeft
+OpSource ESSL 310
+OpName %main "main"
+OpName %f_ "f("
+OpName %i "i"
+OpName %_GLF_color "_GLF_color"
+OpDecorate %_GLF_color Location 0
+%void = OpTypeVoid
+%7 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%9 = OpTypeFunction %float
+%float_1 = OpConstant %float 1
+%bool = OpTypeBool
+%false = OpConstantFalse %bool
+%true = OpConstantTrue %bool
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_GLF_color = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+%22 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1
+)";
+
+  const std::string new_predefs =
+      R"(%_ptr_Function_float = OpTypePointer Function %float
+)";
+
+  const std::string main_before =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+%31 = OpFunctionCall %float %f_
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string main_after =
+      R"(%main = OpFunction %void None %7
+%23 = OpLabel
+%38 = OpVariable %_ptr_Function_float Function
+%i = OpVariable %_ptr_Function_int Function
+OpStore %i %int_0
+OpBranch %24
+%24 = OpLabel
+OpLoopMerge %25 %26 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpLoad %int %i
+%29 = OpSLessThan %bool %28 %int_1
+OpBranchConditional %29 %30 %25
+%30 = OpLabel
+OpStore %_GLF_color %21
+OpBranch %39
+%39 = OpLabel
+OpLoopMerge %40 %41 None
+OpBranch %42
+%42 = OpLabel
+OpSelectionMerge %43 None
+OpBranchConditional %true %44 %43
+%44 = OpLabel
+OpStore %38 %float_1
+OpBranch %40
+%43 = OpLabel
+OpUnreachable
+%41 = OpLabel
+OpBranchConditional %false %39 %40
+%40 = OpLabel
+%31 = OpLoad %float %38
+OpBranch %26
+%26 = OpLabel
+%32 = OpLoad %int %i
+%33 = OpIAdd %int %32 %int_1
+OpStore %i %33
+OpBranch %24
+%25 = OpLabel
+OpStore %_GLF_color %22
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string callee =
+      R"(%f_ = OpFunction %float None %9
+%34 = OpLabel
+OpSelectionMerge %35 None
+OpBranchConditional %true %36 %35
+%36 = OpLabel
+OpReturnValue %float_1
+%35 = OpLabel
+OpUnreachable
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<InlineExhaustivePass>(
+      predefs + main_before + callee,
+      predefs + new_predefs + main_after + callee, false, true);
+}
+
 TEST_F(InlineTest, Decorated1) {
   // Same test as Simple with the difference
   // that OpFAdd in the outlined function is