Record correct dominators in merge return (#2760)

In merge return, we need to know the original dominator for a block in order to
traverse code from the original dominator to the new dominator and add
appropriate Phi nodes.  The current code gets this wrong because the dominator
tree is build as needed.  The first time we get the immediate dominator for a
function we just built the dominator tree and it takes into account that a
block has been split.  The second time it does not.

This inconsistency needs to be fixed.  We do that by recording the original
dominator for all blocks at the start of the pass.

If we were to record just the basic block, that could change if the block is
split.  We want to traverse the code in the body of the original dominator,
whatever block it ends up in.  To make this easy to track, we not save the
terminator instruction to represent the original dominator.

Fixes #2745
diff --git a/source/opt/merge_return_pass.cpp b/source/opt/merge_return_pass.cpp
index 6e7284b..b938b2f 100644
--- a/source/opt/merge_return_pass.cpp
+++ b/source/opt/merge_return_pass.cpp
@@ -81,6 +81,7 @@
     return false;
   }
 
+  RecordImmediateDominators(function);
   AddDummyLoopAroundFunction();
 
   std::list<BasicBlock*> order;
@@ -236,10 +237,6 @@
     inst->AddOperand({SPV_OPERAND_TYPE_ID, {new_source->id()}});
     context()->UpdateDefUse(inst);
   });
-
-  // Store the immediate dominator for this block in case new phi nodes will be
-  // needed later.
-  RecordImmediateDominator(target);
 }
 
 void MergeReturnPass::CreatePhiNodesForInst(BasicBlock* merge_block,
@@ -683,7 +680,7 @@
     return;
   }
 
-  BasicBlock* current_bb = new_merge_nodes_[bb];
+  BasicBlock* current_bb = context()->get_instr_block(original_dominator_[bb]);
   while (current_bb != nullptr && current_bb != dominator) {
     for (Instruction& inst : *current_bb) {
       CreatePhiNodesForInst(bb, inst);
@@ -692,11 +689,16 @@
   }
 }
 
-void MergeReturnPass::RecordImmediateDominator(BasicBlock* block) {
-  DominatorAnalysis* dom_tree =
-      context()->GetDominatorAnalysis(block->GetParent());
-  auto idom = dom_tree->ImmediateDominator(block);
-  new_merge_nodes_[block] = idom;
+void MergeReturnPass::RecordImmediateDominators(Function* function) {
+  DominatorAnalysis* dom_tree = context()->GetDominatorAnalysis(function);
+  for (BasicBlock& bb : *function) {
+    BasicBlock* dominator_bb = dom_tree->ImmediateDominator(&bb);
+    if (dominator_bb && dominator_bb != cfg()->pseudo_entry_block()) {
+      original_dominator_[&bb] = dominator_bb->terminator();
+    } else {
+      original_dominator_[&bb] = nullptr;
+    }
+  }
 }
 
 void MergeReturnPass::InsertAfterElement(BasicBlock* element,
diff --git a/source/opt/merge_return_pass.h b/source/opt/merge_return_pass.h
index d9eae39..f8edd27 100644
--- a/source/opt/merge_return_pass.h
+++ b/source/opt/merge_return_pass.h
@@ -246,24 +246,23 @@
   // instruction.
   void CreatePhiNodesForInst(BasicBlock* merge_block, Instruction& inst);
 
-  // Traverse the nodes in |new_merge_nodes_|, and adds the OpPhi instructions
-  // that are needed to make the code correct.  It is assumed that at this point
-  // there are no unreachable blocks in the control flow graph.
+  // Add new phi nodes for any id that no longer dominate all of it uses.  A phi
+  // node is added to a block |bb| for an id if the id is defined between the
+  // original immediate dominator of |bb| and its new immidiate dominator.  It
+  // is assumed that at this point there are no unreachable blocks in the
+  // control flow graph.
   void AddNewPhiNodes();
 
   // Creates any new phi nodes that are needed in |bb|.  |AddNewPhiNodes| must
   // have already been called on the original dominators of |bb|.
   void AddNewPhiNodes(BasicBlock* bb);
 
-  // Saves |block| to a list of basic block that will require OpPhi nodes to be
-  // added by calling |AddNewPhiNodes|.  It is assumed that |block| used to have
-  // a single predecessor, |single_original_pred|, but now has more.
-  void RecordImmediateDominator(BasicBlock* block);
+  // Records the terminator of immediate dominator for every basic block in
+  // |function|.
+  void RecordImmediateDominators(Function* function);
 
   // Modifies existing OpPhi instruction in |target| block to account for the
-  // new edge from |new_source|.  The value for that edge will be an Undef. If
-  // |target| only had a single predecessor, then it is marked as needing new
-  // phi nodes.  See |RecordImmediateDominator|.
+  // new edge from |new_source|.  The value for that edge will be an Undef.
   //
   // The CFG must not include the edge from |new_source| to |target| yet.
   void UpdatePhiNodes(BasicBlock* new_source, BasicBlock* target);
@@ -317,9 +316,10 @@
   // after processing the current function.
   BasicBlock* final_return_block_;
 
-  // This is a map from a node to its original immediate dominator.  This is
-  // used to determine which values will require a new phi node.
-  std::unordered_map<BasicBlock*, BasicBlock*> new_merge_nodes_;
+  // This is a map from a node to its original immediate dominator identified by
+  // the terminator if that block.  We use the terminator because the block we
+  // want may change if the block is split.
+  std::unordered_map<BasicBlock*, Instruction*> original_dominator_;
 
   // A map from a basic block, bb, to the set of basic blocks which represent
   // the new edges that reach |bb|.
diff --git a/test/opt/pass_merge_return_test.cpp b/test/opt/pass_merge_return_test.cpp
index f6b30de..baf7820 100644
--- a/test/opt/pass_merge_return_test.cpp
+++ b/test/opt/pass_merge_return_test.cpp
@@ -1779,6 +1779,57 @@
   SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
   SinglePassRunAndMatch<MergeReturnPass>(text, true);
 }
+TEST_F(MergeReturnPassTest, PhiInSecondMerge) {
+  //  Add and use a phi in the second merge block from the return.
+  const std::string text =
+      R"(
+; CHECK: OpLoopMerge
+; CHECK: OpLoopMerge [[merge_bb:%\w+]] [[continue_bb:%\w+]]
+; CHECK: [[continue_bb]] = OpLabel
+; CHECK-NEXT: [[val:%\w+]] = OpUndef %float
+; CHECK: [[merge_bb]] = OpLabel
+; CHECK-NEXT: [[phi:%\w+]] = OpPhi %float {{%\w+}} {{%\w+}} [[val]] [[continue_bb]]
+; CHECK-NOT: OpLabel
+; CHECK: OpBranchConditional {{%\w+}} {{%\w+}} [[old_merge:%\w+]]
+; CHECK: [[old_merge]] = OpLabel
+; CHECK-NEXT: OpConvertFToS %int [[phi]]
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %2 "main"
+               OpExecutionMode %2 OriginUpperLeft
+               OpSource ESSL 310
+       %void = OpTypeVoid
+          %4 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+       %bool = OpTypeBool
+          %8 = OpUndef %bool
+          %2 = OpFunction %void None %4
+          %9 = OpLabel
+               OpBranch %10
+         %10 = OpLabel
+               OpLoopMerge %11 %12 None
+               OpBranch %13
+         %13 = OpLabel
+               OpLoopMerge %12 %14 None
+               OpBranchConditional %8 %15 %12
+         %15 = OpLabel
+               OpReturn
+         %14 = OpLabel
+               OpBranch %13
+         %12 = OpLabel
+         %16 = OpUndef %float
+               OpBranchConditional %8 %10 %11
+         %11 = OpLabel
+         %17 = OpConvertFToS %int %16
+               OpReturn
+               OpFunctionEnd
+)";
+
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SinglePassRunAndMatch<MergeReturnPass>(text, true);
+}
 
 }  // namespace
 }  // namespace opt