spirv-opt: Add dataflow analysis framework (#4402)

This PR adds a generic dataflow analysis framework to spirv-opt, intended for use in spirv-lint. It may also be useful within spirv-opt itself, since existing ad-hoc analyses could be rewritten on top of a common framework, but that is not the goal of this PR.
diff --git a/Android.mk b/Android.mk
index 057731f..ff856fe 100644
--- a/Android.mk
+++ b/Android.mk
@@ -91,6 +91,7 @@
 		source/opt/control_dependence.cpp \
 		source/opt/convert_to_half_pass.cpp \
 		source/opt/copy_prop_arrays.cpp \
+		source/opt/dataflow.cpp \
 		source/opt/dead_branch_elim_pass.cpp \
 		source/opt/dead_insert_elim_pass.cpp \
 		source/opt/dead_variable_elimination.cpp \
diff --git a/BUILD.gn b/BUILD.gn
index fea0279..036a0a1 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -612,6 +612,8 @@
     "source/opt/convert_to_half_pass.h",
     "source/opt/copy_prop_arrays.cpp",
     "source/opt/copy_prop_arrays.h",
+    "source/opt/dataflow.cpp",
+    "source/opt/dataflow.h",
     "source/opt/dead_branch_elim_pass.cpp",
     "source/opt/dead_branch_elim_pass.h",
     "source/opt/dead_insert_elim_pass.cpp",
diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt
index f6ebcfa..b6027c0 100644
--- a/source/opt/CMakeLists.txt
+++ b/source/opt/CMakeLists.txt
@@ -30,6 +30,7 @@
   control_dependence.h
   convert_to_half_pass.h
   copy_prop_arrays.h
+  dataflow.h
   dead_branch_elim_pass.h
   dead_insert_elim_pass.h
   dead_variable_elimination.h
@@ -137,6 +138,7 @@
   control_dependence.cpp
   convert_to_half_pass.cpp
   copy_prop_arrays.cpp
+  dataflow.cpp
   dead_branch_elim_pass.cpp
   dead_insert_elim_pass.cpp
   dead_variable_elimination.cpp
diff --git a/source/opt/dataflow.cpp b/source/opt/dataflow.cpp
new file mode 100644
index 0000000..c91fad0
--- /dev/null
+++ b/source/opt/dataflow.cpp
@@ -0,0 +1,113 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "source/opt/dataflow.h"
+
+#include <algorithm>
+#include <cstdint>
+
+namespace spvtools {
+namespace opt {
+
+// Adds |inst| to the back of the worklist unless it is already pending.
+// Returns true if |inst| was newly added and false if it was already queued.
+bool DataFlowAnalysis::Enqueue(Instruction* inst) {
+  // operator[] inserts a |false| entry for instructions not seen before.
+  bool& pending = on_worklist_[inst];
+  const bool newly_added = !pending;
+  if (newly_added) {
+    pending = true;
+    worklist_.push(inst);
+  }
+  return newly_added;
+}
+
+// Performs one pass: seeds the worklist through |InitializeWorklist|, then
+// visits instructions in FIFO order until the worklist is empty.
+// Returns |kResultChanged| if any visit in this pass reported a change, and
+// |kResultFixed| otherwise.
+DataFlowAnalysis::VisitResult DataFlowAnalysis::RunOnce(
+    Function* function, bool is_first_iteration) {
+  InitializeWorklist(function, is_first_iteration);
+  bool any_changed = false;
+  while (!worklist_.empty()) {
+    Instruction* current = worklist_.front();
+    worklist_.pop();
+    // The flag is cleared before the visit, so |current| can be enqueued again
+    // by |Visit| or |EnqueueSuccessors|.
+    on_worklist_[current] = false;
+    if (Visit(current) != VisitResult::kResultChanged) continue;
+    EnqueueSuccessors(current);
+    any_changed = true;
+  }
+  return any_changed ? VisitResult::kResultChanged : VisitResult::kResultFixed;
+}
+
+// Runs passes until one finishes without any visit reporting a change. Only
+// the first pass is flagged as the first iteration.
+void DataFlowAnalysis::Run(Function* function) {
+  bool is_first_iteration = true;
+  while (RunOnce(function, is_first_iteration) == VisitResult::kResultChanged) {
+    is_first_iteration = false;
+  }
+}
+
+// Seeds the worklist with the instructions of |function|, walking its blocks
+// in reverse postorder from the entry block. |label_position_| decides whether
+// a block's label is queued before its instructions, after them, not at all,
+// or alone without the block's instructions.
+void ForwardDataFlowAnalysis::InitializeWorklist(Function* function,
+                                                 bool /*is_first_iteration*/) {
+  context().cfg()->ForEachBlockInReversePostOrder(
+      function->entry().get(), [this](BasicBlock* bb) {
+        switch (label_position_) {
+          case LabelPosition::kLabelsOnly:
+            Enqueue(bb->GetLabelInst());
+            return;
+          case LabelPosition::kLabelsAtBeginning:
+            Enqueue(bb->GetLabelInst());
+            break;
+          default:
+            // kLabelsAtEnd queues the label after the loop below; kNoLabels
+            // never queues it.
+            break;
+        }
+        for (Instruction& inst : *bb) {
+          Enqueue(&inst);
+        }
+        if (label_position_ == LabelPosition::kLabelsAtEnd) {
+          Enqueue(bb->GetLabelInst());
+        }
+      });
+}
+
+// Queues every instruction that the def-use manager reports as a user of
+// |inst|.
+void ForwardDataFlowAnalysis::EnqueueUsers(Instruction* inst) {
+  auto enqueue_user = [this](Instruction* user) { Enqueue(user); };
+  context().get_def_use_mgr()->ForEachUser(inst, enqueue_user);
+}
+
+// If |inst| is an OpLabel, queues the label instruction of every successor of
+// the block whose ID is |inst|'s result ID. Other instructions are ignored.
+void ForwardDataFlowAnalysis::EnqueueBlockSuccessors(Instruction* inst) {
+  if (inst->opcode() != SpvOpLabel) return;
+  BasicBlock* block = context().cfg()->block(inst->result_id());
+  block->ForEachSuccessorLabel([this](uint32_t* successor_id) {
+    Enqueue(context().cfg()->block(*successor_id)->GetLabelInst());
+  });
+}
+
+}  // namespace opt
+}  // namespace spvtools
diff --git a/source/opt/dataflow.h b/source/opt/dataflow.h
new file mode 100644
index 0000000..be07415
--- /dev/null
+++ b/source/opt/dataflow.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SOURCE_OPT_DATAFLOW_H_
+#define SOURCE_OPT_DATAFLOW_H_
+
+#include <queue>
+#include <unordered_map>
+#include <vector>
+
+#include "source/opt/instruction.h"
+#include "source/opt/ir_context.h"
+
+namespace spvtools {
+namespace opt {
+
+// Generic data-flow analysis.
+// Maintains a FIFO worklist of instructions and visits them in the order they
+// were enqueued. See also ForwardDataFlowAnalysis, which is specialized for
+// forward data-flow analysis.
+class DataFlowAnalysis {
+ public:
+  // The result of a |Visit| operation on an instruction.
+  // This is used to determine when analysis has reached a fixpoint.
+  enum class VisitResult {
+    // The analysis result for this instruction has changed.
+    // This means that any instructions that depend on it (its successors) must
+    // be recomputed.
+    kResultChanged,
+    // The analysis result for this instruction has not changed.
+    // When all visit operations return |kResultFixed|, the analysis has reached
+    // a fixpoint (converged).
+    kResultFixed,
+  };
+
+  virtual ~DataFlowAnalysis() {}
+
+  // Runs this analysis on |function|, repeating passes until nothing changes.
+  // For analyses which work interprocedurally, |function| may be ignored.
+  void Run(Function* function);
+
+ protected:
+  DataFlowAnalysis(IRContext& context) : context_(context) {}
+
+  // Initializes the worklist for |function| at the start of every pass.
+  // |is_first_iteration| is true for the first pass of a |Run| call and false
+  // for later passes, which are only necessary to check if the analysis has
+  // converged; if |EnqueueSuccessors| is complete, |InitializeWorklist| should
+  // do nothing after the first iteration.
+  virtual void InitializeWorklist(Function* function,
+                                  bool is_first_iteration) = 0;
+
+  // Enqueues the successors (instructions which use the analysis result) of
+  // |inst|. This is not required to be complete, but convergence is faster when
+  // it is. This is called whenever |Visit| returns |kResultChanged|.
+  virtual void EnqueueSuccessors(Instruction* inst) = 0;
+
+  // Visits the given instruction, recomputing the analysis result. This is
+  // called once per instruction queued in |InitializeWorklist| and afterward
+  // when a predecessor is changed, through |EnqueueSuccessors|.
+  virtual VisitResult Visit(Instruction* inst) = 0;
+
+  // Enqueues |inst| to be visited unless it is already in the worklist.
+  // Returns true if |inst| was added, false if it was already queued.
+  bool Enqueue(Instruction* inst);
+
+  IRContext& context() { return context_; }
+
+ private:
+  // Runs one pass, calling |InitializeWorklist| and then visiting instructions
+  // until the worklist is empty. Returns |kResultChanged| if any visit did.
+  VisitResult RunOnce(Function* function, bool is_first_iteration);
+
+  IRContext& context_;
+  std::unordered_map<Instruction*, bool> on_worklist_;  // True while queued.
+  // The worklist, which contains the list of instructions to be visited.
+  //
+  // The choice of data structure was influenced by the data in "Iterative
+  // Data-flow Analysis, Revisited" (Cooper et al., 2002).
+  // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.125.1549&rep=rep1&type=pdf
+  // The paper shows that the overall performance benefit of a priority queue
+  // over a regular queue or stack is relatively small (or negative).
+  //
+  // A queue has the advantage that nodes are visited in the same order they are
+  // enqueued, which relieves the analysis from inserting nodes "backwards", for
+  // example in worklist initialization. Also, as the paper claims that sorting
+  // successors does not improve runtime, we can use a single queue which is
+  // modified during iteration.
+  std::queue<Instruction*> worklist_;
+};
+
+// A generic data flow analysis, specialized for forward analysis.
+class ForwardDataFlowAnalysis : public DataFlowAnalysis {
+ public:
+  // Indicates where block labels go in the RPO-ordered initial worklist.
+  enum class LabelPosition {
+    // Each label is enqueued before the instructions of its block.
+    kLabelsAtBeginning,
+    // Each label is enqueued after the instructions of its block.
+    kLabelsAtEnd,
+    // Labels are not placed in the initial worklist.
+    kNoLabels,
+    // Only labels are seeded; |EnqueueUsers| may still enqueue their users.
+    kLabelsOnly,
+  };
+
+  ForwardDataFlowAnalysis(IRContext& context, LabelPosition label_position)
+      : DataFlowAnalysis(context), label_position_(label_position) {}
+
+ protected:
+  // Initializes the worklist in reverse postorder, regardless of
+  // |is_first_iteration|. Labels are placed according to the label position
+  // specified in the constructor.
+  void InitializeWorklist(Function* function, bool is_first_iteration) override;
+
+  // Enqueues the users and block successors of the given instruction.
+  // See |EnqueueUsers| and |EnqueueBlockSuccessors|.
+  void EnqueueSuccessors(Instruction* inst) override {
+    EnqueueUsers(inst);
+    EnqueueBlockSuccessors(inst);
+  }
+
+  // Enqueues the users of |inst|, as reported by the def-use manager.
+  void EnqueueUsers(Instruction* inst);
+
+  // Enqueues the labels of the successors of the block corresponding to the
+  // given label instruction. Does nothing for other instructions.
+  void EnqueueBlockSuccessors(Instruction* inst);
+
+ private:
+  LabelPosition label_position_;  // Set by the constructor.
+};
+
+}  // namespace opt
+}  // namespace spvtools
+
+#endif  // SOURCE_OPT_DATAFLOW_H_
diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt
index 0331246..76ca99e 100644
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@@ -31,6 +31,7 @@
        control_dependence.cpp
        convert_relaxed_to_half_test.cpp
        copy_prop_array_test.cpp
+       dataflow.cpp
        dead_branch_elim_test.cpp
        dead_insert_elim_test.cpp
        dead_variable_elim_test.cpp
diff --git a/test/opt/dataflow.cpp b/test/opt/dataflow.cpp
new file mode 100644
index 0000000..4742015
--- /dev/null
+++ b/test/opt/dataflow.cpp
@@ -0,0 +1,225 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "source/opt/dataflow.h"
+
+#include <map>
+#include <set>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "opt/function_utils.h"
+#include "source/opt/build_module.h"
+
+namespace spvtools {
+namespace opt {
+namespace {
+
+using DataFlowTest = ::testing::Test;
+
+// Simple analyses for testing:
+
+// Records result IDs in visit order; instructions without one are skipped.
+struct VisitOrder : public ForwardDataFlowAnalysis {
+  std::vector<uint32_t> visited_result_ids;
+
+  VisitOrder(IRContext& context, LabelPosition label_position)
+      : ForwardDataFlowAnalysis(context, label_position) {}
+
+  VisitResult Visit(Instruction* inst) override {
+    if (inst->HasResultId()) {
+      visited_result_ids.push_back(inst->result_id());
+    }
+    return DataFlowAnalysis::VisitResult::kResultFixed;
+  }
+};
+
+// For each block, stores every block that can reach it via one or more edges.
+// For example, with the following CFG:
+//    V-----------.
+// -> 11 -> 12 -> 13 -> 15
+//            \-> 14 ---^
+//
+// The answer is:
+// 11: 11, 12, 13
+// 12: 11, 12, 13
+// 13: 11, 12, 13
+// 14: 11, 12, 13
+// 15: 11, 12, 13, 14
+struct BackwardReachability : public ForwardDataFlowAnalysis {
+  std::map<uint32_t, std::set<uint32_t>> reachable_from;
+
+  BackwardReachability(IRContext& context)
+      : ForwardDataFlowAnalysis(
+            context, ForwardDataFlowAnalysis::LabelPosition::kLabelsOnly) {}
+
+  VisitResult Visit(Instruction* inst) override {
+    // Users of a changed label (e.g. branches) are enqueued too; skip them.
+    if (inst->opcode() != SpvOpLabel)
+      return DataFlowAnalysis::VisitResult::kResultFixed;
+    uint32_t id = inst->result_id();
+    VisitResult ret = DataFlowAnalysis::VisitResult::kResultFixed;
+    std::set<uint32_t>& precedents = reachable_from[id];
+    for (uint32_t pred : context().cfg()->preds(id)) {
+      bool pred_inserted = precedents.insert(pred).second;
+      if (pred_inserted) {
+        ret = DataFlowAnalysis::VisitResult::kResultChanged;
+      }
+      for (uint32_t block : reachable_from[pred]) {
+        bool inserted = precedents.insert(block).second;
+        if (inserted) {
+          ret = DataFlowAnalysis::VisitResult::kResultChanged;
+        }
+      }
+    }
+    return ret;
+  }
+
+  void InitializeWorklist(Function* function,
+                          bool is_first_iteration) override {
+    // The successor function is exact, so only the first pass seeds work.
+    if (is_first_iteration) {
+      ForwardDataFlowAnalysis::InitializeWorklist(function, true);
+    }
+  }
+};
+
+TEST_F(DataFlowTest, ReversePostOrder) {
+  // Note: labels and IDs are intentionally out of order.
+  //
+  // CFG (each conditional branch lists its lower target first):
+  //          V-----------.
+  // -> 50 -> 40 -> 20 -> 60 -> 70
+  //            \-> 30 ---^
+
+  // DFS tree with RPO numbering (the index of each block in visit order):
+  // -> 50[0] -> 40[1] -> 20[2]    60[4] -> 70[5]
+  //                  \-> 30[3] ---^
+
+  const std::string text = R"(
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %2 "main"
+               OpExecutionMode %2 OriginUpperLeft
+               OpSource GLSL 430
+          %3 = OpTypeVoid
+          %4 = OpTypeFunction %3
+          %6 = OpTypeBool
+          %5 = OpConstantTrue %6
+          %2 = OpFunction %3 None %4
+         %50 = OpLabel
+         %51 = OpUndef %6
+         %52 = OpUndef %6
+               OpBranch %40
+         %70 = OpLabel
+         %69 = OpUndef %6
+               OpReturn
+         %60 = OpLabel
+         %61 = OpUndef %6
+               OpBranchConditional %5 %70 %40
+         %30 = OpLabel
+         %29 = OpUndef %6
+               OpBranch %60
+         %20 = OpLabel
+         %21 = OpUndef %6
+               OpBranch %60
+         %40 = OpLabel
+         %39 = OpUndef %6
+               OpBranchConditional %5 %30 %20
+               OpFunctionEnd
+  )";
+
+  std::unique_ptr<IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_2, nullptr, text,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ASSERT_NE(context, nullptr);
+
+  Function* function = spvtest::GetFunction(context->module(), 2);
+
+  std::map<ForwardDataFlowAnalysis::LabelPosition, std::vector<uint32_t>>
+      expected_order;
+  expected_order[ForwardDataFlowAnalysis::LabelPosition::kLabelsOnly] = {
+      50, 40, 20, 30, 60, 70,
+  };
+  expected_order[ForwardDataFlowAnalysis::LabelPosition::kLabelsAtBeginning] = {
+      50, 51, 52, 40, 39, 20, 21, 30, 29, 60, 61, 70, 69,
+  };
+  expected_order[ForwardDataFlowAnalysis::LabelPosition::kLabelsAtEnd] = {
+      51, 52, 50, 39, 40, 21, 20, 29, 30, 61, 60, 69, 70,
+  };
+  expected_order[ForwardDataFlowAnalysis::LabelPosition::kNoLabels] = {
+      51, 52, 39, 21, 29, 61, 69,
+  };
+
+  for (const auto& test_case : expected_order) {
+    VisitOrder analysis(*context, test_case.first);
+    analysis.Run(function);
+    EXPECT_EQ(test_case.second, analysis.visited_result_ids);
+  }
+}
+
+TEST_F(DataFlowTest, BackwardReachability) {
+  // CFG (13 -> 11 is the back edge):
+  //    V-----------.
+  // -> 11 -> 12 -> 13 -> 15
+  //            \-> 14 ---^
+
+  const std::string text = R"(
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %2 "main"
+               OpExecutionMode %2 OriginUpperLeft
+               OpSource GLSL 430
+          %3 = OpTypeVoid
+          %4 = OpTypeFunction %3
+          %6 = OpTypeBool
+          %5 = OpConstantTrue %6
+          %2 = OpFunction %3 None %4
+         %11 = OpLabel
+               OpBranch %12
+         %12 = OpLabel
+               OpBranchConditional %5 %14 %13
+         %13 = OpLabel
+               OpBranchConditional %5 %15 %11
+         %14 = OpLabel
+               OpBranch %15
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+  )";
+
+  std::unique_ptr<IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_2, nullptr, text,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ASSERT_NE(context, nullptr);
+
+  Function* function = spvtest::GetFunction(context->module(), 2);
+
+  BackwardReachability analysis(*context);
+  analysis.Run(function);
+
+  std::map<uint32_t, std::set<uint32_t>> expected_result;
+  expected_result[11] = {11, 12, 13};
+  expected_result[12] = {11, 12, 13};
+  expected_result[13] = {11, 12, 13};
+  expected_result[14] = {11, 12, 13};
+  expected_result[15] = {11, 12, 13, 14};
+  EXPECT_EQ(expected_result, analysis.reachable_from);
+}
+
+}  // namespace
+}  // namespace opt
+}  // namespace spvtools