spirv-opt: Add dataflow analysis framework (#4402)
This PR adds a generic dataflow analysis framework to SPIRV-opt, with the intent of being used in SPIRV-lint. This may also be useful for SPIRV-opt, as existing ad-hoc analyses can be rewritten to use a common framework, but this is not the target of this PR.
diff --git a/Android.mk b/Android.mk
index 057731f..ff856fe 100644
--- a/Android.mk
+++ b/Android.mk
@@ -91,6 +91,7 @@
source/opt/control_dependence.cpp \
source/opt/convert_to_half_pass.cpp \
source/opt/copy_prop_arrays.cpp \
+ source/opt/dataflow.cpp \
source/opt/dead_branch_elim_pass.cpp \
source/opt/dead_insert_elim_pass.cpp \
source/opt/dead_variable_elimination.cpp \
diff --git a/BUILD.gn b/BUILD.gn
index fea0279..036a0a1 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -612,6 +612,8 @@
"source/opt/convert_to_half_pass.h",
"source/opt/copy_prop_arrays.cpp",
"source/opt/copy_prop_arrays.h",
+ "source/opt/dataflow.cpp",
+ "source/opt/dataflow.h",
"source/opt/dead_branch_elim_pass.cpp",
"source/opt/dead_branch_elim_pass.h",
"source/opt/dead_insert_elim_pass.cpp",
diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt
index f6ebcfa..b6027c0 100644
--- a/source/opt/CMakeLists.txt
+++ b/source/opt/CMakeLists.txt
@@ -30,6 +30,7 @@
control_dependence.h
convert_to_half_pass.h
copy_prop_arrays.h
+ dataflow.h
dead_branch_elim_pass.h
dead_insert_elim_pass.h
dead_variable_elimination.h
@@ -137,6 +138,7 @@
control_dependence.cpp
convert_to_half_pass.cpp
copy_prop_arrays.cpp
+ dataflow.cpp
dead_branch_elim_pass.cpp
dead_insert_elim_pass.cpp
dead_variable_elimination.cpp
diff --git a/source/opt/dataflow.cpp b/source/opt/dataflow.cpp
new file mode 100644
index 0000000..c91fad0
--- /dev/null
+++ b/source/opt/dataflow.cpp
@@ -0,0 +1,91 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "source/opt/dataflow.h"
+
+#include <algorithm>
+#include <cstdint>
+
+namespace spvtools {
+namespace opt {
+
+bool DataFlowAnalysis::Enqueue(Instruction* inst) {
+  bool& pending = on_worklist_[inst];  // Absent entries start out false.
+  const bool newly_queued = !pending;
+  pending = true;
+  if (newly_queued) worklist_.push(inst);
+  return newly_queued;
+}
+
+DataFlowAnalysis::VisitResult DataFlowAnalysis::RunOnce(
+    Function* function, bool is_first_iteration) {
+  InitializeWorklist(function, is_first_iteration);
+  bool any_changed = false;
+  // Drain the queue; changed instructions may append their successors to it.
+  while (!worklist_.empty()) {
+    Instruction* current = worklist_.front();
+    worklist_.pop();
+    // Clear the flag on dequeue so that |current| can be queued again later.
+    on_worklist_[current] = false;
+    if (Visit(current) != VisitResult::kResultChanged) continue;
+    EnqueueSuccessors(current);
+    any_changed = true;
+  }
+  return any_changed ? VisitResult::kResultChanged : VisitResult::kResultFixed;
+}
+
+void DataFlowAnalysis::Run(Function* function) {
+  bool first_pass = true;  // Only the initial pass is flagged as first.
+  while (RunOnce(function, first_pass) == VisitResult::kResultChanged) {
+    first_pass = false;
+  }
+}
+
+void ForwardDataFlowAnalysis::InitializeWorklist(Function* function,
+                                                 bool /*is_first_iteration*/) {
+  // Queues the parts of one block selected by |label_position_|: the label
+  // before or after the block's instructions, the label alone, or only the
+  // instructions.
+  auto enqueue_block = [this](BasicBlock* block) {
+    const LabelPosition where = label_position_;
+    const bool label_first = where == LabelPosition::kLabelsOnly ||
+                             where == LabelPosition::kLabelsAtBeginning;
+    if (label_first) Enqueue(block->GetLabelInst());
+    if (where == LabelPosition::kLabelsOnly) return;
+    for (Instruction& body_inst : *block) Enqueue(&body_inst);
+    if (where == LabelPosition::kLabelsAtEnd) Enqueue(block->GetLabelInst());
+  };
+  // Blocks are visited in reverse postorder starting at the entry block, so
+  // the initial queue follows that order.
+  context().cfg()->ForEachBlockInReversePostOrder(function->entry().get(),
+                                                  enqueue_block);
+}
+
+void ForwardDataFlowAnalysis::EnqueueUsers(Instruction* inst) {
+  auto enqueue_user = [this](Instruction* user) { Enqueue(user); };
+  context().get_def_use_mgr()->ForEachUser(inst, enqueue_user);
+}
+
+void ForwardDataFlowAnalysis::EnqueueBlockSuccessors(Instruction* inst) {
+  // Only a label identifies a block; every other instruction is ignored.
+  if (inst->opcode() != SpvOpLabel) return;
+  auto* cfg = context().cfg();
+  BasicBlock* block = cfg->block(inst->result_id());
+  block->ForEachSuccessorLabel([this, cfg](uint32_t* succ_id) {
+    Enqueue(cfg->block(*succ_id)->GetLabelInst());
+  });
+}
+
+} // namespace opt
+} // namespace spvtools
diff --git a/source/opt/dataflow.h b/source/opt/dataflow.h
new file mode 100644
index 0000000..be07415
--- /dev/null
+++ b/source/opt/dataflow.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SOURCE_OPT_DATAFLOW_H_
+#define SOURCE_OPT_DATAFLOW_H_
+
+#include <queue>
+#include <unordered_map>
+#include <vector>
+
+#include "source/opt/instruction.h"
+#include "source/opt/ir_context.h"
+
+namespace spvtools {
+namespace opt {
+
+// Generic data-flow analysis.
+// Maintains a worklist of instructions to process and processes them in a
+// specified order. See also ForwardDataFlowAnalysis, which is specialized for
+// forward data-flow analysis.
+class DataFlowAnalysis {
+ public:
+  // The result of a |Visit| operation on an instruction.
+  // This is used to determine when analysis has reached a fixpoint.
+  enum class VisitResult {
+    // The analysis result for this instruction has changed.
+    // This means that any instructions that depend on it (its successors) must
+    // be recomputed.
+    kResultChanged,
+    // The analysis result for this instruction has not changed.
+    // When all visit operations return |kResultFixed|, the analysis has reached
+    // a fixpoint (converged).
+    kResultFixed,
+  };
+
+  virtual ~DataFlowAnalysis() = default;
+
+  // Runs the analysis on |function| (interprocedural analyses may ignore it).
+  void Run(Function* function);
+
+ protected:
+  explicit DataFlowAnalysis(IRContext& context) : context_(context) {}
+
+  // Initialize the worklist for a given function.
+  // |is_first_iteration| is true for the first pass of a |Run| call and false
+  // for every later pass. Later passes are only necessary to check if the
+  // analysis has converged; if |EnqueueSuccessors| is complete,
+  // |InitializeWorklist| should do nothing after the first iteration.
+  virtual void InitializeWorklist(Function* function,
+                                  bool is_first_iteration) = 0;
+
+  // Enqueues the successors (instructions which use the analysis result) of
+  // |inst|. This is not required to be complete, but convergence is faster when
+  // it is. This is called whenever |Visit| returns |kResultChanged|.
+  virtual void EnqueueSuccessors(Instruction* inst) = 0;
+
+  // Visits the given instruction, recomputing the analysis result. This is
+  // called once per instruction queued in |InitializeWorklist| and afterward
+  // when a predecessor is changed, through |EnqueueSuccessors|.
+  virtual VisitResult Visit(Instruction* inst) = 0;
+
+  // Enqueues the given instruction to be visited. Returns false, leaving the
+  // worklist untouched, if it is already in the worklist; true otherwise.
+  bool Enqueue(Instruction* inst);
+
+  IRContext& context() { return context_; }
+
+ private:
+  // Runs one pass: calls |InitializeWorklist|, then visits instructions until
+  // the worklist is empty. Returns |kResultChanged| if any visit did.
+  VisitResult RunOnce(Function* function, bool is_first_iteration);
+
+  IRContext& context_;
+  // Whether each instruction is currently in |worklist_| (absent means no).
+  std::unordered_map<Instruction*, bool> on_worklist_;
+  // The worklist, which contains the list of instructions to be visited.
+  //
+  // The choice of data structure was influenced by the data in "Iterative
+  // Data-flow Analysis, Revisited" (Cooper et al, 2002).
+  // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.125.1549&rep=rep1&type=pdf
+  // The paper shows that the overall performance benefit of a priority queue
+  // over a regular queue or stack is relatively small (or negative).
+  //
+  // A queue has the advantage that nodes are visited in the same order they are
+  // enqueued, which relieves the analysis from inserting nodes "backwards", for
+  // example in worklist initialization. Also, as the paper claims that sorting
+  // successors does not improve runtime, we can use a single queue which is
+  // modified during iteration.
+  std::queue<Instruction*> worklist_;
+};
+
+// A generic data-flow analysis, specialized for forward analysis.
+class ForwardDataFlowAnalysis : public DataFlowAnalysis {
+ public:
+ // Indicates where labels should be in the initial worklist RPO ordering.
+ enum class LabelPosition {
+ // Labels should be placed at the beginning of their blocks.
+ kLabelsAtBeginning,
+ // Labels should be placed at the end of their blocks.
+ kLabelsAtEnd,
+ // Labels should not be in the worklist.
+ kNoLabels,
+ // Only labels should be placed in the initial worklist. Their users can
+ // still be enqueued later through |EnqueueSuccessors|.
+ kLabelsOnly,
+ };
+
+ ForwardDataFlowAnalysis(IRContext& context, LabelPosition label_position)
+ : DataFlowAnalysis(context), label_position_(label_position) {}
+
+ protected:
+ // Initializes the worklist in reverse postorder, regardless of
+ // |is_first_iteration|. Labels follow the constructor's label position.
+ void InitializeWorklist(Function* function, bool is_first_iteration) override;
+
+ // Enqueues the users and block successors of the given instruction.
+ // See |EnqueueUsers| and |EnqueueBlockSuccessors|.
+ void EnqueueSuccessors(Instruction* inst) override {
+ EnqueueUsers(inst);
+ EnqueueBlockSuccessors(inst);
+ }
+
+ // Enqueues every user of |inst| reported by the def-use manager.
+ void EnqueueUsers(Instruction* inst);
+
+ // Enqueues the labels of the successors of the block corresponding to the
+ // given label instruction. Does nothing for other instructions.
+ void EnqueueBlockSuccessors(Instruction* inst);
+
+ private:
+ LabelPosition label_position_;
+};
+
+} // namespace opt
+} // namespace spvtools
+
+#endif // SOURCE_OPT_DATAFLOW_H_
diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt
index 0331246..76ca99e 100644
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@@ -31,6 +31,7 @@
control_dependence.cpp
convert_relaxed_to_half_test.cpp
copy_prop_array_test.cpp
+ dataflow.cpp
dead_branch_elim_test.cpp
dead_insert_elim_test.cpp
dead_variable_elim_test.cpp
diff --git a/test/opt/dataflow.cpp b/test/opt/dataflow.cpp
new file mode 100644
index 0000000..4742015
--- /dev/null
+++ b/test/opt/dataflow.cpp
@@ -0,0 +1,225 @@
+// Copyright (c) 2021 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "source/opt/dataflow.h"
+
+#include <map>
+#include <set>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "opt/function_utils.h"
+#include "source/opt/build_module.h"
+
+namespace spvtools {
+namespace opt {
+namespace {
+
+using DataFlowTest = ::testing::Test;
+
+// Simple analyses for testing:
+
+// Logs the result ID of each visited instruction that has one, in visit order.
+struct VisitOrder : public ForwardDataFlowAnalysis {
+  std::vector<uint32_t> visited_result_ids;
+
+  VisitOrder(IRContext& context, LabelPosition label_position)
+      : ForwardDataFlowAnalysis(context, label_position) {}
+
+  // Never reports a change, so |Run| makes one pass over the initial worklist.
+  VisitResult Visit(Instruction* inst) override {
+    const bool has_id = inst->HasResultId();
+    if (has_id) visited_result_ids.push_back(inst->result_id());
+    return VisitResult::kResultFixed;
+  }
+};
+
+// Maps each block ID to the IDs of all blocks that can come before it, i.e.
+// its transitive CFG predecessors. For example, with the following CFG:
+//    V-----------.
+// -> 11 -> 12 -> 13 -> 15
+//              \-> 14 ---^
+//
+// the result is:
+// 11: 11, 12, 13
+// 12: 11, 12, 13
+// 13: 11, 12, 13
+// 14: 11, 12, 13
+// 15: 11, 12, 13, 14
+struct BackwardReachability : public ForwardDataFlowAnalysis {
+  // The analysis result: block ID -> IDs of the blocks that can precede it.
+  std::map<uint32_t, std::set<uint32_t>> reachable_from;
+
+  // Only labels seed the worklist, since results are tracked per block.
+  BackwardReachability(IRContext& context)
+      : ForwardDataFlowAnalysis(context, LabelPosition::kLabelsOnly) {}
+
+  // Merges every CFG predecessor of the visited block, together with whatever
+  // is already known to precede that predecessor, into the block's own set.
+  // Reports |kResultChanged| exactly when the set grew.
+  VisitResult Visit(Instruction* inst) override {
+    // A changed label also enqueues its users (e.g. branches) through
+    // |EnqueueSuccessors|; anything that is not a label is skipped.
+    if (inst->opcode() != SpvOpLabel) return VisitResult::kResultFixed;
+
+    const uint32_t block_id = inst->result_id();
+    std::set<uint32_t>& known = reachable_from[block_id];
+    const auto size_before = known.size();
+    for (uint32_t pred : context().cfg()->preds(block_id)) {
+      known.insert(pred);
+      // Whatever can precede |pred| can precede this block as well.
+      for (uint32_t ancestor : reachable_from[pred]) known.insert(ancestor);
+    }
+    // The set never shrinks, so a size change means something was added.
+    return known.size() == size_before ? VisitResult::kResultFixed
+                                       : VisitResult::kResultChanged;
+  }
+
+  // Seeds the worklist on the first pass only; later passes start empty.
+  void InitializeWorklist(Function* function,
+                          bool is_first_iteration) override {
+    // Since successor function is exact, only need one pass.
+    if (!is_first_iteration) return;
+    ForwardDataFlowAnalysis::InitializeWorklist(function, is_first_iteration);
+  }
+};
+
+TEST_F(DataFlowTest, ReversePostOrder) {
+  // Note: labels and IDs are intentionally out of order.
+  //
+  // CFG: (order of branches is from bottom to top)
+  //          V-----------.
+  // -> 50 -> 40 -> 20 -> 60 -> 70
+  //            \-> 30 ---^
+  //
+  // DFS tree with RPO numbering:
+  // -> 50[0] -> 40[1] -> 20[2]    60[4] -> 70[5]
+  //                  \-> 30[3] ---^
+
+  const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ %3 = OpTypeVoid
+ %4 = OpTypeFunction %3
+ %6 = OpTypeBool
+ %5 = OpConstantTrue %6
+ %2 = OpFunction %3 None %4
+ %50 = OpLabel
+ %51 = OpUndef %6
+ %52 = OpUndef %6
+ OpBranch %40
+ %70 = OpLabel
+ %69 = OpUndef %6
+ OpReturn
+ %60 = OpLabel
+ %61 = OpUndef %6
+ OpBranchConditional %5 %70 %40
+ %30 = OpLabel
+ %29 = OpUndef %6
+ OpBranch %60
+ %20 = OpLabel
+ %21 = OpUndef %6
+ OpBranch %60
+ %40 = OpLabel
+ %39 = OpUndef %6
+ OpBranchConditional %5 %30 %20
+ OpFunctionEnd
+ )";
+
+  std::unique_ptr<IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_2, nullptr, text,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ASSERT_NE(context, nullptr);
+
+  Function* function = spvtest::GetFunction(context->module(), 2);
+  // Stop here if the lookup failed instead of passing null to |Run|.
+  ASSERT_NE(function, nullptr);
+
+  using LabelPosition = ForwardDataFlowAnalysis::LabelPosition;
+  // Expected visit order, as result IDs, for each label position.
+  const std::map<LabelPosition, std::vector<uint32_t>> expected_order = {
+      {LabelPosition::kLabelsOnly, {50, 40, 20, 30, 60, 70}},
+      {LabelPosition::kLabelsAtBeginning,
+       {50, 51, 52, 40, 39, 20, 21, 30, 29, 60, 61, 70, 69}},
+      {LabelPosition::kLabelsAtEnd,
+       {51, 52, 50, 39, 40, 21, 20, 29, 30, 61, 60, 69, 70}},
+      {LabelPosition::kNoLabels, {51, 52, 39, 21, 29, 61, 69}},
+  };
+
+  for (const auto& test_case : expected_order) {
+    VisitOrder analysis(*context, test_case.first);
+    analysis.Run(function);
+    // Name the failing configuration; all four share this one assertion.
+    EXPECT_EQ(test_case.second, analysis.visited_result_ids)
+        << "LabelPosition value: " << static_cast<int>(test_case.first);
+  }
+}
+
+TEST_F(DataFlowTest, BackwardReachability) {
+  // CFG:
+  //    V-----------.
+  // -> 11 -> 12 -> 13 -> 15
+  //              \-> 14 ---^
+
+  const std::string text = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ %3 = OpTypeVoid
+ %4 = OpTypeFunction %3
+ %6 = OpTypeBool
+ %5 = OpConstantTrue %6
+ %2 = OpFunction %3 None %4
+ %11 = OpLabel
+ OpBranch %12
+ %12 = OpLabel
+ OpBranchConditional %5 %14 %13
+ %13 = OpLabel
+ OpBranchConditional %5 %15 %11
+ %14 = OpLabel
+ OpBranch %15
+ %15 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+  std::unique_ptr<IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_2, nullptr, text,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ASSERT_NE(context, nullptr);
+
+  Function* function = spvtest::GetFunction(context->module(), 2);
+  ASSERT_NE(function, nullptr);  // Never hand a null function to |Run|.
+  BackwardReachability analysis(*context);
+  analysis.Run(function);
+
+  std::map<uint32_t, std::set<uint32_t>> expected_result;
+  expected_result[11] = {11, 12, 13};
+  expected_result[12] = {11, 12, 13};
+  expected_result[13] = {11, 12, 13};
+  expected_result[14] = {11, 12, 13};
+  expected_result[15] = {11, 12, 13, 14};
+  EXPECT_EQ(expected_result, analysis.reachable_from);
+}
+
+} // namespace
+} // namespace opt
+} // namespace spvtools