spirv-fuzz: Refactor 'split blocks' to identify instructions differently (#2961)

This change refactors the 'split blocks' transformation so that an
instruction is identified via a base, opcode, and number of those
opcodes to be skipped when searching from the base, as opposed to the
previous design which used a base and offset.
diff --git a/source/fuzz/fuzzer_pass_split_blocks.cpp b/source/fuzz/fuzzer_pass_split_blocks.cpp
index c0318e8..6a2ea4d 100644
--- a/source/fuzz/fuzzer_pass_split_blocks.cpp
+++ b/source/fuzz/fuzzer_pass_split_blocks.cpp
@@ -14,9 +14,9 @@
 
 #include "source/fuzz/fuzzer_pass_split_blocks.h"
 
-#include <utility>
 #include <vector>
 
+#include "source/fuzz/instruction_descriptor.h"
 #include "source/fuzz/transformation_split_block.h"
 
 namespace spvtools {
@@ -49,40 +49,49 @@
       // We are not going to try to split this block.
       continue;
     }
+
+    // TODO(https://github.com/KhronosGroup/SPIRV-Tools/issues/2964): consider
+    //  taking a simpler approach to identifying the instruction before which
+    //  to split a block.
+
     // We are going to try to split this block.  We now need to choose where
-    // to split it.  We do this by finding a base instruction that has a
-    // result id, and an offset from that base instruction.  We would like
-    // offsets to be as small as possible and ideally 0 - we only need offsets
-    // because not all instructions can be identified by a result id (e.g.
-    // OpStore instructions cannot).
-    std::vector<std::pair<uint32_t, uint32_t>> base_offset_pairs;
+    // to split it.  We describe the instruction before which we would like to
+    // split a block via an InstructionDescriptor, details of which are
+    // commented in the protobufs definition file.
+    std::vector<protobufs::InstructionDescriptor> instruction_descriptors;
+
     // The initial base instruction is the block label.
     uint32_t base = block->id();
-    uint32_t offset = 0;
+
+    // Counts the number of times we have seen each opcode since we reset the
+    // base instruction.
+    std::map<SpvOp, uint32_t> skip_count;
+
     // Consider every instruction in the block.  The label is excluded: it is
     // only necessary to consider it as a base in case the first instruction
     // in the block does not have a result id.
     for (auto& inst : *block) {
       if (inst.HasResultId()) {
         // In the case that the instruction has a result id, we use the
-        // instruction as its own base, with zero offset.
+        // instruction as its own base, and clear the skip counts we have
+        // collected.
         base = inst.result_id();
-        offset = 0;
-      } else {
-        // The instruction does not have a result id, so we need to identify
-        // it via the latest instruction that did have a result id (base), and
-        // an incremented offset.
-        offset++;
+        skip_count.clear();
       }
-      base_offset_pairs.emplace_back(base, offset);
+      const SpvOp opcode = inst.opcode();
+      instruction_descriptors.emplace_back(MakeInstructionDescriptor(
+          base, opcode, skip_count.count(opcode) ? skip_count.at(opcode) : 0));
+      if (!inst.HasResultId()) {
+        skip_count[opcode] =
+            skip_count.count(opcode) ? skip_count.at(opcode) + 1 : 1;
+      }
     }
     // Having identified all the places we might be able to split the block,
     // we choose one of them.
-    auto base_offset =
-        base_offset_pairs[GetFuzzerContext()->RandomIndex(base_offset_pairs)];
-    auto transformation =
-        TransformationSplitBlock(base_offset.first, base_offset.second,
-                                 GetFuzzerContext()->GetFreshId());
+    auto transformation = TransformationSplitBlock(
+        instruction_descriptors[GetFuzzerContext()->RandomIndex(
+            instruction_descriptors)],
+        GetFuzzerContext()->GetFreshId());
     // If the position we have chosen turns out to be a valid place to split
     // the block, we apply the split. Otherwise the block just doesn't get
     // split.
diff --git a/source/fuzz/fuzzer_util.cpp b/source/fuzz/fuzzer_util.cpp
index e61ccae..25bc709 100644
--- a/source/fuzz/fuzzer_util.cpp
+++ b/source/fuzz/fuzzer_util.cpp
@@ -170,6 +170,16 @@
   return false;
 }
 
+opt::BasicBlock::iterator GetIteratorForInstruction(
+    opt::BasicBlock* block, const opt::Instruction* inst) {
+  for (auto inst_it = block->begin(); inst_it != block->end(); ++inst_it) {
+    if (inst == &*inst_it) {
+      return inst_it;
+    }
+  }
+  return block->end();
+}
+
 opt::BasicBlock::iterator GetIteratorForBaseInstructionAndOffset(
     opt::BasicBlock* block, const opt::Instruction* base_inst,
     uint32_t offset) {
diff --git a/source/fuzz/fuzzer_util.h b/source/fuzz/fuzzer_util.h
index 890ceac..ec3f900 100644
--- a/source/fuzz/fuzzer_util.h
+++ b/source/fuzz/fuzzer_util.h
@@ -64,6 +64,11 @@
 bool BlockIsInLoopContinueConstruct(opt::IRContext* context, uint32_t block_id,
                                     uint32_t maybe_loop_header_id);
 
+// If |block| contains |inst|, an iterator for |inst| is returned.
+// Otherwise |block|->end() is returned.
+opt::BasicBlock::iterator GetIteratorForInstruction(
+    opt::BasicBlock* block, const opt::Instruction* inst);
+
 // Requires that |base_inst| is either the label instruction of |block| or an
 // instruction inside |block|.
 //
diff --git a/source/fuzz/protobufs/spvtoolsfuzz.proto b/source/fuzz/protobufs/spvtoolsfuzz.proto
index 1e4527c..f1ead09 100644
--- a/source/fuzz/protobufs/spvtoolsfuzz.proto
+++ b/source/fuzz/protobufs/spvtoolsfuzz.proto
@@ -491,13 +491,9 @@
 
   // A transformation that splits a basic block into two basic blocks
 
-  // The result id of an instruction
-  uint32 base_instruction_id = 1;
-
-  // An offset, such that the block containing |base_instruction_id| should be
-  // split right before the instruction |offset| instructions after
-  // |base_instruction_id|
-  uint32 offset = 2;
+  // A descriptor for an instruction such that the block containing the
+  // described instruction should be split right before the instruction.
+  InstructionDescriptor instruction_to_split_before = 1;
 
   // An id that must not yet be used by the module to which this transformation
   // is applied.  Rather than having the transformation choose a suitable id on
@@ -507,6 +503,6 @@
   // transformation, and if we end up changing what that id is, due to removing
   // earlier transformations, it may inhibit later transformations from
   // applying.
-  uint32 fresh_id = 3;
+  uint32 fresh_id = 2;
 
 }
diff --git a/source/fuzz/transformation_split_block.cpp b/source/fuzz/transformation_split_block.cpp
index a2da371..9f6da7c 100644
--- a/source/fuzz/transformation_split_block.cpp
+++ b/source/fuzz/transformation_split_block.cpp
@@ -17,6 +17,7 @@
 #include <utility>
 
 #include "source/fuzz/fuzzer_util.h"
+#include "source/fuzz/instruction_descriptor.h"
 #include "source/util/make_unique.h"
 
 namespace spvtools {
@@ -26,11 +27,10 @@
     const spvtools::fuzz::protobufs::TransformationSplitBlock& message)
     : message_(message) {}
 
-TransformationSplitBlock::TransformationSplitBlock(uint32_t base_instruction_id,
-                                                   uint32_t offset,
-                                                   uint32_t fresh_id) {
-  message_.set_base_instruction_id(base_instruction_id);
-  message_.set_offset(offset);
+TransformationSplitBlock::TransformationSplitBlock(
+    const protobufs::InstructionDescriptor& instruction_to_split_before,
+    uint32_t fresh_id) {
+  *message_.mutable_instruction_to_split_before() = instruction_to_split_before;
   message_.set_fresh_id(fresh_id);
 }
 
@@ -40,31 +40,28 @@
     // We require the id for the new block to be unused.
     return false;
   }
-  auto base_instruction =
-      context->get_def_use_mgr()->GetDef(message_.base_instruction_id());
-  if (!base_instruction) {
+  auto instruction_to_split_before =
+      FindInstruction(message_.instruction_to_split_before(), context);
+  if (!instruction_to_split_before) {
     // The instruction describing the block we should split does not exist.
     return false;
   }
-  auto block_containing_base_instruction =
-      context->get_instr_block(base_instruction);
-  if (!block_containing_base_instruction) {
-    // The instruction describing the block we should split is not contained in
-    // a block.
-    return false;
-  }
+  auto block_to_split = context->get_instr_block(instruction_to_split_before);
+  assert(block_to_split &&
+         "We should not have managed to find the "
+         "instruction if it was not contained in a block.");
 
-  if (block_containing_base_instruction->IsLoopHeader()) {
+  if (block_to_split->IsLoopHeader()) {
     // We cannot split a loop header block: back-edges would become invalid.
     return false;
   }
 
-  auto split_before = fuzzerutil::GetIteratorForBaseInstructionAndOffset(
-      block_containing_base_instruction, base_instruction, message_.offset());
-  if (split_before == block_containing_base_instruction->end()) {
-    // The offset was inappropriate.
-    return false;
-  }
+  auto split_before = fuzzerutil::GetIteratorForInstruction(
+      block_to_split, instruction_to_split_before);
+  assert(split_before != block_to_split->end() &&
+         "At this point we know the"
+         " block split point exists.");
+
   if (split_before->PreviousNode() &&
       split_before->PreviousNode()->opcode() == SpvOpSelectionMerge) {
     // We cannot split directly after a selection merge: this would separate
@@ -84,44 +81,39 @@
 
 void TransformationSplitBlock::Apply(opt::IRContext* context,
                                      FactManager* /*unused*/) const {
-  auto base_instruction =
-      context->get_def_use_mgr()->GetDef(message_.base_instruction_id());
-  assert(base_instruction && "Base instruction must exist");
-  auto block_containing_base_instruction =
-      context->get_instr_block(base_instruction);
-  assert(block_containing_base_instruction &&
-         "Base instruction must be in a block");
-  auto split_before = fuzzerutil::GetIteratorForBaseInstructionAndOffset(
-      block_containing_base_instruction, base_instruction, message_.offset());
-  assert(split_before != block_containing_base_instruction->end() &&
+  opt::Instruction* instruction_to_split_before =
+      FindInstruction(message_.instruction_to_split_before(), context);
+  opt::BasicBlock* block_to_split =
+      context->get_instr_block(instruction_to_split_before);
+  auto split_before = fuzzerutil::GetIteratorForInstruction(
+      block_to_split, instruction_to_split_before);
+  assert(split_before != block_to_split->end() &&
          "If the transformation is applicable, we should have an "
          "instruction to split on.");
+
   // We need to make sure the module's id bound is large enough to add the
   // fresh id.
   fuzzerutil::UpdateModuleIdBound(context, message_.fresh_id());
   // Split the block.
-  auto new_bb = block_containing_base_instruction->SplitBasicBlock(
-      context, message_.fresh_id(), split_before);
+  auto new_bb = block_to_split->SplitBasicBlock(context, message_.fresh_id(),
+                                                split_before);
   // The split does not automatically add a branch between the two parts of
   // the original block, so we add one.
-  block_containing_base_instruction->AddInstruction(
-      MakeUnique<opt::Instruction>(
-          context, SpvOpBranch, 0, 0,
-          std::initializer_list<opt::Operand>{
-              opt::Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID,
-                           {message_.fresh_id()})}));
+  block_to_split->AddInstruction(MakeUnique<opt::Instruction>(
+      context, SpvOpBranch, 0, 0,
+      std::initializer_list<opt::Operand>{opt::Operand(
+          spv_operand_type_t::SPV_OPERAND_TYPE_ID, {message_.fresh_id()})}));
   // If we split before OpPhi instructions, we need to update their
   // predecessor operand so that the block they used to be inside is now the
   // predecessor.
-  new_bb->ForEachPhiInst(
-      [block_containing_base_instruction](opt::Instruction* phi_inst) {
-        // The following assertion is a sanity check.  It is guaranteed to hold
-        // if IsApplicable holds.
-        assert(phi_inst->NumInOperands() == 2 &&
-               "We can only split a block before an OpPhi if block has exactly "
-               "one predecessor.");
-        phi_inst->SetInOperand(1, {block_containing_base_instruction->id()});
-      });
+  new_bb->ForEachPhiInst([block_to_split](opt::Instruction* phi_inst) {
+    // The following assertion is a sanity check.  It is guaranteed to hold
+    // if IsApplicable holds.
+    assert(phi_inst->NumInOperands() == 2 &&
+           "We can only split a block before an OpPhi if block has exactly "
+           "one predecessor.");
+    phi_inst->SetInOperand(1, {block_to_split->id()});
+  });
   // Invalidate all analyses
   context->InvalidateAnalysesExceptFor(opt::IRContext::Analysis::kAnalysisNone);
 }
diff --git a/source/fuzz/transformation_split_block.h b/source/fuzz/transformation_split_block.h
index 4a7095a..63dc7f5 100644
--- a/source/fuzz/transformation_split_block.h
+++ b/source/fuzz/transformation_split_block.h
@@ -28,8 +28,9 @@
   explicit TransformationSplitBlock(
       const protobufs::TransformationSplitBlock& message);
 
-  TransformationSplitBlock(uint32_t base_instruction_id, uint32_t offset,
-                           uint32_t fresh_id);
+  TransformationSplitBlock(
+      const protobufs::InstructionDescriptor& instruction_to_split_before,
+      uint32_t fresh_id);
 
   // - |message_.base_instruction_id| must be the result id of an instruction
   //   'base' in some block 'blk'.
diff --git a/test/fuzz/transformation_split_block_test.cpp b/test/fuzz/transformation_split_block_test.cpp
index dc6d4d0..d162e07 100644
--- a/test/fuzz/transformation_split_block_test.cpp
+++ b/test/fuzz/transformation_split_block_test.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "source/fuzz/transformation_split_block.h"
+#include "source/fuzz/instruction_descriptor.h"
 #include "test/fuzz/fuzz_test_util.h"
 
 namespace spvtools {
@@ -90,44 +91,55 @@
   FactManager fact_manager;
 
   // No split before OpVariable
-  ASSERT_FALSE(TransformationSplitBlock(8, 0, 100).IsApplicable(context.get(),
-                                                                fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(8, 1, 100).IsApplicable(context.get(),
-                                                                fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(8, SpvOpVariable, 0), 100)
+                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(8, SpvOpVariable, 1), 100)
+                   .IsApplicable(context.get(), fact_manager));
 
   // No split before OpLabel
-  ASSERT_FALSE(TransformationSplitBlock(14, 0, 100)
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(14, SpvOpLabel, 0), 100)
                    .IsApplicable(context.get(), fact_manager));
 
   // No split if base instruction is outside a function
-  ASSERT_FALSE(TransformationSplitBlock(1, 0, 100).IsApplicable(context.get(),
-                                                                fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(1, 4, 100).IsApplicable(context.get(),
-                                                                fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(1, 35, 100)
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(1, SpvOpLabel, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(1, SpvOpExecutionMode, 0), 100)
                    .IsApplicable(context.get(), fact_manager));
 
   // No split if block is loop header
-  ASSERT_FALSE(TransformationSplitBlock(27, 0, 100)
-                   .IsApplicable(context.get(), fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(27, 1, 100)
-                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(27, SpvOpPhi, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(27, SpvOpPhi, 1), 100)
+          .IsApplicable(context.get(), fact_manager));
 
   // No split if base instruction does not exist
-  ASSERT_FALSE(TransformationSplitBlock(88, 0, 100)
-                   .IsApplicable(context.get(), fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(88, 22, 100)
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(88, SpvOpIAdd, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(88, SpvOpIMul, 22), 100)
                    .IsApplicable(context.get(), fact_manager));
 
-  // No split if offset is too large (goes into another block)
-  ASSERT_FALSE(TransformationSplitBlock(18, 3, 100)
-                   .IsApplicable(context.get(), fact_manager));
+  // No split if too many instructions with the desired opcode are skipped
+  ASSERT_FALSE(
+      TransformationSplitBlock(
+          MakeInstructionDescriptor(18, SpvOpBranchConditional, 1), 100)
+          .IsApplicable(context.get(), fact_manager));
 
   // No split if id in use
-  ASSERT_FALSE(TransformationSplitBlock(18, 0, 27).IsApplicable(context.get(),
-                                                                fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(18, 0, 14).IsApplicable(context.get(),
-                                                                fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(18, SpvOpSLessThan, 0), 27)
+                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(18, SpvOpSLessThan, 0), 14)
+                   .IsApplicable(context.get(), fact_manager));
 }
 
 TEST(TransformationSplitBlockTest, SplitBlockSeveralTimes) {
@@ -188,7 +200,8 @@
 
   FactManager fact_manager;
 
-  auto split_1 = TransformationSplitBlock(5, 3, 100);
+  auto split_1 = TransformationSplitBlock(
+      MakeInstructionDescriptor(5, SpvOpStore, 0), 100);
   ASSERT_TRUE(split_1.IsApplicable(context.get(), fact_manager));
   split_1.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));
@@ -235,7 +248,8 @@
   )";
   ASSERT_TRUE(IsEqual(env, after_split_1, context.get()));
 
-  auto split_2 = TransformationSplitBlock(11, 1, 101);
+  auto split_2 = TransformationSplitBlock(
+      MakeInstructionDescriptor(11, SpvOpStore, 0), 101);
   ASSERT_TRUE(split_2.IsApplicable(context.get(), fact_manager));
   split_2.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));
@@ -284,7 +298,8 @@
   )";
   ASSERT_TRUE(IsEqual(env, after_split_2, context.get()));
 
-  auto split_3 = TransformationSplitBlock(14, 0, 102);
+  auto split_3 = TransformationSplitBlock(
+      MakeInstructionDescriptor(14, SpvOpLoad, 0), 102);
   ASSERT_TRUE(split_3.IsApplicable(context.get(), fact_manager));
   split_3.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));
@@ -399,12 +414,17 @@
   FactManager fact_manager;
 
   // Illegal to split between the merge and the conditional branch.
-  ASSERT_FALSE(TransformationSplitBlock(14, 2, 100)
-                   .IsApplicable(context.get(), fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(12, 3, 100)
-                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(
+          MakeInstructionDescriptor(14, SpvOpBranchConditional, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(
+          MakeInstructionDescriptor(12, SpvOpBranchConditional, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
 
-  auto split = TransformationSplitBlock(14, 1, 100);
+  auto split = TransformationSplitBlock(
+      MakeInstructionDescriptor(14, SpvOpSelectionMerge, 0), 100);
   ASSERT_TRUE(split.IsApplicable(context.get(), fact_manager));
   split.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));
@@ -523,12 +543,15 @@
   FactManager fact_manager;
 
   // Illegal to split between the merge and the conditional branch.
-  ASSERT_FALSE(TransformationSplitBlock(9, 2, 100).IsApplicable(context.get(),
-                                                                fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(15, 3, 100)
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(9, SpvOpSwitch, 0), 100)
+                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(TransformationSplitBlock(
+                   MakeInstructionDescriptor(15, SpvOpSwitch, 0), 100)
                    .IsApplicable(context.get(), fact_manager));
 
-  auto split = TransformationSplitBlock(9, 1, 100);
+  auto split = TransformationSplitBlock(
+      MakeInstructionDescriptor(9, SpvOpSelectionMerge, 0), 100);
   ASSERT_TRUE(split.IsApplicable(context.get(), fact_manager));
   split.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));
@@ -654,12 +677,15 @@
 
   // We cannot split before OpPhi instructions, since the number of incoming
   // blocks may not appropriately match after splitting.
-  ASSERT_FALSE(TransformationSplitBlock(26, 0, 100)
-                   .IsApplicable(context.get(), fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(27, 0, 100)
-                   .IsApplicable(context.get(), fact_manager));
-  ASSERT_FALSE(TransformationSplitBlock(27, 1, 100)
-                   .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(26, SpvOpPhi, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(27, SpvOpPhi, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  ASSERT_FALSE(
+      TransformationSplitBlock(MakeInstructionDescriptor(27, SpvOpPhi, 1), 100)
+          .IsApplicable(context.get(), fact_manager));
 }
 
 TEST(TransformationSplitBlockTest, SplitOpPhiWithSinglePredecessor) {
@@ -701,9 +727,13 @@
 
   FactManager fact_manager;
 
-  ASSERT_TRUE(TransformationSplitBlock(21, 0, 100)
-                  .IsApplicable(context.get(), fact_manager));
-  auto split = TransformationSplitBlock(20, 1, 100);
+  ASSERT_TRUE(
+      TransformationSplitBlock(MakeInstructionDescriptor(21, SpvOpPhi, 0), 100)
+          .IsApplicable(context.get(), fact_manager));
+  // An equivalent transformation to the above, just described with respect to a
+  // different base instruction.
+  auto split =
+      TransformationSplitBlock(MakeInstructionDescriptor(20, SpvOpPhi, 0), 100);
   ASSERT_TRUE(split.IsApplicable(context.get(), fact_manager));
   split.Apply(context.get(), &fact_manager);
   ASSERT_TRUE(IsValid(env, context.get()));