Add spirv-dump tool (#5146)

* dump: add tool skeleton.

This tool aims to be used a bit like objdump (hence the name),
allowing the user to dump some info from a SPIR-V binary.

* add test structure for spirv-dump

* add spirv-dump to bazel build file

* fix licenses

* fix compilation with ubsan

* remove fdiagnostics

* rename dump to objdump

* move tests to test/tools

* rename dump to objdump for bazel
diff --git a/BUILD.bazel b/BUILD.bazel
index 141509d..0afcfa9 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -305,6 +305,23 @@
 )
 
 cc_binary(
+    name = "spirv-objdump",  # Dumps information from a SPIR-V binary.
+    srcs = [
+        "tools/objdump/extract_source.cpp",
+        "tools/objdump/extract_source.h",
+        "tools/objdump/objdump.cpp",
+    ],
+    copts = COMMON_COPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":spirv_tools_internal",
+        ":spirv_tools_opt_internal",
+        ":tools_io",
+        ":tools_util",
+    ],
+)
+
+cc_binary(
     name = "spirv-val",
     srcs = [
         "tools/val/val.cpp",
diff --git a/test/tools/CMakeLists.txt b/test/tools/CMakeLists.txt
index 0520bd7..4898e57 100644
--- a/test/tools/CMakeLists.txt
+++ b/test/tools/CMakeLists.txt
@@ -26,3 +26,4 @@
   DEFINES TESTING=1)
 
 add_subdirectory(opt)
+add_subdirectory(objdump)
diff --git a/test/tools/objdump/CMakeLists.txt b/test/tools/objdump/CMakeLists.txt
new file mode 100644
index 0000000..46fae21
--- /dev/null
+++ b/test/tools/objdump/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright (c) 2023 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_spvtools_unittest(  # Unit tests for the spirv-objdump helpers.
+  TARGET spirv_unit_test_tools_objdump
+  SRCS
+    extract_source_test.cpp
+    ${spirv-tools_SOURCE_DIR}/tools/util/flags.cpp
+    ${spirv-tools_SOURCE_DIR}/tools/util/cli_consumer.cpp
+    ${spirv-tools_SOURCE_DIR}/tools/objdump/extract_source.cpp
+  LIBS ${SPIRV_TOOLS_FULL_VISIBILITY} SPIRV-Tools-opt
+  DEFINES TESTING=1)
diff --git a/test/tools/objdump/extract_source_test.cpp b/test/tools/objdump/extract_source_test.cpp
new file mode 100644
index 0000000..3fe633b
--- /dev/null
+++ b/test/tools/objdump/extract_source_test.cpp
@@ -0,0 +1,80 @@
+// Copyright (c) 2023 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tools/objdump/extract_source.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "source/opt/build_module.h"
+#include "source/opt/ir_context.h"
+#include "spirv-tools/libspirv.hpp"
+#include "tools/util/cli_consumer.h"
+
+namespace {
+
+constexpr auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_6;
+
+// Assembles `spv_source`, serializes the resulting module to a binary, and
+// runs extract_source_from_module() on it.
+//
+// Returns the extractor's status together with the <filename, source> map it
+// filled. If assembly fails (BuildModule() yields a null context), returns
+// `false` and an empty map instead of dereferencing the null pointer.
+std::pair<bool, std::unordered_map<std::string, std::string>> extractSource(
+    const std::string& spv_source) {
+  std::unordered_map<std::string, std::string> output;
+
+  std::unique_ptr<spvtools::opt::IRContext> ctx = spvtools::BuildModule(
+      kDefaultEnvironment, spvtools::utils::CLIMessageConsumer, spv_source,
+      spvtools::SpirvTools::kDefaultAssembleOption |
+          SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  if (ctx == nullptr) {
+    return std::make_pair(false, std::move(output));
+  }
+
+  std::vector<uint32_t> binary;
+  ctx->module()->ToBinary(&binary, /* skip_nop = */ false);
+  bool result = extract_source_from_module(binary, &output);
+  return std::make_pair(result, std::move(output));
+}
+
+}  // namespace
+
+// A module without any debug information must be processed successfully and
+// yield no source file.
+TEST(ExtractSourceTest, no_debug) {
+  std::string source = R"(
+           OpCapability Shader
+           OpCapability Linkage
+           OpMemoryModel Logical GLSL450
+   %void = OpTypeVoid
+      %2 = OpTypeFunction %void
+   %bool = OpTypeBool
+      %4 = OpUndef %bool
+      %5 = OpFunction %void None %2
+      %6 = OpLabel
+           OpReturn
+           OpFunctionEnd
+  )";
+
+  auto[success, result] = extractSource(source);
+  ASSERT_TRUE(success);
+  ASSERT_TRUE(result.empty());
+}
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 6bf7a11..4644a52 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -16,6 +16,7 @@
  add_subdirectory(lesspipe)
 endif()
 add_subdirectory(emacs)
+#add_subdirectory(objdump)
 
 # Add a SPIR-V Tools command line tool. Signature:
 #   add_spvtools_tool(
@@ -65,8 +66,18 @@
                     LIBS ${SPIRV_TOOLS_FULL_VISIBILITY})
   target_include_directories(spirv-cfg PRIVATE ${spirv-tools_SOURCE_DIR}
                                                ${SPIRV_HEADER_INCLUDE_DIR})
+
+  add_spvtools_tool(TARGET spirv-objdump  # Dumps info from a SPIR-V binary.
+                    SRCS objdump/objdump.cpp
+                         objdump/extract_source.cpp
+                         util/cli_consumer.cpp
+                         ${COMMON_TOOLS_SRCS}
+                    LIBS ${SPIRV_TOOLS_FULL_VISIBILITY})
+  target_include_directories(spirv-objdump PRIVATE ${spirv-tools_SOURCE_DIR}
+                                                   ${SPIRV_HEADER_INCLUDE_DIR})
+
   set(SPIRV_INSTALL_TARGETS spirv-as spirv-dis spirv-val spirv-opt
-                            spirv-cfg spirv-link spirv-lint)
+                            spirv-cfg spirv-link spirv-lint spirv-objdump)
   if(NOT (${CMAKE_SYSTEM_NAME} STREQUAL "iOS"))
     set(SPIRV_INSTALL_TARGETS ${SPIRV_INSTALL_TARGETS} spirv-reduce)
   endif()
diff --git a/tools/objdump/extract_source.cpp b/tools/objdump/extract_source.cpp
new file mode 100644
index 0000000..3722cf1
--- /dev/null
+++ b/tools/objdump/extract_source.cpp
@@ -0,0 +1,56 @@
+// Copyright (c) 2023 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "extract_source.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "source/opt/log.h"
+#include "spirv-tools/libspirv.hpp"
+#include "tools/util/cli_consumer.h"
+
+namespace {
+constexpr auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_6;
+}  // namespace
+
+// Implementation note: this is currently a parse-only skeleton. `binary` is
+// run through SpirvTools::Parse() with callbacks that accept everything, and
+// the function returns whether that parse succeeded.
+bool extract_source_from_module(
+    const std::vector<uint32_t>& binary,
+    std::unordered_map<std::string, std::string>* output) {
+  // FIXME: nothing is extracted yet, so `output` is never written to.
+  (void)output;
+
+  spvtools::SpirvTools tools(kDefaultEnvironment);
+  tools.SetMessageConsumer(spvtools::utils::CLIMessageConsumer);
+
+  // The header callback ignores its arguments and always reports success.
+  const spvtools::HeaderParser on_header =
+      [](const spv_endianness_t /* endianness */,
+         const spv_parsed_header_t& /* header */) { return SPV_SUCCESS; };
+
+  // Likewise, every instruction is accepted without being inspected.
+  const spvtools::InstructionParser on_instruction =
+      [](const spv_parsed_instruction_t& /* instruction */) {
+        return SPV_SUCCESS;
+      };
+
+  // With both callbacks always succeeding, the outcome is whatever Parse()
+  // itself reports for `binary`.
+  const bool parsed = tools.Parse(binary, on_header, on_instruction);
+  return parsed;
+}
diff --git a/tools/objdump/extract_source.h b/tools/objdump/extract_source.h
new file mode 100644
index 0000000..1a8af21
--- /dev/null
+++ b/tools/objdump/extract_source.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2023 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDE_SPIRV_TOOLS_EXTRACT_SOURCE_HPP_
+#define INCLUDE_SPIRV_TOOLS_EXTRACT_SOURCE_HPP_
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// Parses a SPIR-V module and extracts all HLSL source code from it.
+// This function doesn't lift the SPIR-V code, but only relies on debug symbols.
+// This means if the compiler didn't include some files, they won't show up.
+//
+// Parameters:
+// - `binary`: a vector containing the whole SPIR-V binary to extract source
+//             from.
+// - `output`: out-parameter receiving the <filename, source_code> mapping,
+//             which maps each filename (if defined) to its code.
+//
+// Returns `true` if the extraction succeeded, `false` otherwise.
+// `output` value is undefined if `false` is returned.
+bool extract_source_from_module(
+    const std::vector<uint32_t>& binary,
+    std::unordered_map<std::string, std::string>* output);
+
+#endif  // INCLUDE_SPIRV_TOOLS_EXTRACT_SOURCE_HPP_
diff --git a/tools/objdump/objdump.cpp b/tools/objdump/objdump.cpp
new file mode 100644
index 0000000..520ff19
--- /dev/null
+++ b/tools/objdump/objdump.cpp
@@ -0,0 +1,97 @@
+// Copyright (c) 2023 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "extract_source.h"
+#include "source/opt/log.h"
+#include "tools/io.h"
+#include "tools/util/cli_consumer.h"
+#include "tools/util/flags.h"
+
+namespace {
+
+constexpr auto kHelpTextFmt =  // printf format; both %s are the program name.
+    R"(%s - Dumps information from a SPIR-V binary.
+
+Usage: %s [options] <filename>
+
+one of the following switches must be given:
+  --source        Extract source files obtained from debug symbols, output to stdout.
+  --entrypoint    Extracts the entrypoint name of the module, output to stdout.
+  --compiler-cmd  Extracts the command line used to compile this module, output to stdout.
+
+
+General options:
+  -h, --help      Print this help.
+  --version       Display version information.
+  -f,--force      Allow output file overwrite.
+
+Source dump options:
+  --list          Do not extract source code, only print filenames to stdout.
+  --outdir        Where shall the extracted HLSL files be written to?
+                  File written to stdout if '-' is given. Default is `-`.
+)";
+
+}  // namespace
+
+// clang-format off
+FLAG_SHORT_bool(  h,            /* default_value= */ false, /* required= */ false);  // main() prints the help text.
+FLAG_LONG_bool(   help,         /* default_value= */ false, /* required= */ false);  // main() prints the help text.
+FLAG_LONG_bool(   version,      /* default_value= */ false, /* required= */ false);  // main() prints the version string.
+FLAG_LONG_bool(   source,       /* default_value= */ false, /* required= */ false);  // main() rejects it: "not implemented yet."
+FLAG_LONG_bool(   entrypoint,   /* default_value= */ false, /* required= */ false);  // main() rejects it: "not implemented yet."
+FLAG_LONG_bool(   compiler_cmd, /* default_value= */ false, /* required= */ false);  // main() rejects it: "not implemented yet."
+FLAG_SHORT_bool(  f,            /* default_value= */ false, /* required= */ false);  // Not read by main() yet.
+FLAG_LONG_bool(   force,        /* default_value= */ false, /* required= */ false);  // Not read by main() yet.
+FLAG_LONG_string( outdir,       /* default_value= */ "-",   /* required= */ false);  // Not read by main() yet.
+FLAG_LONG_bool(   list,         /* default_value= */ false, /* required= */ false);  // Not read by main() yet.
+// clang-format on
+
+int main(int, const char** argv) {  // Entry point of the spirv-objdump tool.
+  if (!flags::Parse(argv)) {
+    return 1;
+  }
+  if (flags::h.value() || flags::help.value()) {
+    printf(kHelpTextFmt, argv[0], argv[0]);  // Both %s are the program name.
+    return 0;
+  }
+  if (flags::version.value()) {
+    printf("%s\n", spvSoftwareVersionDetailsString());
+    return 0;
+  }
+
+  if (flags::positional_arguments.size() != 1) {
+    spvtools::Error(spvtools::utils::CLIMessageConsumer, nullptr, {},
+                    "expected exactly one input file.");
+    return 1;
+  }
+  if (flags::source.value() || flags::entrypoint.value() ||
+      flags::compiler_cmd.value()) {  // No dump switch is implemented yet.
+    spvtools::Error(spvtools::utils::CLIMessageConsumer, nullptr, {},
+                    "not implemented yet.");
+    return 1;
+  }
+
+  std::vector<uint32_t> binary;
+  if (!ReadBinaryFile(flags::positional_arguments[0].c_str(), &binary)) {
+    return 1;
+  }
+
+  if (flags::source.value()) {  // NOTE(review): unreachable, see check above.
+    std::unordered_map<std::string, std::string> output;
+    return extract_source_from_module(binary, &output) ? 0 : 1;
+  }
+
+  // FIXME: implement logic. Without a switch, the file is read and 0 returned.
+  return 0;
+}