Add fetch tool for retrieving Chromium's custom-built Rust toolchain

Adapt the approach [1] that Chromium uses for fetching its Rust
toolchain to Skia. Take update_rust.py and the minimally required helper
functions from Chromium's Clang update script to fetch a specific, fully
functional and self-contained Rust build from Chromium's CDS bucket, in
which Chromium stores its Clang and Rust toolchain builds.

Building and maintaining an up-to-date, tip-of-tree Rust toolchain
requires constant effort, which can be avoided on the Skia side if we
rely on Chromium's work here.

Currently, this script has to be rolled manually to the latest build
listed in [2]. Issue skia:14191 tracks upgrading the script so that it
can automatically roll to the latest known-good revision.

[1] https://source.chromium.org/chromium/chromium/src/+/main:tools/rust/update_rust.py
[2] https://commondatastorage.googleapis.com/chromium-browser-clang/index.html?path=Linux_x64/rust-toolchain-

Bug: skia:14185
Change-Id: I66657a4943251b15d1c2ce69044bbfbb5403b4d4
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/657157
Reviewed-by: Kevin Lubick <kjlubick@google.com>
Commit-Queue: Dominik Röttsches <drott@google.com>
diff --git a/.gitignore b/.gitignore
index 13a92b5..90cfd19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@
 bin/sk.exe
 bin/sk.version
 third_party/ninja
+third_party/rust-toolchain
 bin/ninja.exe
 bin/ninja.version
 
diff --git a/bin/clang_update.py b/bin/clang_update.py
new file mode 100644
index 0000000..9562e4f
--- /dev/null
+++ b/bin/clang_update.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""
+Bits and pieces of Chromium's tools/clang/scripts/update.py [1] that are
+needed for fetching the Rust toolchain; see bin/fetch-rust-toolchain.
+
+[1] https://source.chromium.org/chromium/chromium/src/+/main:tools/clang/scripts/update.py
+
+"""
+
+import argparse
+import os
+import platform
+import shutil
+import stat
+import tarfile
+import tempfile
+import time
+import urllib.request
+import urllib.error
+import zipfile
+import zlib
+import sys
+
+
+# Bucket holding Chromium's prebuilt Clang/Rust packages; the environment
+# variable CDS_CLANG_BUCKET_OVERRIDE replaces the default URL when set.
+_DEFAULT_CDS_URL = "https://commondatastorage.googleapis.com/chromium-browser-clang"
+CDS_URL = os.environ.get("CDS_CLANG_BUCKET_OVERRIDE", _DEFAULT_CDS_URL)
+
+
+def EnsureDirExists(path):
+    """Create directory `path` (and missing parents); no-op if it already exists."""
+    os.makedirs(path, exist_ok=True)
+
+
+def DownloadAndUnpack(url, output_dir, path_prefixes=None):
+    """Download an archive from url and extract into output_dir. If path_prefixes
+    is not None, only extract files whose paths within the archive start with
+    any prefix in path_prefixes (tar archives only; must be None for .zip)."""
+    with tempfile.TemporaryFile() as f:
+        DownloadUrl(url, f)
+        f.seek(0)
+        EnsureDirExists(output_dir)
+        if url.endswith(".zip"):
+            assert path_prefixes is None
+            # Close the archive handle as soon as extraction is done.
+            with zipfile.ZipFile(f) as z:
+                z.extractall(path=output_dir)
+            return
+        with tarfile.open(mode="r:*", fileobj=f) as t:
+            members = None
+            if path_prefixes is not None:
+                # str.startswith accepts a tuple of candidate prefixes.
+                prefixes = tuple(path_prefixes)
+                members = [m for m in t.getmembers() if m.name.startswith(prefixes)]
+            t.extractall(path=output_dir, members=members)
+
+
+def GetPlatformUrlPrefix(host_os):
+    """Return the download URL prefix (ending in "/") for the given host_os.
+
+    Raises KeyError if host_os is not one of linux, mac, mac-arm64 or win.
+    """
+    names = ("linux", "mac", "mac-arm64", "win")
+    dirs = ("Linux_x64", "Mac", "Mac_arm64", "Win")
+    return "%s/%s/" % (CDS_URL, dict(zip(names, dirs))[host_os])
+
+
+def DownloadUrl(url, output_file):
+    """Download url into output_file (a seekable binary file object).
+
+    Transient failures are retried with doubling waits; the file is rewound
+    and truncated before each retry. Raises urllib.error.URLError once the
+    retries are exhausted, or right away on HTTP 404.
+    """
+    CHUNK_SIZE = 4096
+    TOTAL_DOTS = 10
+    num_retries = 3
+    retry_wait_s = 5  # Doubled at each retry.
+
+    while True:
+        try:
+            sys.stdout.write("Downloading %s " % url)
+            sys.stdout.flush()
+            request = urllib.request.Request(url)
+            request.add_header("Accept-Encoding", "gzip")
+            # Closing the response per attempt keeps retries from leaking it.
+            with urllib.request.urlopen(request) as response:
+                total_size = None
+                if "Content-Length" in response.headers:
+                    total_size = int(response.headers["Content-Length"].strip())
+
+                is_gzipped = response.headers.get("Content-Encoding", "").strip() == "gzip"
+                if is_gzipped:
+                    gzip_decode = zlib.decompressobj(zlib.MAX_WBITS + 16)
+
+                bytes_done = 0
+                dots_printed = 0
+                while True:
+                    chunk = response.read(CHUNK_SIZE)
+                    if not chunk:
+                        break
+                    bytes_done += len(chunk)
+                    if is_gzipped:
+                        chunk = gzip_decode.decompress(chunk)
+                    output_file.write(chunk)
+                    if total_size is not None:
+                        num_dots = TOTAL_DOTS * bytes_done // total_size
+                        sys.stdout.write("." * (num_dots - dots_printed))
+                        sys.stdout.flush()
+                        dots_printed = num_dots
+            if total_size is not None and bytes_done != total_size:
+                raise urllib.error.URLError(
+                    "only got %d of %d bytes" % (bytes_done, total_size))
+            if is_gzipped:
+                output_file.write(gzip_decode.flush())
+            print(" Done.")
+            return
+        except urllib.error.URLError as e:
+            sys.stdout.write("\n")
+            print(e)
+            is_404 = isinstance(e, urllib.error.HTTPError) and e.code == 404
+            if num_retries == 0 or is_404:
+                raise
+            num_retries -= 1
+            output_file.seek(0)
+            output_file.truncate()
+            print("Retrying in %d s ..." % retry_wait_s)
+            sys.stdout.flush()
+            time.sleep(retry_wait_s)
+            retry_wait_s *= 2
+
+
+def GetDefaultHostOs():
+    """Return the host OS name derived from sys.platform.
+
+    Maps to "linux", "mac", "mac-arm64" or "win"; unknown values pass through.
+    """
+    aliases = dict(darwin="mac", cygwin="win", linux2="linux", win32="win")
+    host_os = aliases[sys.platform] if sys.platform in aliases else sys.platform
+    # arm64 Macs get their own key, distinct from other Macs.
+    if host_os == "mac" and platform.machine() == "arm64":
+        return "mac-arm64"
+    return host_os
diff --git a/bin/fetch-rust-toolchain b/bin/fetch-rust-toolchain
new file mode 100755
index 0000000..9b423e8
--- /dev/null
+++ b/bin/fetch-rust-toolchain
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Update in-tree checkout of Rust toolchain
+
+Minimal Skia adaptation of [1] to fetch a Rust revision manually
+specified in MANUAL_REVISION below. Roll this frequently to the latest
+version available in [2].
+
+[1] https://source.chromium.org/chromium/chromium/src/+/main:tools/rust/update_rust.py
+[2] https://commondatastorage.googleapis.com/chromium-browser-clang/index.html?path=Linux_x64/rust-toolchain-
+
+"""
+
+import argparse
+import os
+import re
+import shutil
+import sys
+import tempfile
+import urllib
+
+from pathlib import Path
+
+# Chromium's successful Rust builds (for Linux) are listed at:
+# https://commondatastorage.googleapis.com/chromium-browser-clang/index.html?path=Linux_x64/rust-toolchain-
+# The latest builds are prefixed with a date, such as `20230101-1`. To update,
+# set MANUAL_REVISION to a version built by Chromium's tools/rust/build-rust.py and listed there.
+MANUAL_REVISION = "ac4379fea9e83465d814bb05005689f49bd2141e-1-llvmorg-17-init-3874-g93a2fecc-1"
+
+# All paths are derived from this script's own location rather than from the
+# current working directory.
+THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+SKIA_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir))
+THIRD_PARTY_DIR = os.path.join(SKIA_DIR, "third_party")
+RUST_TOOLCHAIN_OUT_DIR = os.path.join(THIRD_PARTY_DIR, "rust-toolchain")
+VERSION_STAMP_PATH = os.path.join(RUST_TOOLCHAIN_OUT_DIR, "VERSION")
+
+
+def GetDownloadPackageVersion():
+    """Return the version of the Rust toolchain package to download."""
+    # TODO(https://crbug.com/skia/14191): This is hardcoded in Skia right now,
+    # as Skia does not rebuild Rust on its own and can't directly access the
+    # most recent version that's been built for Chromium. Could the Chromium
+    # side-build publish something like a symbolic link to last-known-good?
+    return MANUAL_REVISION
+
+
+def GetStampVersion():
+    """Return the version of the toolchain package we already have.
+
+    Returns None if the VERSION stamp is missing or has an unexpected format.
+    """
+    try:
+        with open(VERSION_STAMP_PATH) as version_file:
+            existing_stamp = version_file.readline().rstrip()
+    except FileNotFoundError:
+        return None
+    match = re.fullmatch(r"rustc [0-9.]+ [0-9a-f]+ \((.+?) chromium\)", existing_stamp)
+    return match.group(1) if match else None
+
+
+def main():
+    """Fetch the pinned Rust toolchain into third_party/rust-toolchain.
+
+    Returns 0 on success or if already up to date, 1 on a reported failure.
+    """
+    parser = argparse.ArgumentParser(description="Update Rust package")
+    parser.add_argument(
+        "--print-package-version",
+        action="store_true",
+        help="Print Rust package version (including both the "
+        "Rust and Clang revisions) and quit.",
+    )
+    args = parser.parse_args()
+    version = GetDownloadPackageVersion()
+    if args.print_package_version:
+        print(version)
+        return 0
+
+    # A bare `import urllib` does not guarantee that urllib.error is loaded.
+    import urllib.error
+    from clang_update import DownloadAndUnpack, GetDefaultHostOs, GetPlatformUrlPrefix
+
+    # Exit early if the existing package is up-to-date: downloading again is
+    # wasteful and also leads to multiple versions of the same rustlibs, which
+    # build/rust/std/find_std_rlibs.py chokes on.
+    if os.path.exists(RUST_TOOLCHAIN_OUT_DIR) and version == GetStampVersion():
+        return 0
+
+    # TODO(https://crbug.com/skia/14190): Enable this on additional platforms.
+    host_os = GetDefaultHostOs()
+    if "linux" not in host_os:
+        print("Unsupported platform, Rust support only available on Linux "
+              "at the moment, see https://crbug.com/skia/14190")
+        return 1
+
+    # Delete the stale package only once the platform is known to be supported.
+    if os.path.exists(RUST_TOOLCHAIN_OUT_DIR):
+        shutil.rmtree(RUST_TOOLCHAIN_OUT_DIR)
+    url = f"{GetPlatformUrlPrefix(host_os)}rust-toolchain-{version}.tgz"
+    try:
+        DownloadAndUnpack(url, THIRD_PARTY_DIR)
+    except urllib.error.HTTPError:
+        # E.g. no Rust package was produced for this revision. Stop here rather
+        # than trip the version check. TODO(https://crbug.com/1245714): revisit.
+        print("warning: could not download Rust package")
+        return 1
+
+    # Verify the unpacked version explicitly; an assert vanishes under -O.
+    if version != GetStampVersion():
+        print(f"error: {url} did not unpack to Rust package version {version}")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())