tests/scripts/download-test-fonts.sh rewrite in Python3

Replaces download-test-fonts.sh with download-test-fonts.py
which does the same work, and also avoids downloading anything
if the files are already installed with the right content.

Now uses the first 8 bytes of each file's sha256 hash for the
digest.
diff --git a/ChangeLog b/ChangeLog
index 751fdd4..790ef56 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2021-07-15  David Turner  <david@freetype.org>
+
+	Replaces download-test-fonts.sh with download-test-fonts.py which
+	does the same work, and also avoids downloading anything if the
+	files are already installed with the right content.
+
+	Now uses the first 8 bytes of each file's sha256 hash for the digest.
+
+	* tests/scripts/download-test-fonts.sh: Removed
+	* tests/scripts/download-test-fonts.py: New script
+	* tests/README.md: Updated
+
 2021-07-15  Alex Richardson  <Alexander.Richardson@cl.cam.ac.uk>
 
 	Support architectures where `long` is smaller than pointers.
diff --git a/tests/README.md b/tests/README.md
index f6f8611..0d0b99a 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -4,7 +4,7 @@
 
 ### Download test fonts
 
-Run the `tests/scripts/download-fonts.sh` script, which will
+Run the `tests/scripts/download-test-fonts.py` script, which will
 download test fonts to the `tests/data/` directory first.
 
 ### Build the test programs
diff --git a/tests/scripts/download-test-fonts.py b/tests/scripts/download-test-fonts.py
new file mode 100755
index 0000000..cab133d
--- /dev/null
+++ b/tests/scripts/download-test-fonts.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+
+"""Download test fonts used by the FreeType regression test programs.
+These will be copied to $FREETYPE/tests/data/ by default.
+"""
+
+import argparse
+import collections
+import hashlib
+import io
+import os
+import requests
+import sys
+import zipfile
+
+from typing import Callable, List, Optional, Tuple
+
+# The list of download items describing the font files to install.
+# Each download item is a dictionary with one of the following schemas:
+#
+# - File item:
+#
+#      file_url
+#        Type: URL string.
+#        Required: Yes.
+#        Description: URL to download the file from.
+#
+#      install_name
+#        Type: file name string
+#        Required: No
+#        Description: Installation name for the font file, only provided if it
+#          must be different from the original URL's basename.
+#
+#      hex_digest
+#        Type: hexadecimal string
+#        Required: No
+#        Description: Digest of the input font file (first 8 bytes of its sha256 hash).
+#
+# - Zip items:
+#
+#   These items correspond to one or more font files that are embedded in a
+#   remote zip archive. Each entry has the following fields:
+#
+#      zip_url
+#        Type: URL string.
+#        Required: Yes.
+#        Description: URL to download the zip archive from.
+#
+#      zip_files
+#        Type: List of file entries (see below)
+#        Required: Yes
+#        Description: A list of entries, each describing a single font file
+#          to be extracted from the archive.
+#
+# Apart from that, some schemas are used for dictionaries used inside download
+# items:
+#
+# - File entries:
+#
+#   These are dictionaries describing a single font file to extract from an archive.
+#
+#      filename
+#        Type: file path string
+#        Required: Yes
+#        Description: Path of source file, relative to the archive's top-level directory.
+#
+#      install_name
+#        Type: file name string
+#        Required: No
+#        Description: Installation name for the font file, only provided if it must be
+#          different from the original filename value.
+#
+#      hex_digest
+#        Type: hexadecimal string
+#        Required: No
+#        Description: Digest of the input source file (first 8 bytes of its sha256 hash).
+#
+_DOWNLOAD_ITEMS = [
+    {  # See https://gitlab.freedesktop.org/freetype/freetype/-/issues/1063
+        "zip_url": "https://github.com/python-pillow/Pillow/files/6622147/As.I.Lay.Dying.zip",
+        "zip_files": [
+            {
+                "filename": "As I Lay Dying.ttf",
+                "install_name": "As.I.Lay.Dying.ttf",
+                "hex_digest": "ef146bbc2673b387",  # First 8 bytes of the sha256 hash.
+            },
+        ],
+    },
+]
+
+
+def digest_data(data: bytes):
+    """Return the digest of |data|: the leading 8 bytes of its SHA-256 hash."""
+    # Truncating keeps the hex digests in _DOWNLOAD_ITEMS short (16 hex chars).
+    full_hash = hashlib.sha256(data).digest()
+    return full_hash[:8]
+
+
+def check_existing(path: str, hex_digest: str):
+    """Tell whether the file at |path| exists with content matching |hex_digest|.
+
+    A missing file, or a |hex_digest| of None, always gives False.
+    """
+    if not (os.path.exists(path) and hex_digest is not None):
+        return False
+    with open(path, "rb") as installed:
+        return digest_data(installed.read()) == bytes.fromhex(hex_digest)
+
+
+def install_file(content: bytes, dest_path: str):
+    """Write a byte string to a given destination file.
+
+    Args:
+      content: Input data, as a byte string
+      dest_path: Installation path; missing parent directories are created.
+    """
+    parent_path = os.path.dirname(dest_path)
+    if parent_path:  # Empty for a bare file name: nothing to create then.
+        os.makedirs(parent_path, exist_ok=True)
+
+    with open(dest_path, "wb") as f:
+        f.write(content)
+
+
+def download_file(url: str, expected_digest: Optional[bytes] = None):
+    """Download a file from a given URL.
+
+    Args:
+      url: Input URL
+      expected_digest: Optional digest of the file, as a byte string
+    Returns:
+      URL content as binary string.
+    Raises:
+      requests.HTTPError on an HTTP error status, ValueError on a digest mismatch.
+    """
+    r = requests.get(url, allow_redirects=True)
+    r.raise_for_status()  # Never install an HTTP error page as if it were a font.
+    content = r.content
+    if expected_digest is not None:
+        digest = digest_data(content)
+        if digest != expected_digest:
+            msg = "%s has invalid digest %s (expected %s)"
+            raise ValueError(msg % (url, digest.hex(), expected_digest.hex()))
+
+    return content
+
+
+def extract_file_from_zip_archive(
+    archive: zipfile.ZipFile,
+    archive_name: str,
+    filepath: str,
+    expected_digest: Optional[bytes] = None,
+):
+    """Extract a file from a given zipfile.ZipFile archive.
+
+    Args:
+      archive: Input ZipFile object.
+      archive_name: Archive name or URL, only used to generate a human-readable error
+        message.
+      filepath: Input filepath in archive.
+      expected_digest: Optional digest for the file.
+    Returns:
+      The content of the extracted file, as a byte string.
+    Raises:
+      ValueError if expected_digest is not None and does not match the extracted file.
+    """
+    content = archive.read(filepath)  # Single read; the member is closed for us.
+    if expected_digest is not None:
+        digest = digest_data(content)
+        if digest != expected_digest:
+            raise ValueError(
+                "%s in zip archive at %s has invalid digest %s (expected %s)"
+                % (filepath, archive_name, digest.hex(), expected_digest.hex())
+            )
+    return content
+
+
+def _get_and_install_file(
+    install_path: str,
+    hex_digest: Optional[str],
+    force_download: bool,
+    get_content: Callable[[], bytes],
+) -> bool:
+    """Install get_content()'s result at |install_path| unless already up-to-date.
+
+    The existing file is kept, without calling |get_content|, only if it matches
+    |hex_digest| and |force_download| is False. Returns True if a file was written.
+    """
+    if not force_download and check_existing(install_path, hex_digest):
+        return False
+    install_file(get_content(), install_path)
+    return True
+
+
+def download_and_install_item(
+    item: dict, install_dir: str, force_download: bool
+) -> List[Tuple[str, bool]]:
+    """Download and install one item.
+
+    Args:
+      item: Download item as a dictionary, see above for schema.
+      install_dir: Installation directory.
+      force_download: Set to True to reinstall files that are already up-to-date.
+
+    Returns:
+      A list of (install_name, status) tuples, where 'install_name' is the file's
+      installation name under 'install_dir', and 'status' is True if the file was
+      downloaded and installed, or False if it was already installed and up-to-date.
+
+    Raises:
+      ValueError if the item has an unknown schema, or if a digest does not match.
+    """
+    if "file_url" in item:
+        file_url = item["file_url"]
+        install_name = item.get("install_name", os.path.basename(file_url))
+        install_path = os.path.join(install_dir, install_name)
+        hex_digest = item.get("hex_digest")
+
+        def get_content():  # download_file() expects raw digest bytes, not hex.
+            digest = bytes.fromhex(hex_digest) if hex_digest else None
+            return download_file(file_url, digest)
+
+        status = _get_and_install_file(
+            install_path, hex_digest, force_download, get_content
+        )
+        return [(install_name, status)]
+
+    if "zip_url" in item:
+        # One or more files from a zip archive, fetched only if a file needs it.
+        archive_url = item["zip_url"]
+        archive = None
+
+        result = []
+        for f in item["zip_files"]:
+            filename = f["filename"]
+            install_name = f.get("install_name", filename)
+            install_path = os.path.join(install_dir, install_name)
+            hex_digest = f.get("hex_digest")
+
+            def get_content():
+                nonlocal archive
+                if archive is None:  # First file needing the archive: download it.
+                    archive = zipfile.ZipFile(io.BytesIO(download_file(archive_url)))
+                digest = bytes.fromhex(hex_digest) if hex_digest else None
+                return extract_file_from_zip_archive(
+                    archive, archive_url, filename, digest
+                )
+
+            status = _get_and_install_file(
+                install_path, hex_digest, force_download, get_content
+            )
+            result.append((install_name, status))
+
+        return result
+
+    raise ValueError("Unknown download item schema: %s" % item)
+
+
+def main():
+    """Command-line entry point: install every item listed in _DOWNLOAD_ITEMS.
+
+    Prints "<name> INSTALLED" or "<name> UP-TO-DATE" for each font file; returns 0.
+    """
+    # This script is assumed to live in tests/scripts/, which makes the sibling
+    # tests/data/ directory the default installation directory.
+    script_dir = os.path.dirname(__file__)
+    default_install_dir = os.path.normpath(os.path.join(script_dir, "..", "data"))
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        default=False,
+        help="Force download and installation of font files",
+    )
+    parser.add_argument(
+        "--install-dir",
+        default=default_install_dir,
+        help="Specify installation directory [%s]" % default_install_dir,
+    )
+    options = parser.parse_args()
+
+    for item in _DOWNLOAD_ITEMS:
+        results = download_and_install_item(item, options.install_dir, options.force)
+        for install_name, installed in results:
+            state = "INSTALLED" if installed else "UP-TO-DATE"
+            print("%s %s" % (install_name, state))
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/scripts/download-test-fonts.sh b/tests/scripts/download-test-fonts.sh
deleted file mode 100755
index 1158f10..0000000
--- a/tests/scripts/download-test-fonts.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/bash
-# Download test fonts used by the FreeType regression test programs.
-# These will be copied to $FREETYPE/tests/data/
-# Each font file contains an 8-hexchar prefix corresponding to its md5sum
-
-set -e
-
-export LANG=C
-export LC_ALL=C
-
-PROGDIR=$(dirname "$0")
-PROGNAME=$(basename "$0")
-
-# Download a file from a given URL
-#
-# $1: URL
-# $2: Destination directory
-# $3: If not empty, destination file name. Default is to take
-# the URL's basename.
-#
-download_file () {
-  local URL=$1
-  local DST_DIR=$2
-  local DST_FILE=$3
-  if [[ -z "$DST_FILE" ]]; then
-    DST_FILE=$(basename "$URL")
-  fi
-  echo "URL: $URL"
-  wget -q -O "$DST_DIR/$DST_FILE" "$URL"
-}
-
-# $1: URL
-# $2: Destination directory
-# $3+: Optional file list, otherwise the full archive is extracted to $2
-download_and_extract_zip () {
-  local URL=$1
-  local DST_DIR=$2
-  shift
-  shift
-  TEMP_DST_DIR=$(mktemp -d)
-  TEMP_DST_NAME="a.zip"
-  download_file "$URL" "$TEMP_DST_DIR" "$TEMP_DST_NAME"
-  unzip -qo "$TEMP_DST_DIR/$TEMP_DST_NAME" -d "$DST_DIR" "$@"
-  rm -rf "$TEMP_DST_DIR"
-}
-
-# $1: File path
-# $2: Expected md5sum
-md5sum_check () {
-  local FILE=$1
-  local EXPECTED=$2
-  local HASH=$(md5sum "$FILE" | cut -d" " -f1)
-  if [[ "$EXPECTED" != "$HASH" ]]; then
-    echo "$FILE: Invalid md5sum $HASH expected $EXPECTED"
-    return 1
-  fi
-}
-
-INSTALL_DIR=$(cd $PROGDIR/.. && pwd)/data
-
-mkdir -p "$INSTALL_DIR"
-
-# See https://gitlab.freedesktop.org/freetype/freetype/-/issues/1063
-download_and_extract_zip "https://github.com/python-pillow/Pillow/files/6622147/As.I.Lay.Dying.zip" "$INSTALL_DIR"
-mv "$INSTALL_DIR/As I Lay Dying.ttf" "$INSTALL_DIR/As.I.Lay.Dying.ttf"
-md5sum_check "$INSTALL_DIR/As.I.Lay.Dying.ttf" e153d60e66199660f7cfe99ef4705ad7