Kokoro CI bots use git-sync-deps to get sources (#5031)

* Kokoro CI bots use git-sync-deps to get sources

Update git-sync-deps to reduce the amount of data downloaded on a first
checkout, while still being able to check out the specific commit named in
the DEPS file.

Previously the CI bots would only clone with --depth=1.  But that's not
enough to check out a specific commit.  So clone either blobless
(--filter=blob:none) or treeless (--filter=tree:0).  For a CI bot,
treeless is preferable because it downloads the least data.  For
interactive use, blobless is better because it prevents redundant
downloads of tree data.
See
https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/

Fixes: #5028

* --treeless falls back to --filter=blob:none when git is older than 2.20

* Pin googletest to an older version, to make bazel build work
diff --git a/DEPS b/DEPS
index d6242d8..85cb045 100644
--- a/DEPS
+++ b/DEPS
@@ -4,7 +4,13 @@
   'github': 'https://github.com',
 
   'effcee_revision': '35912e1b7778ec2ddcff7e7188177761539e59e0',
-  'googletest_revision': 'd9bb8412d60b993365abb53f00b6dad9b2c01b62',
+
+  # For now, pin an older googletest revision
+  'googletest_revision': '1fb1bb23bb8418dc73a5a9a82bbed31dc610fec7',
+
+  # Use protobufs before they gained the dependency on abseil
+  'protobuf_revision': 'v3.13.0.1',
+
   're2_revision': 'd2836d1b1c34c4e330a85a1006201db474bf2c8a',
   'spirv_headers_revision': '34d04647d384e0aed037e7a2662a655fc39841bb',
 }
@@ -16,6 +22,9 @@
   'external/googletest':
       Var('github') + '/google/googletest.git@' + Var('googletest_revision'),
 
+  'external/protobuf':
+      Var('github') + '/protocolbuffers/protobuf.git@' + Var('protobuf_revision'),
+
   'external/re2':
       Var('github') + '/google/re2.git@' + Var('re2_revision'),
 
diff --git a/kokoro/scripts/linux/build-docker.sh b/kokoro/scripts/linux/build-docker.sh
index 80043b8..7d62ee3 100755
--- a/kokoro/scripts/linux/build-docker.sh
+++ b/kokoro/scripts/linux/build-docker.sh
@@ -30,14 +30,6 @@
 
 cd $ROOT_DIR
 
-function clone_if_missing() {
-  url=$1
-  dir=$2
-  if [[ ! -d "$dir" ]]; then
-    git clone ${@:3} "$url" "$dir"
-  fi
-}
-
 function clean_dir() {
   dir=$1
   if [[ -d "$dir" ]]; then
@@ -46,12 +38,8 @@
   mkdir "$dir"
 }
 
-clone_if_missing https://github.com/KhronosGroup/SPIRV-Headers external/spirv-headers --depth=1
-clone_if_missing https://github.com/google/googletest          external/googletest
-pushd external/googletest; git reset --hard 1fb1bb23bb8418dc73a5a9a82bbed31dc610fec7; popd
-clone_if_missing https://github.com/google/effcee              external/effcee        --depth=1
-clone_if_missing https://github.com/google/re2                 external/re2           --depth=1
-clone_if_missing https://github.com/protocolbuffers/protobuf   external/protobuf      --branch v3.13.0.1
+# Get source for dependencies, as specified in the DEPS file
+/usr/bin/python3 utils/git-sync-deps --treeless
 
 if [ $TOOL = "cmake" ]; then
   using cmake-3.17.2
diff --git a/kokoro/scripts/macos/build.sh b/kokoro/scripts/macos/build.sh
index 3618e69..1d346e7 100644
--- a/kokoro/scripts/macos/build.sh
+++ b/kokoro/scripts/macos/build.sh
@@ -31,12 +31,7 @@
 export PATH="$PWD:$PATH"
 
 cd $SRC
-git clone --depth=1 https://github.com/KhronosGroup/SPIRV-Headers external/spirv-headers
-git clone https://github.com/google/googletest          external/googletest
-cd external && cd googletest && git reset --hard 1fb1bb23bb8418dc73a5a9a82bbed31dc610fec7 && cd .. && cd ..
-git clone --depth=1 https://github.com/google/effcee              external/effcee
-git clone --depth=1 https://github.com/google/re2                 external/re2
-git clone --depth=1 --branch v3.13.0.1 https://github.com/protocolbuffers/protobuf external/protobuf
+python3 utils/git-sync-deps --treeless
 
 mkdir build && cd $SRC/build
 
diff --git a/kokoro/scripts/windows/build.bat b/kokoro/scripts/windows/build.bat
index d5ea6d0..89e1f01 100644
--- a/kokoro/scripts/windows/build.bat
+++ b/kokoro/scripts/windows/build.bat
@@ -24,14 +24,6 @@
 :: Force usage of python 3.6
 set PATH=C:\python36;"C:\Program Files\cmake-3.23.1-windows-x86_64\bin";%PATH%
 
-cd %SRC%
-git clone --depth=1 https://github.com/KhronosGroup/SPIRV-Headers external/spirv-headers
-git clone https://github.com/google/googletest          external/googletest
-cd external && cd googletest && git reset --hard 1fb1bb23bb8418dc73a5a9a82bbed31dc610fec7 && cd .. && cd ..
-git clone --depth=1 https://github.com/google/effcee              external/effcee
-git clone --depth=1 https://github.com/google/re2                 external/re2
-git clone --depth=1 --branch v3.13.0.1 https://github.com/protocolbuffers/protobuf external/protobuf
-
 :: #########################################
 :: set up msvc build env
 :: #########################################
@@ -41,6 +33,8 @@
 )
 
 cd %SRC%
+python utils/git-sync-deps --treeless
+
 mkdir build
 cd build
 
diff --git a/utils/git-sync-deps b/utils/git-sync-deps
index 7a7e606..43548fe 100755
--- a/utils/git-sync-deps
+++ b/utils/git-sync-deps
@@ -30,6 +30,13 @@
 """Parse a DEPS file and git checkout all of the dependencies.
 
 Args:
+  --treeless  Clone repos without trees. This is the fast option, useful
+              when you only need a single commit, like on a build machine.
+              Defers getting objects until checkout time.
+              Otherwise clones without blobs.
+              Requires git 2.20 or later.
+              https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/
+
   An optional list of deps_os values.
 
 Environment Variables:
@@ -59,12 +66,14 @@
 import threading
 from builtins import bytes
 
-
 def git_executable():
   """Find the git executable.
 
   Returns:
-      A string suitable for passing to subprocess functions, or None.
+      A triple:
+        A string suitable for passing to subprocess functions, or None.
+        The major version number
+        The minor version number
   """
   envgit = os.environ.get('GIT_EXECUTABLE')
   searchlist = ['git', 'git.exe', 'git.bat']
@@ -72,12 +81,21 @@
     searchlist.insert(0, envgit)
   with open(os.devnull, 'w') as devnull:
     for git in searchlist:
+      major=None
+      minor=None
       try:
-        subprocess.call([git, '--version'], stdout=devnull)
+        version_info = subprocess.check_output([git, '--version']).decode('utf-8')
+        match = re.search("^git version (\d+)\.(\d+)",version_info)
+        print("Using {}".format(version_info))
+        if match:
+          major = int(match.group(1))
+          minor = int(match.group(2))
+        else:
+          continue
       except (OSError,):
         continue
-      return git
-  return None
+      return (git,major,minor)
+  return (None,0,0)
 
 
 DEFAULT_DEPS_PATH = os.path.normpath(
@@ -97,6 +115,9 @@
   sys.stderr.write(__doc__)
 
 
+def looks_like_raw_commit(commit):
+  return re.match('^[a-f0-9]{40}$', commit) is not None
+
 def git_repository_sync_is_disabled(git, directory):
   try:
     disable = subprocess.check_output(
@@ -125,14 +146,14 @@
 
 def status(directory, checkoutable):
   def truncate(s, length):
-    return s if len(s) <= length else s[:(length - 3)] + '...'
+    return s if len(s) <= length else '...' + s[-(length - 3):]
   dlen = 36
   directory = truncate(directory, dlen)
   checkoutable = truncate(checkoutable, 40)
   sys.stdout.write('%-*s @ %s\n' % (dlen, directory, checkoutable))
 
 
-def git_checkout_to_directory(git, repo, checkoutable, directory, verbose):
+def git_checkout_to_directory(git, repo, checkoutable, directory, verbose, treeless):
   """Checkout (and clone if needed) a Git repository.
 
   Args:
@@ -147,13 +168,22 @@
     directory (string) the path into which the repository
               should be checked out.
 
-    verbose (boolean)
+    verbose (boolean): emit status info to stdout
+
+    treeless (boolean): when true, clone without any trees.
 
   Raises an exception if any calls to git fail.
   """
   if not os.path.isdir(directory):
+    # Use blobless or treeless checkouts for faster downloads.
+    # This defers some work to checkout time.
+    # https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/
+    filter = ['--filter=tree:0'] if treeless else ['--filter=blob:none']
+    # If the thing to check out looks like a tag (and not like a commit),
+    # then limit the checkout to that branch.
+    branch = [] if looks_like_raw_commit(checkoutable) else ['--branch={}'.format(checkoutable)]
     subprocess.check_call(
-      [git, 'clone', '--quiet', repo, directory])
+        [git, 'clone', '--quiet', '--single-branch'] + filter + branch + [repo, directory])
 
   if not is_git_toplevel(git, directory):
     # if the directory exists, but isn't a git repo, you will modify
@@ -200,7 +230,7 @@
   return dictionary
 
 
-def git_sync_deps(deps_file_path, command_line_os_requests, verbose):
+def git_sync_deps(deps_file_path, command_line_os_requests, verbose, treeless):
   """Grab dependencies, with optional platform support.
 
   Args:
@@ -210,11 +240,20 @@
         List of strings that should each be a key in the deps_os
         dictionary in the DEPS file.
 
+    verbose (boolean): emit status info to stdout
+
+    treeless (boolean): when true, clone as treeless instead of blobless
+
   Raises git Exceptions.
   """
-  git = git_executable()
+  (git,git_major,git_minor) = git_executable()
   assert git
 
+  # --filter=tree:0 is available in git 2.20 and later
+  if (git_major,git_minor) < (2,20):
+    print("disabling --treeless: git is older than v2.20")
+    treeless = False
+
   deps_file_directory = os.path.dirname(deps_file_path)
   deps_file = parse_file_to_dict(deps_file_path)
   dependencies = deps_file['deps'].copy()
@@ -232,6 +271,7 @@
       if directory.startswith(other_dir + '/'):
         raise Exception('%r is parent of %r' % (other_dir, directory))
   list_of_arg_lists = []
+  print("deps {}".format(dependencies))
   for directory in sorted(dependencies):
     if '@' in dependencies[directory]:
       repo, checkoutable = dependencies[directory].split('@', 1)
@@ -241,7 +281,7 @@
     relative_directory = os.path.join(deps_file_directory, directory)
 
     list_of_arg_lists.append(
-      (git, repo, checkoutable, relative_directory, verbose))
+      (git, repo, checkoutable, relative_directory, verbose, treeless))
 
   multithread(git_checkout_to_directory, list_of_arg_lists)
 
@@ -266,15 +306,14 @@
 def main(argv):
   deps_file_path = os.environ.get('GIT_SYNC_DEPS_PATH', DEFAULT_DEPS_PATH)
   verbose = not bool(os.environ.get('GIT_SYNC_DEPS_QUIET', False))
+  treeless = bool("--treeless" in argv)
+  argv = [x for x in argv if x != "--treeless"]
 
   if '--help' in argv or '-h' in argv:
     usage(deps_file_path)
     return 1
 
-  git_sync_deps(deps_file_path, argv, verbose)
-  # subprocess.check_call(
-  #     [sys.executable,
-  #      os.path.join(os.path.dirname(deps_file_path), 'bin', 'fetch-gn')])
+  git_sync_deps(deps_file_path, argv, verbose, treeless)
   return 0