git_utils.py: allow checkouts of local repositories at any commithash along the master branch

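Needed so that we can *efficiently* create new GM/SKP baselines on a shared
rebaseline_server instance: checkouts pinned to particular commithashes are
created as copies of the local Skia checkout, rather than by checking out
copies of the main Skia repo across the internet.

NewGitCheckout now performs the actual checkout in __enter__ rather than in
__init__, so callers must create it using a 'with' statement.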

BUG=skia:1918
NOTREECHECKS=true
R=borenet@google.com

Review URL: https://codereview.chromium.org/484143002
diff --git a/py/utils/git_utils.py b/py/utils/git_utils.py
index 52ad83a..fec79d7 100644
--- a/py/utils/git_utils.py
+++ b/py/utils/git_utils.py
@@ -152,25 +152,28 @@
 class NewGitCheckout(object):
   """Creates a new local checkout of a Git repository."""
 
-  def __init__(self, repository, refspec=None, subdir=None,
-               containing_dir=None):
-    """Check out a new local copy of the repository.
+  def __init__(self, repository, refspec=None, commit='HEAD',
+               subdir=None, containing_dir=None):
+    """Set parameters for this local copy of a Git repository.
 
     Because this is a new checkout, rather than a reference to an existing
     checkout on disk, it is safe to assume that the calling thread is the
     only thread manipulating the checkout.
 
-    You can use the 'with' statement to create this object in such a way that
-    it cleans up after itself:
+    You must use the 'with' statement to create this object:
 
     with NewGitCheckout(*args) as checkout:
       # use checkout instance
     # the checkout is automatically cleaned up here
 
     Args:
-      repository: name of the remote repository
-      refspec: an arbitrary remote ref (e.g., the name of a branch);
-          if None, allow the git command to pick a default
+      repository: URL of the remote repository (e.g.,
+          'https://skia.googlesource.com/common') or path to a local repository
+          (e.g., '/path/to/repo/.git') to check out a copy of
+      refspec: which refs (e.g., a branch name) to fetch from the repository;
+          if None, git-fetch will choose the default refs to fetch
+      commit: commit hash, branch, or tag within refspec, indicating what point
+          to update the local checkout to
       subdir: if specified, the caller only wants access to files within this
           subdir in the repository.
           For now, we check out the entire repository regardless of this param,
@@ -180,19 +183,14 @@
           within this directory; otherwise, a system-dependent default location
           will be used, as determined by tempfile.mkdtemp()
     """
-    # _git_root points to the tree holding the git checkout in its entirety;
-    # _file_root points to the files the caller wants to look at
-    self._git_root = tempfile.mkdtemp(dir=containing_dir)
-    if subdir:
-      self._file_root = os.path.join(self._git_root, subdir)
-    else:
-      self._file_root = self._git_root
+    self._repository = repository
+    self._refspec = refspec
+    self._commit = commit
+    self._subdir = subdir
+    self._containing_dir = containing_dir
+    self._git_root = None
+    self._file_root = None
 
-    pull_cmd = [GIT, 'pull', repository]
-    if refspec:
-      pull_cmd.append(refspec)
-    self._run_in_git_root(args=[GIT, 'init'])
-    self._run_in_git_root(args=pull_cmd)
 
   @property
   def root(self):
@@ -209,6 +207,28 @@
         args=[GIT, 'rev-parse', 'HEAD']).strip()
 
   def __enter__(self):
+    """Check out a new local copy of the repository.
+
+    Uses the parameters that were passed into the constructor.
+    """
+    # _git_root points to the tree holding the git checkout in its entirety;
+    # _file_root points to the files the caller wants to look at
+    self._git_root = tempfile.mkdtemp(dir=self._containing_dir)
+    if self._subdir:
+      self._file_root = os.path.join(self._git_root, self._subdir)
+    else:
+      self._file_root = self._git_root
+
+    local_branch_name = 'local'
+    self._run_in_git_root(args=[GIT, 'init'])
+    fetch_cmd = [GIT, 'fetch', self._repository]
+    if self._refspec:
+      fetch_cmd.append(self._refspec)
+    self._run_in_git_root(args=fetch_cmd)
+    self._run_in_git_root(args=[GIT, 'merge', 'FETCH_HEAD'])
+    self._run_in_git_root(args=[GIT, 'branch', local_branch_name, self._commit])
+    self._run_in_git_root(args=[GIT, 'checkout', local_branch_name])
+
     return self
 
   # pylint: disable=W0622
diff --git a/py/utils/git_utils_manualtest.py b/py/utils/git_utils_manualtest.py
index 720a9e9..a1acaac 100755
--- a/py/utils/git_utils_manualtest.py
+++ b/py/utils/git_utils_manualtest.py
@@ -18,10 +18,12 @@
 import git_utils
 
 
-# A git repo we can use for tests.
-REPO = 'https://skia.googlesource.com/common'
+# A git repo we can use for tests, with local and remote copies.
+LOCAL_REPO = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), os.pardir, os.pardir, '.git'))
+REMOTE_REPO = 'https://skia.googlesource.com/common'
 
-# A file in some subdirectory within REPO.
+# A file in some subdirectory within the test repo.
 REPO_FILE = os.path.join('py', 'utils', 'git_utils.py')
 
 
@@ -29,7 +31,7 @@
 
   def test_defaults(self):
     """Test NewGitCheckout created using default parameters."""
-    with git_utils.NewGitCheckout(repository=REPO) as checkout:
+    with git_utils.NewGitCheckout(repository=LOCAL_REPO) as checkout:
       filepath = os.path.join(checkout.root, REPO_FILE)
       self.assertTrue(
           os.path.exists(filepath),
@@ -39,6 +41,18 @@
         os.path.exists(filepath),
         'file %s should not exist' % filepath)
 
+  def test_remote(self):
+    """Test NewGitCheckout with a remote repo.
+
+    This makes requests across the network, so we may not want to run it
+    very often...
+    """
+    with git_utils.NewGitCheckout(repository=REMOTE_REPO) as checkout:
+      filepath = os.path.join(checkout.root, REPO_FILE)
+      self.assertTrue(
+          os.path.exists(filepath),
+          'file %s should exist' % filepath)
+
   def test_subdir(self):
     """Create NewGitCheckout with a specific subdirectory."""
     subdir = os.path.dirname(REPO_FILE)
@@ -46,7 +60,7 @@
 
     containing_dir = tempfile.mkdtemp()
     try:
-      with git_utils.NewGitCheckout(repository=REPO, subdir=subdir,
+      with git_utils.NewGitCheckout(repository=LOCAL_REPO, subdir=subdir,
                                     containing_dir=containing_dir) as checkout:
         self.assertTrue(
             checkout.root.startswith(containing_dir),
@@ -59,8 +73,8 @@
     finally:
       os.rmdir(containing_dir)
 
-  def test_refspec(self):
-    """Create NewGitCheckout with a specific refspec.
+  def test_commit(self):
+    """Create NewGitCheckout with a specific commit.
 
     This test depends on the fact that the whitespace.txt file was added to the
     repo in a particular commit.
@@ -72,7 +86,7 @@
     hash_with_file = 'c2200447734f13070fb3b2808dea58847241ab0e'
 
     with git_utils.NewGitCheckout(
-        repository=REPO, refspec=hash_without_file) as checkout:
+        repository=LOCAL_REPO, commit=hash_without_file) as checkout:
       filepath = os.path.join(checkout.root, filename)
       self.assertEquals(
           hash_without_file, checkout.commithash(),
@@ -82,7 +96,7 @@
           'file %s should not exist' % filepath)
 
     with git_utils.NewGitCheckout(
-        repository=REPO, refspec=hash_with_file) as checkout:
+        repository=LOCAL_REPO, commit=hash_with_file) as checkout:
       filepath = os.path.join(checkout.root, filename)
       self.assertEquals(
           hash_with_file, checkout.commithash(),