gs_utils.py: use the boto library instead of google-api-python-client, so we can use .boto file credentials

Also adds these public functions:
- delete_file(bucket, path)
- upload_file(source_path, dest_bucket, dest_path)
and a self-test that exercises all functions.
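
Hypothetical usage sketch (bucket name and paths below are placeholders;
credentials are read from ~/.boto):

  import gs_utils
  gs_utils.upload_file(source_path='/tmp/myfile.txt',
                       dest_bucket='my-bucket',
                       dest_path='mydir/myfile.txt')
  (dirs, files) = gs_utils.list_bucket_contents(bucket='my-bucket',
                                                subdir='mydir')
  gs_utils.delete_file(bucket='my-bucket', path='mydir/myfile.txt')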

BUG=skia:2732
R=jcgregorio@google.com

Review URL: https://codereview.chromium.org/387233003
diff --git a/DEPS b/DEPS
index 3580231..c866727 100644
--- a/DEPS
+++ b/DEPS
@@ -3,8 +3,5 @@
 # Dependencies on outside packages.
 #
 deps = {
-  "third_party/externals/google-api-python-client" : "https://github.com/google/google-api-python-client.git@ecc64a0a2baa4a77f35dec83ad05c6c9ba2d2841",
-  "third_party/externals/httplib2" : "https://github.com/jcgregorio/httplib2.git@7d1b88a3cf34774242bf4c0578c09c0092bb05d8",
-  "third_party/externals/oauth2client" : "https://github.com/google/oauth2client.git@d02b317af0313dcf66755844f5421651af5eb356",
-  "third_party/externals/uritemplate-py" : "https://github.com/uri-templates/uritemplate-py.git@1e780a49412cdbb273e9421974cb91845c124f3f",
+  "third_party/externals/boto" : "https://github.com/boto/boto.git@410909e993746c1b7a2604721d82afc0d2778a8a",
 }
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
index efab8ad..c30295b 100755
--- a/py/utils/gs_utils.py
+++ b/py/utils/gs_utils.py
@@ -1,49 +1,79 @@
 #!/usr/bin/python
 
+# pylint: disable=C0301
 """
 Copyright 2014 Google Inc.
 
 Use of this source code is governed by a BSD-style license that can be
 found in the LICENSE file.
 
-Utilities for accessing Google Cloud Storage.
+Utilities for accessing Google Cloud Storage, using the boto library.
+
+See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html
+for implementation tips.
 """
+# pylint: enable=C0301
 
 # System-level imports
+import errno
 import os
 import posixpath
+import random
+import re
+import shutil
 import sys
+import tempfile
 
 # Imports from third-party code
 TRUNK_DIRECTORY = os.path.abspath(os.path.join(
     os.path.dirname(__file__), os.pardir, os.pardir))
-for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client',
-                      'uritemplate-py']:
+for import_subdir in ['boto']:
   import_dirpath = os.path.join(
       TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
   if import_dirpath not in sys.path:
     # We need to insert at the beginning of the path, to make sure that our
     # imported versions are favored over others that might be in the path.
-    # Also, the google-api-python-client checkout contains an empty
-    # oauth2client directory, which will confuse things unless we insert
-    # our checked-out oauth2client in front of it in the path.
     sys.path.insert(0, import_dirpath)
-try:
-  from googleapiclient.discovery import build as build_service
-except ImportError:
-  # We should not require any googleapiclient dependencies to be
-  # installed at a system level, but in the meanwhile, if developers run into
-  # trouble they can install those system-level dependencies to get unblocked.
-  print ('We should not require any googleapiclient dependencies to be '
-         'installed at a system level, but it seems like some are missing. '
-         'Please install google-api-python-client to get those dependencies; '
-         'directions can be found at https://developers.google.com/'
-         'api-client-library/python/start/installation .  '
-         'More details in http://skbug.com/2641 ')
-  raise
+from boto.gs.connection import GSConnection
+from boto.gs.key import Key
+from boto.s3.bucketlistresultset import BucketListResultSet
+from boto.s3.prefix import Prefix
 
-# Local imports
-import url_utils
+
+def delete_file(bucket, path):
+  """Delete a single file within a GS bucket.
+
+  TODO(epoger): what if bucket or path does not exist?  Should probably raise
+  an exception.  Implement, and add a test to exercise this.
+
+  Args:
+    bucket: GS bucket to delete a file from
+    path: full path (Posix-style) of the file within the bucket to delete
+  """
+  conn = _create_connection()
+  b = conn.get_bucket(bucket_name=bucket)
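+  # A boto Key object addresses a single object within the bucket.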
+  item = Key(b)
+  item.key = path
+  item.delete()
+
+
+def upload_file(source_path, dest_bucket, dest_path):
+  """Upload contents of a local file to Google Storage.
+
+  TODO(epoger): Add the extra parameters provided by upload_file() within
+  https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
+  so we can replace that function with this one.
+
+  Args:
+    source_path: full path (local-OS-style) on local disk to read from
+    dest_bucket: GS bucket to copy the file to
+    dest_path: full path (Posix-style) within that bucket
+  """
+  conn = _create_connection()
+  b = conn.get_bucket(bucket_name=dest_bucket)
+  item = Key(b)
+  item.key = dest_path
+  item.set_contents_from_filename(filename=source_path)
 
 
 def download_file(source_bucket, source_path, dest_path,
@@ -57,44 +87,162 @@
     create_subdirs_if_needed: boolean; whether to create subdirectories as
         needed to create dest_path
   """
-  source_http_url = posixpath.join(
-      'http://storage.googleapis.com', source_bucket, source_path)
-  url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
-                          create_subdirs_if_needed=create_subdirs_if_needed)
+  conn = _create_connection()
+  b = conn.get_bucket(bucket_name=source_bucket)
+  item = Key(b)
+  item.key = source_path
+  if create_subdirs_if_needed:
+    _makedirs_if_needed(os.path.dirname(dest_path))
+  # Write in binary mode so that non-text files survive the download intact.
+  with open(dest_path, 'wb') as f:
+    item.get_contents_to_file(fp=f)
 
 
 def list_bucket_contents(bucket, subdir=None):
   """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
 
-  Uses the API documented at
-  https://developers.google.com/storage/docs/json_api/v1/objects/list
-
   Args:
     bucket: name of the Google Storage bucket
     subdir: directory within the bucket to list, or None for root directory
   """
-  # The GCS command relies on the subdir name (if any) ending with a slash.
-  if subdir and not subdir.endswith('/'):
-    subdir += '/'
-  subdir_length = len(subdir) if subdir else 0
+  # Directory-style listing in GS relies on the prefix (if any) ending in a
+  # slash.
+  prefix = subdir or ''
+  if prefix and not prefix.endswith('/'):
+    prefix += '/'
+  prefix_length = len(prefix)
 
-  storage = build_service('storage', 'v1')
-  command = storage.objects().list(
-      bucket=bucket, delimiter='/', fields='items(name),prefixes',
-      prefix=subdir)
-  results = command.execute()
-
-  # The GCS command returned two subdicts:
-  # prefixes: the full path of every directory within subdir, with trailing '/'
-  # items: property dict for each file object within subdir
-  #        (including 'name', which is full path of the object)
+  conn = _create_connection()
+  b = conn.get_bucket(bucket_name=bucket)
+  lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
   dirs = []
-  for dir_fullpath in results.get('prefixes', []):
-    dir_basename = dir_fullpath[subdir_length:]
-    dirs.append(dir_basename[:-1])  # strip trailing slash
   files = []
-  for file_properties in results.get('items', []):
-    file_fullpath = file_properties['name']
-    file_basename = file_fullpath[subdir_length:]
-    files.append(file_basename)
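+  # The lister yields Key objects for files and Prefix objects for
+  # subdirectories (since we list with delimiter='/'); a Prefix name carries
+  # a trailing slash, which we strip off below.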
+  for item in lister:
+    if isinstance(item, Key):
+      files.append(item.key[prefix_length:])
+    elif isinstance(item, Prefix):
+      dirs.append(item.name[prefix_length:-1])
   return (dirs, files)
+
+
+def _config_file_as_dict(filepath):
+  """Reads a boto-style config file into a dict.
+
+  Parses all lines from the file of this form: key = value
+  TODO(epoger): Create unittest.
+
+  Args:
+    filepath: path to config file on local disk
+
+  Returns: contents of the config file, as a dictionary
+
+  Raises an IOError if the file cannot be opened.
+  """
+  dic = {}
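+  # Matches lines like "gs_access_key_id = ABC123"; section headers such as
+  # "[Credentials]" contain no '=' and are simply skipped.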
+  line_regex = re.compile(r'^\s*(\S+)\s*=\s*(\S+)\s*$')
+  with open(filepath) as f:
+    for line in f:
+      match = line_regex.match(line)
+      if match:
+        (key, value) = match.groups()
+        dic[key] = value
+  return dic
+
+
+def _create_connection(boto_file_path=os.path.join('~', '.boto')):
+  """Returns a GSConnection object we can use to access Google Storage.
+
+  Args:
+    boto_file_path: full path (local-OS-style) on local disk where .boto
+        credentials file can be found
+
+  TODO(epoger): Change this module to be object-based, where __init__() reads
+  the boto file into boto_dict once instead of repeatedly for each operation.
+
+  TODO(epoger): if the file does not exist, rather than raising an exception,
+  create a GSConnection that can operate on public files.
+  """
+  boto_file_path = os.path.expanduser(boto_file_path)
+  print 'Reading boto file from %s' % boto_file_path
+  boto_dict = _config_file_as_dict(filepath=boto_file_path)
+  return GSConnection(
+      gs_access_key_id=boto_dict['gs_access_key_id'],
+      gs_secret_access_key=boto_dict['gs_secret_access_key'])
+
+
+def _makedirs_if_needed(path):
+  """ Creates a directory (and any parent directories needed), if it does not
+  exist yet.
+
+  Args:
+    path: full path of directory to create
+  """
+  try:
+    os.makedirs(path)
+  except OSError as e:
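+    # Only ignore "directory already exists" errors; re-raise anything else.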
+    if e.errno != errno.EEXIST:
+      raise
+
+
+def _run_self_test():
+  bucket = 'chromium-skia-gm'
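+  # Use a randomly named remote directory so that concurrent test runs do
+  # not collide.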
+  remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
+  subdir = 'subdir'
+  filenames_to_upload = ['file1', 'file2']
+
+  # Upload test files to Google Storage.
+  local_src_dir = tempfile.mkdtemp()
+  os.mkdir(os.path.join(local_src_dir, subdir))
+  try:
+    for filename in filenames_to_upload:
+      with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
+        f.write('contents of %s\n' % filename)
+      upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
+                  dest_bucket=bucket,
+                  dest_path=posixpath.join(remote_dir, subdir, filename))
+  finally:
+    shutil.rmtree(local_src_dir)
+
+  # Get a list of the files we uploaded to Google Storage.
+  (dirs, files) = list_bucket_contents(
+      bucket=bucket, subdir=remote_dir)
+  assert dirs == [subdir]
+  assert files == []
+  (dirs, files) = list_bucket_contents(
+      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
+  assert dirs == []
+  assert files == filenames_to_upload
+
+  # Download the files we uploaded to Google Storage, and validate contents.
+  local_dest_dir = tempfile.mkdtemp()
+  try:
+    for filename in filenames_to_upload:
+      download_file(source_bucket=bucket,
+                    source_path=posixpath.join(remote_dir, subdir, filename),
+                    dest_path=os.path.join(local_dest_dir, subdir, filename),
+                    create_subdirs_if_needed=True)
+      with open(os.path.join(local_dest_dir, subdir, filename)) as f:
+        file_contents = f.read()
+      assert file_contents == 'contents of %s\n' % filename
+  finally:
+    shutil.rmtree(local_dest_dir)
+
+  # Delete all the files we uploaded to Google Storage.
+  for filename in filenames_to_upload:
+    delete_file(bucket=bucket,
+                path=posixpath.join(remote_dir, subdir, filename))
+
+  # Confirm that we deleted all the files we uploaded to Google Storage.
+  (dirs, files) = list_bucket_contents(
+      bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
+  assert dirs == []
+  assert files == []
+
+
+# TODO(epoger): How should we exercise this self-test?
+# I avoided using the standard unittest framework, because these Google Storage
+# operations are expensive and require .boto permissions.
+#
+# How can we automatically test this code without wasting too many resources
+# or needing .boto permissions?
+if __name__ == '__main__':
+  _run_self_test()