make gs_utils.py class-based, so the credentials file can be read just once
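
Callers now construct a GSUtils instance once and reuse it for all
operations. A minimal usage sketch (the bucket and file names below are
hypothetical, and py/utils is assumed to be on PYTHONPATH):

  from gs_utils import GSUtils

  gs = GSUtils()  # the ~/.boto credentials file is read once, here
  gs.upload_file(source_path='/tmp/foo.txt',
                 dest_bucket='my-bucket',
                 dest_path='remote/foo.txt')
  gs.delete_file(bucket='my-bucket', path='remote/foo.txt')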

BUG=skia:2618
R=rmistry@google.com

Review URL: https://codereview.chromium.org/385013004
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
index c30295b..46c9bf1 100755
--- a/py/utils/gs_utils.py
+++ b/py/utils/gs_utils.py
@@ -40,88 +40,110 @@
 from boto.s3.prefix import Prefix
 
 
-def delete_file(bucket, path):
-  """Delete a single file within a GS bucket.
+class GSUtils(object):
+  """Utilities for accessing Google Cloud Storage, using the boto library."""
 
-  TODO(epoger): what if bucket or path does not exist?  Should probably raise
-  an exception.  Implement, and add a test to exercise this.
+  def __init__(self, boto_file_path=os.path.join('~', '.boto')):
+    """Constructor.
 
-  Params:
-    bucket: GS bucket to delete a file from
-    path: full path (Posix-style) of the file within the bucket to delete
-  """
-  conn = _create_connection()
-  b = conn.get_bucket(bucket_name=bucket)
-  item = Key(b)
-  item.key = path
-  item.delete()
+    Params:
+      boto_file_path: full path (local-OS-style) on local disk where the
+          .boto credentials file can be found.  An exception is thrown if
+          this file is missing.
+          TODO(epoger): Change missing-file behavior: allow the caller to
+          operate on public files in Google Storage.
+    """
+    boto_file_path = os.path.expanduser(boto_file_path)
+    print 'Reading boto file from %s' % boto_file_path
+    boto_dict = _config_file_as_dict(filepath=boto_file_path)
+    self._gs_access_key_id = boto_dict['gs_access_key_id']
+    self._gs_secret_access_key = boto_dict['gs_secret_access_key']
 
+  def delete_file(self, bucket, path):
+    """Delete a single file within a GS bucket.
 
-def upload_file(source_path, dest_bucket, dest_path):
-  """Upload contents of a local file to Google Storage.
+    TODO(epoger): What if the bucket or path does not exist?  Should probably
+    raise an exception.  Implement, and add a test to exercise this.
 
-  TODO(epoger): Add the extra parameters provided by upload_file() within
-  https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
-  so we can replace that function with this one.
+    Params:
+      bucket: GS bucket to delete a file from
+      path: full path (Posix-style) of the file within the bucket to delete
+    """
+    conn = self._create_connection()
+    b = conn.get_bucket(bucket_name=bucket)
+    item = Key(b)
+    item.key = path
+    item.delete()
 
-  params:
-    source_path: full path (local-OS-style) on local disk to read from
-    dest_bucket: GCS bucket to copy the file to
-    dest_path: full path (Posix-style) within that bucket
-  """
-  conn = _create_connection()
-  b = conn.get_bucket(bucket_name=dest_bucket)
-  item = Key(b)
-  item.key = dest_path
-  item.set_contents_from_filename(filename=source_path)
+  def upload_file(self, source_path, dest_bucket, dest_path):
+    """Upload contents of a local file to Google Storage.
 
+    TODO(epoger): Add the extra parameters provided by upload_file() within
+    https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
+    so we can replace that function with this one.
 
-def download_file(source_bucket, source_path, dest_path,
-                  create_subdirs_if_needed=False):
-  """ Downloads a single file from Google Cloud Storage to local disk.
+    Params:
+      source_path: full path (local-OS-style) on local disk to read from
+      dest_bucket: GCS bucket to copy the file to
+      dest_path: full path (Posix-style) within that bucket
+    """
+    conn = self._create_connection()
+    b = conn.get_bucket(bucket_name=dest_bucket)
+    item = Key(b)
+    item.key = dest_path
+    item.set_contents_from_filename(filename=source_path)
 
-  Args:
-    source_bucket: GCS bucket to download the file from
-    source_path: full path (Posix-style) within that bucket
-    dest_path: full path (local-OS-style) on local disk to copy the file to
-    create_subdirs_if_needed: boolean; whether to create subdirectories as
-        needed to create dest_path
-  """
-  conn = _create_connection()
-  b = conn.get_bucket(bucket_name=source_bucket)
-  item = Key(b)
-  item.key = source_path
-  if create_subdirs_if_needed:
-    _makedirs_if_needed(os.path.dirname(dest_path))
-  with open(dest_path, 'w') as f:
-    item.get_contents_to_file(fp=f)
+  def download_file(self, source_bucket, source_path, dest_path,
+                    create_subdirs_if_needed=False):
+    """Downloads a single file from Google Cloud Storage to local disk.
 
+    Args:
+      source_bucket: GCS bucket to download the file from
+      source_path: full path (Posix-style) within that bucket
+      dest_path: full path (local-OS-style) on local disk to copy the file to
+      create_subdirs_if_needed: boolean; whether to create subdirectories as
+          needed to create dest_path
+    """
+    conn = self._create_connection()
+    b = conn.get_bucket(bucket_name=source_bucket)
+    item = Key(b)
+    item.key = source_path
+    if create_subdirs_if_needed:
+      _makedirs_if_needed(os.path.dirname(dest_path))
+    with open(dest_path, 'wb') as f:
+      item.get_contents_to_file(fp=f)
 
-def list_bucket_contents(bucket, subdir=None):
-  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
+  def list_bucket_contents(self, bucket, subdir=None):
+    """Returns files in the Google Storage bucket as a (dirs, files) tuple.
 
-  Args:
-    bucket: name of the Google Storage bucket
-    subdir: directory within the bucket to list, or None for root directory
-  """
-  # The GS command relies on the prefix (if any) ending with a slash.
-  prefix = subdir or ''
-  if prefix and not prefix.endswith('/'):
-    prefix += '/'
-  prefix_length = len(prefix) if prefix else 0
+    Args:
+      bucket: name of the Google Storage bucket
+      subdir: directory within the bucket to list, or None for root directory
+    """
+    # The GS command relies on the prefix (if any) ending with a slash.
+    prefix = subdir or ''
+    if prefix and not prefix.endswith('/'):
+      prefix += '/'
+    prefix_length = len(prefix) if prefix else 0
 
-  conn = _create_connection()
-  b = conn.get_bucket(bucket_name=bucket)
-  lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
-  dirs = []
-  files = []
-  for item in lister:
-    t = type(item)
-    if t is Key:
-      files.append(item.key[prefix_length:])
-    elif t is Prefix:
-      dirs.append(item.name[prefix_length:-1])
-  return (dirs, files)
+    conn = self._create_connection()
+    b = conn.get_bucket(bucket_name=bucket)
+    lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
+    dirs = []
+    files = []
+    for item in lister:
+      t = type(item)
+      if t is Key:
+        files.append(item.key[prefix_length:])
+      elif t is Prefix:
+        dirs.append(item.name[prefix_length:-1])
+    return (dirs, files)
+
+  def _create_connection(self):
+    """Returns a GSConnection object we can use to access Google Storage."""
+    return GSConnection(
+        gs_access_key_id=self._gs_access_key_id,
+        gs_secret_access_key=self._gs_secret_access_key)
 
 
 def _config_file_as_dict(filepath):
@@ -148,29 +170,8 @@
   return dic
 
 
-def _create_connection(boto_file_path=os.path.join('~','.boto')):
-  """Returns a GSConnection object we can use to access Google Storage.
-
-  Params:
-    boto_file_path: full path (local-OS-style) on local disk where .boto
-        credentials file can be found
-
-  TODO(epoger): Change this module to be object-based, where __init__() reads
-  the boto file into boto_dict once instead of repeatedly for each operation.
-
-  TODO(epoger): if the file does not exist, rather than raising an exception,
-  create a GSConnection that can operate on public files.
-  """
-  boto_file_path = os.path.expanduser(boto_file_path)
-  print 'Reading boto file from %s' % boto_file_path
-  boto_dict = _config_file_as_dict(filepath=boto_file_path)
-  return GSConnection(
-      gs_access_key_id=boto_dict['gs_access_key_id'],
-      gs_secret_access_key=boto_dict['gs_secret_access_key'])
-
-
 def _makedirs_if_needed(path):
-  """ Creates a directory (and any parent directories needed), if it does not
+  """Creates a directory (and any parent directories needed), if it does not
   exist yet.
 
   Args:
@@ -188,6 +189,7 @@
   remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
   subdir = 'subdir'
   filenames_to_upload = ['file1', 'file2']
+  gs = GSUtils()
 
   # Upload test files to Google Storage.
   local_src_dir = tempfile.mkdtemp()
@@ -196,46 +198,47 @@
     for filename in filenames_to_upload:
       with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
         f.write('contents of %s\n' % filename)
-      upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
-                  dest_bucket=bucket,
-                  dest_path=posixpath.join(remote_dir, subdir, filename))
+      gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
+                     dest_bucket=bucket,
+                     dest_path=posixpath.join(remote_dir, subdir, filename))
   finally:
     shutil.rmtree(local_src_dir)
 
   # Get a list of the files we uploaded to Google Storage.
-  (dirs, files) = list_bucket_contents(
+  (dirs, files) = gs.list_bucket_contents(
       bucket=bucket, subdir=remote_dir)
-  assert dirs == [subdir]
-  assert files == []
-  (dirs, files) = list_bucket_contents(
+  assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
+  assert files == [], '%s == []' % files
+  (dirs, files) = gs.list_bucket_contents(
       bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
-  assert dirs == []
-  assert files == filenames_to_upload
+  assert dirs == [], '%s == []' % dirs
+  assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload)
 
   # Download the files we uploaded to Google Storage, and validate contents.
   local_dest_dir = tempfile.mkdtemp()
   try:
     for filename in filenames_to_upload:
-      download_file(source_bucket=bucket,
-                    source_path=posixpath.join(remote_dir, subdir, filename),
-                    dest_path=os.path.join(local_dest_dir, subdir, filename),
-                    create_subdirs_if_needed=True)
+      gs.download_file(source_bucket=bucket,
+                       source_path=posixpath.join(remote_dir, subdir, filename),
+                       dest_path=os.path.join(local_dest_dir, subdir, filename),
+                       create_subdirs_if_needed=True)
       with open(os.path.join(local_dest_dir, subdir, filename)) as f:
         file_contents = f.read()
-      assert file_contents == 'contents of %s\n' % filename
+      assert file_contents == 'contents of %s\n' % filename, (
+          '%s == "contents of %s\n"' % (file_contents, filename))
   finally:
     shutil.rmtree(local_dest_dir)
 
   # Delete all the files we uploaded to Google Storage.
   for filename in filenames_to_upload:
-    delete_file(bucket=bucket,
-                path=posixpath.join(remote_dir, subdir, filename))
+    gs.delete_file(bucket=bucket,
+                   path=posixpath.join(remote_dir, subdir, filename))
 
   # Confirm that we deleted all the files we uploaded to Google Storage.
-  (dirs, files) = list_bucket_contents(
+  (dirs, files) = gs.list_bucket_contents(
       bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
-  assert dirs == []
-  assert files == []
+  assert dirs == [], '%s == []' % dirs
+  assert files == [], '%s == []' % files
 
 
 # TODO(epoger): How should we exercise this self-test?