blob: efab8ad96ed935f2f1ba91ef6d972cd1633bca52 [file] [log] [blame]
Copyright 2014 Google Inc.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
Utilities for accessing Google Cloud Storage.
# System-level imports
import os
import posixpath
import sys
# Imports from third-party code
TRUNK_DIRECTORY = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.pardir, os.pardir))
for import_subdir in ['google-api-python-client', 'httplib2', 'oauth2client',
import_dirpath = os.path.join(
TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir)
if import_dirpath not in sys.path:
# We need to insert at the beginning of the path, to make sure that our
# imported versions are favored over others that might be in the path.
# Also, the google-api-python-client checkout contains an empty
# oauth2client directory, which will confuse things unless we insert
# our checked-out oauth2client in front of it in the path.
sys.path.insert(0, import_dirpath)
from googleapiclient.discovery import build as build_service
except ImportError:
# We should not require any googleapiclient dependencies to be
# installed at a system level, but in the meanwhile, if developers run into
# trouble they can install those system-level dependencies to get unblocked.
print ('We should not require any googleapiclient dependencies to be '
'installed at a system level, but it seems like some are missing. '
'Please install google-api-python-client to get those dependencies; '
'directions can be found at'
'api-client-library/python/start/installation . '
'More details in ')
# Local imports
import url_utils
def download_file(source_bucket, source_path, dest_path,
""" Downloads a single file from Google Cloud Storage to local disk.
source_bucket: GCS bucket to download the file from
source_path: full path (Posix-style) within that bucket
dest_path: full path (local-OS-style) on local disk to copy the file to
create_subdirs_if_needed: boolean; whether to create subdirectories as
needed to create dest_path
source_http_url = posixpath.join(
'', source_bucket, source_path)
url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
def list_bucket_contents(bucket, subdir=None):
""" Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
Uses the API documented at
bucket: name of the Google Storage bucket
subdir: directory within the bucket to list, or None for root directory
# The GCS command relies on the subdir name (if any) ending with a slash.
if subdir and not subdir.endswith('/'):
subdir += '/'
subdir_length = len(subdir) if subdir else 0
storage = build_service('storage', 'v1')
command = storage.objects().list(
bucket=bucket, delimiter='/', fields='items(name),prefixes',
results = command.execute()
# The GCS command returned two subdicts:
# prefixes: the full path of every directory within subdir, with trailing '/'
# items: property dict for each file object within subdir
# (including 'name', which is full path of the object)
dirs = []
for dir_fullpath in results.get('prefixes', []):
dir_basename = dir_fullpath[subdir_length:]
dirs.append(dir_basename[:-1]) # strip trailing slash
files = []
for file_properties in results.get('items', []):
file_fullpath = file_properties['name']
file_basename = file_fullpath[subdir_length:]
return (dirs, files)