#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Download actual GM results for a particular builder.
"""
# System-level imports
import contextlib
import optparse
import os
import posixpath
import re
import shutil
import sys
import urllib
import urllib2
import urlparse

# Imports from within Skia
#
# We need to add the 'gm' and 'tools' directories, so that we can import
# gm_json.py and buildbot_globals.py.
#
# Make sure that these dirs are in the PYTHONPATH, but add them at the *end*
# so any dirs that are already in the PYTHONPATH will be preferred.
#
# TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that
# the tools dir is dependent on the 'gm' dir (to import gm_json.py)?
TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm')
TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
if GM_DIRECTORY not in sys.path:
sys.path.append(GM_DIRECTORY)
if TOOLS_DIRECTORY not in sys.path:
sys.path.append(TOOLS_DIRECTORY)
import buildbot_globals
import gm_json

# Imports from third-party code
APICLIENT_DIRECTORY = os.path.join(
TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client')
if APICLIENT_DIRECTORY not in sys.path:
sys.path.append(APICLIENT_DIRECTORY)
from googleapiclient.discovery import build as build_service

GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket')
DEFAULT_ACTUALS_BASE_URL = (
'http://storage.googleapis.com/%s' % GM_SUMMARIES_BUCKET)
DEFAULT_JSON_FILENAME = 'actual-results.json'


class Download(object):
def __init__(self, actuals_base_url=DEFAULT_ACTUALS_BASE_URL,
json_filename=DEFAULT_JSON_FILENAME,
gm_actuals_root_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL):
"""
Args:
actuals_base_url: URL pointing at the root directory
containing all actual-results.json files, e.g.,
http://domain.name/path/to/dir OR
file:///absolute/path/to/localdir
json_filename: The JSON filename to read from within each directory.
gm_actuals_root_url: Base URL under which the actually-generated-by-bots
GM images are stored.
"""
self._actuals_base_url = actuals_base_url
self._json_filename = json_filename
self._gm_actuals_root_url = gm_actuals_root_url
    self._image_filename_re = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

  def fetch(self, builder_name, dest_dir):
""" Downloads actual GM results for a particular builder.
Args:
builder_name: which builder to download results of
dest_dir: path to directory where the image files will be written;
if the directory does not exist yet, it will be created
TODO(epoger): Display progress info. Right now, it can take a long time
to download all of the results, and there is no indication of progress.
TODO(epoger): Download multiple images in parallel to speed things up.
"""
json_url = posixpath.join(self._actuals_base_url, builder_name,
self._json_filename)
json_contents = urllib2.urlopen(json_url).read()
results_dict = gm_json.LoadFromString(json_contents)
actual_results_dict = results_dict[gm_json.JSONKEY_ACTUALRESULTS]
for result_type in sorted(actual_results_dict.keys()):
results_of_this_type = actual_results_dict[result_type]
if not results_of_this_type:
continue
for image_name in sorted(results_of_this_type.keys()):
(test, config) = self._image_filename_re.match(image_name).groups()
(hash_type, hash_digest) = results_of_this_type[image_name]
source_url = gm_json.CreateGmActualUrl(
test_name=test, hash_type=hash_type, hash_digest=hash_digest,
gm_actuals_root_url=self._gm_actuals_root_url)
dest_path = os.path.join(dest_dir, config, test + '.png')
# TODO(epoger): To speed this up, we should only download files that
# we don't already have on local disk.
copy_contents(source_url=source_url, dest_path=dest_path,
create_subdirs_if_needed=True)
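

# A minimal usage sketch of the Download class (the builder name below is
# hypothetical; real names can be listed with the --list-builders flag below):
#
#   downloader = Download()
#   downloader.fetch(builder_name='Test-Ubuntu12-ShuttleA-GTX660-x86-Release',
#                    dest_dir='/tmp/gm-actuals')
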
def create_filepath_url(filepath):
  """ Returns a file:/// URL pointing at the given filepath on local disk.

  For now, this is only used by unittests, but I anticipate it being useful
  in production, as a way for developers to run rebaseline_server over locally
  generated images.

  TODO(epoger): Move this function, and copy_contents(), into a shared
  utility module. They are generally useful.

  Args:
    filepath: string; path to a file on local disk (may be absolute or relative,
              and the file does not need to exist)

  Returns:
    A file:/// URL pointing at the file. Regardless of whether filepath was
    specified as a relative or absolute path, the URL will contain an
    absolute path to the file.

  Raises:
    An Exception, if filepath is already a URL.
  """
if urlparse.urlparse(filepath).scheme:
raise Exception('"%s" is already a URL' % filepath)
return urlparse.urljoin(
'file:', urllib.pathname2url(os.path.abspath(filepath)))
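

# For example (illustrative output; the absolute path depends on the current
# working directory):
#
#   create_filepath_url('images/foo.png')
#   # -> 'file:///current/working/dir/images/foo.png'
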
def copy_contents(source_url, dest_path, create_subdirs_if_needed=False):
  """ Copies the full contents of the URL 'source_url' into
  filepath 'dest_path'.

  Args:
    source_url: string; complete URL to read from
    dest_path: string; complete filepath to write to (may be absolute or
               relative)
    create_subdirs_if_needed: boolean; whether to create subdirectories as
                              needed to create dest_path

  Raises:
    Some subclass of Exception if unable to read source_url or write dest_path.
  """
if create_subdirs_if_needed:
dest_dir = os.path.dirname(dest_path)
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
with contextlib.closing(urllib.urlopen(source_url)) as source_handle:
with open(dest_path, 'wb') as dest_handle:
shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle)
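

# A usage sketch (hypothetical URL and destination path):
#
#   copy_contents(source_url='http://example.com/images/foo.png',
#                 dest_path='/tmp/images/foo.png',
#                 create_subdirs_if_needed=True)
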
def gcs_list_bucket_contents(bucket, subdir=None):
  """ Returns the contents of a Google Cloud Storage bucket as a
  (dirs, files) tuple.

  Uses the API documented at
  https://developers.google.com/storage/docs/json_api/v1/objects/list

  Args:
    bucket: name of the Google Storage bucket
    subdir: directory within the bucket to list, or None for root directory

  Returns:
    (dirs, files) tuple; both are lists of basenames relative to subdir:
    dirs holds the immediate subdirectories (without trailing slashes), and
    files holds the file objects.
  """
# The GCS command relies on the subdir name (if any) ending with a slash.
if subdir and not subdir.endswith('/'):
subdir += '/'
subdir_length = len(subdir) if subdir else 0
storage = build_service('storage', 'v1')
command = storage.objects().list(
bucket=bucket, delimiter='/', fields='items(name),prefixes',
prefix=subdir)
results = command.execute()
  # The GCS response contains two lists:
  # prefixes: the full path of every directory within subdir, with trailing '/'
  # items: a property dict for each file object within subdir
  #        (including 'name', which is the full path of the object)
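  # For example, listing subdir 'foo' might yield (illustrative values):
  #   {'prefixes': ['foo/bar/', 'foo/baz/'],
  #    'items': [{'name': 'foo/file1.json'}, {'name': 'foo/file2.json'}]}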
dirs = []
for dir_fullpath in results.get('prefixes', []):
dir_basename = dir_fullpath[subdir_length:]
dirs.append(dir_basename[:-1]) # strip trailing slash
files = []
for file_properties in results.get('items', []):
file_fullpath = file_properties['name']
file_basename = file_fullpath[subdir_length:]
files.append(file_basename)
return (dirs, files)
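

# For example, given the illustrative listing above (hypothetical bucket name):
#
#   dirs, files = gcs_list_bucket_contents(bucket='my-bucket', subdir='foo')
#   # dirs  == ['bar', 'baz']
#   # files == ['file1.json', 'file2.json']
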
def main():
parser = optparse.OptionParser()
required_params = []
parser.add_option('--actuals-base-url',
action='store', type='string',
default=DEFAULT_ACTUALS_BASE_URL,
help=('Base URL from which to read files containing JSON '
'summaries of actual GM results; defaults to '
'"%default".'))
required_params.append('builder')
# TODO(epoger): Before https://codereview.chromium.org/309653005 , when this
# tool downloaded the JSON summaries from skia-autogen, it had the ability
# to get results as of a specific revision number. We should add similar
# functionality when retrieving the summaries from Google Storage.
parser.add_option('--builder',
action='store', type='string',
help=('REQUIRED: Which builder to download results for. '
'To see a list of builders, run with the '
'--list-builders option set.'))
required_params.append('dest_dir')
parser.add_option('--dest-dir',
action='store', type='string',
help=('REQUIRED: Directory where all images should be '
'written. If this directory does not exist yet, it '
'will be created.'))
parser.add_option('--json-filename',
action='store', type='string',
default=DEFAULT_JSON_FILENAME,
help=('JSON summary filename to read for each builder; '
'defaults to "%default".'))
parser.add_option('--list-builders', action='store_true',
help=('List all available builders.'))
(params, remaining_args) = parser.parse_args()
if params.list_builders:
dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET)
print '\n'.join(dirs)
return
# Make sure all required options were set,
# and that there were no items left over in the command line.
for required_param in required_params:
if not getattr(params, required_param):
raise Exception('required option \'%s\' was not set' % required_param)
  if remaining_args:
raise Exception('extra items specified in the command line: %s' %
remaining_args)
  downloader = Download(actuals_base_url=params.actuals_base_url,
                        json_filename=params.json_filename)
  downloader.fetch(builder_name=params.builder,
                   dest_dir=params.dest_dir)


if __name__ == '__main__':
  main()