| #!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """This module contains utilities related to Google Storage manipulations.""" |
| |
| import os |
| import posixpath |
| import shutil |
| import tempfile |
| import time |
| |
| from common import chromium_utils |
| from slave import slave_utils |
| |
| import file_utils |
| import shell_utils |
| |
| |
| TIMESTAMP_STARTED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_STARTED' |
| TIMESTAMP_COMPLETED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_COMPLETED' |
| LAST_REBASELINED_BY_FILENAME = 'LAST_REBASELINED_BY' |
| |
| FILES_CHUNK = 500 |
| |
| |
| def delete_storage_object(object_name): |
| """Delete an object on Google Storage.""" |
| gsutil = slave_utils.GSUtilSetup() |
| command = [gsutil] |
| command.extend(['rm', '-R', object_name]) |
| print 'Running command: %s' % command |
| chromium_utils.RunCommand(command) |
| |
| |
| def copy_storage_directory(src_dir, dest_dir, gs_acl='private', |
| http_header_lines=None): |
| """Copy a directory from/to Google Storage. |
| |
| params: |
| src_dir |
| dest_dir |
| gs_acl |
| http_header_lines: a list of HTTP header strings to add, if any |
| |
| The copy operates as a "merge with overwrite": any files in src_dir will be |
| "overlaid" on top of the existing content in dest_dir. Existing files with |
| the same names will be overwritten. |
| """ |
| gsutil = slave_utils.GSUtilSetup() |
| command = [gsutil] |
| if http_header_lines: |
| for http_header_line in http_header_lines: |
| command.extend(['-h', http_header_line]) |
| command.extend(['cp', '-a', gs_acl, '-R', src_dir, dest_dir]) |
| print 'Running command: %s' % command |
| shell_utils.run(command) |
| |
| |
| def move_storage_directory(src_dir, dest_dir): |
| """Move a directory on Google Storage.""" |
| gsutil = slave_utils.GSUtilSetup() |
| command = [gsutil] |
| command.extend(['mv', '-p', src_dir, dest_dir]) |
| print 'Running command: %s' % command |
| chromium_utils.RunCommand(command) |
| |
| |
def list_storage_directory(dest_gsbase, subdir):
  """List the contents of the specified Storage directory.

  Raises an Exception if the gsutil listing command fails.
  """
  gsbase_subdir = posixpath.join(dest_gsbase, subdir)
  status, listing = slave_utils.GSUtilListBucket(gsbase_subdir, [])
  if status != 0:
    raise Exception(
        'Could not list contents of %s in Google Storage!' % gsbase_subdir)

  # Keep only real object entries: drop blank lines, warning/status output
  # (anything not prefixed with the directory), the directory entry itself,
  # and duplicates.
  return [entry for entry in set(listing.splitlines())
          if entry and entry.startswith(gsbase_subdir)
          and entry != gsbase_subdir]
| |
| |
| def does_storage_object_exist(object_name): |
| """Checks if an object exists on Google Storage. |
| |
| Returns True if it exists else returns False. |
| """ |
| gsutil = slave_utils.GSUtilSetup() |
| command = [gsutil] |
| command.extend(['ls', object_name]) |
| print 'Running command: %s' % command |
| return chromium_utils.RunCommand(command) == 0 |
| |
| |
| def download_directory_contents_if_changed(gs_base, gs_relative_dir, local_dir): |
| """Compares the TIMESTAMP_LAST_UPLOAD_COMPLETED and downloads if different. |
| |
| The goal of download_directory_contents_if_changed and |
| upload_directory_contents_if_changed is to attempt to replicate directory |
| level rsync functionality to the Google Storage directories we care about. |
| """ |
| if _are_timestamps_equal(gs_base, gs_relative_dir, local_dir): |
| print '\n\n=======Local directory is current=======\n\n' |
| else: |
| file_utils.create_clean_local_dir(local_dir) |
| gs_source = posixpath.join(gs_base, gs_relative_dir, '*') |
| slave_utils.GSUtilDownloadFile(src=gs_source, dst=local_dir) |
| if not _are_timestamps_equal(gs_base, gs_relative_dir, local_dir): |
| raise Exception('Failed to download from GS: %s' % gs_source) |
| |
| |
| def _get_chunks(seq, n): |
| """Yield successive n-sized chunks from the specified sequence.""" |
| for i in xrange(0, len(seq), n): |
| yield seq[i:i+n] |
| |
| |
def delete_directory_contents(gs_base, gs_relative_dir, files_to_delete):
  """Deletes the specified files from the Google Storage Directory.

  Args:
    gs_base: str - The Google Storage base. Eg: gs://rmistry.
    gs_relative_dir: str - Relative directory to the Google Storage base.
    files_to_delete: Files that should be deleted from the Google Storage
        directory. The files are deleted one at a time. If files_to_delete is
        None or empty then all directory contents are deleted.
  """
  gs_dest = posixpath.join(gs_base, gs_relative_dir)
  if not files_to_delete:
    # No explicit file list: remove the whole directory.
    delete_storage_object(gs_dest)
    return
  for name in files_to_delete:
    delete_storage_object(object_name=posixpath.join(gs_dest, name))
| |
| |
| def upload_directory_contents_if_changed(gs_base, gs_relative_dir, gs_acl, |
| local_dir, force_upload=False, |
| upload_chunks=False, |
| files_to_upload=None): |
| """Compares the TIMESTAMP_LAST_UPLOAD_COMPLETED and uploads if different. |
| |
| Args: |
| gs_base: str - The Google Storage base. Eg: gs://rmistry. |
| gs_relative_dir: str - Relative directory to the Google Storage base. |
| gs_acl: str - ACL to use when uploading to Google Storage. |
| local_dir: str - The local directory to upload. |
| force_upload: bool - Whether upload should be done regardless of timestamps |
| matching or not. |
| upload_chunks: bool - Whether upload should be done in chunks or in a single |
| command. |
| files_to_upload: str seq - Specific files that should be uploaded, if not |
| specified then all files in local_dir are uploaded. If upload_chunks is |
| True then files will be uploaded in chunks else they will be uploaded |
| one at a time. The Google Storage directory is not cleaned before upload |
| if files_to_upload is specified. |
| |
| The goal of download_directory_contents_if_changed and |
| upload_directory_contents_if_changed is to attempt to replicate directory |
| level rsync functionality to the Google Storage directories we care about. |
| |
| Returns True if contents were uploaded, else returns False. |
| """ |
| if not force_upload and _are_timestamps_equal(gs_base, gs_relative_dir, |
| local_dir): |
| print '\n\n=======Local directory is current=======\n\n' |
| return False |
| else: |
| local_src = os.path.join(local_dir, '*') |
| gs_dest = posixpath.join(gs_base, gs_relative_dir) |
| timestamp_value = time.time() |
| |
| if not files_to_upload: |
| print '\n\n=======Delete Storage directory before uploading=======\n\n' |
| delete_storage_object(gs_dest) |
| |
| print '\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_STARTED=======\n\n' |
| write_timestamp_file( |
| timestamp_file_name=TIMESTAMP_STARTED_FILENAME, |
| timestamp_value=timestamp_value, gs_base=gs_base, |
| gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl) |
| |
| if upload_chunks: |
| if files_to_upload: |
| local_files = [ |
| os.path.join(local_dir, local_file) |
| for local_file in files_to_upload] |
| else: |
| local_files = [ |
| os.path.join(local_dir, local_file) |
| for local_file in os.listdir(local_dir)] |
| for files_chunk in _get_chunks(local_files, FILES_CHUNK): |
| gsutil = slave_utils.GSUtilSetup() |
| command = [gsutil, 'cp'] + files_chunk + [gs_dest] |
| if chromium_utils.RunCommand(command) != 0: |
| raise Exception( |
| 'Could not upload the chunk to Google Storage! The chunk: %s' |
| % files_chunk) |
| else: |
| if files_to_upload: |
| for file_to_upload in files_to_upload: |
| if slave_utils.GSUtilDownloadFile( |
| src=os.path.join(local_dir, file_to_upload), dst=gs_dest) != 0: |
| raise Exception( |
| 'Could not upload %s to Google Storage!' % file_to_upload) |
| else: |
| if slave_utils.GSUtilDownloadFile(src=local_src, dst=gs_dest) != 0: |
| raise Exception('Could not upload %s to Google Storage!' % local_src) |
| |
| print '\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_COMPLETED=======\n\n' |
| write_timestamp_file( |
| timestamp_file_name=TIMESTAMP_COMPLETED_FILENAME, |
| timestamp_value=timestamp_value, gs_base=gs_base, |
| gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl) |
| return True |
| |
| |
def _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
  """Compares the local TIMESTAMP with the TIMESTAMP from Google Storage.

  Returns True iff the local TIMESTAMP_LAST_UPLOAD_COMPLETED file exists and
  its (stripped) contents equal the copy downloaded from Google Storage.
  """
  local_timestamp_file = os.path.join(local_dir, TIMESTAMP_COMPLETED_FILENAME)
  # Make sure that the local TIMESTAMP file exists.
  if not os.path.exists(local_timestamp_file):
    return False

  # Get the timestamp file from Google Storage. mkstemp() returns an *open*
  # OS-level file descriptor; the original code discarded it, leaking one fd
  # per call, and never deleted the temp file. Close the fd immediately and
  # remove the file when done.
  src = posixpath.join(gs_base, gs_relative_dir, TIMESTAMP_COMPLETED_FILENAME)
  temp_fd, temp_file = tempfile.mkstemp()
  os.close(temp_fd)
  try:
    # If the download fails the temp file stays empty, which compares unequal
    # to a non-empty local timestamp (same outcome as the original code).
    slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)

    local_file_obj = open(local_timestamp_file, 'r')
    try:
      local_timestamp = local_file_obj.read().strip()
    finally:
      local_file_obj.close()

    storage_file_obj = open(temp_file, 'r')
    try:
      storage_timestamp = storage_file_obj.read().strip()
    finally:
      storage_file_obj.close()

    return local_timestamp == storage_timestamp
  finally:
    os.remove(temp_file)
| |
| |
def read_timestamp_file(timestamp_file_name, gs_base, gs_relative_dir):
  """Reads the specified TIMESTAMP file from the specified GS dir.

  Args:
    timestamp_file_name: str - Name of the timestamp file to read.
    gs_base: str - The Google Storage base. Eg: gs://rmistry.
    gs_relative_dir: str - Relative directory to the Google Storage base.

  Returns:
    The timestamp value as a string, or the string "0" if the file is empty
    or could not be downloaded. (Note: this is the string "0", not int 0.)
  """
  src = posixpath.join(gs_base, gs_relative_dir, timestamp_file_name)
  # mkstemp() returns an *open* OS-level file descriptor; the original code
  # discarded it, leaking one fd per call, and never deleted the temp file.
  # Close the fd immediately and remove the file when done.
  temp_fd, temp_file = tempfile.mkstemp()
  os.close(temp_fd)
  try:
    slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)
    storage_file_obj = open(temp_file, 'r')
    try:
      timestamp_value = storage_file_obj.read().strip()
    finally:
      storage_file_obj.close()
    return timestamp_value if timestamp_value else "0"
  finally:
    os.remove(temp_file)
| |
| |
def write_timestamp_file(timestamp_file_name, timestamp_value, gs_base=None,
                         gs_relative_dir=None, gs_acl=None, local_dir=None):
  """Adds a timestamp file to a Google Storage and/or a Local Directory.

  If gs_base, gs_relative_dir and gs_acl are provided then the timestamp is
  written to Google Storage. If local_dir is provided then the timestamp is
  written to a local directory.
  """
  # Stage the timestamp in the system temp dir, then copy it to its targets.
  staged_path = os.path.join(tempfile.gettempdir(), timestamp_file_name)
  staged_file = open(staged_path, 'w')
  try:
    staged_file.write(str(timestamp_value))
  finally:
    staged_file.close()

  if local_dir:
    shutil.copyfile(staged_path, os.path.join(local_dir, timestamp_file_name))
  if gs_base and gs_relative_dir and gs_acl:
    slave_utils.GSUtilCopyFile(filename=staged_path, gs_base=gs_base,
                               subdir=gs_relative_dir, gs_acl=gs_acl)