|  | #!/usr/bin/env python | 
|  | # Copyright (c) 2015 The Chromium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  |  | 
|  |  | 
|  | """Parse an LLVM coverage report to generate useable results.""" | 
|  |  | 
|  |  | 
|  | import argparse | 
|  | import json | 
|  | import os | 
|  | import re | 
|  | import subprocess | 
|  | import sys | 
|  |  | 
|  |  | 
|  | def _fix_filename(filename): | 
|  | """Return a filename which we can use to identify the file. | 
|  |  | 
|  | The file paths printed by llvm-cov take the form: | 
|  |  | 
|  | /path/to/repo/out/dir/../../src/filename.cpp | 
|  |  | 
|  | And then they're truncated to 22 characters with leading ellipses: | 
|  |  | 
|  | ...../../src/filename.cpp | 
|  |  | 
|  | This makes it really tough to determine whether the file actually belongs in | 
|  | the Skia repo.  This function strips out the leading junk so that, if the file | 
|  | exists in the repo, the returned string matches the end of some relative path | 
|  | in the repo. This doesn't guarantee correctness, but it's about as close as | 
|  | we can get. | 
|  | """ | 
|  | return filename.split('..')[-1].lstrip('./') | 
|  |  | 
|  |  | 
def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.

  Args:
    filename: string; a (possibly truncated) file path from llvm-cov output.
    all_files: list of strings; repo-relative paths of all checked-in files.

  Returns:
    The unique matching entry of all_files, or None.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  elif len(matched) > 1:
    # Ambiguous match; warn and skip this file. sys.stderr.write works under
    # both Python 2 and 3, unlike the Python 2-only "print >>" statement.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None
|  |  | 
|  |  | 
def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Values are lists which take the form (lineno, coverage, code), where
  coverage is an int execution count or None for non-executable lines.

  Args:
    report: string; full text of an llvm-cov report.

  Returns:
    dict mapping checked-in (repo-relative) file names to per-line data.
    Lines belonging to files not found in the repo are discarded.
  """
  # Build a sorted list of all checked-in files, relative to cwd, skipping
  # hidden files/dirs, .pyc files, and vendored third_party/externals code.
  all_files = []
  for root, dirs, files in os.walk(os.getcwd()):
    if 'third_party/externals' in root:
      continue
    files = [f for f in files if not (f[0] == '.' or f.endswith('.pyc'))]
    # Assigning to dirs[:] prunes hidden directories from the walk in-place.
    dirs[:] = [d for d in dirs if not d[0] == '.']
    for name in files:
      # Strip the cwd prefix (plus the path separator) to get relative paths,
      # so that suffix matching in _file_in_repo works.
      all_files.append(os.path.join(root[(len(os.getcwd()) + 1):], name))
  all_files.sort()

  lines = report.splitlines()
  current_file = None
  file_lines = []
  files = {}
  not_checked_in = '%' # Use this as the file name for not-checked-in files.
  for line in lines:
    # A line that starts with a path followed by ':' begins a new file
    # section (re.match anchors at the start; data lines begin with spaces).
    m = re.match('([a-zA-Z0-9\./_-]+):', line)
    if m:
      # Flush the lines collected for the previous file, if any.
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      match_filename = _file_in_repo(m.groups()[0], all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
    else:
      if current_file != not_checked_in:
        # Skip separator rules and histogram/annotation lines.
        skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line)
        if line and not skip:
          # Data lines look like "  <count>|  <lineno>|<source code>".
          cov, linenum, code = line.split('|', 2)
          cov = cov.strip()
          if cov:
            cov = int(cov)
          else:
            cov = None # We don't care about coverage for this line.
          linenum = int(linenum.strip())
          # Sanity-check that the report's line numbers are contiguous.
          assert linenum == len(file_lines) + 1
          # NOTE: str.decode is Python 2 only; under Python 3 the report
          # would already be text and this call would need to be removed.
          file_lines.append((linenum, cov, code.decode('utf-8', 'replace')))
  # Flush the final file section. The loop above only stores a file's lines
  # when the NEXT file header is encountered, so without this the last file
  # in the report would be silently dropped.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files
|  |  | 
|  |  | 
|  |  | 
|  | def _testname(filename): | 
|  | """Transform the file name into an ingestible test name.""" | 
|  | return re.sub(r'[^a-zA-Z0-9]', '_', filename) | 
|  |  | 
|  |  | 
def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  Args:
    results: list of (percent, lines_not_covered, filename) tuples.
    properties: dict of build properties to copy into the output.
    key: dict identifying this bot.

  Returns:
    dict in nanobench's JSON layout.
  """
  # Copy over the properties first, then set the 'key' and 'results' keys,
  # in order to avoid bad formatting in case the user passes in a properties
  # dict containing those keys.
  rv = dict(properties)
  rv['key'] = key
  per_test = {}
  for percent, not_covered_lines, f in results:
    per_test[_testname(f)] = {
        'coverage': {
            'percent': percent,
            'lines_not_covered': not_covered_lines,
            'options': {
                'fullname': f,
                'dir': os.path.dirname(f),
                'source_type': 'coverage',
            },
        },
    }
  rv['results'] = per_test
  return rv
|  |  | 
|  |  | 
|  | def _parse_key_value(kv_list): | 
|  | """Return a dict whose key/value pairs are derived from the given list. | 
|  |  | 
|  | For example: | 
|  |  | 
|  | ['k1', 'v1', 'k2', 'v2'] | 
|  | becomes: | 
|  |  | 
|  | {'k1': 'v1', | 
|  | 'k2': 'v2'} | 
|  | """ | 
|  | if len(kv_list) % 2 != 0: | 
|  | raise Exception('Invalid key/value pairs: %s' % kv_list) | 
|  |  | 
|  | rv = {} | 
|  | for i in xrange(len(kv_list) / 2): | 
|  | rv[kv_list[i*2]] = kv_list[i*2+1] | 
|  | return rv | 
|  |  | 
|  |  | 
|  | def _get_per_file_summaries(line_by_line): | 
|  | """Summarize the full line-by-line coverage report by file.""" | 
|  | per_file = [] | 
|  | for filepath, lines in line_by_line.iteritems(): | 
|  | total_lines = 0 | 
|  | covered_lines = 0 | 
|  | for _, cov, _ in lines: | 
|  | if cov is not None: | 
|  | total_lines += 1 | 
|  | if cov > 0: | 
|  | covered_lines += 1 | 
|  | if total_lines > 0: | 
|  | per_file.append((float(covered_lines)/float(total_lines)*100.0, | 
|  | total_lines - covered_lines, | 
|  | filepath)) | 
|  | return per_file | 
|  |  | 
|  |  | 
def main():
  """Generate useful data from a coverage report.

  Reads the llvm-cov report named by --report, optionally dumps the parsed
  line-by-line data as JSON (--linebyline), and optionally writes per-file
  summaries in nanobench JSON format (--nanobench, which also requires
  --key and --properties).
  """
  # Parse args.
  parser = argparse.ArgumentParser()
  parser.add_argument('--report', help='input file; an llvm coverage report.',
                      required=True)
  parser.add_argument('--nanobench', help='output file for nanobench data.')
  parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  parser.add_argument('--linebyline',
                      help='output file for line-by-line JSON data.')
  args = parser.parse_args()

  if args.nanobench and not (args.key and args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  with open(args.report) as report_file:
    report_contents = report_file.read()

  line_by_line = _get_per_file_per_line_coverage(report_contents)

  if args.linebyline:
    with open(args.linebyline, 'w') as out:
      json.dump(line_by_line, out)

  if args.nanobench:
    # Parse the key and properties for use in the nanobench JSON output.
    key = _parse_key_value(args.key)
    properties = _parse_key_value(args.properties)

    # Get per-file summaries.
    summaries = _get_per_file_summaries(line_by_line)

    # Write results.
    nanobench_data = _nanobench_json(summaries, properties, key)
    with open(args.nanobench, 'w') as out:
      json.dump(nanobench_data, out)
|  |  | 
|  |  | 
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()