| #!/usr/bin/env python3 |
| |
| # Copyright 2021 Google LLC |
| # |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| |
| # This script is written to process the output from bloaty, read via stdin |
| # The easiest way to use the script: |
| # |
| # bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html |
| # |
| # Open the resulting .html file in your browser. |
| |
| # TODO: Deal with symbols vs. fullsymbols, even both? |
| # TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::') |
| # TODO: Deal with duplicate symbols better. These are actually good targets for optimization. |
| # They are sometimes static functions in headers (so they appear in multiple .o files), |
| # There are also symbols that appear multiple times due to inlining (eg, kNoCropRect). |
| # TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied |
| # to the header by nm, and then to one caller (at random) by bloaty. They're not inlined, |
| # though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer? |
| # Ultimately, this seems like an issue with bloaty and/or debug information itself. |
| |
| import os |
| import sys |
| |
# Maps every path node that has already been emitted to its parent path
# ("ROOT" for top-level entries). Membership in this dict is what guarantees
# that each hierarchy row is printed exactly once.
parent_map = {}


def add_path(path):
    """Print the data-table rows that place `path` in the node hierarchy.

    For a filepath such as "foo/bar/baz.cpp" this prints, ancestors first:

        ['foo (Path)', 'ROOT', 0],
        ['foo/bar (Path)', 'foo (Path)', 0],
        ['foo/bar/baz.cpp (Path)', 'foo/bar (Path)', 0],

    Rows for nodes already recorded in `parent_map` are not printed again, so
    calling this for several files in the same directory emits each directory
    once.

    A " (Path)" suffix is added to every path node to eliminate the chance of
    a path name colliding with a symbol name. This matters because
    google.visualization.TreeMap requires node names to be unique, and a file
    such as test/foo/bar.cpp would create a node named "test", which could
    collide with a symbol named "test" defined in a C++ file.

    Assumptions made:
      - No C++ symbol ends with " (Path)".
      - No C++ symbol is named "ROOT".
      - `path` is already safe to embed in a single-quoted JS string literal.

    Args:
        path: File or directory path, split with `os.path.split`.
    """
    # Walk upwards until we reach a node that has already been emitted (or the
    # top of the path), remembering the rows we still owe.
    pending = []
    node = path
    while node not in parent_map:
        head = os.path.split(node)[0]
        # `head == node` happens for a filesystem root such as "/", where
        # os.path.split returns the input unchanged; without this check the
        # walk would never terminate.
        is_top_level = not head or head == node
        parent_map[node] = "ROOT" if is_top_level else head
        # Decide the parent label from the structure, not from the stored
        # string, so a directory literally named "ROOT" still gets its
        # " (Path)" suffix and is not mistaken for the real root.
        parent_label = "ROOT" if is_top_level else "%s (Path)" % head
        pending.append((node, parent_label))
        if is_top_level:
            break
        node = head

    # Emit outermost ancestors first, matching the order a reader expects.
    for node, parent_label in reversed(pending):
        print("['%s (Path)', '%s', 0]," % (node, parent_label))
| |
def _js_escape(text):
    """Escape `text` for embedding inside a single-quoted JavaScript string."""
    # Backslashes must be doubled first, otherwise the backslashes introduced
    # for the quotes would themselves get doubled.
    return text.replace("\\", "\\\\").replace("'", "\\'")


def main():
    """Convert bloaty TSV output read from stdin into a treemap HTML page.

    The input is expected to have one header row followed by rows of exactly
    four tab-separated columns: file path, symbol, VM size and file size.
    The generated HTML (a google.visualization.TreeMap sized by file size) is
    written to stdout.

    Diagnostics are written to stderr, because stdout is normally redirected
    into the .html file and errors printed there would go unnoticed.

    Raises:
        SystemExit: With status 1 when a row does not have four columns, or
            when an absolute path does not contain "third_party".
    """
    # HTML/script header, plus the first two (fixed) rows of the data table
    print("""
<html>
<head>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script type="text/javascript">
google.charts.load('current', {'packages':['treemap']});
google.charts.setOnLoadCallback(drawChart);
function drawChart() {
const data = google.visualization.arrayToDataTable([
['Name', 'Parent', 'Size'],
['ROOT', null, 0],""")

    symbol_frequencies = {}

    # Skip header row. The default keeps empty input from raising StopIteration.
    # TODO: In the future, we could use this to automatically detect the source columns
    next(sys.stdin, None)

    for line in sys.stdin:
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of input).
            continue

        vals = line.split("\t")
        if len(vals) != 4:
            print("ERROR: Failed to match line\n" + line, file=sys.stderr)
            sys.exit(1)
        filepath, symbol, _vmsize, filesize = vals

        # Skip any entry where the filepath or symbol starts with '['
        # These tend to be section meta-data and debug information
        if filepath.startswith("[") or symbol.startswith("["):
            continue

        # Strip the leading ../../ from paths
        while filepath.startswith("../"):
            filepath = filepath[3:]

        # Files in third_party sometimes have absolute paths. Strip those:
        if filepath.startswith("/"):
            rel_path_start = filepath.find("third_party")
            if rel_path_start < 0:
                print("ERROR: Unexpected absolute path:\n" + filepath, file=sys.stderr)
                sys.exit(1)
            filepath = filepath[rel_path_start:]

        # Both names end up inside single-quoted JS string literals. Symbols
        # involving C++ lambdas can contain single quotes; escape the path the
        # same way so the row printed here and the rows printed by add_path
        # refer to the same node name.
        symbol = _js_escape(symbol)
        filepath = _js_escape(filepath)

        # Ensure that we've added intermediate nodes for all portions of this file path
        add_path(filepath)

        # Ensure that our final symbol name is unique (a repeated "foo" symbol becomes "foo_1",
        # "foo_2", etc.). This assumes no real symbol is already named "foo_1".
        freq = symbol_frequencies.get(symbol, 0)
        symbol_frequencies[symbol] = freq + 1
        if freq:
            symbol += "_" + str(freq)

        # Append another row for our sanitized data
        print("['%s', '%s (Path)', %d]," % (symbol, filepath, int(filesize)))

    # HTML/script footer. The tooltip label is HTML-escaped in a single global
    # pass using numeric character references, so every '&', '<' and '>' is
    # covered (a string-pattern replace would only touch the first match).
    print(""" ]);
tree = new google.visualization.TreeMap(document.getElementById('chart_div'));
tree.draw(data, {
generateTooltip: showTooltip
});

function showTooltip(row, size, value) {
const escapedLabel = data.getValue(row, 0)
.replace(/[&<>]/g, (c) => '&#' + c.charCodeAt(0) + ';');
return `<div style="background:#fd9; padding:10px; border-style:solid">
<span style="font-family:Courier"> ${escapedLabel} <br>
Size: ${size} </div>`;
}
}
</script>
</head>
<body>
<div id="chart_div" style="width: 100%; height: 100%;"></div>
</body>
</html>""")
| |
# Script entry point: generate the treemap only when this file is executed
# directly, so importing it as a module has no side effects.
if __name__ == "__main__":
    main()