Remove no longer needed toolbar layer method.
[chromium-blink-merge.git] / tools / binary_size / run_binary_size_analysis.py
blob241fa64145276302868ba1c21c6eb88c3bd29b21
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Generate a spatial analysis against an arbitrary library.
8 To use, build the 'binary_size_tool' target. Then run this tool, passing
9 in the location of the library to be analyzed along with any other options
10 you desire.
11 """
13 import collections
14 import json
15 import logging
16 import multiprocessing
17 import optparse
18 import os
19 import re
20 import shutil
21 import struct
22 import subprocess
23 import sys
24 import tempfile
25 import time
27 import binary_size_utils
29 # This path change is not beautiful. Temporary (I hope) measure until
30 # the chromium project has figured out a proper way to organize the
31 # library of python tools. http://crbug.com/375725
32 elf_symbolizer_path = os.path.abspath(os.path.join(
33 os.path.dirname(__file__),
34 '..',
35 '..',
36 'build',
37 'android',
38 'pylib'))
39 sys.path.append(elf_symbolizer_path)
40 import symbols.elf_symbolizer as elf_symbolizer # pylint: disable=F0401
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'  # Values: 'p' (path), 'b' (bucket), 's' (symbol).
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000
# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Pretty-print a number of bytes.

  Counts above 1e6 get an 'm' suffix, counts above 1e3 a 'k' suffix;
  anything else is rendered verbatim.
  """
  for threshold, suffix in ((1.0e6, 'm'), (1.0e3, 'k')):
    if byte_count > threshold:
      return '%.1f%s' % (byte_count / threshold, suffix)
  return str(byte_count)
# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Raises KeyError for symbol types this tool does not track.
  """
  human_names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  return human_names[symbol_type]
def _MkChild(node, name):
  """Return the child of |node| named |name|, creating it if absent."""
  children = node[NODE_CHILDREN_KEY]
  if name not in children:
    children[name] = {NODE_NAME_KEY: name,
                      NODE_CHILDREN_KEY: {}}
  return children[name]
def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  # |node| is the tree root; its children are the top-level path nodes.
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    # Count the symbols under each symbol-type bucket to decide whether
    # a split is needed at all.
    count = 0
    for symbol_type, symbol_bucket in old_children.iteritems():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      # Rebuild the (No Path) bucket's children as 'subgroup' nodes
      # holding at most BIG_BUCKET_LIMIT symbols each.
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.iteritems():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
          if index % BIG_BUCKET_LIMIT == 0:
            # Start a fresh subgroup under the (No Path) bucket.
            group_no = (index / BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
def MakeChildrenDictsIntoLists(node):
  """Recursively converts each NODE_CHILDREN_KEY dict into a list.

  During construction children are kept in dicts for fast name lookup;
  the webapp's json expects arrays, so lists are swapped in before
  serialization. Returns the length of the largest child list anywhere
  in the subtree rooted at |node| (leaves have no children key).
  """
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].itervalues():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len
def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      # A symbol name collided with an existing container node; its
      # children are discarded since this node becomes a leaf.
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the nested node-dict tree that the webapp renders.

  |symbols| is an iterable of (name, type, size, path, address) tuples;
  |symbol_path_origin_dir| anchors relative source paths. Returns the
  root node, with NODE_MAX_DEPTH_KEY set to the deepest path seen.
  """
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  cwd = os.path.abspath(os.getcwd())
  for symbol_name, symbol_type, symbol_size, file_path, _address in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path and file_path != "??":
      file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                               file_path))
      # Let the output structure be relative to $CWD if inside $CWD,
      # otherwise relative to the disk root. This is to avoid
      # unnecessary click-through levels in the output.
      if file_path.startswith(cwd + os.sep):
        file_path = file_path[len(cwd):]
      if file_path.startswith('/'):
        file_path = file_path[1:]
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Find pre-existing node in tree, or update if it already exists
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    # Attach the symbol-type bucket and the symbol leaf under the file node.
    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
  """Serializes the compact symbol tree for |symbols| to |outfile|."""
  tree = MakeCompactTree(symbols, symbol_path_origin_dir)
  out = open(outfile, 'w')
  try:
    out.write('var tree_data=')
    # Use separators without whitespace to get a smaller file.
    json.dump(tree, out, separators=(',', ':'))
  finally:
    out.close()
  print('Writing %d bytes json' % os.path.getsize(outfile))
def MakeSourceMap(symbols):
  """Aggregates symbol counts and total sizes per source file.

  Returns a dict keyed by normalized path (or '[no path]' for symbols
  without one) mapping to {'path', 'symbol_count', 'size'} records.
  """
  sources = {}
  for _sym, _symbol_type, size, path, _address in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    record = sources.setdefault(
        key, {'path': path, 'symbol_count': 0, 'size': 0})
    record['size'] += size
    record['symbol_count'] += 1
  return sources
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,}+)   The address
# [\s]+             Whitespace separator
# ([0-9a-f]{8,}+)   The size. From here on out it's all optional.
# [\s]+             Whitespace separator
# (\S?)             The symbol type, which is any non-whitespace char
# [\s*]             Whitespace separator
# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
# [\t]?             Tab separator
# (.*)              The location (filename[:linennum|?][ (discriminator n)]
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
class Progress():
  """Mutable counters shared by the symbolization callbacks below."""
  def __init__(self):
    self.count = 0              # Addresses for which a lookup completed.
    self.skip_count = 0         # nm lines that needed no lookup.
    self.collisions = 0         # Addresses that resolved more than once.
    self.time_last_output = time.time()  # Throttles console updates.
    self.count_last_output = 0  # Value of |count| at the last update.
    self.disambiguations = 0    # Symbols the symbolizer disambiguated.
    self.was_ambiguous = 0      # Symbols the symbolizer found ambiguous.
def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Symbolizes |library| using nm plus parallel addr2line lookups.

  Writes every nm output line to |outfile|; lines whose address was
  resolved get a tab plus 'path:line' appended. |jobs| bounds the
  number of concurrent addr2line processes; |src_path| (when
  |disambiguate| is set) enables the symbolizer's path disambiguation.
  """
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked by the symbolizer for each resolved address.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    # Prints a one-line status update, throttled to once per second.
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      # Lines with a location already, non-matching lines, and empty
      # symbols all fall through to here.
      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  print ''

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))

  # Second pass: write the nm output back out, annotating resolved
  # addresses with their absolute source path and line number.
  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                                  symbol.source_path))
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))
def RunNm(binary, nm_binary):
  """Runs nm over |binary| and returns its stdout.

  Symbols are demangled (-C) and printed with their sizes, largest
  first. Raises Exception carrying nm's stderr (or stdout if stderr is
  empty) when nm exits non-zero.
  """
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # Use the call form of raise: 'raise Exception, x' is legacy syntax
    # that Python 3 rejects; the call form behaves identically on both.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns the parsed list of symbols, generating nm output if needed.

  When |nm_infile| is None, symbols are produced by symbolizing
  |library| and dumped to |outfile| (a kept tempfile if |outfile| is
  None); otherwise |nm_infile| is parsed as pre-existing nm output.
  """
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)

  # open() rather than the Python-2-only file() builtin.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))
# Maps numeric pak resource ids to grit resource names; 'inited' records
# whether GetReadablePakResourceName has tried to populate the map.
PAK_RESOURCE_ID_TO_STRING = { "inited": False }
def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      # Only '#define NAME 1234' style lines are of interest.
      if not line.startswith("#define "):
        continue
      line_data = line.split()
      if len(line_data) != 3:
        continue
      try:
        PAK_RESOURCE_ID_TO_STRING[int(line_data[2])] = line_data[1]
      except ValueError:
        # Value was not numeric; skip this #define.
        pass
def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h"
    # and lines of the type:
    # #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        header_paths = [os.path.join(dirname, f)
                        for f in files if f.endswith('resources.h')]
        for header_path in header_paths:
          LoadPakIdsFromResourceFile(header_path)
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)
def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file.

  Each resource in the pak becomes a data symbol whose 'path' is the pak
  file itself. Appends (name, type, size, path, address) 5-tuples to
  |symbols| so that pak entries match the shape the consumers
  (MakeCompactTree, MakeSourceMap) unpack.
  """
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries)
                             # and one uint8 (encoding of text resources)
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    # Read the index and data.
    data = data[HEADER_LENGTH:]
    for _ in range(num_entries):
      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      data = data[INDEX_ENTRY_SIZE:]
      # The index is expected to hold a sentinel entry after the last
      # resource; peeking at the next entry yields this one's end offset.
      _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      resource_size = next_offset - offset

      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
      symbol_path = pak_file
      symbol_type = 'd'  # Data. Approximation.
      symbol_size = resource_size
      # Bug fix: append 5-tuples. Consumers unpack five fields
      # (name, type, size, path, address); the previous 4-tuple made
      # MakeCompactTree/MakeSourceMap fail on pak symbols. The address
      # field is unused downstream, so None is a safe placeholder.
      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path,
                      None))
500 def _find_in_system_path(binary):
501 """Locate the full path to binary in the system path or return None
502 if not found."""
503 system_path = os.environ["PATH"].split(os.pathsep)
504 for path in system_path:
505 binary_path = os.path.join(path, binary)
506 if os.path.isfile(binary_path):
507 return binary_path
508 return None
def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  # Extract major.minor from the 'GNU addr2line ...' version banner;
  # versions newer than 2.22 are assumed to handle DWARF4.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  # Old tools: only proceed if the binary's debug info is DWARF2.
  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
def main():
  """Parses options, symbolizes the library, and emits the HTML report."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

  %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # Bug fix: cpu_count() was previously wrapped in str(), which under
    # Python 2 compares greater than any int, so the clamp always
    # produced 4 regardless of the actual CPU count.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # Bug fix: the message used "'Could not find ' % opts.pak" with no
    # conversion specifier, raising TypeError instead of naming the file.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)  # 0o755 is valid on Python 2.6+ and 3.

  if opts.library:
    symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
  else:
    # Just a guess. Hopefully all paths in the input file are absolute.
    symbol_path_origin_dir = os.path.abspath(os.getcwd())
  data_js_file_name = os.path.join(opts.destdir, 'data.js')
  DumpCompactTree(symbols, symbol_path_origin_dir, data_js_file_name)
  d3_out = os.path.join(opts.destdir, 'd3')
  if not os.path.exists(d3_out):
    os.makedirs(d3_out, 0o755)
  d3_src = os.path.join(os.path.dirname(__file__),
                        '..',
                        '..',
                        'third_party', 'd3', 'src')
  template_src = os.path.join(os.path.dirname(__file__),
                              'template')
  shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
  shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
  shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
  shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())