tools/binary_size/run_binary_size_analysis.py

   1 #!/usr/bin/env python
   2 # Copyright 2014 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Generate a spatial analysis against an arbitrary library.
   7
   8 To use, build the 'binary_size_tool' target. Then run this tool, passing
   9 in the location of the library to be analyzed along with any other options
  10 you desire.
  11 """
  12
  13 import collections
  14 import json
  15 import logging
  16 import multiprocessing
  17 import optparse
  18 import os
  19 import re
  20 import shutil
  21 import struct
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26
  27 import binary_size_utils
  28
  29 # This path change is not beautiful. Temporary (I hope) measure until
  30 # the chromium project has figured out a proper way to organize the
  31 # library of python tools. http://crbug.com/375725
  32 elf_symbolizer_path = os.path.abspath(os.path.join(
  33     os.path.dirname(__file__),
  34     '..',
  35     '..',
  36     'build',
  37     'android',
  38     'pylib'))
  39 sys.path.append(elf_symbolizer_path)
  40 import symbols.elf_symbolizer as elf_symbolizer  # pylint: disable=F0401
  41
  42
# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
# Kind of node: 'p' for path, 'b' for symbol-type bucket, 's' for symbol.
NODE_TYPE_KEY = 'k'
# Name of the node: a path element, a symbol type or a symbol name.
NODE_NAME_KEY = 'n'
# Child nodes. A dict keyed by child name while the tree is being built;
# MakeChildrenDictsIntoLists() turns it into a list. Removed from symbol
# (leaf) nodes.
NODE_CHILDREN_KEY = 'children'
# Symbol type stored on bucket and symbol nodes.
NODE_SYMBOL_TYPE_KEY = 't'
# Symbol size stored on symbol nodes.
NODE_SYMBOL_SIZE_KEY = 'value'
# Stored on the root node: the largest depth of any symbol in the tree.
NODE_MAX_DEPTH_KEY = 'maxDepth'
# Set to True on a node that directly contains symbol-type buckets.
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000
  60
  61
  62 def _MkChild(node, name):
  63   child = node[NODE_CHILDREN_KEY].get(name)
  64   if child is None:
  65     child = {NODE_NAME_KEY: name,
  66              NODE_CHILDREN_KEY: {}}
  67     node[NODE_CHILDREN_KEY][name] = child
  68   return child
  69
  70
  71
  72 def SplitNoPathBucket(node):
  73   """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  74   handle. Split it into sub-buckets in that case."""
  75   root_children = node[NODE_CHILDREN_KEY]
  76   if NAME_NO_PATH_BUCKET in root_children:
  77     no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
  78     old_children = no_path_bucket[NODE_CHILDREN_KEY]
  79     count = 0
  80     for symbol_type, symbol_bucket in old_children.iteritems():
  81       count += len(symbol_bucket[NODE_CHILDREN_KEY])
  82     if count > BIG_BUCKET_LIMIT:
  83       new_children = {}
  84       no_path_bucket[NODE_CHILDREN_KEY] = new_children
  85       current_bucket = None
  86       index = 0
  87       for symbol_type, symbol_bucket in old_children.iteritems():
  88         for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
  89           if index % BIG_BUCKET_LIMIT == 0:
  90             group_no = (index / BIG_BUCKET_LIMIT) + 1
  91             current_bucket = _MkChild(no_path_bucket,
  92                                       '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
  93                                                           group_no))
  94             assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
  95             node[NODE_TYPE_KEY] = 'p'  # p for path
  96           index += 1
  97           symbol_size = value[NODE_SYMBOL_SIZE_KEY]
  98           AddSymbolIntoFileNode(current_bucket, symbol_type,
  99                                 symbol_name, symbol_size)
 100
 101
 102 def MakeChildrenDictsIntoLists(node):
 103   largest_list_len = 0
 104   if NODE_CHILDREN_KEY in node:
 105     largest_list_len = len(node[NODE_CHILDREN_KEY])
 106     child_list = []
 107     for child in node[NODE_CHILDREN_KEY].itervalues():
 108       child_largest_list_len = MakeChildrenDictsIntoLists(child)
 109       if child_largest_list_len > largest_list_len:
 110         largest_list_len = child_largest_list_len
 111       child_list.append(child)
 112     node[NODE_CHILDREN_KEY] = child_list
 113
 114   return largest_list_len
 115
 116
 117 def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
 118   """Puts symbol into the file path node |node|.
 119   Returns the number of added levels in tree. I.e. returns 2."""
 120
 121   # 'node' is the file node and first step is to find its symbol-type bucket.
 122   node[NODE_LAST_PATH_ELEMENT_KEY] = True
 123   node = _MkChild(node, symbol_type)
 124   assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
 125   node[NODE_SYMBOL_TYPE_KEY] = symbol_type
 126   node[NODE_TYPE_KEY] = 'b'  # b for bucket
 127
 128   # 'node' is now the symbol-type bucket. Make the child entry.
 129   node = _MkChild(node, symbol_name)
 130   if NODE_CHILDREN_KEY in node:
 131     if node[NODE_CHILDREN_KEY]:
 132       logging.warning('A container node used as symbol for %s.' % symbol_name)
 133     # This is going to be used as a leaf so no use for child list.
 134     del node[NODE_CHILDREN_KEY]
 135   node[NODE_SYMBOL_SIZE_KEY] = symbol_size
 136   node[NODE_SYMBOL_TYPE_KEY] = symbol_type
 137   node[NODE_TYPE_KEY] = 's'  # s for symbol
 138
 139   return 2  # Depth of the added subtree.
 140
 141
 142 def MakeCompactTree(symbols, symbol_path_origin_dir):
 143   result = {NODE_NAME_KEY: '/',
 144             NODE_CHILDREN_KEY: {},
 145             NODE_TYPE_KEY: 'p',
 146             NODE_MAX_DEPTH_KEY: 0}
 147   seen_symbol_with_path = False
 148   cwd = os.path.abspath(os.getcwd())
 149   for symbol_name, symbol_type, symbol_size, file_path, _address in symbols:
 150
 151     if 'vtable for ' in symbol_name:
 152       symbol_type = '@'  # hack to categorize these separately
 153     # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
 154     if file_path and file_path != "??":
 155       file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
 156                                                file_path))
 157       # Let the output structure be relative to $CWD if inside $CWD,
 158       # otherwise relative to the disk root. This is to avoid
 159       # unnecessary click-through levels in the output.
 160       if file_path.startswith(cwd + os.sep):
 161         file_path = file_path[len(cwd):]
 162       if file_path.startswith('/'):
 163         file_path = file_path[1:]
 164       seen_symbol_with_path = True
 165     else:
 166       file_path = NAME_NO_PATH_BUCKET
 167
 168     path_parts = file_path.split('/')
 169
 170     # Find pre-existing node in tree, or update if it already exists
 171     node = result
 172     depth = 0
 173     while len(path_parts) > 0:
 174       path_part = path_parts.pop(0)
 175       if len(path_part) == 0:
 176         continue
 177       depth += 1
 178       node = _MkChild(node, path_part)
 179       assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
 180       node[NODE_TYPE_KEY] = 'p'  # p for path
 181
 182     depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
 183     result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)
 184
 185   if not seen_symbol_with_path:
 186     logging.warning('Symbols lack paths. Data will not be structured.')
 187
 188   # The (no path) bucket can be extremely large if we failed to get
 189   # path information. Split it into subgroups if needed.
 190   SplitNoPathBucket(result)
 191
 192   largest_list_len = MakeChildrenDictsIntoLists(result)
 193
 194   if largest_list_len > BIG_BUCKET_LIMIT:
 195     logging.warning('There are sections with %d nodes. '
 196                     'Results might be unusable.' % largest_list_len)
 197   return result
 198
 199
 200 def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
 201   tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
 202   with open(outfile, 'w') as out:
 203     out.write('var tree_data=')
 204     # Use separators without whitespace to get a smaller file.
 205     json.dump(tree_root, out, separators=(',', ':'))
 206   print('Writing %d bytes json' % os.path.getsize(outfile))
 207
 208
 209 def MakeSourceMap(symbols):
 210   sources = {}
 211   for _sym, _symbol_type, size, path, _address in symbols:
 212     key = None
 213     if path:
 214       key = os.path.normpath(path)
 215     else:
 216       key = '[no path]'
 217     if key not in sources:
 218       sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
 219     record = sources[key]
 220     record['size'] += size
 221     record['symbol_count'] += 1
 222   return sources
 223
 224
# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})    The address, at least 8 lowercase hex digits
# [\s]+             Whitespace separator
# ([0-9a-f]{8,})    The size, at least 8 lowercase hex digits
# [\s]*             Optional whitespace separator
# (\S?)             The symbol type, at most one non-whitespace char
# [\s*]             Exactly one separator character. Note that this is a
#                   character class, so it accepts a whitespace character
#                   or a literal '*'.
# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
# [\t]?             Optional tab separator
# (.*)              The location (filename[:linenum|?][ (discriminator n)])
sNmPattern = re.compile(
  r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
 241
 242 class Progress():
 243   def __init__(self):
 244     self.count = 0
 245     self.skip_count = 0
 246     self.collisions = 0
 247     self.time_last_output = time.time()
 248     self.count_last_output = 0
 249     self.disambiguations = 0
 250     self.was_ambiguous = 0
 251
 252
 253 def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
 254                      disambiguate, src_path):
 255   nm_output = RunNm(library, nm_binary)
 256   nm_output_lines = nm_output.splitlines()
 257   nm_output_lines_len = len(nm_output_lines)
 258   address_symbol = {}
 259   progress = Progress()
 260   def map_address_symbol(symbol, addr):
 261     progress.count += 1
 262     if addr in address_symbol:
 263       # 'Collision between %s and %s.' % (str(symbol.name),
 264       #                                   str(address_symbol[addr].name))
 265       progress.collisions += 1
 266     else:
 267       if symbol.disambiguated:
 268         progress.disambiguations += 1
 269       if symbol.was_ambiguous:
 270         progress.was_ambiguous += 1
 271
 272       address_symbol[addr] = symbol
 273
 274     progress_output()
 275
 276   def progress_output():
 277     progress_chunk = 100
 278     if progress.count % progress_chunk == 0:
 279       time_now = time.time()
 280       time_spent = time_now - progress.time_last_output
 281       if time_spent > 1.0:
 282         # Only output at most once per second.
 283         progress.time_last_output = time_now
 284         chunk_size = progress.count - progress.count_last_output
 285         progress.count_last_output = progress.count
 286         if time_spent > 0:
 287           speed = chunk_size / time_spent
 288         else:
 289           speed = 0
 290         progress_percent = (100.0 * (progress.count + progress.skip_count) /
 291                             nm_output_lines_len)
 292         disambiguation_percent = 0
 293         if progress.disambiguations != 0:
 294           disambiguation_percent = (100.0 * progress.disambiguations /
 295                                     progress.was_ambiguous)
 296
 297         sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
 298               '%d disambiguations where %.1f%% succeeded)'
 299               ' - %.1f lookups/s.' %
 300               (progress_percent, progress.count, progress.collisions,
 301                progress.disambiguations, disambiguation_percent, speed))
 302
 303   # In case disambiguation was disabled, we remove the source path (which upon
 304   # being set signals the symbolizer to enable disambiguation)
 305   if not disambiguate:
 306     src_path = None
 307   symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
 308                                             map_address_symbol,
 309                                             max_concurrent_jobs=jobs,
 310                                             source_root_path=src_path)
 311   user_interrupted = False
 312   try:
 313     for line in nm_output_lines:
 314       match = sNmPattern.match(line)
 315       if match:
 316         location = match.group(5)
 317         if not location:
 318           addr = int(match.group(1), 16)
 319           size = int(match.group(2), 16)
 320           if addr in address_symbol:  # Already looked up, shortcut
 321                                       # ELFSymbolizer.
 322             map_address_symbol(address_symbol[addr], addr)
 323             continue
 324           elif size == 0:
 325             # Save time by not looking up empty symbols (do they even exist?)
 326             print('Empty symbol: ' + line)
 327           else:
 328             symbolizer.SymbolizeAsync(addr, addr)
 329             continue
 330
 331       progress.skip_count += 1
 332   except KeyboardInterrupt:
 333     user_interrupted = True
 334     print('Interrupting - killing subprocesses. Please wait.')
 335
 336   try:
 337     symbolizer.Join()
 338   except KeyboardInterrupt:
 339     # Don't want to abort here since we will be finished in a few seconds.
 340     user_interrupted = True
 341     print('Patience you must have my young padawan.')
 342
 343   print ''
 344
 345   if user_interrupted:
 346     print('Skipping the rest of the file mapping. '
 347           'Output will not be fully classified.')
 348
 349   symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))
 350
 351   with open(outfile, 'w') as out:
 352     for line in nm_output_lines:
 353       match = sNmPattern.match(line)
 354       if match:
 355         location = match.group(5)
 356         if not location:
 357           addr = int(match.group(1), 16)
 358           symbol = address_symbol.get(addr)
 359           if symbol is not None:
 360             path = '??'
 361             if symbol.source_path is not None:
 362               path = os.path.abspath(os.path.join(symbol_path_origin_dir,
 363                                                   symbol.source_path))
 364             line_number = 0
 365             if symbol.source_line is not None:
 366               line_number = symbol.source_line
 367             out.write('%s\t%s:%d\n' % (line, path, line_number))
 368             continue
 369
 370       out.write('%s\n' % line)
 371
 372   print('%d symbols in the results.' % len(address_symbol))
 373
 374
 375 def RunNm(binary, nm_binary):
 376   cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
 377          binary]
 378   nm_process = subprocess.Popen(cmd,
 379                                 stdout=subprocess.PIPE,
 380                                 stderr=subprocess.PIPE)
 381   (process_output, err_output) = nm_process.communicate()
 382
 383   if nm_process.returncode != 0:
 384     if err_output:
 385       raise Exception, err_output
 386     else:
 387       raise Exception, process_output
 388
 389   return process_output
 390
 391
 392 def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
 393                  addr2line_binary, nm_binary, disambiguate, src_path):
 394   if nm_infile is None:
 395     if outfile is None:
 396       outfile = tempfile.NamedTemporaryFile(delete=False).name
 397
 398     if verbose:
 399       print 'Running parallel addr2line, dumping symbols to ' + outfile
 400     RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
 401                      disambiguate, src_path)
 402
 403     nm_infile = outfile
 404
 405   elif verbose:
 406     print 'Using nm input from ' + nm_infile
 407   with file(nm_infile, 'r') as infile:
 408     return list(binary_size_utils.ParseNm(infile))
 409
 410
 411 PAK_RESOURCE_ID_TO_STRING = { "inited": False }
 412
 413 def LoadPakIdsFromResourceFile(filename):
 414   """Given a file name, it loads everything that looks like a resource id
 415   into PAK_RESOURCE_ID_TO_STRING."""
 416   with open(filename) as resource_header:
 417     for line in resource_header:
 418       if line.startswith("#define "):
 419         line_data = line.split()
 420         if len(line_data) == 3:
 421           try:
 422             resource_number = int(line_data[2])
 423             resource_name = line_data[1]
 424             PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
 425           except ValueError:
 426             pass
 427
 428 def GetReadablePakResourceName(pak_file, resource_id):
 429   """Pak resources have a numeric identifier. It is not helpful when
 430   trying to locate where footprint is generated. This does its best to
 431   map the number to a usable string."""
 432   if not PAK_RESOURCE_ID_TO_STRING['inited']:
 433     # Try to find resource header files generated by grit when
 434     # building the pak file. We'll look for files named *resources.h"
 435     # and lines of the type:
 436     #    #define MY_RESOURCE_JS 1234
 437     PAK_RESOURCE_ID_TO_STRING['inited'] = True
 438     gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
 439     if os.path.isdir(gen_dir):
 440       for dirname, _dirs, files in os.walk(gen_dir):
 441         for filename in files:
 442           if filename.endswith('resources.h'):
 443             LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
 444   return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
 445                                        'Pak Resource %d' % resource_id)
 446
 447 def AddPakData(symbols, pak_file):
 448   """Adds pseudo-symbols from a pak file."""
 449   pak_file = os.path.abspath(pak_file)
 450   with open(pak_file, 'rb') as pak:
 451     data = pak.read()
 452
 453   PAK_FILE_VERSION = 4
 454   HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries)
 455                              # and one uint8 (encoding of text resources)
 456   INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
 457   version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
 458   assert version == PAK_FILE_VERSION, ('Unsupported pak file '
 459                                        'version (%d) in %s. Only '
 460                                        'support version %d' %
 461                                        (version, pak_file, PAK_FILE_VERSION))
 462   if num_entries > 0:
 463     # Read the index and data.
 464     data = data[HEADER_LENGTH:]
 465     for _ in range(num_entries):
 466       resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
 467       data = data[INDEX_ENTRY_SIZE:]
 468       _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
 469       resource_size = next_offset - offset
 470
 471       symbol_name = GetReadablePakResourceName(pak_file, resource_id)
 472       symbol_path = pak_file
 473       symbol_type = 'd' # Data. Approximation.
 474       symbol_size = resource_size
 475       symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
 476
 477 def _find_in_system_path(binary):
 478   """Locate the full path to binary in the system path or return None
 479   if not found."""
 480   system_path = os.environ["PATH"].split(os.pathsep)
 481   for path in system_path:
 482     binary_path = os.path.join(path, binary)
 483     if os.path.isfile(binary_path):
 484       return binary_path
 485   return None
 486
 487 def CheckDebugFormatSupport(library, addr2line_binary):
 488   """Kills the program if debug data is in an unsupported format.
 489
 490   There are two common versions of the DWARF debug formats and
 491   since we are right now transitioning from DWARF2 to newer formats,
 492   it's possible to have a mix of tools that are not compatible. Detect
 493   that and abort rather than produce meaningless output."""
 494   tool_output = subprocess.check_output([addr2line_binary, '--version'])
 495   version_re = re.compile(r'^GNU [^ ]+ .* (\d+).(\d+).*?$', re.M)
 496   parsed_output = version_re.match(tool_output)
 497   major = int(parsed_output.group(1))
 498   minor = int(parsed_output.group(2))
 499   supports_dwarf4 = major > 2 or major == 2 and minor > 22
 500
 501   if supports_dwarf4:
 502     return
 503
 504   print('Checking version of debug information in %s.' % library)
 505   debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
 506                                        '--dwarf-depth=1', library])
 507   dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
 508   parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
 509   version = int(parsed_dwarf_format_output.group(1))
 510   if version > 2:
 511     print('The supplied tools only support DWARF2 debug data but the binary\n' +
 512           'uses DWARF%d. Update the tools or compile the binary\n' % version +
 513           'with -gdwarf-2.')
 514     sys.exit(1)
 515
 516
 517 def main():
 518   usage = """%prog [options]
 519
 520   Runs a spatial analysis on a given library, looking up the source locations
 521   of its symbols and calculating how much space each directory, source file,
 522   and so on is taking. The result is a report that can be used to pinpoint
 523   sources of large portions of the binary, etceteras.
 524
 525   Under normal circumstances, you only need to pass two arguments, thusly:
 526
 527       %prog --library /path/to/library --destdir /path/to/output
 528
 529   In this mode, the program will dump the symbols from the specified library
 530   and map those symbols back to source locations, producing a web-based
 531   report in the specified output directory.
 532
 533   Other options are available via '--help'.
 534   """
 535   parser = optparse.OptionParser(usage=usage)
 536   parser.add_option('--nm-in', metavar='PATH',
 537                     help='if specified, use nm input from <path> instead of '
 538                     'generating it. Note that source locations should be '
 539                     'present in the file; i.e., no addr2line symbol lookups '
 540                     'will be performed when this option is specified. '
 541                     'Mutually exclusive with --library.')
 542   parser.add_option('--destdir', metavar='PATH',
 543                     help='write output to the specified directory. An HTML '
 544                     'report is generated here along with supporting files; '
 545                     'any existing report will be overwritten.')
 546   parser.add_option('--library', metavar='PATH',
 547                     help='if specified, process symbols in the library at '
 548                     'the specified path. Mutually exclusive with --nm-in.')
 549   parser.add_option('--pak', metavar='PATH',
 550                     help='if specified, includes the contents of the '
 551                     'specified *.pak file in the output.')
 552   parser.add_option('--nm-binary',
 553                     help='use the specified nm binary to analyze library. '
 554                     'This is to be used when the nm in the path is not for '
 555                     'the right architecture or of the right version.')
 556   parser.add_option('--addr2line-binary',
 557                     help='use the specified addr2line binary to analyze '
 558                     'library. This is to be used when the addr2line in '
 559                     'the path is not for the right architecture or '
 560                     'of the right version.')
 561   parser.add_option('--jobs', type='int',
 562                     help='number of jobs to use for the parallel '
 563                     'addr2line processing pool; defaults to 1. More '
 564                     'jobs greatly improve throughput but eat RAM like '
 565                     'popcorn, and take several gigabytes each. Start low '
 566                     'and ramp this number up until your machine begins to '
 567                     'struggle with RAM. '
 568                     'This argument is only valid when using --library.')
 569   parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
 570                     help='be verbose, printing lots of status information.')
 571   parser.add_option('--nm-out', metavar='PATH',
 572                     help='(deprecated) No-op. nm.out is stored in --destdir.')
 573   parser.add_option('--no-nm-out', action='store_true',
 574                     help='do not keep the nm output file. This file is useful '
 575                     'if you want to see the fully processed nm output after '
 576                     'the symbols have been mapped to source locations, or if '
 577                     'you plan to run explain_binary_size_delta.py. By default '
 578                     'the file \'nm.out\' is placed alongside the generated '
 579                     'report. The nm.out file is only created when using '
 580                     '--library.')
 581   parser.add_option('--disable-disambiguation', action='store_true',
 582                     help='disables the disambiguation process altogether,'
 583                     ' NOTE: this may, depending on your toolchain, produce'
 584                     ' output with some symbols at the top layer if addr2line'
 585                     ' could not get the entire source path.')
 586   parser.add_option('--source-path', default='./',
 587                     help='the path to the source code of the output binary, '
 588                     'default set to current directory. Used in the'
 589                     ' disambiguation process.')
 590   opts, _args = parser.parse_args()
 591
 592   if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
 593     parser.error('exactly one of --library or --nm-in is required')
 594   if opts.nm_out:
 595     print >> sys.stderr, ('WARNING: --nm-out is deprecated and has no effect.')
 596   if (opts.nm_in):
 597     if opts.jobs:
 598       print >> sys.stderr, ('WARNING: --jobs has no effect '
 599                             'when used with --nm-in')
 600   if not opts.destdir:
 601     parser.error('--destdir is a required argument')
 602   if not opts.jobs:
 603     # Use the number of processors but cap between 2 and 4 since raw
 604     # CPU power isn't the limiting factor. It's I/O limited, memory
 605     # bus limited and available-memory-limited. Too many processes and
 606     # the computer will run out of memory and it will be slow.
 607     opts.jobs = max(2, min(4, str(multiprocessing.cpu_count())))
 608
 609   if opts.addr2line_binary:
 610     assert os.path.isfile(opts.addr2line_binary)
 611     addr2line_binary = opts.addr2line_binary
 612   else:
 613     addr2line_binary = _find_in_system_path('addr2line')
 614     assert addr2line_binary, 'Unable to find addr2line in the path. '\
 615         'Use --addr2line-binary to specify location.'
 616
 617   if opts.nm_binary:
 618     assert os.path.isfile(opts.nm_binary)
 619     nm_binary = opts.nm_binary
 620   else:
 621     nm_binary = _find_in_system_path('nm')
 622     assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
 623         'to specify location.'
 624
 625   if opts.pak:
 626     assert os.path.isfile(opts.pak), 'Could not find ' % opts.pak
 627
 628   print('addr2line: %s' % addr2line_binary)
 629   print('nm: %s' % nm_binary)
 630
 631   if opts.library:
 632     CheckDebugFormatSupport(opts.library, addr2line_binary)
 633
 634   # Prepare output directory and report guts
 635   if not os.path.exists(opts.destdir):
 636     os.makedirs(opts.destdir, 0755)
 637   nm_out = os.path.join(opts.destdir, 'nm.out')
 638   if opts.no_nm_out:
 639     nm_out = None
 640
 641   # Copy report boilerplate into output directory. This also proves that the
 642   # output directory is safe for writing, so there should be no problems writing
 643   # the nm.out file later.
 644   data_js_file_name = os.path.join(opts.destdir, 'data.js')
 645   d3_out = os.path.join(opts.destdir, 'd3')
 646   if not os.path.exists(d3_out):
 647     os.makedirs(d3_out, 0755)
 648   d3_src = os.path.join(os.path.dirname(__file__),
 649                         '..',
 650                         '..',
 651                         'third_party', 'd3', 'src')
 652   template_src = os.path.join(os.path.dirname(__file__),
 653                               'template')
 654   shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
 655   shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
 656   shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
 657   shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)
 658
 659   # Run nm and/or addr2line to gather the data
 660   symbols = GetNmSymbols(opts.nm_in, nm_out, opts.library,
 661                          opts.jobs, opts.verbose is True,
 662                          addr2line_binary, nm_binary,
 663                          opts.disable_disambiguation is None,
 664                          opts.source_path)
 665
 666   # Post-processing
 667   if opts.pak:
 668     AddPakData(symbols, opts.pak)
 669   if opts.library:
 670     symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
 671   else:
 672     # Just a guess. Hopefully all paths in the input file are absolute.
 673     symbol_path_origin_dir = os.path.abspath(os.getcwd())
 674   # Dump JSON for the HTML report.
 675   DumpCompactTree(symbols, symbol_path_origin_dir, data_js_file_name)
 676   print 'Report saved to ' + opts.destdir + '/index.html'
 677
 678 if __name__ == '__main__':
 679   sys.exit(main())