2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Describe the size difference of two binaries.
8 Generates a description of the size difference of two binaries based
9 on the difference of the size of various symbols.
11 This tool needs "nm" dumps of each binary with full symbol
12 information. You can obtain the necessary dumps by running the
13 run_binary_size_analysis.py script upon each binary, with the
14 "--nm-out" parameter set to the location in which you want to save the
17 # obtain symbol data from first binary in /tmp/nm1.dump
19 ninja -C out/Release binary_size_tool
20 tools/binary_size/run_binary_size_analysis \
21 --library <path_to_library>
22 --destdir /tmp/throwaway
23 --nm-out /tmp/nm1.dump
25 # obtain symbol data from second binary in /tmp/nm2.dump
27 ninja -C out/Release binary_size_tool
28 tools/binary_size/run_binary_size_analysis \
29 --library <path_to_library>
30 --destdir /tmp/throwaway
31 --nm-out /tmp/nm2.dump
33 # cleanup useless files
37 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
41 from collections
import Counter
48 import binary_size_utils
51 def CalculateSharedAddresses(symbols
):
52 """Checks how many symbols share the same memory space. This returns a
53 Counter result where result[address] will tell you how many times address was
56 for _
, _
, _
, _
, address
in symbols
:
62 def CalculateEffectiveSize(share_count
, address
, symbol_size
):
63 """Given a raw symbol_size and an address, this method returns the
64 size we should blame on this symbol considering it might share the
65 machine code/data with other symbols. Using the raw symbol_size for
66 each symbol would in those cases over estimate the true cost of that
70 shared_count
= share_count
[address
]
74 assert shared_count
> 1
75 return int(ceil(symbol_size
/ float(shared_count
)))
77 class SymbolDelta(object):
78 """Stores old size, new size and some metadata."""
79 def __init__(self
, shared
):
82 self
.shares_space_with_other_symbols
= shared
84 def __eq__(self
, other
):
85 return (self
.old_size
== other
.old_size
and
86 self
.new_size
== other
.new_size
and
87 self
.shares_space_with_other_symbols
==
88 other
.shares_space_with_other_symbols
)
90 def __ne__(self
, other
):
91 return not self
.__eq
__(other
)
93 def copy_symbol_delta(self
):
94 symbol_delta
= SymbolDelta(self
.shares_space_with_other_symbols
)
95 symbol_delta
.old_size
= self
.old_size
96 symbol_delta
.new_size
= self
.new_size
99 class DeltaInfo(SymbolDelta
):
100 """Summary of a the change for one symbol between two instances."""
101 def __init__(self
, file_path
, symbol_type
, symbol_name
, shared
):
102 SymbolDelta
.__init
__(self
, shared
)
103 self
.file_path
= file_path
104 self
.symbol_type
= symbol_type
105 self
.symbol_name
= symbol_name
107 def __eq__(self
, other
):
108 return (self
.file_path
== other
.file_path
and
109 self
.symbol_type
== other
.symbol_type
and
110 self
.symbol_name
== other
.symbol_name
and
111 SymbolDelta
.__eq
__(self
, other
))
113 def __ne__(self
, other
):
114 return not self
.__eq
__(other
)
116 def ExtractSymbolDelta(self
):
117 """Returns a copy of the SymbolDelta for this DeltaInfo."""
118 return SymbolDelta
.copy_symbol_delta(self
)
120 def Compare(symbols1
, symbols2
):
121 """Executes a comparison of the symbols in symbols1 and symbols2.
124 tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
125 where each list contains DeltaInfo objects.
128 removed
= [] # tuples
129 changed
= [] # tuples
130 unchanged
= [] # tuples
134 # Make a map of (file, symbol_type) : (symbol_name, effective_symbol_size)
135 share_count1
= CalculateSharedAddresses(symbols1
)
136 share_count2
= CalculateSharedAddresses(symbols2
)
137 for cache
, symbols
, share_count
in ((cache1
, symbols1
, share_count1
),
138 (cache2
, symbols2
, share_count2
)):
139 for symbol_name
, symbol_type
, symbol_size
, file_path
, address
in symbols
:
140 if 'vtable for ' in symbol_name
:
141 symbol_type
= '@' # hack to categorize these separately
143 file_path
= os
.path
.normpath(file_path
)
144 if sys
.platform
.startswith('win'):
145 file_path
= file_path
.replace('\\', '/')
147 file_path
= '(No Path)'
148 # Take into consideration that multiple symbols might share the same
150 effective_symbol_size
= CalculateEffectiveSize(share_count
, address
,
152 key
= (file_path
, symbol_type
)
153 bucket
= cache
.setdefault(key
, {})
154 size_list
= bucket
.setdefault(symbol_name
, [])
155 size_list
.append((effective_symbol_size
,
156 effective_symbol_size
!= symbol_size
))
158 # Now diff them. We iterate over the elements in cache1. For each symbol
159 # that we find in cache2, we record whether it was deleted, changed, or
160 # unchanged. We then remove it from cache2; all the symbols that remain
161 # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
162 for key
, bucket1
in cache1
.items():
163 bucket2
= cache2
.get(key
)
164 file_path
, symbol_type
= key
;
166 # A file was removed. Everything in bucket1 is dead.
167 for symbol_name
, symbol_size_list
in bucket1
.items():
168 for (symbol_size
, shared
) in symbol_size_list
:
169 delta_info
= DeltaInfo(file_path
, symbol_type
, symbol_name
, shared
)
170 delta_info
.old_size
= symbol_size
171 removed
.append(delta_info
)
173 # File still exists, look for changes within.
174 for symbol_name
, symbol_size_list
in bucket1
.items():
175 size_list2
= bucket2
.get(symbol_name
)
176 if size_list2
is None:
177 # Symbol no longer exists in bucket2.
178 for (symbol_size
, shared
) in symbol_size_list
:
179 delta_info
= DeltaInfo(file_path
, symbol_type
, symbol_name
, shared
)
180 delta_info
.old_size
= symbol_size
181 removed
.append(delta_info
)
183 del bucket2
[symbol_name
] # Symbol is not new, delete from cache2.
184 if len(symbol_size_list
) == 1 and len(size_list2
) == 1:
185 symbol_size
, shared1
= symbol_size_list
[0]
186 size2
, shared2
= size_list2
[0]
187 delta_info
= DeltaInfo(file_path
, symbol_type
, symbol_name
,
189 delta_info
.old_size
= symbol_size
190 delta_info
.new_size
= size2
191 if symbol_size
!= size2
:
192 # Symbol has change size in bucket.
193 changed
.append(delta_info
)
195 # Symbol is unchanged.
196 unchanged
.append(delta_info
)
198 # Complex comparison for when a symbol exists multiple times
199 # in the same file (where file can be "unknown file").
200 symbol_size_counter
= collections
.Counter(symbol_size_list
)
201 delta_counter
= collections
.Counter(symbol_size_list
)
202 delta_counter
.subtract(size_list2
)
203 for delta_counter_key
in sorted(delta_counter
.keys()):
204 delta
= delta_counter
[delta_counter_key
]
205 unchanged_count
= symbol_size_counter
[delta_counter_key
]
206 (symbol_size
, shared
) = delta_counter_key
208 unchanged_count
-= delta
209 for _
in range(unchanged_count
):
210 delta_info
= DeltaInfo(file_path
, symbol_type
,
212 delta_info
.old_size
= symbol_size
213 delta_info
.new_size
= symbol_size
214 unchanged
.append(delta_info
)
215 if delta
> 0: # Used to be more of these than there is now.
216 for _
in range(delta
):
217 delta_info
= DeltaInfo(file_path
, symbol_type
,
219 delta_info
.old_size
= symbol_size
220 removed
.append(delta_info
)
221 elif delta
< 0: # More of this (symbol,size) now.
222 for _
in range(-delta
):
223 delta_info
= DeltaInfo(file_path
, symbol_type
,
225 delta_info
.new_size
= symbol_size
226 added
.append(delta_info
)
228 if len(bucket2
) == 0:
229 del cache1
[key
] # Entire bucket is empty, delete from cache2
231 # We have now analyzed all symbols that are in cache1 and removed all of
232 # the encountered symbols from cache2. What's left in cache2 is the new
234 for key
, bucket2
in cache2
.iteritems():
235 file_path
, symbol_type
= key
;
236 for symbol_name
, symbol_size_list
in bucket2
.items():
237 for (symbol_size
, shared
) in symbol_size_list
:
238 delta_info
= DeltaInfo(file_path
, symbol_type
, symbol_name
, shared
)
239 delta_info
.new_size
= symbol_size
240 added
.append(delta_info
)
241 return (added
, removed
, changed
, unchanged
)
244 def DeltaStr(number
):
245 """Returns the number as a string with a '+' prefix if it's > 0 and
246 a '-' prefix if it's < 0."""
249 result
= '+' + result
253 def SharedInfoStr(symbol_info
):
254 """Returns a string (prefixed by space) explaining that numbers are
255 adjusted because of shared space between symbols, or an empty string
256 if space had not been shared."""
258 if symbol_info
.shares_space_with_other_symbols
:
259 return " (adjusted sizes because of memory sharing)"
263 class CrunchStatsData(object):
264 """Stores a summary of data of a certain kind."""
265 def __init__(self
, symbols
):
266 self
.symbols
= symbols
270 self
.symbols_by_path
= {}
273 def CrunchStats(added
, removed
, changed
, unchanged
, showsources
, showsymbols
):
274 """Outputs to stdout a summary of changes based on the symbol lists."""
275 # Split changed into grown and shrunk because that is easier to
280 if item
.old_size
< item
.new_size
:
285 new_symbols
= CrunchStatsData(added
)
286 removed_symbols
= CrunchStatsData(removed
)
287 grown_symbols
= CrunchStatsData(grown
)
288 shrunk_symbols
= CrunchStatsData(shrunk
)
289 sections
= [new_symbols
, removed_symbols
, grown_symbols
, shrunk_symbols
]
290 for section
in sections
:
291 for item
in section
.symbols
:
292 section
.sources
.add(item
.file_path
)
293 if item
.old_size
is not None:
294 section
.before_size
+= item
.old_size
295 if item
.new_size
is not None:
296 section
.after_size
+= item
.new_size
297 bucket
= section
.symbols_by_path
.setdefault(item
.file_path
, [])
298 bucket
.append((item
.symbol_name
, item
.symbol_type
,
299 item
.ExtractSymbolDelta()))
301 total_change
= sum(s
.after_size
- s
.before_size
for s
in sections
)
302 summary
= 'Total change: %s bytes' % DeltaStr(total_change
)
304 print('=' * len(summary
))
305 for section
in sections
:
306 if not section
.symbols
:
308 if section
.before_size
== 0:
309 description
= ('added, totalling %s bytes' % DeltaStr(section
.after_size
))
310 elif section
.after_size
== 0:
311 description
= ('removed, totalling %s bytes' %
312 DeltaStr(-section
.before_size
))
314 if section
.after_size
> section
.before_size
:
318 description
= ('%s, for a net change of %s bytes '
319 '(%d bytes before, %d bytes after)' %
320 (type_str
, DeltaStr(section
.after_size
- section
.before_size
),
321 section
.before_size
, section
.after_size
))
322 print(' %d %s across %d sources' %
323 (len(section
.symbols
), description
, len(section
.sources
)))
325 maybe_unchanged_sources
= set()
326 unchanged_symbols_size
= 0
327 for item
in unchanged
:
328 maybe_unchanged_sources
.add(item
.file_path
)
329 unchanged_symbols_size
+= item
.old_size
# == item.new_size
330 print(' %d unchanged, totalling %d bytes' %
331 (len(unchanged
), unchanged_symbols_size
))
333 # High level analysis, always output.
334 unchanged_sources
= maybe_unchanged_sources
335 for section
in sections
:
336 unchanged_sources
= unchanged_sources
- section
.sources
337 new_sources
= (new_symbols
.sources
-
338 maybe_unchanged_sources
-
339 removed_symbols
.sources
)
340 removed_sources
= (removed_symbols
.sources
-
341 maybe_unchanged_sources
-
343 partially_changed_sources
= (grown_symbols
.sources |
344 shrunk_symbols
.sources | new_symbols
.sources |
345 removed_symbols
.sources
) - removed_sources
- new_sources
347 for section
in sections
:
348 allFiles
= allFiles | section
.sources
349 allFiles
= allFiles | maybe_unchanged_sources
350 print 'Source stats:'
351 print(' %d sources encountered.' % len(allFiles
))
352 print(' %d completely new.' % len(new_sources
))
353 print(' %d removed completely.' % len(removed_sources
))
354 print(' %d partially changed.' % len(partially_changed_sources
))
355 print(' %d completely unchanged.' % len(unchanged_sources
))
356 remainder
= (allFiles
- new_sources
- removed_sources
-
357 partially_changed_sources
- unchanged_sources
)
358 assert len(remainder
) == 0
361 return # Per-source analysis, only if requested
362 print 'Per-source Analysis:'
364 for section
in sections
:
365 for path
in section
.symbols_by_path
:
366 entry
= delta_by_path
.get(path
)
368 entry
= {'plus': 0, 'minus': 0}
369 delta_by_path
[path
] = entry
370 for symbol_name
, symbol_type
, symbol_delta
in \
371 section
.symbols_by_path
[path
]:
372 if symbol_delta
.old_size
is None:
373 delta
= symbol_delta
.new_size
374 elif symbol_delta
.new_size
is None:
375 delta
= -symbol_delta
.old_size
377 delta
= symbol_delta
.new_size
- symbol_delta
.old_size
380 entry
['plus'] += delta
382 entry
['minus'] += (-1 * delta
)
384 def delta_sort_key(item
):
385 _path
, size_data
= item
386 growth
= size_data
['plus'] - size_data
['minus']
389 for path
, size_data
in sorted(delta_by_path
.iteritems(), key
=delta_sort_key
,
391 gain
= size_data
['plus']
392 loss
= size_data
['minus']
393 delta
= size_data
['plus'] - size_data
['minus']
394 header
= ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta
),
396 divider
= '-' * len(header
)
402 def ExtractNewSize(tup
):
403 symbol_delta
= tup
[2]
404 return symbol_delta
.new_size
405 def ExtractOldSize(tup
):
406 symbol_delta
= tup
[2]
407 return symbol_delta
.old_size
408 if path
in new_symbols
.symbols_by_path
:
409 print ' New symbols:'
410 for symbol_name
, symbol_type
, symbol_delta
in \
411 sorted(new_symbols
.symbols_by_path
[path
],
414 print (' %8s: %s type=%s, size=%d bytes%s' %
415 (DeltaStr(symbol_delta
.new_size
), symbol_name
, symbol_type
,
416 symbol_delta
.new_size
, SharedInfoStr(symbol_delta
)))
417 if path
in removed_symbols
.symbols_by_path
:
418 print ' Removed symbols:'
419 for symbol_name
, symbol_type
, symbol_delta
in \
420 sorted(removed_symbols
.symbols_by_path
[path
],
422 print (' %8s: %s type=%s, size=%d bytes%s' %
423 (DeltaStr(-symbol_delta
.old_size
), symbol_name
, symbol_type
,
424 symbol_delta
.old_size
,
425 SharedInfoStr(symbol_delta
)))
426 for (changed_symbols_by_path
, type_str
) in [
427 (grown_symbols
.symbols_by_path
, "Grown"),
428 (shrunk_symbols
.symbols_by_path
, "Shrunk")]:
429 if path
in changed_symbols_by_path
:
430 print ' %s symbols:' % type_str
431 def changed_symbol_sortkey(item
):
432 symbol_name
, _symbol_type
, symbol_delta
= item
433 return (symbol_delta
.old_size
- symbol_delta
.new_size
, symbol_name
)
434 for symbol_name
, symbol_type
, symbol_delta
in \
435 sorted(changed_symbols_by_path
[path
], key
=changed_symbol_sortkey
):
436 print (' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
437 % (DeltaStr(symbol_delta
.new_size
- symbol_delta
.old_size
),
438 symbol_name
, symbol_type
,
439 symbol_delta
.old_size
, symbol_delta
.new_size
,
440 SharedInfoStr(symbol_delta
)))
444 usage
= """%prog [options]
446 Analyzes the symbolic differences between two binary files
447 (typically, not necessarily, two different builds of the same
448 library) and produces a detailed description of symbols that have
449 been added, removed, or whose size has changed.
452 explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
454 Options are available via '--help'.
456 parser
= optparse
.OptionParser(usage
=usage
)
457 parser
.add_option('--nm1', metavar
='PATH',
458 help='the nm dump of the first library')
459 parser
.add_option('--nm2', metavar
='PATH',
460 help='the nm dump of the second library')
461 parser
.add_option('--showsources', action
='store_true', default
=False,
462 help='show per-source statistics')
463 parser
.add_option('--showsymbols', action
='store_true', default
=False,
464 help='show all symbol information; implies --showfiles')
465 parser
.add_option('--verbose', action
='store_true', default
=False,
466 help='output internal debugging stuff')
467 opts
, _args
= parser
.parse_args()
470 parser
.error('--nm1 is required')
472 parser
.error('--nm2 is required')
474 for path
in [opts
.nm1
, opts
.nm2
]:
475 with
file(path
, 'r') as nm_input
:
477 print 'parsing ' + path
+ '...'
478 symbols
.append(list(binary_size_utils
.ParseNm(nm_input
)))
479 (added
, removed
, changed
, unchanged
) = Compare(symbols
[0], symbols
[1])
480 CrunchStats(added
, removed
, changed
, unchanged
,
481 opts
.showsources | opts
.showsymbols
, opts
.showsymbols
)
483 if __name__
== '__main__':