2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
"""Symbolize log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
def ParseLogLines(log_file_lines):
    """Parse a log file produced by the profiled run of clank.

    Below is an example of a small log file:
        5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
        secs usecs pid:threadid func
        1314897086 795828 3587:1074648168 0x509e105c
        1314897086 795874 3587:1074648168 0x509e0eb4
        1314897086 796326 3587:1074648168 0x509e0e3c
        1314897086 796552 3587:1074648168 0x509e07bc

    Args:
      log_file_lines: array of lines in log file produced by profiled run.
        The first line must be the executable ("r-xp") memory mapping of the
        profiled binary; the second line (column header) is skipped.

    Returns:
      call_info list of tuples of the format (sec, usec, call id,
      function address called).

    Raises:
      ValueError: if the log is empty or its first line is not an "r-xp"
        mapping line.
    """
    # Explicit validation instead of assert, so the check survives `python -O`
    # and an empty log reports a clear error rather than an IndexError.
    if not log_file_lines or 'r-xp' not in log_file_lines[0]:
        raise ValueError(
            'first log line must be the r-xp memory mapping of the binary')

    # The mapping line starts with "<start>-<end>"; keep the start address.
    mapping_line = log_file_lines[0]
    vm_start = int(mapping_line[:mapping_line.find('-')], 16)

    call_info = []
    for line in log_file_lines[2:]:
        fields = line.split()
        # A call record has exactly four whitespace-separated fields
        # (secs, usecs, pid:threadid, func); anything else is skipped.
        # NOTE(review): the filter line is absent from the reviewed source;
        # it is inferred from the fields[0..3] accesses below -- confirm.
        if len(fields) != 4:
            continue
        sec_timestamp = int(fields[0])
        usec_timestamp = int(fields[1])
        callee_id = fields[2]
        addr = int(fields[3], 16)
        # NOTE(review): the lines between parsing the address and appending
        # the record are absent from the reviewed source. Reconstructed as:
        # rebase the logged address onto the start of the mapping and drop
        # addresses that are not above it -- confirm against the original.
        if vm_start < addr:
            call_info.append(
                (sec_timestamp, usec_timestamp, callee_id, addr - vm_start))

    return call_info
def GetStdOutputLines(cmd):
    """Run a command and return its standard output as a list of lines.

    Args:
      cmd: the command to run, as an argument list for subprocess.Popen.

    Returns:
      The captured standard output split on '\\n'. Output that ends with a
      newline therefore yields a trailing empty string.
    """
    # universal_newlines=True makes communicate() return text rather than
    # bytes, so the split below also works on Python 3.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output = process.communicate()[0]
    return output.split('\n')
def ParseLibSymbols(lib_file):
    """Get output from running nm and filtering for text symbols.

    Runs `nm -S -n` on lib_file and keeps the lines containing one of the
    symbol-type markers ' t ', ' W ' or ' T '.

    Args:
      lib_file: the library or executable that contains the profiled code

    Returns:
      A tuple (unique_addrs, address_map): unique_addrs is a list of
      (address, size) tuples in nm output order, one per distinct address;
      address_map maps each address to the list of all symbol names found
      at that address.
    """
    text_markers = (' t ', ' W ', ' T ')
    # Split every relevant line once up front instead of re-splitting it on
    # each access.
    entries = [nm_line.split()
               for nm_line in GetStdOutputLines(['nm', '-S', '-n', lib_file])
               if any(marker in nm_line for marker in text_markers)]

    unique_addrs = []
    address_map = {}
    nm_index = 0
    while nm_index < len(entries):
        fields = entries[nm_index]
        # If the length of the split line is not 4, then it does not contain
        # all the information needed to symbolize (i.e. address, size and
        # symbol name).
        if len(fields) != 4:
            nm_index += 1
            continue

        addr = int(fields[0], 16)
        size = int(fields[1], 16)

        # Multiple symbols may be at the same address. This is due to
        # aliasing done by the compiler. Since there is no way to be sure
        # which one was called in the profiled run, we symbolize to include
        # all symbol names at a particular address.
        fnames = []
        while (nm_index < len(entries) and
               addr == int(entries[nm_index][0], 16)):
            if len(entries[nm_index]) == 4:
                fnames.append(entries[nm_index][3])
            nm_index += 1
        address_map[addr] = fnames
        unique_addrs.append((addr, size))

    return (unique_addrs, address_map)
class SymbolNotFoundException(Exception):
    """Raised when an address does not fall inside any known symbol.

    Attributes:
      value: the value the exception was constructed with (the address that
        could not be resolved, when raised by BinarySearchAddresses).
    """

    def __init__(self, value):
        super(SymbolNotFoundException, self).__init__(value)
        # Kept as an attribute because __str__ reads it.
        self.value = value

    def __str__(self):
        return repr(self.value)
def BinarySearchAddresses(addr, start, end, arr):
    """Find starting address of a symbol at a particular address.

    The reason we can not directly use the address provided by the log file
    is that the log file may give an address after the start of the symbol.
    The logged address is often one byte after the start. Using this search
    function rather than just subtracting one from the logged address allows
    the logging instrumentation to log any address in a function.

    Args:
      addr: the address being searched for
      start: the starting index for the binary search
      end: the ending index (inclusive) for the binary search
      arr: the list being searched, containing tuples of (address, size)

    Returns:
      the starting address of the symbol at address addr

    Raises:
      SymbolNotFoundException: if no entry in the searched range contains
        addr, including when the range or arr is empty.
    """
    # An empty table or range cannot contain the address; report it with the
    # exception callers handle rather than an IndexError.
    if not arr or start > end:
        raise SymbolNotFoundException(addr)

    # Narrow the range until at most two candidates remain.
    while end - start > 1:
        # Floor division keeps the midpoint an int on Python 3 as well.
        halfway = (start + end) // 2
        (nm_addr, size) = arr[halfway]
        if nm_addr <= addr < nm_addr + size:
            return nm_addr
        if addr < nm_addr:
            end = halfway - 1
        else:
            # Condition (addr >= nm_addr + size) must be true.
            start = halfway + 1

    # arr[i] is a tuple of address and size. Check if addr is inside the
    # range of either remaining candidate.
    for index in (start, end):
        (nm_addr, size) = arr[index]
        if nm_addr <= addr < nm_addr + size:
            return nm_addr
    raise SymbolNotFoundException(addr)
def FindFunctions(addr, unique_addrs, address_map):
    """Find function symbol names at address addr.

    Args:
      addr: the address to look up
      unique_addrs: list of (address, size) tuples to search
      address_map: map from symbol start address to list of symbol names

    Returns:
      The list of symbol names recorded for the symbol containing addr.

    Raises:
      Whatever BinarySearchAddresses raises when addr is not inside any
      entry of unique_addrs.
    """
    symbol_start = BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
                                         unique_addrs)
    return address_map[symbol_start]
def AddrToLine(addr, lib_file):
    """Use addr2line to determine line info of a particular address.

    Args:
      addr: the address to resolve, as an integer
      lib_file: the library or executable to resolve the address in

    Returns:
      The two lines printed by `addr2line -f` joined with ':'.

    Raises:
      ValueError: if addr2line did not print exactly two non-empty lines.
    """
    cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
    # GetStdOutputLines splits on '\n', so output ending in a newline carries
    # a trailing empty string; drop empty entries before counting lines.
    output = [line for line in GetStdOutputLines(cmd) if line]
    if len(output) != 2:
        raise ValueError('unexpected addr2line output for ' + hex(addr) +
                         ': ' + repr(output))
    return ':'.join(output)
def GetObjectFileNames(obj_dir):
    """Gets the list of object files in the output directory.

    Args:
      obj_dir: directory to scan recursively

    Returns:
      A list with the path (dirpath joined with file name) of every file
      under obj_dir whose name ends with '.o'.
    """
    return [os.path.join(dirpath, file_name)
            for (dirpath, _, filenames) in os.walk(obj_dir)
            for file_name in filenames
            if file_name.endswith('.o')]
class WarningCollector(object):
    """Writes warnings to stderr, suppressing output past a maximum count."""

    def __init__(self, max_warnings):
        """Create a collector that prints at most max_warnings messages."""
        # Number of warnings reported so far, printed or not.
        self._warnings = 0
        self._max_warnings = max_warnings

    def Write(self, message):
        """Record a warning; print it only while under the limit."""
        if self._warnings < self._max_warnings:
            sys.stderr.write(message + '\n')
        # Counted even when suppressed so WriteEnd can report the overflow.
        self._warnings += 1

    def WriteEnd(self, message):
        """Print how many warnings were suppressed, if any were."""
        if self._warnings > self._max_warnings:
            sys.stderr.write(str(self._warnings - self._max_warnings) +
                             ' more warnings for: ' + message + '\n')
def SymbolToSection(obj_dir):
    """Gets a mapping from symbol to linker section name.

    Scans all of the object files under obj_dir with `objdump -w -t`.

    Args:
      obj_dir: directory containing the object files to scan

    Returns:
      A dict mapping function symbol name to the section it was found in.
      Warnings about conflicting or unexpected sections go to stderr
      (at most 300 are printed).
    """
    symbol_to_section_map = {}
    symbol_warnings = WarningCollector(300)
    for obj_file in GetObjectFileNames(obj_dir):
        for symbol_line in GetStdOutputLines(['objdump', '-w', '-t',
                                              obj_file]):
            items = symbol_line.split()
            # All of the symbol lines we care about are in the form
            # 0000000000 g F .text.foo 000000000 [.hidden] foo
            # where g (global) might also be l (local) or w (weak).
            if len(items) <= 4 or items[2] != 'F':
                continue

            # This symbol is a function; its name is the last field.
            symbol = items[-1]
            # NOTE(review): the body of the '.LTHUNK' check is absent from
            # the reviewed source; skipping such symbols is assumed.
            if symbol.startswith('.LTHUNK'):
                continue
            # NOTE(review): the assignment of `section` is absent from the
            # reviewed source; the fourth field is assumed, per the line
            # format documented above.
            section = items[3]

            if (symbol in symbol_to_section_map and
                    symbol_to_section_map[symbol] != section):
                symbol_warnings.Write('WARNING: Symbol ' + symbol +
                                      ' in conflicting sections ' + section +
                                      ' and ' + symbol_to_section_map[symbol])
            elif not section.startswith('.text.'):
                symbol_warnings.Write('WARNING: Symbol ' + symbol +
                                      ' in incorrect section ' + section)
            else:
                # NOTE(review): the line before this assignment is absent
                # from the reviewed source; an `else:` branch is assumed, so
                # only symbols that triggered no warning are recorded.
                symbol_to_section_map[symbol] = section
    symbol_warnings.WriteEnd('bad sections')
    return symbol_to_section_map
# NOTE(review): the `def` line and the body of the __main__ guard are absent
# from the reviewed source; the conventional name `main` is assumed.
def main():
    """Write output for profiled run to standard out.

    The format of the output depends on the output type given with
    -t/--outputType ('lineize', 'symbolize' or 'orderfile'). The default
    output type is to symbolize the addresses of the functions called.
    """
    parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
    parser.add_option('-t', '--outputType', dest='output_type',
                      default='symbolize', type='string',
                      help='lineize or symbolize or orderfile')

    # Option for output type. The log file and lib file arguments are
    # required by the script and therefore are not options.
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error('expected 2 args: log_file lib_file')

    (log_file, lib_file) = args
    output_type = options.output_type

    obj_dir = os.path.abspath(
        os.path.join(os.path.dirname(lib_file), '../obj'))

    # Read via a context manager so the file is closed, and build a real
    # list (ParseLogLines indexes and slices it).
    with open(log_file) as log:
        log_file_lines = [line.rstrip() for line in log]
    call_info = ParseLogLines(log_file_lines)
    (unique_addrs, address_map) = ParseLibSymbols(lib_file)

    # Check for duplicate addresses in the log file, and print a warning if
    # duplicates are found. The instrumentation that produces the log file
    # should only print the first time a function is entered.
    seen_addrs = set()
    for call in call_info:
        # Each call is (sec, usec, call id, address).
        addr = call[3]
        if addr not in seen_addrs:
            seen_addrs.add(addr)
        else:
            print('WARNING: Address ' + hex(addr) + ' (line= ' +
                  AddrToLine(addr, lib_file) + ') already profiled.')

    symbol_to_section_map = SymbolToSection(obj_dir)

    unknown_symbol_warnings = WarningCollector(300)
    symbol_not_found_warnings = WarningCollector(300)
    for call in call_info:
        addr = call[3]
        call_prefix = (str(call[0]) + ' ' + str(call[1]) + '\t' +
                       str(call[2]) + '\t')
        if output_type == 'lineize':
            symbol = AddrToLine(addr, lib_file)
            print(call_prefix + symbol)
        elif output_type == 'orderfile':
            try:
                symbols = FindFunctions(addr, unique_addrs, address_map)
                for symbol in symbols:
                    if symbol in symbol_to_section_map:
                        print(symbol_to_section_map[symbol])
                    else:
                        unknown_symbol_warnings.Write(
                            'WARNING: No known section for symbol ' + symbol)
                # NOTE(review): the line after the loop is absent from the
                # reviewed source; an empty separator line after each group
                # is assumed -- confirm against the original file.
                print('')
            except SymbolNotFoundException:
                symbol_not_found_warnings.Write(
                    'WARNING: Did not find function in binary. addr: '
                    + hex(addr))
        else:
            try:
                symbols = FindFunctions(addr, unique_addrs, address_map)
                # NOTE(review): the continuation lines here are absent from
                # the reviewed source. Assumed: the first symbol goes on the
                # call's own line, each alias on an indented line of its own.
                print(call_prefix + symbols[0])
                for symbol in symbols[1:]:
                    print('\t\t\t\t\t' + symbol)
            except SymbolNotFoundException:
                symbol_not_found_warnings.Write(
                    'WARNING: Did not find function in binary. addr: '
                    + hex(addr))
    unknown_symbol_warnings.WriteEnd('no known section for symbol')
    symbol_not_found_warnings.WriteEnd('did not find function')


if __name__ == '__main__':
    main()