[devtools] Increase DevTools WebSocket send buffer to 256MB
[chromium-blink-merge.git] / tools / cygprofile / symbolize.py
blob4f7dfe7a63d3fe30f50b39b11273456724902041
1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolize log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
11 """
13 import optparse
14 import os
15 import string
16 import subprocess
17 import sys
def ParseLogLines(log_file_lines):
  """Extract call records from the lines of a cygprofile log.

  Args:
    log_file_lines: list of lines of the log file produced by a profiled run.

  The first line must be the executable ("r-xp") memory mapping of the
  profiled binary; the hex number before its first '-' is taken as the
  mapping start address. The second line is skipped. Of the remaining lines,
  only those with exactly four whitespace-separated fields are treated as
  call records, so markers such as START and END are ignored. Example:

    5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
    secs usecs pid:threadid func
    START
    1314897086 795828 3587:1074648168 0x509e105c
    1314897086 795874 3587:1074648168 0x509e0eb4
    END

  Returns:
    list of tuples (sec, usec, call id, function address). Addresses greater
    than the mapping start are rebased by subtracting it; other addresses are
    kept unchanged.
  """
  mapping_line = log_file_lines[0]
  assert("r-xp" in mapping_line)
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  call_info = []
  for record in log_file_lines[2:]:
    fields = record.split()
    if len(fields) != 4:
      continue
    sec_timestamp = int(fields[0])
    usec_timestamp = int(fields[1])
    callee_id = fields[2]
    addr = int(fields[3], 16)
    if addr > vm_start:
      addr -= vm_start
    call_info.append((sec_timestamp, usec_timestamp, callee_id, addr))
  return call_info
def GetStdOutputLines(cmd):
  """Run a command and return its standard output as a list of lines.

  Args:
    cmd: the command and its arguments, as a list passed to subprocess.Popen

  Returns:
    the captured standard output split on '\\n'. Output that ends with a
    newline therefore yields a trailing empty string as the last element.
  """
  # universal_newlines=True makes communicate() return text rather than bytes
  # on Python 3, where splitting bytes on a str separator raises TypeError.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  return output.split('\n')
def ParseLibSymbols(lib_file):
  """Run nm on lib_file and index its ' t ', ' W ' and ' T ' symbol lines.

  Args:
    lib_file: the library or executable that contains the profiled code

  Returns:
    a tuple (unique_addrs, address_map). unique_addrs is a list of
    (address, size) tuples in the order nm reported them ('-n' asks nm for
    numeric order). address_map maps each such address to the list of all
    symbol names found at that address.
  """
  markers = (' t ', ' W ', ' T ')
  rows = [line.split()
          for line in GetStdOutputLines(['nm', '-S', '-n', lib_file])
          if any(marker in line for marker in markers)]

  unique_addrs = []
  address_map = {}
  total = len(rows)
  pos = 0
  while pos < total:
    head = rows[pos]
    # A row that does not split into exactly 4 fields lacks some of the
    # information needed to symbolize (address, size, type and symbol name).
    if len(head) != 4:
      pos += 1
      continue

    addr = int(head[0], 16)
    size = int(head[1], 16)

    # Multiple symbols may be at the same address. This is due to aliasing
    # done by the compiler. Since there is no way to be sure which one was
    # called in the profiled run, every symbol name at the address is kept.
    names = []
    while pos < total and int(rows[pos][0], 16) == addr:
      if len(rows[pos]) == 4:
        names.append(rows[pos][3])
      pos += 1
    address_map[addr] = names
    unique_addrs.append((addr, size))

  return (unique_addrs, address_map)
class SymbolNotFoundException(Exception):
  """Exception that carries the value for which no symbol was found.

  The value is stored both as the exception argument and as the 'value'
  attribute; str() of the exception is the repr() of that value.
  """

  def __init__(self, value):
    Exception.__init__(self, value)
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
def BinarySearchAddresses(addr, start, end, arr):
  """Find starting address of a symbol at a particular address.

  The reason we can not directly use the address provided by the log file is
  that the log file may give an address after the start of the symbol. The
  logged address is often one byte after the start. Using this search
  function, rather than just subtracting one from the logged address, allows
  the logging instrumentation to log any address in a function.

  Args:
    addr: the address being searched for
    start: the starting index for the binary search (inclusive)
    end: the ending index for the binary search (inclusive)
    arr: the list being searched, containing tuples of (address, size)

  Returns:
    the starting address of the symbol whose range [address, address + size)
    contains addr

  Raises:
    SymbolNotFoundException: if no examined entry contains addr, or if arr is
        empty.
  """
  # With an empty symbol list there is nothing to index; report the address
  # as not found instead of failing with an IndexError.
  if not arr:
    raise SymbolNotFoundException(addr)

  # Narrow the inclusive window until at most two candidates remain.
  while end - start > 1:
    # Floor division keeps the index an int even under true division.
    halfway = (start + end) // 2
    (nm_addr, size) = arr[halfway]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
    if addr < nm_addr:
      end = halfway - 1
    else:
      # Condition (addr >= nm_addr + size) must be true.
      start = halfway + 1

  # arr[i] is a tuple of address and size. Check if addr is inside the range
  # of either remaining candidate.
  for index in (start, end):
    (nm_addr, size) = arr[index]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
  raise SymbolNotFoundException(addr)
def FindFunctions(addr, unique_addrs, address_map):
  """Return the list of function symbol names located at address addr.

  The symbol start address is looked up with BinarySearchAddresses over the
  whole of unique_addrs and then used as the key into address_map.
  """
  last_index = len(unique_addrs) - 1
  symbol_start = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[symbol_start]
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: the address to look up, as an integer
    lib_file: the library or executable passed to addr2line with '-e'

  Returns:
    the two lines printed by 'addr2line -f' joined with ':'

  Raises:
    AssertionError: if addr2line did not print exactly two non-empty lines
  """
  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
  # Output that ends with a newline splits into a trailing empty string;
  # drop empty entries so that the expected two-line output has length 2.
  output = [line for line in GetStdOutputLines(cmd) if line]
  assert len(output) == 2, 'unexpected addr2line output: %r' % (output,)
  return ':'.join(output)
def GetObjectFileNames(obj_dir):
  """Return the paths of all files ending in '.o' found under obj_dir.

  The directory tree is traversed with os.walk and each match is returned
  joined to the directory it was found in.
  """
  return [os.path.join(folder, entry)
          for (folder, _, entries) in os.walk(obj_dir)
          for entry in entries
          if entry.endswith('.o')]
class WarningCollector(object):
  """Writes warnings to stderr up to a limit and counts all of them."""

  def __init__(self, max_warnings):
    self._max_warnings = max_warnings
    self._warnings = 0

  def Write(self, message):
    """Count a warning; print it only while under the configured limit."""
    under_limit = self._warnings < self._max_warnings
    self._warnings += 1
    if under_limit:
      sys.stderr.write(message + '\n')

  def WriteEnd(self, message):
    """Report how many warnings beyond the limit were counted, if any."""
    surplus = self._warnings - self._max_warnings
    if surplus <= 0:
      return
    sys.stderr.write(str(surplus) + ' more warnings for: ' + message + '\n')
def SymbolToSection(obj_dir):
  """Map function symbols to linker section names using objdump.

  Runs 'objdump -w -t' on every object file found under obj_dir and records
  the section of each function symbol. A symbol seen in two different
  sections, or in a section whose name does not start with '.text.', produces
  a warning on stderr instead of a (new) mapping entry.

  Args:
    obj_dir: directory that is searched for object files

  Returns:
    dict mapping symbol name to section name
  """
  symbol_to_section_map = {}
  symbol_warnings = WarningCollector(300)
  for obj_file in GetObjectFileNames(obj_dir):
    for symbol_line in GetStdOutputLines(['objdump', '-w', '-t', obj_file]):
      items = symbol_line.split()
      # All of the symbol lines we care about are in the form
      #   0000000000 g F .text.foo 000000000 [.hidden] foo
      # where g (global) might also be l (local) or w (weak).
      if len(items) <= 4 or items[2] != 'F':
        continue

      # This symbol is a function; its name is the last field.
      symbol = items[-1]
      if symbol.startswith('.LTHUNK'):
        continue

      section = items[3]
      already_mapped = symbol in symbol_to_section_map
      if already_mapped and symbol_to_section_map[symbol] != section:
        symbol_warnings.Write('WARNING: Symbol ' + symbol +
                              ' in conflicting sections ' + section +
                              ' and ' + symbol_to_section_map[symbol])
        continue
      if not section.startswith('.text.'):
        symbol_warnings.Write('WARNING: Symbol ' + symbol +
                              ' in incorrect section ' + section)
        continue
      symbol_to_section_map[symbol] = section

  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_section_map
def main():
  """Write output for profiled run to standard out.

  Expects two positional command line arguments, log_file and lib_file. The
  format of the output depends on the -t/--outputType option:

    lineize:   timestamp, call id and the addr2line result for each call
    orderfile: the linker section of every symbol at each called address,
               followed by an empty line per call
    otherwise: timestamp, call id and symbol names for each call; this is
               the behaviour of the default value 'symbolize'

  Object files are looked up in the 'obj' directory next to the directory
  containing lib_file. Warnings about unknown symbols go to stderr; warnings
  about addresses logged more than once go to standard out.
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type. The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj'))

  # Read the log inside a 'with' block so the file handle is always closed.
  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found. The instrumentation that produces the log file
  # should only print the first time a function is entered.
  seen_addrs = set()  # set membership keeps this check linear overall
  for call in call_info:
    addr = call[3]
    if addr in seen_addrs:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')
    else:
      seen_addrs.add(addr)

  symbol_to_section_map = SymbolToSection(obj_dir)

  unknown_symbol_warnings = WarningCollector(300)
  symbol_not_found_warnings = WarningCollector(300)
  for (sec, usec, callee_id, addr) in call_info:
    prefix = str(sec) + ' ' + str(usec) + '\t' + str(callee_id) + '\t'

    if output_type == 'lineize':
      print(prefix + AddrToLine(addr, lib_file))
      continue

    # Both remaining output types need the symbol names at this address.
    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'WARNING: Did not find function in binary. addr: '
          + hex(addr))
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        if symbol in symbol_to_section_map:
          print(symbol_to_section_map[symbol])
        else:
          unknown_symbol_warnings.Write(
              'WARNING: No known section for symbol ' + symbol)
      print('')
    else:
      # The first symbol shares the line with the call info; aliases at the
      # same address follow on their own tab-indented lines.
      print(prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)

  unknown_symbol_warnings.WriteEnd('no known section for symbol')
  symbol_not_found_warnings.WriteEnd('did not find function')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()