2 # ***** BEGIN LICENSE BLOCK *****
3 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 # The contents of this file are subject to the Mozilla Public License Version
6 # 1.1 (the "License"); you may not use this file except in compliance with
7 # the License. You may obtain a copy of the License at
8 # http://www.mozilla.org/MPL/
10 # Software distributed under the License is distributed on an "AS IS" basis,
11 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 # for the specific language governing rights and limitations under the
15 # The Original Code is mozilla.org code.
17 # The Initial Developer of the Original Code is
18 # The Mozilla Foundation
19 # Portions created by the Initial Developer are Copyright (C) 2007
20 # the Initial Developer. All Rights Reserved.
23 # Ted Mielczarek <ted.mielczarek@gmail.com>
24 # Ben Turner <mozilla@songbirdnest.com>
26 # Alternatively, the contents of this file may be used under the terms of
27 # either the GNU General Public License Version 2 or later (the "GPL"), or
28 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 # in which case the provisions of the GPL or the LGPL are applicable instead
30 # of those above. If you wish to allow use of your version of this file only
31 # under the terms of either the GPL or the LGPL, and not to allow others to
32 # use your version of this file under the terms of the MPL, indicate your
33 # decision by deleting the provisions above and replace them with the notice
34 # and other provisions required by the GPL or the LGPL. If you do not delete
35 # the provisions above, a recipient may use your version of this file under
36 # the terms of any one of the MPL, the GPL or the LGPL.
38 # ***** END LICENSE BLOCK *****
40 # Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
41 # <debug info files or dirs>
42 # Runs dump_syms on each debug info file specified on the command line,
43 # then places the resulting symbol file in the proper directory
44 # structure in the symbol store path. Accepts multiple files
45 # on the command line, so can be called as part of a pipe using
46 # find <dir> | xargs symbolstore.py <dump_syms> <storepath>
47 # But really, you might just want to pass it <dir>.
49 # Parameters accepted:
50 # -c : Copy debug info files to the same directory structure
52 # -a "<archs>" : Run dump_syms -a <arch> for each space separated
53 # cpu architecture in <archs> (only on macOS)
54 # -s <srcdir> : Use <srcdir> as the top source directory to
55 # generate relative filenames.
61 from optparse
import OptionParser
66 """ A base class for version-controlled file information. Ensures that the
67 following attributes are generated only once (successfully):
74 The attributes are generated by a single call to the GetRoot,
75 GetRevision, and GetFilename methods. Those methods are explicitly not
76 implemented here and must be implemented in derived classes. """
78 def __init__(self
, file):
83 def __getattr__(self
, name
):
84 """ __getattr__ is only called for attributes that are not set on self,
85 so setting self.[attr] will prevent future calls to the GetRoot,
86 GetRevision, and GetFilename methods. We don't set the values on
87 failure on the off chance that a future call might succeed. """
95 elif name
== "clean_root":
96 clean_root
= self
.GetCleanRoot()
98 self
.clean_root
= clean_root
101 elif name
== "revision":
102 revision
= self
.GetRevision()
104 self
.revision
= revision
107 elif name
== "filename":
108 filename
= self
.GetFilename()
110 self
.filename
= filename
116 """ This method should return the unmodified root for the file or 'None'
118 raise NotImplementedError
def GetCleanRoot(self):
    """ This method should return the repository root for the file or 'None'
        on failure.

        It is deliberately left unimplemented in the base class; derived
        classes supply the real lookup. """
    # Was spelled 'NotImplementedErrors', which is not a defined name and
    # therefore surfaced as a NameError instead of the intended exception.
    raise NotImplementedError
def GetRevision(self):
    """ Return the revision number for the file, or 'None' on failure.

        The base class provides no implementation of its own. """
    raise NotImplementedError()
def GetFilename(self):
    """ Return the repository-specific filename for the file, or 'None'
        on failure.

        The base class provides no implementation of its own. """
    raise NotImplementedError()
135 class CVSFileInfo(VCSFileInfo
):
136 """ A class to maintain version information for files in a CVS repository.
137 Derived from VCSFileInfo. """
139 def __init__(self
, file, srcdir
):
140 VCSFileInfo
.__init
__(self
, file)
144 (path
, filename
) = os
.path
.split(self
.file)
145 root
= os
.path
.join(path
, "CVS", "Root")
146 if not os
.path
.isfile(root
):
149 root_name
= f
.readline().strip()
153 print >> sys
.stderr
, "Failed to get CVS Root for %s" % filename
156 def GetCleanRoot(self
):
157 parts
= self
.root
.split('@')
159 # we don't want the extra colon
160 return parts
[1].replace(":","")
161 print >> sys
.stderr
, "Failed to get CVS Root for %s" % filename
164 def GetRevision(self
):
165 (path
, filename
) = os
.path
.split(self
.file)
166 entries
= os
.path
.join(path
, "CVS", "Entries")
167 if not os
.path
.isfile(entries
):
169 f
= open(entries
, "r")
171 parts
= line
.split("/")
172 if len(parts
) > 1 and parts
[1] == filename
:
174 print >> sys
.stderr
, "Failed to get CVS Revision for %s" % filename
177 def GetFilename(self
):
179 if self
.revision
and self
.clean_root
:
181 # strip the base path off
182 # but we actually want the last dir in srcdir
183 file = os
.path
.normpath(file)
184 # the lower() is to handle win32+vc8, where
185 # the source filenames come out all lowercase,
186 # but the srcdir can be mixed case
187 if file.lower().startswith(self
.srcdir
.lower()):
188 file = file[len(self
.srcdir
):]
189 (head
, tail
) = os
.path
.split(self
.srcdir
)
191 tail
= os
.path
.basename(head
)
193 return "cvs:%s:%s:%s" % (self
.clean_root
, file, self
.revision
)
196 class SVNFileInfo(VCSFileInfo
):
201 # This regex separates protocol and optional username/password from a url.
202 # For instance, all the following urls will be transformed into
206 # svn+ssh://user@foo.com/bar
207 # svn+ssh://user:pass@foo.com/bar
209 rootRegex
= re
.compile(r
'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
211 def __init__(self
, file):
212 """ We only want to run subversion's info tool once so pull all the data
215 VCSFileInfo
.__init
__(self
, file)
217 if os
.path
.isfile(file):
218 command
= os
.popen("svn info %s" % file, "r")
220 # The last line of the output is usually '\n'
221 if line
.strip() == '':
223 # Split into a key/value pair on the first colon
224 key
, value
= line
.split(':', 1)
225 if key
in ["Repository Root", "Revision", "URL"]:
226 self
.svndata
[key
] = value
.strip()
228 exitStatus
= command
.close()
230 print >> sys
.stderr
, "Failed to get SVN info for %s" % file
233 key
= "Repository Root"
234 if key
in self
.svndata
:
235 match
= self
.rootRegex
.match(self
.svndata
[key
])
237 return match
.group(1)
238 print >> sys
.stderr
, "Failed to get SVN Root for %s" % self
.file
241 # File bug to get this teased out from the current GetRoot, this is temporary
242 def GetCleanRoot(self
):
245 def GetRevision(self
):
247 if key
in self
.svndata
:
248 return self
.svndata
[key
]
249 print >> sys
.stderr
, "Failed to get SVN Revision for %s" % self
.file
252 def GetFilename(self
):
253 if self
.root
and self
.revision
:
254 if "URL" in self
.svndata
and "Repository Root" in self
.svndata
:
255 url
, repo
= self
.svndata
["URL"], self
.svndata
["Repository Root"]
256 file = url
[len(repo
) + 1:]
257 return "svn:%s:%s:%s" % (self
.root
, file, self
.revision
)
258 print >> sys
.stderr
, "Failed to get SVN Filename for %s" % self
.file
263 # A cache of files for which VCS info has already been determined. Used to
264 # prevent extra filesystem activity or process launching.
265 vcsFileInfoCache
= {}
267 def GetVCSFilename(file, srcdir
):
268 """Given a full path to a file, and the top source directory,
269 look for version control information about this file, and return
271 1) a specially formatted filename that contains the VCS type,
272 VCS location, relative filename, and revision number, formatted like:
273 vcs:vcs location:filename:revision
275 cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
276 2) the unmodified root information if it exists"""
277 (path
, filename
) = os
.path
.split(file)
278 if path
== '' or filename
== '':
283 if file in vcsFileInfoCache
:
284 # Already cached this info, use it.
285 fileInfo
= vcsFileInfoCache
[file]
287 if os
.path
.isdir(os
.path
.join(path
, "CVS")):
288 fileInfo
= CVSFileInfo(file, srcdir
)
291 elif os
.path
.isdir(os
.path
.join(path
, ".svn")) or \
292 os
.path
.isdir(os
.path
.join(path
, "_svn")):
293 fileInfo
= SVNFileInfo(file);
294 vcsFileInfoCache
[file] = fileInfo
297 file = fileInfo
.filename
299 # we want forward slashes on win32 paths
300 return (file.replace("\\", "/"), root
)
def GetPlatformSpecificDumper(**kwargs):
    """Return an instance of the Dumper subclass that is appropriate
    for the current platform.

    The subclass is selected from sys.platform and constructed with
    **kwargs passed through unchanged.

    Raises KeyError when sys.platform is not one of the supported
    platforms."""
    platform = sys.platform
    # sys.platform on Linux has been 'linux2', 'linux3' or plain 'linux'
    # depending on the interpreter, so match on the prefix rather than on
    # the exact 'linux2' string.
    if platform.startswith('linux'):
        platform = 'linux2'
    return {'win32': Dumper_Win32,
            'cygwin': Dumper_Win32,
            'linux2': Dumper_Linux,
            'sunos5': Dumper_Solaris,
            'darwin': Dumper_Mac}[platform](**kwargs)
311 def SourceIndex(fileStream
, outputPath
, cvs_root
):
312 """Takes a list of files, writes info to a data block in a .stream file"""
313 # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
314 # Create the srcsrv data block that indexes the pdb file
316 pdbStreamFile
= open(outputPath
, "w")
317 pdbStreamFile
.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=1\r\nSRCSRV: variables ------------------------------------------\r\nCVS_EXTRACT_CMD=%fnchdir%(%targ%)cvs.exe -d %fnvar%(%var2%) checkout -r %var4% -d %var4% -N %var3%\r\nMYSERVER=''')
318 pdbStreamFile
.write(cvs_root
)
319 pdbStreamFile
.write('''\r\nSRCSRVTRG=%targ%\\%var4%\\%fnbksl%(%var3%)\r\nSRCSRVCMD=%CVS_EXTRACT_CMD%\r\nSRCSRV: source files ---------------------------------------\r\n''')
320 pdbStreamFile
.write(fileStream
) # can't do string interpolation because the source server also uses this and so there are % in the above
321 pdbStreamFile
.write("SRCSRV: end ------------------------------------------------\r\n\n")
322 pdbStreamFile
.close()
326 """This class can dump symbols from a file with debug info, and
327 store the output in a directory structure that is valid for use as
328 a Breakpad symbol server. Requires a path to a dump_syms binary--
329 |dump_syms| and a directory to store symbols in--|symbol_path|.
330 Optionally takes a list of processor architectures to process from
331 each debug file--|archs|, the full path to the top source
332 directory--|srcdir|, for generating relative source file names,
333 and an option to copy debug info files alongside the dumped
334 symbol files--|copy_debug|, mostly useful for creating a
335 Microsoft Symbol Server from the resulting output.
337 You don't want to use this directly if you intend to call
338 ProcessDir. Instead, call GetPlatformSpecificDumper to
339 get an instance of a subclass."""
340 def __init__(self
, dump_syms
, symbol_path
,
341 archs
=None, srcdir
=None, copy_debug
=False, vcsinfo
=False, srcsrv
=False):
342 # popen likes absolute paths, at least on windows
343 self
.dump_syms
= dump_syms
344 self
.symbol_path
= symbol_path
346 # makes the loop logic simpler
349 self
.archs
= ['-a %s' % a
for a
in archs
.split()]
350 if srcdir
is not None:
351 self
.srcdir
= os
.path
.normpath(srcdir
)
354 self
.copy_debug
= copy_debug
355 self
.vcsinfo
= vcsinfo
358 # subclasses override this
359 def ShouldProcess(self
, file):
362 def RunFileCommand(self
, file):
363 """Utility function, returns the output of file(1)"""
365 # we use -L to read the targets of symlinks,
366 # and -b to print just the content, not the filename
367 return os
.popen("file -Lb " + file).read()
371 # This is a no-op except on Win32
372 def FixFilenameCase(self
, file):
375 # This is a no-op except on Win32
376 def SourceServerIndexing(self
, debug_file
, guid
, sourceFileStream
, cvs_root
):
379 # subclasses override this if they want to support this
380 def CopyDebug(self
, file, debug_file
, guid
):
383 def Process(self
, file_or_dir
):
384 "Process a file or all the (valid) files in a directory."
385 if os
.path
.isdir(file_or_dir
):
386 return self
.ProcessDir(file_or_dir
)
387 elif os
.path
.isfile(file_or_dir
):
388 return self
.ProcessFile(file_or_dir
)
389 # maybe it doesn't exist?
392 def ProcessDir(self
, dir):
393 """Process all the valid files in this directory. Valid files
394 are determined by calling ShouldProcess."""
396 for root
, dirs
, files
in os
.walk(dir):
398 fullpath
= os
.path
.join(root
, f
)
399 if self
.ShouldProcess(fullpath
):
400 if not self
.ProcessFile(fullpath
):
404 def ProcessFile(self
, file):
405 """Dump symbols from this file into a symbol file, stored
406 in the proper directory structure in |symbol_path|."""
408 sourceFileStream
= ''
409 # tries to get cvsroot from the .mozconfig first - if it's not set
410 # the tinderbox cvs_path will be assigned further down
411 cvs_root
= os
.environ
.get("SRCSRV_ROOT")
412 for arch
in self
.archs
:
414 cmd
= os
.popen("%s %s %s" % (self
.dump_syms
, arch
, file), "r")
415 module_line
= cmd
.next()
416 if module_line
.startswith("MODULE"):
417 # MODULE os cpu guid debug_file
418 (guid
, debug_file
) = (module_line
.split())[3:5]
419 # strip off .pdb extensions, and append .sym
420 sym_file
= re
.sub(r
"\.pdb$", "", debug_file
) + ".sym"
421 # we do want forward slashes here
422 rel_path
= os
.path
.join(debug_file
,
424 sym_file
).replace("\\", "/")
425 full_path
= os
.path
.normpath(os
.path
.join(self
.symbol_path
,
428 os
.makedirs(os
.path
.dirname(full_path
))
429 except OSError: # already exists
431 f
= open(full_path
, "w")
433 # now process the rest of the output
435 if line
.startswith("FILE"):
436 # FILE index filename
437 (x
, index
, filename
) = line
.split(None, 2)
438 if sys
.platform
== "sunos5":
439 start
= filename
.find(self
.srcdir
)
442 filename
= filename
[start
:]
443 filename
= self
.FixFilenameCase(filename
.rstrip())
444 sourcepath
= filename
446 (filename
, rootname
) = GetVCSFilename(filename
, self
.srcdir
)
447 # sets cvs_root in case the loop through files were to end on an empty rootname
451 # gather up files with cvs for indexing
452 if filename
.startswith("cvs"):
453 (ver
, checkout
, source_file
, revision
) = filename
.split(":", 3)
454 sourceFileStream
+= sourcepath
+ "*MYSERVER*" + source_file
+ '*' + revision
+ "\r\n"
455 f
.write("FILE %s %s\r\n" % (index
, filename
))
457 # pass through all other lines unchanged
460 command_exit
= cmd
.close()
462 if command_exit
== 11:
463 print >> sys
.stderr
, "INFO: dump_syms segfault while processing {}, retrying".format(file)
464 return self
.ProcessFile(file)
465 raise Exception("ERROR - dump_syms error while processing {} (exit code {})".format(file, command_exit
))
466 # we output relative paths so callers can get a list of what
470 self
.CopyDebug(file, debug_file
, guid
)
472 # Call on SourceServerIndexing
473 result
= self
.SourceServerIndexing(debug_file
, guid
, sourceFileStream
, cvs_root
)
475 except StopIteration:
476 print >> sys
.stderr
, "WARN: dump_syms - no debug info extracted for {}".format(file)
479 print >> sys
.stderr
, "Unexpected error: ", sys
.exc_info()[0]
483 # Platform-specific subclasses. For the most part, these just have
484 # logic to determine what files to extract symbols from.
486 class Dumper_Win32(Dumper
):
487 fixedFilenameCaseCache
= {}
489 def ShouldProcess(self
, file):
490 """This function will allow processing of pdb files that have dll
491 or exe files with the same base name next to them."""
494 ".bat", ".class", ".config", ".css", ".glsl", ".hrc", ".ini", ".jar", ".mo", ".msu",
495 ".ods", ".png", ".py", ".pyc", ".rdb", ".rst", ".sh", ".svg", ".ttf", ".txt", ".xml",
497 (path
,ext
) = os
.path
.splitext(file)
498 basename
= os
.path
.basename(file)
499 if ext
in skip_extensions
:
501 elif os
.path
.getsize(file) == 21:
502 # content is the "invalid - merged lib" stub
504 elif basename
.startswith("LICENSE") or basename
.startswith("README"):
506 elif basename
== "msvcp140.dll" or basename
== "vcruntime140.dll":
508 elif basename
.startswith("wininst-") or basename
== "fetch_macholib" or basename
== "command_template":
509 # ignore python distutils stubs and scripts
512 print >> sys
.stderr
, "INFO: Skipping {}, has no extension".format(file)
516 def FixFilenameCase(self
, file):
517 """Recent versions of Visual C++ put filenames into
518 PDB files as all lowercase. If the file exists
519 on the local filesystem, fix it."""
521 # Use a cached version if we have one.
522 if file in self
.fixedFilenameCaseCache
:
523 return self
.fixedFilenameCaseCache
[file]
527 (path
, filename
) = os
.path
.split(file)
528 if os
.path
.isdir(path
):
529 lc_filename
= filename
.lower()
530 for f
in os
.listdir(path
):
531 if f
.lower() == lc_filename
:
532 result
= os
.path
.join(path
, f
)
535 # Cache the corrected version to avoid future filesystem hits.
536 self
.fixedFilenameCaseCache
[file] = result
539 def CopyDebug(self
, file, debug_file
, guid
):
540 rel_path
= os
.path
.join(debug_file
,
542 debug_file
).replace("\\", "/")
544 full_path
= os
.path
.normpath(os
.path
.join(self
.symbol_path
,
546 shutil
.copyfile(file, full_path
)
549 def SourceServerIndexing(self
, debug_file
, guid
, sourceFileStream
, cvs_root
):
550 # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
552 streamFilename
= debug_file
+ ".stream"
553 stream_output_path
= os
.path
.join(cwd
, streamFilename
)
554 # Call SourceIndex to create the .stream file
555 result
= SourceIndex(sourceFileStream
, stream_output_path
, cvs_root
)
558 pdbstr_path
= os
.environ
.get("PDBSTR_PATH")
559 pdbstr
= os
.path
.normpath(pdbstr_path
)
560 pdb_rel_path
= os
.path
.join(debug_file
, guid
, debug_file
)
561 pdb_filename
= os
.path
.normpath(os
.path
.join(self
.symbol_path
, pdb_rel_path
))
562 # move to the dir with the stream files to call pdbstr
563 os
.chdir(os
.path
.dirname(stream_output_path
))
564 os
.spawnv(os
.P_WAIT
, pdbstr
, [pdbstr
, "-w", "-p:" + pdb_filename
, "-i:" + streamFilename
, "-s:srcsrv"])
565 # clean up all the .stream files when done
566 os
.remove(stream_output_path
)
569 class Dumper_Linux(Dumper
):
570 def ShouldProcess(self
, file):
571 """This function will allow processing of files that are
572 executable, or end with the .so extension, and additionally
573 file(1) reports as being ELF files. It expects to find the file
575 if file.endswith(".so") or file.endswith(".bin") or os
.access(file, os
.X_OK
):
576 return self
.RunFileCommand(file).startswith("ELF")
579 def CopyDebug(self
, file, debug_file
, guid
):
580 # We want to strip out the debug info, and add a
581 # .gnu_debuglink section to the object, so the debugger can
582 # actually load our debug info later.
583 file_dbg
= file + ".dbg"
584 os
.system("objcopy --only-keep-debug %s %s" % (file, file_dbg
))
585 os
.system("objcopy --add-gnu-debuglink=%s %s" % (file_dbg
, file))
587 rel_path
= os
.path
.join(debug_file
,
590 full_path
= os
.path
.normpath(os
.path
.join(self
.symbol_path
,
592 shutil
.copyfile(file_dbg
, full_path
)
593 # gzip the shipped debug files
594 os
.system("gzip %s" % full_path
)
595 print rel_path
+ ".gz"
597 class Dumper_Solaris(Dumper
):
598 def RunFileCommand(self
, file):
599 """Utility function, returns the output of file(1)"""
601 output
= os
.popen("file " + file).read()
602 return output
.split('\t')[1];
606 def ShouldProcess(self
, file):
607 """This function will allow processing of files that are
608 executable, or end with the .so extension, and additionally
609 file(1) reports as being ELF files. It expects to find the file
611 if file.endswith(".so") or os
.access(file, os
.X_OK
):
612 return self
.RunFileCommand(file).startswith("ELF")
615 class Dumper_Mac(Dumper
):
616 def ShouldProcess(self
, file):
617 """This function will allow processing of files that are
618 executable, or end with the .dylib extension, and additionally
619 file(1) reports as being Mach-O files. It expects to find the file
621 if file.endswith(".dylib") or os
.access(file, os
.X_OK
):
622 return self
.RunFileCommand(file).startswith("Mach-O")
625 # Entry point if called as a standalone program
627 parser
= OptionParser(usage
="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
628 parser
.add_option("-c", "--copy",
629 action
="store_true", dest
="copy_debug", default
=False,
630 help="Copy debug info files into the same directory structure as symbol files")
631 parser
.add_option("-a", "--archs",
632 action
="store", dest
="archs",
633 help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on macOS)")
634 parser
.add_option("-s", "--srcdir",
635 action
="store", dest
="srcdir",
636 help="Use SRCDIR to determine relative paths to source files")
637 parser
.add_option("-v", "--vcs-info",
638 action
="store_true", dest
="vcsinfo",
639 help="Try to retrieve VCS info for each FILE listed in the output")
640 parser
.add_option("-i", "--source-index",
641 action
="store_true", dest
="srcsrv", default
=False,
642 help="Add source index information to debug files, making them suitable for use in a source server.")
643 (options
, args
) = parser
.parse_args()
645 #check to see if the pdbstr.exe exists
647 pdbstr
= os
.environ
.get("PDBSTR_PATH")
648 if not os
.path
.exists(pdbstr
):
649 print >> sys
.stderr
, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
653 parser
.error("not enough arguments")
656 dumper
= GetPlatformSpecificDumper(dump_syms
=args
[0],
658 copy_debug
=options
.copy_debug
,
660 srcdir
=options
.srcdir
,
661 vcsinfo
=options
.vcsinfo
,
662 srcsrv
=options
.srcsrv
)
666 # run main if run directly
667 if __name__
== "__main__":