Improve the VFS Makefile so that it is easier for use out of tree but still works...
[Samba/gebeck_regimport.git] / buildtools / wafsamba / symbols.py
blob87757357afce6811cd358e0005c5e5fc34f5afbd
# a waf tool to extract symbols from object files or libraries
# using nm, producing a set of exposed defined/undefined symbols
4 import Utils, Build, subprocess, Logs, re
5 from samba_wildcard import fake_build_environment
6 from samba_utils import *
# these are the data structures used in symbols.py:
#
# bld.env.symbol_map     : dictionary mapping public symbol names to list of
#                          subsystem names where that symbol exists
#
# t.in_library           : list of libraries that t is in
#
# bld.env.public_symbols : set of public symbols for each subsystem
# bld.env.used_symbols   : set of used symbols for each subsystem
#
# bld.env.syslib_symbols : dictionary mapping system library name to set of
#                          symbols for that library
# bld.env.library_dict   : dictionary mapping built library paths to
#                          subsystem names
#
# LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to
#                                   target type
def symbols_extract(bld, objfiles, dynamic=False):
    '''extract symbols from objfile, returning a dictionary containing
    the set of undefined and public symbols for each file

    bld      : build context; bld.env.nm_cache is used to memoise the
               result for each file
    objfiles : iterable of object file / library paths to pass to nm
    dynamic  : if True, pass -D to nm (needed for some .so files)

    Returns a dictionary mapping each path to a dictionary of the form
    { "PUBLIC": set(), "UNDEFINED": set() }. Files that nm reported
    nothing for get an entry with two empty sets, so that a first call
    and a later call answered from the cache return the same keys.
    '''

    ret = {}

    # see if we can get some results from the nm cache
    if not bld.env.nm_cache:
        bld.env.nm_cache = {}
    cache = bld.env.nm_cache

    remaining = set()
    for obj in set(objfiles):
        if obj in cache:
            ret[obj] = cache[obj].copy()
        else:
            remaining.add(obj)

    if not remaining:
        return ret

    cmd = ["nm"]
    if dynamic:
        # needed for some .so files
        cmd.append("-D")
    cmd.extend(remaining)

    # communicate() reads all of stdout, waits for nm to exit and closes
    # the pipe, so no child process or file descriptor is left behind
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output = proc.communicate()[0]

    filename = None
    if len(remaining) == 1:
        # with a single file there may be no "name:" header line in the
        # output, so attribute the symbols to that file up front
        filename = list(remaining)[0]
        ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() }

    # single letter nm symbol types that are treated as public symbols
    public_types = tuple("BDGTRVWSi")

    for line in output.splitlines():
        line = line.strip()
        if line.endswith(':'):
            # start of the symbol list for the next file
            filename = line[:-1]
            ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() }
            continue
        cols = line.split(" ")
        if len(cols) < 2 or filename is None:
            # blank line, or something we cannot attribute/parse
            continue
        # see if the line starts with an address
        if len(cols) == 3:
            symbol_type = cols[1]
            symbol = cols[2]
        else:
            symbol_type = cols[0]
            symbol = cols[1]
        if symbol_type in public_types:
            # its a public symbol
            ret[filename]["PUBLIC"].add(symbol)
        elif symbol_type == "U":
            ret[filename]["UNDEFINED"].add(symbol)

    # add to the cache
    for obj in remaining:
        if obj not in ret:
            ret[obj] = { "PUBLIC": set(), "UNDEFINED" : set() }
        cache[obj] = ret[obj].copy()

    return ret
def real_name(name):
    '''return the name of a target with any trailing ".objlist" removed

    Names that do not end in ".objlist" are returned unchanged.
    '''
    suffix = ".objlist"
    # only strip a real suffix; a name that merely contains ".objlist"
    # somewhere else must not lose its last characters
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name
def find_ldd_path(bld, libname, binary):
    '''find the path to the syslib we will link against

    Runs ldd on 'binary' and returns the path printed for the last
    library whose name starts with 'libname', or None if there is none.
    Results are remembered in bld.env.syslib_paths, and the path of any
    "libc." entry seen is stored in bld.env.libc_path.
    '''
    if not bld.env.syslib_paths:
        bld.env.syslib_paths = {}
    try:
        return bld.env.syslib_paths[libname]
    except KeyError:
        pass

    found = None
    ldd_output = subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE).stdout
    for raw in ldd_output:
        fields = raw.strip().split(" ")
        # only lines of the form "name => path ..." are of interest
        if len(fields) >= 3 and fields[1] == "=>":
            soname = fields[0]
            path = fields[2]
            if soname.startswith("libc."):
                # save this one too
                bld.env.libc_path = path
            if soname.startswith(libname):
                found = path

    bld.env.syslib_paths[libname] = found
    return found
# some regular expressions for parsing readelf output
# (raw strings, so that "\[" is an escaped bracket for the regex engine
# rather than an invalid escape sequence in a normal string literal)
re_sharedlib = re.compile(r'Shared library: \[(.*)\]')
re_rpath = re.compile(r'Library rpath: \[(.*)\]')
def get_libs(bld, binname):
    '''find the list of linked libraries for any binary or library
    binname is the path to the binary/library on disk

    We do this using readelf instead of ldd as we need to avoid recursing
    into system libraries

    Returns a set of resolved library paths. The result for each binname
    is remembered in bld.env.lib_cache.
    '''

    # see if we can get the result from the ldd cache
    if not bld.env.lib_cache:
        bld.env.lib_cache = {}
    if binname in bld.env.lib_cache:
        return bld.env.lib_cache[binname].copy()

    needed = set()
    search_dirs = []

    readelf_out = subprocess.Popen(['readelf', '--dynamic', binname], stdout=subprocess.PIPE).stdout
    for line in readelf_out:
        lib_match = re_sharedlib.search(line)
        if lib_match:
            needed.add(lib_match.group(1))
        rpath_match = re_rpath.search(line)
        if rpath_match:
            search_dirs.extend(rpath_match.group(1).split(":"))

    resolved = set()
    for lib in needed:
        for directory in search_dirs:
            candidate = os.path.join(directory, lib)
            if os.path.exists(candidate):
                resolved.add(os.path.realpath(candidate))
                break
        else:
            # we didn't find this lib using rpath. It is probably a system
            # library, so to find the path to it we either need to use ldd
            # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
            # use ldd for now, even though it is slow
            ldd_path = find_ldd_path(bld, lib, binname)
            if ldd_path:
                resolved.add(os.path.realpath(ldd_path))

    bld.env.lib_cache[binname] = resolved.copy()

    return resolved
def get_libs_recursive(bld, binname, seen):
    '''find the recursive list of linked libraries for any binary or library
    binname is the path to the binary/library on disk. seen is a set used
    to prevent loops

    Returns a set of library paths; an empty set if binname was already
    in seen.
    '''
    if binname in seen:
        return set()

    direct = get_libs(bld, binname)
    seen.add(binname)

    everything = direct
    for lib in direct:
        # we don't want to recurse into system libraries. If a system
        # library that we use (eg. libcups) happens to use another library
        # (such as libkrb5) which contains common symbols with our own
        # libraries, then that is not an error
        if lib not in bld.env.library_dict:
            continue
        everything = everything.union(get_libs_recursive(bld, lib, seen))
    return everything
def find_syslib_path(bld, libname, deps):
    '''find the path to the syslib we will link against

    deps is the list of targets that use the library; only the first
    one is examined.
    '''
    # the strategy is to use the targets that depend on the library, and run ldd
    # on it to find the real location of the library that is used
    first_user = deps[0]
    linkpath = first_user.link_task.outputs[0].abspath(bld.env)

    if libname == "python":
        libname = libname + bld.env.PYTHON_VERSION

    soname_prefix = "lib" + libname.lower()
    return find_ldd_path(bld, soname_prefix, linkpath)
def _merge_target_symbols(bld, tgt_list, attr):
    '''return a dictionary mapping real target names to the union of the
    'attr' symbol sets of all targets with that name. For LIBRARY targets
    the sets of the targets named in add_objects are merged in as well'''
    merged = {}
    for t in tgt_list:
        name = real_name(t.sname)
        if name in merged:
            merged[name] = merged[name].union(getattr(t, attr))
        else:
            merged[name] = getattr(t, attr)
        if t.samba_type == 'LIBRARY':
            for dep in t.add_objects:
                t2 = bld.name_to_obj(dep, bld.env)
                bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep))
                merged[name] = merged[name].union(getattr(t2, attr))
    return merged


def build_symbol_sets(bld, tgt_list):
    '''build the public_symbols and undefined_symbols attributes for each target

    Sets t.public_symbols, t.undefined_symbols and t.used_symbols on every
    target in tgt_list, and fills in bld.env.symbol_map,
    bld.env.public_symbols and bld.env.used_symbols. Does nothing if
    bld.env.public_symbols is already populated.
    '''

    if bld.env.public_symbols:
        return

    objlist = []  # list of object files
    objmap = {}   # map from object filename to target (subsystem)

    for t in tgt_list:
        t.public_symbols = set()
        t.undefined_symbols = set()
        t.used_symbols = set()
        for tsk in getattr(t, 'compiled_tasks', []):
            for output in tsk.outputs:
                objpath = output.abspath(bld.env)
                objlist.append(objpath)
                objmap[objpath] = t

    symbols = symbols_extract(bld, objlist)
    for obj in objlist:
        t = objmap[obj]
        t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"])
        t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"])
        t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"])

    # a symbol that one object of a target defines is not undefined for the
    # target as a whole. This has to be done for every target, not just for
    # whichever target the loop variable happened to be left pointing at
    for t in tgt_list:
        t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols)

    # and the reverse map of public symbols to subsystem name
    bld.env.symbol_map = {}

    for t in tgt_list:
        for s in t.public_symbols:
            if not s in bld.env.symbol_map:
                bld.env.symbol_map[s] = []
            bld.env.symbol_map[s].append(real_name(t.sname))

    # NOTE(review): the result is not used here; the call is kept in case
    # LOCAL_CACHE has side effects - confirm against samba_utils
    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    bld.env.public_symbols = _merge_target_symbols(bld, tgt_list, 'public_symbols')
    bld.env.used_symbols = _merge_target_symbols(bld, tgt_list, 'used_symbols')
def build_library_dict(bld, tgt_list):
    '''build the library_dict dictionary

    bld.env.library_dict maps the real path of the first link output of
    each LIBRARY or PYTHON target to the name of that target. Does
    nothing if the dictionary is already populated.
    '''

    if bld.env.library_dict:
        return

    bld.env.library_dict = {}

    for t in tgt_list:
        if t.samba_type not in ('LIBRARY', 'PYTHON'):
            continue
        output_node = t.link_task.outputs[0]
        linkpath = os.path.realpath(output_node.abspath(bld.env))
        bld.env.library_dict[linkpath] = t.sname
def build_syslib_sets(bld, tgt_list):
    '''build the public_symbols for all syslibs

    Fills in bld.env.syslib_symbols and bld.env.libc_symbols, and adds the
    syslib symbols to bld.env.symbol_map and bld.env.public_symbols. Does
    nothing if bld.env.syslib_symbols is already populated.
    '''

    if bld.env.syslib_symbols:
        return

    # work out what syslibs we depend on, and what targets those are used in
    syslibs = {}
    objmap = {}   # map from syslib path to lower case syslib name
    for t in tgt_list:
        if not getattr(t, 'uselib', []):
            continue
        if t.samba_type not in ('LIBRARY', 'BINARY', 'PYTHON'):
            continue
        for lib in t.uselib:
            if lib in ('PYEMBED', 'PYEXT'):
                lib = "python"
            syslibs.setdefault(lib, []).append(t)

    # work out the paths to each syslib
    syslib_paths = []
    for lib in syslibs:
        path = find_syslib_path(bld, lib, syslibs[lib])
        if path is None:
            Logs.warn("Unable to find syslib path for %s" % lib)
            continue
        syslib_paths.append(path)
        objmap[path] = lib.lower()

    # add in libc
    libc_path = bld.env.libc_path
    syslib_paths.append(libc_path)
    objmap[libc_path] = 'c'

    symbols = symbols_extract(bld, syslib_paths, dynamic=True)

    # keep a map of syslib names to public symbols
    bld.env.syslib_symbols = {}
    for lib in symbols:
        bld.env.syslib_symbols[lib] = symbols[lib]["PUBLIC"]

    # add to the map of symbols to dependencies
    for lib in symbols:
        for sym in symbols[lib]["PUBLIC"]:
            if sym not in bld.env.symbol_map:
                bld.env.symbol_map[sym] = []
            bld.env.symbol_map[sym].append(objmap[lib])

    # keep the libc symbols as well, as these are useful for some of the
    # sanity checks
    bld.env.libc_symbols = symbols[bld.env.libc_path]["PUBLIC"]

    # add to the combined map of dependency name to public_symbols
    for lib in bld.env.syslib_symbols:
        depname = objmap[lib]
        bld.env.public_symbols[depname] = bld.env.syslib_symbols[lib]
def build_autodeps(bld, t):
    '''build the set of dependencies for a target

    The result is stored in t.autodeps. It is worked out from the
    undefined symbols of t and bld.env.symbol_map.
    '''
    autodeps = set()
    own_name = real_name(t.sname)

    target_types = LOCAL_CACHE(bld, 'TARGET_TYPE')

    for sym in t.undefined_symbols:
        if sym in t.public_symbols or sym not in bld.env.symbol_map:
            continue
        providers = bld.env.symbol_map[sym]
        if providers == [ own_name ]:
            # self dependencies aren't interesting
            continue
        if t.in_library == providers:
            # no need to depend on the library we are part of
            continue
        provider = providers[0]
        if provider in ('c', 'python'):
            # these don't go into autodeps
            continue
        if target_types[provider] == 'SYSLIB':
            autodeps.add(provider)
            continue
        provider_obj = bld.name_to_obj(provider, bld.env)
        if len(provider_obj.in_library) != 1:
            autodeps.add(provider)
        elif provider_obj.in_library != t.in_library:
            # if we're part of the same library, we don't need to autodep,
            # otherwise depend on the library the provider is in
            autodeps.add(provider_obj.in_library[0])
    t.autodeps = autodeps
def build_library_names(bld, tgt_list):
    '''add a in_library attribute to all targets that are part of a library

    Every target gets an in_library list. For each LIBRARY target, its
    name is added to the in_library list of each SUBSYSTEM or ASN1 target
    named in its samba_deps_extended. Only runs once per build
    environment (see bld.env.done_build_library_names).
    '''

    if bld.env.done_build_library_names:
        return

    for t in tgt_list:
        t.in_library = []

    for lib in tgt_list:
        if lib.samba_type != 'LIBRARY':
            continue
        for depname in lib.samba_deps_extended:
            member = bld.name_to_obj(depname, bld.env)
            if not member or member.samba_type not in ('SUBSYSTEM', 'ASN1'):
                continue
            if lib.sname not in member.in_library:
                member.in_library.append(lib.sname)

    bld.env.done_build_library_names = True
def check_library_deps(bld, t):
    '''check that all the autodeps that have mutual dependency of this
    target are in the same library as the target

    Problems are reported as warnings; nothing is raised.
    '''

    name = real_name(t.sname)

    if len(t.in_library) > 1:
        Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library))

    for dep in t.autodeps:
        other = bld.name_to_obj(dep, bld.env)
        if other is None:
            continue
        for back_dep in other.autodeps:
            if back_dep != name or t.in_library == other.in_library:
                continue
            Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(other.sname)))
            Logs.warn("Libraries should match. %s != %s" % (t.in_library, other.in_library))
            # raise Utils.WafError("illegal mutual dependency")
def check_syslib_collisions(bld, tgt_list):
    '''check if a target has any symbol collisions with a syslib

    We do not want any code in Samba to use a symbol name from a
    system library. The chance of that causing problems is just too
    high. Note that libreplace uses a rep_XX approach of renaming
    symbols via macros

    Every collision is logged as an error, and Utils.WafError is raised
    at the end if there was at least one.
    '''

    collided = False
    for t in tgt_list:
        for lib in bld.env.syslib_symbols:
            common = t.public_symbols.intersection(bld.env.syslib_symbols[lib])
            if not common:
                continue
            Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib))
            collided = True
    if collided:
        raise Utils.WafError("symbols in common with system libraries")
def check_dependencies(bld, t):
    '''check for dependencies that should be changed

    Logs declared dependencies that provide none of the symbols t needs,
    and dependencies that t should add. Symbols that nothing in
    bld.env.symbol_map provides are stored in t.unsatisfied_symbols.
    Targets that have a matching ".objlist" target are skipped.
    '''

    if bld.name_to_obj(t.sname + ".objlist", bld.env):
        return

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    remaining = t.undefined_symbols.difference(t.public_symbols)

    sname = real_name(t.sname)

    for d in t.samba_deps:
        if targets[d] in ('EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR'):
            continue
        bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d)
        provided = remaining.intersection(bld.env.public_symbols[d])
        if provided or targets[sname] == 'LIBRARY':
            remaining = remaining.difference(provided)
        else:
            Logs.info("Target '%s' has no dependency on %s" % (sname, d))

    t.unsatisfied_symbols = set()
    needed = {}
    for sym in remaining:
        if sym not in bld.env.symbol_map:
            t.unsatisfied_symbols.add(sym)
            continue
        provider = bld.env.symbol_map[sym][0]
        needed.setdefault(provider, set()).add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep])))
def check_syslib_dependencies(bld, t):
    '''check for syslib dependencies

    Works from t.unsatisfied_symbols and logs which syslib dependencies
    the target should add. Targets that have a matching ".objlist"
    target are skipped.
    '''

    if bld.name_to_obj(t.sname + ".objlist", bld.env):
        return

    sname = real_name(t.sname)

    features = TO_LIST(t.features)
    uses_python = 'pyembed' in features or 'pyext' in features
    if uses_python and 'python' in bld.env.public_symbols:
        t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python'])

    needed = {}
    remaining = set()
    for sym in t.unsatisfied_symbols:
        if sym not in bld.env.symbol_map:
            remaining.add(sym)
            continue
        dep = bld.env.symbol_map[sym][0]
        if dep == 'c':
            continue
        needed.setdefault(dep, set()).add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep])))

    if remaining:
        debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, " ".join(remaining)))
def symbols_symbolcheck(task):
    '''check the internal dependency lists

    task.env.bld must be the build context.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)

    # every target needs its autodeps before any of the checks run
    for tgt in all_targets:
        tgt.autodeps = set()
        if getattr(tgt, 'source', ''):
            build_autodeps(bld, tgt)

    # run each check over all of the targets in turn
    for check in (check_dependencies, check_library_deps):
        for tgt in all_targets:
            check(bld, tgt)
def symbols_syslibcheck(task):
    '''check the syslib dependencies

    task.env.bld must be the build context.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_syslib_sets(bld, all_targets)
    check_syslib_collisions(bld, all_targets)

    for tgt in all_targets:
        check_syslib_dependencies(bld, tgt)
def symbols_whyneeded(task):
    """check why 'target' needs to link to 'subsystem'

    The pair is taken from Options.options.WHYNEEDED, which must be of
    the form TARGET:DEPENDENCY, otherwise Utils.WafError is raised.
    """
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    parts = Options.options.WHYNEEDED.split(":")
    if len(parts) != 2:
        raise Utils.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
    target, subsystem = parts

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)
    build_syslib_sets(bld, all_targets)

    Logs.info("Checking why %s needs to link to %s" % (target, subsystem))
    if target not in bld.env.used_symbols:
        Logs.warn("unable to find target '%s' in used_symbols dict" % target)
        return
    if subsystem not in bld.env.public_symbols:
        Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem)
        return

    overlap = bld.env.used_symbols[target].intersection(bld.env.public_symbols[subsystem])
    if overlap:
        Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem))
    else:
        Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem))
def report_duplicate(bld, binname, sym, libs, fail_on_error):
    '''report duplicated symbols

    libs is the collection of library paths that all provide sym. Paths
    found in bld.env.library_dict are shown by the name stored there.
    Raises Utils.WafError if fail_on_error is set, otherwise prints the
    message. The symbols _init and _fini are never reported.
    '''
    if sym in ('_init', '_fini'):
        return

    libnames = []
    for lib in libs:
        shown = lib
        if lib in bld.env.library_dict:
            shown = bld.env.library_dict[lib]
        libnames.append(shown)

    message = "%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames)
    if not fail_on_error:
        print(message)
        return
    raise Utils.WafError(message)
def symbols_dupcheck_binary(bld, binname, fail_on_error):
    '''check for duplicated symbols in one binary

    A symbol is reported when more than one of the libraries that binname
    links to (recursively) exports it, and at least one of those
    libraries is listed in bld.env.library_dict.
    '''

    libs = get_libs_recursive(bld, binname, set())
    symlist = symbols_extract(bld, libs, dynamic=True)

    # map from symbol name to the set of library paths exporting it
    providers = {}
    for libpath in symlist:
        for sym in symlist[libpath]['PUBLIC']:
            providers.setdefault(sym, set()).add(libpath)

    for sym in providers:
        where = providers[sym]
        if len(where) <= 1:
            continue
        for libpath in where:
            if libpath in bld.env.library_dict:
                report_duplicate(bld, binname, sym, where, fail_on_error)
                break
def symbols_dupcheck(task, fail_on_error=False):
    '''check for symbols defined in two different subsystems

    Checks every BINARY target. task.env.bld must be the build context.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    # the result is not used here, the call is kept as in the original
    LOCAL_CACHE(bld, 'TARGET_TYPE')

    build_library_dict(bld, all_targets)
    for tgt in all_targets:
        if tgt.samba_type != 'BINARY':
            continue
        linked = tgt.link_task.outputs[0].abspath(bld.env)
        binname = os_path_relpath(linked, os.getcwd())
        symbols_dupcheck_binary(bld, binname, fail_on_error)
def symbols_dupcheck_fatal(task):
    '''check for symbols defined in two different subsystems (and fail if duplicates are found)

    Same as symbols_dupcheck with fail_on_error switched on.
    '''
    symbols_dupcheck(task, True)
def SYMBOL_CHECK(bld):
    '''check our dependency lists

    Adds the symbol checking tasks selected by Options.options.SYMBOLCHECK
    and Options.options.WHYNEEDED to the build, each in its build group.
    '''
    # (build group, rule, task name) for each check to add, in order
    checks = []
    if Options.options.SYMBOLCHECK:
        checks.append(('symbolcheck', symbols_symbolcheck, 'symbol checking'))
        checks.append(('syslibcheck', symbols_syslibcheck, 'syslib checking'))
        checks.append(('syslibcheck', symbols_dupcheck, 'symbol duplicate checking'))
    if Options.options.WHYNEEDED:
        checks.append(('syslibcheck', symbols_whyneeded, 'check why a dependency is needed'))

    for group, rule, name in checks:
        bld.SET_BUILD_GROUP(group)
        task = bld(rule=rule, always=True, name=name)
        # the rule functions find the build context through task.env.bld
        task.env.bld = bld


Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK
def DUP_SYMBOL_CHECK(bld):
    '''check for duplicate symbols

    Adds the fatal duplicate symbol check to the 'syslibcheck' build
    group. This is only done when Options.options.DUP_SYMBOLCHECK and
    bld.env.DEVELOPER are set and bld.env.BUILD_FARM is not.
    '''
    if Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER and not bld.env.BUILD_FARM:
        bld.SET_BUILD_GROUP('syslibcheck')
        task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking')
        # the rule function finds the build context through task.env.bld
        task.env.bld = bld


Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK