1 # a waf tool to extract symbols from object files or libraries
2 # using nm, producing a set of exposed defined/undefined symbols
4 import Utils
, Build
, subprocess
, Logs
, re
5 from samba_wildcard
import fake_build_environment
6 from samba_utils
import *
8 # these are the data structures used in symbols.py:
10 # bld.env.symbol_map : dictionary mapping public symbol names to list of
11 # subsystem names where that symbol exists
13 # t.in_library : list of libraries that t is in
15 # bld.env.public_symbols: set of public symbols for each subsystem
16 # bld.env.used_symbols : set of used symbols for each subsystem
18 # bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
20 # bld.env.library_dict : dictionary mapping built library paths to subsystem names
22 # LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
25 def symbols_extract(bld
, objfiles
, dynamic
=False):
26 '''extract symbols from objfile, returning a dictionary containing
27 the set of undefined and public symbols for each file'''
31 # see if we can get some results from the nm cache
32 if not bld
.env
.nm_cache
:
35 objfiles
= set(objfiles
).copy()
39 if obj
in bld
.env
.nm_cache
:
40 ret
[obj
] = bld
.env
.nm_cache
[obj
].copy()
45 if len(objfiles
) == 0:
50 # needed for some .so files
52 cmd
.extend(list(objfiles
))
54 nmpipe
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
).stdout
55 if len(objfiles
) == 1:
56 filename
= list(objfiles
)[0]
57 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set()}
61 if line
.endswith(':'):
63 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set() }
65 cols
= line
.split(" ")
68 # see if the line starts with an address
75 if symbol_type
in "BDGTRVWSi":
77 ret
[filename
]["PUBLIC"].add(symbol
)
78 elif symbol_type
in "U":
79 ret
[filename
]["UNDEFINED"].add(symbol
)
84 bld
.env
.nm_cache
[obj
] = ret
[obj
].copy()
86 bld
.env
.nm_cache
[obj
] = { "PUBLIC": set(), "UNDEFINED" : set() }
92 if name
.find(".objlist") != -1:
97 def find_ldd_path(bld
, libname
, binary
):
98 '''find the path to the syslib we will link against'''
100 if not bld
.env
.syslib_paths
:
101 bld
.env
.syslib_paths
= {}
102 if libname
in bld
.env
.syslib_paths
:
103 return bld
.env
.syslib_paths
[libname
]
105 lddpipe
= subprocess
.Popen(['ldd', binary
], stdout
=subprocess
.PIPE
).stdout
108 cols
= line
.split(" ")
109 if len(cols
) < 3 or cols
[1] != "=>":
111 if cols
[0].startswith("libc."):
113 bld
.env
.libc_path
= cols
[2]
114 if cols
[0].startswith(libname
):
116 bld
.env
.syslib_paths
[libname
] = ret
# Regular expressions for parsing the output of `readelf --dynamic`.
# Raw strings are used so the backslash-escaped brackets reach the regex
# engine as written, instead of relying on Python passing unrecognised
# string escapes through unchanged (which newer interpreters warn about).
# In both patterns group(1) is the text found between the square brackets.
re_sharedlib = re.compile(r'Shared library: \[(.*)\]')
re_rpath = re.compile(r'Library rpath: \[(.*)\]')
124 def get_libs(bld
, binname
):
125 '''find the list of linked libraries for any binary or library
126 binname is the path to the binary/library on disk
128 We do this using readelf instead of ldd as we need to avoid recursing
129 into system libraries
132 # see if we can get the result from the ldd cache
133 if not bld
.env
.lib_cache
:
134 bld
.env
.lib_cache
= {}
135 if binname
in bld
.env
.lib_cache
:
136 return bld
.env
.lib_cache
[binname
].copy()
141 elfpipe
= subprocess
.Popen(['readelf', '--dynamic', binname
], stdout
=subprocess
.PIPE
).stdout
143 m
= re_sharedlib
.search(line
)
146 m
= re_rpath
.search(line
)
148 rpath
.extend(m
.group(1).split(":"))
154 path
= os
.path
.join(r
, lib
)
155 if os
.path
.exists(path
):
156 ret
.add(os
.path
.realpath(path
))
160 # we didn't find this lib using rpath. It is probably a system
161 # library, so to find the path to it we either need to use ldd
162 # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
163 # use ldd for now, even though it is slow
164 path
= find_ldd_path(bld
, lib
, binname
)
166 ret
.add(os
.path
.realpath(path
))
168 bld
.env
.lib_cache
[binname
] = ret
.copy()
173 def get_libs_recursive(bld
, binname
, seen
):
174 '''find the recursive list of linked libraries for any binary or library
175 binname is the path to the binary/library on disk. seen is a set used
180 ret
= get_libs(bld
, binname
)
183 # we don't want to recurse into system libraries. If a system
184 # library that we use (eg. libcups) happens to use another library
185 # (such as libkrb5) which contains common symbols with our own
186 # libraries, then that is not an error
187 if lib
in bld
.env
.library_dict
:
188 ret
= ret
.union(get_libs_recursive(bld
, lib
, seen
))
def find_syslib_path(bld, libname, deps):
    '''locate the system library that our targets really link against

    bld     -- build context; bld.env is handed to abspath() and supplies
               PYTHON_VERSION
    libname -- name of the syslib, without the "lib" prefix
    deps    -- targets that depend on the library; only the first is used

    Returns whatever find_ldd_path() returns for the lower-cased
    "lib<libname>" prefix.
    '''
    # Strategy: pick a target that depends on the library and run ldd on
    # its link output to see where the library is actually resolved from.
    first_dep = deps[0]
    linked_binary = first_dep.link_task.outputs[0].abspath(bld.env)

    # "python" gets the configured version appended before the lookup
    # (presumably because the library file name carries the version)
    if libname == "python":
        libname = libname + bld.env.PYTHON_VERSION

    lib_prefix = "lib%s" % libname.lower()
    return find_ldd_path(bld, lib_prefix, linked_binary)
206 def build_symbol_sets(bld
, tgt_list
):
207 '''build the public_symbols and undefined_symbols attributes for each target'''
209 if bld
.env
.public_symbols
:
212 objlist
= [] # list of object file
213 objmap
= {} # map from object filename to target (subsystem) name
216 t
.public_symbols
= set()
217 t
.undefined_symbols
= set()
218 t
.used_symbols
= set()
219 for tsk
in getattr(t
, 'compiled_tasks', []):
220 for output
in tsk
.outputs
:
221 objpath
= output
.abspath(bld
.env
)
222 objlist
.append(objpath
)
225 symbols
= symbols_extract(bld
, objlist
)
228 t
.public_symbols
= t
.public_symbols
.union(symbols
[obj
]["PUBLIC"])
229 t
.undefined_symbols
= t
.undefined_symbols
.union(symbols
[obj
]["UNDEFINED"])
230 t
.used_symbols
= t
.used_symbols
.union(symbols
[obj
]["UNDEFINED"])
232 t
.undefined_symbols
= t
.undefined_symbols
.difference(t
.public_symbols
)
234 # and the reverse map of public symbols to subsystem name
235 bld
.env
.symbol_map
= {}
238 for s
in t
.public_symbols
:
239 if not s
in bld
.env
.symbol_map
:
240 bld
.env
.symbol_map
[s
] = []
241 bld
.env
.symbol_map
[s
].append(real_name(t
.sname
))
243 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
245 bld
.env
.public_symbols
= {}
247 name
= real_name(t
.sname
)
248 if name
in bld
.env
.public_symbols
:
249 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t
.public_symbols
)
251 bld
.env
.public_symbols
[name
] = t
.public_symbols
252 if t
.samba_type
== 'LIBRARY':
253 for dep
in t
.add_objects
:
254 t2
= bld
.name_to_obj(dep
, bld
.env
)
255 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
256 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t2
.public_symbols
)
258 bld
.env
.used_symbols
= {}
260 name
= real_name(t
.sname
)
261 if name
in bld
.env
.used_symbols
:
262 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t
.used_symbols
)
264 bld
.env
.used_symbols
[name
] = t
.used_symbols
265 if t
.samba_type
== 'LIBRARY':
266 for dep
in t
.add_objects
:
267 t2
= bld
.name_to_obj(dep
, bld
.env
)
268 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
269 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t2
.used_symbols
)
272 def build_library_dict(bld
, tgt_list
):
273 '''build the library_dict dictionary'''
275 if bld
.env
.library_dict
:
278 bld
.env
.library_dict
= {}
281 if t
.samba_type
in [ 'LIBRARY', 'PYTHON' ]:
282 linkpath
= os
.path
.realpath(t
.link_task
.outputs
[0].abspath(bld
.env
))
283 bld
.env
.library_dict
[linkpath
] = t
.sname
286 def build_syslib_sets(bld
, tgt_list
):
287 '''build the public_symbols for all syslibs'''
289 if bld
.env
.syslib_symbols
:
292 # work out what syslibs we depend on, and what targets those are used in
296 if getattr(t
, 'uselib', []) and t
.samba_type
in [ 'LIBRARY', 'BINARY', 'PYTHON' ]:
298 if lib
in ['PYEMBED', 'PYEXT']:
300 if not lib
in syslibs
:
302 syslibs
[lib
].append(t
)
304 # work out the paths to each syslib
307 path
= find_syslib_path(bld
, lib
, syslibs
[lib
])
309 Logs
.warn("Unable to find syslib path for %s" % lib
)
311 syslib_paths
.append(path
)
312 objmap
[path
] = lib
.lower()
315 syslib_paths
.append(bld
.env
.libc_path
)
316 objmap
[bld
.env
.libc_path
] = 'c'
318 symbols
= symbols_extract(bld
, syslib_paths
, dynamic
=True)
320 # keep a map of syslib names to public symbols
321 bld
.env
.syslib_symbols
= {}
323 bld
.env
.syslib_symbols
[lib
] = symbols
[lib
]["PUBLIC"]
325 # add to the map of symbols to dependencies
327 for sym
in symbols
[lib
]["PUBLIC"]:
328 if not sym
in bld
.env
.symbol_map
:
329 bld
.env
.symbol_map
[sym
] = []
330 bld
.env
.symbol_map
[sym
].append(objmap
[lib
])
332 # keep the libc symbols as well, as these are useful for some of the
334 bld
.env
.libc_symbols
= symbols
[bld
.env
.libc_path
]["PUBLIC"]
336 # add to the combined map of dependency name to public_symbols
337 for lib
in bld
.env
.syslib_symbols
:
338 bld
.env
.public_symbols
[objmap
[lib
]] = bld
.env
.syslib_symbols
[lib
]
341 def build_autodeps(bld
, t
):
342 '''build the set of dependencies for a target'''
344 name
= real_name(t
.sname
)
346 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
348 for sym
in t
.undefined_symbols
:
349 if sym
in t
.public_symbols
:
351 if sym
in bld
.env
.symbol_map
:
352 depname
= bld
.env
.symbol_map
[sym
]
353 if depname
== [ name
]:
354 # self dependencies aren't interesting
356 if t
.in_library
== depname
:
357 # no need to depend on the library we are part of
359 if depname
[0] in ['c', 'python']:
360 # these don't go into autodeps
362 if targets
[depname
[0]] in [ 'SYSLIB' ]:
365 t2
= bld
.name_to_obj(depname
[0], bld
.env
)
366 if len(t2
.in_library
) != 1:
369 if t2
.in_library
== t
.in_library
:
370 # if we're part of the same library, we don't need to autodep
372 deps
.add(t2
.in_library
[0])
376 def build_library_names(bld
, tgt_list
):
377 '''add a in_library attribute to all targets that are part of a library'''
379 if bld
.env
.done_build_library_names
:
386 if t
.samba_type
in [ 'LIBRARY' ]:
387 for obj
in t
.samba_deps_extended
:
388 t2
= bld
.name_to_obj(obj
, bld
.env
)
389 if t2
and t2
.samba_type
in [ 'SUBSYSTEM', 'ASN1' ]:
390 if not t
.sname
in t2
.in_library
:
391 t2
.in_library
.append(t
.sname
)
392 bld
.env
.done_build_library_names
= True
395 def check_library_deps(bld
, t
):
396 '''check that all the autodeps that have mutual dependency of this
397 target are in the same library as the target'''
399 name
= real_name(t
.sname
)
401 if len(t
.in_library
) > 1:
402 Logs
.warn("WARNING: Target '%s' in multiple libraries: %s" % (t
.sname
, t
.in_library
))
404 for dep
in t
.autodeps
:
405 t2
= bld
.name_to_obj(dep
, bld
.env
)
408 for dep2
in t2
.autodeps
:
409 if dep2
== name
and t
.in_library
!= t2
.in_library
:
410 Logs
.warn("WARNING: mutual dependency %s <=> %s" % (name
, real_name(t2
.sname
)))
411 Logs
.warn("Libraries should match. %s != %s" % (t
.in_library
, t2
.in_library
))
412 # raise Utils.WafError("illegal mutual dependency")
415 def check_syslib_collisions(bld
, tgt_list
):
416 '''check if a target has any symbol collisions with a syslib
418 We do not want any code in Samba to use a symbol name from a
419 system library. The chance of that causing problems is just too
420 high. Note that libreplace uses a rep_XX approach of renaming
426 for lib
in bld
.env
.syslib_symbols
:
427 common
= t
.public_symbols
.intersection(bld
.env
.syslib_symbols
[lib
])
429 Logs
.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t
.sname
, common
, lib
))
432 raise Utils
.WafError("symbols in common with system libraries")
435 def check_dependencies(bld
, t
):
436 '''check for depenencies that should be changed'''
438 if bld
.name_to_obj(t
.sname
+ ".objlist", bld
.env
):
441 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
443 remaining
= t
.undefined_symbols
.copy()
444 remaining
= remaining
.difference(t
.public_symbols
)
446 sname
= real_name(t
.sname
)
448 deps
= set(t
.samba_deps
)
449 for d
in t
.samba_deps
:
450 if targets
[d
] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]:
452 bld
.ASSERT(d
in bld
.env
.public_symbols
, "Failed to find symbol list for dependency '%s'" % d
)
453 diff
= remaining
.intersection(bld
.env
.public_symbols
[d
])
454 if not diff
and targets
[sname
] != 'LIBRARY':
455 Logs
.info("Target '%s' has no dependency on %s" % (sname
, d
))
457 remaining
= remaining
.difference(diff
)
459 t
.unsatisfied_symbols
= set()
461 for sym
in remaining
:
462 if sym
in bld
.env
.symbol_map
:
463 dep
= bld
.env
.symbol_map
[sym
]
464 if not dep
[0] in needed
:
465 needed
[dep
[0]] = set()
466 needed
[dep
[0]].add(sym
)
468 t
.unsatisfied_symbols
.add(sym
)
471 Logs
.info("Target '%s' should add dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
475 def check_syslib_dependencies(bld
, t
):
476 '''check for syslib depenencies'''
478 if bld
.name_to_obj(t
.sname
+ ".objlist", bld
.env
):
481 sname
= real_name(t
.sname
)
485 features
= TO_LIST(t
.features
)
486 if 'pyembed' in features
or 'pyext' in features
:
487 if 'python' in bld
.env
.public_symbols
:
488 t
.unsatisfied_symbols
= t
.unsatisfied_symbols
.difference(bld
.env
.public_symbols
['python'])
491 for sym
in t
.unsatisfied_symbols
:
492 if sym
in bld
.env
.symbol_map
:
493 dep
= bld
.env
.symbol_map
[sym
][0]
496 if not dep
in needed
:
503 Logs
.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
506 debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname
, " ".join(remaining
)))
510 def symbols_symbolcheck(task
):
511 '''check the internal dependency lists'''
513 tgt_list
= get_tgt_list(bld
)
515 build_symbol_sets(bld
, tgt_list
)
516 build_library_names(bld
, tgt_list
)
520 if getattr(t
, 'source', ''):
521 build_autodeps(bld
, t
)
524 check_dependencies(bld
, t
)
527 check_library_deps(bld
, t
)
529 def symbols_syslibcheck(task
):
530 '''check the syslib dependencies'''
532 tgt_list
= get_tgt_list(bld
)
534 build_syslib_sets(bld
, tgt_list
)
535 check_syslib_collisions(bld
, tgt_list
)
538 check_syslib_dependencies(bld
, t
)
541 def symbols_whyneeded(task
):
542 """check why 'target' needs to link to 'subsystem'"""
544 tgt_list
= get_tgt_list(bld
)
546 why
= Options
.options
.WHYNEEDED
.split(":")
548 raise Utils
.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
552 build_symbol_sets(bld
, tgt_list
)
553 build_library_names(bld
, tgt_list
)
554 build_syslib_sets(bld
, tgt_list
)
556 Logs
.info("Checking why %s needs to link to %s" % (target
, subsystem
))
557 if not target
in bld
.env
.used_symbols
:
558 Logs
.warn("unable to find target '%s' in used_symbols dict" % target
)
560 if not subsystem
in bld
.env
.public_symbols
:
561 Logs
.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem
)
563 overlap
= bld
.env
.used_symbols
[target
].intersection(bld
.env
.public_symbols
[subsystem
])
565 Logs
.info("target '%s' doesn't use any public symbols from '%s'" % (target
, subsystem
))
567 Logs
.info("target '%s' uses symbols %s from '%s'" % (target
, overlap
, subsystem
))
570 def report_duplicate(bld
, binname
, sym
, libs
, fail_on_error
):
571 '''report duplicated symbols'''
572 if sym
in ['_init', '_fini', '_edata', '_end', '__bss_start']:
576 if lib
in bld
.env
.library_dict
:
577 libnames
.append(bld
.env
.library_dict
[lib
])
581 raise Utils
.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
583 print("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
586 def symbols_dupcheck_binary(bld
, binname
, fail_on_error
):
587 '''check for duplicated symbols in one binary'''
589 libs
= get_libs_recursive(bld
, binname
, set())
590 symlist
= symbols_extract(bld
, libs
, dynamic
=True)
593 for libpath
in symlist
:
594 for sym
in symlist
[libpath
]['PUBLIC']:
595 if sym
== '_GLOBAL_OFFSET_TABLE_':
597 if not sym
in symmap
:
599 symmap
[sym
].add(libpath
)
601 if len(symmap
[sym
]) > 1:
602 for libpath
in symmap
[sym
]:
603 if libpath
in bld
.env
.library_dict
:
604 report_duplicate(bld
, binname
, sym
, symmap
[sym
], fail_on_error
)
607 def symbols_dupcheck(task
, fail_on_error
=False):
608 '''check for symbols defined in two different subsystems'''
610 tgt_list
= get_tgt_list(bld
)
612 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
614 build_library_dict(bld
, tgt_list
)
616 if t
.samba_type
== 'BINARY':
617 binname
= os_path_relpath(t
.link_task
.outputs
[0].abspath(bld
.env
), os
.getcwd())
618 symbols_dupcheck_binary(bld
, binname
, fail_on_error
)
def symbols_dupcheck_fatal(task):
    '''check for symbols defined in two different subsystems, strictly

    Thin wrapper that runs symbols_dupcheck() on the same task with
    fail_on_error switched on, so duplicates are treated as failures
    rather than only being reported.
    '''
    strict = True
    symbols_dupcheck(task, fail_on_error=strict)
626 def SYMBOL_CHECK(bld
):
627 '''check our dependency lists'''
628 if Options
.options
.SYMBOLCHECK
:
629 bld
.SET_BUILD_GROUP('symbolcheck')
630 task
= bld(rule
=symbols_symbolcheck
, always
=True, name
='symbol checking')
633 bld
.SET_BUILD_GROUP('syslibcheck')
634 task
= bld(rule
=symbols_syslibcheck
, always
=True, name
='syslib checking')
637 bld
.SET_BUILD_GROUP('syslibcheck')
638 task
= bld(rule
=symbols_dupcheck
, always
=True, name
='symbol duplicate checking')
641 if Options
.options
.WHYNEEDED
:
642 bld
.SET_BUILD_GROUP('syslibcheck')
643 task
= bld(rule
=symbols_whyneeded
, always
=True, name
='check why a dependency is needed')
# Attach SYMBOL_CHECK to Build.BuildContext as a method, so a build context
# instance can invoke it with itself as the `bld` argument.
Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK
649 def DUP_SYMBOL_CHECK(bld
):
650 if Options
.options
.DUP_SYMBOLCHECK
and bld
.env
.DEVELOPER
:
651 '''check for duplicate symbols'''
652 bld
.SET_BUILD_GROUP('syslibcheck')
653 task
= bld(rule
=symbols_dupcheck_fatal
, always
=True, name
='symbol duplicate checking')
# Attach DUP_SYMBOL_CHECK to Build.BuildContext as a method, so a build
# context instance can invoke it with itself as the `bld` argument.
Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK