1 # a waf tool to extract symbols from object files or libraries
2 # using nm, producing a set of exposed defined/undefined symbols
4 import os
, re
, subprocess
5 import Utils
, Build
, Options
, Logs
7 from samba_utils
import TO_LIST
, LOCAL_CACHE
, get_tgt_list
, os_path_relpath
9 # these are the data structures used in symbols.py:
11 # bld.env.symbol_map : dictionary mapping public symbol names to list of
12 # subsystem names where that symbol exists
14 # t.in_library : list of libraries that t is in
16 # bld.env.public_symbols: set of public symbols for each subsystem
17 # bld.env.used_symbols : set of used symbols for each subsystem
19 # bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
21 # bld.env.library_dict : dictionary mapping built library paths to subsystem names
23 # LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
26 def symbols_extract(bld
, objfiles
, dynamic
=False):
27 '''extract symbols from objfile, returning a dictionary containing
28 the set of undefined and public symbols for each file'''
32 # see if we can get some results from the nm cache
33 if not bld
.env
.nm_cache
:
36 objfiles
= set(objfiles
).copy()
40 if obj
in bld
.env
.nm_cache
:
41 ret
[obj
] = bld
.env
.nm_cache
[obj
].copy()
46 if len(objfiles
) == 0:
51 # needed for some .so files
53 cmd
.extend(list(objfiles
))
55 nmpipe
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
).stdout
56 if len(objfiles
) == 1:
57 filename
= list(objfiles
)[0]
58 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set()}
62 if line
.endswith(':'):
64 ret
[filename
] = { "PUBLIC": set(), "UNDEFINED" : set() }
66 cols
= line
.split(" ")
69 # see if the line starts with an address
76 if symbol_type
in "BDGTRVWSi":
78 ret
[filename
]["PUBLIC"].add(symbol
)
79 elif symbol_type
in "U":
80 ret
[filename
]["UNDEFINED"].add(symbol
)
85 bld
.env
.nm_cache
[obj
] = ret
[obj
].copy()
87 bld
.env
.nm_cache
[obj
] = { "PUBLIC": set(), "UNDEFINED" : set() }
93 if name
.find(".objlist") != -1:
98 def find_ldd_path(bld
, libname
, binary
):
99 '''find the path to the syslib we will link against'''
101 if not bld
.env
.syslib_paths
:
102 bld
.env
.syslib_paths
= {}
103 if libname
in bld
.env
.syslib_paths
:
104 return bld
.env
.syslib_paths
[libname
]
106 lddpipe
= subprocess
.Popen(['ldd', binary
], stdout
=subprocess
.PIPE
).stdout
109 cols
= line
.split(" ")
110 if len(cols
) < 3 or cols
[1] != "=>":
112 if cols
[0].startswith("libc."):
114 bld
.env
.libc_path
= cols
[2]
115 if cols
[0].startswith(libname
):
117 bld
.env
.syslib_paths
[libname
] = ret
# Regular expressions for parsing readelf output.
# Raw string literals are used so that the regex escapes \[ and \] reach the
# regex engine unchanged; in a plain string literal they are invalid string
# escape sequences, which newer Python versions warn about.
re_sharedlib = re.compile(r'Shared library: \[(.*)\]')
re_rpath = re.compile(r'Library rpath: \[(.*)\]')
125 def get_libs(bld
, binname
):
126 '''find the list of linked libraries for any binary or library
127 binname is the path to the binary/library on disk
129 We do this using readelf instead of ldd as we need to avoid recursing
130 into system libraries
133 # see if we can get the result from the ldd cache
134 if not bld
.env
.lib_cache
:
135 bld
.env
.lib_cache
= {}
136 if binname
in bld
.env
.lib_cache
:
137 return bld
.env
.lib_cache
[binname
].copy()
142 elfpipe
= subprocess
.Popen(['readelf', '--dynamic', binname
], stdout
=subprocess
.PIPE
).stdout
144 m
= re_sharedlib
.search(line
)
147 m
= re_rpath
.search(line
)
149 rpath
.extend(m
.group(1).split(":"))
155 path
= os
.path
.join(r
, lib
)
156 if os
.path
.exists(path
):
157 ret
.add(os
.path
.realpath(path
))
161 # we didn't find this lib using rpath. It is probably a system
162 # library, so to find the path to it we either need to use ldd
163 # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
164 # use ldd for now, even though it is slow
165 path
= find_ldd_path(bld
, lib
, binname
)
167 ret
.add(os
.path
.realpath(path
))
169 bld
.env
.lib_cache
[binname
] = ret
.copy()
174 def get_libs_recursive(bld
, binname
, seen
):
175 '''find the recursive list of linked libraries for any binary or library
176 binname is the path to the binary/library on disk. seen is a set used
181 ret
= get_libs(bld
, binname
)
184 # we don't want to recurse into system libraries. If a system
185 # library that we use (eg. libcups) happens to use another library
186 # (such as libkrb5) which contains common symbols with our own
187 # libraries, then that is not an error
188 if lib
in bld
.env
.library_dict
:
189 ret
= ret
.union(get_libs_recursive(bld
, lib
, seen
))
def find_syslib_path(bld, libname, deps):
    '''locate the on-disk path of the system library that gets linked in

    deps holds the targets that depend on the library. The first link
    output of the first of those targets is handed to find_ldd_path(),
    which runs ldd on it to see which library file is really used.
    '''
    first_output = deps[0].link_task.outputs[0]
    linked_binary = first_output.abspath(bld.env)

    # the python library name has the configured python version appended
    if libname == "python":
        libname = libname + bld.env.PYTHON_VERSION

    lib_prefix = "lib%s" % libname.lower()
    return find_ldd_path(bld, lib_prefix, linked_binary)
207 def build_symbol_sets(bld
, tgt_list
):
208 '''build the public_symbols and undefined_symbols attributes for each target'''
210 if bld
.env
.public_symbols
:
213 objlist
= [] # list of object file
214 objmap
= {} # map from object filename to target (subsystem) name
217 t
.public_symbols
= set()
218 t
.undefined_symbols
= set()
219 t
.used_symbols
= set()
220 for tsk
in getattr(t
, 'compiled_tasks', []):
221 for output
in tsk
.outputs
:
222 objpath
= output
.abspath(bld
.env
)
223 objlist
.append(objpath
)
226 symbols
= symbols_extract(bld
, objlist
)
229 t
.public_symbols
= t
.public_symbols
.union(symbols
[obj
]["PUBLIC"])
230 t
.undefined_symbols
= t
.undefined_symbols
.union(symbols
[obj
]["UNDEFINED"])
231 t
.used_symbols
= t
.used_symbols
.union(symbols
[obj
]["UNDEFINED"])
233 t
.undefined_symbols
= t
.undefined_symbols
.difference(t
.public_symbols
)
235 # and the reverse map of public symbols to subsystem name
236 bld
.env
.symbol_map
= {}
239 for s
in t
.public_symbols
:
240 if not s
in bld
.env
.symbol_map
:
241 bld
.env
.symbol_map
[s
] = []
242 bld
.env
.symbol_map
[s
].append(real_name(t
.sname
))
244 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
246 bld
.env
.public_symbols
= {}
248 name
= real_name(t
.sname
)
249 if name
in bld
.env
.public_symbols
:
250 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t
.public_symbols
)
252 bld
.env
.public_symbols
[name
] = t
.public_symbols
253 if t
.samba_type
== 'LIBRARY':
254 for dep
in t
.add_objects
:
255 t2
= bld
.get_tgen_by_name(dep
)
256 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
257 bld
.env
.public_symbols
[name
] = bld
.env
.public_symbols
[name
].union(t2
.public_symbols
)
259 bld
.env
.used_symbols
= {}
261 name
= real_name(t
.sname
)
262 if name
in bld
.env
.used_symbols
:
263 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t
.used_symbols
)
265 bld
.env
.used_symbols
[name
] = t
.used_symbols
266 if t
.samba_type
== 'LIBRARY':
267 for dep
in t
.add_objects
:
268 t2
= bld
.get_tgen_by_name(dep
)
269 bld
.ASSERT(t2
is not None, "Library '%s' has unknown dependency '%s'" % (name
, dep
))
270 bld
.env
.used_symbols
[name
] = bld
.env
.used_symbols
[name
].union(t2
.used_symbols
)
273 def build_library_dict(bld
, tgt_list
):
274 '''build the library_dict dictionary'''
276 if bld
.env
.library_dict
:
279 bld
.env
.library_dict
= {}
282 if t
.samba_type
in [ 'LIBRARY', 'PYTHON' ]:
283 linkpath
= os
.path
.realpath(t
.link_task
.outputs
[0].abspath(bld
.env
))
284 bld
.env
.library_dict
[linkpath
] = t
.sname
287 def build_syslib_sets(bld
, tgt_list
):
288 '''build the public_symbols for all syslibs'''
290 if bld
.env
.syslib_symbols
:
293 # work out what syslibs we depend on, and what targets those are used in
297 if getattr(t
, 'uselib', []) and t
.samba_type
in [ 'LIBRARY', 'BINARY', 'PYTHON' ]:
299 if lib
in ['PYEMBED', 'PYEXT']:
301 if not lib
in syslibs
:
303 syslibs
[lib
].append(t
)
305 # work out the paths to each syslib
308 path
= find_syslib_path(bld
, lib
, syslibs
[lib
])
310 Logs
.warn("Unable to find syslib path for %s" % lib
)
312 syslib_paths
.append(path
)
313 objmap
[path
] = lib
.lower()
316 syslib_paths
.append(bld
.env
.libc_path
)
317 objmap
[bld
.env
.libc_path
] = 'c'
319 symbols
= symbols_extract(bld
, syslib_paths
, dynamic
=True)
321 # keep a map of syslib names to public symbols
322 bld
.env
.syslib_symbols
= {}
324 bld
.env
.syslib_symbols
[lib
] = symbols
[lib
]["PUBLIC"]
326 # add to the map of symbols to dependencies
328 for sym
in symbols
[lib
]["PUBLIC"]:
329 if not sym
in bld
.env
.symbol_map
:
330 bld
.env
.symbol_map
[sym
] = []
331 bld
.env
.symbol_map
[sym
].append(objmap
[lib
])
333 # keep the libc symbols as well, as these are useful for some of the
335 bld
.env
.libc_symbols
= symbols
[bld
.env
.libc_path
]["PUBLIC"]
337 # add to the combined map of dependency name to public_symbols
338 for lib
in bld
.env
.syslib_symbols
:
339 bld
.env
.public_symbols
[objmap
[lib
]] = bld
.env
.syslib_symbols
[lib
]
342 def build_autodeps(bld
, t
):
343 '''build the set of dependencies for a target'''
345 name
= real_name(t
.sname
)
347 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
349 for sym
in t
.undefined_symbols
:
350 if sym
in t
.public_symbols
:
352 if sym
in bld
.env
.symbol_map
:
353 depname
= bld
.env
.symbol_map
[sym
]
354 if depname
== [ name
]:
355 # self dependencies aren't interesting
357 if t
.in_library
== depname
:
358 # no need to depend on the library we are part of
360 if depname
[0] in ['c', 'python']:
361 # these don't go into autodeps
363 if targets
[depname
[0]] in [ 'SYSLIB' ]:
366 t2
= bld
.get_tgen_by_name(depname
[0])
367 if len(t2
.in_library
) != 1:
370 if t2
.in_library
== t
.in_library
:
371 # if we're part of the same library, we don't need to autodep
373 deps
.add(t2
.in_library
[0])
377 def build_library_names(bld
, tgt_list
):
378 '''add a in_library attribute to all targets that are part of a library'''
380 if bld
.env
.done_build_library_names
:
387 if t
.samba_type
in [ 'LIBRARY' ]:
388 for obj
in t
.samba_deps_extended
:
389 t2
= bld
.get_tgen_by_name(obj
)
390 if t2
and t2
.samba_type
in [ 'SUBSYSTEM', 'ASN1' ]:
391 if not t
.sname
in t2
.in_library
:
392 t2
.in_library
.append(t
.sname
)
393 bld
.env
.done_build_library_names
= True
396 def check_library_deps(bld
, t
):
397 '''check that all the autodeps that have mutual dependency of this
398 target are in the same library as the target'''
400 name
= real_name(t
.sname
)
402 if len(t
.in_library
) > 1:
403 Logs
.warn("WARNING: Target '%s' in multiple libraries: %s" % (t
.sname
, t
.in_library
))
405 for dep
in t
.autodeps
:
406 t2
= bld
.get_tgen_by_name(dep
)
409 for dep2
in t2
.autodeps
:
410 if dep2
== name
and t
.in_library
!= t2
.in_library
:
411 Logs
.warn("WARNING: mutual dependency %s <=> %s" % (name
, real_name(t2
.sname
)))
412 Logs
.warn("Libraries should match. %s != %s" % (t
.in_library
, t2
.in_library
))
413 # raise Utils.WafError("illegal mutual dependency")
416 def check_syslib_collisions(bld
, tgt_list
):
417 '''check if a target has any symbol collisions with a syslib
419 We do not want any code in Samba to use a symbol name from a
420 system library. The chance of that causing problems is just too
421 high. Note that libreplace uses a rep_XX approach of renaming
427 for lib
in bld
.env
.syslib_symbols
:
428 common
= t
.public_symbols
.intersection(bld
.env
.syslib_symbols
[lib
])
430 Logs
.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t
.sname
, common
, lib
))
433 raise Utils
.WafError("symbols in common with system libraries")
436 def check_dependencies(bld
, t
):
437 '''check for depenencies that should be changed'''
439 if bld
.get_tgen_by_name(t
.sname
+ ".objlist"):
442 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
444 remaining
= t
.undefined_symbols
.copy()
445 remaining
= remaining
.difference(t
.public_symbols
)
447 sname
= real_name(t
.sname
)
449 deps
= set(t
.samba_deps
)
450 for d
in t
.samba_deps
:
451 if targets
[d
] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]:
453 bld
.ASSERT(d
in bld
.env
.public_symbols
, "Failed to find symbol list for dependency '%s'" % d
)
454 diff
= remaining
.intersection(bld
.env
.public_symbols
[d
])
455 if not diff
and targets
[sname
] != 'LIBRARY':
456 Logs
.info("Target '%s' has no dependency on %s" % (sname
, d
))
458 remaining
= remaining
.difference(diff
)
460 t
.unsatisfied_symbols
= set()
462 for sym
in remaining
:
463 if sym
in bld
.env
.symbol_map
:
464 dep
= bld
.env
.symbol_map
[sym
]
465 if not dep
[0] in needed
:
466 needed
[dep
[0]] = set()
467 needed
[dep
[0]].add(sym
)
469 t
.unsatisfied_symbols
.add(sym
)
472 Logs
.info("Target '%s' should add dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
476 def check_syslib_dependencies(bld
, t
):
477 '''check for syslib depenencies'''
479 if bld
.get_tgen_by_name(t
.sname
+ ".objlist"):
482 sname
= real_name(t
.sname
)
486 features
= TO_LIST(t
.features
)
487 if 'pyembed' in features
or 'pyext' in features
:
488 if 'python' in bld
.env
.public_symbols
:
489 t
.unsatisfied_symbols
= t
.unsatisfied_symbols
.difference(bld
.env
.public_symbols
['python'])
492 for sym
in t
.unsatisfied_symbols
:
493 if sym
in bld
.env
.symbol_map
:
494 dep
= bld
.env
.symbol_map
[sym
][0]
497 if not dep
in needed
:
504 Logs
.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname
, dep
, " ".join(needed
[dep
])))
507 debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname
, " ".join(remaining
)))
511 def symbols_symbolcheck(task
):
512 '''check the internal dependency lists'''
514 tgt_list
= get_tgt_list(bld
)
516 build_symbol_sets(bld
, tgt_list
)
517 build_library_names(bld
, tgt_list
)
521 if getattr(t
, 'source', ''):
522 build_autodeps(bld
, t
)
525 check_dependencies(bld
, t
)
528 check_library_deps(bld
, t
)
530 def symbols_syslibcheck(task
):
531 '''check the syslib dependencies'''
533 tgt_list
= get_tgt_list(bld
)
535 build_syslib_sets(bld
, tgt_list
)
536 check_syslib_collisions(bld
, tgt_list
)
539 check_syslib_dependencies(bld
, t
)
542 def symbols_whyneeded(task
):
543 """check why 'target' needs to link to 'subsystem'"""
545 tgt_list
= get_tgt_list(bld
)
547 why
= Options
.options
.WHYNEEDED
.split(":")
549 raise Utils
.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
553 build_symbol_sets(bld
, tgt_list
)
554 build_library_names(bld
, tgt_list
)
555 build_syslib_sets(bld
, tgt_list
)
557 Logs
.info("Checking why %s needs to link to %s" % (target
, subsystem
))
558 if not target
in bld
.env
.used_symbols
:
559 Logs
.warn("unable to find target '%s' in used_symbols dict" % target
)
561 if not subsystem
in bld
.env
.public_symbols
:
562 Logs
.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem
)
564 overlap
= bld
.env
.used_symbols
[target
].intersection(bld
.env
.public_symbols
[subsystem
])
566 Logs
.info("target '%s' doesn't use any public symbols from '%s'" % (target
, subsystem
))
568 Logs
.info("target '%s' uses symbols %s from '%s'" % (target
, overlap
, subsystem
))
571 def report_duplicate(bld
, binname
, sym
, libs
, fail_on_error
):
572 '''report duplicated symbols'''
573 if sym
in ['_init', '_fini', '_edata', '_end', '__bss_start']:
577 if lib
in bld
.env
.library_dict
:
578 libnames
.append(bld
.env
.library_dict
[lib
])
582 raise Utils
.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
584 print("%s: Symbol %s linked in multiple libraries %s" % (binname
, sym
, libnames
))
587 def symbols_dupcheck_binary(bld
, binname
, fail_on_error
):
588 '''check for duplicated symbols in one binary'''
590 libs
= get_libs_recursive(bld
, binname
, set())
591 symlist
= symbols_extract(bld
, libs
, dynamic
=True)
594 for libpath
in symlist
:
595 for sym
in symlist
[libpath
]['PUBLIC']:
596 if sym
== '_GLOBAL_OFFSET_TABLE_':
598 if not sym
in symmap
:
600 symmap
[sym
].add(libpath
)
602 if len(symmap
[sym
]) > 1:
603 for libpath
in symmap
[sym
]:
604 if libpath
in bld
.env
.library_dict
:
605 report_duplicate(bld
, binname
, sym
, symmap
[sym
], fail_on_error
)
608 def symbols_dupcheck(task
, fail_on_error
=False):
609 '''check for symbols defined in two different subsystems'''
611 tgt_list
= get_tgt_list(bld
)
613 targets
= LOCAL_CACHE(bld
, 'TARGET_TYPE')
615 build_library_dict(bld
, tgt_list
)
617 if t
.samba_type
== 'BINARY':
618 binname
= os_path_relpath(t
.link_task
.outputs
[0].abspath(bld
.env
), os
.getcwd())
619 symbols_dupcheck_binary(bld
, binname
, fail_on_error
)
def symbols_dupcheck_fatal(task):
    '''fatal variant of the duplicate symbol check

    Runs the same check as symbols_dupcheck(), but with fail_on_error
    enabled, so that duplicates which are found cause a failure.
    '''
    symbols_dupcheck(task, True)
627 def SYMBOL_CHECK(bld
):
628 '''check our dependency lists'''
629 if Options
.options
.SYMBOLCHECK
:
630 bld
.SET_BUILD_GROUP('symbolcheck')
631 task
= bld(rule
=symbols_symbolcheck
, always
=True, name
='symbol checking')
634 bld
.SET_BUILD_GROUP('syslibcheck')
635 task
= bld(rule
=symbols_syslibcheck
, always
=True, name
='syslib checking')
638 bld
.SET_BUILD_GROUP('syslibcheck')
639 task
= bld(rule
=symbols_dupcheck
, always
=True, name
='symbol duplicate checking')
642 if Options
.options
.WHYNEEDED
:
643 bld
.SET_BUILD_GROUP('syslibcheck')
644 task
= bld(rule
=symbols_whyneeded
, always
=True, name
='check why a dependency is needed')
648 Build
.BuildContext
.SYMBOL_CHECK
= SYMBOL_CHECK
650 def DUP_SYMBOL_CHECK(bld
):
651 if Options
.options
.DUP_SYMBOLCHECK
and bld
.env
.DEVELOPER
:
652 '''check for duplicate symbols'''
653 bld
.SET_BUILD_GROUP('syslibcheck')
654 task
= bld(rule
=symbols_dupcheck_fatal
, always
=True, name
='symbol duplicate checking')
657 Build
.BuildContext
.DUP_SYMBOL_CHECK
= DUP_SYMBOL_CHECK