samba-tool: implement user getgroups command
[Samba.git] / buildtools / wafsamba / symbols.py
blobd3bf9ac1c6b72c35c4694cb43a937d76d928c708
1 # a waf tool to extract symbols from object files or libraries
2 # using nm, producing a set of exposed defined/undefined symbols
4 import os, re, subprocess
5 from waflib import Utils, Build, Options, Logs, Errors
6 from waflib.Logs import debug
7 from samba_utils import TO_LIST, LOCAL_CACHE, get_tgt_list
9 # these are the data structures used in symbols.py:
11 # bld.env.symbol_map : dictionary mapping public symbol names to list of
12 # subsystem names where that symbol exists
14 # t.in_library : list of libraries that t is in
16 # bld.env.public_symbols: set of public symbols for each subsystem
17 # bld.env.used_symbols : set of used symbols for each subsystem
19 # bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
20 # for that library
21 # bld.env.library_dict : dictionary mapping built library paths to subsystem names
23 # LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
def symbols_extract(bld, objfiles, dynamic=False):
    '''extract symbols from objfiles using nm

    Returns a dictionary with one entry per requested file, keyed by the
    path exactly as the caller passed it.  Each value is a dictionary
    holding two sets: "PUBLIC" (symbols the file defines) and "UNDEFINED"
    (symbols it references).  Symbol names are returned as native strings.

    bld.env.nm_cache is consulted first and updated afterwards, so nm is
    only run on files that have not been seen before.  If dynamic is true
    nm is run with -D.
    '''

    def new_entry():
        return {"PUBLIC": set(), "UNDEFINED": set()}

    def native(value):
        # nm output is bytes; the rest of this file compares symbol names
        # against ordinary string literals
        if isinstance(value, str):
            return value
        return value.decode("utf-8", "replace")

    ret = {}

    # see if we can get some results from the nm cache
    if not bld.env.nm_cache:
        bld.env.nm_cache = {}

    remaining = set()
    for obj in set(objfiles):
        if obj in bld.env.nm_cache:
            ret[obj] = bld.env.nm_cache[obj].copy()
        else:
            remaining.add(obj)

    if not remaining:
        return ret

    cmd = ["nm"]
    if dynamic:
        # needed for some .so files
        cmd.append("-D")
    cmd.extend(remaining)

    # communicate() reads all of the output and waits for nm to exit, so
    # neither the pipe nor the child process is left behind
    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]

    # nm prints file names as bytes; map them back to the caller's own
    # spelling so that lookups with the original path succeed
    fsencode = getattr(os, "fsencode", lambda path: path)
    requested = dict((fsencode(obj), obj) for obj in remaining)

    filename = None
    if len(remaining) == 1:
        # nm prints no "file:" header when given a single file
        filename = next(iter(remaining))
        ret[filename] = new_entry()

    for line in output.splitlines():
        line = line.strip()
        if line.endswith(b':'):
            reported = line[:-1]
            # names nm reports that were not requested keep nm's spelling
            filename = requested.get(reported, reported)
            ret[filename] = new_entry()
            continue
        cols = line.split(b" ")
        if len(cols) < 2 or filename is None:
            # blank line, or a symbol line we cannot attribute to a file
            continue
        # see if the line starts with an address
        if len(cols) == 3:
            symbol_type = cols[1]
            symbol = cols[2]
        else:
            symbol_type = cols[0]
            symbol = cols[1]
        if len(symbol_type) != 1:
            # not a one-letter nm type code
            continue
        if symbol_type in b"BDGTRVWSi":
            # its a public symbol
            ret[filename]["PUBLIC"].add(native(symbol))
        elif symbol_type == b"U":
            ret[filename]["UNDEFINED"].add(native(symbol))

    # add to the cache; files nm printed nothing for get an empty entry,
    # which is also what a later cache hit would return for them
    for obj in remaining:
        if obj not in ret:
            ret[obj] = new_entry()
        bld.env.nm_cache[obj] = ret[obj].copy()

    return ret
def real_name(name):
    '''return the subsystem name for a target name

    A name ending in ".objlist" is returned without that suffix; any other
    name is returned unchanged.
    '''
    suffix = ".objlist"
    # only strip a real suffix: testing with find() would also chop eight
    # characters off a name that merely contains ".objlist" somewhere
    if name.endswith(suffix):
        return name[:-len(suffix)]
    return name
def find_ldd_path(bld, libname, binary):
    '''find the path to the syslib we will link against

    Runs ldd on binary and returns the resolved path (bytes, as printed by
    ldd) of the last "name => path" line whose name starts with libname,
    or None when no line matches.  libname may be str or bytes.

    Results, including None, are cached in bld.env.syslib_paths keyed by
    libname.  As a side effect the path of any "libc." entry seen is stored
    in bld.env.libc_path.
    '''
    ret = None
    if not bld.env.syslib_paths:
        bld.env.syslib_paths = {}
    if libname in bld.env.syslib_paths:
        return bld.env.syslib_paths[libname]

    # ldd output is bytes, so the prefix we compare against must be too
    if isinstance(libname, bytes):
        prefix = libname
    else:
        prefix = libname.encode("utf-8")

    # communicate() drains the pipe and waits for ldd to exit
    output = subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE).communicate()[0]
    for line in output.splitlines():
        cols = line.strip().split(b" ")
        if len(cols) < 3 or cols[1] != b"=>":
            continue
        if cols[0].startswith(b"libc."):
            # save this one too
            bld.env.libc_path = cols[2]
        if cols[0].startswith(prefix):
            ret = cols[2]
    bld.env.syslib_paths[libname] = ret
    return ret
# some regular expressions for parsing readelf output
# (raw bytes literals, so that "\[" reaches the regex engine as an escaped
# bracket instead of being an invalid string escape)
re_sharedlib = re.compile(br'Shared library: \[(.*)\]')
# output from readelf could be `Library rpath` or `Library runpath`
re_rpath = re.compile(br'Library (rpath|runpath): \[(.*)\]')
def get_libs(bld, binname):
    '''find the list of linked libraries for any binary or library
    binname is the path to the binary/library on disk

    We do this using readelf instead of ldd as we need to avoid recursing
    into system libraries

    Returns a set of resolved library paths as native strings.  Results are
    cached in bld.env.lib_cache keyed by binname; a copy of the cached set
    is returned on a hit.
    '''

    def native_path(path):
        # readelf/ldd give us bytes; hand back ordinary strings so the
        # paths compare equal to str paths used elsewhere
        # NOTE(review): assumes bld.env.library_dict keys are str - confirm
        if isinstance(path, str):
            return path
        return os.fsdecode(path)

    # see if we can get the result from the ldd cache
    if not bld.env.lib_cache:
        bld.env.lib_cache = {}
    if binname in bld.env.lib_cache:
        return bld.env.lib_cache[binname].copy()

    rpath = []
    libs = set()

    # communicate() drains the pipe and waits for readelf to exit
    output = subprocess.Popen(['readelf', '--dynamic', binname],
                              stdout=subprocess.PIPE).communicate()[0]
    for line in output.splitlines():
        m = re_sharedlib.search(line)
        if m:
            libs.add(m.group(1))
        m = re_rpath.search(line)
        if m:
            # output from Popen is always bytestr even in py3
            rpath.extend(m.group(2).split(b":"))

    ret = set()
    for lib in libs:
        found = False
        for r in rpath:
            path = os.path.join(r, lib)
            if os.path.exists(path):
                ret.add(native_path(os.path.realpath(path)))
                found = True
                break
        if not found:
            # we didn't find this lib using rpath. It is probably a system
            # library, so to find the path to it we either need to use ldd
            # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
            # use ldd for now, even though it is slow
            path = find_ldd_path(bld, lib, binname)
            if path:
                ret.add(native_path(os.path.realpath(path)))

    bld.env.lib_cache[binname] = ret.copy()

    return ret
def get_libs_recursive(bld, binname, seen):
    '''return the libraries binname links to, following our own libraries

    binname is the path to the binary/library on disk.  seen is a set of
    paths that have already been visited; it is updated in place and is
    what stops dependency loops from recursing forever.
    '''
    if binname in seen:
        return set()

    direct = get_libs(bld, binname)
    seen.add(binname)

    collected = direct
    for lib in direct:
        # only libraries listed in library_dict are followed.  A system
        # library (eg. libcups) may pull in another library (such as
        # libkrb5) that shares symbol names with ours, and that is not an
        # error
        if lib not in bld.env.library_dict:
            continue
        collected = collected.union(get_libs_recursive(bld, lib, seen))
    return collected
def find_syslib_path(bld, libname, deps):
    '''locate the system library libname as used by the targets in deps

    ldd is run (via find_ldd_path) on the first link output of the first
    target in deps, looking for a library whose file name starts with
    "lib" + libname in lower case.  For "python" the value of
    bld.env.PYTHON_VERSION is appended to the name first.
    '''
    first_user = deps[0]
    linkpath = first_user.link_task.outputs[0].abspath(bld.env)

    wanted = libname
    if wanted == "python":
        wanted = wanted + bld.env.PYTHON_VERSION

    prefix = "lib%s" % wanted.lower()
    return find_ldd_path(bld, prefix, linkpath)
def _symbols_by_subsystem(bld, tgt_list, attr):
    '''return a dictionary mapping real_name(t.sname) to the union of the
    symbol set stored in attribute attr of every target with that name; a
    LIBRARY target also absorbs the sets of the targets named in its
    add_objects'''
    result = {}
    for t in tgt_list:
        name = real_name(t.sname)
        if name in result:
            result[name] = result[name].union(getattr(t, attr))
        else:
            result[name] = getattr(t, attr)
        if t.samba_type == 'LIBRARY':
            for dep in t.add_objects:
                t2 = bld.get_tgen_by_name(dep)
                bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep))
                result[name] = result[name].union(getattr(t2, attr))
    return result


def build_symbol_sets(bld, tgt_list):
    '''build the public_symbols and undefined_symbols attributes for each target

    Every target in tgt_list gets public_symbols, undefined_symbols and
    used_symbols sets, filled from the nm output of its compiled objects.
    The function also fills bld.env.symbol_map, bld.env.public_symbols and
    bld.env.used_symbols.  It does nothing if bld.env.public_symbols is
    already populated.
    '''

    if bld.env.public_symbols:
        return

    objlist = []  # list of object file
    objmap = {}   # map from object filename to the target that built it

    for t in tgt_list:
        t.public_symbols = set()
        t.undefined_symbols = set()
        t.used_symbols = set()
        for tsk in getattr(t, 'compiled_tasks', []):
            for output in tsk.outputs:
                objpath = output.abspath(bld.env)
                objlist.append(objpath)
                objmap[objpath] = t

    symbols = symbols_extract(bld, objlist)
    for obj in objlist:
        t = objmap[obj]
        t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"])
        t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"])
        t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"])

    # a symbol defined by one object of a target and used by another object
    # of the same target is not undefined for the target as a whole; this
    # has to be applied to every target, not just the last one processed
    for t in tgt_list:
        t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols)

    # and the reverse map of public symbols to subsystem name
    bld.env.symbol_map = {}

    for t in tgt_list:
        for s in t.public_symbols:
            if not s in bld.env.symbol_map:
                bld.env.symbol_map[s] = []
            bld.env.symbol_map[s].append(real_name(t.sname))

    # NOTE(review): the returned cache is not used here; the call is kept
    # in case LOCAL_CACHE creates the cache as a side effect - confirm
    LOCAL_CACHE(bld, 'TARGET_TYPE')

    bld.env.public_symbols = _symbols_by_subsystem(bld, tgt_list, 'public_symbols')
    bld.env.used_symbols = _symbols_by_subsystem(bld, tgt_list, 'used_symbols')
def build_library_dict(bld, tgt_list):
    '''fill bld.env.library_dict, mapping built library paths to target names

    Only LIBRARY and PYTHON targets are recorded; the key is the real path
    of the first output of the target's link task.  Nothing is done if the
    dictionary is already populated.
    '''
    if bld.env.library_dict:
        return

    bld.env.library_dict = {}

    for t in tgt_list:
        if t.samba_type not in ('LIBRARY', 'PYTHON'):
            continue
        built = t.link_task.outputs[0].abspath(bld.env)
        bld.env.library_dict[os.path.realpath(built)] = t.sname
def build_syslib_sets(bld, tgt_list):
    '''collect the public symbols of every system library that is in use

    Fills bld.env.syslib_symbols and bld.env.libc_symbols, and adds the
    system library symbols to bld.env.symbol_map and bld.env.public_symbols.
    Nothing is done if bld.env.syslib_symbols is already populated.
    '''
    if bld.env.syslib_symbols:
        return

    # group the targets by the syslib they use
    users = {}
    objmap = {}
    for t in tgt_list:
        if not getattr(t, 'uselib', []):
            continue
        if t.samba_type not in ('LIBRARY', 'BINARY', 'PYTHON'):
            continue
        for lib in t.uselib:
            # both python uselib variants are tracked under one name
            key = "python" if lib in ('PYEMBED', 'PYEXT') else lib
            users.setdefault(key, []).append(t)

    # resolve each syslib to a path on disk
    syslib_paths = []
    for lib, lib_users in users.items():
        path = find_syslib_path(bld, lib, lib_users)
        if path is None:
            Logs.warn("Unable to find syslib path for %s" % lib)
            continue
        syslib_paths.append(path)
        objmap[path] = lib.lower()

    # libc is always included, under the name 'c'
    libc = bld.env.libc_path
    syslib_paths.append(libc)
    objmap[libc] = 'c'

    symbols = symbols_extract(bld, syslib_paths, dynamic=True)

    # syslib path -> public symbols
    bld.env.syslib_symbols = dict((lib, found["PUBLIC"]) for lib, found in symbols.items())

    # extend the symbol -> providers map with the syslib names
    for lib, found in symbols.items():
        for sym in found["PUBLIC"]:
            bld.env.symbol_map.setdefault(sym, []).append(objmap[lib])

    # the libc symbols are kept separately as well, as some of the sanity
    # checks want them
    bld.env.libc_symbols = symbols[libc]["PUBLIC"]

    # and make the syslibs visible in the combined name -> public symbols map
    for lib in bld.env.syslib_symbols:
        bld.env.public_symbols[objmap[lib]] = bld.env.syslib_symbols[lib]
def build_autodeps(bld, t):
    '''work out which dependencies target t needs, judging by its symbols

    The result is stored as a set of names in t.autodeps.
    '''
    wanted = set()
    name = real_name(t.sname)

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    for sym in t.undefined_symbols:
        if sym in t.public_symbols or sym not in bld.env.symbol_map:
            continue

        providers = bld.env.symbol_map[sym]
        # self dependencies aren't interesting, and there is no need to
        # depend on the library we are part of
        if providers == [name] or t.in_library == providers:
            continue

        first = providers[0]
        if first in ('c', 'python'):
            # these don't go into autodeps
            continue
        if targets[first] == 'SYSLIB':
            wanted.add(first)
            continue

        owner = bld.get_tgen_by_name(first)
        if len(owner.in_library) != 1:
            wanted.add(first)
        elif owner.in_library != t.in_library:
            # the provider lives in exactly one library, and it is not
            # ours: depend on that library rather than on the provider
            wanted.add(owner.in_library[0])

    t.autodeps = wanted
def build_library_names(bld, tgt_list):
    '''give every target an in_library list naming the libraries it is in

    Each LIBRARY target's name is appended to the in_library list of every
    SUBSYSTEM or ASN1 target named in its samba_deps_extended.  The work is
    done once; bld.env.done_build_library_names records that.
    '''
    if bld.env.done_build_library_names:
        return

    for t in tgt_list:
        t.in_library = []

    for lib in tgt_list:
        if lib.samba_type != 'LIBRARY':
            continue
        for depname in lib.samba_deps_extended:
            member = bld.get_tgen_by_name(depname)
            if not member or member.samba_type not in ('SUBSYSTEM', 'ASN1'):
                continue
            if lib.sname not in member.in_library:
                member.in_library.append(lib.sname)

    bld.env.done_build_library_names = True
def check_library_deps(bld, t):
    '''warn about mutual dependencies that cross library boundaries

    A warning is logged when t is part of more than one library, and for
    every autodep of t that depends back on t while the two targets have
    different in_library lists.
    '''
    name = real_name(t.sname)

    if len(t.in_library) > 1:
        Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library))

    for dep in t.autodeps:
        other = bld.get_tgen_by_name(dep)
        if other is None:
            continue
        for back in other.autodeps:
            if back != name or t.in_library == other.in_library:
                continue
            # this is only reported as a warning, no error is raised
            Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(other.sname)))
            Logs.warn("Libraries should match. %s != %s" % (t.in_library, other.in_library))
def check_syslib_collisions(bld, tgt_list):
    '''fail if any target defines a symbol that a system library also has

    We do not want any code in Samba to use a symbol name from a
    system library. The chance of that causing problems is just too
    high. Note that libreplace uses a rep_XX approach of renaming
    symbols via macros

    Every collision is logged; Errors.WafError is raised at the end if at
    least one was found.
    '''
    collided = False
    for t in tgt_list:
        for lib, lib_symbols in bld.env.syslib_symbols.items():
            common = t.public_symbols.intersection(lib_symbols)
            if not common:
                continue
            Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib))
            collided = True

    if collided:
        raise Errors.WafError("symbols in common with system libraries")
def check_dependencies(bld, t):
    '''check for dependencies that should be changed

    Logs each entry of t.samba_deps that provides none of the symbols t
    still needs (unless t is a LIBRARY), logs which other dependency would
    provide each symbol that is still missing, and stores the symbols that
    nothing provides in t.unsatisfied_symbols.  Targets that have a
    matching ".objlist" target are skipped.
    '''

    def symbol_text(symbols):
        # symbol names may be bytes (raw nm output) or str; join() needs
        # str, and sorting keeps the log output stable
        return " ".join(sorted(s if isinstance(s, str) else s.decode("utf-8", "replace")
                               for s in symbols))

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    remaining = t.undefined_symbols.difference(t.public_symbols)

    sname = real_name(t.sname)

    for d in t.samba_deps:
        if targets[d] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]:
            continue
        bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d)
        diff = remaining.intersection(bld.env.public_symbols[d])
        if not diff and targets[sname] != 'LIBRARY':
            Logs.info("Target '%s' has no dependency on %s" % (sname, d))
        else:
            remaining = remaining.difference(diff)

    t.unsatisfied_symbols = set()
    needed = {}
    for sym in remaining:
        if sym in bld.env.symbol_map:
            # attribute the symbol to its first listed provider
            dep = bld.env.symbol_map[sym]
            if not dep[0] in needed:
                needed[dep[0]] = set()
            needed[dep[0]].add(sym)
        else:
            t.unsatisfied_symbols.add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, symbol_text(needed[dep])))
def check_syslib_dependencies(bld, t):
    '''check for syslib dependencies

    Works on t.unsatisfied_symbols: logs which dependency provides each
    symbol found in bld.env.symbol_map (symbols provided by 'c' are
    ignored) and emits a debug message listing the symbols that nothing
    provides.  For pyembed/pyext targets the public symbols recorded for
    'python' are removed from t.unsatisfied_symbols first.  Targets that
    have a matching ".objlist" target are skipped.
    '''

    def symbol_text(symbols):
        # symbol names may be bytes (raw nm output) or str; join() needs
        # str, and sorting keeps the log output stable
        return " ".join(sorted(s if isinstance(s, str) else s.decode("utf-8", "replace")
                               for s in symbols))

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    sname = real_name(t.sname)

    remaining = set()

    features = TO_LIST(t.features)
    if 'pyembed' in features or 'pyext' in features:
        if 'python' in bld.env.public_symbols:
            t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python'])

    needed = {}
    for sym in t.unsatisfied_symbols:
        if sym in bld.env.symbol_map:
            dep = bld.env.symbol_map[sym][0]
            if dep == 'c':
                continue
            if not dep in needed:
                needed[dep] = set()
            needed[dep].add(sym)
        else:
            remaining.add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, symbol_text(needed[dep])))

    if remaining:
        debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, symbol_text(remaining)))
def symbols_symbolcheck(task):
    '''waf rule: run the internal dependency checks over every target'''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)

    # every target gets an autodeps set; it is only filled in for targets
    # that have a non-empty 'source' attribute
    for tgt in all_targets:
        tgt.autodeps = set()
        if getattr(tgt, 'source', ''):
            build_autodeps(bld, tgt)

    # each check is run over all targets before the next check starts
    for check in (check_dependencies, check_library_deps):
        for tgt in all_targets:
            check(bld, tgt)
def symbols_syslibcheck(task):
    '''waf rule: run the system library checks over every target'''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    # the collision check runs first, then the per-target dependency check
    build_syslib_sets(bld, all_targets)
    check_syslib_collisions(bld, all_targets)

    for tgt in all_targets:
        check_syslib_dependencies(bld, tgt)
def symbols_whyneeded(task):
    """waf rule: explain why 'target' needs to link to 'subsystem'

    The pair is read from the WHYNEEDED option, written as
    TARGET:DEPENDENCY; Errors.WafError is raised for any other form.
    """
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    parts = Options.options.WHYNEEDED.split(":")
    if len(parts) != 2:
        raise Errors.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
    target, subsystem = parts

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)
    build_syslib_sets(bld, all_targets)

    Logs.info("Checking why %s needs to link to %s" % (target, subsystem))

    if target not in bld.env.used_symbols:
        Logs.warn("unable to find target '%s' in used_symbols dict" % target)
        return
    if subsystem not in bld.env.public_symbols:
        Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem)
        return

    used = bld.env.used_symbols[target]
    overlap = used.intersection(bld.env.public_symbols[subsystem])
    if overlap:
        Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem))
    else:
        Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem))
def report_duplicate(bld, binname, sym, libs, fail_on_error):
    '''report a symbol that is provided by more than one library

    binname is the binary being checked, sym the duplicated symbol and
    libs the paths of the libraries that define it.  Paths found in
    bld.env.library_dict are shown by the name stored there.  A fixed list
    of symbols (_init, _fini, _edata, _end, __bss_start) is never
    reported.  The report is raised as Errors.WafError when fail_on_error
    is true and printed otherwise.  sym and the entries of libs may be str
    or bytes.
    '''

    def text(value):
        # symbols and paths read from tool output may still be bytes
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    sym = text(sym)
    if sym in ['_init', '_fini', '_edata', '_end', '__bss_start']:
        return

    libnames = []
    for lib in libs:
        name = text(lib)
        if lib in bld.env.library_dict:
            name = bld.env.library_dict[lib]
        elif name in bld.env.library_dict:
            name = bld.env.library_dict[name]
        libnames.append(name)

    if fail_on_error:
        raise Errors.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames))
    else:
        print("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames))
def symbols_dupcheck_binary(bld, binname, fail_on_error):
    '''check for duplicated symbols in one binary

    Collects the public dynamic symbols of every library returned by
    get_libs_recursive for binname and calls report_duplicate once for
    each symbol defined by more than one library, provided at least one of
    those libraries is listed in bld.env.library_dict.
    '''

    def text(value):
        # symbols and paths read from tool output may still be bytes,
        # while the names they are compared with here are str
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    libs = get_libs_recursive(bld, binname, set())
    symlist = symbols_extract(bld, libs, dynamic=True)

    symmap = {}
    for libpath in symlist:
        for sym in symlist[libpath]['PUBLIC']:
            sym = text(sym)
            if sym == '_GLOBAL_OFFSET_TABLE_':
                continue
            if not sym in symmap:
                symmap[sym] = set()
            symmap[sym].add(libpath)

    for sym in symmap:
        if len(symmap[sym]) > 1:
            for libpath in symmap[sym]:
                # report once per symbol, and only when one of the
                # libraries is in library_dict
                if libpath in bld.env.library_dict or text(libpath) in bld.env.library_dict:
                    report_duplicate(bld, binname, sym, symmap[sym], fail_on_error)
                    break
def symbols_dupcheck(task, fail_on_error=False):
    '''waf rule: look for symbols defined twice among the libraries of each binary

    Every BINARY target is checked with symbols_dupcheck_binary, using the
    path of its first link output relative to the current directory.
    fail_on_error is passed through unchanged.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    # the result is not used in this function
    LOCAL_CACHE(bld, 'TARGET_TYPE')

    build_library_dict(bld, all_targets)
    for tgt in all_targets:
        if tgt.samba_type != 'BINARY':
            continue
        linked = tgt.link_task.outputs[0].abspath(bld.env)
        binname = os.path.relpath(linked, os.getcwd())
        symbols_dupcheck_binary(bld, binname, fail_on_error)
def symbols_dupcheck_fatal(task):
    '''waf rule: same as symbols_dupcheck, but with fail_on_error set to True'''
    symbols_dupcheck(task, True)
def SYMBOL_CHECK(bld):
    '''schedule the dependency checks selected on the command line

    With the SYMBOLCHECK option the symbol, syslib and duplicate symbol
    checks are scheduled; with the WHYNEEDED option the "why needed"
    report is scheduled.
    '''

    def schedule(group, rule, title):
        # each check is a task that always runs and can reach the build
        # context through its env
        bld.SET_BUILD_GROUP(group)
        task = bld(rule=rule, always=True, name=title)
        task.env.bld = bld

    if Options.options.SYMBOLCHECK:
        schedule('symbolcheck', symbols_symbolcheck, 'symbol checking')
        schedule('syslibcheck', symbols_syslibcheck, 'syslib checking')
        schedule('syslibcheck', symbols_dupcheck, 'symbol duplicate checking')

    if Options.options.WHYNEEDED:
        schedule('syslibcheck', symbols_whyneeded, 'check why a dependency is needed')


Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK
def DUP_SYMBOL_CHECK(bld):
    '''check for duplicate symbols

    When the DUP_SYMBOLCHECK option is set and bld.env.DEVELOPER is true,
    symbols_dupcheck_fatal is scheduled as an always-run task in the
    'syslibcheck' build group.
    '''
    if Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER:
        bld.SET_BUILD_GROUP('syslibcheck')
        task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking')
        # the rule reaches the build context through the task's env
        task.env.bld = bld


Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK