bump version
[buildroot.git] / toolchain / mklibs / mklibs.py
bloba84fd42fa25d21b70348b9dc559a8ffe2232b658
1 #! /usr/bin/python
3 # mklibs.py: An automated way to create a minimal /lib/ directory.
5 # Copyright 2001 by Falk Hueffner <falk@debian.org>
6 # & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
8 # mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
9 # used as template
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 # HOW IT WORKS
27 # - Gather all unresolved symbols and libraries needed by the programs
28 # and reduced libraries
29 # - Gather all symbols provided by the already reduced libraries
30 # (none on the first pass)
31 # - If all symbols are provided we are done
32 # - go through all libraries and remember what symbols they provide
33 # - go through all unresolved/needed symbols and mark them as used
34 # - for each library:
35 # - find pic file (if not present copy and strip the so)
36 # - compile in only used symbols
37 # - strip
38 # - back to the top
40 # TODO
41 # * complete argument parsing as given as comment in main
43 import commands
44 import string
45 import re
46 import sys
47 import os
48 import glob
49 import getopt
50 from stat import *
# Verbosity levels understood by debug(), from least to most chatty.
DEBUG_NORMAL, DEBUG_VERBOSE, DEBUG_SPAM = 1, 2, 3

# Current verbosity; each -v/--verbose on the command line raises it by one.
debuglevel = DEBUG_NORMAL
def debug(level, *msg):
    """Print the message parts, joined by single spaces, if `level` is enabled.

    Nothing is printed when the global `debuglevel` is lower than `level`.
    Every item of `msg` has to be a string.
    """
    if level > debuglevel:
        return
    print(" ".join(msg))
# A simple set class built on a dictionary.  It should be replaced with a
# standard set type once the script no longer depends on this method API
# (add/contains/merge/elems/size).
class Set:
    def __init__(self):
        # Members are the dictionary keys; the stored values are unused.
        self.__dict = {}

    def add(self, obj):
        """Insert obj (it must be hashable); adding it again is a no-op."""
        self.__dict[obj] = 1

    def contains(self, obj):
        """Return a true value if obj is a member of the set."""
        return obj in self.__dict

    def merge(self, s):
        """Add every element of the Set `s` to this set."""
        for e in s.elems():
            self.add(e)

    def elems(self):
        """Return the members as a new list (in dictionary order)."""
        return list(self.__dict.keys())

    def size(self):
        """Return the number of members."""
        return len(self.__dict)

    def __eq__(self, other):
        # Comparing against something that is not a Set must not blow up
        # with an AttributeError; let Python fall back to its default.
        if not isinstance(other, Set):
            return NotImplemented
        return self.__dict == other.__dict

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        return repr(self.elems())

    def __repr__(self):
        return repr(self.elems())
# Run a shell command and return its output as a list of lines.
def command(command, *args):
    """Run `command` with `args` through the shell and return the output lines.

    The command name and the arguments are joined with single spaces and
    handed to commands.getstatusoutput().  If the exit status is non-zero
    the failure and the captured output are printed and the script exits
    with status 1.
    """
    arg_string = " ".join(args)
    debug(DEBUG_SPAM, "calling", command, arg_string)
    (status, output) = commands.getstatusoutput(command + ' ' + arg_string)
    exit_code = os.WEXITSTATUS(status)
    if exit_code == 0:
        return output.split('\n')
    print(" ".join(["Command failed with status", str(exit_code), ":",
                    command, arg_string]))
    print("With output: " + output)
    sys.exit(1)
# Collect one capture group from every list item that matches a regexp.
def regexpfilter(list, regexp, groupnr = 1):
    """Return a Set holding group `groupnr` of each item matching `regexp`.

    The pattern is applied with match(), i.e. anchored at the start of
    each item.  Items that do not match are ignored.
    """
    matcher = re.compile(regexp).match
    found = Set()
    for item in list:
        hit = matcher(item)
        if hit is not None:
            found.add(hit.group(groupnr))
    return found
# Return a list of rpath directories, each prefixed with root, for the
# passed object
def rpath(obj):
    """Return the RPATH directories of `obj` as a list of root-prefixed paths.

    The RPATH entries are read from the `objdump --private-headers` output.
    Raises IOError if `obj` does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    result = []
    for entry in regexpfilter(output, r".*RPATH\s*(\S+)$").elems():
        # An RPATH entry may list several directories separated by ':';
        # each one needs its own root prefix to be usable as a search path.
        for directory in entry.split(":"):
            if directory:
                result.append(root + "/" + directory)
    return result
# Return a Set of libraries the passed object depends on.
def library_depends(obj):
    """Return the NEEDED entries of `obj` as a Set of library names.

    The entries are read from the `objdump --private-headers` output.
    Raises IOError if `obj` does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    return regexpfilter(output, r".*NEEDED\s*(\S+)$")
# Return the libraries the passed object depends on as one string in
# "-lfoo -lbar" format, suitable for passing to gcc.
def library_depends_gcc_libnames(obj):
    """Return the NEEDED libraries of `obj` as a string of -l options.

    Only entries of the form lib<name>.so... are considered.  The result
    is the empty string when there are none.  Raises IOError if `obj`
    does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    names = regexpfilter(output, r".*NEEDED\s*lib(\S+)\.so.*$").elems()
    if not names:
        return ""
    return "-l" + " -l".join(names)
# Scan readelf output.  Example:
#   Num:    Value          Size Type    Bind   Vis      Ndx Name
#     1: 000000012002ab48   168 FUNC    GLOBAL DEFAULT  UND strchr@GLIBC_2.0 (2)
# The three groups capture Bind, Ndx and Name (without any @VERSION suffix).
# The Bind group is a plain (\w+): wrapping it in another '+' captured the
# same text but invited pathological backtracking on non-matching lines.
symline_regexp = \
    re.compile(r"\s*\d+: .+\s+\d+\s+\w+\s+(\w+)\s+\w+\s+(\w+)\s+([^\s@]+)")
# Return undefined symbols in an object as a Set of tuples (name, weakness)
def undefined_symbols(obj):
    """Return the undefined symbols of `obj` as a Set of (name, is_weak).

    The symbols are the `readelf -s -W` lines whose Ndx column is "UND".
    is_weak is true when the Bind column is "WEAK".  Raises IOError if
    `obj` does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if ndx == "UND":
            result.add((name, bind == "WEAK"))
    return result
# Return a Set of symbols provided by a library
def provided_symbols(obj):
    """Return the names of the symbols `obj` defines, as a Set.

    A `readelf -s -W` symbol counts as provided when its Bind column is
    not "LOCAL" and its Ndx column is neither "UND" nor "ABS".  Raises
    IOError if `obj` does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    debug(DEBUG_SPAM, "provided_symbols result = ", repr(result))
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if bind != "LOCAL" and ndx not in ("UND", "ABS"):
            debug(DEBUG_SPAM, "provided_symbols adding ", repr(name))
            result.add(name)
    return result
# Follow a chain of symlinks and return the file it finally points to.
def resolve_link(file):
    """Return the real target of the symlink `file`.

    Links are followed one at a time; a relative link target is taken
    relative to the directory containing the link.  A path that is not a
    symlink is returned unchanged.
    """
    debug(DEBUG_SPAM, "resolving", file)
    while True:
        if not S_ISLNK(os.lstat(file)[ST_MODE]):
            break
        link_target = os.readlink(file)
        if link_target[0] == "/":
            file = link_target
        else:
            file = os.path.join(os.path.dirname(file), link_target)
    debug(DEBUG_SPAM, "resolved to", file)
    return file
# Locate a library by name in the directories listed in lib_path.
def find_lib(lib):
    """Return "<dir>/<lib>" for the first lib_path directory containing `lib`.

    Returns the empty string when no directory has it.
    """
    for directory in lib_path:
        candidate = directory + "/" + lib
        if os.access(candidate, os.F_OK):
            return candidate

    return ""
# Locate the PIC archive (<base>_pic.a) belonging to a library.
def find_pic(lib):
    """Return the symlink-resolved path of the PIC archive for `lib`.

    The base name is the part of `lib` in front of ".so".  The
    directories in lib_path are searched in order; the empty string is
    returned when no archive is found.
    """
    archive_name = so_pattern.match(lib).group(1) + "_pic.a"
    for directory in lib_path:
        for candidate in glob.glob(directory + "/" + archive_name):
            if os.access(candidate, os.F_OK):
                return resolve_link(candidate)
    return ""
# Locate the PIC .map file (<base>_pic.map) belonging to a library.
def find_pic_map(lib):
    """Return the symlink-resolved path of the PIC map file for `lib`.

    The base name is the part of `lib` in front of ".so".  The
    directories in lib_path are searched in order; the empty string is
    returned when no map file is found.
    """
    map_name = so_pattern.match(lib).group(1) + "_pic.map"
    for directory in lib_path:
        for candidate in glob.glob(directory + "/" + map_name):
            if os.access(candidate, os.F_OK):
                return resolve_link(candidate)
    return ""
# Read the SONAME of a shared object from its readelf dump.
def extract_soname(so_file):
    """Return the SONAME of `so_file`, or "" if readelf reports none.

    The name is the text between the square brackets on the SONAME line
    of the `readelf --all -W` output.
    """
    readelf_output = command(target + "readelf", "--all", "-W", so_file)
    sonames = regexpfilter(readelf_output, r".*SONAME.*\[(.*)\].*").elems()
    if not sonames:
        return ""

    return sonames[0]
def usage(was_err):
    """Print the command line help and exit with status `was_err`.

    The text goes to stderr when `was_err` is true and to stdout
    otherwise.  This function does not return.
    """
    if was_err:
        outfd = sys.stderr
    else:
        outfd = sys.stdout
    # NOTE(review): the column alignment inside these strings may have been
    # wider originally -- confirm against the upstream file.
    # --target: TARGET is prepended as given.  Appending a '-' automatically
    # was dropped because it "breaks common practice".
    help_lines = [
        "Usage: mklibs [OPTION]... -d DEST FILE ...",
        "Make a set of minimal libraries for FILE(s) in DEST.",
        "",
        " -d, --dest-dir DIRECTORY create libraries in DIRECTORY",
        " ".join([" -D, --no-default-lib omit default libpath (",
                  " : ".join(default_lib_path), ")"]),
        " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path",
        " --ldlib LDLIB use LDLIB for the dynamic linker",
        " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY",
        " --target TARGET prepend TARGET to the gcc and binutils calls",
        " --root ROOT search in ROOT for library rpaths",
        " -v, --verbose explain what is being done",
        " -h, --help display this help and exit",
    ]
    outfd.write("\n".join(help_lines) + "\n")
    sys.exit(was_err)
def version(vers):
    """Print the program name and `vers`, followed by an empty line."""
    # The label ends in a space and another one separates it from the
    # version, hence the two spaces.
    sys.stdout.write("mklibs: version  %s\n\n" % (vers,))
250 #################### main ####################
251 ## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
252 ## Make a set of minimal libraries for FILE ... in directory DEST.
254 ## Options:
255 ## -L DIRECTORY Add DIRECTORY to library search path.
256 ## -D, --no-default-lib Do not use default lib directories of /lib:/usr/lib
257 ## -n, --dry-run Don't actually run any commands; just print them.
258 ## -v, --verbose Print additional progress information.
259 ## -V, --version Print the version number and exit.
260 ## -h, --help Print this help and exit.
261 ## --ldlib Name of dynamic linker (overwrites environment variable ldlib)
262 ## --libc-extras-dir Directory for libc extra files
263 ## --target Use as prefix for gcc or binutils calls
265 ## -d, --dest-dir DIRECTORY Create libraries in DIRECTORY.
267 ## Required arguments for long options are also mandatory for the short options.
# Version string reported by -V/--version.
vers = "0.12 with uClibc fixes"

# Clean the environment: force the C locale for this process and for the
# tools it runs.
os.environ['LC_ALL'] = "C"

# Argument parsing: short and long option specifications for getopt
opts = "L:DnvVhd:r:"
longopts = [
    "no-default-lib", "dry-run", "verbose", "version", "help",
    "dest-dir=", "ldlib=", "libc-extras-dir=", "target=", "root=",
]

# some global variables
lib_rpath = []                      # rpath directories found in the objects
lib_path = []                       # library search path
dest_path = "DEST"                  # placeholder until -d/--dest-dir is given
ldlib = "LDLIB"                     # placeholder until --ldlib is given
include_default_lib_path = "yes"    # "no" after -D/--no-default-lib
default_lib_path = ["/lib/", "/usr/lib/", "/usr/X11R6/lib/"]
libc_extras_dir = "/usr/lib/libc_pic"
target = ""                         # prefix for gcc and binutils commands
root = ""                           # prefix for rpath directories
# group(1) is the library base name, i.e. everything in front of ".so"
so_pattern = re.compile(r"((lib|ld).*)\.so(\..+)*")
# matches the "#!/..." line at the start of a script
script_pattern = re.compile(r"^#!\s*/")
# Parse the command line; on a malformed one report the problem on stderr
# and print the usage (which exits with status 1).
try:
    optlist, proglist = getopt.getopt(sys.argv[1:], opts, longopts)
except getopt.GetoptError:
    msg = sys.exc_info()[1]
    sys.stderr.write(str(msg) + "\n")
    usage(1)

for opt, arg in optlist:
    if opt in ("--help", "-h"):
        # usage() exits by itself
        usage(0)
    elif opt in ("--version", "-V"):
        version(vers)
        sys.exit(0)
    elif opt in ("-v", "--verbose"):
        # every -v raises the verbosity by one, up to DEBUG_SPAM
        if debuglevel < DEBUG_SPAM:
            debuglevel = debuglevel + 1
    elif opt in ("-d", "--dest-dir"):
        dest_path = arg
    elif opt == "-L":
        lib_path.extend(arg.split(":"))
    elif opt in ("-D", "--no-default-lib"):
        include_default_lib_path = "no"
    elif opt in ("-r", "--root"):
        root = arg
    elif opt == "--target":
        # used as given; no '-' is appended
        target = arg
    elif opt == "--ldlib":
        ldlib = arg
    elif opt == "--libc-extras-dir":
        libc_extras_dir = arg
    else:
        # accepted by getopt but not handled here (e.g. -n/--dry-run)
        print("WARNING: unknown option: " + opt + "\targ: " + arg)

if include_default_lib_path == "yes":
    lib_path.extend(default_lib_path)

# Without --ldlib fall back to the environment variable "ldlib"
if ldlib == "LDLIB":
    ldlib = os.getenv("ldlib")
# Collect the programs to process.  Hard links to a file already seen,
# libraries (by name) and scripts (by their "#!/" line) are skipped.
objects = {}  # map from inode to filename
for prog in proglist:
    inode = os.stat(prog)[ST_INO]
    if inode in objects:
        debug(DEBUG_SPAM, prog, "is a hardlink to", objects[inode])
        continue
    if so_pattern.match(prog):
        debug(DEBUG_SPAM, prog, "is a library")
        continue
    # Read the start of the file through a handle that is closed
    # explicitly instead of leaving that to the garbage collector.
    prog_file = open(prog)
    try:
        prog_head = prog_file.read(256)
    finally:
        prog_file.close()
    if script_pattern.match(prog_head):
        debug(DEBUG_SPAM, prog, "is a script")
    else:
        objects[inode] = prog
# No dynamic linker given: take the file name of the program interpreter
# from the first object whose program headers request one.
if not ldlib:
    pattern = re.compile(r".*Requesting program interpreter:.*/([^\]/]+).*")
    for obj in objects.values():
        for line in command(target + "readelf", "--program-headers", obj):
            found = pattern.match(line)
            if found:
                ldlib = found.group(1)
                break
        if ldlib:
            break

if not ldlib:
    sys.exit("E: Dynamic linker not found, aborting.")

debug(DEBUG_NORMAL, "I: Using", ldlib, "as dynamic linker.")

# uclibc is 1 when the linker name contains "ld-uClibc", 0 otherwise
pattern = re.compile(".*ld-uClibc.*")
uclibc = 0
if pattern.match(ldlib):
    uclibc = 1
# Check for rpaths: with --root they extend the library search path,
# without it they can only be warned about.
for obj in objects.values():
    rpath_val = rpath(obj)
    if not rpath_val:
        continue
    if not root:
        print("warning: " + obj + " may need rpath, but --root not specified")
        continue
    if debuglevel >= DEBUG_VERBOSE:
        print("Adding rpath " + ":".join(rpath_val) + " for " + obj)
    lib_rpath.extend(rpath_val)

lib_path.extend(lib_rpath)
# Library reduction.  Each pass works out which symbols the programs and
# the already reduced libraries still need, and rebuilds every needed
# library with only the used symbols.  The loop ends when nothing is
# unresolved any more, or when a pass makes no progress and only weak
# symbols are left.
passnr = 1
previous_pass_unresolved = Set()
while 1:
    debug(DEBUG_NORMAL, "I: library reduction pass", repr(passnr))
    if debuglevel >= DEBUG_VERBOSE:
        # one line with the base name of every object
        print(" ".join(["Objects:"] +
                       [obj[obj.rfind('/') + 1:] for obj in objects.values()]))

    passnr = passnr + 1
    # Gather all already reduced libraries and treat them as objects as well
    small_libs = []
    for lib in regexpfilter(os.listdir(dest_path), "(.*-so-stripped)$").elems():
        obj = dest_path + "/" + lib
        small_libs.append(obj)
        inode = os.stat(obj)[ST_INO]
        if inode in objects:
            debug(DEBUG_SPAM, obj, "is hardlink to", objects[inode])
        else:
            objects[inode] = obj

    # DEBUG
    for obj in objects.values():
        small_libs.append(obj)
        debug(DEBUG_VERBOSE, "Object:", obj)

    # calculate what symbols and libraries are needed
    needed_symbols = Set()  # Set of (name, weakness-flag)
    libraries = Set()
    for obj in objects.values():
        needed_symbols.merge(undefined_symbols(obj))
        libraries.merge(library_depends(obj))

    # FIXME: on i386 this is undefined but not marked UND
    # I don't know how to detect those symbols but this seems
    # to be the only one and including it on alpha as well
    # doesn't hurt. I guess all archs can live with this.
    needed_symbols.add(("sys_siglist", 1))

    # calculate what symbols are present in small_libs
    present_symbols = Set()
    for lib in small_libs:
        present_symbols.merge(provided_symbols(lib))

    # are we finished?
    using_ctor_dtor = 0
    num_unresolved = 0
    unresolved = Set()
    for (symbol, is_weak) in needed_symbols.elems():
        # Set lookup instead of scanning a list of all present symbols
        if not present_symbols.contains(symbol):
            debug(DEBUG_SPAM, "Still need:", symbol, repr(is_weak))
            unresolved.add((symbol, is_weak))
            num_unresolved = num_unresolved + 1

    debug(DEBUG_NORMAL, repr(needed_symbols.size()), "symbols,",
          repr(num_unresolved), "unresolved")

    if num_unresolved == 0:
        break

    if unresolved == previous_pass_unresolved:
        # No progress in last pass. Verify all remaining symbols are weak.
        for (symbol, is_weak) in unresolved.elems():
            if not is_weak:
                # String exceptions are rejected by Python 2.6 and later.
                raise RuntimeError("Unresolvable symbol " + symbol)
        break

    previous_pass_unresolved = unresolved

    library_symbols = {}
    library_symbols_used = {}
    symbol_provider = {}

    # Calculate all symbols each library provides
    for library in libraries.elems():
        path = find_lib(library)
        if not path:
            sys.exit("Library not found: " + library + " in path: "
                     + " : ".join(lib_path))
        symbols = provided_symbols(path)
        library_symbols[library] = Set()
        library_symbols_used[library] = Set()
        for symbol in symbols.elems():
            if symbol in symbol_provider:
                # in doubt, prefer symbols from libc
                if re.match(r"^libc[\.-]", library):
                    library_symbols[library].add(symbol)
                    symbol_provider[symbol] = library
                else:
                    debug(DEBUG_SPAM, "duplicate symbol", symbol, "in",
                          symbol_provider[symbol], "and", library)
            else:
                library_symbols[library].add(symbol)
                symbol_provider[symbol] = library

        # Fixup support for constructors and destructors
        # NOTE(review): the indentation of this fixup was not recoverable
        # from the source; it is assumed to run once per library, inside
        # this loop -- confirm against the upstream file.
        if "_init" in symbol_provider:
            debug(DEBUG_VERBOSE, library, ": Library has a constructor!")
            using_ctor_dtor = 1
            library_symbols[library].add("_init")
            symbol_provider["_init"] = library
            library_symbols_used[library].add("_init")

        if "_fini" in symbol_provider:
            debug(DEBUG_VERBOSE, library, ": Library has a destructor!")
            using_ctor_dtor = 1
            library_symbols[library].add("_fini")
            symbol_provider["_fini"] = library
            library_symbols_used[library].add("_fini")

    # which symbols are actually used from each lib
    for (symbol, is_weak) in needed_symbols.elems():
        if symbol not in symbol_provider:
            if not is_weak:
                # with uClibc an unprovided "main" is tolerated
                if not uclibc or (symbol != "main"):
                    raise RuntimeError("No library provides non-weak " + symbol)
        else:
            lib = symbol_provider[symbol]
            library_symbols_used[lib].add(symbol)

    # reduce libraries
    for library in libraries.elems():
        debug(DEBUG_VERBOSE, "reducing", library)
        debug(DEBUG_SPAM, "using: " + " ".join(library_symbols_used[library].elems()))
        so_file = find_lib(library)
        if not so_file:
            sys.exit("File not found:" + library)
        # Plain prefix test: root is a path, not a regular expression.
        if root and so_file.startswith(root):
            debug(DEBUG_VERBOSE, "no action required for " + so_file)
            continue
        so_file_name = os.path.basename(so_file)
        pic_file = find_pic(library)
        if not pic_file:
            # No pic file, so we have to use the .so file, no reduction
            debug(DEBUG_VERBOSE, "No pic file found for", so_file, "; copying")
            command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
                    so_file, dest_path + "/" + so_file_name + "-so-stripped")
        else:
            # we have a pic file, recompile
            debug(DEBUG_SPAM, "extracting from:", pic_file, "so_file:", so_file)
            soname = extract_soname(so_file)
            if soname == "":
                debug(DEBUG_VERBOSE, so_file, " has no soname, copying")
                continue
            debug(DEBUG_SPAM, "soname:", soname)
            base_name = so_pattern.match(library).group(1)
            # libc needs its soinit.o and sofini.o as well as the pic
            if (base_name == "libc") and not uclibc:
                # force dso_handle.os to be included, otherwise reduced libc
                # may segfault in ptmalloc_init due to undefined weak reference
                extra_flags = find_lib(ldlib) + " -u __dso_handle"
                extra_pre_obj = libc_extras_dir + "/soinit.o"
                extra_post_obj = libc_extras_dir + "/sofini.o"
            else:
                extra_flags = ""
                extra_pre_obj = ""
                extra_post_obj = ""
            map_file = find_pic_map(library)
            if map_file:
                extra_flags = extra_flags + " -Wl,--version-script=" + map_file
            used_symbols = library_symbols_used[library].elems()
            if used_symbols:
                joined_symbols = "-u" + " -u".join(used_symbols)
            else:
                joined_symbols = ""
            if using_ctor_dtor == 1:
                extra_flags = extra_flags + " -shared"
            # compile in only used symbols
            command(target + "gcc",
                    "-nostdlib -nostartfiles -shared -Wl,-soname=" + soname,
                    joined_symbols,
                    "-o", dest_path + "/" + so_file_name + "-so",
                    extra_pre_obj,
                    pic_file,
                    extra_post_obj,
                    extra_flags,
                    "-lgcc -L", dest_path,
                    "-L" + " -L".join(lib_path),
                    library_depends_gcc_libnames(so_file))
            # strip result
            command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
                    dest_path + "/" + so_file_name + "-so",
                    dest_path + "/" + so_file_name + "-so-stripped")
            ## DEBUG
            debug(DEBUG_VERBOSE, so_file, "\t", repr(os.stat(so_file)[ST_SIZE]))
            debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so", "\t",
                  repr(os.stat(dest_path + "/" + so_file_name + "-so")[ST_SIZE]))
            debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so-stripped",
                  "\t", repr(os.stat(dest_path + "/" + so_file_name + "-so-stripped")[ST_SIZE]))
# Finalising libs and cleaning up: the stripped results take the plain
# library names and the unstripped "-so" intermediates are deleted.
for lib in regexpfilter(os.listdir(dest_path), "(.*)-so-stripped$").elems():
    os.rename(dest_path + "/" + lib + "-so-stripped", dest_path + "/" + lib)
for lib in regexpfilter(os.listdir(dest_path), "(.*-so)$").elems():
    os.remove(dest_path + "/" + lib)

# Canonicalize library names: symlinks are removed and every library that
# reports a soname is renamed to it.
for lib in regexpfilter(os.listdir(dest_path), r"(.*so[.\d]*)$").elems():
    this_lib_path = dest_path + "/" + lib
    if os.path.islink(this_lib_path):
        debug(DEBUG_VERBOSE, "Unlinking %s." % lib)
        os.remove(this_lib_path)
        continue
    soname = extract_soname(this_lib_path)
    if soname:
        debug(DEBUG_VERBOSE, "Moving %s to %s." % (lib, soname))
        os.rename(this_lib_path, dest_path + "/" + soname)

# Make sure the dynamic linker is present and is executable
ld_file = find_lib(ldlib)
if not ld_file:
    # find_lib() returns "" when the linker is in none of the lib_path
    # directories.  Carrying on with an empty file name would chmod the
    # destination directory itself, so say what happened and stop here.
    sys.stderr.write("W: dynamic linker " + ldlib
                     + " not found in library path, not installing it\n")
else:
    ld_file_name = os.path.basename(ld_file)

    if not os.access(dest_path + "/" + ld_file_name, os.F_OK):
        debug(DEBUG_NORMAL, "I: stripping and copying dynamic linker.")
        command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
                ld_file, dest_path + "/" + ld_file_name)

    # rwxr-xr-x (octal 755)
    os.chmod(dest_path + "/" + ld_file_name,
             S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)