updated busybox
[kvm-coreboot.git] / bin / mklibs.py
blob2ec3b384461bdf4e08faf5d19872c91b210fa167
1 #! /usr/bin/python
3 # mklibs.py: An automated way to create a minimal /lib/ directory.
5 # Copyright 2001 by Falk Hueffner <falk@debian.org>
6 # & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
8 # mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
9 # used as template
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 # HOW IT WORKS
27 # - Gather all unresolved symbols and libraries needed by the programs
28 # and reduced libraries
29 # - Gather all symbols provided by the already reduced libraries
30 # (none on the first pass)
31 # - If all symbols are provided we are done
32 # - go through all libraries and remember what symbols they provide
33 # - go through all unresolved/needed symbols and mark them as used
34 # - for each library:
35 # - find pic file (if not present copy and strip the so)
36 # - compile in only used symbols
37 # - strip
38 # - back to the top
40 # TODO
41 # * complete argument parsing as given as comment in main
43 import commands
44 import string
45 import re
46 import sys
47 import os
48 import glob
49 import getopt
50 from stat import *
52 ########################## Generic Macros ###########################
# Verbosity levels, from least to most chatty.
(DEBUG_QUIET, DEBUG_NORMAL, DEBUG_VERBOSE, DEBUG_SPAM) = range(4)

# Current verbosity; messages above this level are suppressed.
debuglevel = DEBUG_NORMAL

# Print the given strings, separated by single spaces, provided the
# current verbosity is at least 'level'.  All of msg must be strings.
def debug(level, *msg):
    if debuglevel < level:
        return
    print(" ".join(msg))
# A simple set class built on dictionary keys.  It should be replaced
# with the standard sets.Set type as soon as Python 2.3 is out.
class Set:
    def __init__(self):
        # the elements are the keys; the values are never looked at
        self.__dict = {}

    # Insert obj; adding the same element again changes nothing.
    def add(self, obj):
        self.__dict[obj] = 1

    # Return true if obj has been added to the set.
    def contains(self, obj):
        return obj in self.__dict

    # Add every element of the Set s to this set.
    def merge(self, s):
        for e in s.elems():
            self.add(e)

    # Return the elements as a list, in no particular order.
    def elems(self):
        return list(self.__dict.keys())

    # Return the number of elements.
    def size(self):
        return len(self.__dict)

    # Two Sets are equal when they hold the same elements.  Anything
    # that is not a Set is left to Python's default comparison instead
    # of failing with an AttributeError.
    def __eq__(self, other):
        if not isinstance(other, Set):
            return NotImplemented
        return self.__dict == other.__dict

    # Python 2 does not derive != from ==, so it is spelled out here to
    # keep the two operators consistent.
    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # Both the printable and the debugging form are the element list.
    def __str__(self):
        return repr(self.elems())

    __repr__ = __str__
# Run 'command' with 'args' appended (space separated) and return its
# output as a list of lines.  A non-zero exit status is reported and
# ends the program: with an exception at DEBUG_VERBOSE and above (which
# gives a traceback), with exit code 1 otherwise.
def command(command, *args):
    arg_string = " ".join(args)
    debug(DEBUG_SPAM, "calling", command, arg_string)
    (status, output) = commands.getstatusoutput(command + ' ' + arg_string)
    exit_code = os.WEXITSTATUS(status)
    if exit_code == 0:
        return output.split('\n')

    # at DEBUG_SPAM the command line was already shown by the call above
    if debuglevel < DEBUG_SPAM:
        print("Failed command:  " + command + " " + arg_string)
    print("Returned " + str(exit_code) + " (" + output + ")")
    if debuglevel >= DEBUG_VERBOSE:
        raise Exception
    sys.exit(1)
# Apply 'regexp' (anchored at the start, as re.match does) to every
# entry of 'list' and collect group number 'groupnr' of each match.
# Returns a Set.
def regexpfilter(list, regexp, groupnr = 1):
    matcher = re.compile(regexp).match
    found = Set()
    for hit in [matcher(entry) for entry in list]:
        if hit:
            found.add(hit.group(groupnr))
    return found
121 ##################### Library Handling ##############################
123 # This section handles libraries, lib_path searching, the soname and
124 # symlink mess, and should really be made into an object
libraries = {} # map from inode to filename (full name, relative to root)

# Register every shared library that obj is linked against (the NEEDED
# entries printed by objdump).
# A library found readable below root is handed to add_library().  One
# that exists only below dest is not registered; the symbols it provides
# are merged into present_symbols instead.
# Raises IOError if obj is unreadable or a library is found in neither
# place.
def add_dependencies(obj):
    if not os.access(obj, os.R_OK):
        raise IOError("Cannot find object file: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    depends = regexpfilter(output, r".*NEEDED\s*(\S+)$")
    debug(DEBUG_VERBOSE, obj + " uses libraries " + ", ".join(depends.elems()))

    for library in depends.elems():
        full_path = find_lib(library, root)
        if full_path and os.access(root + full_path, os.R_OK):
            add_library(full_path)
            continue
        # perhaps the library only exists in the destination
        full_path = find_lib(library, dest)
        if not full_path:
            raise IOError("Cannot find library: " + library + " for object " + obj)
        present_symbols.merge(provided_symbols(dest + full_path))
# Create the symbolic link 'link_path' pointing at 'link_target'.  The
# directory holding the link is created when missing and a link left
# over from an earlier run is replaced.
def _link_to_canonical(link_path, link_target):
    link_dir = os.path.dirname(link_path)
    if link_dir and not os.path.isdir(link_dir):
        os.makedirs(link_dir)
    if os.path.islink(link_path):
        os.remove(link_path)
    os.symlink(link_target, link_path)

# Add 'library' (a path relative to root) to the libraries map, keyed
# by inode so that the same file reached under several names is only
# recorded once, under its canonical name.  When 'library' is not the
# canonical name, a symlink from it to the canonical name is created
# below dest.
# NOTE(review): the original called a symlink() helper that is not
# defined anywhere in this file.  The argument meaning used here (link
# location first, link target second) is inferred from the comment
# "create a link from this name to the canonical one" -- please confirm.
def add_library(library):
    # add the library to the list, unless it's a duplicate
    inode = os.stat(root + library).st_ino
    if inode in libraries:
        debug(DEBUG_SPAM, library, "is link to", libraries[inode])
    else:
        libraries[inode] = canonical_name(library)

    # create a link from this name to the canonical one
    canonical = libraries[inode]
    if canonical == library:
        return # this is the canonical name
    if os.path.dirname(library) == os.path.dirname(canonical):
        # same directory: a relative link is enough
        _link_to_canonical(dest + library, os.path.basename(canonical))
    else:
        _link_to_canonical(dest + library, canonical) # must use an absolute name
# Find complete path of a library, relative to aroot.
# An absolute name is only checked for existence below aroot; any other
# name is tried in each directory of lib_path in turn.
# Returns "" (after printing a warning) when nothing is found.
def find_lib(lib, aroot):
    if lib[0] != '/':
        for candidate in [path + lib for path in lib_path]:
            if os.access(aroot + candidate, os.F_OK):
                return candidate
        debug(DEBUG_QUIET, "WARNING: %s not found in search path" % lib,
              ":".join(lib_path))
        return ""

    if os.access(aroot + lib, os.F_OK):
        return lib
    debug(DEBUG_QUIET, "WARNING: %s does not exist" % lib)
    return ""
# Return the canonical name of the library so_file (relative to root).
# First it searches for a valid SONAME: the file must exist.
# Then it tries following symlinks.
# If neither gives a name, so_file itself is returned.
def canonical_name(so_file):
    readelf_output = command(target + "readelf", "--all", "-W", root + so_file)
    sonames = regexpfilter(readelf_output, r".*SONAME.*\[(.*)\].*").elems()

    canon = ""
    if sonames:
        canon = find_lib(sonames[0], root)
    if not canon:
        canon = resolve_link(so_file)
    if not canon:
        return so_file

    # Log the name actually chosen.  The SONAME variable used here
    # before was unbound whenever the library carried no SONAME.
    debug(DEBUG_SPAM, "Canonical name of", so_file, "is", canon)
    return canon
# Follow a chain of symlinks and return the file it ends at.  Both the
# argument and the result are paths relative to root; a relative link
# target is taken relative to the directory of the link.
def resolve_link(file):
    debug(DEBUG_SPAM, "resolving", file)
    while True:
        if not S_ISLNK(os.lstat(root + file)[ST_MODE]):
            break
        link_target = os.readlink(root + file)
        if link_target[0] == "/":
            file = link_target
        else:
            file = os.path.join(os.path.dirname(file), link_target)
    debug(DEBUG_SPAM, "resolved to", file)
    return file
# Return a Set of the names of the symbols provided by an object: every
# symbol in the readelf symbol table whose binding is not LOCAL and
# whose section index is neither UND nor ABS.
# Raises IOError if obj cannot be read.
def provided_symbols(obj):
    if not os.access(obj, os.R_OK):
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    debug(DEBUG_SPAM, "Checking provided_symbols for", obj)
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if bind != "LOCAL" and ndx not in ("UND", "ABS"):
            debug(DEBUG_SPAM, obj, "provides", name)
            result.add(name)
    return result
# Find a PIC archive for the library
# this is either an archive of the form base_name_pic.a or
# base_name.a with a _GLOBAL_OFFSET_TABLE_
# Returns the path of the archive, or "" when there is none (which
# includes library names that so_pattern does not recognise).
def find_pic(lib):
    match = so_pattern.match(lib)
    if not match:
        # no base name can be derived, so there is nothing to look for
        return ""
    base_name = match.group(1)

    for path in lib_path:
        full = root + path + base_name + "_pic.a"
        debug(DEBUG_SPAM, "checking", full)
        for file in glob.glob(full):
            if os.access(file, os.F_OK):
                return file

    for path in lib_path:
        for file in glob.glob(root + path + base_name + ".a"):
            # check existence first: command() ends the program when
            # objdump fails, e.g. on a dangling symlink
            if not os.access(file, os.F_OK):
                continue
            relocs = command(target + "objdump", "-r", file)
            # this must be size() > 1 to avoid stripping libdl
            if regexpfilter(relocs, "(.*_GLOBAL_OFFSET_TABLE_)").size() > 1:
                return file
    return ""
# Find the linker version script (base_name_pic.map) that belongs to
# the library, searching the directories of lib_path below root.
# Returns the path of the first one found, or "".
def find_pic_map(lib):
    base_name = so_pattern.match(lib).group(1)
    map_name = "/" + base_name + "_pic.map"
    for directory in lib_path:
        candidates = [name for name in glob.glob(root + directory + map_name)
                      if os.access(name, os.F_OK)]
        if candidates:
            return candidates[0]
    return ""
# Return the libraries the passed object depends on as one string in
# "-lfoo -lbar" format suitable for passing to gcc, or "" when there
# are none.  Only NEEDED entries of the form lib<name>.so... count.
# Raises IOError if obj cannot be read.
def library_depends_gcc_libnames(obj):
    if not os.access(obj, os.R_OK):
        raise IOError("Cannot find lib: " + obj)
    headers = command(target + "objdump", "--private-headers", obj)
    names = regexpfilter(headers, r".*NEEDED\s*lib(\S+)\.so.*$").elems()
    if not names:
        return ""
    return " ".join(["-l" + name for name in names])
# Scan readelf output. Example:
#    Num:    Value          Size Type    Bind   Vis      Ndx Name
#      1: 000000012002ab48   168 FUNC    GLOBAL DEFAULT  UND strchr@GLIBC_2.0 (2)
# The three groups are the Bind column, the Ndx column and the symbol
# name with any "@version" suffix cut off.
# The Bind group is a plain (\w+): wrapping it in a second "+" captured
# the same text but allowed needless backtracking on lines that do not
# match.
symline_regexp = \
    re.compile(r"\s*\d+: .+\s+\d+\s+\w+\s+(\w+)\s+\w+\s+(\w+)\s+([^\s@]+)")
280 ############################### Misc Functions ######################
# Register a program or library whose dependencies must be satisfied.
# Objects are keyed by inode, so hard links to an object already seen
# are skipped, as are files that start with a "#!/" interpreter line.
# For a new object its library dependencies are added, and any RPATH
# directories it names are appended to the library search path.
def add_object(obj):
    inode = os.stat(obj)[ST_INO]
    if inode in objects:
        debug(DEBUG_SPAM, obj, "is a hardlink to", objects[inode])
        return

    # Only the first bytes are needed to recognise a script.  Close the
    # file explicitly instead of leaving the handle open.
    handle = open(obj)
    try:
        head = handle.read(256)
    finally:
        handle.close()
    if script_pattern.match(head):
        debug(DEBUG_SPAM, obj, "is a script")
        return

    objects[inode] = obj
    add_dependencies(obj)

    # Check for rpaths
    rpath_val = rpath(obj)
    if rpath_val:
        if root:
            if debuglevel >= DEBUG_VERBOSE:
                print("Adding rpath " + ":".join(rpath_val) + " for " + obj)
        else:
            print("warning: " + obj + " may need rpath, but --root not specified")
        lib_path.extend(rpath_val)
# Return a list of the RPATH directories of the passed object, each
# with a trailing "/" so it can be used like the entries of lib_path.
# An RPATH entry is a colon separated list of directories, so every
# entry is split; empty components are dropped.
# Raises IOError if obj cannot be read.
def rpath(obj):
    if not os.access(obj, os.R_OK):
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    directories = []
    for entry in regexpfilter(output, r".*RPATH\s*(\S+)$").elems():
        for directory in entry.split(":"):
            if directory:
                directories.append(directory + "/")
    return directories
# Return undefined symbols in an object as a Set of tuples (name, weakness).
# Every symbol in the readelf symbol table with a binding other than
# LOCAL and a section index of UND is reported; the weakness flag is
# true for symbols with WEAK binding.
# Raises IOError if obj cannot be read.
def undefined_symbols(obj):
    if not os.access(obj, os.R_OK):
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if bind == "LOCAL" or ndx != "UND":
            continue
        is_weak = (bind == "WEAK")
        comment = ""
        if is_weak:
            comment = "(weak)"
        debug(DEBUG_SPAM, obj, "requires", name, comment)
        result.add((name, is_weak))
    return result
# Print the command line help and exit with status was_err.  The text
# goes to stderr when was_err is true and to stdout otherwise.
def usage(was_err):
    outfd = sys.stdout
    if was_err:
        outfd = sys.stderr
    default_dirs = " : ".join(default_lib_path)
    help_lines = [
        "Usage: mklibs [OPTION]... -d DEST FILE ...",
        "Make a set of minimal libraries for FILE(s) in DEST.",
        "",
        " -d, --dest-dir DIRECTORY create libraries in DIRECTORY",
        " -D, --no-default-lib omit default libpath ( " + default_dirs + " )",
        " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path",
        " --ldlib LDLIB use LDLIB for the dynamic linker",
        " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY",
        " --target TARGET prepend TARGET- to the gcc and binutils calls",
        " --root ROOT search in ROOT for library paths",
        " -v, --verbose explain more (usable multiple times)",
        " -h, --help display this help and exit",
    ]
    for line in help_lines:
        outfd.write(line + "\n")
    sys.exit(was_err)
# Print the program name and the given version, followed by an empty line.
def version(vers):
    print("mklibs: version  %s" % (vers,))
    print("")
354 #################################### main ###########################
355 ## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
356 ## Make a set of minimal libraries for FILE ... in directory DEST.
358 ## Options:
359 ## -L DIRECTORY Add DIRECTORY to library search path.
360 ## -D, --no-default-lib Do not use default lib directories of /lib:/usr/lib
361 ## -n, --dry-run Don't actually run any commands; just print them.
362 ## -v, --verbose Print additional progress information. (can use twice)
363 ## -V, --version Print the version number and exit.
364 ## -h, --help Print this help and exit.
365 ## --ldlib Name of dynamic linker (overwrites environment variable ldlib)
366 ## --libc-extras-dir Directory for libc extra files
367 ## --target Use as prefix for gcc or binutils calls
369 ## -d, --dest-dir DIRECTORY Create libraries in DIRECTORY.
371 ## Required arguments for long options are also mandatory for the short options.
# Clean the environment
vers = "0.12"
# The output of objdump/readelf is parsed with regular expressions that
# look for fixed words such as "NEEDED", so it must not be localised.
os.environ['LC_ALL'] = "C"

# Argument parsing
opts = "L:DnvVhd:r:"
longopts = ["no-default-lib", "dry-run", "verbose", "version", "help",
            "dest-dir=", "ldlib=", "target=", "root="]

# some global variables
lib_rpath = []
lib_path = []                   # library search directories
dest = "DEST"                   # placeholder until -d/--dest-dir is given
ldlib = "LDLIB"                 # sentinel: no --ldlib on the command line
include_default_lib_path = True
default_lib_path = ["/lib", "/usr/lib", "/usr/X11R6/lib"]
target = ""                     # prefix for the gcc and binutils commands
root = ""
# Splits a shared library path; group 1 is the base name ("libc" for
# both "/lib/libc-2.3.6.so" and "/lib/libc.so.6").  The last group used
# to read (\.[^/]]+)* -- the stray "]" kept it from ever matching a
# version suffix such as ".6".
so_pattern = re.compile(r"(?:.*/)*((lib|ld)[^/]*?)(-[.\d]*)?\.so(\.[^/]+)*")
# Matches the start of a script: "#!" followed by an absolute path.
script_pattern = re.compile(r"^#!\s*/")
# Split the command line into options and the list of programs to scan.
try:
    optlist, proglist = getopt.getopt(sys.argv[1:], opts, longopts)
except getopt.GetoptError:
    msg = sys.exc_info()[1]
    sys.stderr.write(str(msg) + "\n")
    usage(1)

# Apply the options to the global settings.
for opt, arg in optlist:
    if opt in ("--help", "-h"):
        usage(0) # does not return
    elif opt in ("--version", "-V"):
        version(vers)
        sys.exit(0)
    elif opt in ("-v", "--verbose"):
        # every -v raises the verbosity by one step, up to DEBUG_SPAM
        if debuglevel < DEBUG_SPAM:
            debuglevel = debuglevel + 1
    elif opt == "-L":
        lib_path.extend(arg.split(":"))
    elif opt in ("-d", "--dest-dir"):
        dest = arg
    elif opt in ("-D", "--no-default-lib"):
        include_default_lib_path = False
    elif opt == "--ldlib":
        ldlib = arg
    elif opt == "--target":
        target = arg + "-"
    elif opt in ("-r", "--root"):
        root = arg
    else:
        print("WARNING: unknown option: " + opt + "\targ: " + arg)
if include_default_lib_path:
    lib_path.extend(default_lib_path)

# every search directory gets a trailing slash so that library names
# can simply be appended to it
lib_path = [directory + "/" for directory in lib_path]

# without --ldlib the environment variable "ldlib" is consulted
if ldlib == "LDLIB":
    ldlib = os.getenv("ldlib")

# CFLAGS is concatenated into the gcc command line later on, so an
# unset variable has to become an empty string rather than None
cflags = os.getenv("CFLAGS") or ""

objects = {} # map from inode to filename (relative to current directory, or absolute)
present_symbols = Set()

for prog in proglist:
    add_object(prog)

basenames = [full[full.rfind('/') + 1:] for full in objects.values()]
debug(DEBUG_VERBOSE, "Objects:", " ".join(basenames))
# No dynamic linker was named: take the program interpreter recorded in
# the program headers of the objects.  The scan stops at the first
# object whose interpreter can be located below root.  The inner break
# alone only left the line loop, so readelf was run again for every
# remaining object.
if not ldlib:
    pattern = re.compile(r".*Requesting program interpreter:.*/([^\]/]+).*")
    for obj in objects.values():
        for x in command(target + "readelf", "--program-headers", obj):
            match = pattern.match(x)
            if match:
                ldlib = match.group(1)
                break
        if ldlib:
            ldlib = find_lib(ldlib, root)
        if ldlib:
            break

if not ldlib:
    sys.exit("E: Dynamic linker not found, aborting.")
else:
    debug(DEBUG_NORMAL, "Using", ldlib, "as dynamic linker.")
    add_library(ldlib)

# from here on both directories end in a slash
root = root + "/"
dest = dest + "/"
# umask 022: files created from now on are not group/world writable
os.umask(S_IWGRP | S_IWOTH)
passnr = 1
needed_symbols = Set() # Set of (name, weakness-flag)

# FIXME: on i386 this is undefined but not marked UND
# I don't know how to detect those symbols but this seems
# to be the only one and including it on alpha as well
# doesn't hurt. I guess all archs can live with this.
needed_symbols.add(("sys_siglist", 1))
# For some reason this symbol is needed by busybox but not included in the
# stripped libc...
# Ward Vandewege, 2007-08-30
needed_symbols.add(("__ctype_toupper", 1))

# Library file names that count as libc when two libraries provide the
# same symbol.
libc_name = re.compile(r"^libc[\.-]")

# Each pass collects the symbols the objects need, rebuilds every
# library with just those symbols and adds the results to the objects,
# which may in turn need more symbols.  The loop ends when nothing is
# unresolved or when a pass adds no new needed symbols.
while True:
    debug(DEBUG_NORMAL, "library reduction pass", repr(passnr))
    passnr = passnr + 1

    # calculate what symbols are present/needed in objects
    previous_count = needed_symbols.size()
    for obj in objects.values():
        needed_symbols.merge(undefined_symbols(obj))
        present_symbols.merge(provided_symbols(obj))

    # what needed symbols are not present?
    num_unresolved = 0
    num_weak = 0
    unresolved = Set()
    for (symbol, is_weak) in needed_symbols.elems():
        if present_symbols.contains(symbol):
            continue
        comment = ""
        if is_weak:
            comment = "(weak)"
            num_weak += 1
        debug(DEBUG_SPAM, "unresolved", symbol, comment)
        unresolved.add((symbol, is_weak))
        num_unresolved += 1

    debug(DEBUG_NORMAL, repr(needed_symbols.size()), "symbols,",
          repr(num_unresolved), "unresolved", "(" + repr(num_weak), " weak)")

    if num_unresolved == 0:
        break

    # if this pass has no more needed symbols, verify all remaining
    # symbols are weak
    if previous_count == needed_symbols.size():
        if num_weak != num_unresolved:
            missing = [symbol for (symbol, is_weak) in unresolved.elems()
                       if not is_weak]
            print(" ".join(["Unresolved symbols:"] + missing))
            #raise Exception
        break

    library_symbols = {}
    library_symbols_used = {}
    symbol_provider = {}

    # Calculate all symbols each library provides
    for library in libraries.values():
        symbols = provided_symbols(root + library)
        library_symbols[library] = Set()
        library_symbols_used[library] = Set()
        # 'library' is a full path, so the libc test has to look at the
        # file name only; matched against the path it never succeeded
        is_libc = libc_name.match(os.path.basename(library))
        for symbol in symbols.elems():
            if symbol not in symbol_provider or is_libc:
                # first provider, or: in doubt, prefer symbols from libc
                library_symbols[library].add(symbol)
                symbol_provider[symbol] = library
            else:
                debug(DEBUG_SPAM, "duplicate symbol", symbol, "in",
                      symbol_provider[symbol], "and", library)

    # which symbols are actually used from each lib
    for (symbol, is_weak) in needed_symbols.elems():
        if symbol in symbol_provider:
            lib = symbol_provider[symbol]
            library_symbols_used[lib].add(symbol)

    # reduce libraries
    for library in libraries.values():
        stripped = dest + library + "-stripped"

        # make the directory to hold the library; other failures (such
        # as missing permissions) are no longer silently ignored
        library_dir = os.path.dirname(dest + library)
        if not os.path.isdir(library_dir):
            os.makedirs(library_dir)

        pic_file = find_pic(library)
        if not pic_file:
            # No pic file, so we have to use the .so file, no reduction
            debug(DEBUG_NORMAL, "copying", library, " (no pic file found)")
            command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
                    root + library, stripped)
        else:
            # we have a pic file, recompile
            debug(DEBUG_SPAM, "extracting from:", pic_file, "library:", library)
            base_name = so_pattern.match(library).group(1)

            extra_flags = ""
            if base_name == "libc":
                linker_path = find_lib(ldlib, root)
                if linker_path:
                    # force dso_handle.os to be included, otherwise reduced libc
                    # may segfault in ptmalloc_init due to undefined weak reference
                    extra_flags = root + linker_path + " -u __dso_handle"
            map_file = find_pic_map(library)
            if map_file:
                extra_flags = extra_flags + " -Wl,--version-script=" + map_file

            used = library_symbols_used[library].elems()
            joined_symbols = ""
            if used:
                joined_symbols = "-u" + " -u".join(used)

            # compile in only used symbols
            # (cflags may be None when CFLAGS is not set in the environment)
            command(target + "gcc",
                    (cflags or "") + " -nostdlib -nostartfiles -shared -Wl,-soname=" + os.path.basename(library),
                    joined_symbols,
                    "-o", dest + "tmp-so",
                    pic_file,
                    extra_flags,
                    "-lgcc",
                    "-L" + " -L".join([dest + orig for orig in lib_path]),
                    "-L" + " -L".join([root + orig for orig in lib_path]),
                    library_depends_gcc_libnames(root + library))
            # strip result
            command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
                    dest + "tmp-so",
                    stripped)
            ## DEBUG
            debug(DEBUG_VERBOSE, "reducing", library, "\t",
                  "original:", repr(os.stat(root + library)[ST_SIZE]),
                  "reduced:", repr(os.stat(dest + "tmp-so")[ST_SIZE]),
                  "stripped:", repr(os.stat(stripped)[ST_SIZE]))
            debug(DEBUG_SPAM, "using: " + " ".join(used))

            os.remove(dest + "tmp-so")

        # add the library to the list of objects (if not there already)
        if stripped not in objects.values():
            debug(DEBUG_VERBOSE, "adding object", stripped)
            add_object(stripped)
# Finalising libs and cleaning up: every reduced library takes the
# place of its final name below dest.
for lib in libraries.values():
    reduced = dest + lib
    os.rename(reduced + "-stripped", reduced)

# Make sure the dynamic linker is present and is executable
if ldlib:
    debug(DEBUG_NORMAL, "stripping and copying dynamic linker.")
    linker_copy = dest + ldlib
    command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
            root + ldlib, linker_copy)
    # mode 755 (rwxr-xr-x)
    os.chmod(linker_copy, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)