3 # mklibs.py: An automated way to create a minimal /lib/ directory.
5 # Copyright 2001 by Falk Hueffner <falk@debian.org>
6 # & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
8 # mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # - Gather all unresolved symbols and libraries needed by the programs
28 # and reduced libraries
29 # - Gather all symbols provided by the already reduced libraries
30 # (none on the first pass)
31 # - If all symbols are provided we are done
32 # - go through all libraries and remember what symbols they provide
33 # - go through all unresolved/needed symbols and mark them as used
35 # - find pic file (if not present copy and strip the so)
36 # - compile in only used symbols
41 # * complete the argument parsing as described in the comment in main
52 ########################## Generic Macros ###########################
59 debuglevel
= DEBUG_NORMAL
def debug(level, *msg):
    """Print a diagnostic message if the current verbosity allows it.

    level -- verbosity threshold of this message; the message is printed
             only when the module-level debuglevel is greater than or
             equal to level.
    msg   -- the message parts; they are joined with single spaces.

    Returns None.
    """
    if debuglevel < level:
        return
    # "%s" formatting (rather than str()) lets callers pass numbers or
    # other objects without a TypeError from join, and under Python 2 it
    # leaves unicode arguments as unicode instead of forcing ASCII.
    text = " ".join(["%s" % (part,) for part in msg])
    # A parenthesised single argument prints identically with the
    # Python 2 print statement and the Python 3 print function.
    print(text)
65 # A simple set class. It should be replaced with the standard sets.Set
66 # type as soon as Python 2.3 is out.
def contains(self, obj):
    """Return a true value if obj is a member of this set.

    obj -- the candidate element; it is looked up as a key of the set's
           backing dictionary.
    """
    # The "in" operator replaces dict.has_key(), which is deprecated and
    # no longer exists in Python 3; the result is the same.
    return obj in self.__dict
82 return self
.__dict
.keys()
85 return len(self
.__dict
)
def __eq__(self, other):
    """Compare this set with another one for equality.

    other -- the object to compare against; it is expected to carry the
             same private backing dictionary as this set.

    Returns the result of comparing the two backing dictionaries.
    """
    mine = self.__dict
    theirs = other.__dict
    return mine == theirs
91 return `self
.__dict
.keys()`
94 return `self
.__dict
.keys()`
96 # return a list of lines of output of the command
97 def command(command
, *args
):
98 debug(DEBUG_SPAM
, "calling", command
, string
.join(args
))
99 (status
, output
) = commands
.getstatusoutput(command
+ ' ' + string
.join(args
))
100 if os
.WEXITSTATUS(status
) != 0:
101 if(debuglevel
< DEBUG_SPAM
):
102 print "Failed command: ", command
, string
.join(args
)
103 print "Returned " + str(os
.WEXITSTATUS(status
)) + " (" + output
+ ")"
104 if debuglevel
>= DEBUG_VERBOSE
:
107 return string
.split(output
, '\n')
109 # Filter a list according to a regexp containing a () group. Return
111 def regexpfilter(list, regexp
, groupnr
= 1):
112 pattern
= re
.compile(regexp
)
115 match
= pattern
.match(x
)
117 result
.add(match
.group(groupnr
))
121 ##################### Library Handling ##############################
123 # This section handles libraries, lib_path searching, the soname and
124 # symlink mess, and should really be made into an object
127 libraries
= {} # map from inode to filename (full name, relative to root)
129 # update the libraries global with new inodes
130 # Only uses the canonical name, and creates a link from the given
131 # name to the canonical name
132 def add_dependencies(obj
):
133 if not os
.access(obj
, os
.R_OK
):
134 raise "Cannot find object file: " + obj
135 output
= command(target
+ "objdump", "--private-headers", obj
)
136 depends
= regexpfilter(output
, ".*NEEDED\s*(\S+)$")
137 debug(DEBUG_VERBOSE
, obj
+ " uses libraries " + string
.join(depends
.elems(),", "))
139 for library
in depends
.elems():
140 full_path
= find_lib(library
, root
)
141 if not full_path
or not os
.access(root
+ full_path
, os
.R_OK
):
142 # perhaps the library only exists in the destination
143 full_path
= find_lib(library
, dest
)
145 present_symbols
.merge(provided_symbols(dest
+ full_path
))
147 raise "Cannot find library: " + library
+ " for object " + obj
149 add_library(full_path
)
151 def add_library(library
):
152 # add the library to the list, unless it's a duplicate
153 inode
= os
.stat(root
+ library
).st_ino
154 if libraries
.has_key(inode
):
155 debug(DEBUG_SPAM
, library
, "is link to", libraries
[inode
])
157 libraries
[inode
] = canonical_name(library
)
159 # create a link from this name to the canonical one
160 if libraries
[inode
] == library
:
161 pass # this is the canonical name
162 elif os
.path
.dirname(library
) == os
.path
.dirname(libraries
[inode
]):
163 symlink(dest
+ library
, os
.path
.basename(libraries
[inode
]))
165 symlink(dest
+ library
, libraries
[inode
]) # must use an absolute name
167 # Find complete path of a library, by searching in lib_path
168 # This is done relative to aroot
169 def find_lib(lib
, aroot
):
171 if os
.access(aroot
+ lib
, os
.F_OK
):
173 debug(DEBUG_QUIET
, "WARNING: %s does not exist" % lib
)
175 for path
in lib_path
:
176 if os
.access(aroot
+ path
+ lib
, os
.F_OK
):
178 debug(DEBUG_QUIET
, "WARNING: %s not found in search path" % lib
, \
179 string
.join(lib_path
, ":"))
183 # returns the canonical name of this library
184 # First it searches for a valid SONAME: the file must exist
185 # Then it tries following symlinks
186 def canonical_name(so_file
):
187 soname_data
= regexpfilter(command(target
+ "readelf", "--all", "-W", root
+ so_file
),
188 ".*SONAME.*\[(.*)\].*")
191 if soname_data
.elems():
192 soname
= soname_data
.elems()[0]
193 canon
= find_lib(soname
, root
)
196 canon
= resolve_link(so_file
)
199 debug(DEBUG_SPAM
, "Canonical name of", so_file
, "is", soname
)
204 # Return real target of a symlink (all relative to root)
205 def resolve_link(file):
206 debug(DEBUG_SPAM
, "resolving", file)
207 while S_ISLNK(os
.lstat(root
+ file)[ST_MODE
]):
208 new_file
= os
.readlink(root
+ file)
209 if new_file
[0] != "/":
210 file = os
.path
.join(os
.path
.dirname(file), new_file
)
213 debug(DEBUG_SPAM
, "resolved to", file)
216 # Return a Set of symbols provided by an object
217 def provided_symbols(obj
):
218 if not os
.access(obj
, os
.R_OK
):
219 raise "Cannot find lib" + obj
222 debug(DEBUG_SPAM
, "Checking provided_symbols for", obj
)
223 output
= command(target
+ "readelf", "-s", "-W", obj
)
225 match
= symline_regexp
.match(line
)
227 bind
, ndx
, name
= match
.groups()
228 if bind
!= "LOCAL" and not ndx
in ("UND", "ABS"):
229 debug(DEBUG_SPAM
, obj
, "provides", name
)
233 # Find a PIC archive for the library
234 # this is either an archive of the form base_name_pic.a or
235 # base_name.a with a _GLOBAL_OFFSET_TABLE_
237 base_name
= so_pattern
.match(lib
).group(1)
238 for path
in lib_path
:
239 full
= root
+ path
+ base_name
+ "_pic.a"
240 debug(DEBUG_SPAM
, "checking", full
)
241 for file in glob
.glob(full
):
242 if os
.access(file, os
.F_OK
):
244 for path
in lib_path
:
245 for file in glob
.glob(root
+ path
+ base_name
+ ".a"):
246 relocs
= command(target
+ "objdump", "-r", file)
247 # this must be size() > 1 to avoid stripping libdl
248 if os
.access(file, os
.F_OK
) and regexpfilter(relocs
,"(.*_GLOBAL_OFFSET_TABLE_)").size() > 1:
252 # Find a PIC .map file for the library
253 def find_pic_map(lib
):
254 base_name
= so_pattern
.match(lib
).group(1)
255 for path
in lib_path
:
256 for file in glob
.glob(root
+ path
+ "/" + base_name
+ "_pic.map"):
257 if os
.access(file, os
.F_OK
):
262 # Return a list of libraries the passed objects depend on. The
263 # libraries are in "-lfoo" format suitable for passing to gcc.
264 def library_depends_gcc_libnames(obj
):
265 if not os
.access(obj
, os
.R_OK
):
266 raise "Cannot find lib: " + obj
267 output
= command(target
+ "objdump", "--private-headers", obj
)
268 output
= regexpfilter(output
, ".*NEEDED\s*lib(\S+)\.so.*$")
269 if not output
.elems():
272 return "-l" + string
.join(output
.elems(), " -l")
274 # Scan readelf output. Example:
275 # Num: Value Size Type Bind Vis Ndx Name
276 # 1: 000000012002ab48 168 FUNC GLOBAL DEFAULT UND strchr@GLIBC_2.0 (2)
278 re
.compile("\s*\d+: .+\s+\d+\s+\w+\s+(\w+)+\s+\w+\s+(\w+)\s+([^\s@]+)")
280 ############################### Misc Functions ######################
283 inode
= os
.stat(obj
)[ST_INO
]
284 if objects
.has_key(inode
):
285 debug(DEBUG_SPAM
, obj
, "is a hardlink to", objects
[inode
])
286 elif script_pattern
.match(open(obj
).read(256)):
287 debug(DEBUG_SPAM
, obj
, "is a script")
290 add_dependencies(obj
)
293 rpath_val
= rpath(obj
)
296 if debuglevel
>= DEBUG_VERBOSE
:
297 print "Adding rpath " + string
.join(rpath_val
, ":") + " for " + obj
299 print "warning: " + obj
+ " may need rpath, but --root not specified"
300 lib_path
.extend(rpath_val
)
302 # Return a Set of rpath strings for the passed object
304 if not os
.access(obj
, os
.R_OK
):
305 raise "Cannot find lib: " + obj
306 output
= command(target
+ "objdump", "--private-headers", obj
)
307 return map(lambda x
: x
+ "/", regexpfilter(output
, ".*RPATH\s*(\S+)$").elems())
309 # Return undefined symbols in an object as a Set of tuples (name, weakness)
310 # Besides all undefined symbols, all weak symbols must be included
312 def undefined_symbols(obj
):
313 if not os
.access(obj
, os
.R_OK
):
314 raise "Cannot find lib" + obj
317 output
= command(target
+ "readelf", "-s", "-W", obj
)
319 match
= symline_regexp
.match(line
)
321 bind
, ndx
, name
= match
.groups()
322 if bind
!= "LOCAL" and ndx
== "UND":
326 debug(DEBUG_SPAM
, obj
, "requires", name
, comment
)
327 result
.add((name
, bind
== "WEAK"))
336 print >> outfd
, "Usage: mklibs [OPTION]... -d DEST FILE ..."
337 print >> outfd
, "Make a set of minimal libraries for FILE(s) in DEST."
339 print >> outfd
, " -d, --dest-dir DIRECTORY create libraries in DIRECTORY"
340 print >> outfd
, " -D, --no-default-lib omit default libpath (", string
.join(default_lib_path
, " : "), ")"
341 print >> outfd
, " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path"
342 print >> outfd
, " --ldlib LDLIB use LDLIB for the dynamic linker"
343 print >> outfd
, " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY"
344 print >> outfd
, " --target TARGET prepend TARGET- to the gcc and binutils calls"
345 print >> outfd
, " --root ROOT search in ROOT for library paths"
346 print >> outfd
, " -v, --verbose explain more (usable multiple times)"
347 print >> outfd
, " -h, --help display this help and exit"
351 print "mklibs: version ",vers
354 #################################### main ###########################
355 ## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
356 ## Make a set of minimal libraries for FILE ... in directory DEST.
359 ## -L DIRECTORY Add DIRECTORY to library search path.
360 ## -D, --no-default-lib Do not use default lib directories of /lib:/usr/lib
361 ## -n, --dry-run Don't actually run any commands; just print them.
362 ## -v, --verbose Print additional progress information. (can use twice)
363 ## -V, --version Print the version number and exit.
364 ## -h, --help Print this help and exit.
365 ## --ldlib Name of dynamic linker (overrides environment variable ldlib)
366 ## --libc-extras-dir Directory for libc extra files
367 ## --target Use as prefix for gcc or binutils calls
369 ## -d, --dest-dir DIRECTORY Create libraries in DIRECTORY.
371 ## Required arguments for long options are also mandatory for the short options.
373 # Clean the environment
375 os
.environ
['LC_ALL'] = "C"
379 longopts
= ["no-default-lib", "dry-run", "verbose", "version", "help",
380 "dest-dir=", "ldlib=", "target=", "root="]
382 # some global variables
387 include_default_lib_path
= True
388 default_lib_path
= ["/lib", "/usr/lib", "/usr/X11R6/lib"]
391 so_pattern
= re
.compile("(?:.*/)*((lib|ld)[^/]*?)(-[.\d]*)?\.so(\.[^/]]+)*")
392 script_pattern
= re
.compile("^#!\s*/")
395 optlist
, proglist
= getopt
.getopt(sys
.argv
[1:], opts
, longopts
)
396 except getopt
.GetoptError
, msg
:
397 print >> sys
.stderr
, msg
400 for opt
, arg
in optlist
:
401 if opt
in ("-v", "--verbose"):
402 if debuglevel
< DEBUG_SPAM
:
403 debuglevel
= debuglevel
+ 1
405 lib_path
.extend(string
.split(arg
, ":"))
406 elif opt
in ("-d", "--dest-dir"):
408 elif opt
in ("-D", "--no-default-lib"):
409 include_default_lib_path
= False
410 elif opt
== "--ldlib":
412 elif opt
== "--target":
414 elif opt
in ("-r", "--root"):
416 elif opt
in ("--help", "-h"):
419 elif opt
in ("--version", "-V"):
423 print "WARNING: unknown option: " + opt
+ "\targ: " + arg
425 if include_default_lib_path
:
426 lib_path
.extend(default_lib_path
)
428 lib_path
= map(lambda dir: dir + "/", lib_path
)
431 ldlib
= os
.getenv("ldlib")
433 cflags
= os
.getenv("CFLAGS")
435 objects
= {} # map from inode to filename (relative to current directory, or absolute)
436 present_symbols
= Set()
438 for prog
in proglist
:
441 basenames
= map(lambda full
: full
[string
.rfind(full
, '/') + 1:], objects
.values())
442 debug(DEBUG_VERBOSE
, "Objects:", string
.join(basenames
))
445 pattern
= re
.compile(".*Requesting program interpreter:.*/([^\]/]+).*")
446 for obj
in objects
.values():
447 output
= command(target
+ "readelf", "--program-headers", obj
)
449 match
= pattern
.match(x
)
451 ldlib
= match
.group(1)
454 ldlib
= find_lib(ldlib
, root
)
458 sys
.exit("E: Dynamic linker not found, aborting.")
460 debug(DEBUG_NORMAL
, "Using", ldlib
, "as dynamic linker.")
468 needed_symbols
= Set() # Set of (name, weakness-flag)
470 # FIXME: on i386 this is undefined but not marked UND
471 # I don't know how to detect those symbols but this seems
472 # to be the only one and including it on alpha as well
473 # doesn't hurt. I guess all archs can live with this.
474 needed_symbols
.add(("sys_siglist", 1))
475 # For some reason this symbol is needed by busybox but not included in the
477 # Ward Vandewege, 2007-08-30
478 needed_symbols
.add(("__ctype_toupper", 1))
481 debug(DEBUG_NORMAL
, "library reduction pass", `passnr`
)
485 # Gather all already reduced libraries and treat them as objects as well
486 for lib
in libraries
.values():
487 obj
= dest
+ lib
+ "-stripped"
489 # calculate what symbols are present/needed in objects
490 previous_count
= needed_symbols
.size()
491 for obj
in objects
.values():
492 needed_symbols
.merge(undefined_symbols(obj
))
493 present_symbols
.merge(provided_symbols(obj
))
495 # what needed symbols are not present?
499 for (symbol
, is_weak
) in needed_symbols
.elems():
500 if not present_symbols
.contains(symbol
):
505 debug(DEBUG_SPAM
, "unresolved", symbol
, comment
)
506 unresolved
.add((symbol
, is_weak
))
509 debug (DEBUG_NORMAL
, `needed_symbols
.size()`
, "symbols,",
510 `num_unresolved`
, "unresolved", "(" + `num_weak`
, " weak)")
512 if num_unresolved
== 0:
515 # if this pass has no more needed symbols, verify all remaining
517 if previous_count
== needed_symbols
.size():
518 if num_weak
!= num_unresolved
:
519 print "Unresolved symbols:",
520 for (symbol
, is_weak
) in unresolved
.elems():
528 library_symbols_used
= {}
531 # Calculate all symbols each library provides
533 for library
in libraries
.values():
534 path
= root
+ library
536 symbols
= provided_symbols(path
)
537 library_symbols
[library
] = Set()
538 library_symbols_used
[library
] = Set()
539 for symbol
in symbols
.elems():
540 if symbol_provider
.has_key(symbol
):
541 # in doubt, prefer symbols from libc
542 if re
.match("^libc[\.-]", library
):
543 library_symbols
[library
].add(symbol
)
544 symbol_provider
[symbol
] = library
546 debug(DEBUG_SPAM
, "duplicate symbol", symbol
, "in",
547 symbol_provider
[symbol
], "and", library
)
549 library_symbols
[library
].add(symbol
)
550 symbol_provider
[symbol
] = library
552 # which symbols are actually used from each lib
553 for (symbol
, is_weak
) in needed_symbols
.elems():
554 if symbol_provider
.has_key(symbol
):
555 lib
= symbol_provider
[symbol
]
556 library_symbols_used
[lib
].add(symbol
)
559 for library
in libraries
.values():
560 stripped
= dest
+ library
+ "-stripped"
562 # make the directory to hold the library
564 os
.makedirs(os
.path
.dirname(dest
+ library
));
568 pic_file
= find_pic(library
)
570 # No pic file, so we have to use the .so file, no reduction
571 debug(DEBUG_NORMAL
, "copying", library
, " (no pic file found)")
572 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
573 root
+ library
, dest
+ library
+ "-stripped")
575 # we have a pic file, recompile
576 debug(DEBUG_SPAM
, "extracting from:", pic_file
, "library:", library
)
577 base_name
= so_pattern
.match(library
).group(1)
579 if base_name
== "libc" and find_lib(ldlib
, root
):
580 # force dso_handle.os to be included, otherwise reduced libc
581 # may segfault in ptmalloc_init due to undefined weak reference
582 extra_flags
= root
+ find_lib(ldlib
, root
) + " -u __dso_handle"
585 map_file
= find_pic_map(library
)
587 extra_flags
= extra_flags
+ " -Wl,--version-script=" + map_file
588 if library_symbols_used
[library
].elems():
589 joined_symbols
= "-u" + string
.join(library_symbols_used
[library
].elems(), " -u")
592 # compile in only used symbols
593 command(target
+ "gcc",
594 cflags
+ " -nostdlib -nostartfiles -shared -Wl,-soname=" + os
.path
.basename(library
),\
596 "-o", dest
+ "tmp-so",
600 "-L" + string
.join(map(lambda orig
: dest
+ orig
, lib_path
), " -L"), \
601 "-L" + string
.join(map(lambda orig
: root
+ orig
, lib_path
), " -L"), \
602 library_depends_gcc_libnames(root
+ library
))
604 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
606 dest
+ library
+ "-stripped")
608 debug(DEBUG_VERBOSE
, "reducing", library
, "\t",
609 "original:", `os
.stat(root
+ library
)[ST_SIZE
]`
,
610 "reduced:", `os
.stat(dest
+ "tmp-so")[ST_SIZE
]`
,
611 "stripped:", `os
.stat(stripped
)[ST_SIZE
]`
)
612 debug(DEBUG_SPAM
, "using: " + string
.join(library_symbols_used
[library
].elems()))
614 os
.remove(dest
+ "tmp-so")
616 # add the library to the list of objects (if not there already)
617 if stripped
not in objects
.values():
618 debug(DEBUG_VERBOSE
, "adding object", stripped
)
621 # Finalising libs and cleaning up
622 for lib
in libraries
.values():
623 os
.rename(dest
+ lib
+ "-stripped", dest
+ lib
)
625 # Make sure the dynamic linker is present and is executable
627 debug(DEBUG_NORMAL
, "stripping and copying dynamic linker.")
628 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
629 root
+ ldlib
, dest
+ ldlib
)
630 os
.chmod(dest
+ ldlib
, 0755)