3 # mklibs.py: An automated way to create a minimal /lib/ directory.
5 # Copyright 2001 by Falk Hueffner <falk@debian.org>
6 # & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
8 # mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # - Gather all unresolved symbols and libraries needed by the programs
28 # and reduced libraries
29 # - Gather all symbols provided by the already reduced libraries
30 # (none on the first pass)
31 # - If all symbols are provided we are done
32 # - go through all libraries and remember what symbols they provide
33 # - go through all unresolved/needed symbols and mark them as used
35 # - find pic file (if not present copy and strip the so)
36 # - compile in only used symbols
41 # * complete the argument parsing as described in the comment in main
56 debuglevel
= DEBUG_NORMAL
def debug(level, *msg):
    """Print the message parts when the global debug level permits it.

    level -- verbosity threshold; nothing is printed unless the
             module-level debuglevel is at least this value.
    msg   -- string fragments, emitted on one line separated by single
             spaces.
    """
    if debuglevel < level:
        return
    # " ".join(...) is what string.join(...) does with its default separator.
    print(" ".join(msg))
62 # A simple set class. It should be replaced with the standard sets.Set
63 # type as soon as Python 2.3 is out.
def contains(self, obj):
    """Return true if obj is a key of this set's backing dictionary."""
    return obj in self.__dict
79 return self
.__dict
.keys()
82 return len(self
.__dict
)
def __eq__(self, other):
    """Compare two sets by comparing their backing dictionaries."""
    mine = self.__dict
    theirs = other.__dict
    return mine == theirs
88 return `self
.__dict
.keys()`
91 return `self
.__dict
.keys()`
93 # return a list of lines of output of the command
94 def command(command
, *args
):
95 debug(DEBUG_SPAM
, "calling", command
, string
.join(args
))
96 (status
, output
) = commands
.getstatusoutput(command
+ ' ' + string
.join(args
))
97 if os
.WEXITSTATUS(status
) != 0:
98 print "Command failed with status", os
.WEXITSTATUS(status
), ":", \
99 command
, string
.join(args
)
100 print "With output:", output
102 return string
.split(output
, '\n')
104 # Filter a list according to a regexp containing a () group. Return
106 def regexpfilter(list, regexp
, groupnr
= 1):
107 pattern
= re
.compile(regexp
)
110 match
= pattern
.match(x
)
112 result
.add(match
.group(groupnr
))
116 # Return a Set of rpath strings for the passed object
118 if not os
.access(obj
, os
.F_OK
):
119 raise "Cannot find lib: " + obj
120 output
= command(target
+ "objdump", "--private-headers", obj
)
121 return map(lambda x
: root
+ "/" + x
, regexpfilter(output
, ".*RPATH\s*(\S+)$").elems())
123 # Return a Set of libraries the passed object depends on.
def library_depends(obj):
    """Return the set of libraries named on NEEDED lines of obj.

    Runs "<target>objdump --private-headers" on obj and hands the output
    lines to regexpfilter, which collects the captured library name from
    every line matching the NEEDED pattern.

    obj -- path of the object file to inspect.

    Raises RuntimeError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions ('raise "text"') are rejected with a TypeError
        # by Python 2.6 and later, which hides the real message; raise a
        # proper exception object instead.
        raise RuntimeError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    # Raw string so the regex escapes are not treated as string escapes.
    return regexpfilter(output, r".*NEEDED\s*(\S+)$")
130 # Return the libraries the passed object depends on as one string.
131 # The libraries are in "-lfoo" format suitable for passing to gcc.
132 def library_depends_gcc_libnames(obj
):
133 if not os
.access(obj
, os
.F_OK
):
134 raise "Cannot find lib: " + obj
135 output
= command(target
+ "objdump", "--private-headers", obj
)
136 output
= regexpfilter(output
, ".*NEEDED\s*lib(\S+)\.so.*$")
137 if not output
.elems():
140 return "-l" + string
.join(output
.elems(), " -l")
142 # Scan readelf output. Example:
143 # Num: Value Size Type Bind Vis Ndx Name
144 # 1: 000000012002ab48 168 FUNC GLOBAL DEFAULT UND strchr@GLIBC_2.0 (2)
146 re
.compile("\s*\d+: .+\s+\d+\s+\w+\s+(\w+)+\s+\w+\s+(\w+)\s+([^\s@]+)")
148 # Return undefined symbols in an object as a Set of tuples (name, weakness)
149 def undefined_symbols(obj
):
150 if not os
.access(obj
, os
.F_OK
):
151 raise "Cannot find lib" + obj
154 output
= command(target
+ "readelf", "-s", "-W", obj
)
156 match
= symline_regexp
.match(line
)
158 bind
, ndx
, name
= match
.groups()
160 result
.add((name
, bind
== "WEAK"))
163 # Return a Set of symbols provided by a library
164 def provided_symbols(obj
):
165 if not os
.access(obj
, os
.F_OK
):
166 raise "Cannot find lib" + obj
169 debug(DEBUG_SPAM
, "provided_symbols result = ", `result`
)
170 output
= command(target
+ "readelf", "-s", "-W", obj
)
172 match
= symline_regexp
.match(line
)
174 bind
, ndx
, name
= match
.groups()
175 if bind
!= "LOCAL" and not ndx
in ("UND", "ABS"):
176 debug(DEBUG_SPAM
, "provided_symbols adding ", `name`
)
180 # Return real target of a symlink
181 def resolve_link(file):
182 debug(DEBUG_SPAM
, "resolving", file)
183 while S_ISLNK(os
.lstat(file)[ST_MODE
]):
184 new_file
= os
.readlink(file)
185 if new_file
[0] != "/":
186 file = os
.path
.join(os
.path
.dirname(file), new_file
)
189 debug(DEBUG_SPAM
, "resolved to", file)
192 # Find complete path of a library, by searching in lib_path
194 for path
in lib_path
:
195 if os
.access(path
+ "/" + lib
, os
.F_OK
):
196 return path
+ "/" + lib
200 # Find a PIC archive for the library
202 base_name
= so_pattern
.match(lib
).group(1)
203 for path
in lib_path
:
204 for file in glob
.glob(path
+ "/" + base_name
+ "_pic.a"):
205 if os
.access(file, os
.F_OK
):
206 return resolve_link(file)
209 # Find a PIC .map file for the library
210 def find_pic_map(lib
):
211 base_name
= so_pattern
.match(lib
).group(1)
212 for path
in lib_path
:
213 for file in glob
.glob(path
+ "/" + base_name
+ "_pic.map"):
214 if os
.access(file, os
.F_OK
):
215 return resolve_link(file)
218 def extract_soname(so_file
):
219 soname_data
= regexpfilter(command(target
+ "readelf", "--all", "-W", so_file
),
220 ".*SONAME.*\[(.*)\].*")
221 if soname_data
.elems():
222 return soname_data
.elems()[0]
230 print >> outfd
, "Usage: mklibs [OPTION]... -d DEST FILE ..."
231 print >> outfd
, "Make a set of minimal libraries for FILE(s) in DEST."
233 print >> outfd
, " -d, --dest-dir DIRECTORY create libraries in DIRECTORY"
234 print >> outfd
, " -D, --no-default-lib omit default libpath (", string
.join(default_lib_path
, " : "), ")"
235 print >> outfd
, " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path"
236 print >> outfd
, " --ldlib LDLIB use LDLIB for the dynamic linker"
237 print >> outfd
, " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY"
238 # Ugh... Adding the trailing '-' breaks common practice.
239 #print >> outfd, " --target TARGET prepend TARGET- to the gcc and binutils calls"
240 print >> outfd
, " --target TARGET prepend TARGET to the gcc and binutils calls"
241 print >> outfd
, " --root ROOT search in ROOT for library rpaths"
242 print >> outfd
, " -v, --verbose explain what is being done"
243 print >> outfd
, " -h, --help display this help and exit"
247 print "mklibs: version ",vers
250 #################### main ####################
251 ## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
252 ## Make a set of minimal libraries for FILE ... in directory DEST.
255 ## -L DIRECTORY Add DIRECTORY to library search path.
256 ## -D, --no-default-lib Do not use default lib directories of /lib:/usr/lib
257 ## -n, --dry-run Don't actually run any commands; just print them.
258 ## -v, --verbose Print additional progress information.
259 ## -V, --version Print the version number and exit.
260 ## -h, --help Print this help and exit.
261 ## --ldlib Name of dynamic linker (overrides environment variable ldlib)
262 ## --libc-extras-dir Directory for libc extra files
263 ## --target Use as prefix for gcc or binutils calls
265 ## -d, --dest-dir DIRECTORY Create libraries in DIRECTORY.
267 ## Required arguments for long options are also mandatory for the short options.
269 # Clean the environment
270 vers
="0.12 with uClibc fixes"
271 os
.environ
['LC_ALL'] = "C"
275 longopts
= ["no-default-lib", "dry-run", "verbose", "version", "help",
276 "dest-dir=", "ldlib=", "libc-extras-dir=", "target=", "root="]
278 # some global variables
283 include_default_lib_path
= "yes"
284 default_lib_path
= ["/lib/", "/usr/lib/", "/usr/X11R6/lib/"]
285 libc_extras_dir
= "/usr/lib/libc_pic"
288 so_pattern
= re
.compile("((lib|ld).*)\.so(\..+)*")
289 script_pattern
= re
.compile("^#!\s*/")
292 optlist
, proglist
= getopt
.getopt(sys
.argv
[1:], opts
, longopts
)
293 except getopt
.GetoptError
, msg
:
294 print >> sys
.stderr
, msg
297 for opt
, arg
in optlist
:
298 if opt
in ("-v", "--verbose"):
299 if debuglevel
< DEBUG_SPAM
:
300 debuglevel
= debuglevel
+ 1
302 lib_path
.extend(string
.split(arg
, ":"))
303 elif opt
in ("-d", "--dest-dir"):
305 elif opt
in ("-D", "--no-default-lib"):
306 include_default_lib_path
= "no"
307 elif opt
== "--ldlib":
309 elif opt
== "--libc-extras-dir":
310 libc_extras_dir
= arg
311 elif opt
== "--target":
314 elif opt
in ("-r", "--root"):
316 elif opt
in ("--help", "-h"):
319 elif opt
in ("--version", "-V"):
323 print "WARNING: unknown option: " + opt
+ "\targ: " + arg
325 if include_default_lib_path
== "yes":
326 lib_path
.extend(default_lib_path
)
329 ldlib
= os
.getenv("ldlib")
331 objects
= {} # map from inode to filename
332 for prog
in proglist
:
333 inode
= os
.stat(prog
)[ST_INO
]
334 if objects
.has_key(inode
):
335 debug(DEBUG_SPAM
, prog
, "is a hardlink to", objects
[inode
])
336 elif so_pattern
.match(prog
):
337 debug(DEBUG_SPAM
, prog
, "is a library")
338 elif script_pattern
.match(open(prog
).read(256)):
339 debug(DEBUG_SPAM
, prog
, "is a script")
341 objects
[inode
] = prog
344 pattern
= re
.compile(".*Requesting program interpreter:.*/([^\]/]+).*")
345 for obj
in objects
.values():
346 output
= command(target
+ "readelf", "--program-headers", obj
)
348 match
= pattern
.match(x
)
350 ldlib
= match
.group(1)
356 sys
.exit("E: Dynamic linker not found, aborting.")
358 debug(DEBUG_NORMAL
, "I: Using", ldlib
, "as dynamic linker.")
360 pattern
= re
.compile(".*ld-uClibc.*");
361 if pattern
.match(ldlib
):
367 for obj
in objects
.values():
368 rpath_val
= rpath(obj
)
371 if debuglevel
>= DEBUG_VERBOSE
:
372 print "Adding rpath " + string
.join(rpath_val
, ":") + " for " + obj
373 lib_rpath
.extend(rpath_val
)
375 print "warning: " + obj
+ " may need rpath, but --root not specified"
377 lib_path
.extend(lib_rpath
)
380 previous_pass_unresolved
= Set()
382 debug(DEBUG_NORMAL
, "I: library reduction pass", `passnr`
)
383 if debuglevel
>= DEBUG_VERBOSE
:
385 for obj
in objects
.values():
386 print obj
[string
.rfind(obj
, '/') + 1:],
390 # Gather all already reduced libraries and treat them as objects as well
392 for lib
in regexpfilter(os
.listdir(dest_path
), "(.*-so-stripped)$").elems():
393 obj
= dest_path
+ "/" + lib
394 small_libs
.append(obj
)
395 inode
= os
.stat(obj
)[ST_INO
]
396 if objects
.has_key(inode
):
397 debug(DEBUG_SPAM
, obj
, "is hardlink to", objects
[inode
])
402 for obj
in objects
.values():
403 small_libs
.append(obj
)
404 debug(DEBUG_VERBOSE
, "Object:", obj
)
406 # calculate what symbols and libraries are needed
407 needed_symbols
= Set() # Set of (name, weakness-flag)
409 for obj
in objects
.values():
410 needed_symbols
.merge(undefined_symbols(obj
))
411 libraries
.merge(library_depends(obj
))
413 # FIXME: on i386 this is undefined but not marked UND
414 # I don't know how to detect those symbols but this seems
415 # to be the only one and including it on alpha as well
416 # doesn't hurt. I guess all archs can live with this.
417 needed_symbols
.add(("sys_siglist", 1))
419 # calculate what symbols are present in small_libs
420 present_symbols
= Set()
421 for lib
in small_libs
:
422 present_symbols
.merge(provided_symbols(lib
))
427 present_symbols_elems
= present_symbols
.elems()
429 for (symbol
, is_weak
) in needed_symbols
.elems():
430 if not symbol
in present_symbols_elems
:
431 debug(DEBUG_SPAM
, "Still need:", symbol
, `is_weak`
)
432 unresolved
.add((symbol
, is_weak
))
433 num_unresolved
= num_unresolved
+ 1
435 debug (DEBUG_NORMAL
, `needed_symbols
.size()`
, "symbols,",
436 `num_unresolved`
, "unresolved")
438 if num_unresolved
== 0:
441 if unresolved
== previous_pass_unresolved
:
442 # No progress in last pass. Verify all remaining symbols are weak.
443 for (symbol
, is_weak
) in unresolved
.elems():
445 raise "Unresolvable symbol " + symbol
448 previous_pass_unresolved
= unresolved
451 library_symbols_used
= {}
454 # Calculate all symbols each library provides
455 for library
in libraries
.elems():
456 path
= find_lib(library
)
458 sys
.exit("Library not found: " + library
+ " in path: "
459 + string
.join(lib_path
, " : "))
460 symbols
= provided_symbols(path
)
461 library_symbols
[library
] = Set()
462 library_symbols_used
[library
] = Set()
463 for symbol
in symbols
.elems():
464 if symbol_provider
.has_key(symbol
):
465 # in doubt, prefer symbols from libc
466 if re
.match("^libc[\.-]", library
):
467 library_symbols
[library
].add(symbol
)
468 symbol_provider
[symbol
] = library
470 debug(DEBUG_SPAM
, "duplicate symbol", symbol
, "in",
471 symbol_provider
[symbol
], "and", library
)
473 library_symbols
[library
].add(symbol
)
474 symbol_provider
[symbol
] = library
476 # Fixup support for constructors and destructors
477 if symbol_provider
.has_key("_init"):
478 debug(DEBUG_VERBOSE
, library
, ": Library has a constructor!");
480 library_symbols
[library
].add("_init")
481 symbol_provider
["_init"] = library
482 library_symbols_used
[library
].add("_init")
484 if symbol_provider
.has_key("_fini"):
485 debug(DEBUG_VERBOSE
, library
, ": Library has a destructor!");
487 library_symbols
[library
].add("_fini")
488 symbol_provider
["_fini"] = library
489 library_symbols_used
[library
].add("_fini")
491 # which symbols are actually used from each lib
492 for (symbol
, is_weak
) in needed_symbols
.elems():
493 if not symbol_provider
.has_key(symbol
):
495 if not uclibc
or (symbol
!= "main"):
496 raise "No library provides non-weak " + symbol
498 lib
= symbol_provider
[symbol
]
499 library_symbols_used
[lib
].add(symbol
)
502 for library
in libraries
.elems():
503 debug(DEBUG_VERBOSE
, "reducing", library
)
504 debug(DEBUG_SPAM
, "using: " + string
.join(library_symbols_used
[library
].elems()))
505 so_file
= find_lib(library
)
506 if root
and (re
.compile("^" + root
).search(so_file
)):
507 debug(DEBUG_VERBOSE
, "no action required for " + so_file
)
509 so_file_name
= os
.path
.basename(so_file
)
511 sys
.exit("File not found:" + library
)
512 pic_file
= find_pic(library
)
514 # No pic file, so we have to use the .so file, no reduction
515 debug(DEBUG_VERBOSE
, "No pic file found for", so_file
, "; copying")
516 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
517 so_file
, dest_path
+ "/" + so_file_name
+ "-so-stripped")
519 # we have a pic file, recompile
520 debug(DEBUG_SPAM
, "extracting from:", pic_file
, "so_file:", so_file
)
521 soname
= extract_soname(so_file
)
523 debug(DEBUG_VERBOSE
, so_file
, " has no soname, copying")
525 debug(DEBUG_SPAM
, "soname:", soname
)
526 base_name
= so_pattern
.match(library
).group(1)
527 # libc needs its soinit.o and sofini.o as well as the pic
528 if (base_name
== "libc") and not uclibc
:
529 # force dso_handle.os to be included, otherwise reduced libc
530 # may segfault in ptmalloc_init due to undefined weak reference
531 extra_flags
= find_lib(ldlib
) + " -u __dso_handle"
532 extra_pre_obj
= libc_extras_dir
+ "/soinit.o"
533 extra_post_obj
= libc_extras_dir
+ "/sofini.o"
538 map_file
= find_pic_map(library
)
540 extra_flags
= extra_flags
+ " -Wl,--version-script=" + map_file
541 if library_symbols_used
[library
].elems():
542 joined_symbols
= "-u" + string
.join(library_symbols_used
[library
].elems(), " -u")
545 if using_ctor_dtor
== 1:
546 extra_flags
= extra_flags
+ " -shared"
547 # compile in only used symbols
548 command(target
+ "gcc",
549 "-nostdlib -nostartfiles -shared -Wl,-soname=" + soname
,\
551 "-o", dest_path
+ "/" + so_file_name
+ "-so", \
556 "-lgcc -L", dest_path
, \
557 "-L" + string
.join(lib_path
, " -L"), \
558 library_depends_gcc_libnames(so_file
))
560 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
561 dest_path
+ "/" + so_file_name
+ "-so",
562 dest_path
+ "/" + so_file_name
+ "-so-stripped")
564 debug(DEBUG_VERBOSE
, so_file
, "\t", `os
.stat(so_file
)[ST_SIZE
]`
)
565 debug(DEBUG_VERBOSE
, dest_path
+ "/" + so_file_name
+ "-so", "\t",
566 `os
.stat(dest_path
+ "/" + so_file_name
+ "-so")[ST_SIZE
]`
)
567 debug(DEBUG_VERBOSE
, dest_path
+ "/" + so_file_name
+ "-so-stripped",
568 "\t", `os
.stat(dest_path
+ "/" + so_file_name
+ "-so-stripped")[ST_SIZE
]`
)
570 # Finalising libs and cleaning up
571 for lib
in regexpfilter(os
.listdir(dest_path
), "(.*)-so-stripped$").elems():
572 os
.rename(dest_path
+ "/" + lib
+ "-so-stripped", dest_path
+ "/" + lib
)
573 for lib
in regexpfilter(os
.listdir(dest_path
), "(.*-so)$").elems():
574 os
.remove(dest_path
+ "/" + lib
)
576 # Canonicalize library names.
577 for lib
in regexpfilter(os
.listdir(dest_path
), "(.*so[.\d]*)$").elems():
578 this_lib_path
= dest_path
+ "/" + lib
579 if os
.path
.islink(this_lib_path
):
580 debug(DEBUG_VERBOSE
, "Unlinking %s." % lib
)
581 os
.remove(this_lib_path
)
583 soname
= extract_soname(this_lib_path
)
585 debug(DEBUG_VERBOSE
, "Moving %s to %s." % (lib
, soname
))
586 os
.rename(dest_path
+ "/" + lib
, dest_path
+ "/" + soname
)
588 # Make sure the dynamic linker is present and is executable
589 ld_file
= find_lib(ldlib
)
590 ld_file_name
= os
.path
.basename(ld_file
)
592 if not os
.access(dest_path
+ "/" + ld_file_name
, os
.F_OK
):
593 debug(DEBUG_NORMAL
, "I: stripping and copying dynamic linker.")
594 command(target
+ "objcopy", "--strip-unneeded -R .note -R .comment",
595 ld_file
, dest_path
+ "/" + ld_file_name
)
597 os
.chmod(dest_path
+ "/" + ld_file_name
, 0755)