Patch by Jeremy Katz (SF #1609407)
[python.git] / Lib / modulefinder.py
blob5390e64fbf0b85ba0a1d6b59b4591ea23fcfe0c7
1 """Find modules used by a script, using introspection."""
2 # This module should be kept compatible with Python 2.2, see PEP 291.
4 from __future__ import generators
5 import dis
6 import imp
7 import marshal
8 import os
9 import sys
10 import new
11 import struct
# Choose the mode used to open Python source files for reading.
if not hasattr(sys.__stdout__, "newlines"):
    # remain compatible with Python < 2.3
    READ_MODE = "r"
else:
    READ_MODE = "U"  # universal line endings

# One-character strings for the opcodes the bytecode scanners look for;
# co_code is compared against these character by character.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = map(
    chr,
    map(dis.opname.index,
        ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL')))
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Opcodes that compare >= this one are followed by a two-byte argument.
HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT)
# ModuleFinder does a good job of simulating Python's import behaviour, but
# it cannot handle the __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note that this maps a package name to a list of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register path as an additional __path__ entry for packagename.

    The entry is appended to the list kept in packagePathMap under
    packagename; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
# Maps the name a package is found under to the name it should be
# recorded under.
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around the
# way the _xmlplus package injects itself under the name "xml" into
# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that the package named oldname is to be known as newname."""
    replacePackageMap.update({oldname: newname})
class Module:
    """Lightweight record describing one module seen by the finder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name plus its optional file and path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # The set of global names that are assigned to in the module.
        # This includes those names imported through starimports of
        # Python modules.
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting None fields."""
        pieces = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                pieces.append(repr(optional))
        return "Module(%s)" % ", ".join(pieces)
75 class ModuleFinder:
77 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
78 if path is None:
79 path = sys.path
80 self.path = path
81 self.modules = {}
82 self.badmodules = {}
83 self.debug = debug
84 self.indent = 0
85 self.excludes = excludes
86 self.replace_paths = replace_paths
87 self.processed_paths = [] # Used in debugging only
89 def msg(self, level, str, *args):
90 if level <= self.debug:
91 for i in range(self.indent):
92 print " ",
93 print str,
94 for arg in args:
95 print repr(arg),
96 print
98 def msgin(self, *args):
99 level = args[0]
100 if level <= self.debug:
101 self.indent = self.indent + 1
102 self.msg(*args)
104 def msgout(self, *args):
105 level = args[0]
106 if level <= self.debug:
107 self.indent = self.indent - 1
108 self.msg(*args)
110 def run_script(self, pathname):
111 self.msg(2, "run_script", pathname)
112 fp = open(pathname, READ_MODE)
113 stuff = ("", "r", imp.PY_SOURCE)
114 self.load_module('__main__', fp, pathname, stuff)
116 def load_file(self, pathname):
117 dir, name = os.path.split(pathname)
118 name, ext = os.path.splitext(name)
119 fp = open(pathname, READ_MODE)
120 stuff = (ext, "r", imp.PY_SOURCE)
121 self.load_module(name, fp, pathname, stuff)
123 def import_hook(self, name, caller=None, fromlist=None, level=-1):
124 self.msg(3, "import_hook", name, caller, fromlist, level)
125 parent = self.determine_parent(caller, level=level)
126 q, tail = self.find_head_package(parent, name)
127 m = self.load_tail(q, tail)
128 if not fromlist:
129 return q
130 if m.__path__:
131 self.ensure_fromlist(m, fromlist)
132 return None
134 def determine_parent(self, caller, level=-1):
135 self.msgin(4, "determine_parent", caller, level)
136 if not caller or level == 0:
137 self.msgout(4, "determine_parent -> None")
138 return None
139 pname = caller.__name__
140 if level >= 1: # relative import
141 if caller.__path__:
142 level -= 1
143 if level == 0:
144 parent = self.modules[pname]
145 assert parent is caller
146 self.msgout(4, "determine_parent ->", parent)
147 return parent
148 if pname.count(".") < level:
149 raise ImportError, "relative importpath too deep"
150 pname = ".".join(pname.split(".")[:-level])
151 parent = self.modules[pname]
152 self.msgout(4, "determine_parent ->", parent)
153 return parent
154 if caller.__path__:
155 parent = self.modules[pname]
156 assert caller is parent
157 self.msgout(4, "determine_parent ->", parent)
158 return parent
159 if '.' in pname:
160 i = pname.rfind('.')
161 pname = pname[:i]
162 parent = self.modules[pname]
163 assert parent.__name__ == pname
164 self.msgout(4, "determine_parent ->", parent)
165 return parent
166 self.msgout(4, "determine_parent -> None")
167 return None
169 def find_head_package(self, parent, name):
170 self.msgin(4, "find_head_package", parent, name)
171 if '.' in name:
172 i = name.find('.')
173 head = name[:i]
174 tail = name[i+1:]
175 else:
176 head = name
177 tail = ""
178 if parent:
179 qname = "%s.%s" % (parent.__name__, head)
180 else:
181 qname = head
182 q = self.import_module(head, qname, parent)
183 if q:
184 self.msgout(4, "find_head_package ->", (q, tail))
185 return q, tail
186 if parent:
187 qname = head
188 parent = None
189 q = self.import_module(head, qname, parent)
190 if q:
191 self.msgout(4, "find_head_package ->", (q, tail))
192 return q, tail
193 self.msgout(4, "raise ImportError: No module named", qname)
194 raise ImportError, "No module named " + qname
196 def load_tail(self, q, tail):
197 self.msgin(4, "load_tail", q, tail)
198 m = q
199 while tail:
200 i = tail.find('.')
201 if i < 0: i = len(tail)
202 head, tail = tail[:i], tail[i+1:]
203 mname = "%s.%s" % (m.__name__, head)
204 m = self.import_module(head, mname, m)
205 if not m:
206 self.msgout(4, "raise ImportError: No module named", mname)
207 raise ImportError, "No module named " + mname
208 self.msgout(4, "load_tail ->", m)
209 return m
211 def ensure_fromlist(self, m, fromlist, recursive=0):
212 self.msg(4, "ensure_fromlist", m, fromlist, recursive)
213 for sub in fromlist:
214 if sub == "*":
215 if not recursive:
216 all = self.find_all_submodules(m)
217 if all:
218 self.ensure_fromlist(m, all, 1)
219 elif not hasattr(m, sub):
220 subname = "%s.%s" % (m.__name__, sub)
221 submod = self.import_module(sub, subname, m)
222 if not submod:
223 raise ImportError, "No module named " + subname
225 def find_all_submodules(self, m):
226 if not m.__path__:
227 return
228 modules = {}
229 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
230 # But we must also collect Python extension modules - although
231 # we cannot separate normal dlls from Python extensions.
232 suffixes = []
233 for triple in imp.get_suffixes():
234 suffixes.append(triple[0])
235 for dir in m.__path__:
236 try:
237 names = os.listdir(dir)
238 except os.error:
239 self.msg(2, "can't list directory", dir)
240 continue
241 for name in names:
242 mod = None
243 for suff in suffixes:
244 n = len(suff)
245 if name[-n:] == suff:
246 mod = name[:-n]
247 break
248 if mod and mod != "__init__":
249 modules[mod] = mod
250 return modules.keys()
252 def import_module(self, partname, fqname, parent):
253 self.msgin(3, "import_module", partname, fqname, parent)
254 try:
255 m = self.modules[fqname]
256 except KeyError:
257 pass
258 else:
259 self.msgout(3, "import_module ->", m)
260 return m
261 if self.badmodules.has_key(fqname):
262 self.msgout(3, "import_module -> None")
263 return None
264 if parent and parent.__path__ is None:
265 self.msgout(3, "import_module -> None")
266 return None
267 try:
268 fp, pathname, stuff = self.find_module(partname,
269 parent and parent.__path__, parent)
270 except ImportError:
271 self.msgout(3, "import_module ->", None)
272 return None
273 try:
274 m = self.load_module(fqname, fp, pathname, stuff)
275 finally:
276 if fp: fp.close()
277 if parent:
278 setattr(parent, partname, m)
279 self.msgout(3, "import_module ->", m)
280 return m
282 def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
283 self.msgin(2, "load_module", fqname, fp and "fp", pathname)
284 if type == imp.PKG_DIRECTORY:
285 m = self.load_package(fqname, pathname)
286 self.msgout(2, "load_module ->", m)
287 return m
288 if type == imp.PY_SOURCE:
289 co = compile(fp.read()+'\n', pathname, 'exec')
290 elif type == imp.PY_COMPILED:
291 if fp.read(4) != imp.get_magic():
292 self.msgout(2, "raise ImportError: Bad magic number", pathname)
293 raise ImportError, "Bad magic number in %s" % pathname
294 fp.read(4)
295 co = marshal.load(fp)
296 else:
297 co = None
298 m = self.add_module(fqname)
299 m.__file__ = pathname
300 if co:
301 if self.replace_paths:
302 co = self.replace_paths_in_code(co)
303 m.__code__ = co
304 self.scan_code(co, m)
305 self.msgout(2, "load_module ->", m)
306 return m
308 def _add_badmodule(self, name, caller):
309 if name not in self.badmodules:
310 self.badmodules[name] = {}
311 self.badmodules[name][caller.__name__] = 1
313 def _safe_import_hook(self, name, caller, fromlist, level=-1):
314 # wrapper for self.import_hook() that won't raise ImportError
315 if name in self.badmodules:
316 self._add_badmodule(name, caller)
317 return
318 try:
319 self.import_hook(name, caller, level=level)
320 except ImportError, msg:
321 self.msg(2, "ImportError:", str(msg))
322 self._add_badmodule(name, caller)
323 else:
324 if fromlist:
325 for sub in fromlist:
326 if sub in self.badmodules:
327 self._add_badmodule(sub, caller)
328 continue
329 try:
330 self.import_hook(name, caller, [sub], level=level)
331 except ImportError, msg:
332 self.msg(2, "ImportError:", str(msg))
333 fullname = name + "." + sub
334 self._add_badmodule(fullname, caller)
336 def scan_opcodes(self, co,
337 unpack = struct.unpack):
338 # Scan the code, and yield 'interesting' opcode combinations
339 # Version for Python 2.4 and older
340 code = co.co_code
341 names = co.co_names
342 consts = co.co_consts
343 while code:
344 c = code[0]
345 if c in STORE_OPS:
346 oparg, = unpack('<H', code[1:3])
347 yield "store", (names[oparg],)
348 code = code[3:]
349 continue
350 if c == LOAD_CONST and code[3] == IMPORT_NAME:
351 oparg_1, oparg_2 = unpack('<xHxH', code[:6])
352 yield "import", (consts[oparg_1], names[oparg_2])
353 code = code[6:]
354 continue
355 if c >= HAVE_ARGUMENT:
356 code = code[3:]
357 else:
358 code = code[1:]
360 def scan_opcodes_25(self, co,
361 unpack = struct.unpack):
362 # Scan the code, and yield 'interesting' opcode combinations
363 # Python 2.5 version (has absolute and relative imports)
364 code = co.co_code
365 names = co.co_names
366 consts = co.co_consts
367 LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
368 while code:
369 c = code[0]
370 if c in STORE_OPS:
371 oparg, = unpack('<H', code[1:3])
372 yield "store", (names[oparg],)
373 code = code[3:]
374 continue
375 if code[:9:3] == LOAD_LOAD_AND_IMPORT:
376 oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
377 level = consts[oparg_1]
378 if level == -1: # normal import
379 yield "import", (consts[oparg_2], names[oparg_3])
380 elif level == 0: # absolute import
381 yield "absolute_import", (consts[oparg_2], names[oparg_3])
382 else: # relative import
383 yield "relative_import", (level, consts[oparg_2], names[oparg_3])
384 code = code[9:]
385 continue
386 if c >= HAVE_ARGUMENT:
387 code = code[3:]
388 else:
389 code = code[1:]
391 def scan_code(self, co, m):
392 code = co.co_code
393 if sys.version_info >= (2, 5):
394 scanner = self.scan_opcodes_25
395 else:
396 scanner = self.scan_opcodes
397 for what, args in scanner(co):
398 if what == "store":
399 name, = args
400 m.globalnames[name] = 1
401 elif what in ("import", "absolute_import"):
402 fromlist, name = args
403 have_star = 0
404 if fromlist is not None:
405 if "*" in fromlist:
406 have_star = 1
407 fromlist = [f for f in fromlist if f != "*"]
408 if what == "absolute_import": level = 0
409 else: level = -1
410 self._safe_import_hook(name, m, fromlist, level=level)
411 if have_star:
412 # We've encountered an "import *". If it is a Python module,
413 # the code has already been parsed and we can suck out the
414 # global names.
415 mm = None
416 if m.__path__:
417 # At this point we don't know whether 'name' is a
418 # submodule of 'm' or a global module. Let's just try
419 # the full name first.
420 mm = self.modules.get(m.__name__ + "." + name)
421 if mm is None:
422 mm = self.modules.get(name)
423 if mm is not None:
424 m.globalnames.update(mm.globalnames)
425 m.starimports.update(mm.starimports)
426 if mm.__code__ is None:
427 m.starimports[name] = 1
428 else:
429 m.starimports[name] = 1
430 elif what == "relative_import":
431 level, fromlist, name = args
432 if name:
433 self._safe_import_hook(name, m, fromlist, level=level)
434 else:
435 parent = self.determine_parent(m, level=level)
436 self._safe_import_hook(parent.__name__, None, fromlist, level=0)
437 else:
438 # We don't expect anything else from the generator.
439 raise RuntimeError(what)
441 for c in co.co_consts:
442 if isinstance(c, type(co)):
443 self.scan_code(c, m)
445 def load_package(self, fqname, pathname):
446 self.msgin(2, "load_package", fqname, pathname)
447 newname = replacePackageMap.get(fqname)
448 if newname:
449 fqname = newname
450 m = self.add_module(fqname)
451 m.__file__ = pathname
452 m.__path__ = [pathname]
454 # As per comment at top of file, simulate runtime __path__ additions.
455 m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
457 fp, buf, stuff = self.find_module("__init__", m.__path__)
458 self.load_module(fqname, fp, buf, stuff)
459 self.msgout(2, "load_package ->", m)
460 return m
462 def add_module(self, fqname):
463 if self.modules.has_key(fqname):
464 return self.modules[fqname]
465 self.modules[fqname] = m = Module(fqname)
466 return m
468 def find_module(self, name, path, parent=None):
469 if parent is not None:
470 # assert path is not None
471 fullname = parent.__name__+'.'+name
472 else:
473 fullname = name
474 if fullname in self.excludes:
475 self.msgout(3, "find_module -> Excluded", fullname)
476 raise ImportError, name
478 if path is None:
479 if name in sys.builtin_module_names:
480 return (None, None, ("", "", imp.C_BUILTIN))
482 path = self.path
483 return imp.find_module(name, path)
485 def report(self):
486 """Print a report to stdout, listing the found modules with their
487 paths, as well as modules that are missing, or seem to be missing.
489 print
490 print " %-25s %s" % ("Name", "File")
491 print " %-25s %s" % ("----", "----")
492 # Print modules found
493 keys = self.modules.keys()
494 keys.sort()
495 for key in keys:
496 m = self.modules[key]
497 if m.__path__:
498 print "P",
499 else:
500 print "m",
501 print "%-25s" % key, m.__file__ or ""
503 # Print missing modules
504 missing, maybe = self.any_missing_maybe()
505 if missing:
506 print
507 print "Missing modules:"
508 for name in missing:
509 mods = self.badmodules[name].keys()
510 mods.sort()
511 print "?", name, "imported from", ', '.join(mods)
512 # Print modules that may be missing, but then again, maybe not...
513 if maybe:
514 print
515 print "Submodules thay appear to be missing, but could also be",
516 print "global names in the parent package:"
517 for name in maybe:
518 mods = self.badmodules[name].keys()
519 mods.sort()
520 print "?", name, "imported from", ', '.join(mods)
522 def any_missing(self):
523 """Return a list of modules that appear to be missing. Use
524 any_missing_maybe() if you want to know which modules are
525 certain to be missing, and which *may* be missing.
527 missing, maybe = self.any_missing_maybe()
528 return missing + maybe
530 def any_missing_maybe(self):
531 """Return two lists, one with modules that are certainly missing
532 and one with modules that *may* be missing. The latter names could
533 either be submodules *or* just global names in the package.
535 The reason it can't always be determined is that it's impossible to
536 tell which names are imported when "from module import *" is done
537 with an extension module, short of actually importing it.
539 missing = []
540 maybe = []
541 for name in self.badmodules:
542 if name in self.excludes:
543 continue
544 i = name.rfind(".")
545 if i < 0:
546 missing.append(name)
547 continue
548 subname = name[i+1:]
549 pkgname = name[:i]
550 pkg = self.modules.get(pkgname)
551 if pkg is not None:
552 if pkgname in self.badmodules[name]:
553 # The package tried to import this module itself and
554 # failed. It's definitely missing.
555 missing.append(name)
556 elif subname in pkg.globalnames:
557 # It's a global in the package: definitely not missing.
558 pass
559 elif pkg.starimports:
560 # It could be missing, but the package did an "import *"
561 # from a non-Python module, so we simply can't be sure.
562 maybe.append(name)
563 else:
564 # It's not a global in the package, the package didn't
565 # do funny star imports, it's very likely to be missing.
566 # The symbol could be inserted into the package from the
567 # outside, but since that's not good style we simply list
568 # it missing.
569 missing.append(name)
570 else:
571 missing.append(name)
572 missing.sort()
573 maybe.sort()
574 return missing, maybe
576 def replace_paths_in_code(self, co):
577 new_filename = original_filename = os.path.normpath(co.co_filename)
578 for f, r in self.replace_paths:
579 if original_filename.startswith(f):
580 new_filename = r + original_filename[len(f):]
581 break
583 if self.debug and original_filename not in self.processed_paths:
584 if new_filename != original_filename:
585 self.msgout(2, "co_filename %r changed to %r" \
586 % (original_filename,new_filename,))
587 else:
588 self.msgout(2, "co_filename %r remains unchanged" \
589 % (original_filename,))
590 self.processed_paths.append(original_filename)
592 consts = list(co.co_consts)
593 for i in range(len(consts)):
594 if isinstance(consts[i], type(co)):
595 consts[i] = self.replace_paths_in_code(consts[i])
597 return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
598 co.co_flags, co.co_code, tuple(consts), co.co_names,
599 co.co_varnames, new_filename, co.co_name,
600 co.co_firstlineno, co.co_lnotab,
601 co.co_freevars, co.co_cellvars)
604 def test():
605 # Parse command line
606 import getopt
607 try:
608 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
609 except getopt.error, msg:
610 print msg
611 return
613 # Process options
614 debug = 1
615 domods = 0
616 addpath = []
617 exclude = []
618 for o, a in opts:
619 if o == '-d':
620 debug = debug + 1
621 if o == '-m':
622 domods = 1
623 if o == '-p':
624 addpath = addpath + a.split(os.pathsep)
625 if o == '-q':
626 debug = 0
627 if o == '-x':
628 exclude.append(a)
630 # Provide default arguments
631 if not args:
632 script = "hello.py"
633 else:
634 script = args[0]
636 # Set the path based on sys.path and the script directory
637 path = sys.path[:]
638 path[0] = os.path.dirname(script)
639 path = addpath + path
640 if debug > 1:
641 print "path:"
642 for item in path:
643 print " ", repr(item)
645 # Create the module finder and turn its crank
646 mf = ModuleFinder(path, debug, exclude)
647 for arg in args[1:]:
648 if arg == '-m':
649 domods = 1
650 continue
651 if domods:
652 if arg[-2:] == '.*':
653 mf.import_hook(arg[:-2], None, ["*"])
654 else:
655 mf.import_hook(arg)
656 else:
657 mf.load_file(arg)
658 mf.run_script(script)
659 mf.report()
660 return mf # for -i debugging
# Script entry point: run the command-line driver.  A keyboard interrupt
# is reported with a short message instead of a traceback.
if __name__ == '__main__':
    try:
        # The finder is bound at module level so it can be inspected
        # afterwards.
        mf = test()
    except KeyboardInterrupt:
        print "\n[interrupt]"