update pydoc topics
[python/dscho.git] / Lib / modulefinder.py
blob478c74912d2a38349cd8b607d3a320aa43ab31be
1 """Find modules used by a script, using introspection."""
3 from __future__ import generators
4 import dis
5 import imp
6 import marshal
7 import os
8 import sys
9 import types
10 import struct
# Text-mode files already get universal-newline translation in Python 3,
# so the legacy "U" flag adds nothing (and newer interpreters reject it).
READ_MODE = "r"

# Opcodes of interest, each stored as a one-byte bytes object so that it can
# be compared directly against one-byte pieces of a code object's co_code.
LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
STORE_NAME = bytes([dis.opname.index('STORE_NAME')])
STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')])
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Opcodes at or above this value are followed by an argument.
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
# ModuleFinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note: this is a mapping from package names to lists of paths.
packagePathMap = {}


# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search directory for *packagename*.

    The path is appended to the list kept for that package in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
# Maps the name a package is found under to the name it should be
# recorded under.
replacePackageMap = {}


# This ReplacePackage mechanism allows modulefinder to work around the
# way the _xmlplus package injects itself under the name "xml" into
# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
# before running ModuleFinder.
def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be treated as *newname*."""
    replacePackageMap.update({oldname: newname})
class Module:
    """Lightweight record describing one module seen by the finder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name, file and (for packages) search path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Names assigned at global scope in the module, including names
        # pulled in through star-imports of Python modules.
        self.globalnames = {}
        # Star-imports that could not be resolved, i.e. "import *" from
        # a non-Python module.
        self.starimports = {}

    def __repr__(self):
        """Return "Module(name[, file[, path]])", omitting unset parts."""
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(" + ", ".join(shown) + ")"
71 class ModuleFinder:
73 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
74 if path is None:
75 path = sys.path
76 self.path = path
77 self.modules = {}
78 self.badmodules = {}
79 self.debug = debug
80 self.indent = 0
81 self.excludes = excludes
82 self.replace_paths = replace_paths
83 self.processed_paths = [] # Used in debugging only
def msg(self, level, str, *args):
    """Print a debug line if *level* does not exceed self.debug.

    The line is the current indentation, then *str*, then the repr of
    every extra argument, each followed by a space.
    """
    if level > self.debug:
        return
    for _ in range(self.indent):
        print(" ", end=' ')
    print(str, end=' ')
    for value in args:
        print(repr(value), end=' ')
    print()
def msgin(self, *args):
    """Increase the indentation and emit a message (args[0] is its level).

    Nothing happens when the level is above self.debug.
    """
    if args[0] <= self.debug:
        self.indent += 1
        self.msg(*args)
def msgout(self, *args):
    """Decrease the indentation and emit a message (args[0] is its level).

    Nothing happens when the level is above self.debug.
    """
    if args[0] <= self.debug:
        self.indent -= 1
        self.msg(*args)
def run_script(self, pathname):
    """Load the script at *pathname* as the '__main__' module.

    The file is opened in text mode and handed to load_module() as
    Python source; it is closed again even if loading fails.
    """
    self.msg(2, "run_script", pathname)
    with open(pathname, READ_MODE) as fp:
        stuff = ("", "r", imp.PY_SOURCE)
        self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
    """Load the source file at *pathname* as a module.

    The module name is the file's base name without its extension.  The
    file is closed again even if loading fails.
    """
    dir, name = os.path.split(pathname)
    name, ext = os.path.splitext(name)
    with open(pathname, READ_MODE) as fp:
        stuff = (ext, "r", imp.PY_SOURCE)
        self.load_module(name, fp, pathname, stuff)
def import_hook(self, name, caller=None, fromlist=None, level=-1):
    """Simulate "import name" / "from name import fromlist" for *caller*.

    Returns the head package when there is no fromlist; otherwise
    returns None after making sure the fromlist names are loaded (only
    attempted when the final module has a __path__).
    """
    self.msg(3, "import_hook", name, caller, fromlist, level)
    parent = self.determine_parent(caller, level=level)
    head_pkg, remainder = self.find_head_package(parent, name)
    leaf = self.load_tail(head_pkg, remainder)
    if fromlist:
        if leaf.__path__:
            self.ensure_fromlist(leaf, fromlist)
        return None
    return head_pkg
def determine_parent(self, caller, level=-1):
    """Return the package relative to which *caller*'s import is resolved.

    Returns None when there is no caller, when level is 0, or when the
    caller is a plain top-level module.  For level >= 1 the caller's
    dotted name is walked upwards; ImportError is raised if it has too
    few components.
    """
    self.msgin(4, "determine_parent", caller, level)
    if not caller or level == 0:
        self.msgout(4, "determine_parent -> None")
        return None
    caller_name = caller.__name__
    if level >= 1:
        # Explicit relative import: a package counts as its own first level.
        if caller.__path__:
            level -= 1
        if level == 0:
            found = self.modules[caller_name]
            assert found is caller
        else:
            if caller_name.count(".") < level:
                raise ImportError("relative importpath too deep")
            found = self.modules[".".join(caller_name.split(".")[:-level])]
    elif caller.__path__:
        # The caller is itself a package.
        found = self.modules[caller_name]
        assert caller is found
    elif '.' in caller_name:
        # The caller is a submodule: its parent is the enclosing package.
        package_name = caller_name.rpartition('.')[0]
        found = self.modules[package_name]
        assert found.__name__ == package_name
    else:
        self.msgout(4, "determine_parent -> None")
        return None
    self.msgout(4, "determine_parent ->", found)
    return found
def find_head_package(self, parent, name):
    """Import the first component of dotted *name*.

    The component is first looked up inside *parent* (when given) and
    then as a top-level module.  Returns (module, rest_of_name); raises
    ImportError when neither lookup succeeds.
    """
    self.msgin(4, "find_head_package", parent, name)
    head, _, tail = name.partition('.')
    if parent:
        attempts = [("%s.%s" % (parent.__name__, head), parent), (head, None)]
    else:
        attempts = [(head, parent)]
    for qname, scope in attempts:
        q = self.import_module(head, qname, scope)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
    self.msgout(4, "raise ImportError: No module named", qname)
    raise ImportError("No module named " + qname)
def load_tail(self, q, tail):
    """Import each remaining component of *tail* below module *q*.

    Returns the module for the last component (or *q* itself when tail
    is empty); raises ImportError at the first component that fails.
    """
    self.msgin(4, "load_tail", q, tail)
    current = q
    while tail:
        head, _, tail = tail.partition('.')
        mname = "%s.%s" % (current.__name__, head)
        current = self.import_module(head, mname, current)
        if not current:
            self.msgout(4, "raise ImportError: No module named", mname)
            raise ImportError("No module named " + mname)
    self.msgout(4, "load_tail ->", current)
    return current
def ensure_fromlist(self, m, fromlist, recursive=0):
    """Import every name in *fromlist* that is not already set on *m*.

    A "*" entry expands (once, not recursively) to all submodules found
    on disk.  Raises ImportError for a name that cannot be imported.
    """
    self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    for sub in fromlist:
        if sub == "*":
            if recursive:
                continue
            submodules = self.find_all_submodules(m)
            if submodules:
                self.ensure_fromlist(m, submodules, 1)
            continue
        if hasattr(m, sub):
            continue
        subname = "%s.%s" % (m.__name__, sub)
        if not self.import_module(sub, subname, m):
            raise ImportError("No module named " + subname)
def find_all_submodules(self, m):
    """Return the names of all candidate submodules of package *m*.

    Every directory on m.__path__ is listed and each file whose name
    ends in a known module suffix contributes its stem ("__init__" is
    skipped).  Returns None when *m* has no __path__.
    """
    if not m.__path__:
        return
    found = {}
    # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
    # But we must also collect Python extension modules - although
    # we cannot separate normal dlls from Python extensions.
    suffixes = [triple[0] for triple in imp.get_suffixes()]
    for directory in m.__path__:
        try:
            entries = os.listdir(directory)
        except os.error:
            self.msg(2, "can't list directory", directory)
            continue
        for entry in entries:
            # The first matching suffix decides the module name.
            stem = next(
                (entry[:-len(suff)] for suff in suffixes
                 if entry[-len(suff):] == suff),
                None)
            if stem and stem != "__init__":
                found[stem] = stem
    return found.keys()
def import_module(self, partname, fqname, parent):
    """Locate and load module *fqname*, returning it or None on failure.

    Already-loaded modules are returned directly.  None is returned for
    known-bad names, when *parent* is not a package, or when the module
    cannot be found.  A successfully loaded module is also set as
    attribute *partname* on *parent*.
    """
    self.msgin(3, "import_module", partname, fqname, parent)
    if fqname in self.modules:
        m = self.modules[fqname]
        self.msgout(3, "import_module ->", m)
        return m
    if fqname in self.badmodules or (parent and parent.__path__ is None):
        self.msgout(3, "import_module -> None")
        return None
    search_path = parent and parent.__path__
    try:
        fp, pathname, stuff = self.find_module(partname, search_path, parent)
    except ImportError:
        self.msgout(3, "import_module ->", None)
        return None
    try:
        m = self.load_module(fqname, fp, pathname, stuff)
    finally:
        # find_module may hand back an open file; always release it.
        if fp:
            fp.close()
    if parent:
        setattr(parent, partname, m)
    self.msgout(3, "import_module ->", m)
    return m
def load_module(self, fqname, fp, pathname, file_info):
    """Create the Module record for *fqname* and scan its code.

    *file_info* is a (suffix, mode, type) triple.  Package directories
    are delegated to load_package(); source is compiled, compiled files
    are unmarshalled after a magic-number check (ImportError on
    mismatch); any other type yields a module without code.
    """
    _suffix, _mode, kind = file_info
    self.msgin(2, "load_module", fqname, fp and "fp", pathname)
    if kind == imp.PKG_DIRECTORY:
        m = self.load_package(fqname, pathname)
        self.msgout(2, "load_module ->", m)
        return m

    co = None
    if kind == imp.PY_SOURCE:
        co = compile(fp.read()+'\n', pathname, 'exec')
    elif kind == imp.PY_COMPILED:
        if fp.read(4) != imp.get_magic():
            self.msgout(2, "raise ImportError: Bad magic number", pathname)
            raise ImportError("Bad magic number in %s" % pathname)
        fp.read(4)  # skip the four bytes that follow the magic number
        co = marshal.load(fp)

    m = self.add_module(fqname)
    m.__file__ = pathname
    if co:
        if self.replace_paths:
            co = self.replace_paths_in_code(co)
        m.__code__ = co
        self.scan_code(co, m)
    self.msgout(2, "load_module ->", m)
    return m
305 def _add_badmodule(self, name, caller):
306 if name not in self.badmodules:
307 self.badmodules[name] = {}
308 if caller:
309 self.badmodules[name][caller.__name__] = 1
310 else:
311 self.badmodules[name]["-"] = 1
313 def _safe_import_hook(self, name, caller, fromlist, level=-1):
314 # wrapper for self.import_hook() that won't raise ImportError
315 if name in self.badmodules:
316 self._add_badmodule(name, caller)
317 return
318 try:
319 self.import_hook(name, caller, level=level)
320 except ImportError as msg:
321 self.msg(2, "ImportError:", str(msg))
322 self._add_badmodule(name, caller)
323 else:
324 if fromlist:
325 for sub in fromlist:
326 if sub in self.badmodules:
327 self._add_badmodule(sub, caller)
328 continue
329 try:
330 self.import_hook(name, caller, [sub], level=level)
331 except ImportError as msg:
332 self.msg(2, "ImportError:", str(msg))
333 fullname = name + "." + sub
334 self._add_badmodule(fullname, caller)
def scan_opcodes(self, co,
                 unpack = struct.unpack):
    """Yield 'interesting' opcode combinations found in code object *co*.

    Version for Python 2.4 and older bytecode.  Yields
    ("store", (name,)) for every global/name store and
    ("import", (fromlist, name)) for every LOAD_CONST + IMPORT_NAME pair.
    """
    code = co.co_code
    names = co.co_names
    consts = co.co_consts
    while code:
        # Indexing bytes gives an int; wrap it so that it can be compared
        # with the one-byte opcode constants.
        c = bytes([code[0]])
        if c in STORE_OPS:
            oparg, = unpack('<H', code[1:3])
            yield "store", (names[oparg],)
            code = code[3:]
            continue
        # Slice rather than index so that a trailing LOAD_CONST cannot
        # run off the end of the bytecode.
        if c == LOAD_CONST and code[3:4] == IMPORT_NAME:
            oparg_1, oparg_2 = unpack('<xHxH', code[:6])
            yield "import", (consts[oparg_1], names[oparg_2])
            code = code[6:]
            continue
        if c >= HAVE_ARGUMENT:
            code = code[3:]
        else:
            code = code[1:]
def scan_opcodes_25(self, co,
                    unpack = struct.unpack):
    """Yield 'interesting' opcode combinations found in code object *co*.

    Python 2.5 version (has absolute and relative imports).  Yields
    ("store", (name,)), ("absolute_import", (fromlist, name)) or
    ("relative_import", (level, fromlist, name)).
    """
    remaining = co.co_code
    names = co.co_names
    consts = co.co_consts
    import_triplet = LOAD_CONST + LOAD_CONST + IMPORT_NAME
    while remaining:
        opcode = bytes([remaining[0]])
        if opcode in STORE_OPS:
            oparg, = unpack('<H', remaining[1:3])
            yield "store", (names[oparg],)
            consumed = 3
        elif remaining[:9:3] == import_triplet:
            # Three 3-byte instructions: level, fromlist, module name.
            oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', remaining[:9])
            level = consts[oparg_1]
            if level == 0:  # absolute import
                yield "absolute_import", (consts[oparg_2], names[oparg_3])
            else:  # relative import
                yield "relative_import", (level, consts[oparg_2], names[oparg_3])
            consumed = 9
        elif opcode >= HAVE_ARGUMENT:
            consumed = 3
        else:
            consumed = 1
        remaining = remaining[consumed:]
def scan_code(self, co, m):
    """Scan code object *co* (and nested code objects) on behalf of *m*.

    Global stores are recorded in m.globalnames, imports are followed
    through _safe_import_hook(), and star-imports either copy the
    imported module's known globals or are noted in m.starimports.
    Raises RuntimeError if the opcode scanner yields an unknown event.
    """
    if sys.version_info >= (2, 5):
        scanner = self.scan_opcodes_25
    else:
        scanner = self.scan_opcodes
    for what, args in scanner(co):
        if what == "store":
            name, = args
            m.globalnames[name] = 1
        elif what == "absolute_import":
            fromlist, name = args
            have_star = False
            if fromlist is not None:
                have_star = "*" in fromlist
                fromlist = [f for f in fromlist if f != "*"]
            self._safe_import_hook(name, m, fromlist, level=0)
            if have_star:
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                mm = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    mm = self.modules.get(m.__name__ + "." + name)
                if mm is None:
                    mm = self.modules.get(name)
                if mm is not None:
                    m.globalnames.update(mm.globalnames)
                    m.starimports.update(mm.starimports)
                    if mm.__code__ is None:
                        m.starimports[name] = 1
                else:
                    m.starimports[name] = 1
        elif what == "relative_import":
            level, fromlist, name = args
            if name:
                self._safe_import_hook(name, m, fromlist, level=level)
            else:
                # "from . import x": the names come from the parent package.
                parent = self.determine_parent(m, level=level)
                self._safe_import_hook(parent.__name__, None, fromlist, level=0)
        else:
            # We don't expect anything else from the generator.
            raise RuntimeError(what)

    # Functions, classes etc. are nested code objects among the constants.
    for c in co.co_consts:
        if isinstance(c, type(co)):
            self.scan_code(c, m)
def load_package(self, fqname, pathname):
    """Load the package whose directory is *pathname* and return it.

    The name is first mapped through replacePackageMap, the package's
    __path__ is extended with any paths registered in packagePathMap,
    and the package's __init__ module is then located and loaded.  The
    file opened for __init__ is closed even if loading fails.
    """
    self.msgin(2, "load_package", fqname, pathname)
    newname = replacePackageMap.get(fqname)
    if newname:
        fqname = newname
    m = self.add_module(fqname)
    m.__file__ = pathname
    m.__path__ = [pathname]

    # As per comment at top of file, simulate runtime __path__ additions.
    m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

    fp, buf, stuff = self.find_module("__init__", m.__path__)
    try:
        self.load_module(fqname, fp, buf, stuff)
    finally:
        if fp:
            fp.close()
    self.msgout(2, "load_package ->", m)
    return m
def add_module(self, fqname):
    """Return the Module registered as *fqname*, creating it if needed."""
    try:
        return self.modules[fqname]
    except KeyError:
        m = self.modules[fqname] = Module(fqname)
        return m
def find_module(self, name, path, parent=None):
    """Locate module *name* and return imp.find_module()'s result triple.

    Raises ImportError when the fully qualified name is excluded.  With
    no *path*, built-in modules are reported without a file and other
    modules are searched for on self.path.
    """
    # assert path is not None (when a parent is given)
    fullname = name if parent is None else parent.__name__+'.'+name
    if fullname in self.excludes:
        self.msgout(3, "find_module -> Excluded", fullname)
        raise ImportError(name)

    if path is not None:
        return imp.find_module(name, path)
    if name in sys.builtin_module_names:
        return (None, None, ("", "", imp.C_BUILTIN))
    return imp.find_module(name, self.path)
def report(self):
    """Print a report to stdout, listing the found modules with their
    paths, as well as modules that are missing, or seem to be missing.
    """
    print()
    print(" %-25s %s" % ("Name", "File"))
    print(" %-25s %s" % ("----", "----"))
    # Print modules found; "P" marks packages, "m" plain modules.
    for key in sorted(self.modules):
        m = self.modules[key]
        print("P" if m.__path__ else "m", end=' ')
        print("%-25s" % key, m.__file__ or "")

    # Print missing modules
    missing, maybe = self.any_missing_maybe()
    if missing:
        print()
        print("Missing modules:")
        for name in missing:
            mods = sorted(self.badmodules[name])
            print("?", name, "imported from", ', '.join(mods))
    # Print modules that may be missing, but then again, maybe not...
    if maybe:
        print()
        print("Submodules that appear to be missing, but could also be", end=' ')
        print("global names in the parent package:")
        for name in maybe:
            mods = sorted(self.badmodules[name])
            print("?", name, "imported from", ', '.join(mods))
def any_missing(self):
    """Return a list of modules that appear to be missing. Use
    any_missing_maybe() if you want to know which modules are
    certain to be missing, and which *may* be missing.
    """
    certain, possible = self.any_missing_maybe()
    return certain + possible
def any_missing_maybe(self):
    """Return two lists, one with modules that are certainly missing
    and one with modules that *may* be missing. The latter names could
    either be submodules *or* just global names in the package.

    The reason it can't always be determined is that it's impossible to
    tell which names are imported when "from module import *" is done
    with an extension module, short of actually importing it.
    """
    missing = []
    maybe = []
    for name in self.badmodules:
        if name in self.excludes:
            continue
        pkgname, dot, subname = name.rpartition(".")
        if not dot:
            # A top-level name has no package that could define it.
            missing.append(name)
            continue
        pkg = self.modules.get(pkgname)
        if pkg is None:
            missing.append(name)
        elif pkgname in self.badmodules[name]:
            # The package tried to import this module itself and
            # failed. It's definitely missing.
            missing.append(name)
        elif subname in pkg.globalnames:
            # It's a global in the package: definitely not missing.
            continue
        elif pkg.starimports:
            # It could be missing, but the package did an "import *"
            # from a non-Python module, so we simply can't be sure.
            maybe.append(name)
        else:
            # It's not a global in the package, the package didn't
            # do funny star imports, it's very likely to be missing.
            # The symbol could be inserted into the package from the
            # outside, but since that's not good style we simply list
            # it missing.
            missing.append(name)
    missing.sort()
    maybe.sort()
    return missing, maybe
def replace_paths_in_code(self, co):
    """Return a copy of code object *co* with its filename rewritten.

    The first (old_prefix, new_prefix) pair in self.replace_paths whose
    old prefix matches the normalized co_filename is applied; nested
    code objects in co_consts are rewritten recursively.
    """
    new_filename = original_filename = os.path.normpath(co.co_filename)
    for f, r in self.replace_paths:
        if original_filename.startswith(f):
            new_filename = r + original_filename[len(f):]
            break

    # Report each distinct filename only once when debugging.
    # NOTE(review): msgout() also decrements the indent level without a
    # matching msgin() -- confirm whether msg() was intended here.
    if self.debug and original_filename not in self.processed_paths:
        if new_filename != original_filename:
            self.msgout(2, "co_filename %r changed to %r" \
                                % (original_filename,new_filename,))
        else:
            self.msgout(2, "co_filename %r remains unchanged" \
                                % (original_filename,))
        self.processed_paths.append(original_filename)

    consts = tuple(
        self.replace_paths_in_code(c) if isinstance(c, type(co)) else c
        for c in co.co_consts)

    # Prefer code.replace(): it stays correct whatever positional
    # arguments the running interpreter's CodeType constructor expects.
    if hasattr(co, "replace"):
        return co.replace(co_consts=consts, co_filename=new_filename)
    return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
                     co.co_flags, co.co_code, consts, co.co_names,
                     co.co_varnames, new_filename, co.co_name,
                     co.co_firstlineno, co.co_lnotab,
                     co.co_freevars, co.co_cellvars)
def test():
    """Command-line driver: analyse a script and print the report.

    Options: -d raises verbosity, -q silences it, -m treats the extra
    arguments as module names, -p adds search directories, -x excludes
    a module.  Returns the ModuleFinder, or None on a usage error.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath.extend(value.split(os.pathsep))
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    search_path = list(sys.path)
    search_path[0] = os.path.dirname(script)
    search_path = addpath + search_path
    if debug > 1:
        print("path:")
        for entry in search_path:
            print(" ", repr(entry))

    # Create the module finder and turn its crank
    mf = ModuleFinder(search_path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg.endswith('.*'):
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
if __name__ == '__main__':
    # Run the command-line driver when executed as a script.  The finder
    # is bound to a module-level name so that it can be inspected when
    # running under "python -i".
    try:
        mf = test()
    except KeyboardInterrupt:
        # Report Ctrl-C with a short notice instead of a traceback.
        print("\n[interrupt]")