[Bug #1633678] Improve pattern used for mbox 'From' lines; add a simple test
[pytest.git] / Lib / modulefinder.py
blob25e14827c3c0640289299ac7a8a001c696f575c5
1 """Find modules used by a script, using introspection."""
3 # This module should be kept compatible with Python 2.2, see PEP 291.
5 import dis
6 import imp
7 import marshal
8 import os
9 import sys
10 import new
# Mode used to open Python source files.  Universal-newline mode is chosen
# when the interpreter's file objects expose a "newlines" attribute; the
# plain text mode keeps the module working on Python < 2.3.
READ_MODE = "r"
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"  # universal line endings

# Numeric opcodes of the bytecode instructions the scanner looks for.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = map(
    dis.opname.index,
    ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL'))

# Instructions that bind a module-level name.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# ModuleFinder does a good job of simulating Python's import behaviour, but
# it cannot see the __path__ modifications that packages make at runtime.
# This map is the workaround: extra directories registered here for a
# package are honored when that package is loaded.
#
# The map goes from package name to a list of paths.
packagePathMap = {}


# A public interface
def AddPackagePath(packagename, path):
    """Register 'path' as an additional __path__ entry for 'packagename'.

    Paths accumulate in registration order; registering the same path twice
    stores it twice.
    """
    packagePathMap.setdefault(packagename, []).append(path)
# Maps the name a package has on disk to the name it should be recorded
# under.  A later registration for the same old name replaces the earlier
# one.
replacePackageMap = {}


# This ReplacePackage mechanism allows modulefinder to work around the
# way the _xmlplus package injects itself under the name "xml" into
# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
# before running ModuleFinder.
def ReplacePackage(oldname, newname):
    """Record that the package found as 'oldname' is really 'newname'."""
    replacePackageMap.update({oldname: newname})
class Module:
    """Record describing one module or package.

    Attributes:
    __name__    -- the module name given to the constructor
    __file__    -- file the module came from, or None
    __path__    -- package search path, or None
    __code__    -- code object of the module; None until one is assigned
    globalnames -- dictionary used as a set of the global names assigned to
                   in the module, including names pulled in through
                   starimports of Python modules
    starimports -- dictionary used as a set of the starimports this module
                   did that could not be resolved, i.e. a starimport from a
                   non-Python module
    """

    def __init__(self, name, file=None, path=None):
        self.__name__, self.__file__, self.__path__ = name, file, path
        self.__code__ = None
        self.globalnames = {}
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [self.__name__]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(optional)
        return "Module(%s)" % ", ".join(map(repr, shown))
73 class ModuleFinder:
75 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
76 if path is None:
77 path = sys.path
78 self.path = path
79 self.modules = {}
80 self.badmodules = {}
81 self.debug = debug
82 self.indent = 0
83 self.excludes = excludes
84 self.replace_paths = replace_paths
85 self.processed_paths = [] # Used in debugging only
87 def msg(self, level, str, *args):
88 if level <= self.debug:
89 for i in range(self.indent):
90 print " ",
91 print str,
92 for arg in args:
93 print repr(arg),
94 print
96 def msgin(self, *args):
97 level = args[0]
98 if level <= self.debug:
99 self.indent = self.indent + 1
100 self.msg(*args)
102 def msgout(self, *args):
103 level = args[0]
104 if level <= self.debug:
105 self.indent = self.indent - 1
106 self.msg(*args)
108 def run_script(self, pathname):
109 self.msg(2, "run_script", pathname)
110 fp = open(pathname, READ_MODE)
111 stuff = ("", "r", imp.PY_SOURCE)
112 self.load_module('__main__', fp, pathname, stuff)
114 def load_file(self, pathname):
115 dir, name = os.path.split(pathname)
116 name, ext = os.path.splitext(name)
117 fp = open(pathname, READ_MODE)
118 stuff = (ext, "r", imp.PY_SOURCE)
119 self.load_module(name, fp, pathname, stuff)
121 def import_hook(self, name, caller=None, fromlist=None):
122 self.msg(3, "import_hook", name, caller, fromlist)
123 parent = self.determine_parent(caller)
124 q, tail = self.find_head_package(parent, name)
125 m = self.load_tail(q, tail)
126 if not fromlist:
127 return q
128 if m.__path__:
129 self.ensure_fromlist(m, fromlist)
130 return None
132 def determine_parent(self, caller):
133 self.msgin(4, "determine_parent", caller)
134 if not caller:
135 self.msgout(4, "determine_parent -> None")
136 return None
137 pname = caller.__name__
138 if caller.__path__:
139 parent = self.modules[pname]
140 assert caller is parent
141 self.msgout(4, "determine_parent ->", parent)
142 return parent
143 if '.' in pname:
144 i = pname.rfind('.')
145 pname = pname[:i]
146 parent = self.modules[pname]
147 assert parent.__name__ == pname
148 self.msgout(4, "determine_parent ->", parent)
149 return parent
150 self.msgout(4, "determine_parent -> None")
151 return None
153 def find_head_package(self, parent, name):
154 self.msgin(4, "find_head_package", parent, name)
155 if '.' in name:
156 i = name.find('.')
157 head = name[:i]
158 tail = name[i+1:]
159 else:
160 head = name
161 tail = ""
162 if parent:
163 qname = "%s.%s" % (parent.__name__, head)
164 else:
165 qname = head
166 q = self.import_module(head, qname, parent)
167 if q:
168 self.msgout(4, "find_head_package ->", (q, tail))
169 return q, tail
170 if parent:
171 qname = head
172 parent = None
173 q = self.import_module(head, qname, parent)
174 if q:
175 self.msgout(4, "find_head_package ->", (q, tail))
176 return q, tail
177 self.msgout(4, "raise ImportError: No module named", qname)
178 raise ImportError, "No module named " + qname
180 def load_tail(self, q, tail):
181 self.msgin(4, "load_tail", q, tail)
182 m = q
183 while tail:
184 i = tail.find('.')
185 if i < 0: i = len(tail)
186 head, tail = tail[:i], tail[i+1:]
187 mname = "%s.%s" % (m.__name__, head)
188 m = self.import_module(head, mname, m)
189 if not m:
190 self.msgout(4, "raise ImportError: No module named", mname)
191 raise ImportError, "No module named " + mname
192 self.msgout(4, "load_tail ->", m)
193 return m
195 def ensure_fromlist(self, m, fromlist, recursive=0):
196 self.msg(4, "ensure_fromlist", m, fromlist, recursive)
197 for sub in fromlist:
198 if sub == "*":
199 if not recursive:
200 all = self.find_all_submodules(m)
201 if all:
202 self.ensure_fromlist(m, all, 1)
203 elif not hasattr(m, sub):
204 subname = "%s.%s" % (m.__name__, sub)
205 submod = self.import_module(sub, subname, m)
206 if not submod:
207 raise ImportError, "No module named " + subname
209 def find_all_submodules(self, m):
210 if not m.__path__:
211 return
212 modules = {}
213 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
214 # But we must also collect Python extension modules - although
215 # we cannot separate normal dlls from Python extensions.
216 suffixes = []
217 for triple in imp.get_suffixes():
218 suffixes.append(triple[0])
219 for dir in m.__path__:
220 try:
221 names = os.listdir(dir)
222 except os.error:
223 self.msg(2, "can't list directory", dir)
224 continue
225 for name in names:
226 mod = None
227 for suff in suffixes:
228 n = len(suff)
229 if name[-n:] == suff:
230 mod = name[:-n]
231 break
232 if mod and mod != "__init__":
233 modules[mod] = mod
234 return modules.keys()
236 def import_module(self, partname, fqname, parent):
237 self.msgin(3, "import_module", partname, fqname, parent)
238 try:
239 m = self.modules[fqname]
240 except KeyError:
241 pass
242 else:
243 self.msgout(3, "import_module ->", m)
244 return m
245 if self.badmodules.has_key(fqname):
246 self.msgout(3, "import_module -> None")
247 return None
248 if parent and parent.__path__ is None:
249 self.msgout(3, "import_module -> None")
250 return None
251 try:
252 fp, pathname, stuff = self.find_module(partname,
253 parent and parent.__path__, parent)
254 except ImportError:
255 self.msgout(3, "import_module ->", None)
256 return None
257 try:
258 m = self.load_module(fqname, fp, pathname, stuff)
259 finally:
260 if fp: fp.close()
261 if parent:
262 setattr(parent, partname, m)
263 self.msgout(3, "import_module ->", m)
264 return m
266 def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
267 self.msgin(2, "load_module", fqname, fp and "fp", pathname)
268 if type == imp.PKG_DIRECTORY:
269 m = self.load_package(fqname, pathname)
270 self.msgout(2, "load_module ->", m)
271 return m
272 if type == imp.PY_SOURCE:
273 co = compile(fp.read()+'\n', pathname, 'exec')
274 elif type == imp.PY_COMPILED:
275 if fp.read(4) != imp.get_magic():
276 self.msgout(2, "raise ImportError: Bad magic number", pathname)
277 raise ImportError, "Bad magic number in %s" % pathname
278 fp.read(4)
279 co = marshal.load(fp)
280 else:
281 co = None
282 m = self.add_module(fqname)
283 m.__file__ = pathname
284 if co:
285 if self.replace_paths:
286 co = self.replace_paths_in_code(co)
287 m.__code__ = co
288 self.scan_code(co, m)
289 self.msgout(2, "load_module ->", m)
290 return m
292 def _add_badmodule(self, name, caller):
293 if name not in self.badmodules:
294 self.badmodules[name] = {}
295 self.badmodules[name][caller.__name__] = 1
297 def _safe_import_hook(self, name, caller, fromlist):
298 # wrapper for self.import_hook() that won't raise ImportError
299 if name in self.badmodules:
300 self._add_badmodule(name, caller)
301 return
302 try:
303 self.import_hook(name, caller)
304 except ImportError, msg:
305 self.msg(2, "ImportError:", str(msg))
306 self._add_badmodule(name, caller)
307 else:
308 if fromlist:
309 for sub in fromlist:
310 if sub in self.badmodules:
311 self._add_badmodule(sub, caller)
312 continue
313 try:
314 self.import_hook(name, caller, [sub])
315 except ImportError, msg:
316 self.msg(2, "ImportError:", str(msg))
317 fullname = name + "." + sub
318 self._add_badmodule(fullname, caller)
320 def scan_code(self, co, m):
321 code = co.co_code
322 n = len(code)
323 i = 0
324 fromlist = None
325 while i < n:
326 c = code[i]
327 i = i+1
328 op = ord(c)
329 if op >= dis.HAVE_ARGUMENT:
330 oparg = ord(code[i]) + ord(code[i+1])*256
331 i = i+2
332 if op == LOAD_CONST:
333 # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
334 # a tuple of "from" names, or None for a regular import.
335 # The tuple may contain "*" for "from <mod> import *"
336 fromlist = co.co_consts[oparg]
337 elif op == IMPORT_NAME:
338 assert fromlist is None or type(fromlist) is tuple
339 name = co.co_names[oparg]
340 have_star = 0
341 if fromlist is not None:
342 if "*" in fromlist:
343 have_star = 1
344 fromlist = [f for f in fromlist if f != "*"]
345 self._safe_import_hook(name, m, fromlist)
346 if have_star:
347 # We've encountered an "import *". If it is a Python module,
348 # the code has already been parsed and we can suck out the
349 # global names.
350 mm = None
351 if m.__path__:
352 # At this point we don't know whether 'name' is a
353 # submodule of 'm' or a global module. Let's just try
354 # the full name first.
355 mm = self.modules.get(m.__name__ + "." + name)
356 if mm is None:
357 mm = self.modules.get(name)
358 if mm is not None:
359 m.globalnames.update(mm.globalnames)
360 m.starimports.update(mm.starimports)
361 if mm.__code__ is None:
362 m.starimports[name] = 1
363 else:
364 m.starimports[name] = 1
365 elif op in STORE_OPS:
366 # keep track of all global names that are assigned to
367 name = co.co_names[oparg]
368 m.globalnames[name] = 1
369 for c in co.co_consts:
370 if isinstance(c, type(co)):
371 self.scan_code(c, m)
373 def load_package(self, fqname, pathname):
374 self.msgin(2, "load_package", fqname, pathname)
375 newname = replacePackageMap.get(fqname)
376 if newname:
377 fqname = newname
378 m = self.add_module(fqname)
379 m.__file__ = pathname
380 m.__path__ = [pathname]
382 # As per comment at top of file, simulate runtime __path__ additions.
383 m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
385 fp, buf, stuff = self.find_module("__init__", m.__path__)
386 self.load_module(fqname, fp, buf, stuff)
387 self.msgout(2, "load_package ->", m)
388 return m
390 def add_module(self, fqname):
391 if self.modules.has_key(fqname):
392 return self.modules[fqname]
393 self.modules[fqname] = m = Module(fqname)
394 return m
396 def find_module(self, name, path, parent=None):
397 if parent is not None:
398 # assert path is not None
399 fullname = parent.__name__+'.'+name
400 else:
401 fullname = name
402 if fullname in self.excludes:
403 self.msgout(3, "find_module -> Excluded", fullname)
404 raise ImportError, name
406 if path is None:
407 if name in sys.builtin_module_names:
408 return (None, None, ("", "", imp.C_BUILTIN))
410 path = self.path
411 return imp.find_module(name, path)
413 def report(self):
414 """Print a report to stdout, listing the found modules with their
415 paths, as well as modules that are missing, or seem to be missing.
417 print
418 print " %-25s %s" % ("Name", "File")
419 print " %-25s %s" % ("----", "----")
420 # Print modules found
421 keys = self.modules.keys()
422 keys.sort()
423 for key in keys:
424 m = self.modules[key]
425 if m.__path__:
426 print "P",
427 else:
428 print "m",
429 print "%-25s" % key, m.__file__ or ""
431 # Print missing modules
432 missing, maybe = self.any_missing_maybe()
433 if missing:
434 print
435 print "Missing modules:"
436 for name in missing:
437 mods = self.badmodules[name].keys()
438 mods.sort()
439 print "?", name, "imported from", ', '.join(mods)
440 # Print modules that may be missing, but then again, maybe not...
441 if maybe:
442 print
443 print "Submodules thay appear to be missing, but could also be",
444 print "global names in the parent package:"
445 for name in maybe:
446 mods = self.badmodules[name].keys()
447 mods.sort()
448 print "?", name, "imported from", ', '.join(mods)
450 def any_missing(self):
451 """Return a list of modules that appear to be missing. Use
452 any_missing_maybe() if you want to know which modules are
453 certain to be missing, and which *may* be missing.
455 missing, maybe = self.any_missing_maybe()
456 return missing + maybe
458 def any_missing_maybe(self):
459 """Return two lists, one with modules that are certainly missing
460 and one with modules that *may* be missing. The latter names could
461 either be submodules *or* just global names in the package.
463 The reason it can't always be determined is that it's impossible to
464 tell which names are imported when "from module import *" is done
465 with an extension module, short of actually importing it.
467 missing = []
468 maybe = []
469 for name in self.badmodules:
470 if name in self.excludes:
471 continue
472 i = name.rfind(".")
473 if i < 0:
474 missing.append(name)
475 continue
476 subname = name[i+1:]
477 pkgname = name[:i]
478 pkg = self.modules.get(pkgname)
479 if pkg is not None:
480 if pkgname in self.badmodules[name]:
481 # The package tried to import this module itself and
482 # failed. It's definitely missing.
483 missing.append(name)
484 elif subname in pkg.globalnames:
485 # It's a global in the package: definitely not missing.
486 pass
487 elif pkg.starimports:
488 # It could be missing, but the package did an "import *"
489 # from a non-Python module, so we simply can't be sure.
490 maybe.append(name)
491 else:
492 # It's not a global in the package, the package didn't
493 # do funny star imports, it's very likely to be missing.
494 # The symbol could be inserted into the package from the
495 # outside, but since that's not good style we simply list
496 # it missing.
497 missing.append(name)
498 else:
499 missing.append(name)
500 missing.sort()
501 maybe.sort()
502 return missing, maybe
504 def replace_paths_in_code(self, co):
505 new_filename = original_filename = os.path.normpath(co.co_filename)
506 for f, r in self.replace_paths:
507 if original_filename.startswith(f):
508 new_filename = r + original_filename[len(f):]
509 break
511 if self.debug and original_filename not in self.processed_paths:
512 if new_filename != original_filename:
513 self.msgout(2, "co_filename %r changed to %r" \
514 % (original_filename,new_filename,))
515 else:
516 self.msgout(2, "co_filename %r remains unchanged" \
517 % (original_filename,))
518 self.processed_paths.append(original_filename)
520 consts = list(co.co_consts)
521 for i in range(len(consts)):
522 if isinstance(consts[i], type(co)):
523 consts[i] = self.replace_paths_in_code(consts[i])
525 return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
526 co.co_flags, co.co_code, tuple(consts), co.co_names,
527 co.co_varnames, new_filename, co.co_name,
528 co.co_firstlineno, co.co_lnotab,
529 co.co_freevars, co.co_cellvars)
532 def test():
533 # Parse command line
534 import getopt
535 try:
536 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
537 except getopt.error, msg:
538 print msg
539 return
541 # Process options
542 debug = 1
543 domods = 0
544 addpath = []
545 exclude = []
546 for o, a in opts:
547 if o == '-d':
548 debug = debug + 1
549 if o == '-m':
550 domods = 1
551 if o == '-p':
552 addpath = addpath + a.split(os.pathsep)
553 if o == '-q':
554 debug = 0
555 if o == '-x':
556 exclude.append(a)
558 # Provide default arguments
559 if not args:
560 script = "hello.py"
561 else:
562 script = args[0]
564 # Set the path based on sys.path and the script directory
565 path = sys.path[:]
566 path[0] = os.path.dirname(script)
567 path = addpath + path
568 if debug > 1:
569 print "path:"
570 for item in path:
571 print " ", repr(item)
573 # Create the module finder and turn its crank
574 mf = ModuleFinder(path, debug, exclude)
575 for arg in args[1:]:
576 if arg == '-m':
577 domods = 1
578 continue
579 if domods:
580 if arg[-2:] == '.*':
581 mf.import_hook(arg[:-2], None, ["*"])
582 else:
583 mf.import_hook(arg)
584 else:
585 mf.load_file(arg)
586 mf.run_script(script)
587 mf.report()
588 return mf # for -i debugging
591 if __name__ == '__main__':
592 try:
593 mf = test()
594 except KeyboardInterrupt:
595 print "\n[interrupt]"