1 """Find modules used by a script, using introspection."""
3 from __future__
import generators
14 # XXX Clean up once str8's cstor matches bytes.
# Opcode numbers used by the bytecode scanners, each stored as a
# length-1 bytes object so it can be compared against bytecode slices.
LOAD_CONST = bytes([dis.opmap['LOAD_CONST']])
IMPORT_NAME = bytes([dis.opmap['IMPORT_NAME']])
STORE_NAME = bytes([dis.opmap['STORE_NAME']])
STORE_GLOBAL = bytes([dis.opmap['STORE_GLOBAL']])
# The two name-storing opcodes, grouped in one list.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# dis.HAVE_ARGUMENT threshold, encoded the same way as the opcodes above.
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
22 # Modulefinder does a good job at simulating Python's import machinery,
23 # but it cannot handle __path__ modifications packages make at runtime.
24 # Therefore there is a mechanism whereby you can register extra paths in
25 # this map for a package, and they will be honored.
27 # Note this is a mapping to lists of paths.
31 def AddPackagePath(packagename
, path
):
32 paths
= packagePathMap
.get(packagename
, [])
34 packagePathMap
[packagename
] = paths
# Registry of package renames, filled in by ReplacePackage() and keyed by
# the old package name.
replacePackageMap = {}

# The _xmlplus package injects itself into sys.modules under the name
# "xml" at runtime.  Calling ReplacePackage("_xmlplus", "xml") before
# running ModuleFinder lets modulefinder work around that substitution.

def ReplacePackage(oldname, newname):
    """Register *newname* as the replacement name for package *oldname*.

    A later registration for the same *oldname* overwrites the earlier one.
    """
    replacePackageMap[oldname] = newname
49 def __init__(self
, name
, file=None, path
=None):
54 # The set of global names that are assigned to in the module.
55 # This includes those names imported through starimports of
58 # The set of starimports this module did that could not be
59 # resolved, ie. a starimport from a non-Python module.
63 s
= "Module(%r" % (self
.__name
__,)
64 if self
.__file
__ is not None:
65 s
= s
+ ", %r" % (self
.__file
__,)
66 if self
.__path
__ is not None:
67 s
= s
+ ", %r" % (self
.__path
__,)
73 def __init__(self
, path
=None, debug
=0, excludes
=[], replace_paths
=[]):
81 self
.excludes
= excludes
82 self
.replace_paths
= replace_paths
83 self
.processed_paths
= [] # Used in debugging only
85 def msg(self
, level
, str, *args
):
86 if level
<= self
.debug
:
87 for i
in range(self
.indent
):
91 print(repr(arg
), end
=' ')
94 def msgin(self
, *args
):
96 if level
<= self
.debug
:
97 self
.indent
= self
.indent
+ 1
100 def msgout(self
, *args
):
102 if level
<= self
.debug
:
103 self
.indent
= self
.indent
- 1
def run_script(self, pathname):
    """Load the script at *pathname* as the ``__main__`` module.

    The file is opened with READ_MODE and passed to load_module() as
    Python source.  The file handle is closed as soon as load_module()
    returns or raises, so it is not leaked.
    """
    self.msg(2, "run_script", pathname)
    # load_module() unpacks this as a (suffix, mode, type) triple.
    stuff = ("", "r", imp.PY_SOURCE)
    with open(pathname, READ_MODE) as fp:
        self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
    """Load the source file at *pathname* as a module.

    The module name is the file's base name with its extension removed.
    The file is opened with READ_MODE and passed to load_module() as
    Python source.  The file handle is closed as soon as load_module()
    returns or raises, so it is not leaked.
    """
    # Only the final path component matters; the directory part is unused.
    name, ext = os.path.splitext(os.path.basename(pathname))
    # load_module() unpacks this as a (suffix, mode, type) triple.
    stuff = (ext, "r", imp.PY_SOURCE)
    with open(pathname, READ_MODE) as fp:
        self.load_module(name, fp, pathname, stuff)
119 def import_hook(self
, name
, caller
=None, fromlist
=None, level
=-1):
120 self
.msg(3, "import_hook", name
, caller
, fromlist
, level
)
121 parent
= self
.determine_parent(caller
, level
=level
)
122 q
, tail
= self
.find_head_package(parent
, name
)
123 m
= self
.load_tail(q
, tail
)
127 self
.ensure_fromlist(m
, fromlist
)
130 def determine_parent(self
, caller
, level
=-1):
131 self
.msgin(4, "determine_parent", caller
, level
)
132 if not caller
or level
== 0:
133 self
.msgout(4, "determine_parent -> None")
135 pname
= caller
.__name
__
136 if level
>= 1: # relative import
140 parent
= self
.modules
[pname
]
141 assert parent
is caller
142 self
.msgout(4, "determine_parent ->", parent
)
144 if pname
.count(".") < level
:
145 raise ImportError("relative importpath too deep")
146 pname
= ".".join(pname
.split(".")[:-level
])
147 parent
= self
.modules
[pname
]
148 self
.msgout(4, "determine_parent ->", parent
)
151 parent
= self
.modules
[pname
]
152 assert caller
is parent
153 self
.msgout(4, "determine_parent ->", parent
)
158 parent
= self
.modules
[pname
]
159 assert parent
.__name
__ == pname
160 self
.msgout(4, "determine_parent ->", parent
)
162 self
.msgout(4, "determine_parent -> None")
165 def find_head_package(self
, parent
, name
):
166 self
.msgin(4, "find_head_package", parent
, name
)
175 qname
= "%s.%s" % (parent
.__name
__, head
)
178 q
= self
.import_module(head
, qname
, parent
)
180 self
.msgout(4, "find_head_package ->", (q
, tail
))
185 q
= self
.import_module(head
, qname
, parent
)
187 self
.msgout(4, "find_head_package ->", (q
, tail
))
189 self
.msgout(4, "raise ImportError: No module named", qname
)
190 raise ImportError("No module named " + qname
)
192 def load_tail(self
, q
, tail
):
193 self
.msgin(4, "load_tail", q
, tail
)
197 if i
< 0: i
= len(tail
)
198 head
, tail
= tail
[:i
], tail
[i
+1:]
199 mname
= "%s.%s" % (m
.__name
__, head
)
200 m
= self
.import_module(head
, mname
, m
)
202 self
.msgout(4, "raise ImportError: No module named", mname
)
203 raise ImportError("No module named " + mname
)
204 self
.msgout(4, "load_tail ->", m
)
207 def ensure_fromlist(self
, m
, fromlist
, recursive
=0):
208 self
.msg(4, "ensure_fromlist", m
, fromlist
, recursive
)
212 all
= self
.find_all_submodules(m
)
214 self
.ensure_fromlist(m
, all
, 1)
215 elif not hasattr(m
, sub
):
216 subname
= "%s.%s" % (m
.__name
__, sub
)
217 submod
= self
.import_module(sub
, subname
, m
)
219 raise ImportError("No module named " + subname
)
221 def find_all_submodules(self
, m
):
225 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
226 # But we must also collect Python extension modules - although
227 # we cannot separate normal dlls from Python extensions.
229 for triple
in imp
.get_suffixes():
230 suffixes
.append(triple
[0])
231 for dir in m
.__path
__:
233 names
= os
.listdir(dir)
235 self
.msg(2, "can't list directory", dir)
239 for suff
in suffixes
:
241 if name
[-n
:] == suff
:
244 if mod
and mod
!= "__init__":
246 return modules
.keys()
248 def import_module(self
, partname
, fqname
, parent
):
249 self
.msgin(3, "import_module", partname
, fqname
, parent
)
251 m
= self
.modules
[fqname
]
255 self
.msgout(3, "import_module ->", m
)
257 if fqname
in self
.badmodules
:
258 self
.msgout(3, "import_module -> None")
260 if parent
and parent
.__path
__ is None:
261 self
.msgout(3, "import_module -> None")
264 fp
, pathname
, stuff
= self
.find_module(partname
,
265 parent
and parent
.__path
__, parent
)
267 self
.msgout(3, "import_module ->", None)
270 m
= self
.load_module(fqname
, fp
, pathname
, stuff
)
274 setattr(parent
, partname
, m
)
275 self
.msgout(3, "import_module ->", m
)
278 def load_module(self
, fqname
, fp
, pathname
, file_info
):
279 suffix
, mode
, type = file_info
280 self
.msgin(2, "load_module", fqname
, fp
and "fp", pathname
)
281 if type == imp
.PKG_DIRECTORY
:
282 m
= self
.load_package(fqname
, pathname
)
283 self
.msgout(2, "load_module ->", m
)
285 if type == imp
.PY_SOURCE
:
286 co
= compile(fp
.read()+'\n', pathname
, 'exec')
287 elif type == imp
.PY_COMPILED
:
288 if fp
.read(4) != imp
.get_magic():
289 self
.msgout(2, "raise ImportError: Bad magic number", pathname
)
290 raise ImportError("Bad magic number in %s" % pathname
)
292 co
= marshal
.load(fp
)
295 m
= self
.add_module(fqname
)
296 m
.__file
__ = pathname
298 if self
.replace_paths
:
299 co
= self
.replace_paths_in_code(co
)
301 self
.scan_code(co
, m
)
302 self
.msgout(2, "load_module ->", m
)
305 def _add_badmodule(self
, name
, caller
):
306 if name
not in self
.badmodules
:
307 self
.badmodules
[name
] = {}
309 self
.badmodules
[name
][caller
.__name
__] = 1
311 self
.badmodules
[name
]["-"] = 1
313 def _safe_import_hook(self
, name
, caller
, fromlist
, level
=-1):
314 # wrapper for self.import_hook() that won't raise ImportError
315 if name
in self
.badmodules
:
316 self
._add
_badmodule
(name
, caller
)
319 self
.import_hook(name
, caller
, level
=level
)
320 except ImportError as msg
:
321 self
.msg(2, "ImportError:", str(msg
))
322 self
._add
_badmodule
(name
, caller
)
326 if sub
in self
.badmodules
:
327 self
._add
_badmodule
(sub
, caller
)
330 self
.import_hook(name
, caller
, [sub
], level
=level
)
331 except ImportError as msg
:
332 self
.msg(2, "ImportError:", str(msg
))
333 fullname
= name
+ "." + sub
334 self
._add
_badmodule
(fullname
, caller
)
336 def scan_opcodes(self
, co
,
337 unpack
= struct
.unpack
):
338 # Scan the code, and yield 'interesting' opcode combinations
339 # Version for Python 2.4 and older
342 consts
= co
.co_consts
346 oparg
, = unpack('<H', code
[1:3])
347 yield "store", (names
[oparg
],)
350 if c
== LOAD_CONST
and code
[3] == IMPORT_NAME
:
351 oparg_1
, oparg_2
= unpack('<xHxH', code
[:6])
352 yield "import", (consts
[oparg_1
], names
[oparg_2
])
355 if c
>= HAVE_ARGUMENT
:
360 def scan_opcodes_25(self
, co
,
361 unpack
= struct
.unpack
):
362 # Scan the code, and yield 'interesting' opcode combinations
363 # Python 2.5 version (has absolute and relative imports)
366 consts
= co
.co_consts
367 LOAD_LOAD_AND_IMPORT
= LOAD_CONST
+ LOAD_CONST
+ IMPORT_NAME
371 oparg
, = unpack('<H', code
[1:3])
372 yield "store", (names
[oparg
],)
375 if code
[:9:3] == LOAD_LOAD_AND_IMPORT
:
376 oparg_1
, oparg_2
, oparg_3
= unpack('<xHxHxH', code
[:9])
377 level
= consts
[oparg_1
]
378 if level
== 0: # absolute import
379 yield "absolute_import", (consts
[oparg_2
], names
[oparg_3
])
380 else: # relative import
381 yield "relative_import", (level
, consts
[oparg_2
], names
[oparg_3
])
384 if c
>= HAVE_ARGUMENT
:
389 def scan_code(self
, co
, m
):
391 if sys
.version_info
>= (2, 5):
392 scanner
= self
.scan_opcodes_25
394 scanner
= self
.scan_opcodes
395 for what
, args
in scanner(co
):
398 m
.globalnames
[name
] = 1
399 elif what
== "absolute_import":
400 fromlist
, name
= args
402 if fromlist
is not None:
405 fromlist
= [f
for f
in fromlist
if f
!= "*"]
406 self
._safe
_import
_hook
(name
, m
, fromlist
, level
=0)
408 # We've encountered an "import *". If it is a Python module,
409 # the code has already been parsed and we can suck out the
413 # At this point we don't know whether 'name' is a
414 # submodule of 'm' or a global module. Let's just try
415 # the full name first.
416 mm
= self
.modules
.get(m
.__name
__ + "." + name
)
418 mm
= self
.modules
.get(name
)
420 m
.globalnames
.update(mm
.globalnames
)
421 m
.starimports
.update(mm
.starimports
)
422 if mm
.__code
__ is None:
423 m
.starimports
[name
] = 1
425 m
.starimports
[name
] = 1
426 elif what
== "relative_import":
427 level
, fromlist
, name
= args
429 self
._safe
_import
_hook
(name
, m
, fromlist
, level
=level
)
431 parent
= self
.determine_parent(m
, level
=level
)
432 self
._safe
_import
_hook
(parent
.__name
__, None, fromlist
, level
=0)
434 # We don't expect anything else from the generator.
435 raise RuntimeError(what
)
437 for c
in co
.co_consts
:
438 if isinstance(c
, type(co
)):
441 def load_package(self
, fqname
, pathname
):
442 self
.msgin(2, "load_package", fqname
, pathname
)
443 newname
= replacePackageMap
.get(fqname
)
446 m
= self
.add_module(fqname
)
447 m
.__file
__ = pathname
448 m
.__path
__ = [pathname
]
450 # As per comment at top of file, simulate runtime __path__ additions.
451 m
.__path
__ = m
.__path
__ + packagePathMap
.get(fqname
, [])
453 fp
, buf
, stuff
= self
.find_module("__init__", m
.__path
__)
454 self
.load_module(fqname
, fp
, buf
, stuff
)
455 self
.msgout(2, "load_package ->", m
)
458 def add_module(self
, fqname
):
459 if fqname
in self
.modules
:
460 return self
.modules
[fqname
]
461 self
.modules
[fqname
] = m
= Module(fqname
)
464 def find_module(self
, name
, path
, parent
=None):
465 if parent
is not None:
466 # assert path is not None
467 fullname
= parent
.__name
__+'.'+name
470 if fullname
in self
.excludes
:
471 self
.msgout(3, "find_module -> Excluded", fullname
)
472 raise ImportError(name
)
475 if name
in sys
.builtin_module_names
:
476 return (None, None, ("", "", imp
.C_BUILTIN
))
479 return imp
.find_module(name
, path
)
482 """Print a report to stdout, listing the found modules with their
483 paths, as well as modules that are missing, or seem to be missing.
486 print(" %-25s %s" % ("Name", "File"))
487 print(" %-25s %s" % ("----", "----"))
488 # Print modules found
489 keys
= sorted(self
.modules
.keys())
491 m
= self
.modules
[key
]
496 print("%-25s" % key
, m
.__file
__ or "")
498 # Print missing modules
499 missing
, maybe
= self
.any_missing_maybe()
502 print("Missing modules:")
504 mods
= sorted(self
.badmodules
[name
].keys())
505 print("?", name
, "imported from", ', '.join(mods
))
506 # Print modules that may be missing, but then again, maybe not...
509 print("Submodules thay appear to be missing, but could also be", end
=' ')
510 print("global names in the parent package:")
512 mods
= sorted(self
.badmodules
[name
].keys())
513 print("?", name
, "imported from", ', '.join(mods
))
def any_missing(self):
    """Return every module that is, or may be, missing.

    The result is the certainly-missing modules followed by the
    possibly-missing ones, as reported by any_missing_maybe().  Call
    any_missing_maybe() directly to keep the two groups apart.
    """
    definite, uncertain = self.any_missing_maybe()
    return definite + uncertain
523 def any_missing_maybe(self
):
524 """Return two lists, one with modules that are certainly missing
525 and one with modules that *may* be missing. The latter names could
526 either be submodules *or* just global names in the package.
528 The reason it can't always be determined is that it's impossible to
529 tell which names are imported when "from module import *" is done
530 with an extension module, short of actually importing it.
534 for name
in self
.badmodules
:
535 if name
in self
.excludes
:
543 pkg
= self
.modules
.get(pkgname
)
545 if pkgname
in self
.badmodules
[name
]:
546 # The package tried to import this module itself and
547 # failed. It's definitely missing.
549 elif subname
in pkg
.globalnames
:
550 # It's a global in the package: definitely not missing.
552 elif pkg
.starimports
:
553 # It could be missing, but the package did an "import *"
554 # from a non-Python module, so we simply can't be sure.
557 # It's not a global in the package, the package didn't
558 # do funny star imports, it's very likely to be missing.
559 # The symbol could be inserted into the package from the
560 # outside, but since that's not good style we simply list
567 return missing
, maybe
569 def replace_paths_in_code(self
, co
):
570 new_filename
= original_filename
= os
.path
.normpath(co
.co_filename
)
571 for f
, r
in self
.replace_paths
:
572 if original_filename
.startswith(f
):
573 new_filename
= r
+ original_filename
[len(f
):]
576 if self
.debug
and original_filename
not in self
.processed_paths
:
577 if new_filename
!= original_filename
:
578 self
.msgout(2, "co_filename %r changed to %r" \
579 % (original_filename
,new_filename
,))
581 self
.msgout(2, "co_filename %r remains unchanged" \
582 % (original_filename
,))
583 self
.processed_paths
.append(original_filename
)
585 consts
= list(co
.co_consts
)
586 for i
in range(len(consts
)):
587 if isinstance(consts
[i
], type(co
)):
588 consts
[i
] = self
.replace_paths_in_code(consts
[i
])
590 return types
.CodeType(co
.co_argcount
, co
.co_nlocals
, co
.co_stacksize
,
591 co
.co_flags
, co
.co_code
, tuple(consts
), co
.co_names
,
592 co
.co_varnames
, new_filename
, co
.co_name
,
593 co
.co_firstlineno
, co
.co_lnotab
,
594 co
.co_freevars
, co
.co_cellvars
)
601 opts
, args
= getopt
.getopt(sys
.argv
[1:], "dmp:qx:")
602 except getopt
.error
as msg
:
617 addpath
= addpath
+ a
.split(os
.pathsep
)
623 # Provide default arguments
629 # Set the path based on sys.path and the script directory
631 path
[0] = os
.path
.dirname(script
)
632 path
= addpath
+ path
636 print(" ", repr(item
))
638 # Create the module finder and turn its crank
639 mf
= ModuleFinder(path
, debug
, exclude
)
646 mf
.import_hook(arg
[:-2], None, ["*"])
651 mf
.run_script(script
)
653 return mf
# for -i debugging
656 if __name__
== '__main__':
659 except KeyboardInterrupt:
660 print("\n[interrupt]")