Merged revisions 78818 via svnmerge from
[python/dscho.git] / Lib / pkgutil.py
blob0ec6ec5265480c80043e03e5bc7c9176abc3776a
1 """Utilities to support packages."""
3 import os
4 import sys
5 import imp
6 import os.path
7 from types import ModuleType
# Names exported by a star-import of this module.  'get_data' is part of
# the public API defined below, so it is listed with the other helpers.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
]
def read_code(stream):
    """Load a code object from an open compiled-bytecode stream.

    Four bytes are read and compared with the running interpreter's
    magic number (imp.get_magic()).  On a mismatch None is returned.
    Otherwise the next four bytes (the timestamp) are skipped and the
    remainder of the stream is unmarshalled and returned.
    """
    # This helper exists so the PEP 302 emulation can correctly handle
    # compiled files.
    import marshal

    header = stream.read(4)
    if header == imp.get_magic():
        stream.read(4)  # discard the timestamp field
        return marshal.load(stream)
    return None
def simplegeneric(func):
    """Turn *func* into a trivial single-dispatch generic function.

    The returned wrapper dispatches on the class of its first positional
    argument: the classes in that argument's MRO are checked in order and
    the first one with a registered implementation handles the call.  If
    none is registered, *func* itself is called.

    Implementations are added with wrapper.register(typ, impl), or with
    wrapper.register(typ) used as a decorator.
    """
    implementations = {}

    def register(typ, func=None):
        # With a single argument, act as a decorator factory.
        if func is None:
            return lambda f: register(typ, f)
        implementations[typ] = func
        return func

    def wrapper(*args, **kw):
        subject = args[0]
        try:
            klass = subject.__class__
        except AttributeError:
            klass = type(subject)
        try:
            lineage = klass.__mro__
        except AttributeError:
            # No __mro__ available: derive one by mixing in object and
            # dropping the synthetic subclass from the front.
            try:
                class klass(klass, object):
                    pass
                lineage = klass.__mro__[1:]
            except TypeError:
                lineage = (object,)  # must be an ExtensionClass or some such :(
        for candidate in lineage:
            if candidate in implementations:
                return implementations[candidate](*args, **kw)
        # Nothing registered anywhere along the MRO: use the default.
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass  # Python 2.3 doesn't allow functions to be renamed

    # The wrapper shares func's attribute dict, so 'register' is assigned
    # only after the dict has been swapped in.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
def walk_packages(path=None, prefix='', onerror=None):
    """Yield (module_loader, name, ispkg) for all modules recursively
    on path, or, if path is None, all accessible modules.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Note that this function must import all *packages* (NOT all
    modules!) on the given path, in order to access the __path__
    attribute to find submodules.

    'onerror' is a function which gets called with one argument (the
    name of the package which was being imported) if any exception
    occurs while trying to import a package.  If no onerror function is
    supplied, ImportErrors are caught and ignored, while all other
    exceptions are propagated, terminating the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # Path entries already descended into during this particular call.
    visited = set()

    def seen(entry):
        if entry in visited:
            return True
        visited.add(entry)
        return False

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if not ispkg:
            continue

        try:
            __import__(name)
        except ImportError:
            if onerror is not None:
                onerror(name)
        except Exception:
            if onerror is None:
                raise
            onerror(name)
        else:
            subpath = getattr(sys.modules[name], '__path__', None) or []

            # don't traverse path items we've seen before
            subpath = [p for p in subpath if not seen(p)]

            for item in walk_packages(subpath, name + '.', onerror):
                yield item
def iter_modules(path=None, prefix=''):
    """Yield (module_loader, name, ispkg) for all submodules on path,
    or, if path is None, all top-level modules on sys.path.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Each name is yielded at most once; the first importer to report a
    name wins.
    """
    if path is not None:
        importers = (get_importer(entry) for entry in path)
    else:
        importers = iter_importers()

    emitted = set()
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in emitted:
                continue
            emitted.add(name)
            yield importer, name, ispkg
def iter_importer_modules(importer, prefix=''):
    """Return the (name, ispkg) pairs an importer can enumerate.

    This default implementation delegates to the importer's own
    iter_modules(prefix) method when it has one, and returns an empty
    list otherwise.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []


# Wrapped by plain assignment instead of "@simplegeneric" decorator syntax,
# so that type-specific implementations can be registered on it.
iter_importer_modules = simplegeneric(iter_importer_modules)
class ImpImporter:
    """PEP 302 Importer that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 importer that searches that
    directory.  ImpImporter(None) produces a PEP 302 importer that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        # Directory to search, or None to defer to imp's default search.
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for fullname, or None if it is not found.

        Only the last dotted component of fullname is looked up.  Dotted
        names are rejected (None) when this importer has no directory.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for modules found in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        A subdirectory counts as a package when it contains an __init__
        module.  Directories that cannot be listed are skipped instead of
        aborting the scan.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        yielded = {}
        import inspect

        try:
            filenames = os.listdir(self.path)
        except OSError:
            # Unreadable directory: treat it as empty rather than failing.
            filenames = []
        filenames.sort()  # handle packages before same-named modules

        for fn in filenames:
            modname = inspect.getmodulename(fn)
            if modname == '__init__' or modname in yielded:
                continue

            path = os.path.join(self.path, fn)
            ispkg = False

            if not modname and os.path.isdir(path) and '.' not in fn:
                modname = fn
                try:
                    dircontents = os.listdir(path)
                except OSError:
                    # Unreadable subdirectory: cannot be confirmed as a
                    # package, so it falls through to "not a package".
                    dircontents = []
                for entry in dircontents:
                    subname = inspect.getmodulename(entry)
                    if subname == '__init__':
                        ispkg = True
                        break
                else:
                    continue    # not a package

            if modname and '.' not in modname:
                yielded[modname] = 1
                yield prefix + modname, ispkg
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    An instance holds the values handed to it by ImpImporter.find_module():
    the requested full name plus the (file, filename, etc) triple obtained
    from imp.find_module().  etc[2] is compared against the imp module-type
    constants throughout.
    """
    # Class-level defaults; get_code()/get_source() cache their results
    # on the instance.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load the module via imp.load_module() and return it.

        The underlying file, if any, is always closed afterwards.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the raw bytes stored in the file at pathname."""
        # Close the handle deterministically instead of leaking it.
        with open(pathname, "rb") as stream:
            return stream.read()

    def _reopen(self):
        """Reopen self.file if it exists but has been closed."""
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                # Plain text mode already gives universal newlines; the
                # legacy 'U' flag is rejected by newer interpreters.
                self.file = open(self.filename, 'r')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Return the module name to operate on.

        None means "the module this loader was created for"; any other
        name that differs from it raises ImportError.
        """
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if this loader's module is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing and caching it.

        The result stays None for module types other than source,
        compiled and package directory.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading and caching it.

        For a compiled module the source is read from the file whose name
        is self.filename minus its last character, if that file exists;
        otherwise the result stays None.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type == imp.PY_COMPILED:
                source_name = self.filename[:-1]
                if os.path.exists(source_name):
                    # 'with' guarantees the handle is closed even when
                    # read() raises.
                    with open(source_name, 'r') as f:
                        self.source = f.read()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for the package's own __init__ module."""
        return ImpImporter(self.filename).find_module('__init__')

    def get_filename(self, fullname=None):
        """Return the file the module is loaded from, or None.

        For a package this is the filename reported by the __init__
        delegate; for source, compiled and extension modules it is
        self.filename.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules inside a zip archive.

        Entries are taken from zipimport's directory cache for the
        importer's archive and restricted to those under importer.prefix.
        A directory holding an __init__.py* file is reported as a package;
        other single-component entries are reported as plain modules.
        Both kinds of name carry the caller's prefix.
        """
        dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            fn = fn[plen:].split(os.sep)

            if len(fn) == 2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # Packages get the prefix too, like plain modules below.
                    yield prefix + fn[0], True

            if len(fn) != 1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    # zipimport is unavailable: no zip-specific implementation is registered.
    pass
def get_importer(path_item):
    """Retrieve a PEP 302 importer for the given path item

    A cached entry in sys.path_importer_cache is used when present.
    Otherwise each hook in sys.path_hooks is tried in turn and the first
    one that does not raise ImportError supplies the importer, which is
    then stored in the cache.

    If there is no importer, a wrapper around the basic import
    machinery is returned.  This wrapper is never inserted into
    the importer cache (None is inserted instead).

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    cache = sys.path_importer_cache
    try:
        importer = cache[path_item]
    except KeyError:
        importer = None
        for hook in sys.path_hooks:
            try:
                importer = hook(path_item)
            except ImportError:
                continue
            break
        cache.setdefault(path_item, importer)

    if importer is not None:
        return importer

    # No hook claimed the item: fall back to the classic-import wrapper.
    try:
        return ImpImporter(path_item)
    except ImportError:
        return None
def iter_importers(fullname=""):
    """Yield PEP 302 importers for the given module name

    If fullname contains a '.', the importers will be for the package
    containing fullname, otherwise they will be importers for sys.meta_path,
    sys.path, and Python's "classic" import machinery, in that order.  If
    the named module is in a package, that package is imported as a side
    effect of invoking this function.

    Non PEP 302 mechanisms (e.g. the Windows registry) used by the
    standard import machinery to find files in alternative locations
    are partially supported, but are searched AFTER sys.path.  Normally,
    these locations are searched BEFORE sys.path, preventing sys.path
    entries from shadowing them.

    For this to cause a visible difference in behaviour, there must
    be a module or package name that is accessible via both sys.path
    and one of the non PEP 302 file system mechanisms.  In this case,
    the emulation will find the former version, while the builtin
    import mechanism will find the latter.

    Items of the following types can be affected by this discrepancy:
        imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY

    Names starting with '.' raise ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError("Relative module names not supported")

    top_level = '.' not in fullname
    if top_level:
        for importer in sys.meta_path:
            yield importer
        search_path = sys.path
    else:
        # Get the containing package's __path__
        pkg = fullname.rpartition('.')[0]
        if pkg not in sys.modules:
            __import__(pkg)
        search_path = getattr(sys.modules[pkg], '__path__', None) or []

    for entry in search_path:
        yield get_importer(entry)

    if top_level:
        yield ImpImporter()
def get_loader(module_or_name):
    """Get a PEP 302 "loader" object for module_or_name

    If the module or package is accessible via the normal import
    mechanism, a wrapper around the relevant part of that machinery
    is returned.  Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.

    A module object (or the name of an already-imported module) that has
    a non-None __loader__ attribute yields that loader directly.

    This function uses iter_importers(), and is thus subject to the same
    limitations regarding platform-specific special import locations such
    as the Windows registry.
    """
    # Resolve the name of an already-imported module to the module itself.
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]

    if not isinstance(module_or_name, ModuleType):
        return find_loader(module_or_name)

    existing = getattr(module_or_name, '__loader__', None)
    if existing is not None:
        return existing
    return find_loader(module_or_name.__name__)
def find_loader(fullname):
    """Find a PEP 302 "loader" object for fullname

    Each importer produced by iter_importers(fullname) is asked, in order,
    to find the module; the first non-None result is returned.  Returns
    None if no importer finds it.  This function uses iter_importers(),
    and is thus subject to the same limitations regarding
    platform-specific special import locations such as the Windows
    registry.
    """
    attempts = (
        importer.find_module(fullname)
        for importer in iter_importers(fullname)
    )
    return next((loader for loader in attempts if loader is not None), None)
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  A *.pkg file
    is trusted at face value: every non-blank line that does not start
    with '#' is appended to the path, regardless of whether it exists
    on the filesystem.  (This is a feature.)  Only the package
    subdirectories are checked against entries already on the path.

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not strings referring to existing directories are ignored.
    Unicode items of sys.path that cause errors when used as filenames
    may cause this function to raise an exception (in line with
    os.path.isdir() behavior).

    A *.pkg file that cannot be opened is reported on sys.stderr and
    skipped.
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    pname = os.path.join(*name.split('.'))  # Reconstitute as relative path
    sname_pkg = name + ".pkg"
    init_py = "__init__.py"

    path = path[:]  # Start with a copy of the existing path

    for dirname in sys.path:
        if not isinstance(dirname, str) or not os.path.isdir(dirname):
            continue
        subdir = os.path.join(dirname, pname)
        # XXX This may still add duplicate entries to path on
        # case-insensitive filesystems
        initfile = os.path.join(subdir, init_py)
        if subdir not in path and os.path.isfile(initfile):
            path.append(subdir)
        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(dirname, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except IOError as msg:
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                # 'with' closes the file even if reading a line fails.
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line)  # Don't check for existence!

    return path
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API.  The package
    argument should be the name of a package, in standard module format
    (foo.bar).  The resource argument should be in the form of a relative
    filename, using '/' as the path separator.  The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """
    loader = get_loader(package)
    if loader is None:
        return None
    if not hasattr(loader, 'get_data'):
        return None

    # Prefer the already-imported module; otherwise have the loader load it.
    module = sys.modules.get(package)
    if not module:
        module = loader.load_module(package)
    if module is None or not hasattr(module, '__file__'):
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    base_dir = os.path.dirname(module.__file__)
    resource_name = os.path.join(base_dir, *resource.split('/'))
    return loader.get_data(resource_name)