issue5063: Fixes for building RPM on CentOS plus misc .spec file enhancements.
[python.git] / Lib / pkgutil.py
blobc50928f701d1b0240214799107d1b11bcd2d525a
1 """Utilities to support packages."""
3 # NOTE: This module must remain compatible with Python 2.3, as it is shared
4 # by setuptools for distribution with Python 2.3 and up.
6 import os
7 import sys
8 import imp
9 import os.path
10 from types import ModuleType
# Names exported by "from pkgutil import *".  get_data is a public,
# documented function of this module and therefore belongs in the list.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
]
def read_code(stream):
    """Load a code object from an open compiled-bytecode stream.

    This helper lets the PEP 302 emulation handle compiled files.  The
    first four bytes of 'stream' are compared with imp.get_magic(); on a
    mismatch None is returned.  Otherwise the next four bytes (the
    timestamp) are skipped and the result of marshal.load() on the rest
    of the stream is returned.
    """
    import marshal

    if stream.read(4) == imp.get_magic():
        stream.read(4)  # discard the timestamp field
        return marshal.load(stream)
    return None
def simplegeneric(func):
    """Build a trivial single-dispatch generic function around 'func'.

    The returned wrapper looks at the class of its first positional
    argument and walks that class's MRO; the first class that has an
    implementation registered through wrapper.register() handles the
    call.  When nothing matches, 'func' itself is called.

    wrapper.register(typ, func) stores 'func' for 'typ' and returns it;
    with 'func' omitted it returns a one-argument callable that performs
    the registration.
    """
    implementations = {}

    def dispatch_order(ob):
        # Classes to consult, most specific first.
        try:
            klass = ob.__class__
        except AttributeError:
            klass = type(ob)
        try:
            return klass.__mro__
        except AttributeError:
            pass
        # No __mro__ on the class: derive a throwaway new-style subclass
        # and use its MRO, minus the throwaway class itself.
        try:
            class synthetic(klass, object):
                pass
        except TypeError:
            # must be an ExtensionClass or some such  :(
            return (object,)
        return synthetic.__mro__[1:]

    def wrapper(*args, **kw):
        for klass in dispatch_order(args[0]):
            if klass in implementations:
                return implementations[klass](*args, **kw)
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass    # Python 2.3 doesn't allow functions to be renamed

    def register(typ, func=None):
        if func is not None:
            implementations[typ] = func
            return func

        def decorator(impl):
            return register(typ, impl)
        return decorator

    # The wrapper shares func's attribute dict, so the 'register'
    # attribute set below becomes visible on func as well.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
def walk_packages(path=None, prefix='', onerror=None):
    """Yield (module_loader, name, ispkg) for all modules recursively
    on path, or, if path is None, for all accessible modules.

    'path' is either None or a list of paths to search for modules.

    'prefix' is a string put in front of every module name that is
    yielded.

    Every *package* found (but not every module) is imported, because
    its __path__ attribute is needed to locate its submodules.

    'onerror', if given, is called with the package name whenever
    importing a package raises an exception.  Without it, ImportError
    is silently ignored and any other exception propagates, ending the
    walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    visited = {}

    def seen(entry):
        # True if 'entry' was recorded earlier in this call; otherwise
        # record it and report False.
        if entry in visited:
            return True
        visited[entry] = True
        return False

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if not ispkg:
            continue

        try:
            __import__(name)
        except ImportError:
            if onerror is not None:
                onerror(name)
            continue
        except Exception:
            if onerror is None:
                raise
            onerror(name)
            continue

        subpath = getattr(sys.modules[name], '__path__', None) or []

        # don't traverse path items we've seen before
        subpath = [p for p in subpath if not seen(p)]

        for item in walk_packages(subpath, name + '.', onerror):
            yield item
def iter_modules(path=None, prefix=''):
    """Yield (module_loader, name, ispkg) for all submodules on path,
    or, if path is None, for everything the importers returned by
    iter_importers() can list.

    'path' is either None or a list of paths to search for modules.

    'prefix' is a string put in front of every module name that is
    yielded.

    A name is yielded only once: the first importer that reports it
    wins.
    """
    if path is not None:
        importers = map(get_importer, path)
    else:
        importers = iter_importers()

    emitted = {}
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in emitted:
                continue
            emitted[name] = 1
            yield importer, name, ispkg
def iter_importer_modules(importer, prefix=''):
    """Return an iterable of (name, ispkg) pairs for 'importer'.

    This default implementation delegates to importer.iter_modules(prefix)
    when the importer has such a method and returns an empty list
    otherwise.  Other implementations can be attached per importer type
    through iter_importer_modules.register().
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

# Wrapped by hand instead of with "@simplegeneric" (presumably because
# the module has to stay compatible with Python 2.3).
iter_importer_modules = simplegeneric(iter_importer_modules)
class ImpImporter:
    """PEP 302 Importer that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 importer that searches that
    directory.  ImpImporter(None) produces a PEP 302 importer that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for 'fullname', or None if it is not found.

        Only the last dotted component of 'fullname' is looked up.  An
        importer created without a directory refuses dotted names.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for each module in self.path.

        Nothing is yielded when the importer has no directory or the
        directory does not exist.  A subdirectory counts as a package
        when it contains an __init__ module; other subdirectories are
        skipped.  Directories that cannot be listed are treated as empty
        instead of aborting the whole scan.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        yielded = {}
        import inspect

        try:
            filenames = os.listdir(self.path)
        except OSError:
            # The directory became unreadable or vanished after the
            # isdir() check above; there is nothing to report.
            filenames = []
        filenames.sort()  # handle packages before same-named modules

        for fn in filenames:
            modname = inspect.getmodulename(fn)
            if modname == '__init__' or modname in yielded:
                continue

            path = os.path.join(self.path, fn)
            ispkg = False

            if not modname and os.path.isdir(path) and '.' not in fn:
                modname = fn
                try:
                    dircontents = os.listdir(path)
                except OSError:
                    # An unreadable subdirectory cannot be shown to be a
                    # package, so it is skipped like any non-package.
                    dircontents = []
                for entry in dircontents:
                    if inspect.getmodulename(entry) == '__init__':
                        ispkg = True
                        break
                else:
                    continue    # not a package

            if modname and '.' not in modname:
                yielded[modname] = 1
                yield prefix + modname, ispkg
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    An instance holds the (file, filename, etc) triple produced by
    imp.find_module() for one module, together with that module's full
    name.  Compiled code and source text are cached on the instance
    after the first request.
    """
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Import the module through imp.load_module() and return it."""
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the raw contents of the file at 'pathname'."""
        f = open(pathname, "rb")
        try:
            return f.read()
        finally:
            # Close explicitly instead of relying on garbage collection.
            f.close()

    def _reopen(self):
        # Re-open self.file if an earlier operation closed it.
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self.file = open(self.filename, 'rU')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        # Default to this loader's module; reject any other module name.
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return true if this loader's module is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object (cached), or None if the
        module type provides none."""
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text (cached), or None if it is
        not available."""
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type == imp.PY_COMPILED:
                # The source is looked for next to the compiled file, under
                # the same name with the final character dropped.
                source_name = self.filename[:-1]
                if os.path.exists(source_name):
                    f = open(source_name, 'rU')
                    try:
                        self.source = f.read()
                    finally:
                        f.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        # Loader for the __init__ module inside this package's directory.
        return ImpImporter(self.filename).find_module('__init__')

    def get_filename(self, fullname=None):
        """Return the file the module is loaded from, or None for module
        types that have no file."""
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for the modules a zipimporter
        can see directly below its own archive prefix.

        Entry names come from zipimport's directory cache for the
        importer's archive.  A directory holding an __init__.py* entry
        is reported as a package; other single-component entries are
        reported as plain modules.  Each name is yielded once.
        """
        # Sort a private copy of the entry names.
        dirlist = list(zipimport._zip_directory_cache[importer.archive].keys())
        dirlist.sort()
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            fn = fn[plen:].split(os.sep)

            if len(fn) == 2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # Packages get the caller's prefix exactly like
                    # plain modules do.
                    yield prefix + fn[0], True

            if len(fn) != 1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
def get_importer(path_item):
    """Return a PEP 302 importer for the given path item.

    A cached entry in sys.path_importer_cache is used when present.
    Otherwise each hook in sys.path_hooks is tried in order and the
    first one that does not raise ImportError supplies the importer,
    which is then stored in the cache.

    If no hook accepts the item, None is cached and a wrapper around
    the basic import machinery (ImpImporter) is returned instead; that
    wrapper is never put into the cache.

    The cache (or part of it) can be cleared manually if a rescan of
    sys.path_hooks is necessary.
    """
    cache = sys.path_importer_cache
    if path_item in cache:
        importer = cache[path_item]
    else:
        importer = None
        for hook in sys.path_hooks:
            try:
                importer = hook(path_item)
            except ImportError:
                continue
            break
        cache.setdefault(path_item, importer)

    if importer is not None:
        return importer
    try:
        return ImpImporter(path_item)
    except ImportError:
        return None
def iter_importers(fullname=""):
    """Yield PEP 302 importers for the given module name.

    For a dotted name the importers are those for the __path__ of the
    containing package, which is imported first if it is not already in
    sys.modules.  For an undotted name the entries of sys.meta_path are
    yielded, then an importer for each sys.path item, and finally an
    ImpImporter() standing in for Python's "classic" import machinery.

    Names starting with '.' raise ImportError.

    Non PEP 302 mechanisms (e.g. the Windows registry) used by the
    standard import machinery to find files in alternative locations
    are partially supported, but are searched AFTER sys.path.  Normally
    these locations are searched BEFORE sys.path, which prevents
    sys.path entries from shadowing them.

    The difference is only visible when a module or package name is
    reachable both via sys.path and via one of those mechanisms: this
    emulation finds the former, the builtin import mechanism the
    latter.

    Items of the following types can be affected by this discrepancy:
        imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
    """
    if fullname.startswith('.'):
        raise ImportError("Relative module names not supported")

    in_package = '.' in fullname
    if in_package:
        # Everything before the last dot names the containing package.
        pkg = fullname[:fullname.rfind('.')]
        if pkg not in sys.modules:
            __import__(pkg)
        search_path = getattr(sys.modules[pkg], '__path__', None) or []
    else:
        for meta_importer in sys.meta_path:
            yield meta_importer
        search_path = sys.path

    for entry in search_path:
        yield get_importer(entry)

    if not in_package:
        yield ImpImporter()
def get_loader(module_or_name):
    """Return a PEP 302 "loader" object for module_or_name.

    The argument may be a module object or a module name.  A name found
    in sys.modules is replaced by the module itself.  For a module
    object, its __loader__ attribute is returned when that is set and
    not None; in every other case the result of find_loader() for the
    module's name is returned, which is None if the module cannot be
    found or imported.

    Because find_loader() relies on iter_importers(), the containing
    package (if any) may be imported to establish its __path__, and the
    same limitations regarding platform-specific special import
    locations such as the Windows registry apply.
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]

    if not isinstance(module_or_name, ModuleType):
        return find_loader(module_or_name)

    loader = getattr(module_or_name, '__loader__', None)
    if loader is not None:
        return loader
    return find_loader(module_or_name.__name__)
def find_loader(fullname):
    """Find a PEP 302 "loader" object for fullname.

    Each importer produced by iter_importers(fullname) is asked in turn
    via find_module(fullname); the first result that is not None is
    returned.  Returns None if no importer finds the module.  Because it
    uses iter_importers(), this function is subject to the same
    limitations regarding platform-specific special import locations
    such as the Windows registry.
    """
    found = None
    for importer in iter_importers(fullname):
        found = importer.find_module(fullname)
        if found is not None:
            break
    return found
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  A *.pkg
    file is trusted at face value: every entry found in it, other than
    empty lines and lines starting with '#', is appended to the path
    regardless of whether it exists on the filesystem.  (This is a
    feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not (unicode or 8-bit) strings referring to existing
    directories are ignored.  Unicode items of sys.path that cause
    errors when used as filenames may cause this function to raise an
    exception (in line with os.path.isdir() behavior).
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    pname = os.path.join(*name.split('.'))  # Reconstitute as relative path
    # Just in case os.extsep != '.'
    sname = os.extsep.join(name.split('.'))
    sname_pkg = sname + os.extsep + "pkg"
    init_py = "__init__" + os.extsep + "py"

    path = path[:]  # Start with a copy of the existing path

    for dirname in sys.path:
        if not isinstance(dirname, basestring) or not os.path.isdir(dirname):
            continue
        subdir = os.path.join(dirname, pname)
        # XXX This may still add duplicate entries to path on
        # case-insensitive filesystems
        initfile = os.path.join(subdir, init_py)
        if subdir not in path and os.path.isfile(initfile):
            path.append(subdir)
        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(dirname, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except IOError:
                # Fetch the exception through sys.exc_info() so the
                # handler needs no version-specific "except" syntax.
                msg = sys.exc_info()[1]
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                # Close the file even if reading it fails part-way.
                try:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line)  # Don't check for existence!
                finally:
                    f.close()

    return path
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API.  'package'
    is the name of a package in standard module format (foo.bar).
    'resource' is a relative filename using '/' as the path separator;
    it should not contain the parent directory name '..' and should not
    be rooted (start with a '/').  This function does not itself check
    either condition.

    The return value is whatever the loader's get_data() returns for
    the resource (a binary string holding its contents).

    For packages located in the filesystem, which have already been
    imported, this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned if no loader is found for the package, if the
    loader has no get_data() method, or if the loaded module is None or
    has no __file__ attribute.
    """
    loader = get_loader(package)
    if loader is None:
        return None
    if not hasattr(loader, 'get_data'):
        return None

    module = sys.modules.get(package)
    if not module:
        module = loader.load_module(package)
    if module is None or not hasattr(module, '__file__'):
        return None

    # loader.get_data() expects an os.path style filename that starts
    # with the directory of the package's __file__.
    pieces = resource.split('/')
    base_dir = os.path.dirname(module.__file__)
    return loader.get_data(os.path.join(base_dir, *pieces))