Manual py3k backport: [svn r74155] Issue #6242: Fix deallocator of io.StringIO and...
[python.git] / Lib / imputil.py
bloba907287e51314a5fbcc5b74275c17bc12bd0e9ef
1 """
2 Import utilities
4 Exported classes:
5 ImportManager Manage the import process
7 Importer Base class for replacing standard import functions
8 BuiltinImporter Emulate the import mechanism for builtin and frozen modules
10 DynLoadSuffixImporter
11 """
12 from warnings import warnpy3k
13 warnpy3k("the imputil module has been removed in Python 3.0", stacklevel=2)
14 del warnpy3k
16 # note: avoid importing non-builtin modules
17 import imp ### not available in Jython?
18 import sys
19 import __builtin__
21 # for the DirectoryImporter
22 import struct
23 import marshal
# Public names of this module.  DynLoadSuffixImporter is listed because the
# module docstring advertises it among the exported classes.
__all__ = ["ImportManager", "Importer", "BuiltinImporter",
           "DynLoadSuffixImporter"]

# Type objects used for isinstance() checks elsewhere in this module.
_StringType = type('')
_ModuleType = type(sys)         ### doesn't work in Jython...
class ImportManager:
    "Manage the import process."

    def install(self, namespace=vars(__builtin__)):
        """Hook this manager's import function into a namespace.

        namespace may be a dictionary or a module (in which case the
        module's dictionary is used); it defaults to the builtin namespace.
        The '__import__' entry found there is remembered so that uninstall()
        can put it back.
        """
        target = namespace
        if isinstance(target, _ModuleType):
            target = vars(target)

        # Note: we have no notion of "chaining"

        # Remember whatever hook was there before replacing it with ours.
        self.previous_importer = target['__import__']
        self.namespace = target
        target['__import__'] = self._import_hook

        ### fix this
        #target['reload'] = self._reload_hook

    def uninstall(self):
        "Put back the import hook that install() replaced."
        saved_hook = self.previous_importer
        self.namespace['__import__'] = saved_hook

    def add_suffix(self, suffix, importFunc):
        "Register importFunc with the filesystem importer for a file suffix."
        assert hasattr(importFunc, '__call__')
        self.fs_imp.add_suffix(suffix, importFunc)

    ######################################################################
    #
    # PRIVATE METHODS
    #

    # When set (e.g. by a subclass), this is instantiated by __init__ in
    # place of _FilesystemImporter whenever no fs_imp argument is supplied.
    clsFilesystemImporter = None

    def __init__(self, fs_imp=None):
        """Prepare the filesystem importer and its default suffix handlers.

        fs_imp, when given, is used as the filesystem importer; otherwise
        one is created from clsFilesystemImporter (or _FilesystemImporter).
        """
        # We are certainly going to import something sooner or later, so
        # make sure the OS-level helpers (stat, path join) are available.
        if not _os_stat:
            _os_bootstrap()

        # The importer used to pull modules off the filesystem.  It provides
        # one extra method (import_from_dir) that this class relies on.
        if fs_imp is None:
            factory = self.clsFilesystemImporter or _FilesystemImporter
            fs_imp = factory()
        self.fs_imp = fs_imp

        # Register the recognized suffixes: dynamically loaded extension
        # modules come first, then .py files (or their cached bytecode).
        dynload_descs = [d for d in imp.get_suffixes()
                         if d[2] == imp.C_EXTENSION]
        for desc in dynload_descs:
            loader = DynLoadSuffixImporter(desc)
            self.add_suffix(desc[0], loader.import_file)
        self.add_suffix('.py', py_suffix_importer)

    def _import_hook(self, fqname, globals=None, locals=None, fromlist=None):
        """Python calls this hook to locate and import a module."""

        name_parts = fqname.split('.')
        head = name_parts[0]

        # Work out which package (if any) the import is relative to; when
        # there is one, its importer gets the first shot at the import.
        context = self._determine_import_context(globals)
        if context:
            found = context.__importer__._do_import(context, name_parts,
                                                    fromlist)
            if found:
                return found

        # Reuse the top-level module if it is already loaded, otherwise go
        # looking for it along sys.path.
        try:
            top_module = sys.modules[head]
        except KeyError:
            top_module = self._import_top_module(head)
            if not top_module:
                # the topmost module wasn't found at all.
                raise ImportError('No module named ' + fqname)

        # Fast path for undotted names.
        #
        # Without a fromlist the top module is the answer.  With a fromlist
        # and a false/missing __ispkg__, either the module was not imported
        # by us (so we cannot import sub-modules named in the fromlist, but
        # must not raise either, since they may be plain names) or it is not
        # a package (so no sub-modules can exist).  In both cases the top
        # module is also the "bottom" module, which is the defined result
        # when a fromlist is present.
        if len(name_parts) == 1:
            if not fromlist or not top_module.__dict__.get('__ispkg__'):
                return top_module

        # Let the importer that produced the top module finish the job.
        importer = top_module.__dict__.get('__importer__')
        if importer:
            return importer._finish_import(top_module, name_parts[1:],
                                           fromlist)

        # Grrr, some people "import os.path" or do "from os.path import ..."
        if len(name_parts) == 2 and hasattr(top_module, name_parts[1]):
            return getattr(top_module, name_parts[1]) if fromlist \
                   else top_module

        # No importer means something else imported the module, and we have
        # no knowledge of how to get sub-modules out of it: give up.
        raise ImportError('No module named ' + fqname)

    def _determine_import_context(self, globals):
        """Returns the context in which a module should be imported.

        The context could be a loaded (package) module and the imported module
        will be looked for within that package. The context could also be None,
        meaning there is no context -- the module should be looked for as a
        "top-level" module.
        """

        # Globals that do not carry an '__importer__' do not belong to one
        # of our modules or packages, so there is no relative context (as
        # far as we are concerned).
        if not globals or not globals.get('__importer__'):
            return None

        # One of our modules/packages is doing the import.
        importing_name = globals['__name__']

        # A package importing something: names refer to the package's own
        # contents, so the package itself is the context.
        if globals['__ispkg__']:
            package = sys.modules[importing_name]
            assert globals is package.__dict__
            return package

        # A plain module: split off the enclosing package name, if any.
        package_name, dot, _unused = importing_name.rpartition('.')
        if not dot:
            # a module outside of a package has no particular import context
            return None

        # A module inside a package: names refer to its siblings, so the
        # enclosing package is the context.
        package = sys.modules[package_name]
        assert package.__name__ == package_name
        return package

    def _import_top_module(self, name):
        "Try each sys.path entry in turn; return the module found, or None."
        # String entries are filesystem locations handled by self.fs_imp;
        # any other entry is asked to import the module itself through its
        # import_top() method.
        for entry in sys.path:
            if isinstance(entry, _StringType):
                found = self.fs_imp.import_from_dir(entry, name)
            else:
                found = entry.import_top(name)
            if found:
                return found
        return None

    def _reload_hook(self, module):
        "Python calls this hook to reload a module."

        # reloading of a module may or may not be possible (depending on the
        # importer), but at least we can validate that it's ours to reload
        importer = module.__dict__.get('__importer__')
        if not importer:
            ### oops. now what...
            pass

        # okay. it is using the imputil system, and we must delegate it, but
        # we don't know what to do (yet)
        ### we should blast the module dict and do another get_code(). need to
        ### flesh this out and add proper docco...
        raise SystemError("reload not yet implemented")
class Importer:
    "Base class for replacing standard import functions."

    def import_top(self, name):
        "Import a top-level module."
        return self._import_one(None, name, name)

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _finish_import(self, top, parts, fromlist):
        """Load the rest of a dotted name and pick the module to return.

        top is the already-imported top-level module, parts the remaining
        name components, fromlist the import's "from" list (may be empty).
        Returns top when there is no fromlist, otherwise the bottom module.
        """
        # if "a.b.c" was provided, then load the ".b.c" portion down from
        # below the top-level module.
        bottom = self._load_tail(top, parts)

        # if the form is "import a.b.c", then return "a"
        if not fromlist:
            # no fromlist: return the top of the import tree
            return top

        # the top module was imported by self.
        #
        # this means that the bottom module was also imported by self (just
        # now, or in the past and we fetched it from sys.modules).
        #
        # since we imported/handled the bottom module, this means that we can
        # also handle its fromlist (and reliably use __ispkg__).

        # if the bottom node is a package, then (potentially) import some
        # modules.
        #
        # note: if it is not a package, then "fromlist" refers to names in
        #       the bottom module rather than modules.
        # note: for a mix of names and modules in the fromlist, we will
        #       import all modules and insert those into the namespace of
        #       the package module. Python will pick up all fromlist names
        #       from the bottom (package) module; some will be modules that
        #       we imported and stored in the namespace, others are expected
        #       to be present already.
        if bottom.__ispkg__:
            self._import_fromlist(bottom, fromlist)

        # if the form is "from a.b import c, d" then return "b"
        return bottom

    def _import_one(self, parent, modname, fqname):
        "Import a single module."

        # has the module already been imported?
        try:
            return sys.modules[fqname]
        except KeyError:
            pass

        # load the module's code, or fetch the module itself
        result = self.get_code(parent, modname, fqname)
        if result is None:
            return None

        module = self._process_result(result, fqname)

        # insert the module into its parent
        if parent:
            setattr(parent, modname, module)
        return module

    def _process_result(self, result, fqname):
        """Turn a get_code() result into a module registered in sys.modules.

        result is the (ispkg, code, values) 3-tuple described in get_code();
        fqname is the fully-qualified name to register the module under.
        Returns the module found in sys.modules[fqname] afterwards.  If
        executing the code raises, the sys.modules entry is removed and the
        exception propagates.
        """
        # Unpacked here instead of in the signature: tuple parameters were
        # removed in Python 3.x.
        ispkg, code, values = result

        # did get_code() return an actual module? (rather than a code object)
        is_module = isinstance(code, _ModuleType)

        # use the returned module, or create a new one to exec code into
        if is_module:
            module = code
        else:
            module = imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = ispkg

        # insert additional values into the module (before executing the code)
        module.__dict__.update(values)

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        # execute the code within the module's namespace
        if not is_module:
            try:
                exec(code, module.__dict__)
            except:
                # Whatever went wrong (the exception is re-raised below), do
                # not leave a half-initialized module behind.
                if fqname in sys.modules:
                    del sys.modules[fqname]
                raise

        # fetch from sys.modules instead of returning module directly.
        # also make module's __name__ agree with fqname, in case
        # the executed code played games on us.
        module = sys.modules[fqname]
        module.__name__ = fqname
        return module

    def _load_tail(self, m, parts):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules.
        Raises ImportError when one of the parts cannot be imported.
        """
        for part in parts:
            fqname = "%s.%s" % (m.__name__, part)
            m = self._import_one(m, part, fqname)
            if not m:
                raise ImportError("No module named " + fqname)
        return m

    def _import_fromlist(self, package, fromlist):
        'Import any sub-modules in the "from" list.'

        # if '*' is present in the fromlist, then look for the '__all__'
        # variable to find additional items (modules) to import.
        if '*' in fromlist:
            fromlist = list(fromlist) + \
                       list(package.__dict__.get('__all__', []))

        for sub in fromlist:
            # if the name is already present, then don't try to import it (it
            # might not be a module!).
            if sub != '*' and not hasattr(package, sub):
                subname = "%s.%s" % (package.__name__, sub)
                submod = self._import_one(package, sub, subname)
                if not submod:
                    raise ImportError("cannot import name " + subname)

    def _do_import(self, parent, parts, fromlist):
        """Attempt to import the module relative to parent.

        This method is used when the import context specifies that <self>
        imported the parent module.

        Returns None when the first part cannot be found below parent.
        """
        top_name = parts[0]
        top_fqname = parent.__name__ + '.' + top_name
        top_module = self._import_one(parent, top_name, top_fqname)
        if not top_module:
            # this importer and parent could not find the module (relatively)
            return None

        return self._finish_import(top_module, parts[1:], fromlist)

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing. It
        may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a
        (potentially) dotted name from the "root" of the module namespace
        down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 3-tuple should be the integer 0 or 1,
            specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
            executed within the new module's namespace). This item can also
            be a fully-loaded module object (e.g. loaded from a shared lib).

        * The third item is a dictionary of name/value pairs that will be
            inserted into new module before the code object is executed. This
            is provided in case the module's code expects certain values (such
            as where the module was found). When the second item is a module
            object, then these names/values will be inserted *after* the module
            has been loaded/initialized.
        """
        raise RuntimeError("get_code not implemented")
403 ######################################################################
405 # Some handy stuff for the Importers
# Last character of the byte-compiled file extension: 'c' (.pyc) normally,
# 'o' (.pyo) when __debug__ is false.
_suffix_char = 'c' if __debug__ else 'o'

# Complete extension of the byte-compiled companion of a '.py' file.
_suffix = '.py' + _suffix_char
def _compile(pathname, timestamp):
    """Compile (and cache) a Python source file.

    The file specified by <pathname> is compiled to a code object and
    returned.

    Presuming the appropriate privileges exist, the bytecodes will be
    saved back to the filesystem for future imports. The source file's
    modification timestamp must be provided as a Long value.

    If the cache file cannot be opened for writing, caching is skipped
    silently; an error while writing it propagates to the caller.
    """
    # Close the source file explicitly rather than relying on the garbage
    # collector to do it.
    source = open(pathname, 'rU')
    try:
        codestring = source.read()
    finally:
        source.close()
    if codestring and codestring[-1] != '\n':
        codestring = codestring + '\n'
    code = __builtin__.compile(codestring, pathname, 'exec')

    # try to cache the compiled code
    try:
        f = open(pathname + _suffix_char, 'wb')
    except IOError:
        pass
    else:
        try:
            # Write four zero bytes where the magic number belongs and only
            # fill in the real magic once everything else is flushed, so an
            # interrupted write does not leave a file with a valid header.
            f.write('\0\0\0\0')
            f.write(struct.pack('<I', timestamp))
            marshal.dump(code, f)
            f.flush()
            f.seek(0, 0)
            f.write(imp.get_magic())
        finally:
            # Release the handle even when one of the writes fails.
            f.close()

    return code
# Filled in by _os_bootstrap(); both stay None until it has been called.
_os_stat = _os_path_join = None
def _os_bootstrap():
    """Set up 'os' module replacement functions for use during import
    bootstrap.

    Picks stat() from whichever OS-specific builtin module is listed in
    sys.builtin_module_names and pairs it with a matching path join
    function; both are stored in the module globals _os_stat and
    _os_path_join.  Raises ImportError if none of the known OS modules is
    built in.
    """
    global _os_stat, _os_path_join

    builtin_names = sys.builtin_module_names

    # sep stays None only for 'mac', which gets its own join() below.
    sep = None
    if 'posix' in builtin_names:
        from posix import stat
        sep = '/'
    elif 'nt' in builtin_names:
        from nt import stat
        sep = '\\'
    elif 'dos' in builtin_names:
        from dos import stat
        sep = '\\'
    elif 'os2' in builtin_names:
        from os2 import stat
        sep = '\\'
    elif 'mac' in builtin_names:
        from mac import stat
    else:
        raise ImportError('no os specific module found')

    if sep is None:
        def join(a, b):
            # ':' is the separator here; the directory part is given a
            # leading ':' when it contains none, and a trailing ':'.
            if a == '':
                return b
            if ':' not in a:
                a = ':' + a
            if a[-1:] != ':':
                a = a + ':'
            return a + b
    else:
        def join(a, b, sep=sep):
            if a == '':
                return b
            # A directory that already ends in a separator ('/' counts as
            # one everywhere) is used as is.
            if a[-1:] in ('/', sep):
                return a + b
            return a + sep + b

    _os_stat = stat
    _os_path_join = join
def _os_path_isdir(pathname):
    """Local replacement for os.path.isdir().

    Returns None (not False) when pathname cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    # Mask out the file-type bits of the mode and compare them with the
    # directory type.
    type_mask = 0o170000
    directory_type = 0o040000
    return (info.st_mode & type_mask) == directory_type
def _timestamp(pathname):
    """Return the file modification time as a Long.

    Returns None when pathname cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    else:
        return long(info.st_mtime)
509 ######################################################################
511 # Emulate the import mechanism for builtin and frozen modules
class BuiltinImporter(Importer):
    "Importer for modules that imp reports as builtin or frozen."

    def get_code(self, parent, modname, fqname):
        """Load a builtin or frozen module named modname.

        Returns (0, module, {}) when imp knows modname as a builtin or
        frozen module, and None otherwise or when a parent is given.
        """
        # these modules definitely do not occur within a package context
        if parent:
            return None

        # Classify the module; anything imp does not recognize is not ours.
        if imp.is_builtin(modname):
            kind = imp.C_BUILTIN
        elif imp.is_frozen(modname):
            kind = imp.PY_FROZEN
        else:
            return None

        # got it. now load and return it.
        description = ('', '', kind)
        loaded = imp.load_module(modname, None, modname, description)
        return 0, loaded, { }
533 ######################################################################
535 # Internal importer used for importing from the filesystem
class _FilesystemImporter(Importer):
    "Importer that finds modules and packages through suffix handlers."

    def __init__(self):
        # (suffix, importFunc) pairs, tried in registration order.
        self.suffixes = [ ]

    def add_suffix(self, suffix, importFunc):
        "Append a (suffix, importFunc) handler to the list tried on import."
        assert hasattr(importFunc, '__call__')
        handler = (suffix, importFunc)
        self.suffixes.append(handler)

    def import_from_dir(self, dir, fqname):
        "Import module fqname from directory dir; return it, or None."
        candidate = _os_path_join(dir, fqname)
        result = self._import_pathname(candidate, fqname)
        if not result:
            return None
        return self._process_result(result, fqname)

    def get_code(self, parent, modname, fqname):
        "Look for modname in parent's __path__ entries, then its __pkgdir__."
        # This importer is never used with an empty parent. Its existence is
        # private to the ImportManager. The ImportManager uses the
        # import_from_dir() method to import top-level modules/packages.
        # This method is only used when we look for a module within a package.
        assert parent

        for package_dir in parent.__path__:
            candidate = _os_path_join(package_dir, modname)
            found = self._import_pathname(candidate, fqname)
            if found is not None:
                return found

        fallback = _os_path_join(parent.__pkgdir__, modname)
        return self._import_pathname(fallback, fqname)

    def _import_pathname(self, pathname, fqname):
        """Import whatever lives at pathname (given without a suffix).

        A directory is treated as a package and imported through its
        '__init__' file; otherwise each registered suffix is tried in order
        and the first file that can be stat'ed is handed to its import
        function.  Returns that function's result (with the package flag
        and path values filled in for a package), or None if nothing
        matched.
        """
        if _os_path_isdir(pathname):
            init_path = _os_path_join(pathname, '__init__')
            result = self._import_pathname(init_path, fqname)
            if not result:
                return None
            # Mark the result as a package and record where it was found.
            values = result[2]
            values['__pkgdir__'] = pathname
            values['__path__'] = [ pathname ]
            return 1, result[1], values

        for suffix, importFunc in self.suffixes:
            filename = pathname + suffix
            try:
                finfo = _os_stat(filename)
            except OSError:
                # no such file for this suffix; try the next one
                continue
            return importFunc(filename, finfo, fqname)
        return None
586 ######################################################################
588 # SUFFIX-BASED IMPORTERS
def py_suffix_importer(filename, finfo, fqname):
    """Suffix importer for '.py' files.

    filename is the path of the source file, finfo its stat result and
    fqname the fully-qualified module name (unused here).  The cached
    bytecode next to the source is used when its magic number matches and
    its recorded timestamp equals the source's modification time;
    otherwise the source is compiled with _compile().

    Returns (0, code, values) where values maps '__file__' to the file the
    code was actually taken from.
    """
    # Path of the byte-compiled companion: strip '.py', add the suffix.
    code_path = filename[:-3] + _suffix
    # Index 8 of a stat result is the modification time (stat.ST_MTIME).
    t_py = long(finfo[8])
    t_pyc = _timestamp(code_path)

    code = None
    if t_pyc is not None and t_pyc >= t_py:
        f = open(code_path, 'rb')
        try:
            if f.read(4) == imp.get_magic():
                t = struct.unpack('<I', f.read(4))[0]
                if t == t_py:
                    code = marshal.load(f)
        finally:
            # Close the cache file even if its contents turn out to be
            # truncated or corrupt and reading them raises.
            f.close()
    if code is None:
        code_path = filename
        code = _compile(code_path, t_py)

    return 0, code, { '__file__' : code_path }
class DynLoadSuffixImporter:
    "Suffix importer that loads a file through imp.load_module()."

    def __init__(self, desc):
        # desc is a (suffix, mode, type) description as produced by
        # imp.get_suffixes(); it is passed on to imp.load_module().
        self.desc = desc

    def import_file(self, filename, finfo, fqname):
        """Load the module fqname from filename.

        finfo (the file's stat result) is accepted for interface
        compatibility with other suffix importers and is not used.
        Returns (0, module, {}).
        """
        fp = open(filename, self.desc[1])
        try:
            module = imp.load_module(fqname, fp, filename, self.desc)
        finally:
            # imp.load_module() leaves closing the file to the caller, also
            # when it raises.
            fp.close()
        module.__file__ = filename
        return 0, module, { }
621 ######################################################################
def _print_importers():
    "Print every sys.modules entry, sorted, with its __importer__ (if any)."
    for name, module in sorted(sys.modules.items()):
        if module:
            owner = module.__dict__.get('__importer__', '-- no importer')
        else:
            # a false entry (such as None) stands for no real module
            owner = '-- non-existent module'
        print("%s %s" % (name, owner))
def _test_revamp():
    "Install a fresh ImportManager and put a BuiltinImporter first on sys.path."
    manager = ImportManager()
    manager.install()
    builtin_importer = BuiltinImporter()
    sys.path.insert(0, builtin_importer)
636 ######################################################################
639 # TODO
641 # from Finn Bock:
642 # type(sys) is not a module in Jython. what to use instead?
643 # imp.C_EXTENSION is not in Jython. same for get_suffixes and new_module
645 # given foo.py of:
646 # import sys
647 # sys.modules['foo'] = sys
649 # ---- standard import mechanism
650 # >>> import foo
651 # >>> foo
652 # <module 'sys' (built-in)>
654 # ---- revamped import mechanism
655 # >>> import imputil
656 # >>> imputil._test_revamp()
657 # >>> import foo
658 # >>> foo
659 # <module 'foo' from 'foo.py'>
662 # from MAL:
663 # should BuiltinImporter exist in sys.path or hard-wired in ImportManager?
664 # need __path__ processing
665 # performance
666 # move chaining to a subclass [gjs: it's been nuked]
667 # deinstall should be possible
668 # query mechanism needed: is a specific Importer installed?
669 # py/pyc/pyo piping hooks to filter/process these files
670 # wish list:
671 # distutils importer hooked to list of standard Internet repositories
672 # module->file location mapper to speed FS-based imports
673 # relative imports
674 # keep chaining so that it can play nice with other import hooks
676 # from Gordon:
677 # push MAL's mapper into sys.path[0] as a cache (hard-coded for apps)
679 # from Guido:
680 # need to change sys.* references for rexec environs
681 # need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy
682 # watch out for sys.modules[...] is None
683 # flag to force absolute imports? (speeds _determine_import_context and
684 # checking for a relative module)
685 # insert names of archives into sys.path (see quote below)
686 # note: reload does NOT blast module dict
687 # shift import mechanisms and policies around; provide for hooks, overrides
688 # (see quote below)
689 # add get_source stuff
690 # get_topcode and get_subcode
691 # CRLF handling in _compile
692 # race condition in _compile
693 # refactoring of os.py to deal with _os_bootstrap problem
694 # any special handling to do for importing a module with a SyntaxError?
695 # (e.g. clean up the traceback)
696 # implement "domain" for path-type functionality using pkg namespace
697 # (rather than FS-names like __path__)
698 # don't use the word "private"... maybe "internal"
701 # Guido's comments on sys.path caching:
703 # We could cache this in a dictionary: the ImportManager can have a
704 # cache dict mapping pathnames to importer objects, and a separate
705 # method for coming up with an importer given a pathname that's not yet
706 # in the cache. The method should do a stat and/or look at the
707 # extension to decide which importer class to use; you can register new
708 # importer classes by registering a suffix or a Boolean function, plus a
709 # class. If you register a new importer class, the cache is zapped.
710 # The cache is independent from sys.path (but maintained per
711 # ImportManager instance) so that rearrangements of sys.path do the
712 # right thing. If a path is dropped from sys.path the corresponding
713 # cache entry is simply no longer used.
715 # My/Guido's comments on factoring ImportManager and Importer:
717 # > However, we still have a tension occurring here:
719 # > 1) implementing policy in ImportManager assists in single-point policy
720 # > changes for app/rexec situations
721 # > 2) implementing policy in Importer assists in package-private policy
722 # > changes for normal, operating conditions
724 # > I'll see if I can sort out a way to do this. Maybe the Importer class will
725 # > implement the methods (which can be overridden to change policy) by
726 # > delegating to ImportManager.
728 # Maybe also think about what kind of policies an Importer would be
729 # likely to want to change. I have a feeling that a lot of the code
730 # there is actually not so much policy but a *necessity* to get things
731 # working given the calling conventions for the __import__ hook: whether
732 # to return the head or tail of a dotted name, or when to do the "finish
733 # fromlist" stuff.