Misc. changes, including documenting the ability to specify a class attribute in...
[python.git] / Lib / imputil.py
blobe6ad7ecac4ea0ce2e779d3a68f1326947bc8227b
1 """
2 Import utilities
4 Exported classes:
5 ImportManager Manage the import process
7 Importer Base class for replacing standard import functions
8 BuiltinImporter Emulate the import mechanism for builtin and frozen modules
10 DynLoadSuffixImporter
11 """
13 # note: avoid importing non-builtin modules
14 import imp ### not available in JPython?
15 import sys
16 import __builtin__
18 # for the DirectoryImporter
19 import struct
20 import marshal
# Names exported by "from imputil import *".
__all__ = [
    "ImportManager",
    "Importer",
    "BuiltinImporter",
]

# Type objects used for isinstance() checks further down in this module.
_StringType = type('')
_ModuleType = type(sys)         ### doesn't work in JPython...
class ImportManager:
    "Manage the import process."

    def install(self, namespace=vars(__builtin__)):
        """Install this ImportManager into the specified namespace.

        namespace may be a module (its dictionary is used) or a namespace
        dictionary; by default it is the dictionary of the __builtin__
        module.  The namespace's current '__import__' entry is saved so
        that uninstall() can put it back.
        """
        if isinstance(namespace, _ModuleType):
            namespace = vars(namespace)

        # Note: we have no notion of "chaining"

        # Remember the hook that is currently in place, then take over.
        self.previous_importer = namespace['__import__']
        self.namespace = namespace
        namespace['__import__'] = self._import_hook

        ### fix this
        #namespace['reload'] = self._reload_hook

    def uninstall(self):
        "Restore the import hook that install() replaced."
        self.namespace['__import__'] = self.previous_importer

    def add_suffix(self, suffix, importFunc):
        """Register importFunc as the handler for files ending in suffix.

        The registration is forwarded to the filesystem importer.
        """
        assert callable(importFunc)
        self.fs_imp.add_suffix(suffix, importFunc)


    ######################################################################
    #
    # PRIVATE METHODS
    #

    # Optional override for the class used to build the default filesystem
    # importer; when it is false, _FilesystemImporter is used.
    clsFilesystemImporter = None

    def __init__(self, fs_imp=None):
        """Set up the filesystem importer and its default suffix handlers.

        fs_imp, when given, is used as the filesystem importer; otherwise
        one is created from clsFilesystemImporter (or _FilesystemImporter).
        """
        # An import is certainly coming sooner or later, so make sure the
        # OS-related helpers are available right away.
        if not _os_stat:
            _os_bootstrap()

        # This is the Importer that fetches things from the filesystem.  It
        # provides one extra method (import_from_dir) for our use.
        if fs_imp is None:
            factory = self.clsFilesystemImporter
            if not factory:
                factory = _FilesystemImporter
            fs_imp = factory()
        self.fs_imp = fs_imp

        # Register the suffixes we know how to import.  Dynamic-load modules
        # are registered first, followed by .py files (or a .py file's
        # cached bytecode).
        for desc in imp.get_suffixes():
            if desc[2] != imp.C_EXTENSION:
                continue
            loader = DynLoadSuffixImporter(desc)
            self.add_suffix(desc[0], loader.import_file)
        self.add_suffix('.py', py_suffix_importer)

    def _import_hook(self, fqname, globals=None, locals=None, fromlist=None):
        """Python calls this hook to locate and import a module.

        Returns the top-level module of the dotted name, or the bottom
        module when the responsible importer is handed a fromlist.  Raises
        ImportError when the module cannot be found.
        """
        names = fqname.split('.')
        head = names[0]

        # Work out which package (if any) this import is relative to.  When
        # there is one, its importer gets the first shot at the import.
        context = self._determine_import_context(globals)
        if context:
            found = context.__importer__._do_import(context, names, fromlist)
            if found:
                return found

        # Use the top module if it is already loaded; otherwise go looking
        # for it along sys.path.
        try:
            top_module = sys.modules[head]
        except KeyError:
            top_module = self._import_top_module(head)
            if not top_module:
                # the topmost module wasn't found at all.
                raise ImportError('No module named ' + fqname)

        # Fast path for undotted names.  Without a fromlist the top module
        # is the answer.  With a fromlist, a false or missing __ispkg__
        # means either that somebody else imported the module (so we cannot
        # import sub-modules named in the fromlist, yet cannot raise either
        # because they may be plain names) or that it is not a package (so
        # there are no sub-modules).  Since the name is undotted the top
        # module is also the "bottom", which is the defined return value
        # when a fromlist exists.
        if len(names) == 1:
            if not fromlist or not top_module.__dict__.get('__ispkg__'):
                return top_module

        importer = top_module.__dict__.get('__importer__')
        if importer:
            return importer._finish_import(top_module, names[1:], fromlist)

        # Grrr, some people "import os.path"
        if len(names) == 2 and hasattr(top_module, names[1]):
            return top_module

        # No importer is recorded on the module, so something else imported
        # it and we have no idea how to get sub-modules out of it.
        raise ImportError('No module named ' + fqname)

    def _determine_import_context(self, globals):
        """Returns the context in which a module should be imported.

        The context is either a loaded (package) module, in which case the
        imported module is looked for inside that package, or None, meaning
        the module should be looked for as a "top-level" module.
        """
        if not (globals and globals.get('__importer__')):
            # The importing code is not one of our modules or packages, so
            # as far as we are concerned there is no relative context; the
            # module is picked off the standard path.
            return None

        # One of our own modules/packages is doing the import; its fully
        # qualified name defines the context.
        importing_name = globals['__name__']

        # A package imports relative to itself (names refer to its contents).
        if globals['__ispkg__']:
            package = sys.modules[importing_name]
            assert globals is package.__dict__
            return package

        dot = importing_name.rfind('.')
        if dot == -1:
            # a module outside of a package has no particular import context
            return None

        # A module inside a package imports relative to the containing
        # package (names refer to its siblings).
        package_name = importing_name[:dot]
        package = sys.modules[package_name]
        assert package.__name__ == package_name
        return package

    def _import_top_module(self, name):
        """Import the top-level module called name by scanning sys.path.

        String entries are treated as filesystem locations and handed to
        the filesystem importer; any other entry is treated as an Importer
        object and asked to import the module itself.  Returns the module,
        or None when no entry could supply it.
        """
        for entry in sys.path:
            if isinstance(entry, _StringType):
                candidate = self.fs_imp.import_from_dir(entry, name)
            else:
                candidate = entry.import_top(name)
            if candidate:
                return candidate
        return None

    def _reload_hook(self, module):
        "Python calls this hook to reload a module."

        # Whether a reload is possible depends on the importer, but we can
        # at least check that the module is ours to reload.
        importer = module.__dict__.get('__importer__')
        if not importer:
            ### oops. now what...
            pass

        # The module uses the imputil system and the reload ought to be
        # delegated, but we don't know what to do (yet).
        ### we should blast the module dict and do another get_code(). need to
        ### flesh this out and add proper docco...
        raise SystemError("reload not yet implemented")
class Importer:
    "Base class for replacing standard import functions."

    def import_top(self, name):
        "Import a top-level module."
        return self._import_one(None, name, name)

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _finish_import(self, top, parts, fromlist):
        """Load the rest of a dotted name and pick the module to return.

        top is the already-imported top-level module, parts the remaining
        name components below it.  Returns top when there is no fromlist
        ("import a.b.c" yields "a"), otherwise the bottom module
        ("from a.b import c, d" yields "b").
        """
        # if "a.b.c" was provided, then load the ".b.c" portion down from
        # below the top-level module.
        bottom = self._load_tail(top, parts)

        if not fromlist:
            # no fromlist: return the top of the import tree
            return top

        # The top module was imported by self, which means the bottom module
        # was as well (just now, or earlier and fetched from sys.modules).
        # We can therefore handle its fromlist and rely on __ispkg__.
        #
        # If the bottom module is a package, then (potentially) import some
        # modules.
        #
        # note: if it is not a package, then "fromlist" refers to names in
        #       the bottom module rather than modules.
        # note: for a mix of names and modules in the fromlist, we will
        #       import all modules and insert those into the namespace of
        #       the package module. Python will pick up all fromlist names
        #       from the bottom (package) module; some will be modules that
        #       we imported and stored in the namespace, others are expected
        #       to be present already.
        if bottom.__ispkg__:
            self._import_fromlist(bottom, fromlist)

        return bottom

    def _import_one(self, parent, modname, fqname):
        """Import a single module.

        Returns the module from sys.modules when it is already there.
        Otherwise asks get_code() for it; returns None when get_code()
        returns None.  A freshly imported module is also stored as the
        attribute modname on parent, when parent is true.
        """
        try:
            return sys.modules[fqname]
        except KeyError:
            pass

        # load the module's code, or fetch the module itself
        result = self.get_code(parent, modname, fqname)
        if result is None:
            return None

        module = self._process_result(result, fqname)

        # insert the module into its parent
        if parent:
            setattr(parent, modname, module)
        return module

    def _process_result(self, result, fqname):
        """Turn a get_code() result into a module registered in sys.modules.

        result is the (ispkg, code, values) 3-tuple described in get_code();
        code is either a code object or an already-loaded module.  Returns
        whatever object ends up in sys.modules[fqname].
        """
        ispkg, code, values = result

        # did get_code() return an actual module? (rather than a code object)
        is_module = isinstance(code, _ModuleType)

        # use the returned module, or create a new one to exec code into
        if is_module:
            module = code
        else:
            module = imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = ispkg

        # insert additional values into the module (before executing the code)
        module.__dict__.update(values)

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        # execute the code within the module's namespace
        if not is_module:
            try:
                exec(code, module.__dict__)
            except:
                # Deliberately catch everything: the half-initialised module
                # must not stay in sys.modules.  The exception is re-raised.
                if fqname in sys.modules:
                    del sys.modules[fqname]
                raise

        # fetch from sys.modules instead of returning module directly.
        # also make module's __name__ agree with fqname, in case
        # the executed code played games on us.
        module = sys.modules[fqname]
        module.__name__ = fqname
        return module

    def _load_tail(self, m, parts):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules (m itself when
        parts is empty).  Raises ImportError when a component is not found.
        """
        for part in parts:
            fqname = "%s.%s" % (m.__name__, part)
            m = self._import_one(m, part, fqname)
            if not m:
                raise ImportError("No module named " + fqname)
        return m

    def _import_fromlist(self, package, fromlist):
        'Import any sub-modules in the "from" list.'

        # if '*' is present in the fromlist, then look for the '__all__'
        # variable to find additional items (modules) to import.
        if '*' in fromlist:
            fromlist = list(fromlist) + \
                       list(package.__dict__.get('__all__', []))

        for sub in fromlist:
            # if the name is already present, then don't try to import it (it
            # might not be a module!).
            if sub != '*' and not hasattr(package, sub):
                subname = "%s.%s" % (package.__name__, sub)
                submod = self._import_one(package, sub, subname)
                if not submod:
                    raise ImportError("cannot import name " + subname)

    def _do_import(self, parent, parts, fromlist):
        """Attempt to import the module relative to parent.

        This method is used when the import context specifies that <self>
        imported the parent module.  Returns None when the first component
        cannot be found relative to parent.
        """
        top_name = parts[0]
        top_fqname = parent.__name__ + '.' + top_name
        top_module = self._import_one(parent, top_name, top_fqname)
        if not top_module:
            # this importer and parent could not find the module (relatively)
            return None

        return self._finish_import(top_module, parts[1:], fromlist)

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing. It
        may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a
        (potentially) dotted name from the "root" of the module namespace
        down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 3-tuple should be the integer 0 or 1,
            specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
            executed within the new module's namespace). This item can also
            be a fully-loaded module object (e.g. loaded from a shared lib).

        * The third item is a dictionary of name/value pairs that will be
            inserted into new module before the code object is executed. This
            is provided in case the module's code expects certain values (such
            as where the module was found). When the second item is a module
            object, then these names/values will be inserted *after* the module
            has been loaded/initialized.
        """
        raise RuntimeError("get_code not implemented")
397 ######################################################################
399 # Some handy stuff for the Importers
# Final character of the byte-compiled file suffix: 'c' when __debug__ is
# true, 'o' otherwise.
if __debug__:
    _suffix_char = 'c'
else:
    _suffix_char = 'o'

# Full suffix of the byte-compiled file that belongs to a .py source file.
_suffix = '.py' + _suffix_char
def _compile(pathname, timestamp):
    """Compile (and cache) a Python source file.

    The file specified by <pathname> is compiled to a code object and
    returned.

    Presuming the appropriate privileges exist, the bytecodes will be
    saved back to the filesystem for future imports. The source file's
    modification timestamp must be provided as a Long value.

    Both the source file and the cache file are closed before returning,
    including when reading, compiling or writing raises.
    """
    source = open(pathname, 'rU')
    try:
        codestring = source.read()
    finally:
        source.close()
    if codestring and codestring[-1] != '\n':
        codestring = codestring + '\n'
    code = __builtin__.compile(codestring, pathname, 'exec')

    # try to cache the compiled code; failing to open the cache file for
    # writing is not an error.
    try:
        f = open(pathname + _suffix_char, 'wb')
    except IOError:
        pass
    else:
        try:
            # Write a zeroed placeholder first and fill in the real magic
            # number last -- presumably so that an incompletely written
            # file does not start with a valid magic number.
            f.write('\0\0\0\0')
            f.write(struct.pack('<I', timestamp))
            marshal.dump(code, f)
            f.flush()
            f.seek(0, 0)
            f.write(imp.get_magic())
        finally:
            f.close()

    return code
# Stand-ins for os.stat and os.path.join; filled in by _os_bootstrap().
_os_stat = None
_os_path_join = None

def _os_bootstrap():
    """Set up 'os' module replacement functions for use during import bootstrap.

    Picks the stat function from whichever OS-specific builtin module is
    present and builds a matching path-join function, then stores both in
    the module globals _os_stat and _os_path_join.  Raises ImportError when
    none of the known OS-specific modules is built in.
    """
    global _os_stat, _os_path_join

    builtin_names = sys.builtin_module_names

    # sep stays None only for 'mac', which needs its own join rules.
    sep = None
    if 'posix' in builtin_names:
        from posix import stat
        sep = '/'
    elif 'nt' in builtin_names:
        from nt import stat
        sep = '\\'
    elif 'dos' in builtin_names:
        from dos import stat
        sep = '\\'
    elif 'os2' in builtin_names:
        from os2 import stat
        sep = '\\'
    elif 'mac' in builtin_names:
        from mac import stat
    else:
        raise ImportError('no os specific module found')

    if sep is None:
        def join(a, b):
            if a == '':
                return b
            if ':' not in a:
                a = ':' + a
            if a[-1:] != ':':
                a = a + ':'
            return a + b
    else:
        def join(a, b, sep=sep):
            if a == '':
                return b
            # a forward slash is accepted as a separator on every platform
            if a[-1:] in ('/', sep):
                return a + b
            return a + sep + b

    _os_stat = stat
    _os_path_join = join
def _os_path_isdir(pathname):
    """Local replacement for os.path.isdir().

    Returns None (rather than a false boolean) when the stat call raises
    OSError.
    """
    try:
        mode = _os_stat(pathname).st_mode
    except OSError:
        return None
    # 0xF000 is octal 0170000 (the file-type bits of st_mode) and 0x4000 is
    # octal 0040000 (the directory type).
    return (mode & 0xF000) == 0x4000
def _timestamp(pathname):
    """Return the file modification time as a Long.

    Returns None when the stat call raises OSError.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    else:
        return long(info.st_mtime)
503 ######################################################################
505 # Emulate the import mechanism for builtin and frozen modules
class BuiltinImporter(Importer):
    "Emulate the import mechanism for builtin and frozen modules."

    def get_code(self, parent, modname, fqname):
        """Load modname when it is a builtin or frozen module.

        Returns None when a parent is given or when the name is neither
        builtin nor frozen; otherwise returns (0, module, {}).
        """
        if parent:
            # these modules definitely do not occur within a package context
            return None

        # Classify the module; give up if it is neither builtin nor frozen.
        if imp.is_builtin(modname):
            kind = imp.C_BUILTIN
        elif imp.is_frozen(modname):
            kind = imp.PY_FROZEN
        else:
            return None

        # got it. now load and return it.
        description = ('', '', kind)
        module = imp.load_module(modname, None, modname, description)
        return 0, module, { }
527 ######################################################################
529 # Internal importer used for importing from the filesystem
class _FilesystemImporter(Importer):
    "Internal importer used for importing from the filesystem."

    def __init__(self):
        # list of (suffix, importFunc) pairs, tried in registration order
        self.suffixes = [ ]

    def add_suffix(self, suffix, importFunc):
        "Register importFunc as the handler for files ending in suffix."
        assert callable(importFunc)
        self.suffixes.append((suffix, importFunc))

    def import_from_dir(self, dir, fqname):
        """Import the top-level module fqname from the directory dir.

        Returns the imported module, or None when nothing importable with
        that name is found in dir.
        """
        result = self._import_pathname(_os_path_join(dir, fqname), fqname)
        if result:
            return self._process_result(result, fqname)
        return None

    def get_code(self, parent, modname, fqname):
        # This importer is never used with an empty parent. Its existence is
        # private to the ImportManager. The ImportManager uses the
        # import_from_dir() method to import top-level modules/packages.
        # This method is only used when we look for a module within a package.
        assert parent

        return self._import_pathname(_os_path_join(parent.__pkgdir__, modname),
                                     fqname)

    def _import_pathname(self, pathname, fqname):
        """Import whatever lives at pathname (given without any suffix).

        When pathname is a directory holding an importable '__init__', the
        result describes a package: a (1, code, values) tuple whose values
        include '__pkgdir__' and '__path__'.  Otherwise each registered
        suffix is tried in order and the first file that can be stat'ed is
        handed to its import function.  Returns None when nothing matches.
        """
        if _os_path_isdir(pathname):
            result = self._import_pathname(_os_path_join(pathname, '__init__'),
                                           fqname)
            if result:
                values = result[2]
                values['__pkgdir__'] = pathname
                values['__path__'] = [ pathname ]
                return 1, result[1], values
            # A directory without an __init__ module is not a package and
            # must not hide a module file of the same name sitting next to
            # it, so fall through to the suffix search instead of giving up.

        for suffix, importFunc in self.suffixes:
            filename = pathname + suffix
            try:
                finfo = _os_stat(filename)
            except OSError:
                pass
            else:
                return importFunc(filename, finfo, fqname)
        return None
576 ######################################################################
578 # SUFFIX-BASED IMPORTERS
def py_suffix_importer(filename, finfo, fqname):
    """Import function for '.py' files.

    filename is the path of the source file and finfo its stat result (the
    modification time is read from index 8).  The cached bytecode file is
    used when it is at least as new as the source, starts with the current
    magic number and records exactly the source's timestamp; otherwise the
    source is compiled.  Returns (0, code, {'__file__': path}) where path is
    the file the code was actually taken from.
    """
    cache_path = filename[:-3] + _suffix
    t_py = long(finfo[8])
    t_pyc = _timestamp(cache_path)

    origin = cache_path
    code = None
    if t_pyc is not None and t_pyc >= t_py:
        f = open(cache_path, 'rb')
        try:
            if f.read(4) == imp.get_magic():
                t = struct.unpack('<I', f.read(4))[0]
                if t == t_py:
                    code = marshal.load(f)
        finally:
            # close the bytecode file even when unpacking/unmarshalling fails
            f.close()
    if code is None:
        origin = filename
        code = _compile(origin, t_py)

    return 0, code, { '__file__' : origin }
class DynLoadSuffixImporter:
    "Import function provider for dynamically loaded (extension) modules."

    def __init__(self, desc):
        # desc is a suffix description as found in imp.get_suffixes();
        # desc[1] is used as the mode for opening the file.
        self.desc = desc

    def import_file(self, filename, finfo, fqname):
        """Load the extension module stored in filename as fqname.

        Returns (0, module, {}).  The file object handed to
        imp.load_module() is always closed again, because load_module()
        leaves closing it to the caller.
        """
        fp = open(filename, self.desc[1])
        try:
            module = imp.load_module(fqname, fp, filename, self.desc)
        finally:
            fp.close()
        module.__file__ = filename
        return 0, module, { }
611 ######################################################################
def _print_importers():
    """Print every sys.modules entry, sorted by name, with its importer.

    Modules without an '__importer__' attribute are reported as
    '-- no importer'; false entries as '-- non-existent module'.
    """
    names = list(sys.modules.keys())
    names.sort()
    for name in names:
        module = sys.modules[name]
        if module:
            label = module.__dict__.get('__importer__', '-- no importer')
        else:
            label = '-- non-existent module'
        sys.stdout.write("%s %s\n" % (name, label))
def _test_revamp():
    "Install a fresh ImportManager and put a BuiltinImporter first on sys.path."
    manager = ImportManager()
    manager.install()
    builtin_importer = BuiltinImporter()
    sys.path.insert(0, builtin_importer)
626 ######################################################################
629 # TODO
631 # from Finn Bock:
632 # type(sys) is not a module in JPython. what to use instead?
633 # imp.C_EXTENSION is not in JPython. same for get_suffixes and new_module
635 # given foo.py of:
636 # import sys
637 # sys.modules['foo'] = sys
639 # ---- standard import mechanism
640 # >>> import foo
641 # >>> foo
642 # <module 'sys' (built-in)>
644 # ---- revamped import mechanism
645 # >>> import imputil
646 # >>> imputil._test_revamp()
647 # >>> import foo
648 # >>> foo
649 # <module 'foo' from 'foo.py'>
652 # from MAL:
653 # should BuiltinImporter exist in sys.path or hard-wired in ImportManager?
654 # need __path__ processing
655 # performance
656 # move chaining to a subclass [gjs: it's been nuked]
657 # deinstall should be possible
658 # query mechanism needed: is a specific Importer installed?
659 # py/pyc/pyo piping hooks to filter/process these files
660 # wish list:
661 # distutils importer hooked to list of standard Internet repositories
662 # module->file location mapper to speed FS-based imports
663 # relative imports
664 # keep chaining so that it can play nice with other import hooks
666 # from Gordon:
667 # push MAL's mapper into sys.path[0] as a cache (hard-coded for apps)
669 # from Guido:
670 # need to change sys.* references for rexec environs
671 # need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy
672 # watch out for sys.modules[...] is None
673 # flag to force absolute imports? (speeds _determine_import_context and
674 # checking for a relative module)
675 # insert names of archives into sys.path (see quote below)
676 # note: reload does NOT blast module dict
677 # shift import mechanisms and policies around; provide for hooks, overrides
678 # (see quote below)
679 # add get_source stuff
680 # get_topcode and get_subcode
681 # CRLF handling in _compile
682 # race condition in _compile
683 # refactoring of os.py to deal with _os_bootstrap problem
684 # any special handling to do for importing a module with a SyntaxError?
685 # (e.g. clean up the traceback)
686 # implement "domain" for path-type functionality using pkg namespace
687 # (rather than FS-names like __path__)
688 # don't use the word "private"... maybe "internal"
691 # Guido's comments on sys.path caching:
693 # We could cache this in a dictionary: the ImportManager can have a
694 # cache dict mapping pathnames to importer objects, and a separate
695 # method for coming up with an importer given a pathname that's not yet
696 # in the cache. The method should do a stat and/or look at the
697 # extension to decide which importer class to use; you can register new
698 # importer classes by registering a suffix or a Boolean function, plus a
699 # class. If you register a new importer class, the cache is zapped.
700 # The cache is independent from sys.path (but maintained per
701 # ImportManager instance) so that rearrangements of sys.path do the
702 # right thing. If a path is dropped from sys.path the corresponding
703 # cache entry is simply no longer used.
705 # My/Guido's comments on factoring ImportManager and Importer:
707 # > However, we still have a tension occurring here:
709 # > 1) implementing policy in ImportManager assists in single-point policy
710 # > changes for app/rexec situations
711 # > 2) implementing policy in Importer assists in package-private policy
712 # > changes for normal, operating conditions
714 # > I'll see if I can sort out a way to do this. Maybe the Importer class will
715 # > implement the methods (which can be overridden to change policy) by
716 # > delegating to ImportManager.
718 # Maybe also think about what kind of policies an Importer would be
719 # likely to want to change. I have a feeling that a lot of the code
720 # there is actually not so much policy but a *necessity* to get things
721 # working given the calling conventions for the __import__ hook: whether
722 # to return the head or tail of a dotted name, or when to do the "finish
723 # fromlist" stuff.