Backport importlib to at least Python 2.5 by getting rid of use of str.format.
[python.git] / Lib / encodings / __init__.py
blobaea7c5e18385cd28bec55947f05578dfb5549007
""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder,
    decoder, incrementalencoder, incrementaldecoder, streamwriter and
    streamreader attributes which adhere to the Python Codec Interface
    Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
31 import codecs
32 from encodings import aliases
33 import __builtin__
# Cache of search results, keyed by the encoding name exactly as it was
# passed to search_function().  A value of None records a known miss.
_cache = {}

# Sentinel used as the default for _cache.get(); needed because None is
# itself a valid cached value.
_unknown = '--unknown--'

# Non-empty fromlist handed to __import__ so that the submodule itself is
# returned rather than the top-level package.
_import_tail = ['*']

# 256-entry translation table for 8-bit str.translate(): ASCII letters,
# digits and '.' map to themselves, every other byte maps to a space.
# The run lengths are spelled out so the table stays exactly 256
# characters long even if the source is reformatted.
_norm_encoding_map = (
    ' ' * 46 + '. ' +                           # 0x00-0x2F: only '.' kept
    '0123456789' + ' ' * 7 +                    # 0x30-0x40: digits kept
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + ' ' * 6 +    # 0x41-0x60: upper case kept
    'abcdefghijklmnopqrstuvwxyz' + ' ' * 133    # 0x61-0xFF: lower case kept
)

# Shared alias table: normalized alias name -> codec module name.
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry.

    search_function() raises this when getregentry() yields a legacy
    tuple of the wrong length or one whose members are not callable.
    It derives from both LookupError and SystemError, so handlers for
    either base class will catch it.
    """
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.
    if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')
    # The table turns every separator into a space; split() then drops
    # leading/trailing runs and collapses inner runs before the join.
    return '_'.join(encoding.translate(_norm_encoding_map).split())
def search_function(encoding):
    """Codec search function for the 'encodings' package.

    Looks up the codec module for `encoding` and returns its registry
    entry as a codecs.CodecInfo object, or None when no suitable module
    is found.  Both outcomes are stored in _cache under the name as
    given.

    Side effect: aliases reported by the module's optional getaliases()
    are added to the shared alias table without overwriting existing
    entries.

    Raises CodecRegistryError if the module's getregentry() returns a
    legacy tuple that has the wrong length or non-callable members.
    """
    # Cache lookup.  None is a valid cached value (a known miss), hence
    # the separate sentinel.
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try the module named by the alias of the normalized encoding
    # name (if there is one), then the normalized name itself.  Only
    # modules inside the 'encodings' package are considered; candidate
    # names that are empty or contain a dot are skipped.
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding, norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import
            # of a module with side-effects that is not in the
            # 'encodings' package.
            mod = __import__('encodings.' + modname,
                             fromlist=_import_tail, level=0)
        except ImportError:
            # Deliberate best effort: fall through to the next candidate.
            pass
        else:
            break
    else:
        mod = None

    try:
        # Also covers mod being None: None has no getregentry attribute.
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy interface: a plain sequence of 4 to 7 items.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError(
                'module "%s" (%s) failed to register'
                % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or \
           not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and
            not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and
            not callable(entry[5])):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)'
                % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and use the module's name
            # inside the package as the codec name.  Only the first six
            # items are kept, so a 7-item entry with a None name has that
            # slot replaced rather than growing to 8 items.
            entry = (tuple(entry[:6]) + (None,) * (6 - len(entry)) +
                     (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)