Propagate exceptions from shutdown() if raiseExceptions is not set.
[python.git] / Lib / encodings / __init__.py
blob8a50ac13e84484cc2d3e67f31ac9e3bf5d26cd72
""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
    The getregentry() API must return callable objects which adhere to
    the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
30 import codecs, types, aliases
# Results of search_function(), keyed by the encoding name exactly as it
# was passed in.  A value of None records a lookup that found no codec.
_cache = {}

# Default handed to _cache.get() so that a cached None ("known miss") can
# be told apart from "never looked up".
_unknown = '--unknown--'

# Non-empty fromlist for __import__(): makes it return the named module
# itself rather than the top-level package of a dotted name.
_import_tail = ['*']

# 256-entry table for str.translate(), indexed by character code.  The
# dot, the ASCII digits and the ASCII letters map to themselves; every
# other code maps to a space, which normalize_encoding() then collapses
# into single underscores.  The runs of spaces are spelled with explicit
# repeat counts so that whitespace-collapsing tools cannot silently
# shorten the table.
_norm_encoding_map = (
    ' ' * 46 + '.' + ' ' +                      # 0x00-0x2F ('.' is 0x2E)
    '0123456789' + ' ' * 7 +                    # 0x30-0x40
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + ' ' * 6 +    # 0x41-0x60
    'abcdefghijklmnopqrstuvwxyz' + ' ' * 133    # 0x61-0xFF
)

# The alias dictionary of the aliases module (the same object, not a
# copy); search_function() reads it and adds codec-supplied aliases to it.
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
    """ Error raised when a codec module hands back an unusable entry.

        search_function() raises it when the sequence returned by a
        module's getregentry() does not have exactly four items, or
        when one of those items is not callable.  Deriving from both
        LookupError and SystemError lets handlers for either catch it.

    """
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.
        Letter case is left unchanged, so 'UTF-8' becomes 'UTF_8'.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible.

        encoding may be an 8-bit string or a Unicode string (including
        instances of Unicode subclasses); the result is an 8-bit
        string.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.  isinstance() is used instead of
    # an exact type comparison so that Unicode subclasses are converted
    # as well.
    if isinstance(encoding, types.UnicodeType):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')
    # The table turns every separator character into a space; split()
    # drops leading/trailing runs and collapses the inner ones.
    return '_'.join(encoding.translate(_norm_encoding_map).split())
def search_function(encoding):

    """ Codec search function for the encodings package.

        Looks up the codec module for encoding and returns its registry
        entry as a 4-tuple (encoder, decoder, stream_reader,
        stream_writer), or None if no codec module could be found.
        Both results are stored in _cache under the encoding name as
        given, so repeated lookups do not import anything again.

        Raises CodecRegistryError if the module's getregentry() does
        not return exactly four items or if one of them is not
        callable.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        if not modname:
            # Nothing left after normalization; there is no module to try
            continue
        try:
            mod = __import__(modname,
                             globals(), locals(), _import_tail)
        except ImportError:
            pass
        else:
            break
    else:
        # Loop ran to completion: none of the candidates could be imported
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname is still the name under which the import
                # above succeeded
                _aliases[alias] = modname

    # Return the registry entry
    return entry
# Register the search_function in the Python codec registry.  This is a
# module-level statement, so it runs when this package is imported.
codecs.register(search_function)