""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
10 import __builtin__
, sys
12 ### Registry and builtin stateless codec functions
16 except ImportError, why
:
17 raise SystemError('Failed to load the builtin codecs: %s' % why
)
19 __all__
= ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
20 "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
21 "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
22 "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
23 "strict_errors", "ignore_errors", "replace_errors",
24 "xmlcharrefreplace_errors",
25 "register_error", "lookup_error"]
30 # Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
31 # and its possible byte string values
32 # for UTF8/UTF16/UTF32 output and little/big endian machines
36 BOM_UTF8
= '\xef\xbb\xbf'
38 # UTF-16, little endian
39 BOM_LE
= BOM_UTF16_LE
= '\xff\xfe'
42 BOM_BE
= BOM_UTF16_BE
= '\xfe\xff'
44 # UTF-32, little endian
45 BOM_UTF32_LE
= '\xff\xfe\x00\x00'
48 BOM_UTF32_BE
= '\x00\x00\xfe\xff'
50 if sys
.byteorder
== 'little':
52 # UTF-16, native endianness
53 BOM
= BOM_UTF16
= BOM_UTF16_LE
55 # UTF-32, native endianness
56 BOM_UTF32
= BOM_UTF32_LE
60 # UTF-16, native endianness
61 BOM
= BOM_UTF16
= BOM_UTF16_BE
63 # UTF-32, native endianness
64 BOM_UTF32
= BOM_UTF32_BE
66 # Old broken names (don't use in new code)
67 BOM32_LE
= BOM_UTF16_LE
68 BOM32_BE
= BOM_UTF16_BE
69 BOM64_LE
= BOM_UTF32_LE
70 BOM64_BE
= BOM_UTF32_BE
73 ### Codec base classes (defining the API)
75 class CodecInfo(tuple):
77 def __new__(cls
, encode
, decode
, streamreader
=None, streamwriter
=None,
78 incrementalencoder
=None, incrementaldecoder
=None, name
=None):
79 self
= tuple.__new
__(cls
, (encode
, decode
, streamreader
, streamwriter
))
83 self
.incrementalencoder
= incrementalencoder
84 self
.incrementaldecoder
= incrementaldecoder
85 self
.streamwriter
= streamwriter
86 self
.streamreader
= streamreader
90 return "<%s.%s object for encoding %s at 0x%x>" % (self
.__class
__.__module
__, self
.__class
__.__name
__, self
.name
, id(self
))
94 """ Defines the interface for stateless encoders/decoders.
96 The .encode()/.decode() methods may use different error
97 handling schemes by providing the errors argument. These
98 string values are predefined:
100 'strict' - raise a ValueError error (or a subclass)
101 'ignore' - ignore the character and continue with the next
102 'replace' - replace with a suitable replacement character;
103 Python will use the official U+FFFD REPLACEMENT
104 CHARACTER for the builtin Unicode codecs on
105 decoding and '?' on encoding.
106 'xmlcharrefreplace' - Replace with the appropriate XML
107 character reference (only for encoding).
108 'backslashreplace' - Replace with backslashed escape sequences
111 The set of allowed values can be extended via register_error.
114 def encode(self
, input, errors
='strict'):
116 """ Encodes the object input and returns a tuple (output
117 object, length consumed).
119 errors defines the error handling to apply. It defaults to
122 The method may not store state in the Codec instance. Use
123 StreamCodec for codecs which have to keep state in order to
124 make encoding/decoding efficient.
126 The encoder must be able to handle zero length input and
127 return an empty object of the output object type in this
131 raise NotImplementedError
133 def decode(self
, input, errors
='strict'):
135 """ Decodes the object input and returns a tuple (output
136 object, length consumed).
138 input must be an object which provides the bf_getreadbuf
139 buffer slot. Python strings, buffer objects and memory
140 mapped files are examples of objects providing this slot.
142 errors defines the error handling to apply. It defaults to
145 The method may not store state in the Codec instance. Use
146 StreamCodec for codecs which have to keep state in order to
147 make encoding/decoding efficient.
149 The decoder must be able to handle zero length input and
150 return an empty object of the output object type in this
154 raise NotImplementedError
156 class IncrementalEncoder(object):
158 An IncrementalEncoder encodes an input in multiple steps. The input can be
159 passed piece by piece to the encode() method. The IncrementalEncoder remembers
160 the state of the Encoding process between calls to encode().
162 def __init__(self
, errors
='strict'):
164 Creates an IncrementalEncoder instance.
166 The IncrementalEncoder may use different error handling schemes by
167 providing the errors keyword argument. See the module docstring
168 for a list of possible values.
173 def encode(self
, input, final
=False):
175 Encodes input and returns the resulting object.
177 raise NotImplementedError
181 Resets the encoder to the initial state.
186 Return the current state of the encoder.
190 def setstate(self
, state
):
192 Set the current state of the encoder. state must have been
193 returned by getstate().
196 class BufferedIncrementalEncoder(IncrementalEncoder
):
198 This subclass of IncrementalEncoder can be used as the baseclass for an
199 incremental encoder if the encoder must keep some of the output in a
200 buffer between calls to encode().
202 def __init__(self
, errors
='strict'):
203 IncrementalEncoder
.__init
__(self
, errors
)
204 self
.buffer = "" # unencoded input that is kept between calls to encode()
206 def _buffer_encode(self
, input, errors
, final
):
207 # Overwrite this method in subclasses: It must encode input
208 # and return an (output, length consumed) tuple
209 raise NotImplementedError
211 def encode(self
, input, final
=False):
212 # encode input (taking the buffer into account)
213 data
= self
.buffer + input
214 (result
, consumed
) = self
._buffer
_encode
(data
, self
.errors
, final
)
215 # keep unencoded input until the next call
216 self
.buffer = data
[consumed
:]
220 IncrementalEncoder
.reset(self
)
224 return self
.buffer or 0
226 def setstate(self
, state
):
227 self
.buffer = state
or ""
229 class IncrementalDecoder(object):
231 An IncrementalDecoder decodes an input in multiple steps. The input can be
232 passed piece by piece to the decode() method. The IncrementalDecoder
233 remembers the state of the decoding process between calls to decode().
235 def __init__(self
, errors
='strict'):
237 Creates a IncrementalDecoder instance.
239 The IncrementalDecoder may use different error handling schemes by
240 providing the errors keyword argument. See the module docstring
241 for a list of possible values.
245 def decode(self
, input, final
=False):
247 Decodes input and returns the resulting object.
249 raise NotImplementedError
253 Resets the decoder to the initial state.
258 Return the current state of the decoder.
260 This must be a (buffered_input, additional_state_info) tuple.
261 buffered_input must be a bytes object containing bytes that
262 were passed to decode() that have not yet been converted.
263 additional_state_info must be a non-negative integer
264 representing the state of the decoder WITHOUT yet having
265 processed the contents of buffered_input. In the initial state
266 and after reset(), getstate() must return (b"", 0).
270 def setstate(self
, state
):
272 Set the current state of the decoder.
274 state must have been returned by getstate(). The effect of
275 setstate((b"", 0)) must be equivalent to reset().
278 class BufferedIncrementalDecoder(IncrementalDecoder
):
280 This subclass of IncrementalDecoder can be used as the baseclass for an
281 incremental decoder if the decoder must be able to handle incomplete byte
284 def __init__(self
, errors
='strict'):
285 IncrementalDecoder
.__init
__(self
, errors
)
286 self
.buffer = "" # undecoded input that is kept between calls to decode()
288 def _buffer_decode(self
, input, errors
, final
):
289 # Overwrite this method in subclasses: It must decode input
290 # and return an (output, length consumed) tuple
291 raise NotImplementedError
293 def decode(self
, input, final
=False):
294 # decode input (taking the buffer into account)
295 data
= self
.buffer + input
296 (result
, consumed
) = self
._buffer
_decode
(data
, self
.errors
, final
)
297 # keep undecoded input until the next call
298 self
.buffer = data
[consumed
:]
302 IncrementalDecoder
.reset(self
)
306 # additional state info is always 0
307 return (self
.buffer, 0)
309 def setstate(self
, state
):
310 # ignore additional state info
311 self
.buffer = state
[0]
314 # The StreamWriter and StreamReader class provide generic working
315 # interfaces which can be used to implement new encoding submodules
316 # very easily. See encodings/utf_8.py for an example on how this is
320 class StreamWriter(Codec
):
322 def __init__(self
, stream
, errors
='strict'):
324 """ Creates a StreamWriter instance.
326 stream must be a file-like object open for writing
329 The StreamWriter may use different error handling
330 schemes by providing the errors keyword argument. These
331 parameters are predefined:
333 'strict' - raise a ValueError (or a subclass)
334 'ignore' - ignore the character and continue with the next
335 'replace'- replace with a suitable replacement character
336 'xmlcharrefreplace' - Replace with the appropriate XML
338 'backslashreplace' - Replace with backslashed escape
339 sequences (only for encoding).
341 The set of allowed parameter values can be extended via
347 def write(self
, object):
349 """ Writes the object's contents encoded to self.stream.
351 data
, consumed
= self
.encode(object, self
.errors
)
352 self
.stream
.write(data
)
354 def writelines(self
, list):
356 """ Writes the concatenated list of strings to the stream
359 self
.write(''.join(list))
363 """ Flushes and resets the codec buffers used for keeping state.
365 Calling this method should ensure that the data on the
366 output is put into a clean state, that allows appending
367 of new fresh data without having to rescan the whole
368 stream to recover state.
373 def __getattr__(self
, name
,
376 """ Inherit all other methods from the underlying stream.
378 return getattr(self
.stream
, name
)
383 def __exit__(self
, type, value
, tb
):
388 class StreamReader(Codec
):
390 def __init__(self
, stream
, errors
='strict'):
392 """ Creates a StreamReader instance.
394 stream must be a file-like object open for reading
397 The StreamReader may use different error handling
398 schemes by providing the errors keyword argument. These
399 parameters are predefined:
401 'strict' - raise a ValueError (or a subclass)
402 'ignore' - ignore the character and continue with the next
403 'replace'- replace with a suitable replacement character;
405 The set of allowed parameter values can be extended via
411 # For str->str decoding this will stay a str
412 # For str->unicode decoding the first read will promote it to unicode
414 self
.linebuffer
= None
416 def decode(self
, input, errors
='strict'):
417 raise NotImplementedError
419 def read(self
, size
=-1, chars
=-1, firstline
=False):
421 """ Decodes data from the stream self.stream and returns the
424 chars indicates the number of characters to read from the
425 stream. read() will never return more than chars
426 characters, but it might return less, if there are not enough
427 characters available.
429 size indicates the approximate maximum number of bytes to
430 read from the stream for decoding purposes. The decoder
431 can modify this setting as appropriate. The default value
432 -1 indicates to read and decode as much as possible. size
433 is intended to prevent having to decode huge files in one
436 If firstline is true, and a UnicodeDecodeError happens
437 after the first line terminator in the input only the first line
438 will be returned, the rest of the input will be kept until the
441 The method should use a greedy read strategy meaning that
442 it should read as much data as is allowed within the
443 definition of the encoding and the given size, e.g. if
444 optional encoding endings or state markers are available
445 on the stream, these should be read too.
447 # If we have lines cached, first merge them back into characters
449 self
.charbuffer
= "".join(self
.linebuffer
)
450 self
.linebuffer
= None
452 # read until we get the required number of characters (if available)
454 # can the request can be satisfied from the character buffer?
459 elif len(self
.charbuffer
) >= size
:
462 if len(self
.charbuffer
) >= chars
:
466 newdata
= self
.stream
.read()
468 newdata
= self
.stream
.read(size
)
469 # decode bytes (those remaining from the last call included)
470 data
= self
.bytebuffer
+ newdata
472 newchars
, decodedbytes
= self
.decode(data
, self
.errors
)
473 except UnicodeDecodeError, exc
:
475 newchars
, decodedbytes
= self
.decode(data
[:exc
.start
], self
.errors
)
476 lines
= newchars
.splitlines(True)
481 # keep undecoded bytes until the next call
482 self
.bytebuffer
= data
[decodedbytes
:]
483 # put new characters in the character buffer
484 self
.charbuffer
+= newchars
485 # there was no data available
489 # Return everything we've got
490 result
= self
.charbuffer
493 # Return the first chars characters
494 result
= self
.charbuffer
[:chars
]
495 self
.charbuffer
= self
.charbuffer
[chars
:]
498 def readline(self
, size
=None, keepends
=True):
500 """ Read one line from the input stream and return the
503 size, if given, is passed as size argument to the
507 # If we have lines cached from an earlier read, return
508 # them unconditionally
510 line
= self
.linebuffer
[0]
511 del self
.linebuffer
[0]
512 if len(self
.linebuffer
) == 1:
513 # revert to charbuffer mode; we might need more data
515 self
.charbuffer
= self
.linebuffer
[0]
516 self
.linebuffer
= None
518 line
= line
.splitlines(False)[0]
521 readsize
= size
or 72
523 # If size is given, we call read() only once
525 data
= self
.read(readsize
, firstline
=True)
527 # If we're at a "\r" read one extra character (which might
528 # be a "\n") to get a proper line ending. If the stream is
529 # temporarily exhausted we return the wrong line ending.
530 if data
.endswith("\r"):
531 data
+= self
.read(size
=1, chars
=1)
534 lines
= line
.splitlines(True)
537 # More than one line result; the first line is a full line
542 # cache the remaining lines
543 lines
[-1] += self
.charbuffer
544 self
.linebuffer
= lines
545 self
.charbuffer
= None
547 # only one remaining line, put it back into charbuffer
548 self
.charbuffer
= lines
[0] + self
.charbuffer
550 line
= line
.splitlines(False)[0]
552 line0withend
= lines
[0]
553 line0withoutend
= lines
[0].splitlines(False)[0]
554 if line0withend
!= line0withoutend
: # We really have a line end
555 # Put the rest back together and keep it until the next call
556 self
.charbuffer
= "".join(lines
[1:]) + self
.charbuffer
560 line
= line0withoutend
562 # we didn't get anything or this was our only try
563 if not data
or size
is not None:
564 if line
and not keepends
:
565 line
= line
.splitlines(False)[0]
571 def readlines(self
, sizehint
=None, keepends
=True):
573 """ Read all lines available on the input stream
574 and return them as list of lines.
576 Line breaks are implemented using the codec's decoder
577 method and are included in the list entries.
579 sizehint, if given, is ignored since there is no efficient
580 way to finding the true end-of-line.
584 return data
.splitlines(keepends
)
588 """ Resets the codec buffers used for keeping state.
590 Note that no stream repositioning should take place.
591 This method is primarily intended to be able to recover
592 from decoding errors.
596 self
.charbuffer
= u
""
597 self
.linebuffer
= None
599 def seek(self
, offset
, whence
=0):
600 """ Set the input stream's current position.
602 Resets the codec buffers used for keeping state.
605 self
.stream
.seek(offset
, whence
)
609 """ Return the next decoded line from the input stream."""
610 line
= self
.readline()
618 def __getattr__(self
, name
,
621 """ Inherit all other methods from the underlying stream.
623 return getattr(self
.stream
, name
)
628 def __exit__(self
, type, value
, tb
):
633 class StreamReaderWriter
:
635 """ StreamReaderWriter instances allow wrapping streams which
636 work in both read and write modes.
638 The design is such that one can use the factory functions
639 returned by the codec.lookup() function to construct the
643 # Optional attributes set by the file wrappers below
646 def __init__(self
, stream
, Reader
, Writer
, errors
='strict'):
648 """ Creates a StreamReaderWriter instance.
650 stream must be a Stream-like object.
652 Reader, Writer must be factory functions or classes
653 providing the StreamReader, StreamWriter interface resp.
655 Error handling is done in the same way as defined for the
656 StreamWriter/Readers.
660 self
.reader
= Reader(stream
, errors
)
661 self
.writer
= Writer(stream
, errors
)
664 def read(self
, size
=-1):
666 return self
.reader
.read(size
)
668 def readline(self
, size
=None):
670 return self
.reader
.readline(size
)
672 def readlines(self
, sizehint
=None):
674 return self
.reader
.readlines(sizehint
)
678 """ Return the next decoded line from the input stream."""
679 return self
.reader
.next()
684 def write(self
, data
):
686 return self
.writer
.write(data
)
688 def writelines(self
, list):
690 return self
.writer
.writelines(list)
697 def __getattr__(self
, name
,
700 """ Inherit all other methods from the underlying stream.
702 return getattr(self
.stream
, name
)
704 # these are needed to make "with codecs.open(...)" work properly
709 def __exit__(self
, type, value
, tb
):
716 """ StreamRecoder instances provide a frontend - backend
717 view of encoding data.
719 They use the complete set of APIs returned by the
720 codecs.lookup() function to implement their task.
722 Data written to the stream is first decoded into an
723 intermediate format (which is dependent on the given codec
724 combination) and then written to the stream using an instance
725 of the provided Writer class.
727 In the other direction, data is read from the stream using a
728 Reader instance and then return encoded data to the caller.
731 # Optional attributes set by the file wrappers below
732 data_encoding
= 'unknown'
733 file_encoding
= 'unknown'
735 def __init__(self
, stream
, encode
, decode
, Reader
, Writer
,
738 """ Creates a StreamRecoder instance which implements a two-way
739 conversion: encode and decode work on the frontend (the
740 input to .read() and output of .write()) while
741 Reader and Writer work on the backend (reading and
742 writing to the stream).
744 You can use these objects to do transparent direct
745 recodings from e.g. latin-1 to utf-8 and back.
747 stream must be a file-like object.
749 encode, decode must adhere to the Codec interface, Reader,
750 Writer must be factory functions or classes providing the
751 StreamReader, StreamWriter interface resp.
753 encode and decode are needed for the frontend translation,
754 Reader and Writer for the backend translation. Unicode is
755 used as intermediate encoding.
757 Error handling is done in the same way as defined for the
758 StreamWriter/Readers.
764 self
.reader
= Reader(stream
, errors
)
765 self
.writer
= Writer(stream
, errors
)
768 def read(self
, size
=-1):
770 data
= self
.reader
.read(size
)
771 data
, bytesencoded
= self
.encode(data
, self
.errors
)
774 def readline(self
, size
=None):
777 data
= self
.reader
.readline()
779 data
= self
.reader
.readline(size
)
780 data
, bytesencoded
= self
.encode(data
, self
.errors
)
783 def readlines(self
, sizehint
=None):
785 data
= self
.reader
.read()
786 data
, bytesencoded
= self
.encode(data
, self
.errors
)
787 return data
.splitlines(1)
791 """ Return the next decoded line from the input stream."""
792 data
= self
.reader
.next()
793 data
, bytesencoded
= self
.encode(data
, self
.errors
)
799 def write(self
, data
):
801 data
, bytesdecoded
= self
.decode(data
, self
.errors
)
802 return self
.writer
.write(data
)
804 def writelines(self
, list):
807 data
, bytesdecoded
= self
.decode(data
, self
.errors
)
808 return self
.writer
.write(data
)
815 def __getattr__(self
, name
,
818 """ Inherit all other methods from the underlying stream.
820 return getattr(self
.stream
, name
)
825 def __exit__(self
, type, value
, tb
):
830 def open(filename
, mode
='rb', encoding
=None, errors
='strict', buffering
=1):
832 """ Open an encoded file using the given mode and return
833 a wrapped version providing transparent encoding/decoding.
835 Note: The wrapped version will only accept the object format
836 defined by the codecs, i.e. Unicode objects for most builtin
837 codecs. Output is also codec dependent and will usually be
840 Files are always opened in binary mode, even if no binary mode
841 was specified. This is done to avoid data loss due to encodings
842 using 8-bit values. The default file mode is 'rb' meaning to
843 open the file in binary read mode.
845 encoding specifies the encoding which is to be used for the
848 errors may be given to define the error handling. It defaults
849 to 'strict' which causes ValueErrors to be raised in case an
850 encoding error occurs.
852 buffering has the same meaning as for the builtin open() API.
853 It defaults to line buffered.
855 The returned wrapped file object provides an extra attribute
856 .encoding which allows querying the used encoding. This
857 attribute is only available if an encoding was specified as
861 if encoding
is not None and \
863 # Force opening of the file in binary mode
865 file = __builtin__
.open(filename
, mode
, buffering
)
868 info
= lookup(encoding
)
869 srw
= StreamReaderWriter(file, info
.streamreader
, info
.streamwriter
, errors
)
870 # Add attributes to simplify introspection
871 srw
.encoding
= encoding
874 def EncodedFile(file, data_encoding
, file_encoding
=None, errors
='strict'):
876 """ Return a wrapped version of file which provides transparent
877 encoding translation.
879 Strings written to the wrapped file are interpreted according
880 to the given data_encoding and then written to the original
881 file as string using file_encoding. The intermediate encoding
882 will usually be Unicode but depends on the specified codecs.
884 Strings are read from the file using file_encoding and then
885 passed back to the caller as string using data_encoding.
887 If file_encoding is not given, it defaults to data_encoding.
889 errors may be given to define the error handling. It defaults
890 to 'strict' which causes ValueErrors to be raised in case an
891 encoding error occurs.
893 The returned wrapped file object provides two extra attributes
894 .data_encoding and .file_encoding which reflect the given
895 parameters of the same name. The attributes can be used for
896 introspection by Python programs.
899 if file_encoding
is None:
900 file_encoding
= data_encoding
901 data_info
= lookup(data_encoding
)
902 file_info
= lookup(file_encoding
)
903 sr
= StreamRecoder(file, data_info
.encode
, data_info
.decode
,
904 file_info
.streamreader
, file_info
.streamwriter
, errors
)
905 # Add attributes to simplify introspection
906 sr
.data_encoding
= data_encoding
907 sr
.file_encoding
= file_encoding
910 ### Helpers for codec lookup
912 def getencoder(encoding
):
914 """ Lookup up the codec for the given encoding and return
915 its encoder function.
917 Raises a LookupError in case the encoding cannot be found.
920 return lookup(encoding
).encode
922 def getdecoder(encoding
):
924 """ Lookup up the codec for the given encoding and return
925 its decoder function.
927 Raises a LookupError in case the encoding cannot be found.
930 return lookup(encoding
).decode
932 def getincrementalencoder(encoding
):
934 """ Lookup up the codec for the given encoding and return
935 its IncrementalEncoder class or factory function.
937 Raises a LookupError in case the encoding cannot be found
938 or the codecs doesn't provide an incremental encoder.
941 encoder
= lookup(encoding
).incrementalencoder
943 raise LookupError(encoding
)
946 def getincrementaldecoder(encoding
):
948 """ Lookup up the codec for the given encoding and return
949 its IncrementalDecoder class or factory function.
951 Raises a LookupError in case the encoding cannot be found
952 or the codecs doesn't provide an incremental decoder.
955 decoder
= lookup(encoding
).incrementaldecoder
957 raise LookupError(encoding
)
960 def getreader(encoding
):
962 """ Lookup up the codec for the given encoding and return
963 its StreamReader class or factory function.
965 Raises a LookupError in case the encoding cannot be found.
968 return lookup(encoding
).streamreader
970 def getwriter(encoding
):
972 """ Lookup up the codec for the given encoding and return
973 its StreamWriter class or factory function.
975 Raises a LookupError in case the encoding cannot be found.
978 return lookup(encoding
).streamwriter
980 def iterencode(iterator
, encoding
, errors
='strict', **kwargs
):
984 Encodes the input strings from the iterator using a IncrementalEncoder.
986 errors and kwargs are passed through to the IncrementalEncoder
989 encoder
= getincrementalencoder(encoding
)(errors
, **kwargs
)
990 for input in iterator
:
991 output
= encoder
.encode(input)
994 output
= encoder
.encode("", True)
998 def iterdecode(iterator
, encoding
, errors
='strict', **kwargs
):
1002 Decodes the input strings from the iterator using a IncrementalDecoder.
1004 errors and kwargs are passed through to the IncrementalDecoder
1007 decoder
= getincrementaldecoder(encoding
)(errors
, **kwargs
)
1008 for input in iterator
:
1009 output
= decoder
.decode(input)
1012 output
= decoder
.decode("", True)
1016 ### Helpers for charmap-based codecs
1018 def make_identity_dict(rng
):
1020 """ make_identity_dict(rng) -> dict
1022 Return a dictionary where elements of the rng sequence are
1023 mapped to themselves.
1031 def make_encoding_map(decoding_map
):
1033 """ Creates an encoding map from a decoding map.
1035 If a target mapping in the decoding map occurs multiple
1036 times, then that target is mapped to None (undefined mapping),
1037 causing an exception when encountered by the charmap codec
1040 One example where this happens is cp875.py which decodes
1041 multiple character to \u001a.
1045 for k
,v
in decoding_map
.items():
1055 strict_errors
= lookup_error("strict")
1056 ignore_errors
= lookup_error("ignore")
1057 replace_errors
= lookup_error("replace")
1058 xmlcharrefreplace_errors
= lookup_error("xmlcharrefreplace")
1059 backslashreplace_errors
= lookup_error("backslashreplace")
1061 # In --disable-unicode builds, these error handler are missing
1062 strict_errors
= None
1063 ignore_errors
= None
1064 replace_errors
= None
1065 xmlcharrefreplace_errors
= None
1066 backslashreplace_errors
= None
1068 # Tell modulefinder that using codecs probably needs the encodings
1076 if __name__
== '__main__':
1078 # Make stdout translate Latin-1 output into UTF-8 output
1079 sys
.stdout
= EncodedFile(sys
.stdout
, 'latin-1', 'utf-8')
1081 # Have stdin translate Latin-1 input into UTF-8 input
1082 sys
.stdin
= EncodedFile(sys
.stdin
, 'utf-8', 'latin-1')