1 """ codecs -- Python Codec Registry, API and helpers.
4 Written by Marc-Andre Lemburg (mal@lemburg.com).
6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
10 import __builtin__
, sys
12 ### Registry and builtin stateless codec functions
# Pull the registry functions and the builtin stateless codec functions
# from the C accelerator module; without it this module cannot work.
try:
    from _codecs import *
except ImportError as why:
    raise SystemError('Failed to load the builtin codecs: %s' % why)
# Public names exported by "from codecs import *".
# backslashreplace_errors is defined next to the other error handler
# aliases at the end of this module, so it is exported alongside them.
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors", "backslashreplace_errors",
           "register_error", "lookup_error"]
30 # Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
31 # and its possible byte string values
32 # for UTF8/UTF16/UTF32 output and little/big endian machines
# UTF-8
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16, little endian
BOM_UTF16_LE = '\xff\xfe'
BOM_LE = BOM_UTF16_LE

# UTF-16, big endian
BOM_UTF16_BE = '\xfe\xff'
BOM_BE = BOM_UTF16_BE

# UTF-32, little endian
BOM_UTF32_LE = '\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = '\x00\x00\xfe\xff'

# UTF-16 / UTF-32 in the native endianness of the running machine
if sys.byteorder == 'little':
    BOM_UTF16, BOM_UTF32 = BOM_UTF16_LE, BOM_UTF32_LE
else:
    BOM_UTF16, BOM_UTF32 = BOM_UTF16_BE, BOM_UTF32_BE
BOM = BOM_UTF16

# Old broken names (don't use in new code): despite the "32"/"64" in
# the names, these alias the UTF-16 and UTF-32 marks respectively.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
73 ### Codec base classes (defining the API)
class CodecInfo(tuple):
    """ Codec description record.

        Behaves as the 4-tuple (encode, decode, streamreader,
        streamwriter) and additionally exposes those entries, the
        incremental encoder/decoder factories and the codec name as
        attributes.
    """

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None):
        # Only the four classic entries are part of the tuple value;
        # everything else is reachable through attributes only.
        info = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        info.name = name
        info.encode = encode
        info.decode = decode
        info.incrementalencoder = incrementalencoder
        info.incrementaldecoder = incrementaldecoder
        info.streamwriter = streamwriter
        info.streamreader = streamreader
        return info

    def __repr__(self):
        klass = self.__class__
        return "<%s.%s object for encoding %s at 0x%x>" % (
            klass.__module__, klass.__name__, self.name, id(self))
class Codec:

    """ Interface definition for stateless encoders/decoders.

        Both .encode() and .decode() accept an errors argument that
        selects the error handling scheme.  Predefined values:

         'strict'            - raise a ValueError (or a subclass)
         'ignore'            - ignore the character and continue with
                               the next
         'replace'           - replace with a suitable replacement
                               character; the builtin Unicode codecs use
                               U+FFFD REPLACEMENT CHARACTER on decoding
                               and '?' on encoding
         'xmlcharrefreplace' - replace with the appropriate XML
                               character reference (only for encoding)
         'backslashreplace'  - replace with backslashed escape sequences
                               (only for encoding)

        Further values can be made available via register_error.
    """

    def encode(self, input, errors='strict'):

        """ Encode the object input; return a tuple
            (output object, length consumed).

            errors selects the error handling; the default is 'strict'.

            Implementations may not store state on the Codec instance;
            codecs that need state to work efficiently should use the
            stream codec classes instead.

            Zero length input must be handled and yields an empty
            object of the output type.
        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decode the object input; return a tuple
            (output object, length consumed).

            input must provide the bf_getreadbuf buffer slot; Python
            strings, buffer objects and memory mapped files do.

            errors selects the error handling; the default is 'strict'.

            Implementations may not store state on the Codec instance;
            codecs that need state to work efficiently should use the
            stream codec classes instead.

            Zero length input must be handled and yields an empty
            object of the output type.
        """
        raise NotImplementedError
class IncrementalEncoder(object):
    """
    Encoder that consumes its input piece by piece.

    Successive chunks are handed to encode(); the encoder keeps whatever
    state the encoding process needs between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalEncoder instance.

        errors selects the error handling scheme; see the module
        docstring for the list of possible values.
        """
        self.errors, self.buffer = errors, ""

    def encode(self, input, final=False):
        """
        Encode input and return the resulting object.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the encoder to the initial state.

        The base implementation does nothing.
        """
class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    Base class for incremental encoders that cannot always encode all of
    their input at once: whatever _buffer_encode() leaves unconsumed is
    kept in self.buffer and prepended to the input of the next encode()
    call.
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        # input not yet encoded, carried over to the next encode() call
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        # Hook for subclasses: encode input and return an
        # (output, length consumed) tuple.
        raise NotImplementedError

    def encode(self, input, final=False):
        # Prepend what the previous call could not consume.
        pending = self.buffer + input
        output, used = self._buffer_encode(pending, self.errors, final)
        # Whatever was not consumed waits for the next call.
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""
class IncrementalDecoder(object):
    """
    Decoder that consumes its input piece by piece.

    Successive chunks are handed to decode(); the decoder keeps whatever
    state the decoding process needs between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalDecoder instance.

        errors selects the error handling scheme; see the module
        docstring for the list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decode input and return the resulting object.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the decoder to the initial state.

        The base implementation does nothing.
        """
class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    Base class for incremental decoders that must cope with incomplete
    byte sequences: whatever _buffer_decode() leaves unconsumed is kept
    in self.buffer and prepended to the input of the next decode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # input not yet decoded, carried over to the next decode() call
        self.buffer = ""

    def _buffer_decode(self, input, errors, final):
        # Hook for subclasses: decode input and return an
        # (output, length consumed) tuple.
        raise NotImplementedError

    def decode(self, input, final=False):
        # Prepend what the previous call could not consume.
        pending = self.buffer + input
        output, used = self._buffer_decode(pending, self.errors, final)
        # Whatever was not consumed waits for the next call.
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = ""
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example of how this is
# done.
class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Create a StreamWriter instance.

            stream must be a file-like object open for writing.

            errors selects the error handling scheme.  Predefined
            values:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - replace with the appropriate XML
                                   character reference
             'backslashreplace'  - replace with backslashed escape
                                   sequences (only for encoding)

            Further values can be made available via register_error.
        """
        self.stream, self.errors = stream, errors

    def write(self, object):

        """ Encode object with self.encode() and write the result to
            self.stream.
        """
        # The "length consumed" half of the result is not needed here.
        encoded = self.encode(object, self.errors)[0]
        self.stream.write(encoded)

    def writelines(self, list):

        """ Write the concatenation of the strings in list to the
            stream using .write().
        """
        joined = ''.join(list)
        self.write(joined)

    def reset(self):

        """ Flush and reset the codec buffers used for keeping state.

            Calling this method should leave the output in a clean
            state that allows appending fresh data without having to
            rescan the whole stream to recover state.

            The base implementation keeps no state and does nothing.
        """
        pass

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Leaving the with-block closes the wrapped stream.
        self.stream.close()
class StreamReader(Codec):

    def __init__(self, stream, errors='strict'):

        """ Create a StreamReader instance.

            stream must be a file-like object open for reading.

            errors selects the error handling scheme.  Predefined
            values:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character

            Further values can be made available via register_error.
        """
        self.stream = stream
        self.errors = errors
        # bytes read from the stream that could not be decoded yet
        self.bytebuffer = ""
        # For str->str decoding this will stay a str
        # For str->unicode decoding the first read will promote it to unicode
        self.charbuffer = ""
        # list of already split lines; when set, readline() serves from
        # it instead of charbuffer
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decode data from the stream self.stream and return the
            resulting object.

            chars is the number of characters to return.  read() never
            returns more than chars characters, but may return fewer
            if not enough are available.  A negative value (the
            default) returns everything that has been decoded.

            size is the approximate maximum number of bytes to read
            from the stream per attempt.  A negative value (the
            default) reads and decodes as much as possible.

            If firstline is true and a UnicodeDecodeError occurs after
            the first line terminator in the input, the data before the
            error is decoded and buffered and the undecodable rest is
            kept for a later call; if the decodable prefix holds at
            most one line, the error is re-raised.
        """
        # If we have lines cached, first merge them back into characters
        if self.linebuffer:
            self.charbuffer = "".join(self.linebuffer)
            self.linebuffer = None

        # read until we get the required number of characters (if available)
        while True:
            # can the request be satisfied from the character buffer?
            if chars < 0:
                if size < 0:
                    if self.charbuffer:
                        break
                elif len(self.charbuffer) >= size:
                    break
            else:
                if len(self.charbuffer) >= chars:
                    break
            # we need more data
            if size < 0:
                newdata = self.stream.read()
            else:
                newdata = self.stream.read(size)
            # decode bytes (those remaining from the last call included)
            data = self.bytebuffer + newdata
            try:
                newchars, decodedbytes = self.decode(data, self.errors)
            except UnicodeDecodeError as exc:
                if firstline:
                    # Retry with only the part before the error; this is
                    # acceptable if it contains more than one line.
                    newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
                    lines = newchars.splitlines(True)
                    if len(lines) <= 1:
                        raise
                else:
                    raise
            # keep undecoded bytes until the next call
            self.bytebuffer = data[decodedbytes:]
            # put new characters in the character buffer
            self.charbuffer += newchars
            # there was no data available
            if not newdata:
                break
        if chars < 0:
            # Return everything we've got
            result = self.charbuffer
            self.charbuffer = ""
        else:
            # Return the first chars characters
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the read()
            method, and read() is then called only once.  Without
            size, reading starts with 72 bytes and the amount is
            doubled on each further attempt while it is below 8000.

            If keepends is false the line ending is stripped from the
            result.
        """
        # If we have lines cached from an earlier read, return
        # them unconditionally
        if self.linebuffer:
            line = self.linebuffer[0]
            del self.linebuffer[0]
            if len(self.linebuffer) == 1:
                # revert to charbuffer mode; we might need more data
                # next time
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(False)[0]
            return line

        readsize = size or 72
        line = ""
        # If size is given, we call read() only once
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # If we're at a "\r" read one extra character (which might
                # be a "\n") to get a proper line ending. If the stream is
                # temporarily exhausted we return the wrong line ending.
                if data.endswith("\r"):
                    data += self.read(size=1, chars=1)

            line += data
            lines = line.splitlines(True)
            if lines:
                if len(lines) > 1:
                    # More than one line result; the first line is a full line
                    # to return
                    line = lines[0]
                    del lines[0]
                    if len(lines) > 1:
                        # cache the remaining lines
                        lines[-1] += self.charbuffer
                        self.linebuffer = lines
                        self.charbuffer = None
                    else:
                        # only one remaining line, put it back into charbuffer
                        self.charbuffer = lines[0] + self.charbuffer
                    if not keepends:
                        line = line.splitlines(False)[0]
                    break
                line0withend = lines[0]
                line0withoutend = lines[0].splitlines(False)[0]
                if line0withend != line0withoutend: # We really have a line end
                    # Put the rest back together and keep it until the next call
                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
                    if keepends:
                        line = line0withend
                    else:
                        line = line0withoutend
                    break
            # we didn't get anything or this was our only try
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(False)[0]
                break
            if readsize < 8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as list of lines.

            The whole stream is decoded with read() and split with
            splitlines(); line endings are included in the entries
            unless keepends is false.

            sizehint, if given, is ignored since there is no efficient
            way of finding the true end-of-line.
        """
        data = self.read()
        return data.splitlines(keepends)

    def reset(self):

        """ Reset the codec buffers used for keeping state.

            Note that no stream repositioning should take place.
            This method is primarily intended to be able to recover
            from decoding errors.
        """
        self.bytebuffer = ""
        # Same initial value as in __init__: a plain str, so that
        # str->str decoders keep returning str after a reset()/seek();
        # a unicode "" here would promote their output to unicode.
        self.charbuffer = ""
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            Resets the codec buffers used for keeping state.
        """
        self.reset()
        self.stream.seek(offset, whence)

    def next(self):

        """ Return the next decoded line from the input stream."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __iter__(self):
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Leaving the with-block closes the wrapped stream.
        self.stream.close()
class StreamReaderWriter:

    """ Wrapper for streams that work in both read and write modes.

        Reads are delegated to a StreamReader-like object and writes to
        a StreamWriter-like object, both created over the same stream.
        The factory functions returned by the codec lookup() function
        can be used to construct the instance.
    """
    # Optional attributes set by the file wrappers below
    encoding = 'unknown'

    def __init__(self, stream, Reader, Writer, errors='strict'):

        """ Create a StreamReaderWriter instance.

            stream must be a Stream-like object.

            Reader, Writer must be factory functions or classes
            providing the StreamReader, StreamWriter interface resp.;
            each is called as factory(stream, errors).

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.
        """
        self.stream = stream
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    # --- reading side: forwarded to self.reader ---

    def read(self, size=-1):
        reader = self.reader
        return reader.read(size)

    def readline(self, size=None):
        reader = self.reader
        return reader.readline(size)

    def readlines(self, sizehint=None):
        reader = self.reader
        return reader.readlines(sizehint)

    def next(self):

        """ Return the next decoded line from the input stream."""
        reader = self.reader
        return reader.next()

    def __iter__(self):
        return self

    # --- writing side: forwarded to self.writer ---

    def write(self, data):
        writer = self.writer
        return writer.write(data)

    def writelines(self, list):
        writer = self.writer
        return writer.writelines(list)

    def reset(self):
        # Reader first, then writer.
        self.reader.reset()
        self.writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    # these are needed to make "with codecs.open(...)" work properly

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()
class StreamRecoder:

    """ Frontend/backend view of encoding data.

        Uses the complete set of APIs returned by the codec lookup()
        function.

        Data written through the recoder is first passed through the
        decode function into an intermediate format (which depends on
        the given codec combination) and then written to the stream by
        an instance of the provided Writer class.

        In the other direction, data is read from the stream by a
        Reader instance, passed through the encode function and
        returned to the caller.
    """
    # Optional attributes set by the file wrappers below
    data_encoding = 'unknown'
    file_encoding = 'unknown'

    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Create a StreamRecoder instance implementing a two-way
            conversion: encode and decode work on the frontend (the
            data handed to .write() and returned by .read()) while
            Reader and Writer work on the backend (reading from and
            writing to the stream).

            This allows transparent direct recodings, e.g. from
            latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode, decode must adhere to the Codec interface; Reader,
            Writer must be factory functions or classes providing the
            StreamReader, StreamWriter interface resp.; each is called
            as factory(stream, errors).

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.
        """
        self.stream = stream
        self.encode = encode
        self.decode = decode
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):
        raw = self.reader.read(size)
        # encode() returns (output, length consumed); only output is used
        return self.encode(raw, self.errors)[0]

    def readline(self, size=None):
        # Only forward size when the caller actually supplied one.
        if size is not None:
            raw = self.reader.readline(size)
        else:
            raw = self.reader.readline()
        return self.encode(raw, self.errors)[0]

    def readlines(self, sizehint=None):
        # sizehint is ignored: everything is read, recoded, then split.
        raw = self.reader.read()
        recoded = self.encode(raw, self.errors)[0]
        return recoded.splitlines(1)

    def next(self):

        """ Return the next decoded line from the input stream."""
        raw = self.reader.next()
        return self.encode(raw, self.errors)[0]

    def __iter__(self):
        return self

    def write(self, data):
        intermediate = self.decode(data, self.errors)[0]
        return self.writer.write(intermediate)

    def writelines(self, list):
        joined = ''.join(list)
        intermediate = self.decode(joined, self.errors)[0]
        return self.writer.write(intermediate)

    def reset(self):
        # Reader first, then writer.
        self.reader.reset()
        self.writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Leaving the with-block closes the wrapped stream.
        self.stream.close()
def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

    """ Open an encoded file using the given mode and return
        a wrapped version providing transparent encoding/decoding.

        Note: The wrapped version will only accept the object format
        defined by the codecs, i.e. Unicode objects for most builtin
        codecs. Output is also codec dependent.

        When an encoding is given, the file is always opened in binary
        mode, even if no binary mode was specified ('b' is appended to
        mode). This is done to avoid data loss due to encodings using
        8-bit values. The default file mode is 'rb' meaning to open the
        file in binary read mode.

        encoding specifies the encoding which is to be used for the
        file. If it is None, the plain file object returned by the
        builtin open() is returned unwrapped.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        buffering has the same meaning as for the builtin open() API.
        It defaults to line buffered.

        The returned wrapped file object provides an extra attribute
        .encoding which allows querying the used encoding. This
        attribute is only available if an encoding was specified.

        Any exception raised by the codec lookup or while building the
        wrapper is propagated after the opened file has been closed.
    """
    if encoding is not None and \
       'b' not in mode:
        # Force opening of the file in binary mode
        mode = mode + 'b'
    file = __builtin__.open(filename, mode, buffering)
    if encoding is None:
        return file
    try:
        info = lookup(encoding)
        srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
    except:
        # Do not leak the already opened file when the encoding is
        # unknown or the wrapper cannot be constructed; the original
        # exception is re-raised unchanged.
        file.close()
        raise
    # Add attributes to simplify introspection
    srw.encoding = encoding
    return srw
def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Return a wrapped version of file which provides transparent
        encoding translation.

        Strings written to the wrapped file are interpreted according
        to data_encoding and then written to the original file as
        strings using file_encoding. The intermediate encoding will
        usually be Unicode but depends on the specified codecs.

        Strings are read from the file using file_encoding and then
        passed back to the caller as strings using data_encoding.

        If file_encoding is not given, it defaults to data_encoding.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        The returned wrapped file object provides two extra attributes
        .data_encoding and .file_encoding which reflect the given
        parameters of the same name. The attributes can be used for
        introspection by Python programs.
    """
    if file_encoding is None:
        file_encoding = data_encoding
    # Frontend codec first, backend codec second (same lookup order as
    # before, so a failing lookup raises for the same encoding).
    frontend = lookup(data_encoding)
    backend = lookup(file_encoding)
    recoder = StreamRecoder(file,
                            frontend.encode, frontend.decode,
                            backend.streamreader, backend.streamwriter,
                            errors)
    # Add attributes to simplify introspection
    recoder.data_encoding = data_encoding
    recoder.file_encoding = file_encoding
    return recoder
862 ### Helpers for codec lookup
def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.
    """
    info = lookup(encoding)
    return info.encode
def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.
    """
    info = lookup(encoding)
    return info.decode
def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.
    """
    factory = lookup(encoding).incrementalencoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental encoder.
    raise LookupError(encoding)
def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.
    """
    factory = lookup(encoding).incrementaldecoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental decoder.
    raise LookupError(encoding)
def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.
    """
    info = lookup(encoding)
    return info.streamreader
def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.
    """
    info = lookup(encoding)
    return info.streamwriter
def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Encodes the input strings from the iterator using an
    IncrementalEncoder and yields every non-empty piece of output.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    make_encoder = getincrementalencoder(encoding)
    encoder = make_encoder(errors, **kwargs)
    for input in iterator:
        chunk = encoder.encode(input)
        if chunk:
            yield chunk
    # Final call with empty input flushes whatever the encoder still holds.
    tail = encoder.encode("", True)
    if tail:
        yield tail
def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Decodes the input strings from the iterator using an
    IncrementalDecoder and yields every non-empty piece of output.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    make_decoder = getincrementaldecoder(encoding)
    decoder = make_decoder(errors, **kwargs)
    for input in iterator:
        chunk = decoder.decode(input)
        if chunk:
            yield chunk
    # Final call with empty input flushes whatever the decoder still holds.
    tail = decoder.decode("", True)
    if tail:
        yield tail
968 ### Helpers for charmap-based codecs
def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary where elements of the rng sequence are
        mapped to themselves.
    """
    return dict((item, item) for item in rng)
def make_encoding_map(decoding_map):

    """ Create an encoding map from a decoding map.

        The result maps every target of decoding_map back to its key.
        If a target occurs multiple times in the decoding map, it is
        mapped to None (undefined mapping) instead, causing an
        exception when encountered by the charmap codec during
        translation.

        One example where this happens is cp875.py which decodes
        multiple characters to \\u001a.
    """
    encoding_map = {}
    for source, target in decoding_map.items():
        if target in encoding_map:
            # Ambiguous reverse mapping: mark the target as undefined.
            encoding_map[target] = None
        else:
            encoding_map[target] = source
    return encoding_map
# Module-level aliases for the predefined error handling callbacks.
try:
    strict_errors = lookup_error("strict")
    ignore_errors = lookup_error("ignore")
    replace_errors = lookup_error("replace")
    xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
    backslashreplace_errors = lookup_error("backslashreplace")
except LookupError:
    # In --disable-unicode builds, these error handlers are missing
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None
1020 # Tell modulefinder that using codecs probably needs the encodings
if __name__ == '__main__':

    # Make stdout translate Latin-1 output into UTF-8 output
    sys.stdout = EncodedFile(sys.stdout,
                             data_encoding='latin-1',
                             file_encoding='utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    sys.stdin = EncodedFile(sys.stdin,
                            data_encoding='utf-8',
                            file_encoding='latin-1')