1 """ Python 'utf-8-sig' Codec
2 This works similarly to UTF-8, with the following changes:
4 * On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
7 * On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 and prepend the UTF-8 encoded BOM.

    *errors* is handed unchanged to ``codecs.utf_8_encode``.

    Returns a ``(data, length)`` tuple: ``codecs.BOM_UTF8`` followed by the
    encoded bytes, and ``len(input)`` as the amount of input consumed.
    """
    payload, _ = codecs.utf_8_encode(input, errors)
    return (codecs.BOM_UTF8 + payload, len(input))
# Stateless decoder: returns an (output, consumed) tuple, where `output` comes
# from codecs.utf_8_decode and the consumed count has `prefix` added to it.
# NOTE(review): the statements that assign `prefix`, and the body of the BOM
# branch below, are not present in this extract -- presumably `prefix` is the
# number of BOM bytes skipped (0 or 3). Confirm against the full file.
17 def decode(input, errors
='strict'):
# Does the input begin with the three-byte UTF-8 encoded BOM?
19 if input[:3] == codecs
.BOM_UTF8
:
# The third positional argument of utf_8_decode is the `final` flag (it is
# passed as `final` by the incremental decoder in this file); here it is
# always True.
22 (output
, consumed
) = codecs
.utf_8_decode(input, errors
, True)
# Report the consumed length with `prefix` added on top of what
# utf_8_decode itself consumed.
23 return (output
, consumed
+prefix
)
# Incremental encoder built on codecs.IncrementalEncoder. Its encode() has one
# return path that prepends codecs.BOM_UTF8 and one that does not.
# NOTE(review): the condition selecting between the two return paths is not
# present in this extract -- presumably the BOM is emitted only on the first
# call after construction/reset. Confirm against the full file.
25 class IncrementalEncoder(codecs
.IncrementalEncoder
):
26 def __init__(self
, errors
='strict'):
# Delegate to the base-class initializer with the same error handler name.
27 codecs
.IncrementalEncoder
.__init
__(self
, errors
)
# `final` is accepted but not used by any of the visible statements.
30 def encode(self
, input, final
=False):
# Return path 1: BOM followed by the UTF-8 encoding of `input`, using the
# error handler stored in self.errors. Only the bytes ([0]) are returned.
33 return codecs
.BOM_UTF8
+ codecs
.utf_8_encode(input, self
.errors
)[0]
# Return path 2: plain UTF-8 encoding of `input`, no BOM.
35 return codecs
.utf_8_encode(input, self
.errors
)[0]
# Base-class reset call.
# NOTE(review): the `def` line this call belongs to is not present in this
# extract -- presumably it is the body of reset(self).
38 codecs
.IncrementalEncoder
.reset(self
)
# Incremental decoder built on codecs.BufferedIncrementalDecoder. Its
# _buffer_decode() skips a leading UTF-8 encoded BOM when one is present.
# NOTE(review): several statements (the conditions surrounding the checks
# below and the value returned when there is not enough data) are not present
# in this extract; the comments here describe only the visible statements.
41 class IncrementalDecoder(codecs
.BufferedIncrementalDecoder
):
42 def __init__(self
, errors
='strict'):
# Delegate to the base-class initializer with the same error handler name.
43 codecs
.BufferedIncrementalDecoder
.__init
__(self
, errors
)
# Returns an (output, consumed) tuple on the visible return paths.
46 def _buffer_decode(self
, input, errors
, final
):
# True when `input` is a prefix of the BOM, i.e. the bytes seen so far could
# still turn out to be a BOM.
49 if codecs
.BOM_UTF8
.startswith(input):
50 # not enough data to decide if this really is a BOM
51 # => try again on the next call
# A complete BOM is present: decode everything after it ...
57 if input[:3] == codecs
.BOM_UTF8
:
58 (output
, consumed
) = codecs
.utf_8_decode(input[3:], errors
, final
)
# ... and add 3 to the consumed count to account for the skipped BOM bytes.
59 return (output
, consumed
+3)
# Remaining path: decode the input unchanged.
60 return codecs
.utf_8_decode(input, errors
, final
)
# Base-class reset call.
# NOTE(review): the `def` line this call belongs to is not present in this
# extract -- presumably it is the body of reset(self).
63 codecs
.BufferedIncrementalDecoder
.reset(self
)
# Stream writer built on codecs.StreamWriter. Its encode() sends the first
# call through the module-level encode() (which prepends the BOM) and rebinds
# itself so that later calls use plain codecs.utf_8_encode.
66 class StreamWriter(codecs
.StreamWriter
):
# Base-class reset call followed by an AttributeError handler.
# NOTE(review): the `def` line and the `try:` body guarded by the handler are
# not present in this extract -- presumably reset(self) removes the
# per-instance `encode` override set below. Confirm against the full file.
68 codecs
.StreamWriter
.reset(self
)
71 except AttributeError:
74 def encode(self
, input, errors
='strict'):
# Store the plain UTF-8 encoder as an instance attribute named `encode`; an
# instance attribute takes precedence over this method on later lookups.
75 self
.encode
= codecs
.utf_8_encode
# `encode` here is the module-level function, not this method.
76 return encode(input, errors
)
# Stream reader built on codecs.StreamReader. Its decode() skips a leading
# UTF-8 encoded BOM and, once it has decided whether a BOM is present, rebinds
# itself so that later calls use plain codecs.utf_8_decode.
78 class StreamReader(codecs
.StreamReader
):
# Base-class reset call followed by an AttributeError handler.
# NOTE(review): the `def` line and the `try:` body guarded by the handler are
# not present in this extract -- presumably reset(self) removes the
# per-instance `decode` override set below. Confirm against the full file.
80 codecs
.StreamReader
.reset(self
)
83 except AttributeError:
86 def decode(self
, input, errors
='strict'):
# True when `input` is a prefix of the BOM, i.e. the bytes seen so far could
# still turn out to be a BOM.
# NOTE(review): the enclosing condition and the value returned on this path
# are not present in this extract.
88 if codecs
.BOM_UTF8
.startswith(input):
89 # not enough data to decide if this is a BOM
90 # => try again on the next call
# A complete BOM is present.
92 elif input[:3] == codecs
.BOM_UTF8
:
# Store the plain UTF-8 decoder as an instance attribute named `decode`; an
# instance attribute takes precedence over this method on later lookups.
93 self
.decode
= codecs
.utf_8_decode
# Decode everything after the BOM ...
94 (output
, consumed
) = codecs
.utf_8_decode(input[3:],errors
)
# ... and add 3 to the consumed count to account for the skipped BOM bytes.
95 return (output
, consumed
+3)
96 # (else) no BOM present
# Same rebinding as above, then decode the input unchanged.
97 self
.decode
= codecs
.utf_8_decode
98 return codecs
.utf_8_decode(input, errors
)
100 ### encodings module API
103 return codecs
.CodecInfo(
107 incrementalencoder
=IncrementalEncoder
,
108 incrementaldecoder
=IncrementalDecoder
,
109 streamreader
=StreamReader
,
110 streamwriter
=StreamWriter
,