# test_multibytecodec_support.py
#   Common Unittest Routines for CJK codecs
9 from httplib
import HTTPException
10 from test
import test_support
11 from StringIO
import StringIO
# Per-codec configuration defaults; the test methods read these through
# ``self`` and concrete codec test cases are expected to override them.
encoding = ''           # codec name, passed to codecs.lookup() when `codec` is None
codec = None            # codec object exposing encode/decode/streamreader/
                        # streamwriter/incrementalencoder/incrementaldecoder
tstring = ''            # pair of test strings: [0] is the text in the codec's
                        # own encoding, [1] is the same text encoded as UTF-8

codectests = None       # must set. iterable of (source, scheme, expected) tuples
roundtriptest = 1       # set if roundtrip is possible with unicode
has_iso10646 = 0        # set if this encoding contains whole iso10646 map
xmlcharnametest = None  # optional (input, expected output) pair for the
                        # "test.xmlcharnamereplace" error handler test
unmappedunicode = u'\udeee'  # a unicode codepoint that is not mapped.
25 if self
.codec
is None:
26 self
.codec
= codecs
.lookup(self
.encoding
)
27 self
.encode
= self
.codec
.encode
28 self
.decode
= self
.codec
.decode
29 self
.reader
= self
.codec
.streamreader
30 self
.writer
= self
.codec
.streamwriter
31 self
.incrementalencoder
= self
.codec
.incrementalencoder
32 self
.incrementaldecoder
= self
.codec
.incrementaldecoder
34 def test_chunkcoding(self
):
35 for native
, utf8
in zip(*[StringIO(f
).readlines()
36 for f
in self
.tstring
]):
37 u
= self
.decode(native
)[0]
38 self
.assertEqual(u
, utf8
.decode('utf-8'))
39 if self
.roundtriptest
:
40 self
.assertEqual(native
, self
.encode(u
)[0])
42 def test_errorhandle(self
):
43 for source
, scheme
, expected
in self
.codectests
:
44 if type(source
) == type(''):
49 result
= func(source
, scheme
)[0]
50 self
.assertEqual(result
, expected
)
52 self
.assertRaises(UnicodeError, func
, source
, scheme
)
54 def test_xmlcharrefreplace(self
):
58 s
= u
"\u0b13\u0b23\u0b60 nd eggs"
60 self
.encode(s
, "xmlcharrefreplace")[0],
61 "ଓଣୠ nd eggs"
64 def test_customreplace_encode(self
):
68 from htmlentitydefs
import codepoint2name
70 def xmlcharnamereplace(exc
):
71 if not isinstance(exc
, UnicodeEncodeError):
72 raise TypeError("don't know how to handle %r" % exc
)
74 for c
in exc
.object[exc
.start
:exc
.end
]:
75 if ord(c
) in codepoint2name
:
76 l
.append(u
"&%s;" % codepoint2name
[ord(c
)])
78 l
.append(u
"&#%d;" % ord(c
))
79 return (u
"".join(l
), exc
.end
)
81 codecs
.register_error("test.xmlcharnamereplace", xmlcharnamereplace
)
83 if self
.xmlcharnametest
:
84 sin
, sout
= self
.xmlcharnametest
86 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u232a"
87 sout
= "«ℜ» = ⟨ሴ⟩"
88 self
.assertEqual(self
.encode(sin
,
89 "test.xmlcharnamereplace")[0], sout
)
91 def test_callback_wrong_objects(self
):
94 codecs
.register_error("test.cjktest", myreplace
)
96 for ret
in ([1, 2, 3], [], None, object(), 'string', ''):
97 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
100 def test_callback_long_index(self
):
102 return (u
'x', long(exc
.end
))
103 codecs
.register_error("test.cjktest", myreplace
)
104 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
105 'test.cjktest'), ('abcdxefgh', 9))
108 return (u
'x', sys
.maxint
+ 1)
109 codecs
.register_error("test.cjktest", myreplace
)
110 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
113 def test_callback_None_index(self
):
116 codecs
.register_error("test.cjktest", myreplace
)
117 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
120 def test_callback_backward_index(self
):
122 if myreplace
.limit
> 0:
124 return (u
'REPLACED', 0)
126 return (u
'TERMINAL', exc
.end
)
128 codecs
.register_error("test.cjktest", myreplace
)
129 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
131 ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
133 def test_callback_forward_index(self
):
135 return (u
'REPLACED', exc
.end
+ 2)
136 codecs
.register_error("test.cjktest", myreplace
)
137 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
138 'test.cjktest'), ('abcdREPLACEDgh', 9))
140 def test_callback_index_outofbound(self
):
142 return (u
'TERM', 100)
143 codecs
.register_error("test.cjktest", myreplace
)
144 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
147 def test_incrementalencoder(self
):
148 UTF8Reader
= codecs
.getreader('utf-8')
149 for sizehint
in [None] + range(1, 33) + \
150 [64, 128, 256, 512, 1024]:
151 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
153 encoder
= self
.incrementalencoder()
155 if sizehint
is not None:
156 data
= istream
.read(sizehint
)
158 data
= istream
.read()
162 e
= encoder
.encode(data
)
165 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
167 def test_incrementaldecoder(self
):
168 UTF8Writer
= codecs
.getwriter('utf-8')
169 for sizehint
in [None, -1] + range(1, 33) + \
170 [64, 128, 256, 512, 1024]:
171 istream
= StringIO(self
.tstring
[0])
172 ostream
= UTF8Writer(StringIO())
173 decoder
= self
.incrementaldecoder()
175 data
= istream
.read(sizehint
)
179 u
= decoder
.decode(data
)
182 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
184 def test_incrementalencoder_error_callback(self
):
185 inv
= self
.unmappedunicode
187 e
= self
.incrementalencoder()
188 self
.assertRaises(UnicodeEncodeError, e
.encode
, inv
, True)
191 self
.assertEqual(e
.encode(inv
, True), '')
194 def tempreplace(exc
):
195 return (u
'called', exc
.end
)
196 codecs
.register_error('test.incremental_error_callback', tempreplace
)
197 e
.errors
= 'test.incremental_error_callback'
198 self
.assertEqual(e
.encode(inv
, True), 'called')
202 self
.assertEqual(e
.encode(inv
, True), '')
204 def test_streamreader(self
):
205 UTF8Writer
= codecs
.getwriter('utf-8')
206 for name
in ["read", "readline", "readlines"]:
207 for sizehint
in [None, -1] + range(1, 33) + \
208 [64, 128, 256, 512, 1024]:
209 istream
= self
.reader(StringIO(self
.tstring
[0]))
210 ostream
= UTF8Writer(StringIO())
211 func
= getattr(istream
, name
)
213 data
= func(sizehint
)
216 if name
== "readlines":
217 ostream
.writelines(data
)
221 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
223 def test_streamwriter(self
):
224 readfuncs
= ('read', 'readline', 'readlines')
225 UTF8Reader
= codecs
.getreader('utf-8')
226 for name
in readfuncs
:
227 for sizehint
in [None] + range(1, 33) + \
228 [64, 128, 256, 512, 1024]:
229 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
230 ostream
= self
.writer(StringIO())
231 func
= getattr(istream
, name
)
233 if sizehint
is not None:
234 data
= func(sizehint
)
240 if name
== "readlines":
241 ostream
.writelines(data
)
245 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
247 class TestBase_Mapping(unittest
.TestCase
):
def __init__(self, *args, **kw):
    """Initialise the test case and probe the mapping file once.

    All arguments are forwarded unchanged to unittest.TestCase.__init__.
    If the mapping file cannot be opened (IOError or HTTPException), the
    test is skipped via self.skipTest() with a message naming
    self.mapfileurl.
    """
    unittest.TestCase.__init__(self, *args, **kw)
    try:
        self.open_mapping_file() # test it to report the error early
    except (IOError, HTTPException):
        self.skipTest("Could not retrieve "+self.mapfileurl)
def open_mapping_file(self):
    """Return the object test_support.open_urlresource() yields for self.mapfileurl."""
    return test_support.open_urlresource(self.mapfileurl)
262 def test_mapping_file(self
):
263 if self
.mapfileurl
.endswith('.xml'):
264 self
._test
_mapping
_file
_ucm
()
266 self
._test
_mapping
_file
_plain
()
268 def _test_mapping_file_plain(self
):
269 _unichr
= lambda c
: eval("u'\\U%08x'" % int(c
, 16))
270 unichrs
= lambda s
: u
''.join(_unichr(c
) for c
in s
.split('+'))
273 for line
in self
.open_mapping_file():
276 data
= line
.split('#')[0].strip().split()
280 csetval
= eval(data
[0])
282 csetch
= chr(csetval
& 0xff)
283 elif csetval
>= 0x1000000:
284 csetch
= chr(csetval
>> 24) + chr((csetval
>> 16) & 0xff) + \
285 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
286 elif csetval
>= 0x10000:
287 csetch
= chr(csetval
>> 16) + \
288 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
289 elif csetval
>= 0x100:
290 csetch
= chr(csetval
>> 8) + chr(csetval
& 0xff)
294 unich
= unichrs(data
[1])
295 if unich
== u
'\ufffd' or unich
in urt_wa
:
297 urt_wa
[unich
] = csetch
299 self
._testpoint
(csetch
, unich
)
def _test_mapping_file_ucm(self):
    """Check every ``<a u="..." b="..."/>`` entry of the XML mapping file.

    ``u`` is read as a four-digit uppercase hex code point and ``b`` as
    space-separated hex byte values; each resulting pair is handed to
    self._testpoint(encoded, unicode_char).
    """
    ucmdata = self.open_mapping_file().read()
    uc = re.findall('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>', ucmdata)
    for uni, coded in uc:
        unich = unichr(int(uni, 16))
        codech = ''.join(chr(int(c, 16)) for c in coded.split())
        self._testpoint(codech, unich)
309 def test_mapping_supplemental(self
):
310 for mapping
in self
.supmaps
:
311 self
._testpoint
(*mapping
)
def _testpoint(self, csetch, unich):
    """Check one mapping entry in both directions.

    csetch is the encoded form and unich the unicode form.  The encode
    check is skipped when the (csetch, unich) pair is listed in
    self.pass_enctest, the decode check when it is listed in
    self.pass_dectest.  A UnicodeError raised by either conversion is
    reported through self.fail() together with the error's reason.
    """
    if (csetch, unich) not in self.pass_enctest:
        try:
            self.assertEqual(unich.encode(self.encoding), csetch)
        except UnicodeError as exc:
            self.fail('Encoding failed while testing %s -> %s: %s' % (
                        repr(unich), repr(csetch), exc.reason))
    if (csetch, unich) not in self.pass_dectest:
        try:
            self.assertEqual(csetch.decode(self.encoding), unich)
        except UnicodeError as exc:
            self.fail('Decoding failed while testing %s -> %s: %s' % (
                        repr(csetch), repr(unich), exc.reason))
def load_teststring(encoding):
    """Return the entry stored under *encoding* in cjkencodings_test.teststring.

    The data module is imported inside the function, so it is only loaded
    when a test string is actually requested.
    """
    from test import cjkencodings_test
    return cjkencodings_test.teststring[encoding]