# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
7 import sys
, codecs
, os
.path
9 from test
import test_support
10 from StringIO
import StringIO
# Per-codec configuration; concrete test cases are expected to override these.
encoding = ''  # codec name; passed to codecs.lookup() when `codec` is None
codec = None  # codec object as returned by codecs.lookup(); used via its attributes
tstring = ''  # test data: [0] is the codec-encoded text, [1] the same text in UTF-8

codectests = None  # must set. sequence of (source, scheme, expected) items
roundtriptest = 1  # set if roundtrip is possible with unicode
has_iso10646 = 0  # set if this encoding contains whole iso10646 map
xmlcharnametest = None  # optional (input, expected output) pair for the custom-replace test
unmappedunicode = u'\udeee'  # a unicode codepoint that is not mapped.
24 if self
.codec
is None:
25 self
.codec
= codecs
.lookup(self
.encoding
)
26 self
.encode
= self
.codec
.encode
27 self
.decode
= self
.codec
.decode
28 self
.reader
= self
.codec
.streamreader
29 self
.writer
= self
.codec
.streamwriter
30 self
.incrementalencoder
= self
.codec
.incrementalencoder
31 self
.incrementaldecoder
= self
.codec
.incrementaldecoder
33 def test_chunkcoding(self
):
34 for native
, utf8
in zip(*[StringIO(f
).readlines()
35 for f
in self
.tstring
]):
36 u
= self
.decode(native
)[0]
37 self
.assertEqual(u
, utf8
.decode('utf-8'))
38 if self
.roundtriptest
:
39 self
.assertEqual(native
, self
.encode(u
)[0])
41 def test_errorhandle(self
):
42 for source
, scheme
, expected
in self
.codectests
:
43 if type(source
) == type(''):
48 result
= func(source
, scheme
)[0]
49 self
.assertEqual(result
, expected
)
51 self
.assertRaises(UnicodeError, func
, source
, scheme
)
53 def test_xmlcharrefreplace(self
):
57 s
= u
"\u0b13\u0b23\u0b60 nd eggs"
59 self
.encode(s
, "xmlcharrefreplace")[0],
60 "ଓଣୠ nd eggs"
63 def test_customreplace_encode(self
):
67 from htmlentitydefs
import codepoint2name
69 def xmlcharnamereplace(exc
):
70 if not isinstance(exc
, UnicodeEncodeError):
71 raise TypeError("don't know how to handle %r" % exc
)
73 for c
in exc
.object[exc
.start
:exc
.end
]:
74 if ord(c
) in codepoint2name
:
75 l
.append(u
"&%s;" % codepoint2name
[ord(c
)])
77 l
.append(u
"&#%d;" % ord(c
))
78 return (u
"".join(l
), exc
.end
)
80 codecs
.register_error("test.xmlcharnamereplace", xmlcharnamereplace
)
82 if self
.xmlcharnametest
:
83 sin
, sout
= self
.xmlcharnametest
85 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u232a"
86 sout
= "«ℜ» = ⟨ሴ⟩"
87 self
.assertEqual(self
.encode(sin
,
88 "test.xmlcharnamereplace")[0], sout
)
90 def test_callback_wrong_objects(self
):
93 codecs
.register_error("test.cjktest", myreplace
)
95 for ret
in ([1, 2, 3], [], None, object(), 'string', ''):
96 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
99 def test_callback_long_index(self
):
101 return (u
'x', long(exc
.end
))
102 codecs
.register_error("test.cjktest", myreplace
)
103 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
104 'test.cjktest'), ('abcdxefgh', 9))
107 return (u
'x', sys
.maxint
+ 1)
108 codecs
.register_error("test.cjktest", myreplace
)
109 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
112 def test_callback_None_index(self
):
115 codecs
.register_error("test.cjktest", myreplace
)
116 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
119 def test_callback_backward_index(self
):
121 if myreplace
.limit
> 0:
123 return (u
'REPLACED', 0)
125 return (u
'TERMINAL', exc
.end
)
127 codecs
.register_error("test.cjktest", myreplace
)
128 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
130 ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
132 def test_callback_forward_index(self
):
134 return (u
'REPLACED', exc
.end
+ 2)
135 codecs
.register_error("test.cjktest", myreplace
)
136 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
137 'test.cjktest'), ('abcdREPLACEDgh', 9))
139 def test_callback_index_outofbound(self
):
141 return (u
'TERM', 100)
142 codecs
.register_error("test.cjktest", myreplace
)
143 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
146 def test_incrementalencoder(self
):
147 UTF8Reader
= codecs
.getreader('utf-8')
148 for sizehint
in [None] + range(1, 33) + \
149 [64, 128, 256, 512, 1024]:
150 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
152 encoder
= self
.incrementalencoder()
154 if sizehint
is not None:
155 data
= istream
.read(sizehint
)
157 data
= istream
.read()
161 e
= encoder
.encode(data
)
164 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
166 def test_incrementaldecoder(self
):
167 UTF8Writer
= codecs
.getwriter('utf-8')
168 for sizehint
in [None, -1] + range(1, 33) + \
169 [64, 128, 256, 512, 1024]:
170 istream
= StringIO(self
.tstring
[0])
171 ostream
= UTF8Writer(StringIO())
172 decoder
= self
.incrementaldecoder()
174 data
= istream
.read(sizehint
)
178 u
= decoder
.decode(data
)
181 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
183 def test_incrementalencoder_error_callback(self
):
184 inv
= self
.unmappedunicode
186 e
= self
.incrementalencoder()
187 self
.assertRaises(UnicodeEncodeError, e
.encode
, inv
, True)
190 self
.assertEqual(e
.encode(inv
, True), '')
193 def tempreplace(exc
):
194 return (u
'called', exc
.end
)
195 codecs
.register_error('test.incremental_error_callback', tempreplace
)
196 e
.errors
= 'test.incremental_error_callback'
197 self
.assertEqual(e
.encode(inv
, True), 'called')
201 self
.assertEqual(e
.encode(inv
, True), '')
203 def test_streamreader(self
):
204 UTF8Writer
= codecs
.getwriter('utf-8')
205 for name
in ["read", "readline", "readlines"]:
206 for sizehint
in [None, -1] + range(1, 33) + \
207 [64, 128, 256, 512, 1024]:
208 istream
= self
.reader(StringIO(self
.tstring
[0]))
209 ostream
= UTF8Writer(StringIO())
210 func
= getattr(istream
, name
)
212 data
= func(sizehint
)
215 if name
== "readlines":
216 ostream
.writelines(data
)
220 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
222 def test_streamwriter(self
):
223 readfuncs
= ('read', 'readline', 'readlines')
224 UTF8Reader
= codecs
.getreader('utf-8')
225 for name
in readfuncs
:
226 for sizehint
in [None] + range(1, 33) + \
227 [64, 128, 256, 512, 1024]:
228 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
229 ostream
= self
.writer(StringIO())
230 func
= getattr(istream
, name
)
232 if sizehint
is not None:
233 data
= func(sizehint
)
239 if name
== "readlines":
240 ostream
.writelines(data
)
244 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
246 if len(u
'\U00012345') == 2: # ucs2 build
250 return _unichr(0xd800 + ((v
- 0x10000) >> 10)) + \
251 _unichr(0xdc00 + ((v
- 0x10000) & 0x3ff))
257 return 0x10000 + ((_ord(c
[0]) - 0xd800) << 10) + \
262 class TestBase_Mapping(unittest
.TestCase
):
def __init__(self, *args, **kw):
    # Forward all arguments unchanged to the unittest.TestCase initializer.
    unittest.TestCase.__init__(self, *args, **kw)
    # Open the mapping file once up front so that a problem with the
    # resource is reported at construction time. The handle is only a
    # probe, so close it immediately instead of leaving it for the garbage
    # collector.
    self.open_mapping_file().close()  # test it to report the error early
def open_mapping_file(self):
    # Return the resource named by self.mapfileurl, obtained through
    # test_support.open_urlresource().
    return test_support.open_urlresource(self.mapfileurl)
274 def test_mapping_file(self
):
275 unichrs
= lambda s
: u
''.join(map(unichr, map(eval, s
.split('+'))))
278 for line
in self
.open_mapping_file():
281 data
= line
.split('#')[0].strip().split()
285 csetval
= eval(data
[0])
287 csetch
= chr(csetval
& 0xff)
288 elif csetval
>= 0x1000000:
289 csetch
= chr(csetval
>> 24) + chr((csetval
>> 16) & 0xff) + \
290 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
291 elif csetval
>= 0x10000:
292 csetch
= chr(csetval
>> 16) + \
293 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
294 elif csetval
>= 0x100:
295 csetch
= chr(csetval
>> 8) + chr(csetval
& 0xff)
299 unich
= unichrs(data
[1])
300 if ord(unich
) == 0xfffd or urt_wa
.has_key(unich
):
302 urt_wa
[unich
] = csetch
304 self
._testpoint
(csetch
, unich
)
306 def test_mapping_supplemental(self
):
307 for mapping
in self
.supmaps
:
308 self
._testpoint
(*mapping
)
def _testpoint(self, csetch, unich):
    # Check one (encoded string, unicode string) pair in both directions.
    # A direction is skipped when the pair is listed in the matching
    # pass_enctest / pass_dectest collection.
    pair = (csetch, unich)
    if pair not in self.pass_enctest:
        self.assertEqual(unich.encode(self.encoding), csetch)
    if pair not in self.pass_dectest:
        self.assertEqual(unicode(csetch, self.encoding), unich)
def load_teststring(encoding):
    """Return the test string data stored for *encoding*.

    The data is looked up in ``cjkencodings_test.teststring``; that module
    is imported when the function is called rather than at module import.
    """
    from test import cjkencodings_test
    return cjkencodings_test.teststring[encoding]