3 # test_multibytecodec_support.py
4 # Common Unittest Routines for CJK codecs
6 # $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
8 import sys
, codecs
, os
.path
10 from test
import test_support
11 from StringIO
import StringIO
13 __cjkcodecs__
= 0 # define this as 0 for python
# Per-codec settings read by the test methods through ``self``.
encoding = ''  # codec name; handed to codecs.lookup() when ``codec`` is None
codec = None  # codec tuple (with 4 elements): encode, decode, reader, writer
tstring = ''  # string to test StreamReader; indexed as [0] (text in the
              # tested encoding) and [1] (the same text as UTF-8)
codectests = None  # must set. codec test tuple; iterated as
                   # (source, scheme, expected) triples by test_errorhandle
roundtriptest = 1  # set if roundtrip is possible with unicode; when true,
                   # test_chunkcoding re-encodes every decoded line
has_iso10646 = 0  # set if this encoding contains whole iso10646 map
xmlcharnametest = None  # string to test xmlcharrefreplace; when set it is
                        # unpacked as (sin, sout) by test_customreplace
26 if self
.codec
is None:
27 self
.codec
= codecs
.lookup(self
.encoding
)
28 self
.encode
, self
.decode
, self
.reader
, self
.writer
= self
.codec
30 def test_chunkcoding(self
):
31 for native
, utf8
in zip(*[StringIO(f
).readlines()
32 for f
in self
.tstring
]):
33 u
= self
.decode(native
)[0]
34 self
.assertEqual(u
, utf8
.decode('utf-8'))
35 if self
.roundtriptest
:
36 self
.assertEqual(native
, self
.encode(u
)[0])
38 def test_errorhandle(self
):
39 for source
, scheme
, expected
in self
.codectests
:
40 if type(source
) == type(''):
45 result
= func(source
, scheme
)[0]
46 self
.assertEqual(result
, expected
)
48 self
.assertRaises(UnicodeError, func
, source
, scheme
)
50 if sys
.hexversion
>= 0x02030000:
51 def test_xmlcharrefreplace(self
):
55 s
= u
"\u0b13\u0b23\u0b60 nd eggs"
57 self
.encode(s
, "xmlcharrefreplace")[0],
58 "&#2835;&#2851;&#2912; nd eggs"
61 def test_customreplace(self
):
68 for (key
, value
) in htmlentitydefs
.entitydefs
.items():
70 names
[value
.decode('latin-1')] = self
.decode(key
)[0]
72 names
[unichr(int(value
[2:-1]))] = self
.decode(key
)[0]
74 def xmlcharnamereplace(exc
):
75 if not isinstance(exc
, UnicodeEncodeError):
76 raise TypeError("don't know how to handle %r" % exc
)
78 for c
in exc
.object[exc
.start
:exc
.end
]:
80 l
.append(u
"&%s;" % names
[c
])
82 l
.append(u
"&#%d;" % ord(c
))
83 return (u
"".join(l
), exc
.end
)
85 codecs
.register_error(
86 "test.xmlcharnamereplace", xmlcharnamereplace
)
88 if self
.xmlcharnametest
:
89 sin
, sout
= self
.xmlcharnametest
91 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u232a"
92 sout
= "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
93 self
.assertEqual(self
.encode(sin
,
94 "test.xmlcharnamereplace")[0], sout
)
96 def test_streamreader(self
):
97 UTF8Writer
= codecs
.getwriter('utf-8')
98 for name
in ["read", "readline", "readlines"]:
99 for sizehint
in [None, -1] + range(1, 33) + \
100 [64, 128, 256, 512, 1024]:
101 istream
= self
.reader(StringIO(self
.tstring
[0]))
102 ostream
= UTF8Writer(StringIO())
103 func
= getattr(istream
, name
)
105 data
= func(sizehint
)
108 if name
== "readlines":
109 ostream
.writelines(data
)
113 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
115 def test_streamwriter(self
):
117 readfuncs
= ('read', 'readline', 'readlines')
119 # standard utf8 codec has broken readline and readlines.
120 readfuncs
= ('read',)
121 UTF8Reader
= codecs
.getreader('utf-8')
122 for name
in readfuncs
:
123 for sizehint
in [None] + range(1, 33) + \
124 [64, 128, 256, 512, 1024]:
125 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
126 ostream
= self
.writer(StringIO())
127 func
= getattr(istream
, name
)
129 if sizehint
is not None:
130 data
= func(sizehint
)
136 if name
== "readlines":
137 ostream
.writelines(data
)
141 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
143 if len(u
'\U00012345') == 2: # ucs2 build
147 return _unichr(0xd800 + ((v
- 0x10000) >> 10)) + \
148 _unichr(0xdc00 + ((v
- 0x10000) & 0x3ff))
154 return 0x10000 + ((_ord(c
[0]) - 0xd800) << 10) + \
159 class TestBase_Mapping(unittest
.TestCase
):
def __init__(self, *args, **kw):
    """Set up the test case and probe the mapping file once.

    All positional and keyword arguments are forwarded unchanged to
    ``unittest.TestCase.__init__``.  Any exception raised while opening
    the mapping file propagates to the caller, so a missing or
    unreachable mapping file is reported when the case is constructed.
    """
    unittest.TestCase.__init__(self, *args, **kw)
    # test it to report the error early.  The handle is closed straight
    # away instead of being dropped: test_mapping_file opens the mapping
    # file again for itself, so keeping this one open would only leak it.
    self.open_mapping_file().close()
def open_mapping_file(self):
    """Open the mapping table identified by ``self.mapfileurl``.

    Returns whatever ``test_support.open_urlresource`` returns for that
    URL; the caller owns the returned object.
    """
    url = self.mapfileurl
    return test_support.open_urlresource(url)
171 def test_mapping_file(self
):
172 unichrs
= lambda s
: u
''.join(map(unichr, map(eval, s
.split('+'))))
175 for line
in self
.open_mapping_file():
178 data
= line
.split('#')[0].strip().split()
182 csetval
= eval(data
[0])
184 csetch
= chr(csetval
& 0xff)
185 elif csetval
>= 0x1000000:
186 csetch
= chr(csetval
>> 24) + chr((csetval
>> 16) & 0xff) + \
187 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
188 elif csetval
>= 0x10000:
189 csetch
= chr(csetval
>> 16) + \
190 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
191 elif csetval
>= 0x100:
192 csetch
= chr(csetval
>> 8) + chr(csetval
& 0xff)
196 unich
= unichrs(data
[1])
197 if ord(unich
) == 0xfffd or urt_wa
.has_key(unich
):
199 urt_wa
[unich
] = csetch
201 self
._testpoint
(csetch
, unich
)
203 def test_mapping_supplemental(self
):
204 for mapping
in self
.supmaps
:
205 self
._testpoint
(*mapping
)
207 def _testpoint(self
, csetch
, unich
):
208 if (csetch
, unich
) not in self
.pass_enctest
:
209 self
.assertEqual(unich
.encode(self
.encoding
), csetch
)
210 if (csetch
, unich
) not in self
.pass_dectest
:
211 self
.assertEqual(unicode(csetch
, self
.encoding
), unich
)
213 def load_teststring(encoding
):
215 etxt
= open(os
.path
.join('sampletexts', encoding
) + '.txt').read()
216 utxt
= open(os
.path
.join('sampletexts', encoding
) + '.utf8').read()
219 from test
import cjkencodings_test
220 return cjkencodings_test
.teststring
[encoding
]