convert usage of fail* to assert*
[python.git] / Lib / test / test_unicodedata.py
blob9d7133bd4529824974fa9813bb3086f7702189ac
1 """ Test script for the unicodedata module.
3 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7 """
9 import sys
10 import unittest
11 import hashlib
12 import subprocess
13 import test.test_support
# Codec used by UnicodeMethodsTest.test_method_checksum to turn each joined
# unicode record into bytes before it is fed to the hash.
encoding = 'utf-8'
18 ### Run tests
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum the results of the unicode string methods.

    Every code point from 0 through 0xFFFF is run through the predicate and
    case-mapping methods of the unicode type; the SHA-1 digest of all the
    results is compared against a known value.
    """

    # update this, if the database changes
    expectedchecksum = '6ec65b65835614ec00634c674bba0e50cd32c189'

    def test_method_checksum(self):
        # The tables below are ordered: the digest depends on the exact
        # sequence in which the results are hashed.

        # Predicates (single char)
        single_predicates = (
            'isalnum', 'isalpha', 'isdecimal', 'isdigit', 'islower',
            'isnumeric', 'isspace', 'istitle', 'isupper',
        )
        # Predicates (multiple chars): (method name, text appended to char)
        padded_predicates = (
            ('isalnum', u'abc'),
            ('isalpha', u'abc'),
            ('isdecimal', u'123'),
            ('isdigit', u'123'),
            ('islower', u'abc'),
            ('isnumeric', u'123'),
            ('isspace', u' \t'),
            ('istitle', u'abc'),
            ('isupper', u'ABC'),
        )
        # Mappings (single char)
        single_mappings = ('lower', 'upper', 'title')
        # Mappings (multiple chars): (method name, text appended to char)
        padded_mappings = (
            ('lower', u'abc'),
            ('upper', u'ABC'),
            ('title', u'abc'),
            ('title', u'ABC'),
        )

        # Indexing this with a bool yields u"0" for False and u"1" for True.
        flag = u"01"
        digest = hashlib.sha1()
        for codepoint in range(65536):
            char = unichr(codepoint)
            record = [flag[getattr(char, name)()]
                      for name in single_predicates]
            record += [flag[getattr(char + tail, name)()]
                       for name, tail in padded_predicates]
            record += [getattr(char, name)()
                       for name in single_mappings]
            record += [getattr(char + tail, name)()
                       for name, tail in padded_mappings]
            digest.update(u''.join(record).encode(encoding))
        self.assertEqual(digest.hexdigest(), self.expectedchecksum)
class UnicodeDatabaseTest(unittest.TestCase):
    """Base class that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import is done here rather than at module level: if
        # unicodedata is not available this raises an ImportError,
        # but the other test cases will still be run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the per-test reference set up above.
        delattr(self, 'db')
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the lookup functions of the module held in ``self.db``."""

    # update this, if the database changes
    expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'

    def test_function_checksum(self):
        # Hash the properties of code points 0..0xFFFF and compare the
        # digest with the recorded one.
        db = self.db
        digest = hashlib.sha1()

        for codepoint in range(0x10000):
            char = unichr(codepoint)
            # Properties; the order matters because the digest depends on it.
            properties = (
                str(db.digit(char, -1)),
                str(db.numeric(char, -1)),
                str(db.decimal(char, -1)),
                db.category(char),
                db.bidirectional(char),
                db.decomposition(char),
                str(db.mirrored(char)),
                str(db.combining(char)),
            )
            digest.update(''.join(properties))
        self.assertEqual(digest.hexdigest(), self.expectedchecksum)

    def test_digit(self):
        digit = self.db.digit
        self.assertEqual(digit(u'A', None), None)
        self.assertEqual(digit(u'9'), 9)
        self.assertEqual(digit(u'\u215b', None), None)
        self.assertEqual(digit(u'\u2468'), 9)
        self.assertEqual(digit(u'\U00020000', None), None)

        self.assertRaises(TypeError, digit)
        self.assertRaises(TypeError, digit, u'xx')
        self.assertRaises(ValueError, digit, u'x')

    def test_numeric(self):
        numeric = self.db.numeric
        self.assertEqual(numeric(u'A', None), None)
        self.assertEqual(numeric(u'9'), 9)
        self.assertEqual(numeric(u'\u215b'), 0.125)
        self.assertEqual(numeric(u'\u2468'), 9.0)
        self.assertEqual(numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, numeric)
        self.assertRaises(TypeError, numeric, u'xx')
        self.assertRaises(ValueError, numeric, u'x')

    def test_decimal(self):
        decimal = self.db.decimal
        self.assertEqual(decimal(u'A', None), None)
        self.assertEqual(decimal(u'9'), 9)
        self.assertEqual(decimal(u'\u215b', None), None)
        self.assertEqual(decimal(u'\u2468', None), None)
        self.assertEqual(decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, decimal)
        self.assertRaises(TypeError, decimal, u'xx')
        self.assertRaises(ValueError, decimal, u'x')

    def test_category(self):
        category = self.db.category
        expectations = (
            (u'\uFFFE', 'Cn'),
            (u'a', 'Ll'),
            (u'A', 'Lu'),
            (u'\U00020000', 'Lo'),
        )
        for char, expected in expectations:
            self.assertEqual(category(char), expected)

        self.assertRaises(TypeError, category)
        self.assertRaises(TypeError, category, u'xx')

    def test_bidirectional(self):
        bidirectional = self.db.bidirectional
        expectations = (
            (u'\uFFFE', ''),
            (u' ', 'WS'),
            (u'A', 'L'),
            (u'\U00020000', 'L'),
        )
        for char, expected in expectations:
            self.assertEqual(bidirectional(char), expected)

        self.assertRaises(TypeError, bidirectional)
        self.assertRaises(TypeError, bidirectional, u'xx')

    def test_decomposition(self):
        decomposition = self.db.decomposition
        expectations = (
            (u'\uFFFE', ''),
            (u'\u00bc', '<fraction> 0031 2044 0034'),
        )
        for char, expected in expectations:
            self.assertEqual(decomposition(char), expected)

        self.assertRaises(TypeError, decomposition)
        self.assertRaises(TypeError, decomposition, u'xx')

    def test_mirrored(self):
        mirrored = self.db.mirrored
        expectations = (
            (u'\uFFFE', 0),
            (u'a', 0),
            (u'\u2201', 1),
            (u'\U00020000', 0),
        )
        for char, expected in expectations:
            self.assertEqual(mirrored(char), expected)

        self.assertRaises(TypeError, mirrored)
        self.assertRaises(TypeError, mirrored, u'xx')

    def test_combining(self):
        combining = self.db.combining
        expectations = (
            (u'\uFFFE', 0),
            (u'a', 0),
            (u'\u20e1', 230),
            (u'\U00020000', 0),
        )
        for char, expected in expectations:
            self.assertEqual(combining(char), expected)

        self.assertRaises(TypeError, combining)
        self.assertRaises(TypeError, combining, u'xx')

    def test_normalize(self):
        normalize = self.db.normalize
        self.assertRaises(TypeError, normalize)
        self.assertRaises(ValueError, normalize, 'unknown', u'xx')
        self.assertEqual(normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are
        # all rejected.
        for rejected in ('a', u'', u'ra'):
            self.assertRaises(TypeError, eaw, rejected)

        expectations = (
            (u'\x1e', 'N'),
            (u'\x20', 'Na'),
            (u'\uC894', 'W'),
            (u'\uFF66', 'H'),
            (u'\uFF1F', 'F'),
            (u'\u2010', 'A'),
            (u'\U00020000', 'W'),
        )
        for char, expected in expectations:
            self.assertEqual(eaw(char), expected)
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Regression tests and cross-checks around the unicode database."""

    def test_failed_import_during_compiling(self):
        # Issue 4367
        # Decoding \N escapes requires the unicodedata module. If it can't be
        # imported, we shouldn't segfault.

        # This program should raise a SyntaxError in the eval.
        # The backslash is doubled so that the child process receives a
        # literal backslash-N escape without relying on this byte string
        # leaving an unrecognised escape untouched.
        code = "import sys;" \
            "sys.modules['unicodedata'] = None;" \
            """eval("u'\\N{SOFT HYPHEN}'")"""
        args = [sys.executable, "-c", code]
        # We use a subprocess because the unicodedata module may already have
        # been loaded in this process.
        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting for the child and closes
        # the pipe afterwards; a bare wait() can deadlock once the child has
        # filled the pipe buffer, and leaves the pipe open.
        stderr = popen.communicate()[1]
        self.assertEqual(popen.returncode, 1)
        error = "SyntaxError: (unicode error) \\N escapes not supported " \
            "(can't load unicodedata module)"
        self.assertTrue(error in stderr)

    def _check_consistent_with_numeric(self, lookup):
        # Shared body of the two consistency tests below: every character in
        # 0..0xFFFF for which lookup(char, -1) yields a value other than the
        # -1 default must have the same value from numeric().
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        self.assertTrue(count >= 10) # should have tested at least the ASCII digits

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)

    def test_bug_1704793(self):
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')

    def test_ucd_510(self):
        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
        self.assertTrue(self.db.mirrored(u"\u0f3a"))
        self.assertFalse(self.db.ucd_3_2_0.mirrored(u"\u0f3a"))
        # Also, we now have two ways of representing
        # the upper-case mapping: as delta, or as absolute value
        self.assertEqual(u"a".upper(), u'A')
        self.assertEqual(u"\u1d79".upper(), u'\ua77d')
        self.assertEqual(u".".upper(), u".")

    def test_bug_5828(self):
        self.assertEqual(u"\u1d79".lower(), u"\u1d79")
        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
        zero_mapped = []
        # xrange avoids materialising a list of every code point, and each
        # character is built once instead of three times.
        for code in xrange(sys.maxunicode + 1):
            char = unichr(code)
            if u"\x00" in char.lower() + char.upper() + char.title():
                zero_mapped.append(code)
        self.assertEqual(zero_mapped, [0])

    def test_bug_4971(self):
        # LETTER DZ WITH CARON: DZ, Dz, dz
        self.assertEqual(u"\u01c4".title(), u"\u01c5")
        self.assertEqual(u"\u01c5".title(), u"\u01c5")
        self.assertEqual(u"\u01c6".title(), u"\u01c5")
def test_main():
    # Entry point: hand all test case classes of this module to the
    # test_support runner in one call.
    test_cases = (UnicodeMiscTest, UnicodeMethodsTest, UnicodeFunctionsTest)
    test.test_support.run_unittest(*test_cases)
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()