""" Test script for the Unicode implementation.

Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import unittest
from test import test_support
14 class UnicodeNamesTest(unittest
.TestCase
):
16 def checkletter(self
, name
, code
):
17 # Helper that put all \N escapes inside eval'd raw strings,
18 # to make sure this script runs even if the compiler
19 # chokes on \N escapes
20 res
= eval(ur
'u"\N{%s}"' % name
)
21 self
.assertEqual(res
, code
)
24 def test_general(self
):
25 # General and case insensitivity test:
27 "LATIN CAPITAL LETTER T",
28 "LATIN SMALL LETTER H",
29 "LATIN SMALL LETTER E",
31 "LATIN SMALL LETTER R",
32 "LATIN CAPITAL LETTER E",
33 "LATIN SMALL LETTER D",
35 "LATIN SMALL LETTER f",
36 "LATIN CAPITAL LeTtEr o",
37 "LATIN SMaLl LETTER x",
39 "LATIN SMALL LETTER A",
40 "LATIN SMALL LETTER T",
41 "LATIN SMALL LETTER E",
43 "LATIN SMALL LETTER T",
44 "LATIN SMALL LETTER H",
45 "LATIN SMALL LETTER E",
47 "LATIN SMALL LETTER S",
48 "LATIN SMALL LETTER H",
49 "LATIN small LETTER e",
50 "LATIN small LETTER e",
51 "LATIN SMALL LETTER P",
54 string
= u
"The rEd fOx ate the sheep."
57 u
"".join([self
.checkletter(*args
) for args
in zip(chars
, string
)]),
61 def test_ascii_letters(self
):
64 for char
in "".join(map(chr, xrange(ord("a"), ord("z")))):
65 name
= "LATIN SMALL LETTER %s" % char
.upper()
66 code
= unicodedata
.lookup(name
)
67 self
.assertEqual(unicodedata
.name(code
), name
)
69 def test_hangul_syllables(self
):
70 self
.checkletter("HANGUL SYLLABLE GA", u
"\uac00")
71 self
.checkletter("HANGUL SYLLABLE GGWEOSS", u
"\uafe8")
72 self
.checkletter("HANGUL SYLLABLE DOLS", u
"\ub3d0")
73 self
.checkletter("HANGUL SYLLABLE RYAN", u
"\ub7b8")
74 self
.checkletter("HANGUL SYLLABLE MWIK", u
"\ubba0")
75 self
.checkletter("HANGUL SYLLABLE BBWAEM", u
"\ubf88")
76 self
.checkletter("HANGUL SYLLABLE SSEOL", u
"\uc370")
77 self
.checkletter("HANGUL SYLLABLE YI", u
"\uc758")
78 self
.checkletter("HANGUL SYLLABLE JJYOSS", u
"\ucb40")
79 self
.checkletter("HANGUL SYLLABLE KYEOLS", u
"\ucf28")
80 self
.checkletter("HANGUL SYLLABLE PAN", u
"\ud310")
81 self
.checkletter("HANGUL SYLLABLE HWEOK", u
"\ud6f8")
82 self
.checkletter("HANGUL SYLLABLE HIH", u
"\ud7a3")
85 self
.assertRaises(ValueError, unicodedata
.name
, u
"\ud7a4")
87 def test_cjk_unified_ideographs(self
):
88 self
.checkletter("CJK UNIFIED IDEOGRAPH-3400", u
"\u3400")
89 self
.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u
"\u4db5")
90 self
.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u
"\u4e00")
91 self
.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u
"\u9fa5")
92 self
.checkletter("CJK UNIFIED IDEOGRAPH-20000", u
"\U00020000")
93 self
.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u
"\U0002a6d6")
95 def test_bmp_characters(self
):
98 for code
in xrange(0x10000):
100 name
= unicodedata
.name(char
, None)
102 self
.assertEqual(unicodedata
.lookup(name
), char
)
105 def test_misc_symbols(self
):
106 self
.checkletter("PILCROW SIGN", u
"\u00b6")
107 self
.checkletter("REPLACEMENT CHARACTER", u
"\uFFFD")
108 self
.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u
"\uFF9F")
109 self
.checkletter("FULLWIDTH LATIN SMALL LETTER A", u
"\uFF41")
111 def test_errors(self
):
113 self
.assertRaises(TypeError, unicodedata
.name
)
114 self
.assertRaises(TypeError, unicodedata
.name
, u
'xx')
115 self
.assertRaises(TypeError, unicodedata
.lookup
)
116 self
.assertRaises(KeyError, unicodedata
.lookup
, u
'unknown')
118 def test_strict_eror_handling(self
):
119 # bogus character name
122 unicode, "\\N{blah}", 'unicode-escape', 'strict'
124 # long bogus character name
127 unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
129 # missing closing brace
132 unicode, "\\N{SPACE", 'unicode-escape', 'strict'
134 # missing opening brace
137 unicode, "\\NSPACE", 'unicode-escape', 'strict'
def test_main():
    # Run every UnicodeNamesTest case through test_support.run_unittest().
    test_support.run_unittest(UnicodeNamesTest)


if __name__ == "__main__":
    # Executed as a script: run the tests directly.
    test_main()