test_normalization should skip and not crash when the resource isn't available
[python.git] / Lib / test / test_normalization.py
blob6a7574143fd97cb83c5ea3d61cc994b713a300d0
1 from test.test_support import run_unittest, open_urlresource
2 import unittest
4 import sys
5 import os
6 from unicodedata import normalize, unidata_version
# Local file name and download URL of the normalization test data.  The URL
# is built from the Unicode database version reported by unicodedata.
TESTDATAFILE = "NormalizationTest" + os.extsep + "txt"
TESTDATAURL = "http://www.unicode.org/Public/" + unidata_version + "/ucd/" + TESTDATAFILE

# Remove an existing local copy whose first line does not mention the current
# Unicode version (presumably so that a matching copy is fetched later --
# confirm against open_urlresource).
if os.path.exists(TESTDATAFILE):
    # "with" guarantees the handle is closed even if readline() fails.
    with open(TESTDATAFILE) as f:
        first_line = f.readline()
    if unidata_version not in first_line:
        os.unlink(TESTDATAFILE)
class RangeError(Exception):
    """Raised by unistr() when a code point is greater than sys.maxunicode."""
def NFC(str):
    """Return *str* normalized with unicodedata.normalize using form "NFC"."""
    form = "NFC"
    return normalize(form, str)
def NFKC(str):
    """Return *str* normalized with unicodedata.normalize using form "NFKC"."""
    form = "NFKC"
    return normalize(form, str)
def NFD(str):
    """Return *str* normalized with unicodedata.normalize using form "NFD"."""
    form = "NFD"
    return normalize(form, str)
def NFKD(str):
    """Return *str* normalized with unicodedata.normalize using form "NFKD"."""
    form = "NFKD"
    return normalize(form, str)
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    Every token of *data* is parsed as a base-16 integer before any range
    check is made.  RangeError is raised if any resulting code point is
    greater than sys.maxunicode.
    """
    codepoints = [int(token, 16) for token in data.split(" ")]
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))
40 class NormalizationTest(unittest.TestCase):
41 def test_main(self):
42 part1_data = {}
43 # Hit the exception early
44 try:
45 open_urlresource(TESTDATAURL)
46 except IOError:
47 self.skipTest("Could not retrieve " + TESTDATAURL)
48 for line in open_urlresource(TESTDATAURL):
49 if '#' in line:
50 line = line.split('#')[0]
51 line = line.strip()
52 if not line:
53 continue
54 if line.startswith("@Part"):
55 part = line.split()[0]
56 continue
57 if part == "@Part3":
58 # XXX we don't support PRI #29 yet, so skip these tests for now
59 continue
60 try:
61 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
62 except RangeError:
63 # Skip unsupported characters;
64 # try atleast adding c1 if we are in part1
65 if part == "@Part1":
66 try:
67 c1 = unistr(line.split(';')[0])
68 except RangeError:
69 pass
70 else:
71 part1_data[c1] = 1
72 continue
74 # Perform tests
75 self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
76 self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
77 self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
78 self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
79 self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
80 NFKC(c3) == NFKC(c4) == NFKC(c5),
81 line)
82 self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
83 NFKD(c3) == NFKD(c4) == NFKD(c5),
84 line)
86 # Record part 1 data
87 if part == "@Part1":
88 part1_data[c1] = 1
90 # Perform tests for all other data
91 for c in range(sys.maxunicode+1):
92 X = unichr(c)
93 if X in part1_data:
94 continue
95 self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
97 def test_bug_834676(self):
98 # Check for bug 834676
99 normalize('NFC', u'\ud55c\uae00')
def test_main():
    """Run the NormalizationTest test case through run_unittest."""
    run_unittest(NormalizationTest)
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()