1 from test
.support
import run_unittest
, open_urlresource
6 from unicodedata
import normalize
, unidata_version
8 TESTDATAFILE
= "NormalizationTest.txt"
9 TESTDATAURL
= "http://www.unicode.org/Public/" + unidata_version
+ "/ucd/" + TESTDATAFILE
11 if os
.path
.exists(TESTDATAFILE
):
12 f
= open(TESTDATAFILE
, encoding
='utf-8')
15 if not unidata_version
in l
:
16 os
.unlink(TESTDATAFILE
)
18 class RangeError(Exception):
22 return normalize("NFC", str)
25 return normalize("NFKC", str)
28 return normalize("NFD", str)
31 return normalize("NFKD", str)
34 data
= [int(x
, 16) for x
in data
.split(" ")]
36 if x
> sys
.maxunicode
:
38 return "".join([chr(x
) for x
in data
])
40 class NormalizationTest(unittest
.TestCase
):
43 for line
in open_urlresource(TESTDATAURL
, encoding
="utf-8"):
45 line
= line
.split('#')[0]
49 if line
.startswith("@Part"):
50 part
= line
.split()[0]
53 # XXX we don't support PRI #29 yet, so skip these tests for now
56 c1
,c2
,c3
,c4
,c5
= [unistr(x
) for x
in line
.split(';')[:-1]]
58 # Skip unsupported characters;
59 # try at least adding c1 if we are in part1
62 c1
= unistr(line
.split(';')[0])
70 self
.failUnless(c2
== NFC(c1
) == NFC(c2
) == NFC(c3
), line
)
71 self
.failUnless(c4
== NFC(c4
) == NFC(c5
), line
)
72 self
.failUnless(c3
== NFD(c1
) == NFD(c2
) == NFD(c3
), line
)
73 self
.failUnless(c5
== NFD(c4
) == NFD(c5
), line
)
74 self
.failUnless(c4
== NFKC(c1
) == NFKC(c2
) == \
75 NFKC(c3
) == NFKC(c4
) == NFKC(c5
),
77 self
.failUnless(c5
== NFKD(c1
) == NFKD(c2
) == \
78 NFKD(c3
) == NFKD(c4
) == NFKD(c5
),
85 # Perform tests for all other data
86 for c
in range(sys
.maxunicode
+1):
90 self
.failUnless(X
== NFC(X
) == NFD(X
) == NFKC(X
) == NFKD(X
), c
)
92 def test_bug_834676(self
):
93 # Check for bug 834676
94 normalize('NFC', '\ud55c\uae00')
98 # Hit the exception early
99 open_urlresource(TESTDATAURL
)
100 run_unittest(NormalizationTest
)
102 if __name__
== "__main__":