Merge branch 'source-get-id-docs' into 'master'
[glib.git] / tests / gen-casefold-txt.py
blob3c55828d3a5214ebc5d1b0cf9dfb961a8e004d50
1 #!/usr/bin/env python3
2 # Copyright (C) 1998, 1999 Tom Tromey
3 # Copyright (C) 2001 Red Hat Software
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2, or (at your option)
8 # any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>.
18 """
19 gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
20 See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
21 Usage: gen-casefold-txt.py UNICODE-VERSION CaseFolding.txt
22 I consider the output of this program to be unrestricted.
23 Use it as you will.
24 """
26 import sys
27 import argparse
def main(argv):
    """Print casefolding test cases derived from a CaseFolding.txt file.

    A fixed header (including one hand-crafted test case) is written to
    stdout first.  Then, for every entry of the input file whose status
    is neither ``S`` (simple) nor ``T`` (Turkic), one line is printed
    containing the source character, a tab, and the string built from the
    entry's mapping code points.

    Args:
        argv: Full argument vector.  ``argv[0]`` is ignored; the remaining
            items are parsed by argparse as the Unicode version string
            and the path to the CaseFolding.txt file.

    Raises:
        SystemExit: If argparse rejects the arguments, or if an entry of
            the input file has fewer than three ``;``-separated fields.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data")
    parser.add_argument("UNICODE-VERSION")
    parser.add_argument("CaseFolding.txt")
    args = parser.parse_args(argv[1:])
    # The positional names are not valid identifiers, hence getattr().
    version = getattr(args, "UNICODE-VERSION")
    filename = getattr(args, "CaseFolding.txt")

    print("""\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#""".format(version))

    # Names of fields in the CaseFolding table
    CODE, STATUS, MAPPING = range(3)
    # Statuses that are not emitted: simple (S) and Turkic (T) mappings.
    skipped_statuses = ("S", "T")

    with open(filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            # strip comments and skip empty lines
            line = line.split("#", 1)[0].strip()
            if not line:
                continue

            fields = [f.strip() for f in line.split(";", 3)[:3]]
            if len(fields) != 3:
                raise SystemExit(
                    "Entry for %s has wrong number of fields (%d)" % (
                        fields[CODE], len(fields)))

            # Compare the whole status value: a substring test against
            # "ST" would also match an empty status and the string "ST".
            if fields[STATUS] in skipped_statuses:
                continue

            code = chr(int(fields[CODE], 16))
            values = "".join(
                chr(int(v, 16)) for v in fields[MAPPING].split())
            print("{}\t{}".format(code, values))
# Script entry point: run main() on the process arguments and use its
# return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)