Fix handling of large arguments passed by value.
[official-gcc.git] / contrib / unicode / gen_wcwidth.py
blob02b28bcedcfd0e9219e656ae2f98f153515d2d83
1 #!/usr/bin/env python3
3 # Script to generate tables for cpp_wcwidth, leveraging glibc's utf8_gen.py.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify it under
8 # the terms of the GNU General Public License as published by the Free
9 # Software Foundation; either version 3, or (at your option) any later
10 # version.
12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 # for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING3. If not see
19 # <http://www.gnu.org/licenses/>. */
21 import sys
22 import os
# Exactly one argument is expected: the Unicode version to generate for.
if len(sys.argv) != 2:
    # Substitute the program name for the %s placeholder in the usage text.
    print("usage: %s <unicode version>" % sys.argv[0], file=sys.stderr)
    sys.exit(1)
unicode_version = sys.argv[1]
# Parse a codepoint in the format output by glibc tools.
def parse_ucn(s):
    """Return the integer codepoint encoded by a string such as "<UA8FF>".

    The text between the leading "<U" and the trailing ">" is parsed as a
    hexadecimal number.

    Raises ValueError if S lacks that prefix or suffix, or if the enclosed
    text is not valid hexadecimal.  The exception message names the
    offending string, so a caller that prints the exception gets a useful
    diagnostic.
    """
    if not (s.startswith("<U") and s.endswith(">")):
        raise ValueError("expected <U...> codepoint, got %r" % s)
    return int(s[2:-1], base=16)
# Process a line of width output from utf8_gen.py and update global array.
# widths[c] holds the width recorded for codepoint c; every entry starts at 1.
widths = [1] * (1 + 0x10FFFF)
def process_width(line):
    """Record the width given on LINE for one codepoint or a range.

    Example lines:
      <UA8FF> 0
      <UA926>...<UA92D> 0

    The first whitespace-separated field is either a single codepoint or a
    "first...last" range (inclusive); the second field is the integer width.

    Raises IndexError if the width field is missing, and ValueError if the
    width or a codepoint cannot be parsed or the range has more than two
    parts.
    """
    s = line.split()
    width = int(s[1])
    r = s[0].split("...")
    if len(r) == 1:
        begin = parse_ucn(r[0])
        end = begin + 1
    elif len(r) == 2:
        begin = parse_ucn(r[0])
        # The range is inclusive; make END exclusive for the slice below.
        end = parse_ucn(r[1]) + 1
    else:
        # Name the bad field so the caller's warning explains the problem.
        raise ValueError("malformed codepoint range %r" % s[0])
    widths[begin:end] = [width] * (end - begin)
# To keep things simple, we use glibc utf8_gen.py as-is. It only outputs to a
# file named UTF-8, which is not configurable. Then we parse this into the form
# we want it.
os.system("from_glibc/utf8_gen.py --unicode_version %s" % unicode_version)
# Only the lines between "WIDTH" and "END WIDTH" are handed to process_width.
processing = False
# Use a context manager so the generated file is closed once it is parsed.
with open("UTF-8", "r") as utf8_file:
    for line in utf8_file:
        if processing:
            if line == "END WIDTH\n":
                processing = False
            else:
                try:
                    process_width(line)
                except (ValueError, IndexError) as e:
                    # The exception must be bound to a name to be printed;
                    # LINE still ends in a newline, hence end="".
                    print(e, "warning: ignored unexpected line: %s" % line,
                          file=sys.stderr, end="")
        elif line == "WIDTH\n":
            processing = True
# All bytes < 256 we treat as width 1. The slice end is exclusive, so it has
# to be 256 for codepoint 255 to be covered as well.
widths[0:256] = [1] * 256
# Condense the list to contiguous ranges. Each entry of all_ranges is a
# two-element list: [last codepoint of the run, width shared by the run].
# NOTE(review): the run still held in cur_range when the loop ends is never
# appended to all_ranges -- confirm the consumer of the table expects that.
all_ranges = []
cur_range = [-1, 1]
for codepoint, codepoint_width in enumerate(widths):
    if codepoint_width != cur_range[1]:
        # The width changed: close the current run and open a new one.
        all_ranges.append(cur_range)
        cur_range = [codepoint, codepoint_width]
    else:
        # Same width as the current run: extend it to this codepoint.
        cur_range[0] = codepoint
# Output the arrays for generated_cpp_wcwidth.h
print("/* Generated by contrib/unicode/gen_wcwidth.py,",
      "with the help of glibc's")
print(" utf8_gen.py, using version %s" % unicode_version,
      "of the Unicode standard. */")

# First table: the last codepoint of each range, eight entries per row.
print("\nstatic const cppchar_t wcwidth_range_ends[] = {", end="")
for index, entry in enumerate(all_ranges):
    separator = " " if index % 8 else "\n "
    print(separator, end="")
    print("0x%x," % (entry[0]), end="")
print("\n};\n")

# Second table: the width of each range, twenty-four entries per row.
print("static const unsigned char wcwidth_widths[] = {", end="")
for index, entry in enumerate(all_ranges):
    separator = " " if index % 24 else "\n "
    print(separator, end="")
    print("%d," % entry[1], end="")
print("\n};")