# Script to generate tables for cpp_wcwidth, leveraging glibc's utf8_gen.py.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.
# Exactly one command-line argument is expected: the Unicode version.  It is
# passed on to utf8_gen.py and quoted in the generated header comment.
if len(sys.argv) != 2:
    print("usage: %s <unicode version>" % sys.argv[0], file=sys.stderr)
    # Stop here: with no argument, carrying on would die with an IndexError
    # traceback on sys.argv[1]; with extra arguments it would silently
    # ignore them.
    sys.exit(1)
unicode_version = sys.argv[1]
# Parse a codepoint in the format output by glibc tools.
def parse_ucn(s):
    """Return the integer codepoint encoded by a "<Uxxxx>" token.

    s is the text of a single token such as "<UA8FF>".  The characters
    between the leading "<U" and the trailing ">" are read as a
    hexadecimal number.

    Raises ValueError if s does not start with "<U" and end with ">", or
    if the text in between is not a valid hexadecimal number.
    """
    if not (s.startswith("<U") and s.endswith(">")):
        raise ValueError("not a <Uxxxx> codepoint: %r" % s)
    return int(s[2:-1], base=16)
# Process a line of width output from utf8_gen.py and update the global array.
# widths is indexed by codepoint; every entry starts out as width 1 and
# process_width overrides the entries named by each line.
widths = [1] * (1 + 0x10FFFF)
def process_width(line):
    """Record the width given by one width line in the global widths list.

    NOTE(review): the line layout assumed here is the glibc charmap one,
    either of
        <UA8FF> 0
        <UA926>...<UA92D> 0
    i.e. a single codepoint or an inclusive "..." range, then whitespace,
    then the decimal width.  Confirm against the UTF-8 file that
    utf8_gen.py actually writes.

    Raises ValueError for a malformed codepoint, range or width, and
    IndexError when the line has fewer than two whitespace-separated
    fields.
    """
    fields = line.split()
    width = int(fields[1])
    r = fields[0].split("...")
    if len(r) == 1:
        # A lone codepoint is a range of length one.
        begin = parse_ucn(r[0])
        end = begin + 1
    elif len(r) == 2:
        # The range is inclusive in the file; make the end exclusive.
        begin = parse_ucn(r[0])
        end = parse_ucn(r[1]) + 1
    else:
        raise ValueError("malformed codepoint range: %r" % fields[0])
    # Slice assignment beyond the end of a list silently grows it, and a
    # reversed range would insert nothing; reject both instead of
    # corrupting the table.
    if not 0 <= begin < end <= len(widths):
        raise ValueError("codepoint range out of bounds: %r" % fields[0])
    widths[begin:end] = [width] * (end - begin)
# To keep things simple, we use glibc utf8_gen.py as-is.  It only outputs to a
# file named UTF-8, which is not configurable.  Then we parse this into the
# form we want.
os.system("from_glibc/utf8_gen.py --unicode_version %s" % unicode_version)

# Only the lines between a "WIDTH" line and the following "END WIDTH" line
# are handed to process_width; every other line of the file is skipped.
in_width_section = False
with open("UTF-8", "r") as charmap:
    for line in charmap:
        if in_width_section:
            if line == "END WIDTH\n":
                in_width_section = False
            else:
                try:
                    process_width(line)
                except (ValueError, IndexError) as e:
                    # The exception must be bound with "as e": the warning
                    # prints it, and an unbound name here would turn every
                    # malformed line into a NameError.
                    print(e, "warning: ignored unexpected line: %s" % line,
                          file=sys.stderr, end="")
        elif line == "WIDTH\n":
            in_width_section = True
# All codepoints < 256 we treat as width 1.  The end of a slice is exclusive,
# so it has to be 256 for codepoint 255 to be covered as well.
widths[0:256] = [1] * 256
# Condense the list to contiguous ranges.  Each entry of all_ranges is a
# two-element list: [last codepoint of the run, width shared by the run].
# NOTE(review): the seed below assumes the first codepoint has width 1, so
# that the placeholder [-1, 1] is extended rather than emitted -- confirm.
cur_range = [-1, 1]
all_ranges = []
for i, width in enumerate(widths):
    if width == cur_range[1]:
        # Same width as the open run: extend it to this codepoint.
        cur_range[0] = i
    else:
        # Width changed: close the open run and start a new one here.
        all_ranges.append(cur_range)
        cur_range = [i, width]
# NOTE(review): the run still open when the loop finishes is not appended, so
# the final stretch of codepoints has no table entry.  Left as-is because
# appending it would change the generated tables; confirm that the consumer
# of the tables supplies a default for codepoints past the last range end.
# Output the arrays for generated_cpp_wcwidth.h
# Two parallel C arrays are printed to stdout: wcwidth_range_ends[k] is the
# last codepoint of run k and wcwidth_widths[k] is that run's width.
# NOTE(review): the per-entry separators, the row lengths (8 and 24) and the
# closing "};" lines are reconstructed -- diff the output against the
# checked-in generated_cpp_wcwidth.h before committing.
print("/* Generated by contrib/unicode/gen_wcwidth.py,",
      "with the help of glibc's")
print(" utf8_gen.py, using version %s" % unicode_version,
      "of the Unicode standard. */")
print("\nstatic const cppchar_t wcwidth_range_ends[] = {", end="")
for i, r in enumerate(all_ranges):
    # Start a new indented row every 8 entries, else separate with a space.
    print(" " if i % 8 else "\n  ", end="")
    print("0x%x," % (r[0]), end="")
print("\n};\n")
print("static const unsigned char wcwidth_widths[] = {", end="")
for i, r in enumerate(all_ranges):
    # Widths are short, so 24 of them fit on one row.
    print(" " if i % 24 else "\n  ", end="")
    print("%d," % r[1], end="")
print("\n};")