Add extra files found in etc/ sub-directory to ETC_SUPPORT in src-release.sh
[binutils-gdb.git] / gdb / ada-unicode.py
blobf128dee69f543b14c4476dce0fe2b86c4935dcd0
1 #!/usr/bin/env python3
3 # Generate Unicode case-folding table for Ada.
5 # Copyright (C) 2022-2024 Free Software Foundation, Inc.
7 # This file is part of GDB.
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # This generates the ada-casefold.h header.
23 # Usage:
24 # python ada-unicode.py
26 import gdbcopyright
# A run of consecutive code points that all share the same pair of
# case-folding deltas.
class Range:
    def __init__(self, range_start: int, upper_delta: int, lower_delta: int):
        # A freshly created range spans exactly one code point; it is
        # grown afterwards by assigning to range_end.
        self._range_start = self._range_end = range_start
        self._upper_delta, self._lower_delta = upper_delta, lower_delta

    # First code point of the range (read-only).
    @property
    def range_start(self) -> int:
        return self._range_start

    # Delta between RANGE_START and the upper-case variant of that
    # character (read-only).
    @property
    def upper_delta(self) -> int:
        return self._upper_delta

    # Delta between RANGE_START and the lower-case variant of that
    # character (read-only).
    @property
    def lower_delta(self) -> int:
        return self._lower_delta

    # Last code point of the range.  This is the only attribute that
    # can be reassigned after construction.
    @property
    def range_end(self) -> int:
        return self._range_end

    @range_end.setter
    def range_end(self, val: int) -> None:
        self._range_end = val
# Every range that has been completed so far, in the order in which
# the ranges were finished.
all_ranges: list[Range] = []

# The range currently being extended.  None means that we are not
# inside any range at the moment.
current_range: Range | None = None
# Close the range currently being built, if there is one, by moving it
# onto ALL_RANGES.  Does nothing when no range is open.
def finish_range():
    global current_range

    if current_range is None:
        return

    all_ranges.append(current_range)
    current_range = None
# Account for the code point VAL: either extend the range currently
# being built, start a new one, or -- when VAL has no usable case
# mapping -- close whatever range is open.
def process_codepoint(val: int):
    global current_range

    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()

    # A code point is only interesting if it has a case variant at all
    # and if both variants are single characters.  For example, U+00DF
    # ("LATIN SMALL LETTER SHARP S", aka Eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017), which this simple
    # scheme cannot represent.  Since a range stands for a run of
    # characters with identical folding deltas, any skipped code point
    # must also end the run in progress.
    has_case_variant = char != lowered or char != uppered
    single_char_mapping = len(lowered) == 1 and len(uppered) == 1
    if not (has_case_variant and single_char_mapping):
        finish_range()
        return

    upper_delta = ord(uppered) - val
    lower_delta = ord(lowered) - val

    # A change in either delta means VAL cannot join the open range.
    if current_range is not None:
        same_deltas = (
            upper_delta == current_range.upper_delta
            and lower_delta == current_range.lower_delta
        )
        if not same_deltas:
            finish_range()

    if current_range is None:
        current_range = Range(val, upper_delta, lower_delta)

    # Whether the range is new or continued, VAL is now its last member.
    current_range.range_end = val
# Walk the entire Unicode code space.  The last valid code point is
# U+10FFFF, so the exclusive upper bound of the range must be 0x110000.
for codepoint in range(0x110000):
    process_codepoint(codepoint)

# Flush a range that is still open once the final code point has been
# handled; otherwise it would never be added to ALL_RANGES.
finish_range()
# Emit the generated header: the copyright banner, a blank line, and
# then one brace-enclosed table entry per completed range.
with open("ada-casefold.h", "w") as f:
    banner = gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB")
    print(banner, file=f)
    print(file=f)
    for r in all_ranges:
        # Each entry is {start, end, upper delta, lower delta}.
        entry = f" {{{r.range_start}, {r.range_end}, {r.upper_delta}, {r.lower_delta}}},"
        print(entry, file=f)