2 # Generate the locale/C-translit.h file.
3 # Copyright (C) 2018-2022 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
25 "Source of a string literal and its decomposition into code points."
26 def __init__(self
, s
):
28 # 0 regular character sequence
30 # 2 in hexadecimal escape sequence
38 result
.append(ord(ch
))
41 result
.append(ord(ch
))
47 raise ValueError("invalid character {!r} in {!r}".format(
50 if ch
in "0123456789abcdefABCDEF":
51 result
[-1] = result
[-1] * 16 + int(ch
, 16)
58 raise ValueError("trailing backslash in {!r}".format(s
))
61 self
.decoded
= tuple(result
)
65 "Pair of transliteration and source."
67 __RE_TRANSLIT
= re
.compile(
68 r
'^"((?:[^"\\]|\\x[0-9a-fA-F])+)"\s+'
69 r
'"((?:[^"\\]|\\["\\])*)"\s*(?:#.*)?$')
71 def __init__(self
, line
):
72 match
= self
.__RE
_TRANSLIT
.match(line
)
74 raise IOError("invalid line {}: {!r}".format(
76 codepoints
, replacement
= match
.groups()
77 self
.codepoints
= StringLiteral(codepoints
)
78 self
.replacement
= StringLiteral(replacement
)
81 # List of Translit objects.
84 # Read transliterations from standard input.
85 for lineno
, line
in enumerate(sys
.stdin
):
87 # Skip empty lines and comments.
88 if (not line
) or line
[0] == '#':
90 translit
= Translit(line
)
91 # Check ordering of codepoints.
93 and translit
.codepoints
.decoded
<= translits
[-1].codepoints
.decoded
:
94 raise IOError("unexpected codepoint {!r} on line {}: {!r}".format(
95 translit
.codepoints
.decoded
, lineno
+ 1, line
))
96 translits
.append(translit
)
98 # Generate the C sources.
99 write
= sys
.stdout
.write
100 write("#include <stdint.h>\n")
101 write("#define NTRANSLIT {}\n".format(len(translits
)))
103 write("static const uint32_t translit_from_idx[] =\n{\n ")
106 for translit
in translits
:
114 write("{:4}".format(total
))
115 total
+= len(translit
.codepoints
.decoded
) + 1
119 write("static const wchar_t translit_from_tbl[] =\n ")
122 for translit
in translits
:
131 if col
> 2 and col
+ len(translit
.codepoints
.source
) + 4 >= 79:
137 write("L\"{}\"".format(translit
.codepoints
.source
))
138 col
+= len(translit
.codepoints
.source
) + 3
141 write("static const uint32_t translit_to_idx[] =\n{\n ")
144 for translit
in translits
:
152 write("{:4}".format(total
))
153 total
+= len(translit
.replacement
.decoded
) + 2
157 write("static const wchar_t translit_to_tbl[] =\n ")
160 for translit
in translits
:
169 if col
> 2 and col
+ len(translit
.replacement
.source
) + 6 >= 79:
175 write("L\"{}\\0\"".format(translit
.replacement
.source
))
176 col
+= len(translit
.replacement
.source
) + 5