3 # Script to generate tables for libstdc++ std::text_encoding.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify it under
8 # the terms of the GNU General Public License as published by the Free
9 # Software Foundation; either version 3, or (at your option) any later
12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING3. If not see
19 # <http://www.gnu.org/licenses/>.
21 # To update the Libstdc++ static data in <bits/text_encoding-data.h> download
23 # https://www.iana.org/assignments/character-sets/character-sets-1.csv
24 # Then run this script and save the output to
25 # include/bits/text_encoding-data.h
# Exactly one command-line argument is expected: the path to the IANA
# character sets CSV file.
if len(sys.argv) != 2:
    print("Usage: %s <character sets csv>" % sys.argv[0], file=sys.stderr)
    # Stop here rather than falling through: everything below reads
    # sys.argv[1] and would otherwise fail or emit partial output.
    sys.exit(1)
34 print("""// Generated by gen_text_encoding_data.py, do not edit.
36 // Copyright The GNU Toolchain Authors.
38 // This file is part of the GNU ISO C++ Library. This library is free
39 // software; you can redistribute it and/or modify it under the
40 // terms of the GNU General Public License as published by the
41 // Free Software Foundation; either version 3, or (at your option)
44 // This library is distributed in the hope that it will be useful,
45 // but WITHOUT ANY WARRANTY; without even the implied warranty of
46 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47 // GNU General Public License for more details.
49 // Under Section 7 of GPL version 3, you are granted additional
50 // permissions described in the GCC Runtime Library Exception, version
51 // 3.1, as published by the Free Software Foundation.
53 // You should have received a copy of the GNU General Public License and
54 // a copy of the GCC Runtime Library Exception along with this program;
55 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
56 // <http://www.gnu.org/licenses/>.
58 /** @file bits/text_encoding-data.h
59 * This is an internal header file, included by other library headers.
60 * Do not attempt to use it directly. @headername{text_encoding}
# Emit the check that makes the generated header fail to compile with an
# #error unless _GLIBCXX_GET_ENCODING_DATA has been defined by the includer.
for guard_line in (
    "#ifndef _GLIBCXX_GET_ENCODING_DATA",
    '# error "This is not a public header, do not include it directly"',
):
    print(guard_line)
67 # We need to generate a list of initializers of the form { mib, alias }, e.g.,
71 # { 4, "ISO_8859-1:1987" },
73 # The initializers must be sorted by the mib value. The first entry for
74 # a given mib must be the primary name for the encoding. Any aliases for
75 # the encoding come after the primary name.
76 # We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
77 # offset into the list of the mib=106, alias="UTF-8" entry. This is used
78 # to optimize the common case, so we don't need to search for "UTF-8".
# Map each MIBenum value (as an int) to the list of names for that encoding,
# with the primary name first and the remaining aliases after it.
charsets = {}
with open(sys.argv[1], newline='') as f:
    reader = csv.reader(f)
    next(reader)  # skip header row
    # NOTE(review): row[1] (Name) and row[2] (MIBenum) follow the published
    # IANA character-sets-1.csv column layout; row[5] holds the aliases,
    # separated by whitespace. Confirm against the downloaded file.
    for row in reader:
        # Integer keys are needed so that the entries sort numerically and
        # so that the integer pops below find them.
        mib = int(row[2])
        if mib in charsets:
            raise ValueError("Multiple rows for mibEnum={}".format(mib))
        name = row[1]
        # Ensure primary name comes first, and is not repeated as an alias.
        aliases = [alias for alias in row[5].split() if alias != name]
        charsets[mib] = [name] + aliases
# The C++ standard excludes "NATS-DANO" and "NATS-DANO-ADD", so drop the
# entries with mib 33 and 34. A missing entry is not an error.
for excluded_mib in (33, 34):
    charsets.pop(excluded_mib, None)
# Additional aliases, keyed by mib, that are not official IANA aliases but
# are part of the implementation-defined superset accepted by libstdc++.
# Currently this is only "ASCII" for US-ASCII (mib 3).
extra_aliases = {
    3: ["ASCII"],
}
# Index of the next initializer to be printed. Its value on reaching the
# encoding whose primary name is "UTF-8" is the offset of that entry in the
# generated list, published as _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET.
count = 0
# Initializers must be sorted by mib, primary name first within each mib.
for mib in sorted(charsets):
    names = charsets[mib]
    if names[0] == "UTF-8":
        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
    for name in names:
        print(' {{ {:4}, "{}" }},'.format(mib, name))
        count += 1
    # Implementation-defined aliases follow the IANA names for the same mib
    # and occupy list slots too, so they advance the offset as well.
    for name in extra_aliases.get(mib, ()):
        print(' {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, name))
        count += 1
# <text_encoding> gives an error if this macro is left defined.
# It is emitted last so that output cut short by an earlier failure is
# not usable.
print()
print("#undef _GLIBCXX_GET_ENCODING_DATA")