libstdc++: Add copyright and license text to new generated headers
[official-gcc.git] / libstdc++-v3 / scripts / gen_text_encoding_data.py
blob13792b5f5e7d29eadc991fc4b7abff0ed4624299
1 #!/usr/bin/env python3
3 # Script to generate tables for libstdc++ std::text_encoding.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify it under
8 # the terms of the GNU General Public License as published by the Free
9 # Software Foundation; either version 3, or (at your option) any later
10 # version.
12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 # for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING3. If not see
19 # <http://www.gnu.org/licenses/>.
21 # To update the Libstdc++ static data in <bits/text_encoding-data.h> download
22 # the latest:
23 # https://www.iana.org/assignments/character-sets/character-sets-1.csv
24 # Then run this script and save the output to
25 # include/bits/text_encoding-data.h
27 import sys
28 import csv
# Exactly one command-line argument is required: the path to the
# character sets CSV file.  Anything else gets a usage message on stderr
# and a non-zero exit status.
if len(sys.argv) != 2:
    usage = "Usage: {} <character sets csv>".format(sys.argv[0])
    print(usage, file=sys.stderr)
    raise SystemExit(1)
# Text emitted at the very top of the generated header: the "generated
# file" notice, the copyright and license notice, and the Doxygen @file
# block.  The @file block must be terminated with "*/"; if it is left open
# the C++ preprocessor treats everything printed after it (the include
# guard check and the whole table) as part of the comment.
GENERATED_FILE_PROLOGUE = """// Generated by gen_text_encoding_data.py, do not edit.

// Copyright The GNU Toolchain Authors.

// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/text_encoding-data.h
 * This is an internal header file, included by other library headers.
 * Do not attempt to use it directly. @headername{text_encoding}
 */
"""

# print() appends a newline, which leaves one blank line after the prologue.
print(GENERATED_FILE_PROLOGUE)

# Emit a check that makes the generated header fail to compile unless
# _GLIBCXX_GET_ENCODING_DATA is defined at the point of inclusion.
print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
print('# error "This is not a public header, do not include it directly"')
print("#endif\n")
67 # We need to generate a list of initializers of the form { mib, alias }, e.g.,
68 # { 3, "US-ASCII" },
69 # { 3, "ISO646-US" },
70 # { 3, "csASCII" },
71 # { 4, "ISO_8859-1:1987" },
72 # { 4, "latin1" },
73 # The initializers must be sorted by the mib value. The first entry for
74 # a given mib must be the primary name for the encoding. Any aliases for
75 # the encoding come after the primary name.
76 # We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
77 # offset into the list of the mib=106, alias="UTF-8" entry. This is used
78 # to optimize the common case, so we don't need to search for "UTF-8".
# Maps each mibEnum value to its list of names: the primary name first,
# followed by its aliases.
charsets = {}
# Decode explicitly as UTF-8 rather than with the locale's default encoding,
# so the result does not depend on the environment the script is run in.
# newline='' leaves line-ending handling to the csv module.
with open(sys.argv[1], newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)  # skip header row
    for row in reader:
        # Columns used: 1 = primary name, 2 = mibEnum, 5 = aliases.
        mib = int(row[2])
        if mib in charsets:
            raise ValueError("Multiple rows for mibEnum={}".format(mib))
        name = row[1]
        # The aliases cell holds whitespace-separated names.
        aliases = row[5].split()
        # Ensure primary name comes first
        if name in aliases:
            aliases.remove(name)
        charsets[mib] = [name] + aliases
# The C++ standard specifies that "NATS-DANO" and "NATS-DANO-ADD" are
# excluded, so drop both entries if the registry contains them.
for excluded_mib in (33, 34):
    charsets.pop(excluded_mib, None)

# Additional aliases, keyed by mib, that are not official IANA aliases.
# "ASCII" is included in the implementation-defined superset of aliases
# for US-ASCII.  See also LWG 4043.
extra_aliases = {
    3: ["ASCII"],
}
# Print one initializer per (mib, name) pair in ascending mib order.
# `count` is the number of initializers printed so far, which is the offset
# of the next entry in the list.
entry_fmt = ' {{ {:4}, "{}" }},'
count = 0
for mib_enum, encoding_names in sorted(charsets.items()):
    # The first name is the primary one; when it is "UTF-8", record the
    # offset of that entry before printing it.
    if encoding_names[0] == "UTF-8":
        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
    for alias in encoding_names:
        print(entry_fmt.format(mib_enum, alias))
    count += len(encoding_names)
    # Non-IANA aliases follow the registered names for the same mib and
    # are counted as well.
    for alias in extra_aliases.get(mib_enum, []):
        print((entry_fmt + ' // libstdc++ extension').format(mib_enum, alias))
        count += 1
# <text_encoding> gives an error if _GLIBCXX_GET_ENCODING_DATA is still
# defined after including the generated header.  The #undef is printed as
# the very last step so that output from a run that stopped early is not
# usable.
print()
print("#undef _GLIBCXX_GET_ENCODING_DATA")