Sync move-if-change from Gnulib
[glibc.git] / conform / linknamespace.py
blob66b16e244a3842678e39ee56b465e4454b7760a4
1 #!/usr/bin/python3
2 # Check that use of symbols declared in a given header does not result
3 # in any symbols being brought in that are not reserved with external
4 # linkage for the given standard.
5 # Copyright (C) 2014-2021 Free Software Foundation, Inc.
6 # This file is part of the GNU C Library.
8 # The GNU C Library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU Lesser General Public
10 # License as published by the Free Software Foundation; either
11 # version 2.1 of the License, or (at your option) any later version.
13 # The GNU C Library is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Lesser General Public License for more details.
18 # You should have received a copy of the GNU Lesser General Public
19 # License along with the GNU C Library; if not, see
20 # <https://www.gnu.org/licenses/>.
22 import argparse
23 from collections import defaultdict
24 import os.path
25 import re
26 import subprocess
27 import sys
28 import tempfile
30 import glibcconform
# Extra symbols that are also allowed for now, each tracked by a bug:
#
# * Bug 17576: stdin, stdout, stderr only reserved with external
#   linkage when stdio.h included (and possibly not then), not
#   generally.
#
# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
#   used by re_comp.
WHITELIST = {
    'stdin',
    'stdout',
    'stderr',
    're_syntax_options',
}
def list_syms(filename):
    """Collect the GLOBAL and WEAK symbols from saved readelf -s output.

    FILENAME names a text file holding the output.  The result is a
    list of (object, symbol, binding, defined) tuples in input order:

    * object is the last '/'-separated component of the most recent
      'File: ' line, or FILENAME itself if no such line has been read
      yet;
    * symbol is the symbol name;
    * binding is 'GLOBAL' or 'WEAK';
    * defined is False when the section index column is 'UND' and
      True otherwise.

    Rows with fewer than eight columns, rows with any other binding
    and names containing characters outside [A-Za-z0-9_] are skipped.
    """
    file_marker = 'File: '
    symbols = []
    current_object = filename
    with open(filename, 'r') as listing:
        for raw_line in listing:
            text = raw_line.rstrip()
            if text.startswith(file_marker):
                # Start of a new object's table: remember only its
                # basename.
                current_object = text[len(file_marker):].split('/')[-1]
                continue
            # Architecture-specific st_other bits are printed inside []
            # and would shift the columns, so drop them before
            # splitting.
            columns = re.sub(r'\[.*?\]', '', text).split()
            if len(columns) < 8:
                continue
            binding, section, name = columns[4], columns[6], columns[7]
            if (binding in ('GLOBAL', 'WEAK')
                    and re.fullmatch('[A-Za-z0-9_]+', name)):
                symbols.append((current_object, name, binding,
                                section != 'UND'))
    return symbols
def main():
    """Run the link-time namespace check and exit with its status.

    Reads the command line, compiles a test object that takes the
    address of every symbol returned by
    glibcconform.list_exported_functions for the header, and follows
    strong undefined references through the library objects described
    by --libsyms.  Every symbol reached that neither starts with an
    underscore nor is listed in --stdsyms (or WHITELIST) is printed
    together with the chain of references that reached it.  Exits with
    status 1 if anything was printed and 0 otherwise.
    """
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    for flag, meta, help_text in (
            ('--header', 'HEADER', 'name of header'),
            ('--standard', 'STD', 'standard to use when processing header'),
            ('--cc', 'CC', 'C compiler to use'),
            ('--flags', 'CFLAGS', 'Compiler flags to use with CC'),
            ('--stdsyms', 'FILE', 'File with list of standard symbols'),
            ('--libsyms', 'FILE',
             'File with symbol information from libraries'),
            ('--readelf', 'READELF', 'readelf program to use')):
        parser.add_argument(flag, metavar=meta, help=help_text)
    args = parser.parse_args()

    # Symbols that are acceptable: one per line in the --stdsyms file,
    # plus the whitelisted exceptions.
    with open(args.stdsyms, 'r') as stdsyms_file:
        stdsyms = {entry.rstrip() for entry in stdsyms_file}
    stdsyms |= WHITELIST

    # Index the GLOBAL and WEAK symbols defined or used in the standard
    # libraries.
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for obj, name, bind, defined in list_syms(args.libsyms):
        is_strong = bind == 'GLOBAL'
        if defined:
            sym_objs[name].append(obj)
            if is_strong:
                seen_syms[obj].append(name)
        else:
            seen_syms[obj].append(name)
            if is_strong:
                strong_undef_syms[obj].append(name)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    #   possible that (a) any standard library definition is weak, so
    #   can be overridden by the user's definition, and (b) the symbol
    #   is only used conditionally and not if the program is limited to
    #   standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    #   data symbol rather than a function from the standard library,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    #   in a standard macro being compiled to call foo, this will not be
    #   detected.
    #
    # * Header inclusions should be compiled several times with
    #   different options such as -O2, -D_FORTIFY_SOURCE and
    #   -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    #   from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    #   the basis that if a macro calls an internal function, that
    #   function must also be declared in the header.  However, the
    #   header might also declare implementation-namespace functions
    #   that are not called by any standard macro in the header,
    #   resulting in false positives for any symbols brought in only
    #   through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    #   static linking, when a call is from one shared library to
    #   another or uses a PLT entry for a call within a shared library;
    #   such issues are only detected by this script if the same
    #   namespace issue applies for static linking.

    # Symbol -> text describing the reference chain that first reached it.
    seen_where = {}
    # Library objects already pulled in.
    files_seen = set()
    # Every undefined symbol found so far, and the ones still to follow.
    all_undef = {}
    current_undef = {}

    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, 'undef.c')
        obj_path = os.path.join(work_dir, 'undef.o')
        symtab_path = os.path.join(work_dir, 'undef.sym')
        # One pointer initialised with each symbol's address, so the
        # object file carries an undefined reference to every one.
        address_decls = '\n'.join(
            'void *__glibc_test_%s = (void *) &%s;' % (sym, sym)
            for sym in sorted(c_syms))
        with open(c_path, 'w') as c_file:
            c_file.write('#include <%s>\n%s\n' % (args.header, address_decls))
        compile_cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
                       % (args.cc, args.flags,
                          glibcconform.CFLAGS[args.standard],
                          c_path, obj_path))
        subprocess.check_call(compile_cmd, shell=True)
        readelf_cmd = ('LC_ALL=C %s -W -s %s > %s'
                       % (args.readelf, obj_path, symtab_path))
        subprocess.check_call(readelf_cmd, shell=True)
        for _, name, bind, defined in list_syms(symtab_path):
            if defined or bind != 'GLOBAL':
                continue
            origin = '[initial] %s' % name
            seen_where[name] = origin
            all_undef[name] = origin
            current_undef[name] = origin

    # Follow references until a pass finds no new undefined symbols.
    # Each library object is processed at most once, for the first
    # symbol (in sorted order within a pass) that it defines.
    while current_undef:
        next_undef = {}
        for sym, chain in sorted(current_undef.items()):
            for obj in sym_objs[sym]:
                if obj in files_seen:
                    continue
                files_seen.add(obj)
                for pulled in seen_syms[obj]:
                    # Keep the first chain recorded for each symbol.
                    seen_where.setdefault(
                        pulled, '%s -> [%s] %s' % (chain, obj, pulled))
                for needed in strong_undef_syms[obj]:
                    if needed in all_undef:
                        continue
                    needed_chain = '%s -> [%s] %s' % (chain, obj, needed)
                    all_undef[needed] = needed_chain
                    next_undef[needed] = needed_chain
        current_undef = next_undef

    # Report each reached symbol that does not start with an underscore
    # and is not in the allowed set.
    exit_status = 0
    for sym in sorted(seen_where):
        if not sym.startswith('_') and sym not in stdsyms:
            print(seen_where[sym])
            exit_status = 1
    sys.exit(exit_status)
# Run the check only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()