Add AArch64 HWCAP2_EBF16 from Linux 6.0 to bits/hwcap.h
[glibc.git] / scripts / glibcpp.py
blob455459a609eab1203a091b77fec75538c62f6cb3
1 #! /usr/bin/python3
2 # Approximation to C preprocessing.
3 # Copyright (C) 2019-2022 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
20 """
21 Simplified lexical analyzer for C preprocessing tokens.
23 Does not implement trigraphs.
25 Does not implement backslash-newline in the middle of any lexical
26 item other than a string literal.
28 Does not implement universal-character-names in identifiers.
30 Treats prefixed strings (e.g. L"...") as two tokens (L and "...").
32 Accepts non-ASCII characters only within comments and strings.
33 """
35 import collections
36 import operator
37 import re
38 import sys
# Caution: The order of the outermost alternation matters.
# STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
# BLOCK_COMMENT before BAD_BLOCK_COM before PUNCTUATOR, and OTHER must
# be last.
# Caution: There should be no capturing groups other than the named
# captures in the outermost alternation.

# For reference, these are all of the C punctuators as of C11:
#   [ ] ( ) { } , ; ? ~
#   ! != * *= / /= ^ ^= = ==
#   # ##
#   % %= %> %: %:%:
#   & &= &&
#   | |= ||
#   + += ++
#   - -= -- ->
#   . ...
#   : :>
#   < <% <: << <<= <=
#   > >= >> >>=

# The BAD_* tokens are not part of the official definition of pp-tokens;
# they match unclosed strings, character constants, and block comments,
# so that the regex engine doesn't have to backtrack all the way to the
# beginning of a broken construct and then emit dozens of junk tokens.

# Note: regex alternation is ordered, so \r\n must be tried before the
# lone \r (and before character classes containing \r); otherwise a
# CRLF pair is consumed as two separate line breaks and a
# backslash-CRLF continuation fails to lex as a single escape.
PP_TOKEN_RE_ = re.compile(r"""
    (?P<STRING>        \"(?:[^\"\\\r\n]|\\(?:\r\n|[\r\n -~]))*\")
   |(?P<BAD_STRING>    \"(?:[^\"\\\r\n]|\\[ -~])*)
   |(?P<CHARCONST>     \'(?:[^\'\\\r\n]|\\(?:\r\n|[\r\n -~]))*\')
   |(?P<BAD_CHARCONST> \'(?:[^\'\\\r\n]|\\[ -~])*)
   |(?P<BLOCK_COMMENT> /\*(?:\*(?!/)|[^*])*\*/)
   |(?P<BAD_BLOCK_COM> /\*(?:\*(?!/)|[^*])*\*?)
   |(?P<LINE_COMMENT>  //[^\r\n]*)
   |(?P<IDENT>         [_a-zA-Z][_a-zA-Z0-9]*)
   |(?P<PP_NUMBER>     \.?[0-9](?:[0-9a-df-oq-zA-DF-OQ-Z_.]|[eEpP][+-]?)*)
   |(?P<PUNCTUATOR>
       [,;?~(){}\[\]]
     | [!*/^=]=?
     | \#\#?
     | %(?:[=>]|:(?:%:)?)?
     | &[=&]?
     |\|[=|]?
     |\+[=+]?
     | -[=->]?
     |\.(?:\.\.)?
     | :>?
     | <(?:[%:]|<(?:=|<=?)?)?
     | >(?:=|>=?)?)
   |(?P<ESCNL>         \\(?:\r\n|\r|\n))
   |(?P<WHITESPACE>    [ \t\n\r\v\f]+)
   |(?P<OTHER>         .)
""", re.DOTALL | re.VERBOSE)

HEADER_NAME_RE_ = re.compile(r"""
    < [^>\r\n]+ >
  | " [^"\r\n]+ "
""", re.DOTALL | re.VERBOSE)

# Match CRLF first so a Windows line ending counts as ONE line break;
# with the old ordering (\r|\n|\r\n) the \r\n alternative was
# unreachable and "\r\n" advanced the tokenizer's line counter twice.
ENDLINE_RE_ = re.compile(r"""\r\n|\r|\n""")

# based on the sample code in the Python re documentation
Token_ = collections.namedtuple("Token", (
    "kind", "text", "line", "column", "context"))
Token_.__doc__ = """
    One C preprocessing token, comment, or chunk of whitespace.

    'kind' identifies the token type, which will be one of:
        STRING, CHARCONST, BLOCK_COMMENT, LINE_COMMENT, IDENT,
        PP_NUMBER, PUNCTUATOR, ESCNL, WHITESPACE, HEADER_NAME,
        or OTHER.  The BAD_* alternatives in PP_TOKEN_RE_ are
        handled within tokenize_c, below.

    'text' is the sequence of source characters making up the token;
        no decoding whatsoever is performed.

    'line' and 'column' give the position of the first character of the
        token within the source file.  They are both 1-based.

    'context' indicates whether or not this token occurred within a
        preprocessing directive; it will be None for running text,
        '<null>' for the leading '#' of a directive line (because '#'
        all by itself on a line is a "null directive"), or the name of
        the directive for tokens within a directive line, starting with
        the IDENT for the name itself.
"""

def tokenize_c(file_contents, reporter):
    """Yield a series of Token objects, one for each preprocessing
    token, comment, or chunk of whitespace within FILE_CONTENTS.
    The REPORTER object is expected to have one method,
    reporter.error(token, message), which will be called to
    indicate a lexical error at the position of TOKEN.
    If MESSAGE contains the four-character sequence '{!r}', that
    is expected to be replaced by repr(token.text).
    """
    # Bind the module-level regexes and Token type to locals once,
    # outside the scanning loop.
    Token = Token_
    PP_TOKEN_RE = PP_TOKEN_RE_
    ENDLINE_RE = ENDLINE_RE_
    HEADER_NAME_RE = HEADER_NAME_RE_

    line_num = 1
    line_start = 0
    pos = 0
    limit = len(file_contents)
    directive = None
    at_bol = True
    while pos < limit:
        if directive == "include":
            # Only directly after '#include' can <...> or "..." be a
            # header name rather than an expression or string literal.
            mo = HEADER_NAME_RE.match(file_contents, pos)
            if mo:
                kind = "HEADER_NAME"
                directive = "after_include"
            else:
                mo = PP_TOKEN_RE.match(file_contents, pos)
                kind = mo.lastgroup
                if kind != "WHITESPACE":
                    directive = "after_include"
        else:
            mo = PP_TOKEN_RE.match(file_contents, pos)
            kind = mo.lastgroup

        text = mo.group()
        line = line_num
        column = mo.start() - line_start
        adj_line_start = 0
        # only these kinds can contain a newline
        if kind in ("WHITESPACE", "BLOCK_COMMENT", "LINE_COMMENT",
                    "STRING", "CHARCONST", "BAD_BLOCK_COM", "ESCNL"):
            for tmo in ENDLINE_RE.finditer(text):
                line_num += 1
                adj_line_start = tmo.end()
            if adj_line_start:
                line_start = mo.start() + adj_line_start

        # Track whether or not we are scanning a preprocessing directive.
        if kind == "LINE_COMMENT" or (kind == "WHITESPACE" and adj_line_start):
            at_bol = True
            directive = None
        else:
            if kind == "PUNCTUATOR" and text == "#" and at_bol:
                directive = "<null>"
            elif kind == "IDENT" and directive == "<null>":
                directive = text
            at_bol = False

        # Report ill-formed tokens and rewrite them as their well-formed
        # equivalents, so downstream processing doesn't have to know about them.
        # (Rewriting instead of discarding provides better error recovery.)
        if kind == "BAD_BLOCK_COM":
            reporter.error(Token("BAD_BLOCK_COM", "", line, column+1, ""),
                           "unclosed block comment")
            text += "*/"
            kind = "BLOCK_COMMENT"
        elif kind == "BAD_STRING":
            reporter.error(Token("BAD_STRING", "", line, column+1, ""),
                           "unclosed string")
            text += "\""
            kind = "STRING"
        elif kind == "BAD_CHARCONST":
            reporter.error(Token("BAD_CHARCONST", "", line, column+1, ""),
                           "unclosed char constant")
            text += "'"
            kind = "CHARCONST"

        tok = Token(kind, text, line, column+1,
                    "include" if directive == "after_include" else directive)
        # Do not complain about OTHER tokens inside macro definitions.
        # $ and @ appear in macros defined by headers intended to be
        # included from assembly language, e.g. sysdeps/mips/sys/asm.h.
        if kind == "OTHER" and directive != "define":
            # Bug fix: this is a module-level function with no 'self';
            # diagnostics must go through REPORTER like everywhere else
            # (the old 'self.error(...)' raised NameError).
            reporter.error(tok, "stray {!r} in program")

        yield tok
        pos = mo.end()
class MacroDefinition(collections.namedtuple('MacroDefinition',
                                             'name_token args body error')):
    """A preprocessor macro definition.

    name_token is the Token_ for the name.

    args is None for a macro that is not function-like.  Otherwise, it
    is a tuple that contains the macro argument name tokens.

    body is a tuple that contains the tokens that constitute the body
    of the macro definition (excluding whitespace).

    error is None if no error was detected, or otherwise a problem
    description associated with this macro definition.
    """

    @property
    def function(self):
        """True exactly when this macro is function-like."""
        return self.args is not None

    @property
    def name(self):
        """The macro's name, as a plain string."""
        return self.name_token.text

    @property
    def line(self):
        """Line number at which the macro definition appears."""
        return self.name_token.line

    @property
    def args_lowered(self):
        """The argument names as a list of strings (None if object-like)."""
        if self.args is None:
            return None
        return [arg.text for arg in self.args]

    @property
    def body_lowered(self):
        """The replacement-text tokens as a list of strings."""
        return [tok.text for tok in self.body]
def macro_definitions(tokens):
    """A generator for C macro definitions among tokens.

    The generator yields MacroDefinition objects.

    tokens must be iterable, yielding Token_ objects.
    """
    # Token for the macro name, or None while not inside a #define.
    macro_name = None
    # Becomes True at the 'define' directive-name IDENT (the first
    # IDENT seen with context 'define'); stays True through the macro
    # name and is reset on the first token after the name, which
    # decides whether the macro is function-like.
    macro_start = False
    # List while collecting the identifier-list, a tuple once ')' is
    # seen; None for object-like macros.
    macro_args = None
    # True while processing the macro identifier-list.
    in_macro_args = False
    # First problem detected in the current definition, if any.
    error = None
    # Non-whitespace replacement-text tokens collected so far.
    body = []

    for token in tokens:
        if token.context == 'define' and macro_name is None \
           and token.kind == 'IDENT':
            # Starting up macro processing.
            if macro_start:
                # First identifier is the macro name.
                macro_name = token
            else:
                # Next token is the name.
                macro_start = True
            continue

        if macro_name is None:
            # Drop tokens not in macro definitions.
            continue

        if token.context != 'define':
            # End of the macro definition.
            if in_macro_args and error is None:
                error = 'macro definition ends in macro argument list'
            yield MacroDefinition(macro_name, macro_args, tuple(body), error)
            # No longer in a macro definition.
            macro_name = None
            macro_start = False
            macro_args = None
            in_macro_args = False
            error = None
            body.clear()
            continue

        if macro_start:
            # First token after the macro name.  An immediately
            # following '(' introduces a function-like macro.
            macro_start = False
            if token.kind == 'PUNCTUATOR' and token.text == '(':
                macro_args = []
                in_macro_args = True
                continue

        if in_macro_args:
            if token.kind == 'IDENT' \
               or (token.kind == 'PUNCTUATOR' and token.text == '...'):
                # Macro argument or ... placeholder.
                macro_args.append(token)
            if token.kind == 'PUNCTUATOR':
                if token.text == ')':
                    macro_args = tuple(macro_args)
                    in_macro_args = False
                elif token.text == ',':
                    pass  # Skip.  Not a full syntax check.
                elif error is None:
                    # NOTE(review): a '...' placeholder is appended
                    # above but, being neither ')' nor ',', appears to
                    # also be flagged here as an invalid punctuator —
                    # TODO confirm against the upstream file.
                    error = 'invalid punctuator in macro argument list: ' \
                        + repr(token.text)
            elif error is None:
                error = 'invalid {} token in macro argument list'.format(
                    token.kind)
            continue

        if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
            body.append(token)

    # Emit the macro in case the last line does not end with a newline.
    if macro_name is not None:
        if in_macro_args and error is None:
            error = 'macro definition ends in macro argument list'
        yield MacroDefinition(macro_name, macro_args, tuple(body), error)
# Used to split UL etc. suffixes from numbers such as 123UL.
# (The character class [^ullULL] is equivalent to [^ulUL]; the
# duplicated letters are harmless.)
RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)')

BINARY_OPERATORS = {
    '+': operator.add,
    '<<': operator.lshift,
}

# Use the general-purpose dict type if it is order-preserving.
if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
    OrderedDict = collections.OrderedDict
else:
    OrderedDict = dict

def macro_eval(macro_defs, reporter):
    """Compute macro values.

    macro_defs is the output from macro_definitions.  reporter is an
    object that accepts reporter.error(line_number, message) and
    reporter.note(line_number, message) calls to report errors
    and error context invocations.

    The returned dict contains the values of macros which are not
    function-like, pairing their names with their computed values.

    The current implementation is incomplete.  It is deliberately not
    entirely faithful to C, even in the implemented parts.  It checks
    that macro replacements follow certain syntactic rules even if
    they are never evaluated.
    """

    # Unevaluated macro definitions by name.
    definitions = OrderedDict()
    for md in macro_defs:
        if md.name in definitions:
            reporter.error(md.line, 'macro {} redefined'.format(md.name))
            reporter.note(definitions[md.name].line,
                          'location of previous definition')
        else:
            definitions[md.name] = md

    # String to value mappings for fully evaluated macros.
    evaluated = OrderedDict()

    # String to macro definitions during evaluation.  Nice error
    # reporting relies on deterministic iteration order.
    stack = OrderedDict()

    def eval_token(current, token):
        """Evaluate one macro token.

        Integers and strings are returned as such (the latter still
        quoted).  Identifiers are expanded.

        None indicates an empty expansion or an error.
        """
        if token.kind == 'PP_NUMBER':
            value = None
            m = RE_SPLIT_INTEGER_SUFFIX.match(token.text)
            if m:
                try:
                    value = int(m.group(1), 0)
                except ValueError:
                    pass
            if value is None:
                reporter.error(token.line,
                    'invalid number {!r} in definition of {}'.format(
                        token.text, current.name))
            return value

        if token.kind == 'STRING':
            return token.text

        if token.kind == 'CHARCONST' and len(token.text) == 3:
            return ord(token.text[1])

        if token.kind == 'IDENT':
            name = token.text
            result = eval1(current, name)
            if name not in evaluated:
                evaluated[name] = result
            return result

        reporter.error(token.line,
            'unrecognized {!r} in definition of {}'.format(
                token.text, current.name))
        return None

    def eval1(current, name):
        """Evaluate one name.

        The name is looked up and the macro definition evaluated
        recursively if necessary.  The current argument is the macro
        definition being evaluated.

        None as a return value indicates an error.
        """
        # Fast path if the value has already been evaluated.
        if name in evaluated:
            return evaluated[name]

        try:
            md = definitions[name]
        except KeyError:
            reporter.error(current.line,
                'reference to undefined identifier {} in definition of {}'
                .format(name, current.name))
            return None

        if md.name in stack:
            # Recursive macro definition.
            md = stack[name]
            reporter.error(md.line,
                'macro definition {} refers to itself'.format(md.name))
            for md1 in reversed(list(stack.values())):
                if md1 is md:
                    break
                reporter.note(md1.line,
                    'evaluated from {}'.format(md1.name))
            return None

        # Bug fix: reject function-like macros *before* pushing onto
        # the evaluation stack.  The early return used to leave the
        # name on the stack forever, so later references to the macro
        # were misreported as recursive definitions.
        if md.function:
            reporter.error(current.line,
                'attempt to evaluate function-like macro {}'.format(name))
            reporter.note(md.line, 'definition of {}'.format(md.name))
            return None

        stack[md.name] = md
        try:
            body = md.body
            if len(body) == 0:
                # Empty expansion.
                return None

            # Remove surrounding ().
            if body[0].text == '(' and body[-1].text == ')':
                body = body[1:-1]
                had_parens = True
            else:
                had_parens = False

            if len(body) == 1:
                return eval_token(md, body[0])

            # Minimal expression evaluator for binary operators.
            op = body[1].text
            if len(body) == 3 and op in BINARY_OPERATORS:
                if not had_parens:
                    reporter.error(body[1].line,
                        'missing parentheses around {} expression'.format(op))
                    reporter.note(md.line,
                        'in definition of macro {}'.format(md.name))

                left = eval_token(md, body[0])
                right = eval_token(md, body[2])

                # Bug fix: report non-integer operands against the
                # operand *token*.  The evaluated operand may be None
                # or a string, which has no .line attribute (the old
                # code crashed with AttributeError, and used left.line
                # for the right operand as well).  Bail out instead of
                # letting the operator call raise TypeError.
                ok = True
                if not isinstance(left, int):
                    reporter.error(body[0].line,
                        'left operand of {} is not an integer'.format(op))
                    reporter.note(md.line,
                        'in definition of macro {}'.format(md.name))
                    ok = False
                if not isinstance(right, int):
                    reporter.error(body[2].line,
                        'right operand of {} is not an integer'.format(op))
                    reporter.note(md.line,
                        'in definition of macro {}'.format(md.name))
                    ok = False
                if not ok:
                    return None
                return BINARY_OPERATORS[op](left, right)

            reporter.error(md.line,
                'uninterpretable macro token sequence: {}'.format(
                    ' '.join(md.body_lowered)))
            return None
        finally:
            del stack[md.name]

    # Start of main body of macro_eval.
    for md in definitions.values():
        name = md.name
        if name not in evaluated and not md.function:
            evaluated[name] = eval1(md, name)
    return evaluated