elf/tst-rtld-list-diagnostics.py

   1 #!/usr/bin/python3
   2 # Test that the ld.so --list-diagnostics output has the expected syntax.
   3 # Copyright (C) 2022-2024 Free Software Foundation, Inc.
   4 # Copyright The GNU Toolchain Authors.
   5 # This file is part of the GNU C Library.
   6 #
   7 # The GNU C Library is free software; you can redistribute it and/or
   8 # modify it under the terms of the GNU Lesser General Public
   9 # License as published by the Free Software Foundation; either
  10 # version 2.1 of the License, or (at your option) any later version.
  11 #
  12 # The GNU C Library is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 # Lesser General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU Lesser General Public
  18 # License along with the GNU C Library; if not, see
  19 # <https://www.gnu.org/licenses/>.
  20
  21 import argparse
  22 import collections
  23 import subprocess
  24 import sys
  25
  26 try:
  27     subprocess.run
  28 except:
  29     class _CompletedProcess:
  30         def __init__(self, args, returncode, stdout=None, stderr=None):
  31             self.args = args
  32             self.returncode = returncode
  33             self.stdout = stdout
  34             self.stderr = stderr
  35
  36     def _run(*popenargs, input=None, timeout=None, check=False, **kwargs):
  37         assert(timeout is None)
  38         with subprocess.Popen(*popenargs, **kwargs) as process:
  39             try:
  40                 stdout, stderr = process.communicate(input)
  41             except:
  42                 process.kill()
  43                 process.wait()
  44                 raise
  45             returncode = process.poll()
  46             if check and returncode:
  47                 raise subprocess.CalledProcessError(returncode, popenargs)
  48         return _CompletedProcess(popenargs, returncode, stdout, stderr)
  49
  50     subprocess.run = _run
  51
  52 # Number of errors encountered.  Zero means no errors (test passes).
  53 errors = 0
  54
  55 def parse_line(line):
  56     """Parse a line of --list-diagnostics output.
  57
  58     This function returns a pair (SUBSCRIPTS, VALUE).  VALUE is either
  59     a byte string or an integer.  SUBSCRIPT is a tuple of (LABEL,
  60     INDEX) pairs, where LABEL is a field identifier (a string), and
  61     INDEX is an integer or None, to indicate that this field is not
  62     indexed.
  63
  64     """
  65
  66     # Extract the list of subscripts before the value.
  67     idx = 0
  68     subscripts = []
  69     while line[idx] != '=':
  70         start_idx = idx
  71
  72         # Extract the label.
  73         while line[idx] not in '[.=':
  74             idx += 1
  75         label = line[start_idx:idx]
  76
  77         if line[idx] == '[':
  78             # Subscript with a 0x index.
  79             assert label
  80             close_bracket = line.index(']', idx)
  81             index = line[idx + 1:close_bracket]
  82             assert index.startswith('0x')
  83             index = int(index, 0)
  84             subscripts.append((label, index))
  85             idx = close_bracket + 1
  86         else: # '.' or '='.
  87             if label:
  88                 subscripts.append((label, None))
  89             if line[idx] == '.':
  90                 idx += 1
  91
  92     # The value is either a string or a 0x number.
  93     value = line[idx + 1:]
  94     if value[0] == '"':
  95         # Decode the escaped string into a byte string.
  96         assert value[-1] == '"'
  97         idx = 1
  98         result = []
  99         while True:
 100             ch = value[idx]
 101             if ch == '\\':
 102                 if value[idx + 1] in '"\\':
 103                     result.append(ord(value[idx + 1]))
 104                     idx += 2
 105                 else:
 106                     result.append(int(value[idx + 1:idx + 4], 8))
 107                     idx += 4
 108             elif ch == '"':
 109                 assert idx == len(value) - 1
 110                 break
 111             else:
 112                 result.append(ord(value[idx]))
 113                 idx += 1
 114         value = bytes(result)
 115     else:
 116         # Convert the value into an integer.
 117         assert value.startswith('0x')
 118         value = int(value, 0)
 119     return (tuple(subscripts), value)
 120
# Self-tests for parse_line.  They run every time this module is loaded.
assert parse_line('a.b[0x1]=0x2') == ((('a', None), ('b', 1)), 2)
assert parse_line(r'b[0x3]="four\040\"\\"') == ((('b', 3),), b'four \"\\')

# ABNF for a line of --list-diagnostics output.
# check_consistency_with_manual compares this text (without its
# leading blank line) against the fragment in the manual, so any
# change here has to be mirrored there.
# NOTE(review): some ranges do not match their own comments: %x61-6f
# spans "a" through "o" (a-f would be %x61-66), %x7f is DEL rather
# than underscore (%x5f), and %x23-5c in string-char includes the
# backslash (%x5c).  Confirm and fix together with the manual.
diagnostics_abnf = r"""
HEXDIG = %x30-39 / %x61-6f ; lowercase a-f only
ALPHA = %x41-5a / %x61-7a / %x7f ; letters and underscore
ALPHA-NUMERIC = ALPHA / %x30-39 / "_"
DQUOTE = %x22 ; "

; Numbers are always hexadecimal and use a 0x prefix.
hex-value-prefix = %x30 %x78
hex-value = hex-value-prefix 1*HEXDIG

; Strings use octal escape sequences and \\, \".
string-char = %x20-21 / %x23-5c / %x5d-7e ; printable but not "\
string-quoted-octal = %x30-33 2*2%x30-37
string-quoted = "\" ("\" / DQUOTE / string-quoted-octal)
string-value = DQUOTE *(string-char / string-quoted) DQUOTE

value = hex-value / string-value

label = ALPHA *ALPHA-NUMERIC
index = "[" hex-value "]"
subscript = label [index]

line = subscript *("." subscript) "=" value
"""
 149
 150 def check_consistency_with_manual(manual_path):
 151     """Verify that the code fragments in the manual match this script.
 152
 153     The code fragments are duplicated to clarify the dual license.
 154     """
 155
 156     global errors
 157
 158     def extract_lines(path, start_line, end_line, skip_lines=()):
 159         result = []
 160         with open(path) as inp:
 161             capturing = False
 162             for line in inp:
 163                 if line.strip() == start_line:
 164                     capturing = True
 165                 elif not capturing or line.strip() in skip_lines:
 166                     continue
 167                 elif line.strip() == end_line:
 168                     capturing = False
 169                 else:
 170                     result.append(line)
 171         if not result:
 172             raise ValueError('{!r} not found in {!r}'.format(start_line, path))
 173         if capturing:
 174             raise ValueError('{!r} not found in {!r}'.format(end_line, path))
 175         return result
 176
 177     def check(name, manual, script):
 178         global errors
 179
 180         if manual == script:
 181             return
 182         print('error: {} fragment in manual is different'.format(name))
 183         import difflib
 184         sys.stdout.writelines(difflib.unified_diff(
 185             manual, script, fromfile='manual', tofile='script'))
 186         errors += 1
 187
 188     manual_abnf = extract_lines(manual_path,
 189                                 '@c ABNF-START', '@end smallexample',
 190                                 skip_lines=('@smallexample',))
 191     check('ABNF', diagnostics_abnf.splitlines(keepends=True)[1:], manual_abnf)
 192
 193 # If the abnf module can be imported, run an additional check that the
 194 # 'line' production from the ABNF grammar matches --list-diagnostics
 195 # output lines.
 196 try:
 197     import abnf
 198 except ImportError:
 199     abnf = None
 200     print('info: skipping ABNF validation because the abnf module is missing')
 201
 202 if abnf is not None:
 203     class Grammar(abnf.Rule):
 204         pass
 205
 206     Grammar.load_grammar(diagnostics_abnf)
 207
 208     def parse_abnf(line):
 209         global errors
 210
 211         # Just verify that the line parses.
 212         try:
 213             Grammar('line').parse_all(line)
 214         except abnf.ParseError:
 215             print('error: ABNF parse error:', repr(line))
 216             errors += 1
 217 else:
 218     def parse_abnf(line):
 219         pass
 220
 221
 222 def parse_diagnostics(cmd):
 223     global errors
 224     diag_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
 225                               universal_newlines=True, shell=True).stdout
 226     if diag_out[-1] != '\n':
 227         print('error: ld.so output does not end in newline')
 228         errors += 1
 229
 230     PathType = collections.namedtuple('PathType',
 231                                       'has_index value_type original_line')
 232     # Mapping tuples of labels to PathType values.
 233     path_types = {}
 234
 235     seen_subscripts = {}
 236
 237     for line in diag_out.splitlines():
 238         parse_abnf(line)
 239         subscripts, value = parse_line(line)
 240
 241         # Check for duplicates.
 242         if subscripts in seen_subscripts:
 243             print('error: duplicate value assignment:', repr(line))
 244             print('  previous line:,', repr(seen_subscripts[line]))
 245             errors += 1
 246         else:
 247             seen_subscripts[subscripts] = line
 248
 249         # Compare types against the previously seen labels.
 250         labels = tuple([label for label, index in subscripts])
 251         has_index = tuple([index is not None for label, index in subscripts])
 252         value_type = type(value)
 253         if labels in path_types:
 254             previous_type = path_types[labels]
 255             if has_index != previous_type.has_index:
 256                 print('error: line has mismatch of indexing:', repr(line))
 257                 print('  index types:', has_index)
 258                 print('  previous:   ', previous_type.has_index)
 259                 print('  previous line:', repr(previous_type.original_line))
 260                 errors += 1
 261             if value_type != previous_type.value_type:
 262                 print('error: line has mismatch of value type:', repr(line))
 263                 print('  value type:', value_type.__name__)
 264                 print('  previous:  ', previous_type.value_type.__name__)
 265                 print('  previous line:', repr(previous_type.original_line))
 266                 errors += 1
 267         else:
 268             path_types[labels] = PathType(has_index, value_type, line)
 269
 270         # Check that this line does not add indexing to a previous value.
 271         for idx in range(1, len(subscripts) - 1):
 272             if subscripts[:idx] in path_types:
 273                 print('error: line assigns to atomic value:', repr(line))
 274                 print('  previous line:', repr(previous_type.original_line))
 275                 errors += 1
 276
 277     if errors:
 278         sys.exit(1)
 279
 280 def get_parser():
 281     parser = argparse.ArgumentParser(description=__doc__)
 282     parser.add_argument('--manual',
 283                         help='path to .texi file for consistency checks')
 284     parser.add_argument('command',
 285                         help='comand to run')
 286     return parser
 287
 288
 289 def main(argv):
 290     parser = get_parser()
 291     opts = parser.parse_args(argv)
 292
 293     if opts.manual:
 294         check_consistency_with_manual(opts.manual)
 295
 296     parse_diagnostics(opts.command)
 297
 298     if errors:
 299         sys.exit(1)
 300
 301 if __name__ == '__main__':
 302     main(sys.argv[1:])