utils/update_mir_test_checks.py

   1 #!/usr/bin/env python
   2
   3 """Updates FileCheck checks in MIR tests.
   4
   5 This script is a utility to update MIR based tests with new FileCheck
   6 patterns.
   7
   8 The checks added by this script will cover the entire body of each
   9 function it handles. Virtual registers used are given names via
  10 FileCheck patterns, so if you do want to check a subset of the body it
  11 should be straightforward to trim out the irrelevant parts. None of
  12 the YAML metadata will be checked, other than function names.
  13
  14 If there are multiple llc commands in a test, the full set of checks
  15 will be repeated for each different check pattern. Checks for patterns
  16 that are common between different commands will be left as-is by
  17 default, or removed if the --remove-common-prefixes flag is provided.
  18 """
  19
  20 from __future__ import print_function
  21
  22 import argparse
  23 import collections
  24 import glob
  25 import os
  26 import re
  27 import subprocess
  28 import sys
  29
  30 from UpdateTestChecks import common
  31
# Matches a MIR `name:` key and captures the function name as `func`.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the `body: |` line that opens a MIR function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line such as `bb.0:` or `bb.1.entry:`.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register `%N` (captured as group 1), optionally followed
# by a `:suffix` and/or a parenthesised annotation.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Regex fragment for zero or more instruction flags that may sit between the
# `=` of a definition and the opcode.
MI_FLAGS_STR= (
    r'(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn '
    r'|reassoc |nuw |nsw |exact |fpexcept )*')
# Matches an instruction that defines one or more vregs. The comma-separated
# definitions are captured as `vregs` and the opcode as `opcode`.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) = '
    r'{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern, MI_FLAGS_STR))
# Matches the lines that update_test_file passes through before inserting the
# checks of a single-block function: `;` comments, a basic block label, a
# lowercase `word:` line (e.g. `liveins:`), or a blank line.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches an IR `define` line and captures the function name as `func`.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches the lines that update_test_file passes through before inserting the
# checks of an IR function: `;` comments and blank lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one whole MIR document in tool output, from the `---` line to the
# closing `...` line, capturing the function name as `func` and the text
# after `body: |` as `body`. re.M anchors `^`/`$` at every line and re.S lets
# `.` span newlines.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
  57
  58
  59 class LLC:
  60     def __init__(self, bin):
  61         self.bin = bin
  62
  63     def __call__(self, args, ir):
  64         if ir.endswith('.mir'):
  65             args = '{} -x mir'.format(args)
  66         with open(ir) as ir_file:
  67             stdout = subprocess.check_output('{} {}'.format(self.bin, args),
  68                                              shell=True, stdin=ir_file)
  69             if sys.version_info[0] > 2:
  70               stdout = stdout.decode()
  71             # Fix line endings to unix CR style.
  72             stdout = stdout.replace('\r\n', '\n')
  73         return stdout
  74
  75
  76 class Run:
  77     def __init__(self, prefixes, cmd_args, triple):
  78         self.prefixes = prefixes
  79         self.cmd_args = cmd_args
  80         self.triple = triple
  81
  82     def __getitem__(self, index):
  83         return [self.prefixes, self.cmd_args, self.triple][index]
  84
  85
  86 def log(msg, verbose=True):
  87     if verbose:
  88         print(msg, file=sys.stderr)
  89
  90
  91 def find_triple_in_ir(lines, verbose=False):
  92     for l in lines:
  93         m = common.TRIPLE_IR_RE.match(l)
  94         if m:
  95             return m.group(1)
  96     return None
  97
  98
  99 def find_run_lines(test, lines, verbose=False):
 100     raw_lines = [m.group(1)
 101                  for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
 102     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 103     for l in raw_lines[1:]:
 104         if run_lines[-1].endswith("\\"):
 105             run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 106         else:
 107             run_lines.append(l)
 108     if verbose:
 109         log('Found {} RUN lines:'.format(len(run_lines)))
 110         for l in run_lines:
 111             log('  RUN: {}'.format(l))
 112     return run_lines
 113
 114
 115 def build_run_list(test, run_lines, verbose=False):
 116     run_list = []
 117     all_prefixes = []
 118     for l in run_lines:
 119         if '|' not in l:
 120             common.warn('Skipping unparseable RUN line: ' + l)
 121             continue
 122
 123         commands = [cmd.strip() for cmd in l.split('|', 1)]
 124         llc_cmd = commands[0]
 125         filecheck_cmd = commands[1] if len(commands) > 1 else ''
 126         common.verify_filecheck_prefixes(filecheck_cmd)
 127
 128         if not llc_cmd.startswith('llc '):
 129             common.warn('Skipping non-llc RUN line: {}'.format(l), test_file=test)
 130             continue
 131         if not filecheck_cmd.startswith('FileCheck '):
 132             common.warn('Skipping non-FileChecked RUN line: {}'.format(l),
 133                  test_file=test)
 134             continue
 135
 136         triple = None
 137         m = common.TRIPLE_ARG_RE.search(llc_cmd)
 138         if m:
 139             triple = m.group(1)
 140         # If we find -march but not -mtriple, use that.
 141         m = common.MARCH_ARG_RE.search(llc_cmd)
 142         if m and not triple:
 143             triple = '{}--'.format(m.group(1))
 144
 145         cmd_args = llc_cmd[len('llc'):].strip()
 146         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 147
 148         check_prefixes = [
 149             item
 150             for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
 151             for item in m.group(1).split(',')]
 152         if not check_prefixes:
 153             check_prefixes = ['CHECK']
 154         all_prefixes += check_prefixes
 155
 156         run_list.append(Run(check_prefixes, cmd_args, triple))
 157
 158     # Remove any common prefixes. We'll just leave those entirely alone.
 159     common_prefixes = set([prefix for prefix in all_prefixes
 160                            if all_prefixes.count(prefix) > 1])
 161     for run in run_list:
 162         run.prefixes = [p for p in run.prefixes if p not in common_prefixes]
 163
 164     return run_list, common_prefixes
 165
 166
 167 def find_functions_with_one_bb(lines, verbose=False):
 168     result = []
 169     cur_func = None
 170     bbs = 0
 171     for line in lines:
 172         m = MIR_FUNC_NAME_RE.match(line)
 173         if m:
 174             if bbs == 1:
 175                 result.append(cur_func)
 176             cur_func = m.group('func')
 177             bbs = 0
 178         m = MIR_BASIC_BLOCK_RE.match(line)
 179         if m:
 180             bbs += 1
 181     if bbs == 1:
 182         result.append(cur_func)
 183     return result
 184
 185
 186 def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
 187                                    func_dict, verbose):
 188     for m in MIR_FUNC_RE.finditer(raw_tool_output):
 189         func = m.group('func')
 190         body = m.group('body')
 191         if verbose:
 192             log('Processing function: {}'.format(func))
 193             for l in body.splitlines():
 194                 log('  {}'.format(l))
 195         for prefix in prefixes:
 196             if func in func_dict[prefix] and func_dict[prefix][func] != body:
 197                 common.warn('Found conflicting asm for prefix: {}'.format(prefix),
 198                      test_file=test)
 199             func_dict[prefix][func] = body
 200
 201
 202 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
 203                             single_bb, verbose=False):
 204     printed_prefixes = set()
 205     for run in run_list:
 206         for prefix in run.prefixes:
 207             if prefix in printed_prefixes:
 208                 continue
 209             if not func_dict[prefix][func_name]:
 210                 continue
 211             # if printed_prefixes:
 212             #     # Add some space between different check prefixes.
 213             #     output_lines.append('')
 214             printed_prefixes.add(prefix)
 215             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
 216             add_check_lines(test, output_lines, prefix, func_name, single_bb,
 217                             func_dict[prefix][func_name].splitlines())
 218             break
 219     return output_lines
 220
 221
 222 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 223                     func_body):
 224     if single_bb:
 225         # Don't bother checking the basic block label for a single BB
 226         func_body.pop(0)
 227
 228     if not func_body:
 229         common.warn('Function has no instructions to check: {}'.format(func_name),
 230              test_file=test)
 231         return
 232
 233     first_line = func_body[0]
 234     indent = len(first_line) - len(first_line.lstrip(' '))
 235     # A check comment, indented the appropriate amount
 236     check = '{:>{}}; {}'.format('', indent, prefix)
 237
 238     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 239
 240     vreg_map = {}
 241     for func_line in func_body:
 242         if not func_line.strip():
 243             continue
 244         m = VREG_DEF_RE.match(func_line)
 245         if m:
 246             for vreg in VREG_RE.finditer(m.group('vregs')):
 247                 name = mangle_vreg(m.group('opcode'), vreg_map.values())
 248                 vreg_map[vreg.group(1)] = name
 249                 func_line = func_line.replace(
 250                     vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
 251         for number, name in vreg_map.items():
 252             func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
 253                                func_line)
 254         check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
 255         output_lines.append(check_line)
 256
 257
 258 def mangle_vreg(opcode, current_names):
 259     base = opcode
 260     # Simplify some common prefixes and suffixes
 261     if opcode.startswith('G_'):
 262         base = base[len('G_'):]
 263     if opcode.endswith('_PSEUDO'):
 264         base = base[:len('_PSEUDO')]
 265     # Shorten some common opcodes with long-ish names
 266     base = dict(IMPLICIT_DEF='DEF',
 267                 GLOBAL_VALUE='GV',
 268                 CONSTANT='C',
 269                 FCONSTANT='C',
 270                 MERGE_VALUES='MV',
 271                 UNMERGE_VALUES='UV',
 272                 INTRINSIC='INT',
 273                 INTRINSIC_W_SIDE_EFFECTS='INT',
 274                 INSERT_VECTOR_ELT='IVEC',
 275                 EXTRACT_VECTOR_ELT='EVEC',
 276                 SHUFFLE_VECTOR='SHUF').get(base, base)
 277     # Avoid ambiguity when opcodes end in numbers
 278     if len(base.rstrip('0123456789')) < len(base):
 279         base += '_'
 280
 281     i = 0
 282     for name in current_names:
 283         if name.rstrip('0123456789') == base:
 284             i += 1
 285     if i:
 286         return '{}{}'.format(base, i)
 287     return base
 288
 289
 290 def should_add_line_to_output(input_line, prefix_set):
 291     # Skip any check lines that we're handling.
 292     m = common.CHECK_RE.match(input_line)
 293     if m and m.group(1) in prefix_set:
 294         return False
 295     return True
 296
 297
def update_test_file(args, test):
    """Regenerate the FileCheck lines of one test file in place.

    Reads `test`, runs `args.llc` once per usable RUN line, and rewrites the
    file with fresh check lines for every function whose body was recorded.
    Returns early with a warning, leaving the file untouched, when the file
    was autogenerated by a different script, when `args.update_only` is set
    and the file is not autogenerated, or when no triple can be found.

    `args` is the parsed command line; this function reads `verbose`,
    `update_only`, `remove_common_prefixes` and `llc` from it.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), args.verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    # Leave alone tests whose first line says another script generated them.
    script_name = os.path.basename(__file__)
    first_line = input_lines[0] if input_lines else ""
    if 'autogenerated' in first_line and script_name not in first_line:
        common.warn("Skipping test which wasn't autogenerated by " +
                    script_name + ": " + test)
        return

    if args.update_only:
      if not first_line or 'autogenerated' not in first_line:
        common.warn("Skipping test which isn't autogenerated: " + test)
        return

    triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
    run_lines = find_run_lines(test, input_lines, args.verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, args.verbose)

    simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

    # func_dict maps each check prefix to a {function name: body} mapping.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    # Run objects support indexing, which is what allows this unpacking.
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

        # Note that the tool runs before the triple is checked.
        raw_tool_output = args.llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            common.warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, args.verbose)

    state = 'toplevel'
    func_name = None
    # Existing check lines with a prefix in prefix_set are dropped below.
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

    if args.remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        common.warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char, script_name))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Line-by-line state machine over the original file. The states are:
    #   toplevel              - outside any IR function or MIR document
    #   document              - inside a MIR document, before its `name:`
    #   mir function metadata - after `name:`, before `body: |`
    #   mir function prefix   - single-block body, before its first
    #                           instruction
    #   mir function body     - inside a MIR body, after checks were inserted
    #   ir function prefix    - after an IR `define`, before its first
    #                           instruction
    #   ir function body      - inside an IR function, up to the closing `}`
    for input_line in input_lines:
        # Drop an existing copy of the note; a fresh one was added above.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            # Top-level lines are always kept, including any check lines.
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)
        elif state == 'mir function prefix':
            # The first line that is not prefix data is where checks go.
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            # The first line that is not a comment or blank is where checks go.
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), args.verbose)

    # Binary mode plus an explicit '\n' gives LF line endings and UTF-8
    # encoding whatever the platform default is.
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
 428
 429
 430 def main():
 431     parser = argparse.ArgumentParser(
 432         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
 433     parser.add_argument('-v', '--verbose', action='store_true',
 434                         help='Show verbose output')
 435     parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
 436                         help='The "llc" binary to generate the test case with')
 437     parser.add_argument('--remove-common-prefixes', action='store_true',
 438                         help='Remove existing check lines whose prefixes are '
 439                              'shared between multiple commands')
 440     parser.add_argument('-u', '--update-only', action='store_true',
 441                         help='Only update test if it was already autogened')
 442     parser.add_argument('tests', nargs='+')
 443     args = parser.parse_args()
 444
 445     test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
 446     for test in test_paths:
 447         try:
 448             update_test_file(args, test)
 449         except Exception:
 450             common.warn('Error processing file', test_file=test)
 451             raise
 452
 453
 454 if __name__ == '__main__':
 455   main()